kodit 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +77 -28
- kodit/application/services/code_indexing_application_service.py +142 -116
- kodit/cli.py +14 -41
- kodit/domain/entities.py +268 -197
- kodit/domain/protocols.py +61 -0
- kodit/domain/services/embedding_service.py +1 -1
- kodit/domain/services/index_query_service.py +66 -0
- kodit/domain/services/index_service.py +323 -0
- kodit/domain/value_objects.py +150 -60
- kodit/infrastructure/cloning/git/working_copy.py +17 -8
- kodit/infrastructure/cloning/metadata.py +37 -67
- kodit/infrastructure/embedding/embedding_factory.py +1 -1
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
- kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
- kodit/infrastructure/git/git_utils.py +1 -63
- kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
- kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/__init__.py +1 -0
- kodit/infrastructure/mappers/index_mapper.py +344 -0
- kodit/infrastructure/snippet_extraction/factories.py +13 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
- kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/entities.py +203 -0
- kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
- kodit/mcp.py +0 -7
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
- kodit/utils/__init__.py +1 -0
- kodit/utils/path_utils.py +54 -0
- {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
- {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/RECORD +40 -44
- kodit/domain/enums.py +0 -9
- kodit/domain/repositories.py +0 -128
- kodit/domain/services/ignore_service.py +0 -45
- kodit/domain/services/indexing_service.py +0 -204
- kodit/domain/services/snippet_extraction_service.py +0 -89
- kodit/domain/services/snippet_service.py +0 -215
- kodit/domain/services/source_service.py +0 -85
- kodit/infrastructure/cloning/folder/__init__.py +0 -1
- kodit/infrastructure/cloning/folder/factory.py +0 -128
- kodit/infrastructure/cloning/folder/working_copy.py +0 -38
- kodit/infrastructure/cloning/git/factory.py +0 -153
- kodit/infrastructure/indexing/index_repository.py +0 -286
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
- kodit/infrastructure/sqlalchemy/repository.py +0 -133
- kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
- {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py
CHANGED
|
@@ -19,12 +19,14 @@ from kodit.config import (
|
|
|
19
19
|
with_session,
|
|
20
20
|
)
|
|
21
21
|
from kodit.domain.errors import EmptySourceError
|
|
22
|
-
from kodit.domain.services.
|
|
22
|
+
from kodit.domain.services.index_query_service import IndexQueryService
|
|
23
23
|
from kodit.domain.value_objects import (
|
|
24
24
|
MultiSearchRequest,
|
|
25
25
|
MultiSearchResult,
|
|
26
26
|
SnippetSearchFilters,
|
|
27
27
|
)
|
|
28
|
+
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
29
|
+
from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
|
|
28
30
|
from kodit.infrastructure.ui.progress import (
|
|
29
31
|
create_lazy_progress_callback,
|
|
30
32
|
create_multi_stage_progress_callback,
|
|
@@ -77,30 +79,28 @@ async def index(
|
|
|
77
79
|
) -> None:
|
|
78
80
|
"""List indexes, or index data sources."""
|
|
79
81
|
log = structlog.get_logger(__name__)
|
|
80
|
-
source_service = SourceService(
|
|
81
|
-
clone_dir=app_context.get_clone_dir(),
|
|
82
|
-
session_factory=lambda: session,
|
|
83
|
-
)
|
|
84
82
|
service = create_code_indexing_application_service(
|
|
85
83
|
app_context=app_context,
|
|
86
84
|
session=session,
|
|
87
|
-
|
|
85
|
+
)
|
|
86
|
+
index_query_service = IndexQueryService(
|
|
87
|
+
index_repository=SqlAlchemyIndexRepository(session=session),
|
|
88
|
+
fusion_service=ReciprocalRankFusionService(),
|
|
88
89
|
)
|
|
89
90
|
|
|
90
91
|
if auto_index:
|
|
91
92
|
log.info("Auto-indexing configuration", config=app_context.auto_indexing)
|
|
92
|
-
|
|
93
|
-
if not auto_sources:
|
|
93
|
+
if not app_context.auto_indexing or not app_context.auto_indexing.sources:
|
|
94
94
|
click.echo("No auto-index sources configured.")
|
|
95
95
|
return
|
|
96
|
-
|
|
96
|
+
auto_sources = app_context.auto_indexing.sources
|
|
97
97
|
click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
|
|
98
98
|
sources = [source.uri for source in auto_sources]
|
|
99
99
|
|
|
100
100
|
if not sources:
|
|
101
101
|
log_event("kodit.cli.index.list")
|
|
102
102
|
# No source specified, list all indexes
|
|
103
|
-
indexes = await
|
|
103
|
+
indexes = await index_query_service.list_indexes()
|
|
104
104
|
headers: list[str | Cell] = [
|
|
105
105
|
"ID",
|
|
106
106
|
"Created At",
|
|
@@ -113,8 +113,8 @@ async def index(
|
|
|
113
113
|
index.id,
|
|
114
114
|
index.created_at,
|
|
115
115
|
index.updated_at,
|
|
116
|
-
index.source,
|
|
117
|
-
index.
|
|
116
|
+
index.source.working_copy.remote_uri,
|
|
117
|
+
len(index.source.working_copy.files),
|
|
118
118
|
]
|
|
119
119
|
for index in indexes
|
|
120
120
|
]
|
|
@@ -131,14 +131,12 @@ async def index(
|
|
|
131
131
|
|
|
132
132
|
# Create a lazy progress callback that only shows progress when needed
|
|
133
133
|
progress_callback = create_lazy_progress_callback()
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
index = await service.create_index(s.id)
|
|
134
|
+
index = await service.create_index_from_uri(source, progress_callback)
|
|
137
135
|
|
|
138
136
|
# Create a new progress callback for the indexing operations
|
|
139
137
|
indexing_progress_callback = create_multi_stage_progress_callback()
|
|
140
138
|
try:
|
|
141
|
-
await service.run_index(index
|
|
139
|
+
await service.run_index(index, indexing_progress_callback)
|
|
142
140
|
except EmptySourceError as e:
|
|
143
141
|
log.exception("Empty source error", error=e)
|
|
144
142
|
msg = f"""{e}. This could mean:
|
|
@@ -243,14 +241,9 @@ async def code( # noqa: PLR0913
|
|
|
243
241
|
This works best if your query is code.
|
|
244
242
|
"""
|
|
245
243
|
log_event("kodit.cli.search.code")
|
|
246
|
-
source_service = SourceService(
|
|
247
|
-
clone_dir=app_context.get_clone_dir(),
|
|
248
|
-
session_factory=lambda: session,
|
|
249
|
-
)
|
|
250
244
|
service = create_code_indexing_application_service(
|
|
251
245
|
app_context=app_context,
|
|
252
246
|
session=session,
|
|
253
|
-
source_service=source_service,
|
|
254
247
|
)
|
|
255
248
|
|
|
256
249
|
filters = _parse_filters(
|
|
@@ -304,14 +297,9 @@ async def keyword( # noqa: PLR0913
|
|
|
304
297
|
) -> None:
|
|
305
298
|
"""Search for snippets using keyword search."""
|
|
306
299
|
log_event("kodit.cli.search.keyword")
|
|
307
|
-
source_service = SourceService(
|
|
308
|
-
clone_dir=app_context.get_clone_dir(),
|
|
309
|
-
session_factory=lambda: session,
|
|
310
|
-
)
|
|
311
300
|
service = create_code_indexing_application_service(
|
|
312
301
|
app_context=app_context,
|
|
313
302
|
session=session,
|
|
314
|
-
source_service=source_service,
|
|
315
303
|
)
|
|
316
304
|
|
|
317
305
|
filters = _parse_filters(
|
|
@@ -368,14 +356,9 @@ async def text( # noqa: PLR0913
|
|
|
368
356
|
This works best if your query is text.
|
|
369
357
|
"""
|
|
370
358
|
log_event("kodit.cli.search.text")
|
|
371
|
-
source_service = SourceService(
|
|
372
|
-
clone_dir=app_context.get_clone_dir(),
|
|
373
|
-
session_factory=lambda: session,
|
|
374
|
-
)
|
|
375
359
|
service = create_code_indexing_application_service(
|
|
376
360
|
app_context=app_context,
|
|
377
361
|
session=session,
|
|
378
|
-
source_service=source_service,
|
|
379
362
|
)
|
|
380
363
|
|
|
381
364
|
filters = _parse_filters(
|
|
@@ -433,14 +416,9 @@ async def hybrid( # noqa: PLR0913
|
|
|
433
416
|
) -> None:
|
|
434
417
|
"""Search for snippets using hybrid search."""
|
|
435
418
|
log_event("kodit.cli.search.hybrid")
|
|
436
|
-
source_service = SourceService(
|
|
437
|
-
clone_dir=app_context.get_clone_dir(),
|
|
438
|
-
session_factory=lambda: session,
|
|
439
|
-
)
|
|
440
419
|
service = create_code_indexing_application_service(
|
|
441
420
|
app_context=app_context,
|
|
442
421
|
session=session,
|
|
443
|
-
source_service=source_service,
|
|
444
422
|
)
|
|
445
423
|
|
|
446
424
|
# Parse keywords into a list of strings
|
|
@@ -490,14 +468,9 @@ async def snippets(
|
|
|
490
468
|
) -> None:
|
|
491
469
|
"""Show snippets with optional filtering by path or source."""
|
|
492
470
|
log_event("kodit.cli.show.snippets")
|
|
493
|
-
source_service = SourceService(
|
|
494
|
-
clone_dir=app_context.get_clone_dir(),
|
|
495
|
-
session_factory=lambda: session,
|
|
496
|
-
)
|
|
497
471
|
service = create_code_indexing_application_service(
|
|
498
472
|
app_context=app_context,
|
|
499
473
|
session=session,
|
|
500
|
-
source_service=source_service,
|
|
501
474
|
)
|
|
502
475
|
snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
|
|
503
476
|
if output_format == "text":
|
kodit/domain/entities.py
CHANGED
|
@@ -1,200 +1,271 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
from datetime import UTC, datetime
|
|
4
|
-
from enum import Enum
|
|
5
|
-
|
|
6
|
-
from git import Actor
|
|
7
|
-
from sqlalchemy import (
|
|
8
|
-
DateTime,
|
|
9
|
-
ForeignKey,
|
|
10
|
-
Integer,
|
|
11
|
-
String,
|
|
12
|
-
UnicodeText,
|
|
13
|
-
UniqueConstraint,
|
|
14
|
-
)
|
|
15
|
-
from sqlalchemy import Enum as SQLAlchemyEnum
|
|
16
|
-
from sqlalchemy.ext.asyncio import AsyncAttrs
|
|
17
|
-
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
18
|
-
from sqlalchemy.types import JSON
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class Base(AsyncAttrs, DeclarativeBase):
|
|
22
|
-
"""Base class for all models."""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class CommonMixin:
|
|
26
|
-
"""Common mixin for all models."""
|
|
27
|
-
|
|
28
|
-
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
29
|
-
created_at: Mapped[datetime] = mapped_column(
|
|
30
|
-
DateTime(timezone=True), default=lambda: datetime.now(UTC)
|
|
31
|
-
)
|
|
32
|
-
updated_at: Mapped[datetime] = mapped_column(
|
|
33
|
-
DateTime(timezone=True),
|
|
34
|
-
default=lambda: datetime.now(UTC),
|
|
35
|
-
onupdate=lambda: datetime.now(UTC),
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class SourceType(Enum):
|
|
40
|
-
"""The type of source."""
|
|
41
|
-
|
|
42
|
-
UNKNOWN = 0
|
|
43
|
-
FOLDER = 1
|
|
44
|
-
GIT = 2
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class Source(Base, CommonMixin):
|
|
48
|
-
"""Base model for tracking code sources.
|
|
49
|
-
|
|
50
|
-
This model serves as the parent table for different types of sources.
|
|
51
|
-
It provides common fields and relationships for all source types.
|
|
52
|
-
|
|
53
|
-
Attributes:
|
|
54
|
-
id: The unique identifier for the source.
|
|
55
|
-
created_at: Timestamp when the source was created.
|
|
56
|
-
updated_at: Timestamp when the source was last updated.
|
|
57
|
-
cloned_uri: A URI to a copy of the source on the local filesystem.
|
|
58
|
-
uri: The URI of the source.
|
|
59
|
-
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
__tablename__ = "sources"
|
|
63
|
-
uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
|
|
64
|
-
cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
|
|
65
|
-
type: Mapped[SourceType] = mapped_column(
|
|
66
|
-
SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
|
|
70
|
-
"""Initialize a new Source instance for typing purposes."""
|
|
71
|
-
super().__init__()
|
|
72
|
-
self.uri = uri
|
|
73
|
-
self.cloned_path = cloned_path
|
|
74
|
-
self.type = source_type
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class Author(Base, CommonMixin):
|
|
78
|
-
"""Author model."""
|
|
79
|
-
|
|
80
|
-
__tablename__ = "authors"
|
|
81
|
-
|
|
82
|
-
__table_args__ = (UniqueConstraint("name", "email", name="uix_author"),)
|
|
83
|
-
|
|
84
|
-
name: Mapped[str] = mapped_column(String(255), index=True)
|
|
85
|
-
email: Mapped[str] = mapped_column(String(255), index=True)
|
|
86
|
-
|
|
87
|
-
@staticmethod
|
|
88
|
-
def from_actor(actor: Actor) -> "Author":
|
|
89
|
-
"""Create an Author from an Actor."""
|
|
90
|
-
return Author(name=actor.name, email=actor.email)
|
|
91
|
-
|
|
1
|
+
"""Pure domain entities using Pydantic."""
|
|
92
2
|
|
|
93
|
-
|
|
94
|
-
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Protocol
|
|
7
|
+
from urllib.parse import urlparse, urlunparse
|
|
95
8
|
|
|
96
|
-
|
|
9
|
+
from pydantic import AnyUrl, BaseModel
|
|
97
10
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
self.uri
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
self.
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
class
|
|
145
|
-
"""
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
11
|
+
from kodit.domain.value_objects import (
|
|
12
|
+
FileProcessingStatus,
|
|
13
|
+
SnippetContent,
|
|
14
|
+
SnippetContentType,
|
|
15
|
+
SourceType,
|
|
16
|
+
)
|
|
17
|
+
from kodit.utils.path_utils import path_from_uri
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class IgnorePatternProvider(Protocol):
|
|
21
|
+
"""Protocol for ignore pattern providers."""
|
|
22
|
+
|
|
23
|
+
def should_ignore(self, path: Path) -> bool:
|
|
24
|
+
"""Check if a path should be ignored."""
|
|
25
|
+
...
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Author(BaseModel):
|
|
29
|
+
"""Author domain entity."""
|
|
30
|
+
|
|
31
|
+
id: int | None = None
|
|
32
|
+
name: str
|
|
33
|
+
email: str
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class File(BaseModel):
|
|
37
|
+
"""File domain entity."""
|
|
38
|
+
|
|
39
|
+
id: int | None = None # Is populated by repository
|
|
40
|
+
created_at: datetime | None = None # Is populated by repository
|
|
41
|
+
updated_at: datetime | None = None # Is populated by repository
|
|
42
|
+
uri: AnyUrl
|
|
43
|
+
sha256: str
|
|
44
|
+
authors: list[Author]
|
|
45
|
+
mime_type: str
|
|
46
|
+
file_processing_status: FileProcessingStatus
|
|
47
|
+
|
|
48
|
+
def as_path(self) -> Path:
|
|
49
|
+
"""Return the file as a path."""
|
|
50
|
+
return path_from_uri(str(self.uri))
|
|
51
|
+
|
|
52
|
+
def extension(self) -> str:
|
|
53
|
+
"""Return the file extension."""
|
|
54
|
+
return Path(self.as_path()).suffix.lstrip(".")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class WorkingCopy(BaseModel):
|
|
58
|
+
"""Working copy value object representing cloned source location."""
|
|
59
|
+
|
|
60
|
+
created_at: datetime | None = None # Is populated by repository
|
|
61
|
+
updated_at: datetime | None = None # Is populated by repository
|
|
62
|
+
remote_uri: AnyUrl
|
|
63
|
+
cloned_path: Path
|
|
64
|
+
source_type: SourceType
|
|
65
|
+
files: list[File]
|
|
66
|
+
|
|
67
|
+
@classmethod
|
|
68
|
+
def sanitize_local_path(cls, path: str) -> AnyUrl:
|
|
69
|
+
"""Sanitize a local path."""
|
|
70
|
+
return AnyUrl(Path(path).resolve().absolute().as_uri())
|
|
71
|
+
|
|
72
|
+
@classmethod
|
|
73
|
+
def sanitize_git_url(cls, url: str) -> AnyUrl:
|
|
74
|
+
"""Remove credentials from a git URL while preserving the rest of the URL.
|
|
75
|
+
|
|
76
|
+
This function handles various git URL formats:
|
|
77
|
+
- HTTPS URLs with username:password@host
|
|
78
|
+
- HTTPS URLs with username@host (no password)
|
|
79
|
+
- SSH URLs (left unchanged)
|
|
80
|
+
- File URLs (left unchanged)
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
url: The git URL that may contain credentials.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
The sanitized URL with credentials removed.
|
|
87
|
+
|
|
88
|
+
Examples:
|
|
89
|
+
>>> sanitize_git_url("https://phil:token@dev.azure.com/org/project/_git/repo")
|
|
90
|
+
"https://dev.azure.com/org/project/_git/repo"
|
|
91
|
+
>>> sanitize_git_url("https://username@github.com/user/repo.git")
|
|
92
|
+
"https://github.com/user/repo.git"
|
|
93
|
+
>>> sanitize_git_url("git@github.com:user/repo.git")
|
|
94
|
+
"ssh://git@github.com/user/repo.git"
|
|
95
|
+
|
|
96
|
+
"""
|
|
97
|
+
# Handle SSH URLs (they don't have credentials in the URL format)
|
|
98
|
+
if url.startswith("git@"):
|
|
99
|
+
# Convert git@host:path to ssh://git@host/path format for AnyUrl
|
|
100
|
+
# This maintains the same semantic meaning while making it a valid URL
|
|
101
|
+
if ":" in url and not url.startswith("ssh://"):
|
|
102
|
+
host_path = url[4:] # Remove "git@"
|
|
103
|
+
if ":" in host_path:
|
|
104
|
+
host, path = host_path.split(":", 1)
|
|
105
|
+
ssh_url = f"ssh://git@{host}/{path}"
|
|
106
|
+
return AnyUrl(ssh_url)
|
|
107
|
+
return AnyUrl(url)
|
|
108
|
+
if url.startswith("ssh://"):
|
|
109
|
+
return AnyUrl(url)
|
|
110
|
+
|
|
111
|
+
# Handle file URLs
|
|
112
|
+
if url.startswith("file://"):
|
|
113
|
+
return AnyUrl(url)
|
|
114
|
+
|
|
115
|
+
try:
|
|
116
|
+
# Parse the URL
|
|
117
|
+
parsed = urlparse(url)
|
|
118
|
+
|
|
119
|
+
# If there are no credentials, return the URL as-is
|
|
120
|
+
if not parsed.username:
|
|
121
|
+
return AnyUrl(url)
|
|
122
|
+
|
|
123
|
+
# Reconstruct the URL without credentials
|
|
124
|
+
# scheme, netloc (without username/password), path, params, query, fragment
|
|
125
|
+
sanitized_netloc = parsed.hostname
|
|
126
|
+
if parsed.port:
|
|
127
|
+
sanitized_netloc = f"{parsed.hostname}:{parsed.port}"
|
|
128
|
+
|
|
129
|
+
return AnyUrl(
|
|
130
|
+
urlunparse(
|
|
131
|
+
(
|
|
132
|
+
parsed.scheme,
|
|
133
|
+
sanitized_netloc,
|
|
134
|
+
parsed.path,
|
|
135
|
+
parsed.params,
|
|
136
|
+
parsed.query,
|
|
137
|
+
parsed.fragment,
|
|
138
|
+
)
|
|
139
|
+
)
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
except Exception as e:
|
|
143
|
+
raise ValueError(f"Invalid URL: {url}") from e
|
|
144
|
+
|
|
145
|
+
def modified_or_deleted_files(self) -> list[File]:
|
|
146
|
+
"""Return the modified or deleted files."""
|
|
147
|
+
return [
|
|
148
|
+
file
|
|
149
|
+
for file in self.files
|
|
150
|
+
if file.file_processing_status
|
|
151
|
+
in (FileProcessingStatus.MODIFIED, FileProcessingStatus.DELETED)
|
|
152
|
+
]
|
|
153
|
+
|
|
154
|
+
def list_filesystem_paths(
|
|
155
|
+
self, ignore_provider: IgnorePatternProvider
|
|
156
|
+
) -> list[Path]:
|
|
157
|
+
"""List the filesystem paths of the files in the working copy."""
|
|
158
|
+
if not self.cloned_path.exists():
|
|
159
|
+
raise ValueError(f"Cloned path does not exist: {self.cloned_path}")
|
|
160
|
+
|
|
161
|
+
return [
|
|
162
|
+
f
|
|
163
|
+
for f in self.cloned_path.rglob("*")
|
|
164
|
+
if f.is_file() and not ignore_provider.should_ignore(f)
|
|
165
|
+
]
|
|
166
|
+
|
|
167
|
+
def dirty_files(self) -> list[File]:
|
|
168
|
+
"""Return the dirty files."""
|
|
169
|
+
return [
|
|
170
|
+
file
|
|
171
|
+
for file in self.files
|
|
172
|
+
if file.file_processing_status
|
|
173
|
+
in (FileProcessingStatus.MODIFIED, FileProcessingStatus.ADDED)
|
|
174
|
+
]
|
|
175
|
+
|
|
176
|
+
def changed_files(self) -> list[File]:
|
|
177
|
+
"""Return the changed files."""
|
|
178
|
+
return [
|
|
179
|
+
file
|
|
180
|
+
for file in self.files
|
|
181
|
+
if file.file_processing_status != FileProcessingStatus.CLEAN
|
|
182
|
+
]
|
|
183
|
+
|
|
184
|
+
def clear_file_processing_statuses(self) -> None:
|
|
185
|
+
"""Clear the file processing statuses."""
|
|
186
|
+
# First remove any files that are marked for deletion
|
|
187
|
+
self.files = [
|
|
188
|
+
file
|
|
189
|
+
for file in self.files
|
|
190
|
+
if file.file_processing_status != FileProcessingStatus.DELETED
|
|
191
|
+
]
|
|
192
|
+
# Then clear the statuses for the remaining files
|
|
193
|
+
for file in self.files:
|
|
194
|
+
file.file_processing_status = FileProcessingStatus.CLEAN
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class Source(BaseModel):
|
|
198
|
+
"""Source domain entity."""
|
|
199
|
+
|
|
200
|
+
id: int | None = None # Is populated by repository
|
|
201
|
+
created_at: datetime | None = None # Is populated by repository
|
|
202
|
+
updated_at: datetime | None = None # Is populated by repository
|
|
203
|
+
working_copy: WorkingCopy
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class Snippet(BaseModel):
|
|
207
|
+
"""Snippet domain entity."""
|
|
208
|
+
|
|
209
|
+
id: int | None = None # Is populated by repository
|
|
210
|
+
created_at: datetime | None = None # Is populated by repository
|
|
211
|
+
updated_at: datetime | None = None # Is populated by repository
|
|
212
|
+
derives_from: list[File]
|
|
213
|
+
original_content: SnippetContent | None = None
|
|
214
|
+
summary_content: SnippetContent | None = None
|
|
215
|
+
|
|
216
|
+
def original_text(self) -> str:
|
|
217
|
+
"""Return the original content of the snippet."""
|
|
218
|
+
if self.original_content is None:
|
|
219
|
+
return ""
|
|
220
|
+
return self.original_content.value
|
|
221
|
+
|
|
222
|
+
def summary_text(self) -> str:
|
|
223
|
+
"""Return the summary content of the snippet."""
|
|
224
|
+
if self.summary_content is None:
|
|
225
|
+
return ""
|
|
226
|
+
return self.summary_content.value
|
|
227
|
+
|
|
228
|
+
def add_original_content(self, content: str, language: str) -> None:
|
|
229
|
+
"""Add an original content to the snippet."""
|
|
230
|
+
self.original_content = SnippetContent(
|
|
231
|
+
type=SnippetContentType.ORIGINAL,
|
|
232
|
+
value=content,
|
|
233
|
+
language=language,
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
def add_summary(self, summary: str) -> None:
|
|
237
|
+
"""Add a summary to the snippet."""
|
|
238
|
+
self.summary_content = SnippetContent(
|
|
239
|
+
type=SnippetContentType.SUMMARY,
|
|
240
|
+
value=summary,
|
|
241
|
+
language="markdown",
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class Index(BaseModel):
|
|
246
|
+
"""Index domain entity."""
|
|
247
|
+
|
|
248
|
+
id: int
|
|
249
|
+
created_at: datetime
|
|
250
|
+
updated_at: datetime
|
|
251
|
+
source: Source
|
|
252
|
+
snippets: list[Snippet]
|
|
253
|
+
|
|
254
|
+
def delete_snippets_for_files(self, files: list[File]) -> None:
|
|
255
|
+
"""Delete the snippets that derive from a list of files."""
|
|
256
|
+
self.snippets = [
|
|
257
|
+
snippet
|
|
258
|
+
for snippet in self.snippets
|
|
259
|
+
if not any(file in snippet.derives_from for file in files)
|
|
260
|
+
]
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# FUTURE: Remove this type, use the domain to get the required information.
|
|
264
|
+
@dataclass(frozen=True)
|
|
265
|
+
class SnippetWithContext:
|
|
266
|
+
"""Domain model for snippet with associated context information."""
|
|
267
|
+
|
|
268
|
+
source: Source
|
|
269
|
+
file: File
|
|
270
|
+
authors: list[Author]
|
|
271
|
+
snippet: Snippet
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Repository protocol interfaces for the domain layer."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from pydantic import AnyUrl
|
|
7
|
+
|
|
8
|
+
from kodit.domain.entities import Index, Snippet, SnippetWithContext, WorkingCopy
|
|
9
|
+
from kodit.domain.value_objects import MultiSearchRequest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class IndexRepository(Protocol):
|
|
13
|
+
"""Repository interface for Index entities."""
|
|
14
|
+
|
|
15
|
+
async def create(self, uri: AnyUrl, working_copy: WorkingCopy) -> Index:
|
|
16
|
+
"""Create an index for a source."""
|
|
17
|
+
...
|
|
18
|
+
|
|
19
|
+
async def update(self, index: Index) -> None:
|
|
20
|
+
"""Update an index."""
|
|
21
|
+
...
|
|
22
|
+
|
|
23
|
+
async def get(self, index_id: int) -> Index | None:
|
|
24
|
+
"""Get an index by ID."""
|
|
25
|
+
...
|
|
26
|
+
|
|
27
|
+
async def all(self) -> list[Index]:
|
|
28
|
+
"""List all indexes."""
|
|
29
|
+
...
|
|
30
|
+
|
|
31
|
+
async def get_by_uri(self, uri: AnyUrl) -> Index | None:
|
|
32
|
+
"""Get an index by source URI."""
|
|
33
|
+
...
|
|
34
|
+
|
|
35
|
+
async def update_index_timestamp(self, index_id: int) -> None:
|
|
36
|
+
"""Update the timestamp of an index."""
|
|
37
|
+
...
|
|
38
|
+
|
|
39
|
+
async def add_snippets(self, index_id: int, snippets: list[Snippet]) -> None:
|
|
40
|
+
"""Add snippets to an index."""
|
|
41
|
+
...
|
|
42
|
+
|
|
43
|
+
async def update_snippets(self, index_id: int, snippets: list[Snippet]) -> None:
|
|
44
|
+
"""Update snippets for an index."""
|
|
45
|
+
...
|
|
46
|
+
|
|
47
|
+
async def delete_snippets(self, index_id: int) -> None:
|
|
48
|
+
"""Delete all snippets from an index."""
|
|
49
|
+
...
|
|
50
|
+
|
|
51
|
+
async def delete_snippets_by_file_ids(self, file_ids: list[int]) -> None:
|
|
52
|
+
"""Delete snippets by file IDs."""
|
|
53
|
+
...
|
|
54
|
+
|
|
55
|
+
async def search(self, request: MultiSearchRequest) -> Sequence[SnippetWithContext]:
|
|
56
|
+
"""Search snippets with filters."""
|
|
57
|
+
...
|
|
58
|
+
|
|
59
|
+
async def get_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
|
|
60
|
+
"""Get snippets by their IDs."""
|
|
61
|
+
...
|