kodit 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/__init__.py +1 -0
- kodit/application/factories/code_indexing_factory.py +119 -0
- kodit/application/services/{indexing_application_service.py → code_indexing_application_service.py} +159 -198
- kodit/cli.py +199 -62
- kodit/domain/entities.py +7 -5
- kodit/domain/repositories.py +33 -0
- kodit/domain/services/bm25_service.py +14 -17
- kodit/domain/services/embedding_service.py +10 -14
- kodit/domain/services/snippet_service.py +198 -0
- kodit/domain/value_objects.py +301 -21
- kodit/infrastructure/bm25/local_bm25_repository.py +20 -12
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +31 -11
- kodit/infrastructure/cloning/git/working_copy.py +5 -2
- kodit/infrastructure/cloning/metadata.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +14 -25
- kodit/infrastructure/embedding/local_vector_search_repository.py +26 -38
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +50 -35
- kodit/infrastructure/enrichment/enrichment_factory.py +1 -1
- kodit/infrastructure/indexing/indexing_factory.py +8 -91
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +37 -0
- kodit/infrastructure/snippet_extraction/languages/java.scm +12 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +3 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +14 -3
- kodit/infrastructure/sqlalchemy/snippet_repository.py +174 -2
- kodit/mcp.py +61 -49
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/METADATA +1 -1
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/RECORD +31 -30
- kodit/application/commands/__init__.py +0 -1
- kodit/application/commands/snippet_commands.py +0 -22
- kodit/application/services/snippet_application_service.py +0 -149
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +0 -42
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/WHEEL +0 -0
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.7.dist-info → kodit-0.2.9.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import signal
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Any
|
|
@@ -11,8 +10,8 @@ import uvicorn
|
|
|
11
10
|
from pytable_formatter import Cell, Table
|
|
12
11
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
12
|
|
|
14
|
-
from kodit.application.
|
|
15
|
-
|
|
13
|
+
from kodit.application.factories.code_indexing_factory import (
|
|
14
|
+
create_code_indexing_application_service,
|
|
16
15
|
)
|
|
17
16
|
from kodit.config import (
|
|
18
17
|
AppContext,
|
|
@@ -21,14 +20,7 @@ from kodit.config import (
|
|
|
21
20
|
)
|
|
22
21
|
from kodit.domain.errors import EmptySourceError
|
|
23
22
|
from kodit.domain.services.source_service import SourceService
|
|
24
|
-
from kodit.domain.value_objects import MultiSearchRequest
|
|
25
|
-
from kodit.infrastructure.indexing.indexing_factory import (
|
|
26
|
-
create_indexing_application_service,
|
|
27
|
-
)
|
|
28
|
-
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
29
|
-
create_snippet_extraction_domain_service,
|
|
30
|
-
create_snippet_repositories,
|
|
31
|
-
)
|
|
23
|
+
from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
|
|
32
24
|
from kodit.infrastructure.ui.progress import (
|
|
33
25
|
create_lazy_progress_callback,
|
|
34
26
|
create_multi_stage_progress_callback,
|
|
@@ -36,33 +28,6 @@ from kodit.infrastructure.ui.progress import (
|
|
|
36
28
|
from kodit.log import configure_logging, configure_telemetry, log_event
|
|
37
29
|
|
|
38
30
|
|
|
39
|
-
def create_snippet_application_service(
|
|
40
|
-
session: AsyncSession,
|
|
41
|
-
) -> SnippetApplicationService:
|
|
42
|
-
"""Create a snippet application service with all dependencies.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
session: SQLAlchemy session
|
|
46
|
-
|
|
47
|
-
Returns:
|
|
48
|
-
Configured snippet application service
|
|
49
|
-
|
|
50
|
-
"""
|
|
51
|
-
# Create domain service
|
|
52
|
-
snippet_extraction_service = create_snippet_extraction_domain_service()
|
|
53
|
-
|
|
54
|
-
# Create repositories
|
|
55
|
-
snippet_repository, file_repository = create_snippet_repositories(session)
|
|
56
|
-
|
|
57
|
-
# Create application service
|
|
58
|
-
return SnippetApplicationService(
|
|
59
|
-
snippet_extraction_service=snippet_extraction_service,
|
|
60
|
-
snippet_repository=snippet_repository,
|
|
61
|
-
file_repository=file_repository,
|
|
62
|
-
session=session,
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
|
|
66
31
|
@click.group(context_settings={"max_content_width": 100})
|
|
67
32
|
@click.option(
|
|
68
33
|
"--env-file",
|
|
@@ -107,12 +72,10 @@ async def index(
|
|
|
107
72
|
clone_dir=app_context.get_clone_dir(),
|
|
108
73
|
session_factory=lambda: session,
|
|
109
74
|
)
|
|
110
|
-
|
|
111
|
-
service = create_indexing_application_service(
|
|
75
|
+
service = create_code_indexing_application_service(
|
|
112
76
|
app_context=app_context,
|
|
113
77
|
session=session,
|
|
114
78
|
source_service=source_service,
|
|
115
|
-
snippet_application_service=snippet_service,
|
|
116
79
|
)
|
|
117
80
|
|
|
118
81
|
if not sources:
|
|
@@ -173,16 +136,86 @@ def search() -> None:
|
|
|
173
136
|
"""Search for snippets in the database."""
|
|
174
137
|
|
|
175
138
|
|
|
139
|
+
# Utility for robust filter parsing
|
|
140
|
+
def _parse_filters(
|
|
141
|
+
language: str | None,
|
|
142
|
+
author: str | None,
|
|
143
|
+
created_after: str | None,
|
|
144
|
+
created_before: str | None,
|
|
145
|
+
source_repo: str | None,
|
|
146
|
+
) -> SnippetSearchFilters | None:
|
|
147
|
+
from datetime import datetime
|
|
148
|
+
|
|
149
|
+
# Normalize language to lowercase if provided
|
|
150
|
+
norm_language = language.lower() if language else None
|
|
151
|
+
# Try to parse dates, raise error if invalid
|
|
152
|
+
parsed_created_after = None
|
|
153
|
+
if created_after:
|
|
154
|
+
try:
|
|
155
|
+
parsed_created_after = datetime.fromisoformat(created_after)
|
|
156
|
+
except ValueError as err:
|
|
157
|
+
raise ValueError(
|
|
158
|
+
f"Invalid date format for --created-after: {created_after}. "
|
|
159
|
+
"Expected ISO 8601 format (YYYY-MM-DD)"
|
|
160
|
+
) from err
|
|
161
|
+
parsed_created_before = None
|
|
162
|
+
if created_before:
|
|
163
|
+
try:
|
|
164
|
+
parsed_created_before = datetime.fromisoformat(created_before)
|
|
165
|
+
except ValueError as err:
|
|
166
|
+
raise ValueError(
|
|
167
|
+
f"Invalid date format for --created-before: {created_before}. "
|
|
168
|
+
"Expected ISO 8601 format (YYYY-MM-DD)"
|
|
169
|
+
) from err
|
|
170
|
+
# Return None if no filters provided, otherwise return SnippetSearchFilters
|
|
171
|
+
# Check if any original parameters were provided (not just the parsed values)
|
|
172
|
+
if any(
|
|
173
|
+
[
|
|
174
|
+
language,
|
|
175
|
+
author,
|
|
176
|
+
created_after,
|
|
177
|
+
created_before,
|
|
178
|
+
source_repo,
|
|
179
|
+
]
|
|
180
|
+
):
|
|
181
|
+
return SnippetSearchFilters(
|
|
182
|
+
language=norm_language,
|
|
183
|
+
author=author,
|
|
184
|
+
created_after=parsed_created_after,
|
|
185
|
+
created_before=parsed_created_before,
|
|
186
|
+
source_repo=source_repo,
|
|
187
|
+
)
|
|
188
|
+
return None
|
|
189
|
+
|
|
190
|
+
|
|
176
191
|
@search.command()
|
|
177
192
|
@click.argument("query")
|
|
178
193
|
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
194
|
+
@click.option(
|
|
195
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
196
|
+
)
|
|
197
|
+
@click.option("--author", help="Filter by author name")
|
|
198
|
+
@click.option(
|
|
199
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
200
|
+
)
|
|
201
|
+
@click.option(
|
|
202
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
203
|
+
)
|
|
204
|
+
@click.option(
|
|
205
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
206
|
+
)
|
|
179
207
|
@with_app_context
|
|
180
208
|
@with_session
|
|
181
|
-
async def code(
|
|
209
|
+
async def code( # noqa: PLR0913
|
|
182
210
|
session: AsyncSession,
|
|
183
211
|
app_context: AppContext,
|
|
184
212
|
query: str,
|
|
185
213
|
top_k: int,
|
|
214
|
+
language: str | None,
|
|
215
|
+
author: str | None,
|
|
216
|
+
created_after: str | None,
|
|
217
|
+
created_before: str | None,
|
|
218
|
+
source_repo: str | None,
|
|
186
219
|
) -> None:
|
|
187
220
|
"""Search for snippets using semantic code search.
|
|
188
221
|
|
|
@@ -193,15 +226,19 @@ async def code(
|
|
|
193
226
|
clone_dir=app_context.get_clone_dir(),
|
|
194
227
|
session_factory=lambda: session,
|
|
195
228
|
)
|
|
196
|
-
|
|
197
|
-
service = create_indexing_application_service(
|
|
229
|
+
service = create_code_indexing_application_service(
|
|
198
230
|
app_context=app_context,
|
|
199
231
|
session=session,
|
|
200
232
|
source_service=source_service,
|
|
201
|
-
snippet_application_service=snippet_service,
|
|
202
233
|
)
|
|
203
234
|
|
|
204
|
-
|
|
235
|
+
filters = _parse_filters(
|
|
236
|
+
language, author, created_after, created_before, source_repo
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
snippets = await service.search(
|
|
240
|
+
MultiSearchRequest(code_query=query, top_k=top_k, filters=filters)
|
|
241
|
+
)
|
|
205
242
|
|
|
206
243
|
if len(snippets) == 0:
|
|
207
244
|
click.echo("No snippets found")
|
|
@@ -219,13 +256,31 @@ async def code(
|
|
|
219
256
|
@search.command()
|
|
220
257
|
@click.argument("keywords", nargs=-1)
|
|
221
258
|
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
259
|
+
@click.option(
|
|
260
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
261
|
+
)
|
|
262
|
+
@click.option("--author", help="Filter by author name")
|
|
263
|
+
@click.option(
|
|
264
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
265
|
+
)
|
|
266
|
+
@click.option(
|
|
267
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
268
|
+
)
|
|
269
|
+
@click.option(
|
|
270
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
271
|
+
)
|
|
222
272
|
@with_app_context
|
|
223
273
|
@with_session
|
|
224
|
-
async def keyword(
|
|
274
|
+
async def keyword( # noqa: PLR0913
|
|
225
275
|
session: AsyncSession,
|
|
226
276
|
app_context: AppContext,
|
|
227
277
|
keywords: list[str],
|
|
228
278
|
top_k: int,
|
|
279
|
+
language: str | None,
|
|
280
|
+
author: str | None,
|
|
281
|
+
created_after: str | None,
|
|
282
|
+
created_before: str | None,
|
|
283
|
+
source_repo: str | None,
|
|
229
284
|
) -> None:
|
|
230
285
|
"""Search for snippets using keyword search."""
|
|
231
286
|
log_event("kodit.cli.search.keyword")
|
|
@@ -233,15 +288,19 @@ async def keyword(
|
|
|
233
288
|
clone_dir=app_context.get_clone_dir(),
|
|
234
289
|
session_factory=lambda: session,
|
|
235
290
|
)
|
|
236
|
-
|
|
237
|
-
service = create_indexing_application_service(
|
|
291
|
+
service = create_code_indexing_application_service(
|
|
238
292
|
app_context=app_context,
|
|
239
293
|
session=session,
|
|
240
294
|
source_service=source_service,
|
|
241
|
-
snippet_application_service=snippet_service,
|
|
242
295
|
)
|
|
243
296
|
|
|
244
|
-
|
|
297
|
+
filters = _parse_filters(
|
|
298
|
+
language, author, created_after, created_before, source_repo
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
snippets = await service.search(
|
|
302
|
+
MultiSearchRequest(keywords=keywords, top_k=top_k, filters=filters)
|
|
303
|
+
)
|
|
245
304
|
|
|
246
305
|
if len(snippets) == 0:
|
|
247
306
|
click.echo("No snippets found")
|
|
@@ -259,13 +318,31 @@ async def keyword(
|
|
|
259
318
|
@search.command()
|
|
260
319
|
@click.argument("query")
|
|
261
320
|
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
321
|
+
@click.option(
|
|
322
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
323
|
+
)
|
|
324
|
+
@click.option("--author", help="Filter by author name")
|
|
325
|
+
@click.option(
|
|
326
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
327
|
+
)
|
|
328
|
+
@click.option(
|
|
329
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
330
|
+
)
|
|
331
|
+
@click.option(
|
|
332
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
333
|
+
)
|
|
262
334
|
@with_app_context
|
|
263
335
|
@with_session
|
|
264
|
-
async def text(
|
|
336
|
+
async def text( # noqa: PLR0913
|
|
265
337
|
session: AsyncSession,
|
|
266
338
|
app_context: AppContext,
|
|
267
339
|
query: str,
|
|
268
340
|
top_k: int,
|
|
341
|
+
language: str | None,
|
|
342
|
+
author: str | None,
|
|
343
|
+
created_after: str | None,
|
|
344
|
+
created_before: str | None,
|
|
345
|
+
source_repo: str | None,
|
|
269
346
|
) -> None:
|
|
270
347
|
"""Search for snippets using semantic text search.
|
|
271
348
|
|
|
@@ -276,15 +353,19 @@ async def text(
|
|
|
276
353
|
clone_dir=app_context.get_clone_dir(),
|
|
277
354
|
session_factory=lambda: session,
|
|
278
355
|
)
|
|
279
|
-
|
|
280
|
-
service = create_indexing_application_service(
|
|
356
|
+
service = create_code_indexing_application_service(
|
|
281
357
|
app_context=app_context,
|
|
282
358
|
session=session,
|
|
283
359
|
source_service=source_service,
|
|
284
|
-
snippet_application_service=snippet_service,
|
|
285
360
|
)
|
|
286
361
|
|
|
287
|
-
|
|
362
|
+
filters = _parse_filters(
|
|
363
|
+
language, author, created_after, created_before, source_repo
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
snippets = await service.search(
|
|
367
|
+
MultiSearchRequest(text_query=query, top_k=top_k, filters=filters)
|
|
368
|
+
)
|
|
288
369
|
|
|
289
370
|
if len(snippets) == 0:
|
|
290
371
|
click.echo("No snippets found")
|
|
@@ -304,6 +385,19 @@ async def text(
|
|
|
304
385
|
@click.option("--keywords", required=True, help="Comma separated list of keywords")
|
|
305
386
|
@click.option("--code", required=True, help="Semantic code search query")
|
|
306
387
|
@click.option("--text", required=True, help="Semantic text search query")
|
|
388
|
+
@click.option(
|
|
389
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
390
|
+
)
|
|
391
|
+
@click.option("--author", help="Filter by author name")
|
|
392
|
+
@click.option(
|
|
393
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
394
|
+
)
|
|
395
|
+
@click.option(
|
|
396
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
397
|
+
)
|
|
398
|
+
@click.option(
|
|
399
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
400
|
+
)
|
|
307
401
|
@with_app_context
|
|
308
402
|
@with_session
|
|
309
403
|
async def hybrid( # noqa: PLR0913
|
|
@@ -313,6 +407,11 @@ async def hybrid( # noqa: PLR0913
|
|
|
313
407
|
keywords: str,
|
|
314
408
|
code: str,
|
|
315
409
|
text: str,
|
|
410
|
+
language: str | None,
|
|
411
|
+
author: str | None,
|
|
412
|
+
created_after: str | None,
|
|
413
|
+
created_before: str | None,
|
|
414
|
+
source_repo: str | None,
|
|
316
415
|
) -> None:
|
|
317
416
|
"""Search for snippets using hybrid search."""
|
|
318
417
|
log_event("kodit.cli.search.hybrid")
|
|
@@ -320,23 +419,26 @@ async def hybrid( # noqa: PLR0913
|
|
|
320
419
|
clone_dir=app_context.get_clone_dir(),
|
|
321
420
|
session_factory=lambda: session,
|
|
322
421
|
)
|
|
323
|
-
|
|
324
|
-
service = create_indexing_application_service(
|
|
422
|
+
service = create_code_indexing_application_service(
|
|
325
423
|
app_context=app_context,
|
|
326
424
|
session=session,
|
|
327
425
|
source_service=source_service,
|
|
328
|
-
snippet_application_service=snippet_service,
|
|
329
426
|
)
|
|
330
427
|
|
|
331
428
|
# Parse keywords into a list of strings
|
|
332
429
|
keywords_list = [k.strip().lower() for k in keywords.split(",")]
|
|
333
430
|
|
|
431
|
+
filters = _parse_filters(
|
|
432
|
+
language, author, created_after, created_before, source_repo
|
|
433
|
+
)
|
|
434
|
+
|
|
334
435
|
snippets = await service.search(
|
|
335
436
|
MultiSearchRequest(
|
|
336
437
|
keywords=keywords_list,
|
|
337
438
|
code_query=code,
|
|
338
439
|
text_query=text,
|
|
339
440
|
top_k=top_k,
|
|
441
|
+
filters=filters,
|
|
340
442
|
)
|
|
341
443
|
)
|
|
342
444
|
|
|
@@ -353,6 +455,40 @@ async def hybrid( # noqa: PLR0913
|
|
|
353
455
|
click.echo()
|
|
354
456
|
|
|
355
457
|
|
|
458
|
+
@cli.group()
|
|
459
|
+
def show() -> None:
|
|
460
|
+
"""Show information about elements in the database."""
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
@show.command()
|
|
464
|
+
@click.option("--by-path", help="File or directory path to search for snippets")
|
|
465
|
+
@click.option("--by-source", help="Source URI to filter snippets by")
|
|
466
|
+
@with_app_context
|
|
467
|
+
@with_session
|
|
468
|
+
async def snippets(
|
|
469
|
+
session: AsyncSession,
|
|
470
|
+
app_context: AppContext,
|
|
471
|
+
by_path: str | None,
|
|
472
|
+
by_source: str | None,
|
|
473
|
+
) -> None:
|
|
474
|
+
"""Show snippets with optional filtering by path or source."""
|
|
475
|
+
log_event("kodit.cli.show.snippets")
|
|
476
|
+
source_service = SourceService(
|
|
477
|
+
clone_dir=app_context.get_clone_dir(),
|
|
478
|
+
session_factory=lambda: session,
|
|
479
|
+
)
|
|
480
|
+
service = create_code_indexing_application_service(
|
|
481
|
+
app_context=app_context,
|
|
482
|
+
session=session,
|
|
483
|
+
source_service=source_service,
|
|
484
|
+
)
|
|
485
|
+
snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
|
|
486
|
+
for snippet in snippets:
|
|
487
|
+
click.echo(f"{snippet.id}: [{snippet.source_uri}] {snippet.file_path}")
|
|
488
|
+
click.echo(f" {snippet.content}")
|
|
489
|
+
click.echo()
|
|
490
|
+
|
|
491
|
+
|
|
356
492
|
@cli.command()
|
|
357
493
|
@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
|
|
358
494
|
@click.option("--port", default=8080, help="Port to bind the server to")
|
|
@@ -393,9 +529,10 @@ def version() -> None:
|
|
|
393
529
|
from kodit import _version
|
|
394
530
|
except ImportError:
|
|
395
531
|
print("unknown, try running `uv build`, which is what happens in ci") # noqa: T201
|
|
396
|
-
|
|
397
|
-
|
|
532
|
+
return
|
|
533
|
+
|
|
534
|
+
print(f"kodit {_version.__version__}") # noqa: T201
|
|
398
535
|
|
|
399
536
|
|
|
400
537
|
if __name__ == "__main__":
|
|
401
|
-
|
|
538
|
+
cli()
|
kodit/domain/entities.py
CHANGED
|
@@ -121,22 +121,24 @@ class File(Base, CommonMixin):
|
|
|
121
121
|
created_at: datetime,
|
|
122
122
|
updated_at: datetime,
|
|
123
123
|
source_id: int,
|
|
124
|
+
mime_type: str,
|
|
125
|
+
uri: str,
|
|
124
126
|
cloned_path: str,
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
size_bytes: int = 0,
|
|
127
|
+
sha256: str,
|
|
128
|
+
size_bytes: int,
|
|
129
|
+
extension: str,
|
|
129
130
|
) -> None:
|
|
130
131
|
"""Initialize a new File instance for typing purposes."""
|
|
131
132
|
super().__init__()
|
|
132
133
|
self.created_at = created_at
|
|
133
134
|
self.updated_at = updated_at
|
|
134
135
|
self.source_id = source_id
|
|
135
|
-
self.cloned_path = cloned_path
|
|
136
136
|
self.mime_type = mime_type
|
|
137
137
|
self.uri = uri
|
|
138
|
+
self.cloned_path = cloned_path
|
|
138
139
|
self.sha256 = sha256
|
|
139
140
|
self.size_bytes = size_bytes
|
|
141
|
+
self.extension = extension
|
|
140
142
|
|
|
141
143
|
|
|
142
144
|
class EmbeddingType(Enum):
|
kodit/domain/repositories.py
CHANGED
|
@@ -11,6 +11,10 @@ from kodit.domain.entities import (
|
|
|
11
11
|
Source,
|
|
12
12
|
SourceType,
|
|
13
13
|
)
|
|
14
|
+
from kodit.domain.value_objects import (
|
|
15
|
+
MultiSearchRequest,
|
|
16
|
+
SnippetListItem,
|
|
17
|
+
)
|
|
14
18
|
|
|
15
19
|
T = TypeVar("T")
|
|
16
20
|
|
|
@@ -86,6 +90,35 @@ class SnippetRepository(GenericRepository[Snippet]):
|
|
|
86
90
|
"""Delete all snippets for an index."""
|
|
87
91
|
raise NotImplementedError
|
|
88
92
|
|
|
93
|
+
async def list_snippets(
|
|
94
|
+
self, file_path: str | None = None, source_uri: str | None = None
|
|
95
|
+
) -> Sequence[SnippetListItem]:
|
|
96
|
+
"""List snippets with optional filtering by file path and source URI.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
file_path: Optional file or directory path to filter by. Can be relative
|
|
100
|
+
(uri) or absolute (cloned_path).
|
|
101
|
+
source_uri: Optional source URI to filter by. If None, returns snippets from
|
|
102
|
+
all sources.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
A sequence of SnippetListItem instances matching the criteria
|
|
106
|
+
|
|
107
|
+
"""
|
|
108
|
+
raise NotImplementedError
|
|
109
|
+
|
|
110
|
+
async def search(self, request: MultiSearchRequest) -> Sequence[SnippetListItem]:
|
|
111
|
+
"""Search snippets with filters.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
request: The search request containing queries and optional filters.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
A sequence of SnippetListItem instances matching the search criteria.
|
|
118
|
+
|
|
119
|
+
"""
|
|
120
|
+
raise NotImplementedError
|
|
121
|
+
|
|
89
122
|
|
|
90
123
|
class FileRepository(GenericRepository[File]):
|
|
91
124
|
"""File repository with specific methods."""
|
|
@@ -4,10 +4,10 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from collections.abc import Sequence
|
|
5
5
|
|
|
6
6
|
from kodit.domain.value_objects import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
DeleteRequest,
|
|
8
|
+
IndexRequest,
|
|
9
|
+
SearchRequest,
|
|
10
|
+
SearchResult,
|
|
11
11
|
)
|
|
12
12
|
|
|
13
13
|
|
|
@@ -15,15 +15,15 @@ class BM25Repository(ABC):
|
|
|
15
15
|
"""Abstract interface for BM25 repository."""
|
|
16
16
|
|
|
17
17
|
@abstractmethod
|
|
18
|
-
async def index_documents(self, request:
|
|
18
|
+
async def index_documents(self, request: IndexRequest) -> None:
|
|
19
19
|
"""Index documents for BM25 search."""
|
|
20
20
|
|
|
21
21
|
@abstractmethod
|
|
22
|
-
async def search(self, request:
|
|
22
|
+
async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
|
|
23
23
|
"""Search documents using BM25."""
|
|
24
24
|
|
|
25
25
|
@abstractmethod
|
|
26
|
-
async def delete_documents(self, request:
|
|
26
|
+
async def delete_documents(self, request: DeleteRequest) -> None:
|
|
27
27
|
"""Delete documents from the BM25 index."""
|
|
28
28
|
|
|
29
29
|
|
|
@@ -39,7 +39,7 @@ class BM25DomainService:
|
|
|
39
39
|
"""
|
|
40
40
|
self.repository = repository
|
|
41
41
|
|
|
42
|
-
async def index_documents(self, request:
|
|
42
|
+
async def index_documents(self, request: IndexRequest) -> None:
|
|
43
43
|
"""Index documents using domain business rules.
|
|
44
44
|
|
|
45
45
|
Args:
|
|
@@ -64,10 +64,10 @@ class BM25DomainService:
|
|
|
64
64
|
raise ValueError("No valid documents to index")
|
|
65
65
|
|
|
66
66
|
# Domain logic: create new request with validated documents
|
|
67
|
-
validated_request =
|
|
67
|
+
validated_request = IndexRequest(documents=valid_documents)
|
|
68
68
|
await self.repository.index_documents(validated_request)
|
|
69
69
|
|
|
70
|
-
async def search(self, request:
|
|
70
|
+
async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
|
|
71
71
|
"""Search documents using domain business rules.
|
|
72
72
|
|
|
73
73
|
Args:
|
|
@@ -88,14 +88,11 @@ class BM25DomainService:
|
|
|
88
88
|
raise ValueError("Top-k must be positive")
|
|
89
89
|
|
|
90
90
|
# Domain logic: normalize query
|
|
91
|
-
|
|
92
|
-
normalized_request = BM25SearchRequest(
|
|
93
|
-
query=normalized_query, top_k=request.top_k
|
|
94
|
-
)
|
|
91
|
+
request.query = request.query.strip()
|
|
95
92
|
|
|
96
|
-
return await self.repository.search(
|
|
93
|
+
return await self.repository.search(request)
|
|
97
94
|
|
|
98
|
-
async def delete_documents(self, request:
|
|
95
|
+
async def delete_documents(self, request: DeleteRequest) -> None:
|
|
99
96
|
"""Delete documents using domain business rules.
|
|
100
97
|
|
|
101
98
|
Args:
|
|
@@ -120,5 +117,5 @@ class BM25DomainService:
|
|
|
120
117
|
raise ValueError("No valid snippet IDs to delete")
|
|
121
118
|
|
|
122
119
|
# Domain logic: create new request with validated IDs
|
|
123
|
-
validated_request =
|
|
120
|
+
validated_request = DeleteRequest(snippet_ids=valid_ids)
|
|
124
121
|
await self.repository.delete_documents(validated_request)
|
|
@@ -7,10 +7,10 @@ from kodit.domain.entities import EmbeddingType
|
|
|
7
7
|
from kodit.domain.value_objects import (
|
|
8
8
|
EmbeddingRequest,
|
|
9
9
|
EmbeddingResponse,
|
|
10
|
+
IndexRequest,
|
|
10
11
|
IndexResult,
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
VectorSearchResult,
|
|
12
|
+
SearchRequest,
|
|
13
|
+
SearchResult,
|
|
14
14
|
)
|
|
15
15
|
|
|
16
16
|
|
|
@@ -29,14 +29,12 @@ class VectorSearchRepository(ABC):
|
|
|
29
29
|
|
|
30
30
|
@abstractmethod
|
|
31
31
|
def index_documents(
|
|
32
|
-
self, request:
|
|
32
|
+
self, request: IndexRequest
|
|
33
33
|
) -> AsyncGenerator[list[IndexResult], None]:
|
|
34
34
|
"""Index documents for vector search."""
|
|
35
35
|
|
|
36
36
|
@abstractmethod
|
|
37
|
-
async def search(
|
|
38
|
-
self, request: VectorSearchQueryRequest
|
|
39
|
-
) -> Sequence[VectorSearchResult]:
|
|
37
|
+
async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
|
|
40
38
|
"""Search documents using vector similarity."""
|
|
41
39
|
|
|
42
40
|
@abstractmethod
|
|
@@ -65,7 +63,7 @@ class EmbeddingDomainService:
|
|
|
65
63
|
self.vector_search_repository = vector_search_repository
|
|
66
64
|
|
|
67
65
|
async def index_documents(
|
|
68
|
-
self, request:
|
|
66
|
+
self, request: IndexRequest
|
|
69
67
|
) -> AsyncGenerator[list[IndexResult], None]:
|
|
70
68
|
"""Index documents using domain business rules.
|
|
71
69
|
|
|
@@ -94,15 +92,13 @@ class EmbeddingDomainService:
|
|
|
94
92
|
return
|
|
95
93
|
|
|
96
94
|
# Domain logic: create new request with validated documents
|
|
97
|
-
validated_request =
|
|
95
|
+
validated_request = IndexRequest(documents=valid_documents)
|
|
98
96
|
async for result in self.vector_search_repository.index_documents(
|
|
99
97
|
validated_request
|
|
100
98
|
):
|
|
101
99
|
yield result
|
|
102
100
|
|
|
103
|
-
async def search(
|
|
104
|
-
self, request: VectorSearchQueryRequest
|
|
105
|
-
) -> Sequence[VectorSearchResult]:
|
|
101
|
+
async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
|
|
106
102
|
"""Search documents using domain business rules.
|
|
107
103
|
|
|
108
104
|
Args:
|
|
@@ -124,8 +120,8 @@ class EmbeddingDomainService:
|
|
|
124
120
|
|
|
125
121
|
# Domain logic: normalize query
|
|
126
122
|
normalized_query = request.query.strip()
|
|
127
|
-
normalized_request =
|
|
128
|
-
query=normalized_query, top_k=request.top_k
|
|
123
|
+
normalized_request = SearchRequest(
|
|
124
|
+
query=normalized_query, top_k=request.top_k, snippet_ids=request.snippet_ids
|
|
129
125
|
)
|
|
130
126
|
|
|
131
127
|
return await self.vector_search_repository.search(normalized_request)
|