kodit 0.2.8__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +36 -1
- kodit/application/factories/__init__.py +1 -0
- kodit/application/factories/code_indexing_factory.py +119 -0
- kodit/application/services/{indexing_application_service.py → code_indexing_application_service.py} +159 -198
- kodit/cli.py +214 -62
- kodit/config.py +40 -3
- kodit/domain/entities.py +7 -5
- kodit/domain/repositories.py +33 -0
- kodit/domain/services/bm25_service.py +14 -17
- kodit/domain/services/embedding_service.py +10 -14
- kodit/domain/services/snippet_service.py +198 -0
- kodit/domain/value_objects.py +301 -21
- kodit/infrastructure/bm25/local_bm25_repository.py +20 -12
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +31 -11
- kodit/infrastructure/cloning/metadata.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +14 -25
- kodit/infrastructure/embedding/local_vector_search_repository.py +26 -38
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +50 -35
- kodit/infrastructure/enrichment/enrichment_factory.py +1 -1
- kodit/infrastructure/indexing/auto_indexing_service.py +84 -0
- kodit/infrastructure/indexing/indexing_factory.py +8 -91
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +37 -0
- kodit/infrastructure/snippet_extraction/languages/java.scm +12 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +3 -31
- kodit/infrastructure/sqlalchemy/embedding_repository.py +14 -3
- kodit/infrastructure/sqlalchemy/snippet_repository.py +174 -2
- kodit/mcp.py +61 -49
- {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/METADATA +1 -1
- {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/RECORD +33 -31
- kodit/application/commands/__init__.py +0 -1
- kodit/application/commands/snippet_commands.py +0 -22
- kodit/application/services/snippet_application_service.py +0 -149
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +0 -42
- {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/WHEEL +0 -0
- {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.8.dist-info → kodit-0.3.0.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
|
-
import asyncio
|
|
4
3
|
import signal
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Any
|
|
@@ -11,8 +10,8 @@ import uvicorn
|
|
|
11
10
|
from pytable_formatter import Cell, Table
|
|
12
11
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
12
|
|
|
14
|
-
from kodit.application.
|
|
15
|
-
|
|
13
|
+
from kodit.application.factories.code_indexing_factory import (
|
|
14
|
+
create_code_indexing_application_service,
|
|
16
15
|
)
|
|
17
16
|
from kodit.config import (
|
|
18
17
|
AppContext,
|
|
@@ -21,14 +20,7 @@ from kodit.config import (
|
|
|
21
20
|
)
|
|
22
21
|
from kodit.domain.errors import EmptySourceError
|
|
23
22
|
from kodit.domain.services.source_service import SourceService
|
|
24
|
-
from kodit.domain.value_objects import MultiSearchRequest
|
|
25
|
-
from kodit.infrastructure.indexing.indexing_factory import (
|
|
26
|
-
create_indexing_application_service,
|
|
27
|
-
)
|
|
28
|
-
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
29
|
-
create_snippet_extraction_domain_service,
|
|
30
|
-
create_snippet_repositories,
|
|
31
|
-
)
|
|
23
|
+
from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
|
|
32
24
|
from kodit.infrastructure.ui.progress import (
|
|
33
25
|
create_lazy_progress_callback,
|
|
34
26
|
create_multi_stage_progress_callback,
|
|
@@ -36,33 +28,6 @@ from kodit.infrastructure.ui.progress import (
|
|
|
36
28
|
from kodit.log import configure_logging, configure_telemetry, log_event
|
|
37
29
|
|
|
38
30
|
|
|
39
|
-
def create_snippet_application_service(
|
|
40
|
-
session: AsyncSession,
|
|
41
|
-
) -> SnippetApplicationService:
|
|
42
|
-
"""Create a snippet application service with all dependencies.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
session: SQLAlchemy session
|
|
46
|
-
|
|
47
|
-
Returns:
|
|
48
|
-
Configured snippet application service
|
|
49
|
-
|
|
50
|
-
"""
|
|
51
|
-
# Create domain service
|
|
52
|
-
snippet_extraction_service = create_snippet_extraction_domain_service()
|
|
53
|
-
|
|
54
|
-
# Create repositories
|
|
55
|
-
snippet_repository, file_repository = create_snippet_repositories(session)
|
|
56
|
-
|
|
57
|
-
# Create application service
|
|
58
|
-
return SnippetApplicationService(
|
|
59
|
-
snippet_extraction_service=snippet_extraction_service,
|
|
60
|
-
snippet_repository=snippet_repository,
|
|
61
|
-
file_repository=file_repository,
|
|
62
|
-
session=session,
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
|
|
66
31
|
@click.group(context_settings={"max_content_width": 100})
|
|
67
32
|
@click.option(
|
|
68
33
|
"--env-file",
|
|
@@ -94,12 +59,17 @@ def cli(
|
|
|
94
59
|
|
|
95
60
|
@cli.command()
|
|
96
61
|
@click.argument("sources", nargs=-1)
|
|
62
|
+
@click.option(
|
|
63
|
+
"--auto-index", is_flag=True, help="Index all configured auto-index sources"
|
|
64
|
+
)
|
|
97
65
|
@with_app_context
|
|
98
66
|
@with_session
|
|
99
67
|
async def index(
|
|
100
68
|
session: AsyncSession,
|
|
101
69
|
app_context: AppContext,
|
|
102
70
|
sources: list[str],
|
|
71
|
+
*, # Force keyword-only arguments
|
|
72
|
+
auto_index: bool,
|
|
103
73
|
) -> None:
|
|
104
74
|
"""List indexes, or index data sources."""
|
|
105
75
|
log = structlog.get_logger(__name__)
|
|
@@ -107,14 +77,22 @@ async def index(
|
|
|
107
77
|
clone_dir=app_context.get_clone_dir(),
|
|
108
78
|
session_factory=lambda: session,
|
|
109
79
|
)
|
|
110
|
-
|
|
111
|
-
service = create_indexing_application_service(
|
|
80
|
+
service = create_code_indexing_application_service(
|
|
112
81
|
app_context=app_context,
|
|
113
82
|
session=session,
|
|
114
83
|
source_service=source_service,
|
|
115
|
-
snippet_application_service=snippet_service,
|
|
116
84
|
)
|
|
117
85
|
|
|
86
|
+
if auto_index:
|
|
87
|
+
log.info("Auto-indexing configuration", config=app_context.auto_indexing)
|
|
88
|
+
auto_sources = app_context.auto_indexing.sources
|
|
89
|
+
if not auto_sources:
|
|
90
|
+
click.echo("No auto-index sources configured.")
|
|
91
|
+
return
|
|
92
|
+
|
|
93
|
+
click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
|
|
94
|
+
sources = [source.uri for source in auto_sources]
|
|
95
|
+
|
|
118
96
|
if not sources:
|
|
119
97
|
log_event("kodit.cli.index.list")
|
|
120
98
|
# No source specified, list all indexes
|
|
@@ -173,16 +151,86 @@ def search() -> None:
|
|
|
173
151
|
"""Search for snippets in the database."""
|
|
174
152
|
|
|
175
153
|
|
|
154
|
+
# Utility for robust filter parsing
|
|
155
|
+
def _parse_filters(
|
|
156
|
+
language: str | None,
|
|
157
|
+
author: str | None,
|
|
158
|
+
created_after: str | None,
|
|
159
|
+
created_before: str | None,
|
|
160
|
+
source_repo: str | None,
|
|
161
|
+
) -> SnippetSearchFilters | None:
|
|
162
|
+
from datetime import datetime
|
|
163
|
+
|
|
164
|
+
# Normalize language to lowercase if provided
|
|
165
|
+
norm_language = language.lower() if language else None
|
|
166
|
+
# Try to parse dates, raise error if invalid
|
|
167
|
+
parsed_created_after = None
|
|
168
|
+
if created_after:
|
|
169
|
+
try:
|
|
170
|
+
parsed_created_after = datetime.fromisoformat(created_after)
|
|
171
|
+
except ValueError as err:
|
|
172
|
+
raise ValueError(
|
|
173
|
+
f"Invalid date format for --created-after: {created_after}. "
|
|
174
|
+
"Expected ISO 8601 format (YYYY-MM-DD)"
|
|
175
|
+
) from err
|
|
176
|
+
parsed_created_before = None
|
|
177
|
+
if created_before:
|
|
178
|
+
try:
|
|
179
|
+
parsed_created_before = datetime.fromisoformat(created_before)
|
|
180
|
+
except ValueError as err:
|
|
181
|
+
raise ValueError(
|
|
182
|
+
f"Invalid date format for --created-before: {created_before}. "
|
|
183
|
+
"Expected ISO 8601 format (YYYY-MM-DD)"
|
|
184
|
+
) from err
|
|
185
|
+
# Return None if no filters provided, otherwise return SnippetSearchFilters
|
|
186
|
+
# Check if any original parameters were provided (not just the parsed values)
|
|
187
|
+
if any(
|
|
188
|
+
[
|
|
189
|
+
language,
|
|
190
|
+
author,
|
|
191
|
+
created_after,
|
|
192
|
+
created_before,
|
|
193
|
+
source_repo,
|
|
194
|
+
]
|
|
195
|
+
):
|
|
196
|
+
return SnippetSearchFilters(
|
|
197
|
+
language=norm_language,
|
|
198
|
+
author=author,
|
|
199
|
+
created_after=parsed_created_after,
|
|
200
|
+
created_before=parsed_created_before,
|
|
201
|
+
source_repo=source_repo,
|
|
202
|
+
)
|
|
203
|
+
return None
|
|
204
|
+
|
|
205
|
+
|
|
176
206
|
@search.command()
|
|
177
207
|
@click.argument("query")
|
|
178
208
|
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
209
|
+
@click.option(
|
|
210
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
211
|
+
)
|
|
212
|
+
@click.option("--author", help="Filter by author name")
|
|
213
|
+
@click.option(
|
|
214
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
215
|
+
)
|
|
216
|
+
@click.option(
|
|
217
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
218
|
+
)
|
|
219
|
+
@click.option(
|
|
220
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
221
|
+
)
|
|
179
222
|
@with_app_context
|
|
180
223
|
@with_session
|
|
181
|
-
async def code(
|
|
224
|
+
async def code( # noqa: PLR0913
|
|
182
225
|
session: AsyncSession,
|
|
183
226
|
app_context: AppContext,
|
|
184
227
|
query: str,
|
|
185
228
|
top_k: int,
|
|
229
|
+
language: str | None,
|
|
230
|
+
author: str | None,
|
|
231
|
+
created_after: str | None,
|
|
232
|
+
created_before: str | None,
|
|
233
|
+
source_repo: str | None,
|
|
186
234
|
) -> None:
|
|
187
235
|
"""Search for snippets using semantic code search.
|
|
188
236
|
|
|
@@ -193,15 +241,19 @@ async def code(
|
|
|
193
241
|
clone_dir=app_context.get_clone_dir(),
|
|
194
242
|
session_factory=lambda: session,
|
|
195
243
|
)
|
|
196
|
-
|
|
197
|
-
service = create_indexing_application_service(
|
|
244
|
+
service = create_code_indexing_application_service(
|
|
198
245
|
app_context=app_context,
|
|
199
246
|
session=session,
|
|
200
247
|
source_service=source_service,
|
|
201
|
-
snippet_application_service=snippet_service,
|
|
202
248
|
)
|
|
203
249
|
|
|
204
|
-
|
|
250
|
+
filters = _parse_filters(
|
|
251
|
+
language, author, created_after, created_before, source_repo
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
snippets = await service.search(
|
|
255
|
+
MultiSearchRequest(code_query=query, top_k=top_k, filters=filters)
|
|
256
|
+
)
|
|
205
257
|
|
|
206
258
|
if len(snippets) == 0:
|
|
207
259
|
click.echo("No snippets found")
|
|
@@ -219,13 +271,31 @@ async def code(
|
|
|
219
271
|
@search.command()
|
|
220
272
|
@click.argument("keywords", nargs=-1)
|
|
221
273
|
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
274
|
+
@click.option(
|
|
275
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
276
|
+
)
|
|
277
|
+
@click.option("--author", help="Filter by author name")
|
|
278
|
+
@click.option(
|
|
279
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
280
|
+
)
|
|
281
|
+
@click.option(
|
|
282
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
283
|
+
)
|
|
284
|
+
@click.option(
|
|
285
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
286
|
+
)
|
|
222
287
|
@with_app_context
|
|
223
288
|
@with_session
|
|
224
|
-
async def keyword(
|
|
289
|
+
async def keyword( # noqa: PLR0913
|
|
225
290
|
session: AsyncSession,
|
|
226
291
|
app_context: AppContext,
|
|
227
292
|
keywords: list[str],
|
|
228
293
|
top_k: int,
|
|
294
|
+
language: str | None,
|
|
295
|
+
author: str | None,
|
|
296
|
+
created_after: str | None,
|
|
297
|
+
created_before: str | None,
|
|
298
|
+
source_repo: str | None,
|
|
229
299
|
) -> None:
|
|
230
300
|
"""Search for snippets using keyword search."""
|
|
231
301
|
log_event("kodit.cli.search.keyword")
|
|
@@ -233,15 +303,19 @@ async def keyword(
|
|
|
233
303
|
clone_dir=app_context.get_clone_dir(),
|
|
234
304
|
session_factory=lambda: session,
|
|
235
305
|
)
|
|
236
|
-
|
|
237
|
-
service = create_indexing_application_service(
|
|
306
|
+
service = create_code_indexing_application_service(
|
|
238
307
|
app_context=app_context,
|
|
239
308
|
session=session,
|
|
240
309
|
source_service=source_service,
|
|
241
|
-
snippet_application_service=snippet_service,
|
|
242
310
|
)
|
|
243
311
|
|
|
244
|
-
|
|
312
|
+
filters = _parse_filters(
|
|
313
|
+
language, author, created_after, created_before, source_repo
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
snippets = await service.search(
|
|
317
|
+
MultiSearchRequest(keywords=keywords, top_k=top_k, filters=filters)
|
|
318
|
+
)
|
|
245
319
|
|
|
246
320
|
if len(snippets) == 0:
|
|
247
321
|
click.echo("No snippets found")
|
|
@@ -259,13 +333,31 @@ async def keyword(
|
|
|
259
333
|
@search.command()
|
|
260
334
|
@click.argument("query")
|
|
261
335
|
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
336
|
+
@click.option(
|
|
337
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
338
|
+
)
|
|
339
|
+
@click.option("--author", help="Filter by author name")
|
|
340
|
+
@click.option(
|
|
341
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
342
|
+
)
|
|
343
|
+
@click.option(
|
|
344
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
345
|
+
)
|
|
346
|
+
@click.option(
|
|
347
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
348
|
+
)
|
|
262
349
|
@with_app_context
|
|
263
350
|
@with_session
|
|
264
|
-
async def text(
|
|
351
|
+
async def text( # noqa: PLR0913
|
|
265
352
|
session: AsyncSession,
|
|
266
353
|
app_context: AppContext,
|
|
267
354
|
query: str,
|
|
268
355
|
top_k: int,
|
|
356
|
+
language: str | None,
|
|
357
|
+
author: str | None,
|
|
358
|
+
created_after: str | None,
|
|
359
|
+
created_before: str | None,
|
|
360
|
+
source_repo: str | None,
|
|
269
361
|
) -> None:
|
|
270
362
|
"""Search for snippets using semantic text search.
|
|
271
363
|
|
|
@@ -276,15 +368,19 @@ async def text(
|
|
|
276
368
|
clone_dir=app_context.get_clone_dir(),
|
|
277
369
|
session_factory=lambda: session,
|
|
278
370
|
)
|
|
279
|
-
|
|
280
|
-
service = create_indexing_application_service(
|
|
371
|
+
service = create_code_indexing_application_service(
|
|
281
372
|
app_context=app_context,
|
|
282
373
|
session=session,
|
|
283
374
|
source_service=source_service,
|
|
284
|
-
snippet_application_service=snippet_service,
|
|
285
375
|
)
|
|
286
376
|
|
|
287
|
-
|
|
377
|
+
filters = _parse_filters(
|
|
378
|
+
language, author, created_after, created_before, source_repo
|
|
379
|
+
)
|
|
380
|
+
|
|
381
|
+
snippets = await service.search(
|
|
382
|
+
MultiSearchRequest(text_query=query, top_k=top_k, filters=filters)
|
|
383
|
+
)
|
|
288
384
|
|
|
289
385
|
if len(snippets) == 0:
|
|
290
386
|
click.echo("No snippets found")
|
|
@@ -304,6 +400,19 @@ async def text(
|
|
|
304
400
|
@click.option("--keywords", required=True, help="Comma separated list of keywords")
|
|
305
401
|
@click.option("--code", required=True, help="Semantic code search query")
|
|
306
402
|
@click.option("--text", required=True, help="Semantic text search query")
|
|
403
|
+
@click.option(
|
|
404
|
+
"--language", help="Filter by programming language (e.g., python, go, javascript)"
|
|
405
|
+
)
|
|
406
|
+
@click.option("--author", help="Filter by author name")
|
|
407
|
+
@click.option(
|
|
408
|
+
"--created-after", help="Filter snippets created after this date (YYYY-MM-DD)"
|
|
409
|
+
)
|
|
410
|
+
@click.option(
|
|
411
|
+
"--created-before", help="Filter snippets created before this date (YYYY-MM-DD)"
|
|
412
|
+
)
|
|
413
|
+
@click.option(
|
|
414
|
+
"--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
|
|
415
|
+
)
|
|
307
416
|
@with_app_context
|
|
308
417
|
@with_session
|
|
309
418
|
async def hybrid( # noqa: PLR0913
|
|
@@ -313,6 +422,11 @@ async def hybrid( # noqa: PLR0913
|
|
|
313
422
|
keywords: str,
|
|
314
423
|
code: str,
|
|
315
424
|
text: str,
|
|
425
|
+
language: str | None,
|
|
426
|
+
author: str | None,
|
|
427
|
+
created_after: str | None,
|
|
428
|
+
created_before: str | None,
|
|
429
|
+
source_repo: str | None,
|
|
316
430
|
) -> None:
|
|
317
431
|
"""Search for snippets using hybrid search."""
|
|
318
432
|
log_event("kodit.cli.search.hybrid")
|
|
@@ -320,23 +434,26 @@ async def hybrid( # noqa: PLR0913
|
|
|
320
434
|
clone_dir=app_context.get_clone_dir(),
|
|
321
435
|
session_factory=lambda: session,
|
|
322
436
|
)
|
|
323
|
-
|
|
324
|
-
service = create_indexing_application_service(
|
|
437
|
+
service = create_code_indexing_application_service(
|
|
325
438
|
app_context=app_context,
|
|
326
439
|
session=session,
|
|
327
440
|
source_service=source_service,
|
|
328
|
-
snippet_application_service=snippet_service,
|
|
329
441
|
)
|
|
330
442
|
|
|
331
443
|
# Parse keywords into a list of strings
|
|
332
444
|
keywords_list = [k.strip().lower() for k in keywords.split(",")]
|
|
333
445
|
|
|
446
|
+
filters = _parse_filters(
|
|
447
|
+
language, author, created_after, created_before, source_repo
|
|
448
|
+
)
|
|
449
|
+
|
|
334
450
|
snippets = await service.search(
|
|
335
451
|
MultiSearchRequest(
|
|
336
452
|
keywords=keywords_list,
|
|
337
453
|
code_query=code,
|
|
338
454
|
text_query=text,
|
|
339
455
|
top_k=top_k,
|
|
456
|
+
filters=filters,
|
|
340
457
|
)
|
|
341
458
|
)
|
|
342
459
|
|
|
@@ -353,6 +470,40 @@ async def hybrid( # noqa: PLR0913
|
|
|
353
470
|
click.echo()
|
|
354
471
|
|
|
355
472
|
|
|
473
|
+
@cli.group()
|
|
474
|
+
def show() -> None:
|
|
475
|
+
"""Show information about elements in the database."""
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@show.command()
|
|
479
|
+
@click.option("--by-path", help="File or directory path to search for snippets")
|
|
480
|
+
@click.option("--by-source", help="Source URI to filter snippets by")
|
|
481
|
+
@with_app_context
|
|
482
|
+
@with_session
|
|
483
|
+
async def snippets(
|
|
484
|
+
session: AsyncSession,
|
|
485
|
+
app_context: AppContext,
|
|
486
|
+
by_path: str | None,
|
|
487
|
+
by_source: str | None,
|
|
488
|
+
) -> None:
|
|
489
|
+
"""Show snippets with optional filtering by path or source."""
|
|
490
|
+
log_event("kodit.cli.show.snippets")
|
|
491
|
+
source_service = SourceService(
|
|
492
|
+
clone_dir=app_context.get_clone_dir(),
|
|
493
|
+
session_factory=lambda: session,
|
|
494
|
+
)
|
|
495
|
+
service = create_code_indexing_application_service(
|
|
496
|
+
app_context=app_context,
|
|
497
|
+
session=session,
|
|
498
|
+
source_service=source_service,
|
|
499
|
+
)
|
|
500
|
+
snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
|
|
501
|
+
for snippet in snippets:
|
|
502
|
+
click.echo(f"{snippet.id}: [{snippet.source_uri}] {snippet.file_path}")
|
|
503
|
+
click.echo(f" {snippet.content}")
|
|
504
|
+
click.echo()
|
|
505
|
+
|
|
506
|
+
|
|
356
507
|
@cli.command()
|
|
357
508
|
@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
|
|
358
509
|
@click.option("--port", default=8080, help="Port to bind the server to")
|
|
@@ -393,9 +544,10 @@ def version() -> None:
|
|
|
393
544
|
from kodit import _version
|
|
394
545
|
except ImportError:
|
|
395
546
|
print("unknown, try running `uv build`, which is what happens in ci") # noqa: T201
|
|
396
|
-
|
|
397
|
-
|
|
547
|
+
return
|
|
548
|
+
|
|
549
|
+
print(f"kodit {_version.__version__}") # noqa: T201
|
|
398
550
|
|
|
399
551
|
|
|
400
552
|
if __name__ == "__main__":
|
|
401
|
-
|
|
553
|
+
cli()
|
kodit/config.py
CHANGED
|
@@ -8,7 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Literal, TypeVar
|
|
9
9
|
|
|
10
10
|
import click
|
|
11
|
-
from pydantic import BaseModel, Field
|
|
11
|
+
from pydantic import BaseModel, Field, field_validator
|
|
12
12
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
13
13
|
|
|
14
14
|
if TYPE_CHECKING:
|
|
@@ -37,11 +37,45 @@ class Endpoint(BaseModel):
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class Search(BaseModel):
|
|
40
|
-
"""Search
|
|
40
|
+
"""Search configuration."""
|
|
41
41
|
|
|
42
42
|
provider: Literal["sqlite", "vectorchord"] = Field(default="sqlite")
|
|
43
43
|
|
|
44
44
|
|
|
45
|
+
class AutoIndexingSource(BaseModel):
|
|
46
|
+
"""Configuration for a single auto-indexing source."""
|
|
47
|
+
|
|
48
|
+
uri: str = Field(description="URI of the source to index (git URL or local path)")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AutoIndexingConfig(BaseModel):
|
|
52
|
+
"""Configuration for auto-indexing."""
|
|
53
|
+
|
|
54
|
+
sources: list[AutoIndexingSource] = Field(
|
|
55
|
+
default_factory=list, description="List of sources to auto-index"
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
@field_validator("sources", mode="before")
|
|
59
|
+
@classmethod
|
|
60
|
+
def parse_sources(cls, v: Any) -> list[AutoIndexingSource]:
|
|
61
|
+
"""Parse sources from environment variables or other formats."""
|
|
62
|
+
if v is None:
|
|
63
|
+
return []
|
|
64
|
+
if isinstance(v, list):
|
|
65
|
+
return v
|
|
66
|
+
if isinstance(v, dict):
|
|
67
|
+
# Handle case where env vars are numbered keys like {'0': {'uri': '...'}}
|
|
68
|
+
sources = []
|
|
69
|
+
i = 0
|
|
70
|
+
while str(i) in v:
|
|
71
|
+
source_data = v[str(i)]
|
|
72
|
+
if isinstance(source_data, dict) and "uri" in source_data:
|
|
73
|
+
sources.append(AutoIndexingSource(uri=source_data["uri"]))
|
|
74
|
+
i += 1
|
|
75
|
+
return sources
|
|
76
|
+
return v
|
|
77
|
+
|
|
78
|
+
|
|
45
79
|
class AppContext(BaseSettings):
|
|
46
80
|
"""Global context for the kodit project. Provides a shared state for the app."""
|
|
47
81
|
|
|
@@ -50,7 +84,7 @@ class AppContext(BaseSettings):
|
|
|
50
84
|
env_file_encoding="utf-8",
|
|
51
85
|
env_nested_delimiter="_",
|
|
52
86
|
nested_model_default_partial_update=True,
|
|
53
|
-
|
|
87
|
+
extra="ignore",
|
|
54
88
|
)
|
|
55
89
|
|
|
56
90
|
data_dir: Path = Field(default=DEFAULT_BASE_DIR)
|
|
@@ -76,6 +110,9 @@ class AppContext(BaseSettings):
|
|
|
76
110
|
default_search: Search = Field(
|
|
77
111
|
default=Search(),
|
|
78
112
|
)
|
|
113
|
+
auto_indexing: AutoIndexingConfig | None = Field(
|
|
114
|
+
default=AutoIndexingConfig(), description="Auto-indexing configuration"
|
|
115
|
+
)
|
|
79
116
|
_db: Database | None = None
|
|
80
117
|
|
|
81
118
|
def model_post_init(self, _: Any) -> None:
|
kodit/domain/entities.py
CHANGED
|
@@ -121,22 +121,24 @@ class File(Base, CommonMixin):
|
|
|
121
121
|
created_at: datetime,
|
|
122
122
|
updated_at: datetime,
|
|
123
123
|
source_id: int,
|
|
124
|
+
mime_type: str,
|
|
125
|
+
uri: str,
|
|
124
126
|
cloned_path: str,
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
size_bytes: int = 0,
|
|
127
|
+
sha256: str,
|
|
128
|
+
size_bytes: int,
|
|
129
|
+
extension: str,
|
|
129
130
|
) -> None:
|
|
130
131
|
"""Initialize a new File instance for typing purposes."""
|
|
131
132
|
super().__init__()
|
|
132
133
|
self.created_at = created_at
|
|
133
134
|
self.updated_at = updated_at
|
|
134
135
|
self.source_id = source_id
|
|
135
|
-
self.cloned_path = cloned_path
|
|
136
136
|
self.mime_type = mime_type
|
|
137
137
|
self.uri = uri
|
|
138
|
+
self.cloned_path = cloned_path
|
|
138
139
|
self.sha256 = sha256
|
|
139
140
|
self.size_bytes = size_bytes
|
|
141
|
+
self.extension = extension
|
|
140
142
|
|
|
141
143
|
|
|
142
144
|
class EmbeddingType(Enum):
|
kodit/domain/repositories.py
CHANGED
|
@@ -11,6 +11,10 @@ from kodit.domain.entities import (
|
|
|
11
11
|
Source,
|
|
12
12
|
SourceType,
|
|
13
13
|
)
|
|
14
|
+
from kodit.domain.value_objects import (
|
|
15
|
+
MultiSearchRequest,
|
|
16
|
+
SnippetListItem,
|
|
17
|
+
)
|
|
14
18
|
|
|
15
19
|
T = TypeVar("T")
|
|
16
20
|
|
|
@@ -86,6 +90,35 @@ class SnippetRepository(GenericRepository[Snippet]):
|
|
|
86
90
|
"""Delete all snippets for an index."""
|
|
87
91
|
raise NotImplementedError
|
|
88
92
|
|
|
93
|
+
async def list_snippets(
|
|
94
|
+
self, file_path: str | None = None, source_uri: str | None = None
|
|
95
|
+
) -> Sequence[SnippetListItem]:
|
|
96
|
+
"""List snippets with optional filtering by file path and source URI.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
file_path: Optional file or directory path to filter by. Can be relative
|
|
100
|
+
(uri) or absolute (cloned_path).
|
|
101
|
+
source_uri: Optional source URI to filter by. If None, returns snippets from
|
|
102
|
+
all sources.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
A sequence of SnippetListItem instances matching the criteria
|
|
106
|
+
|
|
107
|
+
"""
|
|
108
|
+
raise NotImplementedError
|
|
109
|
+
|
|
110
|
+
async def search(self, request: MultiSearchRequest) -> Sequence[SnippetListItem]:
|
|
111
|
+
"""Search snippets with filters.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
request: The search request containing queries and optional filters.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
A sequence of SnippetListItem instances matching the search criteria.
|
|
118
|
+
|
|
119
|
+
"""
|
|
120
|
+
raise NotImplementedError
|
|
121
|
+
|
|
89
122
|
|
|
90
123
|
class FileRepository(GenericRepository[File]):
|
|
91
124
|
"""File repository with specific methods."""
|
|
kodit/domain/services/bm25_service.py
CHANGED
|
@@ -4,10 +4,10 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from collections.abc import Sequence
|
|
5
5
|
|
|
6
6
|
from kodit.domain.value_objects import (
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
DeleteRequest,
|
|
8
|
+
IndexRequest,
|
|
9
|
+
SearchRequest,
|
|
10
|
+
SearchResult,
|
|
11
11
|
)
|
|
12
12
|
|
|
13
13
|
|
|
@@ -15,15 +15,15 @@ class BM25Repository(ABC):
|
|
|
15
15
|
"""Abstract interface for BM25 repository."""
|
|
16
16
|
|
|
17
17
|
@abstractmethod
|
|
18
|
-
async def index_documents(self, request:
|
|
18
|
+
async def index_documents(self, request: IndexRequest) -> None:
|
|
19
19
|
"""Index documents for BM25 search."""
|
|
20
20
|
|
|
21
21
|
@abstractmethod
|
|
22
|
-
async def search(self, request:
|
|
22
|
+
async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
|
|
23
23
|
"""Search documents using BM25."""
|
|
24
24
|
|
|
25
25
|
@abstractmethod
|
|
26
|
-
async def delete_documents(self, request:
|
|
26
|
+
async def delete_documents(self, request: DeleteRequest) -> None:
|
|
27
27
|
"""Delete documents from the BM25 index."""
|
|
28
28
|
|
|
29
29
|
|
|
@@ -39,7 +39,7 @@ class BM25DomainService:
|
|
|
39
39
|
"""
|
|
40
40
|
self.repository = repository
|
|
41
41
|
|
|
42
|
-
async def index_documents(self, request:
|
|
42
|
+
async def index_documents(self, request: IndexRequest) -> None:
|
|
43
43
|
"""Index documents using domain business rules.
|
|
44
44
|
|
|
45
45
|
Args:
|
|
@@ -64,10 +64,10 @@ class BM25DomainService:
|
|
|
64
64
|
raise ValueError("No valid documents to index")
|
|
65
65
|
|
|
66
66
|
# Domain logic: create new request with validated documents
|
|
67
|
-
validated_request =
|
|
67
|
+
validated_request = IndexRequest(documents=valid_documents)
|
|
68
68
|
await self.repository.index_documents(validated_request)
|
|
69
69
|
|
|
70
|
-
async def search(self, request:
|
|
70
|
+
async def search(self, request: SearchRequest) -> Sequence[SearchResult]:
|
|
71
71
|
"""Search documents using domain business rules.
|
|
72
72
|
|
|
73
73
|
Args:
|
|
@@ -88,14 +88,11 @@ class BM25DomainService:
|
|
|
88
88
|
raise ValueError("Top-k must be positive")
|
|
89
89
|
|
|
90
90
|
# Domain logic: normalize query
|
|
91
|
-
|
|
92
|
-
normalized_request = BM25SearchRequest(
|
|
93
|
-
query=normalized_query, top_k=request.top_k
|
|
94
|
-
)
|
|
91
|
+
request.query = request.query.strip()
|
|
95
92
|
|
|
96
|
-
return await self.repository.search(
|
|
93
|
+
return await self.repository.search(request)
|
|
97
94
|
|
|
98
|
-
async def delete_documents(self, request:
|
|
95
|
+
async def delete_documents(self, request: DeleteRequest) -> None:
|
|
99
96
|
"""Delete documents using domain business rules.
|
|
100
97
|
|
|
101
98
|
Args:
|
|
@@ -120,5 +117,5 @@ class BM25DomainService:
|
|
|
120
117
|
raise ValueError("No valid snippet IDs to delete")
|
|
121
118
|
|
|
122
119
|
# Domain logic: create new request with validated IDs
|
|
123
|
-
validated_request =
|
|
120
|
+
validated_request = DeleteRequest(snippet_ids=valid_ids)
|
|
124
121
|
await self.repository.delete_documents(validated_request)
|