kodit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (57)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +148 -119
  4. kodit/cli.py +49 -52
  5. kodit/domain/entities.py +268 -189
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +225 -92
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/log.py +4 -1
  32. kodit/mcp.py +1 -13
  33. kodit/migrations/env.py +1 -1
  34. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  35. kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  39. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/RECORD +42 -45
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -211
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -273
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  54. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -251
  55. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  56. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  57. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
kodit/cli.py CHANGED
@@ -19,8 +19,14 @@ from kodit.config import (
19
19
  with_session,
20
20
  )
21
21
  from kodit.domain.errors import EmptySourceError
22
- from kodit.domain.services.source_service import SourceService
23
- from kodit.domain.value_objects import MultiSearchRequest, SnippetSearchFilters
22
+ from kodit.domain.services.index_query_service import IndexQueryService
23
+ from kodit.domain.value_objects import (
24
+ MultiSearchRequest,
25
+ MultiSearchResult,
26
+ SnippetSearchFilters,
27
+ )
28
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
29
+ from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
24
30
  from kodit.infrastructure.ui.progress import (
25
31
  create_lazy_progress_callback,
26
32
  create_multi_stage_progress_callback,
@@ -73,30 +79,28 @@ async def index(
73
79
  ) -> None:
74
80
  """List indexes, or index data sources."""
75
81
  log = structlog.get_logger(__name__)
76
- source_service = SourceService(
77
- clone_dir=app_context.get_clone_dir(),
78
- session_factory=lambda: session,
79
- )
80
82
  service = create_code_indexing_application_service(
81
83
  app_context=app_context,
82
84
  session=session,
83
- source_service=source_service,
85
+ )
86
+ index_query_service = IndexQueryService(
87
+ index_repository=SqlAlchemyIndexRepository(session=session),
88
+ fusion_service=ReciprocalRankFusionService(),
84
89
  )
85
90
 
86
91
  if auto_index:
87
92
  log.info("Auto-indexing configuration", config=app_context.auto_indexing)
88
- auto_sources = app_context.auto_indexing.sources
89
- if not auto_sources:
93
+ if not app_context.auto_indexing or not app_context.auto_indexing.sources:
90
94
  click.echo("No auto-index sources configured.")
91
95
  return
92
-
96
+ auto_sources = app_context.auto_indexing.sources
93
97
  click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
94
98
  sources = [source.uri for source in auto_sources]
95
99
 
96
100
  if not sources:
97
101
  log_event("kodit.cli.index.list")
98
102
  # No source specified, list all indexes
99
- indexes = await service.list_indexes()
103
+ indexes = await index_query_service.list_indexes()
100
104
  headers: list[str | Cell] = [
101
105
  "ID",
102
106
  "Created At",
@@ -109,8 +113,8 @@ async def index(
109
113
  index.id,
110
114
  index.created_at,
111
115
  index.updated_at,
112
- index.source,
113
- index.num_snippets,
116
+ index.source.working_copy.remote_uri,
117
+ len(index.source.working_copy.files),
114
118
  ]
115
119
  for index in indexes
116
120
  ]
@@ -127,14 +131,12 @@ async def index(
127
131
 
128
132
  # Create a lazy progress callback that only shows progress when needed
129
133
  progress_callback = create_lazy_progress_callback()
130
- s = await source_service.create(source, progress_callback)
131
-
132
- index = await service.create_index(s.id)
134
+ index = await service.create_index_from_uri(source, progress_callback)
133
135
 
134
136
  # Create a new progress callback for the indexing operations
135
137
  indexing_progress_callback = create_multi_stage_progress_callback()
136
138
  try:
137
- await service.run_index(index.id, indexing_progress_callback)
139
+ await service.run_index(index, indexing_progress_callback)
138
140
  except EmptySourceError as e:
139
141
  log.exception("Empty source error", error=e)
140
142
  msg = f"""{e}. This could mean:
@@ -219,6 +221,7 @@ def _parse_filters(
219
221
  @click.option(
220
222
  "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
221
223
  )
224
+ @click.option("--output-format", default="text", help="Format to display snippets in")
222
225
  @with_app_context
223
226
  @with_session
224
227
  async def code( # noqa: PLR0913
@@ -231,20 +234,16 @@ async def code( # noqa: PLR0913
231
234
  created_after: str | None,
232
235
  created_before: str | None,
233
236
  source_repo: str | None,
237
+ output_format: str,
234
238
  ) -> None:
235
239
  """Search for snippets using semantic code search.
236
240
 
237
241
  This works best if your query is code.
238
242
  """
239
243
  log_event("kodit.cli.search.code")
240
- source_service = SourceService(
241
- clone_dir=app_context.get_clone_dir(),
242
- session_factory=lambda: session,
243
- )
244
244
  service = create_code_indexing_application_service(
245
245
  app_context=app_context,
246
246
  session=session,
247
- source_service=source_service,
248
247
  )
249
248
 
250
249
  filters = _parse_filters(
@@ -259,8 +258,10 @@ async def code( # noqa: PLR0913
259
258
  click.echo("No snippets found")
260
259
  return
261
260
 
262
- for snippet in snippets:
263
- click.echo(str(snippet))
261
+ if output_format == "text":
262
+ click.echo(MultiSearchResult.to_string(snippets))
263
+ elif output_format == "json":
264
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
264
265
 
265
266
 
266
267
  @search.command()
@@ -279,6 +280,7 @@ async def code( # noqa: PLR0913
279
280
  @click.option(
280
281
  "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
281
282
  )
283
+ @click.option("--output-format", default="text", help="Format to display snippets in")
282
284
  @with_app_context
283
285
  @with_session
284
286
  async def keyword( # noqa: PLR0913
@@ -291,17 +293,13 @@ async def keyword( # noqa: PLR0913
291
293
  created_after: str | None,
292
294
  created_before: str | None,
293
295
  source_repo: str | None,
296
+ output_format: str,
294
297
  ) -> None:
295
298
  """Search for snippets using keyword search."""
296
299
  log_event("kodit.cli.search.keyword")
297
- source_service = SourceService(
298
- clone_dir=app_context.get_clone_dir(),
299
- session_factory=lambda: session,
300
- )
301
300
  service = create_code_indexing_application_service(
302
301
  app_context=app_context,
303
302
  session=session,
304
- source_service=source_service,
305
303
  )
306
304
 
307
305
  filters = _parse_filters(
@@ -316,8 +314,10 @@ async def keyword( # noqa: PLR0913
316
314
  click.echo("No snippets found")
317
315
  return
318
316
 
319
- for snippet in snippets:
320
- click.echo(str(snippet))
317
+ if output_format == "text":
318
+ click.echo(MultiSearchResult.to_string(snippets))
319
+ elif output_format == "json":
320
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
321
321
 
322
322
 
323
323
  @search.command()
@@ -336,6 +336,7 @@ async def keyword( # noqa: PLR0913
336
336
  @click.option(
337
337
  "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
338
338
  )
339
+ @click.option("--output-format", default="text", help="Format to display snippets in")
339
340
  @with_app_context
340
341
  @with_session
341
342
  async def text( # noqa: PLR0913
@@ -348,20 +349,16 @@ async def text( # noqa: PLR0913
348
349
  created_after: str | None,
349
350
  created_before: str | None,
350
351
  source_repo: str | None,
352
+ output_format: str,
351
353
  ) -> None:
352
354
  """Search for snippets using semantic text search.
353
355
 
354
356
  This works best if your query is text.
355
357
  """
356
358
  log_event("kodit.cli.search.text")
357
- source_service = SourceService(
358
- clone_dir=app_context.get_clone_dir(),
359
- session_factory=lambda: session,
360
- )
361
359
  service = create_code_indexing_application_service(
362
360
  app_context=app_context,
363
361
  session=session,
364
- source_service=source_service,
365
362
  )
366
363
 
367
364
  filters = _parse_filters(
@@ -376,8 +373,10 @@ async def text( # noqa: PLR0913
376
373
  click.echo("No snippets found")
377
374
  return
378
375
 
379
- for snippet in snippets:
380
- click.echo(str(snippet))
376
+ if output_format == "text":
377
+ click.echo(MultiSearchResult.to_string(snippets))
378
+ elif output_format == "json":
379
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
381
380
 
382
381
 
383
382
  @search.command()
@@ -398,6 +397,7 @@ async def text( # noqa: PLR0913
398
397
  @click.option(
399
398
  "--source-repo", help="Filter by source repository (e.g., github.com/example/repo)"
400
399
  )
400
+ @click.option("--output-format", default="text", help="Format to display snippets in")
401
401
  @with_app_context
402
402
  @with_session
403
403
  async def hybrid( # noqa: PLR0913
@@ -412,17 +412,13 @@ async def hybrid( # noqa: PLR0913
412
412
  created_after: str | None,
413
413
  created_before: str | None,
414
414
  source_repo: str | None,
415
+ output_format: str,
415
416
  ) -> None:
416
417
  """Search for snippets using hybrid search."""
417
418
  log_event("kodit.cli.search.hybrid")
418
- source_service = SourceService(
419
- clone_dir=app_context.get_clone_dir(),
420
- session_factory=lambda: session,
421
- )
422
419
  service = create_code_indexing_application_service(
423
420
  app_context=app_context,
424
421
  session=session,
425
- source_service=source_service,
426
422
  )
427
423
 
428
424
  # Parse keywords into a list of strings
@@ -446,8 +442,10 @@ async def hybrid( # noqa: PLR0913
446
442
  click.echo("No snippets found")
447
443
  return
448
444
 
449
- for snippet in snippets:
450
- click.echo(str(snippet))
445
+ if output_format == "text":
446
+ click.echo(MultiSearchResult.to_string(snippets))
447
+ elif output_format == "json":
448
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
451
449
 
452
450
 
453
451
  @cli.group()
@@ -458,6 +456,7 @@ def show() -> None:
458
456
  @show.command()
459
457
  @click.option("--by-path", help="File or directory path to search for snippets")
460
458
  @click.option("--by-source", help="Source URI to filter snippets by")
459
+ @click.option("--output-format", default="text", help="Format to display snippets in")
461
460
  @with_app_context
462
461
  @with_session
463
462
  async def snippets(
@@ -465,21 +464,19 @@ async def snippets(
465
464
  app_context: AppContext,
466
465
  by_path: str | None,
467
466
  by_source: str | None,
467
+ output_format: str,
468
468
  ) -> None:
469
469
  """Show snippets with optional filtering by path or source."""
470
470
  log_event("kodit.cli.show.snippets")
471
- source_service = SourceService(
472
- clone_dir=app_context.get_clone_dir(),
473
- session_factory=lambda: session,
474
- )
475
471
  service = create_code_indexing_application_service(
476
472
  app_context=app_context,
477
473
  session=session,
478
- source_service=source_service,
479
474
  )
480
475
  snippets = await service.list_snippets(file_path=by_path, source_uri=by_source)
481
- for snippet in snippets:
482
- click.echo(str(snippet))
476
+ if output_format == "text":
477
+ click.echo(MultiSearchResult.to_string(snippets))
478
+ elif output_format == "json":
479
+ click.echo(MultiSearchResult.to_jsonlines(snippets))
483
480
 
484
481
 
485
482
  @cli.command()