kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.3'
32
- __version_tuple__ = version_tuple = (0, 4, 3)
31
+ __version__ = version = '0.5.1'
32
+ __version_tuple__ = version_tuple = (0, 5, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -3,75 +3,103 @@
3
3
  from collections.abc import AsyncIterator
4
4
  from contextlib import asynccontextmanager
5
5
 
6
+ import structlog
6
7
  from asgi_correlation_id import CorrelationIdMiddleware
7
8
  from fastapi import FastAPI, Response
8
9
  from fastapi.responses import RedirectResponse
9
10
 
10
11
  from kodit._version import version
11
12
  from kodit.application.factories.reporting_factory import create_server_operation
12
- from kodit.application.services.auto_indexing_service import AutoIndexingService
13
+ from kodit.application.factories.server_factory import ServerFactory
13
14
  from kodit.application.services.indexing_worker_service import IndexingWorkerService
14
15
  from kodit.application.services.sync_scheduler import SyncSchedulerService
15
16
  from kodit.config import AppContext
16
- from kodit.infrastructure.api.v1.routers import (
17
- indexes_router,
18
- queue_router,
19
- search_router,
17
+ from kodit.domain.enrichments.request import EnrichmentRequest
18
+ from kodit.domain.value_objects import Document, IndexRequest
19
+ from kodit.infrastructure.api.v1.routers.commits import router as commits_router
20
+ from kodit.infrastructure.api.v1.routers.queue import router as queue_router
21
+ from kodit.infrastructure.api.v1.routers.repositories import (
22
+ router as repositories_router,
20
23
  )
24
+ from kodit.infrastructure.api.v1.routers.search import router as search_router
21
25
  from kodit.infrastructure.api.v1.schemas.context import AppLifespanState
22
26
  from kodit.infrastructure.sqlalchemy.task_status_repository import (
23
27
  create_task_status_repository,
24
28
  )
25
29
  from kodit.mcp import mcp
26
- from kodit.middleware import ASGICancelledErrorMiddleware, logging_middleware
30
+ from kodit.middleware import (
31
+ ASGICancelledErrorMiddleware,
32
+ logging_middleware,
33
+ )
27
34
 
28
35
  # Global services
29
- _auto_indexing_service: AutoIndexingService | None = None
30
36
  _sync_scheduler_service: SyncSchedulerService | None = None
37
+ _server_factory: ServerFactory | None = None
31
38
 
32
39
 
33
40
  @asynccontextmanager
34
41
  async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
35
42
  """Manage application lifespan for auto-indexing and sync."""
36
- global _auto_indexing_service, _sync_scheduler_service # noqa: PLW0603
43
+ global _sync_scheduler_service # noqa: PLW0603
44
+ global _server_factory # noqa: PLW0603
37
45
 
38
46
  # App context has already been configured by the CLI.
39
47
  app_context = AppContext()
40
48
  db = await app_context.get_db()
49
+ log = structlog.get_logger(__name__)
41
50
  operation = create_server_operation(
42
51
  create_task_status_repository(db.session_factory)
43
52
  )
44
53
 
54
+ _server_factory = ServerFactory(app_context, db.session_factory)
55
+
56
+ # Quickly check if the providers are accessible and raise an error if not
57
+ log.info("Checking providers are accessible")
58
+ try:
59
+ await anext(
60
+ _server_factory.code_search_service().index_documents(
61
+ IndexRequest(
62
+ documents=[Document(snippet_id="1", text="def hello(): pass")]
63
+ )
64
+ )
65
+ )
66
+ except Exception as e:
67
+ raise ValueError("Embedding service is not accessible") from e
68
+ try:
69
+ await anext(
70
+ _server_factory.enricher().enrich(
71
+ [
72
+ EnrichmentRequest(
73
+ id="1",
74
+ text="def hello(): pass",
75
+ system_prompt="Explain this code",
76
+ )
77
+ ]
78
+ )
79
+ )
80
+ except Exception as e:
81
+ raise ValueError("Enrichment service is not accessible") from e
82
+
45
83
  # Start the queue worker service
46
84
  _indexing_worker_service = IndexingWorkerService(
47
85
  app_context=app_context,
48
86
  session_factory=db.session_factory,
87
+ server_factory=_server_factory,
49
88
  )
50
89
  await _indexing_worker_service.start(operation)
51
90
 
52
- # Start auto-indexing service
53
- _auto_indexing_service = AutoIndexingService(
54
- app_context=app_context,
55
- session_factory=db.session_factory,
56
- )
57
- await _auto_indexing_service.start_background_indexing(operation)
58
-
59
91
  # Start sync scheduler service
60
92
  if app_context.periodic_sync.enabled:
61
- _sync_scheduler_service = SyncSchedulerService(
62
- session_factory=db.session_factory,
63
- )
93
+ _sync_scheduler_service = _server_factory.sync_scheduler_service()
64
94
  _sync_scheduler_service.start_periodic_sync(
65
95
  interval_seconds=app_context.periodic_sync.interval_seconds
66
96
  )
67
97
 
68
- yield AppLifespanState(app_context=app_context)
98
+ yield AppLifespanState(app_context=app_context, server_factory=_server_factory)
69
99
 
70
100
  # Stop services
71
101
  if _sync_scheduler_service:
72
102
  await _sync_scheduler_service.stop_periodic_sync()
73
- if _auto_indexing_service:
74
- await _auto_indexing_service.stop()
75
103
  if _indexing_worker_service:
76
104
  await _indexing_worker_service.stop()
77
105
 
@@ -123,10 +151,10 @@ async def healthz() -> Response:
123
151
 
124
152
 
125
153
  # Include API routers
126
- app.include_router(indexes_router)
127
154
  app.include_router(queue_router)
128
155
  app.include_router(search_router)
129
-
156
+ app.include_router(commits_router)
157
+ app.include_router(repositories_router)
130
158
 
131
159
  # Add mcp routes last, otherwise previous routes aren't added
132
160
  # Mount both apps at root - they have different internal paths
kodit/application/factories/reporting_factory.py CHANGED
@@ -5,7 +5,9 @@ from kodit.config import ReportingConfig
5
5
  from kodit.domain.protocols import TaskStatusRepository
6
6
  from kodit.infrastructure.reporting.db_progress import DBProgressReportingModule
7
7
  from kodit.infrastructure.reporting.log_progress import LoggingReportingModule
8
- from kodit.infrastructure.reporting.tdqm_progress import TQDMReportingModule
8
+ from kodit.infrastructure.reporting.telemetry_progress import (
9
+ TelemetryProgressReportingModule,
10
+ )
9
11
 
10
12
 
11
13
  def create_noop_operation() -> ProgressTracker:
@@ -17,7 +19,8 @@ def create_cli_operation(config: ReportingConfig | None = None) -> ProgressTrack
17
19
  """Create a CLI reporter."""
18
20
  shared_config = config or ReportingConfig()
19
21
  s = ProgressTracker.create(TaskOperation.ROOT)
20
- s.subscribe(TQDMReportingModule(shared_config))
22
+ s.subscribe(TelemetryProgressReportingModule())
23
+ s.subscribe(LoggingReportingModule(shared_config))
21
24
  return s
22
25
 
23
26
 
@@ -27,6 +30,7 @@ def create_server_operation(
27
30
  """Create a server reporter."""
28
31
  shared_config = config or ReportingConfig()
29
32
  s = ProgressTracker.create(TaskOperation.ROOT)
33
+ s.subscribe(TelemetryProgressReportingModule())
30
34
  s.subscribe(LoggingReportingModule(shared_config))
31
35
  s.subscribe(DBProgressReportingModule(task_status_repository, shared_config))
32
36
  return s
kodit/application/factories/server_factory.py ADDED
@@ -0,0 +1,353 @@
1
+ """Create a big object that contains all the application services."""
2
+
3
+ from collections.abc import Callable
4
+ from typing import TYPE_CHECKING
5
+
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+ from kodit.application.factories.reporting_factory import create_server_operation
9
+ from kodit.application.services.code_search_application_service import (
10
+ CodeSearchApplicationService,
11
+ )
12
+ from kodit.application.services.commit_indexing_application_service import (
13
+ CommitIndexingApplicationService,
14
+ )
15
+ from kodit.application.services.queue_service import QueueService
16
+ from kodit.application.services.reporting import ProgressTracker
17
+ from kodit.application.services.sync_scheduler import SyncSchedulerService
18
+ from kodit.config import AppContext
19
+ from kodit.domain.enrichments.architecture.physical.formatter import (
20
+ PhysicalArchitectureFormatter,
21
+ )
22
+ from kodit.domain.enrichments.enricher import Enricher
23
+ from kodit.domain.protocols import (
24
+ FusionService,
25
+ GitAdapter,
26
+ GitBranchRepository,
27
+ GitCommitRepository,
28
+ GitRepoRepository,
29
+ GitTagRepository,
30
+ SnippetRepositoryV2,
31
+ TaskStatusRepository,
32
+ )
33
+ from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
34
+ from kodit.domain.services.embedding_service import EmbeddingDomainService
35
+ from kodit.domain.services.git_repository_service import (
36
+ GitRepositoryScanner,
37
+ RepositoryCloner,
38
+ )
39
+ from kodit.domain.services.physical_architecture_service import (
40
+ PhysicalArchitectureService,
41
+ )
42
+ from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
43
+ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
44
+ VectorChordBM25Repository,
45
+ )
46
+ from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
47
+ from kodit.infrastructure.embedding.embedding_factory import (
48
+ embedding_domain_service_factory,
49
+ )
50
+ from kodit.infrastructure.enricher.enricher_factory import (
51
+ enricher_domain_service_factory,
52
+ )
53
+
54
+ # InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
55
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
56
+ from kodit.infrastructure.physical_architecture.formatters.narrative_formatter import (
57
+ NarrativeFormatter,
58
+ )
59
+ from kodit.infrastructure.slicing.slicer import Slicer
60
+ from kodit.infrastructure.sqlalchemy.embedding_repository import (
61
+ SqlAlchemyEmbeddingRepository,
62
+ create_embedding_repository,
63
+ )
64
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
65
+ EnrichmentV2Repository,
66
+ )
67
+ from kodit.infrastructure.sqlalchemy.git_branch_repository import (
68
+ create_git_branch_repository,
69
+ )
70
+ from kodit.infrastructure.sqlalchemy.git_commit_repository import (
71
+ create_git_commit_repository,
72
+ )
73
+ from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
74
+ from kodit.infrastructure.sqlalchemy.git_tag_repository import (
75
+ create_git_tag_repository,
76
+ )
77
+ from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
78
+ create_snippet_v2_repository,
79
+ )
80
+ from kodit.infrastructure.sqlalchemy.task_status_repository import (
81
+ create_task_status_repository,
82
+ )
83
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
84
+
85
+ if TYPE_CHECKING:
86
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
87
+
88
+
89
+ class ServerFactory:
90
+ """Factory for creating server application services."""
91
+
92
+ def __init__(
93
+ self,
94
+ app_context: AppContext,
95
+ session_factory: Callable[[], AsyncSession],
96
+ ) -> None:
97
+ """Initialize the ServerFactory."""
98
+ self.app_context = app_context
99
+ self.session_factory = session_factory
100
+ self._repo_repository: GitRepoRepository | None = None
101
+ self._snippet_v2_repository: SnippetRepositoryV2 | None = None
102
+ self._git_adapter: GitAdapter | None = None
103
+ self._scanner: GitRepositoryScanner | None = None
104
+ self._cloner: RepositoryCloner | None = None
105
+ self._commit_indexing_application_service: (
106
+ CommitIndexingApplicationService | None
107
+ ) = None
108
+ self._enrichment_service: EnrichmentDomainService | None = None
109
+ self._enricher_service: Enricher | None = None
110
+ self._task_status_repository: TaskStatusRepository | None = None
111
+ self._operation: ProgressTracker | None = None
112
+ self._queue_service: QueueService | None = None
113
+ self._slicer: Slicer | None = None
114
+ self._bm25_service: BM25DomainService | None = None
115
+ self._bm25_repository: BM25Repository | None = None
116
+ self._code_search_service: EmbeddingDomainService | None = None
117
+ self._text_search_service: EmbeddingDomainService | None = None
118
+ self._sync_scheduler_service: SyncSchedulerService | None = None
119
+ self._embedding_repository: SqlAlchemyEmbeddingRepository | None = None
120
+ self._fusion_service: FusionService | None = None
121
+ self._code_search_application_service: CodeSearchApplicationService | None = (
122
+ None
123
+ )
124
+ self._git_commit_repository: GitCommitRepository | None = None
125
+ self._git_branch_repository: GitBranchRepository | None = None
126
+ self._git_tag_repository: GitTagRepository | None = None
127
+ self._architecture_service: PhysicalArchitectureService | None = None
128
+ self._enrichment_v2_repository: EnrichmentV2Repository | None = None
129
+ self._architecture_formatter: PhysicalArchitectureFormatter | None = None
130
+
131
+ def architecture_formatter(self) -> PhysicalArchitectureFormatter:
132
+ """Create a PhysicalArchitectureFormatter instance."""
133
+ if not self._architecture_formatter:
134
+ self._architecture_formatter = NarrativeFormatter()
135
+ return self._architecture_formatter
136
+
137
+ def architecture_service(self) -> PhysicalArchitectureService:
138
+ """Create a PhysicalArchitectureService instance."""
139
+ if not self._architecture_service:
140
+ self._architecture_service = PhysicalArchitectureService(
141
+ formatter=self.architecture_formatter()
142
+ )
143
+ return self._architecture_service
144
+
145
+ def enrichment_v2_repository(self) -> EnrichmentV2Repository:
146
+ """Create a EnrichmentV2Repository instance."""
147
+ if not self._enrichment_v2_repository:
148
+ self._enrichment_v2_repository = EnrichmentV2Repository(
149
+ session_factory=self.session_factory
150
+ )
151
+ return self._enrichment_v2_repository
152
+
153
+ def queue_service(self) -> QueueService:
154
+ """Create a QueueService instance."""
155
+ if not self._queue_service:
156
+ self._queue_service = QueueService(session_factory=self.session_factory)
157
+ return self._queue_service
158
+
159
+ def task_status_repository(self) -> TaskStatusRepository:
160
+ """Create a TaskStatusRepository instance."""
161
+ if not self._task_status_repository:
162
+ self._task_status_repository = create_task_status_repository(
163
+ session_factory=self.session_factory
164
+ )
165
+ return self._task_status_repository
166
+
167
+ def operation(self) -> ProgressTracker:
168
+ """Create a ProgressTracker instance."""
169
+ if not self._operation:
170
+ self._operation = create_server_operation(
171
+ task_status_repository=self.task_status_repository()
172
+ )
173
+ return self._operation
174
+
175
+ def slicer(self) -> Slicer:
176
+ """Create a Slicer instance."""
177
+ if not self._slicer:
178
+ self._slicer = Slicer()
179
+ return self._slicer
180
+
181
+ def bm25_repository(self) -> BM25Repository:
182
+ """Create a BM25Repository instance."""
183
+ if not self._bm25_repository:
184
+ if self.app_context.default_search.provider == "vectorchord":
185
+ self._bm25_repository = VectorChordBM25Repository(
186
+ session_factory=self.session_factory
187
+ )
188
+ else:
189
+ self._bm25_repository = LocalBM25Repository(
190
+ data_dir=self.app_context.get_data_dir()
191
+ )
192
+ return self._bm25_repository
193
+
194
+ def bm25_service(self) -> BM25DomainService:
195
+ """Create a BM25DomainService instance."""
196
+ if not self._bm25_service:
197
+ self._bm25_service = BM25DomainService(repository=self.bm25_repository())
198
+ return self._bm25_service
199
+
200
+ def code_search_service(self) -> EmbeddingDomainService:
201
+ """Create a EmbeddingDomainService instance."""
202
+ if not self._code_search_service:
203
+ self._code_search_service = embedding_domain_service_factory(
204
+ "code", self.app_context, self.session_factory
205
+ )
206
+ return self._code_search_service
207
+
208
+ def text_search_service(self) -> EmbeddingDomainService:
209
+ """Create a EmbeddingDomainService instance."""
210
+ if not self._text_search_service:
211
+ self._text_search_service = embedding_domain_service_factory(
212
+ "text", self.app_context, self.session_factory
213
+ )
214
+ return self._text_search_service
215
+
216
+ def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
217
+ """Create a CommitIndexingApplicationService instance."""
218
+ if not self._commit_indexing_application_service:
219
+ self._commit_indexing_application_service = (
220
+ CommitIndexingApplicationService(
221
+ snippet_v2_repository=self.snippet_v2_repository(),
222
+ repo_repository=self.repo_repository(),
223
+ git_commit_repository=self.git_commit_repository(),
224
+ git_branch_repository=self.git_branch_repository(),
225
+ git_tag_repository=self.git_tag_repository(),
226
+ operation=self.operation(),
227
+ scanner=self.scanner(),
228
+ cloner=self.cloner(),
229
+ snippet_repository=self.snippet_v2_repository(),
230
+ slicer=self.slicer(),
231
+ queue=self.queue_service(),
232
+ bm25_service=self.bm25_service(),
233
+ code_search_service=self.code_search_service(),
234
+ text_search_service=self.text_search_service(),
235
+ embedding_repository=self.embedding_repository(),
236
+ architecture_service=self.architecture_service(),
237
+ enrichment_v2_repository=self.enrichment_v2_repository(),
238
+ enricher_service=self.enricher(),
239
+ )
240
+ )
241
+
242
+ return self._commit_indexing_application_service
243
+
244
+ def unit_of_work(self) -> SqlAlchemyUnitOfWork:
245
+ """Create a SqlAlchemyUnitOfWork instance."""
246
+ return SqlAlchemyUnitOfWork(session_factory=self.session_factory)
247
+
248
+ def repo_repository(self) -> GitRepoRepository:
249
+ """Create a GitRepoRepository instance."""
250
+ if not self._repo_repository:
251
+ self._repo_repository = create_git_repo_repository(
252
+ session_factory=self.session_factory
253
+ )
254
+ return self._repo_repository
255
+
256
+ # branch_repository and commit_repository removed - now handled by repo_repository
257
+ # as GitRepo is the aggregate root
258
+
259
+ def git_adapter(self) -> GitAdapter:
260
+ """Create a GitAdapter instance."""
261
+ if not self._git_adapter:
262
+ self._git_adapter = GitPythonAdapter()
263
+ return self._git_adapter
264
+
265
+ # tag_repository removed - now handled by repo_repository
266
+
267
+ def scanner(self) -> GitRepositoryScanner:
268
+ """Create a GitRepositoryScanner instance."""
269
+ if not self._scanner:
270
+ self._scanner = GitRepositoryScanner(self.git_adapter())
271
+ return self._scanner
272
+
273
+ def cloner(self) -> RepositoryCloner:
274
+ """Create a RepositoryCloner instance."""
275
+ if not self._cloner:
276
+ self._cloner = RepositoryCloner(
277
+ self.git_adapter(), self.app_context.get_clone_dir()
278
+ )
279
+ return self._cloner
280
+
281
+ def snippet_v2_repository(self) -> SnippetRepositoryV2:
282
+ """Create a SnippetRepositoryV2 instance."""
283
+ if not self._snippet_v2_repository:
284
+ self._snippet_v2_repository = create_snippet_v2_repository(
285
+ session_factory=self.session_factory
286
+ )
287
+ return self._snippet_v2_repository
288
+
289
+ def enricher(self) -> Enricher:
290
+ """Create a EnricherDomainService instance."""
291
+ if not self._enricher_service:
292
+ self._enricher_service = enricher_domain_service_factory(self.app_context)
293
+ return self._enricher_service
294
+
295
+ def sync_scheduler_service(self) -> SyncSchedulerService:
296
+ """Create a SyncSchedulerService instance."""
297
+ if not self._sync_scheduler_service:
298
+ self._sync_scheduler_service = SyncSchedulerService(
299
+ queue_service=self.queue_service(),
300
+ repo_repository=self.repo_repository(),
301
+ )
302
+ return self._sync_scheduler_service
303
+
304
+ def embedding_repository(self) -> SqlAlchemyEmbeddingRepository:
305
+ """Create a SqlAlchemyEmbeddingRepository instance."""
306
+ if not self._embedding_repository:
307
+ self._embedding_repository = create_embedding_repository(
308
+ session_factory=self.session_factory
309
+ )
310
+ return self._embedding_repository
311
+
312
+ def fusion_service(self) -> FusionService:
313
+ """Create a FusionService instance."""
314
+ if not self._fusion_service:
315
+ self._fusion_service = ReciprocalRankFusionService()
316
+ return self._fusion_service
317
+
318
+ def code_search_application_service(self) -> CodeSearchApplicationService:
319
+ """Create a CodeSearchApplicationService instance."""
320
+ if not self._code_search_application_service:
321
+ self._code_search_application_service = CodeSearchApplicationService(
322
+ bm25_service=self.bm25_service(),
323
+ code_search_service=self.code_search_service(),
324
+ text_search_service=self.text_search_service(),
325
+ progress_tracker=self.operation(),
326
+ snippet_repository=self.snippet_v2_repository(),
327
+ fusion_service=self.fusion_service(),
328
+ )
329
+ return self._code_search_application_service
330
+
331
+ def git_commit_repository(self) -> GitCommitRepository:
332
+ """Create a GitCommitRepository instance."""
333
+ if not self._git_commit_repository:
334
+ self._git_commit_repository = create_git_commit_repository(
335
+ session_factory=self.session_factory
336
+ )
337
+ return self._git_commit_repository
338
+
339
+ def git_branch_repository(self) -> GitBranchRepository:
340
+ """Create a GitBranchRepository instance."""
341
+ if not self._git_branch_repository:
342
+ self._git_branch_repository = create_git_branch_repository(
343
+ session_factory=self.session_factory
344
+ )
345
+ return self._git_branch_repository
346
+
347
+ def git_tag_repository(self) -> GitTagRepository:
348
+ """Create a GitTagRepository instance."""
349
+ if not self._git_tag_repository:
350
+ self._git_tag_repository = create_git_tag_repository(
351
+ session_factory=self.session_factory
352
+ )
353
+ return self._git_tag_repository
kodit/application/services/code_search_application_service.py ADDED
@@ -0,0 +1,144 @@
1
+ """Service for searching the indexes."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import structlog
6
+
7
+ from kodit.application.services.reporting import ProgressTracker
8
+ from kodit.domain.entities.git import SnippetV2
9
+ from kodit.domain.protocols import FusionService, SnippetRepositoryV2
10
+ from kodit.domain.services.bm25_service import BM25DomainService
11
+ from kodit.domain.services.embedding_service import EmbeddingDomainService
12
+ from kodit.domain.value_objects import (
13
+ FusionRequest,
14
+ MultiSearchRequest,
15
+ SearchRequest,
16
+ SearchResult,
17
+ )
18
+ from kodit.log import log_event
19
+
20
+
21
+ @dataclass
22
+ class MultiSearchResult:
23
+ """Enhanced search result with comprehensive snippet metadata."""
24
+
25
+ snippet: SnippetV2
26
+ original_scores: list[float]
27
+
28
+ def to_json(self) -> str:
29
+ """Return LLM-optimized JSON representation following the compact schema."""
30
+ return self.snippet.model_dump_json()
31
+
32
+ @classmethod
33
+ def to_jsonlines(cls, results: list["MultiSearchResult"]) -> str:
34
+ """Convert multiple MultiSearchResult objects to JSON Lines format.
35
+
36
+ Args:
37
+ results: List of MultiSearchResult objects
38
+ include_summary: Whether to include summary fields
39
+
40
+ Returns:
41
+ JSON Lines string (one JSON object per line)
42
+
43
+ """
44
+ return "\n".join(result.to_json() for result in results)
45
+
46
+
47
+ class CodeSearchApplicationService:
48
+ """Service for searching the indexes."""
49
+
50
+ def __init__( # noqa: PLR0913
51
+ self,
52
+ bm25_service: BM25DomainService,
53
+ code_search_service: EmbeddingDomainService,
54
+ text_search_service: EmbeddingDomainService,
55
+ progress_tracker: ProgressTracker,
56
+ snippet_repository: SnippetRepositoryV2,
57
+ fusion_service: FusionService,
58
+ ) -> None:
59
+ """Initialize the code search application service."""
60
+ self.bm25_service = bm25_service
61
+ self.code_search_service = code_search_service
62
+ self.text_search_service = text_search_service
63
+ self.progress_tracker = progress_tracker
64
+ self.snippet_repository = snippet_repository
65
+ self.fusion_service = fusion_service
66
+ self.log = structlog.get_logger(__name__)
67
+
68
+ async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
69
+ """Search for relevant snippets across all indexes."""
70
+ log_event("kodit.index.search")
71
+
72
+ # Apply filters if provided
73
+ filtered_snippet_ids: list[str] | None = None
74
+ # TODO(Phil): Re-implement filtering on search results
75
+
76
+ # Gather results from different search modes
77
+ fusion_list: list[list[FusionRequest]] = []
78
+
79
+ # Keyword search
80
+ if request.keywords:
81
+ result_ids: list[SearchResult] = []
82
+ for keyword in request.keywords:
83
+ results = await self.bm25_service.search(
84
+ SearchRequest(
85
+ query=keyword,
86
+ top_k=request.top_k,
87
+ snippet_ids=filtered_snippet_ids,
88
+ )
89
+ )
90
+ result_ids.extend(results)
91
+
92
+ fusion_list.append(
93
+ [FusionRequest(id=x.snippet_id, score=x.score) for x in result_ids]
94
+ )
95
+
96
+ # Semantic code search
97
+ if request.code_query:
98
+ query_results = await self.code_search_service.search(
99
+ SearchRequest(
100
+ query=request.code_query,
101
+ top_k=request.top_k,
102
+ snippet_ids=filtered_snippet_ids,
103
+ )
104
+ )
105
+ fusion_list.append(
106
+ [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
107
+ )
108
+
109
+ # Semantic text search
110
+ if request.text_query:
111
+ query_results = await self.text_search_service.search(
112
+ SearchRequest(
113
+ query=request.text_query,
114
+ top_k=request.top_k,
115
+ snippet_ids=filtered_snippet_ids,
116
+ )
117
+ )
118
+ fusion_list.append(
119
+ [FusionRequest(id=x.snippet_id, score=x.score) for x in query_results]
120
+ )
121
+
122
+ if len(fusion_list) == 0:
123
+ return []
124
+
125
+ # Fusion ranking
126
+ final_results = self.fusion_service.reciprocal_rank_fusion(
127
+ rankings=fusion_list,
128
+ k=60, # This is a parameter in the RRF algorithm, not top_k
129
+ )
130
+
131
+ # Keep only top_k results
132
+ final_results = final_results[: request.top_k]
133
+
134
+ # Get snippet details
135
+ ids = [x.id for x in final_results]
136
+ search_results = await self.snippet_repository.get_by_ids(ids)
137
+ search_results.sort(key=lambda x: ids.index(x.id))
138
+ return [
139
+ MultiSearchResult(
140
+ snippet=snippet,
141
+ original_scores=[x.score for x in final_results if x.id == snippet.id],
142
+ )
143
+ for snippet in search_results
144
+ ]