kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.2'
32
- __version_tuple__ = version_tuple = (0, 4, 2)
31
+ __version__ = version = '0.5.0'
32
+ __version_tuple__ = version_tuple = (0, 5, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -3,70 +3,105 @@
3
3
  from collections.abc import AsyncIterator
4
4
  from contextlib import asynccontextmanager
5
5
 
6
+ import structlog
6
7
  from asgi_correlation_id import CorrelationIdMiddleware
7
8
  from fastapi import FastAPI, Response
8
9
  from fastapi.responses import RedirectResponse
9
10
 
10
11
  from kodit._version import version
11
12
  from kodit.application.factories.reporting_factory import create_server_operation
12
- from kodit.application.services.auto_indexing_service import AutoIndexingService
13
+ from kodit.application.factories.server_factory import ServerFactory
13
14
  from kodit.application.services.indexing_worker_service import IndexingWorkerService
14
15
  from kodit.application.services.sync_scheduler import SyncSchedulerService
15
16
  from kodit.config import AppContext
16
- from kodit.infrastructure.api.v1.routers import (
17
- indexes_router,
18
- queue_router,
19
- search_router,
17
+ from kodit.domain.value_objects import (
18
+ Document,
19
+ EnrichmentIndexRequest,
20
+ EnrichmentRequest,
21
+ IndexRequest,
20
22
  )
23
+ from kodit.infrastructure.api.v1.routers.commits import router as commits_router
24
+ from kodit.infrastructure.api.v1.routers.queue import router as queue_router
25
+ from kodit.infrastructure.api.v1.routers.repositories import (
26
+ router as repositories_router,
27
+ )
28
+ from kodit.infrastructure.api.v1.routers.search import router as search_router
21
29
  from kodit.infrastructure.api.v1.schemas.context import AppLifespanState
30
+ from kodit.infrastructure.sqlalchemy.task_status_repository import (
31
+ create_task_status_repository,
32
+ )
22
33
  from kodit.mcp import mcp
23
- from kodit.middleware import ASGICancelledErrorMiddleware, logging_middleware
34
+ from kodit.middleware import (
35
+ ASGICancelledErrorMiddleware,
36
+ logging_middleware,
37
+ )
24
38
 
25
39
  # Global services
26
- _auto_indexing_service: AutoIndexingService | None = None
27
40
  _sync_scheduler_service: SyncSchedulerService | None = None
41
+ _server_factory: ServerFactory | None = None
28
42
 
29
43
 
30
44
  @asynccontextmanager
31
45
  async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
32
46
  """Manage application lifespan for auto-indexing and sync."""
33
- global _auto_indexing_service, _sync_scheduler_service # noqa: PLW0603
47
+ global _sync_scheduler_service # noqa: PLW0603
48
+ global _server_factory # noqa: PLW0603
34
49
 
35
50
  # App context has already been configured by the CLI.
36
51
  app_context = AppContext()
37
52
  db = await app_context.get_db()
38
- operation = create_server_operation()
53
+ log = structlog.get_logger(__name__)
54
+ operation = create_server_operation(
55
+ create_task_status_repository(db.session_factory)
56
+ )
57
+
58
+ _server_factory = ServerFactory(app_context, db.session_factory)
59
+
60
+ # Quickly check if the providers are accessible and raise an error if not
61
+ log.info("Checking providers are accessible")
62
+ try:
63
+ await anext(
64
+ _server_factory.code_search_service().index_documents(
65
+ IndexRequest(
66
+ documents=[Document(snippet_id="1", text="def hello(): pass")]
67
+ )
68
+ )
69
+ )
70
+ except Exception as e:
71
+ raise ValueError("Embedding service is not accessible") from e
72
+ try:
73
+ await anext(
74
+ _server_factory.enrichment_service().enrich_documents(
75
+ EnrichmentIndexRequest(
76
+ requests=[
77
+ EnrichmentRequest(snippet_id="1", text="def hello(): pass")
78
+ ]
79
+ )
80
+ )
81
+ )
82
+ except Exception as e:
83
+ raise ValueError("Enrichment service is not accessible") from e
39
84
 
40
85
  # Start the queue worker service
41
86
  _indexing_worker_service = IndexingWorkerService(
42
87
  app_context=app_context,
43
88
  session_factory=db.session_factory,
89
+ server_factory=_server_factory,
44
90
  )
45
91
  await _indexing_worker_service.start(operation)
46
92
 
47
- # Start auto-indexing service
48
- _auto_indexing_service = AutoIndexingService(
49
- app_context=app_context,
50
- session_factory=db.session_factory,
51
- )
52
- await _auto_indexing_service.start_background_indexing(operation)
53
-
54
93
  # Start sync scheduler service
55
94
  if app_context.periodic_sync.enabled:
56
- _sync_scheduler_service = SyncSchedulerService(
57
- session_factory=db.session_factory,
58
- )
95
+ _sync_scheduler_service = _server_factory.sync_scheduler_service()
59
96
  _sync_scheduler_service.start_periodic_sync(
60
97
  interval_seconds=app_context.periodic_sync.interval_seconds
61
98
  )
62
99
 
63
- yield AppLifespanState(app_context=app_context)
100
+ yield AppLifespanState(app_context=app_context, server_factory=_server_factory)
64
101
 
65
102
  # Stop services
66
103
  if _sync_scheduler_service:
67
104
  await _sync_scheduler_service.stop_periodic_sync()
68
- if _auto_indexing_service:
69
- await _auto_indexing_service.stop()
70
105
  if _indexing_worker_service:
71
106
  await _indexing_worker_service.stop()
72
107
 
@@ -118,10 +153,10 @@ async def healthz() -> Response:
118
153
 
119
154
 
120
155
  # Include API routers
121
- app.include_router(indexes_router)
122
156
  app.include_router(queue_router)
123
157
  app.include_router(search_router)
124
-
158
+ app.include_router(commits_router)
159
+ app.include_router(repositories_router)
125
160
 
126
161
  # Add mcp routes last, otherwise previous routes aren't added
127
162
  # Mount both apps at root - they have different internal paths
@@ -1,27 +1,36 @@
1
1
  """Reporting factory."""
2
2
 
3
- from kodit.application.services.reporting import OperationType, ProgressTracker
3
+ from kodit.application.services.reporting import ProgressTracker, TaskOperation
4
4
  from kodit.config import ReportingConfig
5
+ from kodit.domain.protocols import TaskStatusRepository
6
+ from kodit.infrastructure.reporting.db_progress import DBProgressReportingModule
5
7
  from kodit.infrastructure.reporting.log_progress import LoggingReportingModule
6
- from kodit.infrastructure.reporting.tdqm_progress import TQDMReportingModule
8
+ from kodit.infrastructure.reporting.telemetry_progress import (
9
+ TelemetryProgressReportingModule,
10
+ )
7
11
 
8
12
 
9
13
  def create_noop_operation() -> ProgressTracker:
10
14
  """Create a noop reporter."""
11
- return ProgressTracker(OperationType.ROOT.value)
15
+ return ProgressTracker.create(TaskOperation.ROOT)
12
16
 
13
17
 
14
18
  def create_cli_operation(config: ReportingConfig | None = None) -> ProgressTracker:
15
19
  """Create a CLI reporter."""
16
20
  shared_config = config or ReportingConfig()
17
- s = ProgressTracker(OperationType.ROOT.value)
18
- s.subscribe(TQDMReportingModule(shared_config))
21
+ s = ProgressTracker.create(TaskOperation.ROOT)
22
+ s.subscribe(TelemetryProgressReportingModule())
23
+ s.subscribe(LoggingReportingModule(shared_config))
19
24
  return s
20
25
 
21
26
 
22
- def create_server_operation(config: ReportingConfig | None = None) -> ProgressTracker:
27
+ def create_server_operation(
28
+ task_status_repository: TaskStatusRepository, config: ReportingConfig | None = None
29
+ ) -> ProgressTracker:
23
30
  """Create a server reporter."""
24
31
  shared_config = config or ReportingConfig()
25
- s = ProgressTracker(OperationType.ROOT.value)
32
+ s = ProgressTracker.create(TaskOperation.ROOT)
33
+ s.subscribe(TelemetryProgressReportingModule())
26
34
  s.subscribe(LoggingReportingModule(shared_config))
35
+ s.subscribe(DBProgressReportingModule(task_status_repository, shared_config))
27
36
  return s
@@ -0,0 +1,311 @@
1
+ """Create a big object that contains all the application services."""
2
+
3
+ from collections.abc import Callable
4
+
5
+ from sqlalchemy.ext.asyncio import AsyncSession
6
+
7
+ from kodit.application.factories.reporting_factory import create_server_operation
8
+ from kodit.application.services.code_search_application_service import (
9
+ CodeSearchApplicationService,
10
+ )
11
+ from kodit.application.services.commit_indexing_application_service import (
12
+ CommitIndexingApplicationService,
13
+ )
14
+ from kodit.application.services.queue_service import QueueService
15
+ from kodit.application.services.reporting import ProgressTracker
16
+ from kodit.application.services.sync_scheduler import SyncSchedulerService
17
+ from kodit.config import AppContext
18
+ from kodit.domain.protocols import (
19
+ FusionService,
20
+ GitAdapter,
21
+ GitBranchRepository,
22
+ GitCommitRepository,
23
+ GitRepoRepository,
24
+ GitTagRepository,
25
+ SnippetRepositoryV2,
26
+ TaskStatusRepository,
27
+ )
28
+ from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
29
+ from kodit.domain.services.embedding_service import EmbeddingDomainService
30
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
31
+ from kodit.domain.services.git_repository_service import (
32
+ GitRepositoryScanner,
33
+ RepositoryCloner,
34
+ )
35
+ from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
36
+ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
37
+ VectorChordBM25Repository,
38
+ )
39
+ from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
40
+ from kodit.infrastructure.embedding.embedding_factory import (
41
+ embedding_domain_service_factory,
42
+ )
43
+ from kodit.infrastructure.enrichment.enrichment_factory import (
44
+ enrichment_domain_service_factory,
45
+ )
46
+
47
+ # InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
48
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
49
+ from kodit.infrastructure.slicing.slicer import Slicer
50
+ from kodit.infrastructure.sqlalchemy.embedding_repository import (
51
+ SqlAlchemyEmbeddingRepository,
52
+ create_embedding_repository,
53
+ )
54
+ from kodit.infrastructure.sqlalchemy.git_branch_repository import (
55
+ create_git_branch_repository,
56
+ )
57
+ from kodit.infrastructure.sqlalchemy.git_commit_repository import (
58
+ create_git_commit_repository,
59
+ )
60
+ from kodit.infrastructure.sqlalchemy.git_repository import create_git_repo_repository
61
+ from kodit.infrastructure.sqlalchemy.git_tag_repository import (
62
+ create_git_tag_repository,
63
+ )
64
+ from kodit.infrastructure.sqlalchemy.snippet_v2_repository import (
65
+ create_snippet_v2_repository,
66
+ )
67
+ from kodit.infrastructure.sqlalchemy.task_status_repository import (
68
+ create_task_status_repository,
69
+ )
70
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
71
+
72
+
73
class ServerFactory:
    """Lazily build and cache the application services used by the server.

    Every accessor except ``unit_of_work`` constructs its object on the first
    call and hands back that same instance on later calls, so services obtained
    from one factory share their collaborators.
    """

    def __init__(
        self,
        app_context: AppContext,
        session_factory: Callable[[], AsyncSession],
    ) -> None:
        """Store the shared dependencies and start with every cache slot empty.

        Args:
            app_context: Application configuration read by the accessors.
            session_factory: Zero-argument callable returning an ``AsyncSession``;
                handed to every repository and service that needs one.

        """
        self.app_context = app_context
        self.session_factory = session_factory
        self._repo_repository: GitRepoRepository | None = None
        self._snippet_v2_repository: SnippetRepositoryV2 | None = None
        self._git_adapter: GitAdapter | None = None
        self._scanner: GitRepositoryScanner | None = None
        self._cloner: RepositoryCloner | None = None
        self._commit_indexing_application_service: (
            CommitIndexingApplicationService | None
        ) = None
        self._enrichment_service: EnrichmentDomainService | None = None
        self._task_status_repository: TaskStatusRepository | None = None
        self._operation: ProgressTracker | None = None
        self._queue_service: QueueService | None = None
        self._slicer: Slicer | None = None
        self._bm25_service: BM25DomainService | None = None
        self._bm25_repository: BM25Repository | None = None
        self._code_search_service: EmbeddingDomainService | None = None
        self._text_search_service: EmbeddingDomainService | None = None
        self._sync_scheduler_service: SyncSchedulerService | None = None
        self._embedding_repository: SqlAlchemyEmbeddingRepository | None = None
        self._fusion_service: FusionService | None = None
        self._code_search_application_service: CodeSearchApplicationService | None = (
            None
        )
        self._git_commit_repository: GitCommitRepository | None = None
        self._git_branch_repository: GitBranchRepository | None = None
        self._git_tag_repository: GitTagRepository | None = None

    # NOTE: the cache checks below compare against ``None`` explicitly. A plain
    # truthiness test would rebuild any cached object that happens to be falsy
    # (for example one that defines ``__len__`` and is currently empty).

    def queue_service(self) -> QueueService:
        """Return the shared QueueService, creating it on first use."""
        if self._queue_service is None:
            self._queue_service = QueueService(session_factory=self.session_factory)
        return self._queue_service

    def task_status_repository(self) -> TaskStatusRepository:
        """Return the shared TaskStatusRepository, creating it on first use."""
        if self._task_status_repository is None:
            self._task_status_repository = create_task_status_repository(
                session_factory=self.session_factory
            )
        return self._task_status_repository

    def operation(self) -> ProgressTracker:
        """Return the shared server ProgressTracker, creating it on first use."""
        if self._operation is None:
            self._operation = create_server_operation(
                task_status_repository=self.task_status_repository()
            )
        return self._operation

    def slicer(self) -> Slicer:
        """Return the shared Slicer, creating it on first use."""
        if self._slicer is None:
            self._slicer = Slicer()
        return self._slicer

    def bm25_repository(self) -> BM25Repository:
        """Return the shared BM25Repository, creating it on first use.

        The VectorChord implementation is used when the configured default
        search provider is ``"vectorchord"``; otherwise the local implementation
        backed by the application's data directory is used.
        """
        if self._bm25_repository is None:
            if self.app_context.default_search.provider == "vectorchord":
                self._bm25_repository = VectorChordBM25Repository(
                    session_factory=self.session_factory
                )
            else:
                self._bm25_repository = LocalBM25Repository(
                    data_dir=self.app_context.get_data_dir()
                )
        return self._bm25_repository

    def bm25_service(self) -> BM25DomainService:
        """Return the shared BM25DomainService, creating it on first use."""
        if self._bm25_service is None:
            self._bm25_service = BM25DomainService(repository=self.bm25_repository())
        return self._bm25_service

    def code_search_service(self) -> EmbeddingDomainService:
        """Return the shared "code" EmbeddingDomainService, creating it on first use."""
        if self._code_search_service is None:
            self._code_search_service = embedding_domain_service_factory(
                "code", self.app_context, self.session_factory
            )
        return self._code_search_service

    def text_search_service(self) -> EmbeddingDomainService:
        """Return the shared "text" EmbeddingDomainService, creating it on first use."""
        if self._text_search_service is None:
            self._text_search_service = embedding_domain_service_factory(
                "text", self.app_context, self.session_factory
            )
        return self._text_search_service

    def commit_indexing_application_service(self) -> CommitIndexingApplicationService:
        """Return the shared CommitIndexingApplicationService, creating it on first use.

        All collaborators are obtained from this factory, so they are the same
        instances that other services built by this factory receive.
        """
        if self._commit_indexing_application_service is None:
            self._commit_indexing_application_service = (
                CommitIndexingApplicationService(
                    snippet_v2_repository=self.snippet_v2_repository(),
                    repo_repository=self.repo_repository(),
                    git_commit_repository=self.git_commit_repository(),
                    git_branch_repository=self.git_branch_repository(),
                    git_tag_repository=self.git_tag_repository(),
                    operation=self.operation(),
                    scanner=self.scanner(),
                    cloner=self.cloner(),
                    # Same cached instance as ``snippet_v2_repository`` above.
                    snippet_repository=self.snippet_v2_repository(),
                    slicer=self.slicer(),
                    queue=self.queue_service(),
                    bm25_service=self.bm25_service(),
                    code_search_service=self.code_search_service(),
                    text_search_service=self.text_search_service(),
                    enrichment_service=self.enrichment_service(),
                    embedding_repository=self.embedding_repository(),
                )
            )

        return self._commit_indexing_application_service

    def unit_of_work(self) -> SqlAlchemyUnitOfWork:
        """Create a new SqlAlchemyUnitOfWork; unlike the other accessors, not cached."""
        return SqlAlchemyUnitOfWork(session_factory=self.session_factory)

    def repo_repository(self) -> GitRepoRepository:
        """Return the shared GitRepoRepository, creating it on first use."""
        if self._repo_repository is None:
            self._repo_repository = create_git_repo_repository(
                session_factory=self.session_factory
            )
        return self._repo_repository

    def git_adapter(self) -> GitAdapter:
        """Return the shared GitAdapter (a GitPythonAdapter), creating it on first use."""
        if self._git_adapter is None:
            self._git_adapter = GitPythonAdapter()
        return self._git_adapter

    def scanner(self) -> GitRepositoryScanner:
        """Return the shared GitRepositoryScanner, creating it on first use."""
        if self._scanner is None:
            self._scanner = GitRepositoryScanner(self.git_adapter())
        return self._scanner

    def cloner(self) -> RepositoryCloner:
        """Return the shared RepositoryCloner, creating it on first use."""
        if self._cloner is None:
            self._cloner = RepositoryCloner(
                self.git_adapter(), self.app_context.get_clone_dir()
            )
        return self._cloner

    def snippet_v2_repository(self) -> SnippetRepositoryV2:
        """Return the shared SnippetRepositoryV2, creating it on first use."""
        if self._snippet_v2_repository is None:
            self._snippet_v2_repository = create_snippet_v2_repository(
                session_factory=self.session_factory
            )
        return self._snippet_v2_repository

    def enrichment_service(self) -> EnrichmentDomainService:
        """Return the shared EnrichmentDomainService, creating it on first use."""
        if self._enrichment_service is None:
            self._enrichment_service = enrichment_domain_service_factory(
                self.app_context
            )
        return self._enrichment_service

    def sync_scheduler_service(self) -> SyncSchedulerService:
        """Return the shared SyncSchedulerService, creating it on first use."""
        if self._sync_scheduler_service is None:
            self._sync_scheduler_service = SyncSchedulerService(
                queue_service=self.queue_service(),
                repo_repository=self.repo_repository(),
            )
        return self._sync_scheduler_service

    def embedding_repository(self) -> SqlAlchemyEmbeddingRepository:
        """Return the shared SqlAlchemyEmbeddingRepository, creating it on first use."""
        if self._embedding_repository is None:
            self._embedding_repository = create_embedding_repository(
                session_factory=self.session_factory
            )
        return self._embedding_repository

    def fusion_service(self) -> FusionService:
        """Return the shared FusionService, creating it on first use."""
        if self._fusion_service is None:
            self._fusion_service = ReciprocalRankFusionService()
        return self._fusion_service

    def code_search_application_service(self) -> CodeSearchApplicationService:
        """Return the shared CodeSearchApplicationService, creating it on first use."""
        if self._code_search_application_service is None:
            self._code_search_application_service = CodeSearchApplicationService(
                bm25_service=self.bm25_service(),
                code_search_service=self.code_search_service(),
                text_search_service=self.text_search_service(),
                progress_tracker=self.operation(),
                snippet_repository=self.snippet_v2_repository(),
                fusion_service=self.fusion_service(),
            )
        return self._code_search_application_service

    def git_commit_repository(self) -> GitCommitRepository:
        """Return the shared GitCommitRepository, creating it on first use."""
        if self._git_commit_repository is None:
            self._git_commit_repository = create_git_commit_repository(
                session_factory=self.session_factory
            )
        return self._git_commit_repository

    def git_branch_repository(self) -> GitBranchRepository:
        """Return the shared GitBranchRepository, creating it on first use."""
        if self._git_branch_repository is None:
            self._git_branch_repository = create_git_branch_repository(
                session_factory=self.session_factory
            )
        return self._git_branch_repository

    def git_tag_repository(self) -> GitTagRepository:
        """Return the shared GitTagRepository, creating it on first use."""
        if self._git_tag_repository is None:
            self._git_tag_repository = create_git_tag_repository(
                session_factory=self.session_factory
            )
        return self._git_tag_repository
@@ -0,0 +1,144 @@
1
+ """Service for searching the indexes."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import structlog
6
+
7
+ from kodit.application.services.reporting import ProgressTracker
8
+ from kodit.domain.entities.git import SnippetV2
9
+ from kodit.domain.protocols import FusionService, SnippetRepositoryV2
10
+ from kodit.domain.services.bm25_service import BM25DomainService
11
+ from kodit.domain.services.embedding_service import EmbeddingDomainService
12
+ from kodit.domain.value_objects import (
13
+ FusionRequest,
14
+ MultiSearchRequest,
15
+ SearchRequest,
16
+ SearchResult,
17
+ )
18
+ from kodit.log import log_event
19
+
20
+
21
@dataclass
class MultiSearchResult:
    """A snippet returned by a multi-mode search, with its fused score(s)."""

    # The matched snippet.
    snippet: SnippetV2
    # Score(s) the fusion step assigned to this snippet.
    original_scores: list[float]

    def to_json(self) -> str:
        """Return the snippet serialised as JSON.

        Only the snippet is serialised (via its ``model_dump_json``);
        ``original_scores`` is not part of the output.
        """
        return self.snippet.model_dump_json()

    @classmethod
    def to_jsonlines(cls, results: list["MultiSearchResult"]) -> str:
        """Convert multiple MultiSearchResult objects to JSON Lines format.

        Args:
            results: List of MultiSearchResult objects

        Returns:
            JSON Lines string (one JSON object per line, no trailing newline);
            an empty string when ``results`` is empty.

        """
        return "\n".join(result.to_json() for result in results)
45
+
46
+
47
class CodeSearchApplicationService:
    """Service for searching the indexes."""

    def __init__(  # noqa: PLR0913
        self,
        bm25_service: BM25DomainService,
        code_search_service: EmbeddingDomainService,
        text_search_service: EmbeddingDomainService,
        progress_tracker: ProgressTracker,
        snippet_repository: SnippetRepositoryV2,
        fusion_service: FusionService,
    ) -> None:
        """Initialize the code search application service.

        Args:
            bm25_service: Used for keyword search.
            code_search_service: Used for ``code_query`` searches.
            text_search_service: Used for ``text_query`` searches.
            progress_tracker: Stored on the instance; not used by ``search``.
            snippet_repository: Used to load the snippets for the final ids.
            fusion_service: Used to fuse the per-mode rankings.

        """
        self.bm25_service = bm25_service
        self.code_search_service = code_search_service
        self.text_search_service = text_search_service
        self.progress_tracker = progress_tracker
        self.snippet_repository = snippet_repository
        self.fusion_service = fusion_service
        self.log = structlog.get_logger(__name__)

    @staticmethod
    def _as_fusion_requests(results: list[SearchResult]) -> list[FusionRequest]:
        """Convert search results into the id/score pairs the fusion step takes."""
        return [FusionRequest(id=x.snippet_id, score=x.score) for x in results]

    @staticmethod
    def _pair_in_fusion_order(
        fused: list, snippets: list[SnippetV2]
    ) -> list[tuple[SnippetV2, list[float]]]:
        """Order ``snippets`` by fused rank and attach each one's fused score(s).

        Args:
            fused: Fusion results in rank order; each item exposes ``id`` and
                ``score``.
            snippets: Snippets loaded for those ids, in any order.

        Returns:
            ``(snippet, scores)`` pairs sorted by the position at which the
            snippet's id first appears in ``fused``.

        Raises:
            KeyError: If a snippet's id does not appear in ``fused``.

        """
        # One pass builds both lookups, so ordering and score retrieval are
        # dictionary hits instead of repeated scans of ``fused``.
        rank: dict = {}
        scores: dict = {}
        for position, item in enumerate(fused):
            rank.setdefault(item.id, position)
            scores.setdefault(item.id, []).append(item.score)

        ordered = sorted(snippets, key=lambda snippet: rank[snippet.id])
        return [(snippet, list(scores[snippet.id])) for snippet in ordered]

    async def search(self, request: MultiSearchRequest) -> list[MultiSearchResult]:
        """Search for relevant snippets across all indexes.

        Runs whichever of keyword, code and text search the request asks for,
        fuses the rankings with reciprocal rank fusion, keeps the best
        ``request.top_k`` and loads the matching snippets.

        Args:
            request: Search parameters; ``keywords``, ``code_query``,
                ``text_query`` and ``top_k`` are read.

        Returns:
            Results in fused-rank order, or an empty list when the request
            selects no search mode.

        """
        log_event("kodit.index.search")

        # Apply filters if provided
        filtered_snippet_ids: list[str] | None = None
        # TODO(Phil): Re-implement filtering on search results

        # Gather results from different search modes
        fusion_list: list[list[FusionRequest]] = []

        # Keyword search
        if request.keywords:
            # NOTE(review): hits for all keywords are concatenated into a single
            # ranking, so the same snippet may appear more than once in it.
            keyword_hits: list[SearchResult] = []
            for keyword in request.keywords:
                keyword_hits.extend(
                    await self.bm25_service.search(
                        SearchRequest(
                            query=keyword,
                            top_k=request.top_k,
                            snippet_ids=filtered_snippet_ids,
                        )
                    )
                )
            fusion_list.append(self._as_fusion_requests(keyword_hits))

        # Semantic code search
        if request.code_query:
            code_hits = await self.code_search_service.search(
                SearchRequest(
                    query=request.code_query,
                    top_k=request.top_k,
                    snippet_ids=filtered_snippet_ids,
                )
            )
            fusion_list.append(self._as_fusion_requests(code_hits))

        # Semantic text search
        if request.text_query:
            text_hits = await self.text_search_service.search(
                SearchRequest(
                    query=request.text_query,
                    top_k=request.top_k,
                    snippet_ids=filtered_snippet_ids,
                )
            )
            fusion_list.append(self._as_fusion_requests(text_hits))

        if not fusion_list:
            return []

        # Fusion ranking
        final_results = self.fusion_service.reciprocal_rank_fusion(
            rankings=fusion_list,
            k=60,  # This is a parameter in the RRF algorithm, not top_k
        )

        # Keep only top_k results
        final_results = final_results[: request.top_k]

        # Get snippet details
        ids = [x.id for x in final_results]
        snippets = await self.snippet_repository.get_by_ids(ids)
        return [
            MultiSearchResult(snippet=snippet, original_scores=scores)
            for snippet, scores in self._pair_in_fusion_order(final_results, snippets)
        ]