kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,10 @@
1
1
  """VectorChord vector search repository implementation."""
2
2
 
3
- from collections.abc import AsyncGenerator
4
- from typing import Any, Literal
3
+ from collections.abc import AsyncGenerator, Callable
4
+ from typing import Literal
5
5
 
6
6
  import structlog
7
- from sqlalchemy import Result, TextClause, text
7
+ from sqlalchemy import text
8
8
  from sqlalchemy.ext.asyncio import AsyncSession
9
9
 
10
10
  from kodit.domain.services.embedding_service import (
@@ -19,6 +19,7 @@ from kodit.domain.value_objects import (
19
19
  SearchResult,
20
20
  )
21
21
  from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
22
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
22
23
 
23
24
  # SQL Queries
24
25
  CREATE_VCHORD_EXTENSION = """
@@ -72,6 +73,10 @@ CHECK_VCHORD_EMBEDDING_EXISTS = """
72
73
  SELECT EXISTS(SELECT 1 FROM {TABLE_NAME} WHERE snippet_id = :snippet_id)
73
74
  """
74
75
 
76
+ CHECK_VCHORD_EMBEDDING_EXISTS_MULTIPLE = """
77
+ SELECT snippet_id FROM {TABLE_NAME} WHERE snippet_id = ANY(:snippet_ids)
78
+ """
79
+
75
80
  TaskName = Literal["code", "text"]
76
81
 
77
82
 
@@ -80,8 +85,8 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
80
85
 
81
86
  def __init__(
82
87
  self,
88
+ session_factory: Callable[[], AsyncSession],
83
89
  task_name: TaskName,
84
- session: AsyncSession,
85
90
  embedding_provider: EmbeddingProvider,
86
91
  ) -> None:
87
92
  """Initialize the VectorChord vector search repository.
@@ -93,7 +98,7 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
93
98
 
94
99
  """
95
100
  self.embedding_provider = embedding_provider
96
- self._session = session
101
+ self.session_factory = session_factory
97
102
  self._initialized = False
98
103
  self.table_name = f"vectorchord_{task_name}_embeddings"
99
104
  self.index_name = f"{self.table_name}_idx"
@@ -111,12 +116,12 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
111
116
 
112
117
  async def _create_extensions(self) -> None:
113
118
  """Create the necessary extensions."""
114
- await self._session.execute(text(CREATE_VCHORD_EXTENSION))
115
- await self._commit()
119
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
120
+ await session.execute(text(CREATE_VCHORD_EXTENSION))
116
121
 
117
122
  async def _create_tables(self) -> None:
118
123
  """Create the necessary tables."""
119
- req = EmbeddingRequest(snippet_id=0, text="dimension")
124
+ req = EmbeddingRequest(snippet_id="0", text="dimension")
120
125
  vector_dim: list[float] | None = None
121
126
  async for batch in self.embedding_provider.embed([req]):
122
127
  if batch:
@@ -125,79 +130,85 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
125
130
  if vector_dim is None:
126
131
  msg = "Failed to obtain embedding dimension from provider"
127
132
  raise RuntimeError(msg)
128
- await self._session.execute(
129
- text(
130
- f"""CREATE TABLE IF NOT EXISTS {self.table_name} (
131
- id SERIAL PRIMARY KEY,
132
- snippet_id INT NOT NULL UNIQUE,
133
- embedding VECTOR({len(vector_dim)}) NOT NULL
134
- );"""
133
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
134
+ await session.execute(
135
+ text(
136
+ f"""CREATE TABLE IF NOT EXISTS {self.table_name} (
137
+ id SERIAL PRIMARY KEY,
138
+ snippet_id VARCHAR(255) NOT NULL UNIQUE,
139
+ embedding VECTOR({len(vector_dim)}) NOT NULL
140
+ );"""
141
+ )
135
142
  )
136
- )
137
- await self._session.execute(
138
- text(
139
- CREATE_VCHORD_INDEX.format(
140
- TABLE_NAME=self.table_name, INDEX_NAME=self.index_name
143
+ await session.execute(
144
+ text(
145
+ CREATE_VCHORD_INDEX.format(
146
+ TABLE_NAME=self.table_name, INDEX_NAME=self.index_name
147
+ )
141
148
  )
142
149
  )
143
- )
144
- result = await self._session.execute(
145
- text(CHECK_VCHORD_EMBEDDING_DIMENSION.format(TABLE_NAME=self.table_name))
146
- )
147
- vector_dim_from_db = result.scalar_one()
148
- if vector_dim_from_db != len(vector_dim):
149
- msg = (
150
- f"Embedding vector dimension does not match database, "
151
- f"please delete your index: {vector_dim_from_db} != {len(vector_dim)}"
150
+ result = await session.execute(
151
+ text(
152
+ CHECK_VCHORD_EMBEDDING_DIMENSION.format(TABLE_NAME=self.table_name)
153
+ )
152
154
  )
153
- raise ValueError(msg)
154
- await self._commit()
155
-
156
- async def _execute(
157
- self, query: TextClause, param_list: list[Any] | dict[str, Any] | None = None
158
- ) -> Result:
159
- """Execute a query."""
160
- if not self._initialized:
161
- await self._initialize()
162
- return await self._session.execute(query, param_list)
163
-
164
- async def _commit(self) -> None:
165
- """Commit the session."""
166
- await self._session.commit()
155
+ vector_dim_from_db = result.scalar_one()
156
+ if vector_dim_from_db != len(vector_dim):
157
+ msg = (
158
+ f"Embedding vector dimension does not match database, please "
159
+ f"delete your index: {vector_dim_from_db} != {len(vector_dim)}"
160
+ )
161
+ raise ValueError(msg)
167
162
 
168
163
  async def index_documents(
169
164
  self, request: IndexRequest
170
165
  ) -> AsyncGenerator[list[IndexResult], None]:
171
166
  """Index documents for vector search."""
167
+ if not self._initialized:
168
+ await self._initialize()
169
+
172
170
  if not request.documents:
173
171
  yield []
174
172
 
173
+ # Search for existing embeddings
174
+ existing_ids = await self._get_existing_ids(
175
+ [doc.snippet_id for doc in request.documents]
176
+ )
177
+ new_documents = [
178
+ doc for doc in request.documents if doc.snippet_id not in existing_ids
179
+ ]
180
+ if not new_documents:
181
+ self.log.info("No new documents to index")
182
+ return
183
+
175
184
  # Convert to embedding requests
176
- requests = [
185
+ embedding_requests = [
177
186
  EmbeddingRequest(snippet_id=doc.snippet_id, text=doc.text)
178
- for doc in request.documents
187
+ for doc in new_documents
179
188
  ]
180
189
 
181
- async for batch in self.embedding_provider.embed(requests):
182
- await self._execute(
183
- text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
184
- [
185
- {
186
- "snippet_id": result.snippet_id,
187
- "embedding": str(result.embedding),
188
- }
189
- for result in batch
190
- ],
191
- )
192
- await self._commit()
193
- yield [IndexResult(snippet_id=result.snippet_id) for result in batch]
190
+ async for batch in self.embedding_provider.embed(embedding_requests):
191
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
192
+ await session.execute(
193
+ text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
194
+ [
195
+ {
196
+ "snippet_id": result.snippet_id,
197
+ "embedding": str(result.embedding),
198
+ }
199
+ for result in batch
200
+ ],
201
+ )
202
+ yield [IndexResult(snippet_id=result.snippet_id) for result in batch]
194
203
 
195
204
  async def search(self, request: SearchRequest) -> list[SearchResult]:
196
205
  """Search documents using vector similarity."""
206
+ if not self._initialized:
207
+ await self._initialize()
197
208
  if not request.query or not request.query.strip():
198
209
  return []
199
210
 
200
- req = EmbeddingRequest(snippet_id=0, text=request.query)
211
+ req = EmbeddingRequest(snippet_id="0", text=request.query)
201
212
  embedding_vec: list[float] | None = None
202
213
  async for batch in self.embedding_provider.embed([req]):
203
214
  if batch:
@@ -207,39 +218,55 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
207
218
  if not embedding_vec:
208
219
  return []
209
220
 
210
- # Use filtered query if snippet_ids are provided
211
- if request.snippet_ids is not None:
212
- result = await self._execute(
213
- text(SEARCH_QUERY_WITH_FILTER.format(TABLE_NAME=self.table_name)),
214
- {
215
- "query": str(embedding_vec),
216
- "top_k": request.top_k,
217
- "snippet_ids": request.snippet_ids,
218
- },
219
- )
220
- else:
221
- result = await self._execute(
222
- text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
223
- {"query": str(embedding_vec), "top_k": request.top_k},
224
- )
221
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
222
+ # Use filtered query if snippet_ids are provided
223
+ if request.snippet_ids is not None:
224
+ result = await session.execute(
225
+ text(SEARCH_QUERY_WITH_FILTER.format(TABLE_NAME=self.table_name)),
226
+ {
227
+ "query": str(embedding_vec),
228
+ "top_k": request.top_k,
229
+ "snippet_ids": request.snippet_ids,
230
+ },
231
+ )
232
+ else:
233
+ result = await session.execute(
234
+ text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
235
+ {"query": str(embedding_vec), "top_k": request.top_k},
236
+ )
225
237
 
226
- rows = result.mappings().all()
238
+ rows = result.mappings().all()
227
239
 
228
- return [
229
- SearchResult(snippet_id=row["snippet_id"], score=row["score"])
230
- for row in rows
231
- ]
240
+ return [
241
+ SearchResult(snippet_id=row["snippet_id"], score=row["score"])
242
+ for row in rows
243
+ ]
232
244
 
233
245
  async def has_embedding(
234
246
  self, snippet_id: int, embedding_type: EmbeddingType
235
247
  ) -> bool:
236
248
  """Check if a snippet has an embedding."""
249
+ if not self._initialized:
250
+ await self._initialize()
237
251
  # For VectorChord, we check if the snippet exists in the table
238
252
  # Note: embedding_type is ignored since VectorChord uses separate
239
253
  # tables per task
240
254
  # ruff: noqa: ARG002
241
- result = await self._execute(
242
- text(CHECK_VCHORD_EMBEDDING_EXISTS.format(TABLE_NAME=self.table_name)),
243
- {"snippet_id": snippet_id},
244
- )
245
- return bool(result.scalar())
255
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
256
+ result = await session.execute(
257
+ text(CHECK_VCHORD_EMBEDDING_EXISTS.format(TABLE_NAME=self.table_name)),
258
+ {"snippet_id": snippet_id},
259
+ )
260
+ return bool(result.scalar())
261
+
262
+ async def _get_existing_ids(self, snippet_ids: list[str]) -> set[str]:
263
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
264
+ result = await session.execute(
265
+ text(
266
+ CHECK_VCHORD_EMBEDDING_EXISTS_MULTIPLE.format(
267
+ TABLE_NAME=self.table_name
268
+ )
269
+ ),
270
+ {"snippet_ids": snippet_ids},
271
+ )
272
+ return {row[0] for row in result.fetchall()}
@@ -0,0 +1 @@
1
+ """Generic enricher infrastructure implementations."""
@@ -0,0 +1,53 @@
1
+ """Enricher factory for creating generic enricher domain services."""
2
+
3
+ from kodit.config import AppContext, Endpoint
4
+ from kodit.domain.enrichments.enricher import Enricher
5
+ from kodit.infrastructure.enricher.litellm_enricher import LiteLLMEnricher
6
+ from kodit.infrastructure.enricher.local_enricher import LocalEnricher
7
+ from kodit.infrastructure.enricher.null_enricher import NullEnricher
8
+ from kodit.log import log_event
9
+
10
+
11
+ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
12
+ """Get the endpoint configuration for the enricher service.
13
+
14
+ Args:
15
+ app_context: The application context.
16
+
17
+ Returns:
18
+ The endpoint configuration or None.
19
+
20
+ """
21
+ return app_context.enrichment_endpoint or None
22
+
23
+
24
+ def enricher_domain_service_factory(
25
+ app_context: AppContext,
26
+ *,
27
+ use_null_enricher: bool = False,
28
+ ) -> Enricher:
29
+ """Create an enricher domain service.
30
+
31
+ Args:
32
+ app_context: The application context.
33
+ use_null_enricher: Whether to use the null enricher instead.
34
+
35
+ Returns:
36
+ An enricher domain service instance.
37
+
38
+ """
39
+ enricher: Enricher
40
+
41
+ if use_null_enricher:
42
+ log_event("kodit.enricher", {"provider": "null"})
43
+ enricher = NullEnricher()
44
+ else:
45
+ endpoint = _get_endpoint_configuration(app_context)
46
+ if endpoint:
47
+ log_event("kodit.enricher", {"provider": "litellm"})
48
+ enricher = LiteLLMEnricher(endpoint=endpoint)
49
+ else:
50
+ log_event("kodit.enricher", {"provider": "local"})
51
+ enricher = LocalEnricher()
52
+
53
+ return enricher
@@ -1,4 +1,4 @@
1
- """LiteLLM enrichment provider implementation."""
1
+ """LiteLLM enricher implementation."""
2
2
 
3
3
  import asyncio
4
4
  from collections.abc import AsyncGenerator
@@ -10,27 +10,22 @@ import structlog
10
10
  from litellm import acompletion
11
11
 
12
12
  from kodit.config import Endpoint
13
- from kodit.domain.services.enrichment_service import EnrichmentProvider
14
- from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
15
- from kodit.infrastructure.enrichment.utils import clean_thinking_tags
13
+ from kodit.domain.enrichments.enricher import Enricher
14
+ from kodit.domain.enrichments.request import EnrichmentRequest
15
+ from kodit.domain.enrichments.response import EnrichmentResponse
16
+ from kodit.infrastructure.enricher.utils import clean_thinking_tags
16
17
 
17
- ENRICHMENT_SYSTEM_PROMPT = """
18
- You are a professional software developer. You will be given a snippet of code.
19
- Please provide a concise explanation of the code.
20
- """
21
-
22
- # Default tuned conservatively for broad provider compatibility
23
18
  DEFAULT_NUM_PARALLEL_TASKS = 20
24
19
 
25
20
 
26
- class LiteLLMEnrichmentProvider(EnrichmentProvider):
27
- """LiteLLM enrichment provider that supports 100+ providers."""
21
+ class LiteLLMEnricher(Enricher):
22
+ """LiteLLM enricher that supports 100+ providers."""
28
23
 
29
24
  def __init__(
30
25
  self,
31
26
  endpoint: Endpoint,
32
27
  ) -> None:
33
- """Initialize the LiteLLM enrichment provider.
28
+ """Initialize the LiteLLM enricher.
34
29
 
35
30
  Args:
36
31
  endpoint: The endpoint configuration containing all settings.
@@ -44,23 +39,20 @@ class LiteLLMEnrichmentProvider(EnrichmentProvider):
44
39
  self.num_parallel_tasks = (
45
40
  endpoint.num_parallel_tasks or DEFAULT_NUM_PARALLEL_TASKS
46
41
  )
47
- self.timeout = endpoint.timeout or 30.0
42
+ self.timeout = endpoint.timeout
48
43
  self.extra_params = endpoint.extra_params or {}
49
44
 
50
- # Configure LiteLLM with custom HTTPX client for Unix socket support if needed
51
45
  self._setup_litellm_client()
52
46
 
53
47
  def _setup_litellm_client(self) -> None:
54
48
  """Set up LiteLLM with custom HTTPX client for Unix socket support."""
55
49
  if self.socket_path:
56
- # Create HTTPX client with Unix socket transport
57
50
  transport = httpx.AsyncHTTPTransport(uds=self.socket_path)
58
51
  unix_client = httpx.AsyncClient(
59
52
  transport=transport,
60
- base_url="http://localhost", # Base URL for Unix socket
53
+ base_url="http://localhost",
61
54
  timeout=self.timeout,
62
55
  )
63
- # Set as LiteLLM's async client session
64
56
  litellm.aclient_session = unix_client
65
57
 
66
58
  async def _call_chat_completion(self, messages: list[dict[str, str]]) -> Any:
@@ -79,20 +71,17 @@ class LiteLLMEnrichmentProvider(EnrichmentProvider):
79
71
  "timeout": self.timeout,
80
72
  }
81
73
 
82
- # Add API key if provided
83
74
  if self.api_key:
84
75
  kwargs["api_key"] = self.api_key
85
76
 
86
- # Add base_url if provided
87
77
  if self.base_url:
88
78
  kwargs["api_base"] = self.base_url
89
79
 
90
- # Add extra parameters
91
80
  kwargs.update(self.extra_params)
92
81
 
93
82
  try:
94
- # Use litellm's async completion function
95
83
  response = await acompletion(**kwargs)
84
+ self.log.debug("enrichment request", request=kwargs, response=response)
96
85
  return (
97
86
  response.model_dump() if hasattr(response, "model_dump") else response
98
87
  )
@@ -108,62 +97,53 @@ class LiteLLMEnrichmentProvider(EnrichmentProvider):
108
97
  """Enrich a list of requests using LiteLLM.
109
98
 
110
99
  Args:
111
- requests: List of enrichment requests.
100
+ requests: List of generic enrichment requests.
112
101
 
113
102
  Yields:
114
- Enrichment responses as they are processed.
103
+ Generic enrichment responses as they are processed.
115
104
 
116
105
  """
117
106
  if not requests:
118
107
  self.log.warning("No requests for enrichment")
119
108
  return
120
109
 
121
- # Process requests in parallel with a semaphore to limit concurrent requests
122
110
  sem = asyncio.Semaphore(self.num_parallel_tasks)
123
111
 
124
- async def process_request(request: EnrichmentRequest) -> EnrichmentResponse:
112
+ async def process_request(
113
+ request: EnrichmentRequest,
114
+ ) -> EnrichmentResponse:
125
115
  async with sem:
126
116
  if not request.text:
127
117
  return EnrichmentResponse(
128
- snippet_id=request.snippet_id,
129
- text="",
130
- )
131
- try:
132
- messages = [
133
- {
134
- "role": "system",
135
- "content": ENRICHMENT_SYSTEM_PROMPT,
136
- },
137
- {"role": "user", "content": request.text},
138
- ]
139
- response = await self._call_chat_completion(messages)
140
- content = (
141
- response.get("choices", [{}])[0]
142
- .get("message", {})
143
- .get("content", "")
144
- )
145
- # Remove thinking tags from the response
146
- cleaned_content = clean_thinking_tags(content or "")
147
- return EnrichmentResponse(
148
- snippet_id=request.snippet_id,
149
- text=cleaned_content,
150
- )
151
- except Exception as e:
152
- self.log.exception("Error enriching request", error=str(e))
153
- return EnrichmentResponse(
154
- snippet_id=request.snippet_id,
118
+ id=request.id,
155
119
  text="",
156
120
  )
121
+ messages = [
122
+ {
123
+ "role": "system",
124
+ "content": request.system_prompt,
125
+ },
126
+ {"role": "user", "content": request.text},
127
+ ]
128
+ response = await self._call_chat_completion(messages)
129
+ content = (
130
+ response.get("choices", [{}])[0]
131
+ .get("message", {})
132
+ .get("content", "")
133
+ )
134
+ cleaned_content = clean_thinking_tags(content or "")
135
+ return EnrichmentResponse(
136
+ id=request.id,
137
+ text=cleaned_content,
138
+ )
157
139
 
158
- # Create tasks for all requests
159
140
  tasks = [process_request(request) for request in requests]
160
141
 
161
- # Process all requests and yield results as they complete
162
142
  for task in asyncio.as_completed(tasks):
163
143
  yield await task
164
144
 
165
145
  async def close(self) -> None:
166
- """Close the provider and cleanup HTTPX client if using Unix sockets."""
146
+ """Close the enricher and cleanup HTTPX client if using Unix sockets."""
167
147
  if (
168
148
  self.socket_path
169
149
  and hasattr(litellm, "aclient_session")
@@ -1,4 +1,4 @@
1
- """Local enrichment provider implementation."""
1
+ """Local enricher implementation."""
2
2
 
3
3
  import asyncio
4
4
  import os
@@ -8,28 +8,24 @@ from typing import Any
8
8
  import structlog
9
9
  import tiktoken
10
10
 
11
- from kodit.domain.services.enrichment_service import EnrichmentProvider
12
- from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
13
- from kodit.infrastructure.enrichment.utils import clean_thinking_tags
11
+ from kodit.domain.enrichments.enricher import Enricher
12
+ from kodit.domain.enrichments.request import EnrichmentRequest
13
+ from kodit.domain.enrichments.response import EnrichmentResponse
14
+ from kodit.infrastructure.enricher.utils import clean_thinking_tags
14
15
 
15
- ENRICHMENT_SYSTEM_PROMPT = """
16
- You are a professional software developer. You will be given a snippet of code.
17
- Please provide a concise explanation of the code.
18
- """
16
+ DEFAULT_ENRICHER_MODEL = "Qwen/Qwen3-0.6B"
17
+ DEFAULT_CONTEXT_WINDOW_SIZE = 2048
19
18
 
20
- DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
21
- DEFAULT_CONTEXT_WINDOW_SIZE = 2048 # Small so it works even on low-powered devices
22
19
 
23
-
24
- class LocalEnrichmentProvider(EnrichmentProvider):
25
- """Local enrichment provider implementation."""
20
+ class LocalEnricher(Enricher):
21
+ """Local enricher implementation using local models."""
26
22
 
27
23
  def __init__(
28
24
  self,
29
- model_name: str = DEFAULT_ENRICHMENT_MODEL,
25
+ model_name: str = DEFAULT_ENRICHER_MODEL,
30
26
  context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
31
27
  ) -> None:
32
- """Initialize the local enrichment provider.
28
+ """Initialize the local enricher.
33
29
 
34
30
  Args:
35
31
  model_name: The model name to use for enrichment.
@@ -49,13 +45,13 @@ class LocalEnrichmentProvider(EnrichmentProvider):
49
45
  """Enrich a list of requests using local model.
50
46
 
51
47
  Args:
52
- requests: List of enrichment requests.
48
+ requests: List of generic enrichment requests.
53
49
 
54
50
  Yields:
55
- Enrichment responses as they are processed.
51
+ Generic enrichment responses as they are processed.
56
52
 
57
53
  """
58
- # Remove empty snippets
54
+ # Remove empty requests
59
55
  requests = [req for req in requests if req.text]
60
56
 
61
57
  if not requests:
@@ -73,7 +69,7 @@ class LocalEnrichmentProvider(EnrichmentProvider):
73
69
  self.model_name, padding_side="left"
74
70
  )
75
71
  if self.model is None:
76
- os.environ["TOKENIZERS_PARALLELISM"] = "false" # Avoid warnings
72
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
77
73
  self.model = AutoModelForCausalLM.from_pretrained(
78
74
  self.model_name,
79
75
  torch_dtype="auto",
@@ -83,13 +79,13 @@ class LocalEnrichmentProvider(EnrichmentProvider):
83
79
 
84
80
  await asyncio.to_thread(_init_model)
85
81
 
86
- # Prepare prompts
82
+ # Prepare prompts with custom system prompts
87
83
  prompts = [
88
84
  {
89
- "id": req.snippet_id,
85
+ "id": req.id,
90
86
  "text": self.tokenizer.apply_chat_template( # type: ignore[attr-defined]
91
87
  [
92
- {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
88
+ {"role": "system", "content": req.system_prompt},
93
89
  {"role": "user", "content": req.text},
94
90
  ],
95
91
  tokenize=False,
@@ -121,9 +117,8 @@ class LocalEnrichmentProvider(EnrichmentProvider):
121
117
  )
122
118
 
123
119
  content = await asyncio.to_thread(process_prompt, prompt)
124
- # Remove thinking tags from the response
125
120
  cleaned_content = clean_thinking_tags(content)
126
121
  yield EnrichmentResponse(
127
- snippet_id=prompt["id"],
122
+ id=prompt["id"],
128
123
  text=cleaned_content,
129
124
  )
@@ -0,0 +1,36 @@
1
+ """Null enricher implementation."""
2
+
3
+ from collections.abc import AsyncGenerator
4
+
5
+ import structlog
6
+
7
+ from kodit.domain.enrichments.enricher import Enricher
8
+ from kodit.domain.enrichments.request import EnrichmentRequest
9
+ from kodit.domain.enrichments.response import EnrichmentResponse
10
+
11
+
12
+ class NullEnricher(Enricher):
13
+ """Null enricher that returns empty responses."""
14
+
15
+ def __init__(self) -> None:
16
+ """Initialize the null enricher."""
17
+ self.log = structlog.get_logger(__name__)
18
+
19
+ async def enrich(
20
+ self, requests: list[EnrichmentRequest]
21
+ ) -> AsyncGenerator[EnrichmentResponse, None]:
22
+ """Return empty responses for all requests.
23
+
24
+ Args:
25
+ requests: List of generic enrichment requests.
26
+
27
+ Yields:
28
+ Empty generic enrichment responses.
29
+
30
+ """
31
+ self.log.info("NullEnricher: returning empty responses", count=len(requests))
32
+ for request in requests:
33
+ yield EnrichmentResponse(
34
+ id=request.id,
35
+ text="",
36
+ )
@@ -2,7 +2,7 @@
2
2
 
3
3
  from collections import defaultdict
4
4
 
5
- from kodit.domain.services.index_query_service import FusionService
5
+ from kodit.domain.protocols import FusionService
6
6
  from kodit.domain.value_objects import FusionRequest, FusionResult
7
7
 
8
8