kodit 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/__init__.py +1 -0
- kodit/application/commands/__init__.py +1 -0
- kodit/application/commands/snippet_commands.py +22 -0
- kodit/application/services/__init__.py +1 -0
- kodit/application/services/indexing_application_service.py +363 -0
- kodit/application/services/snippet_application_service.py +143 -0
- kodit/cli.py +105 -82
- kodit/database.py +0 -22
- kodit/domain/__init__.py +1 -0
- kodit/{source/source_models.py → domain/entities.py} +88 -19
- kodit/domain/enums.py +9 -0
- kodit/domain/interfaces.py +27 -0
- kodit/domain/repositories.py +95 -0
- kodit/domain/services/__init__.py +1 -0
- kodit/domain/services/bm25_service.py +124 -0
- kodit/domain/services/embedding_service.py +155 -0
- kodit/domain/services/enrichment_service.py +48 -0
- kodit/domain/services/ignore_service.py +45 -0
- kodit/domain/services/indexing_service.py +203 -0
- kodit/domain/services/snippet_extraction_service.py +89 -0
- kodit/domain/services/source_service.py +83 -0
- kodit/domain/value_objects.py +215 -0
- kodit/infrastructure/__init__.py +1 -0
- kodit/infrastructure/bm25/__init__.py +1 -0
- kodit/infrastructure/bm25/bm25_factory.py +28 -0
- kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
- kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
- kodit/infrastructure/cloning/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/__init__.py +1 -0
- kodit/infrastructure/cloning/folder/factory.py +119 -0
- kodit/infrastructure/cloning/folder/working_copy.py +38 -0
- kodit/infrastructure/cloning/git/__init__.py +1 -0
- kodit/infrastructure/cloning/git/factory.py +133 -0
- kodit/infrastructure/cloning/git/working_copy.py +32 -0
- kodit/infrastructure/cloning/metadata.py +127 -0
- kodit/infrastructure/embedding/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_factory.py +87 -0
- kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
- kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
- kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
- kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
- kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
- kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
- kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
- kodit/infrastructure/enrichment/__init__.py +1 -0
- kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
- kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
- kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
- kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
- kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
- kodit/infrastructure/git/__init__.py +1 -0
- kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
- kodit/infrastructure/ignore/__init__.py +1 -0
- kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
- kodit/infrastructure/indexing/__init__.py +1 -0
- kodit/infrastructure/indexing/fusion_service.py +55 -0
- kodit/infrastructure/indexing/index_repository.py +296 -0
- kodit/infrastructure/indexing/indexing_factory.py +111 -0
- kodit/infrastructure/snippet_extraction/__init__.py +1 -0
- kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
- kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
- kodit/infrastructure/sqlalchemy/__init__.py +1 -0
- kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -24
- kodit/infrastructure/sqlalchemy/file_repository.py +73 -0
- kodit/infrastructure/sqlalchemy/repository.py +121 -0
- kodit/infrastructure/sqlalchemy/snippet_repository.py +75 -0
- kodit/infrastructure/ui/__init__.py +1 -0
- kodit/infrastructure/ui/progress.py +127 -0
- kodit/{util → infrastructure/ui}/spinner.py +19 -4
- kodit/mcp.py +50 -28
- kodit/migrations/env.py +1 -4
- kodit/reporting.py +78 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/METADATA +1 -1
- kodit-0.2.5.dist-info/RECORD +99 -0
- kodit/bm25/__init__.py +0 -1
- kodit/bm25/keyword_search_factory.py +0 -17
- kodit/bm25/keyword_search_service.py +0 -34
- kodit/embedding/__init__.py +0 -1
- kodit/embedding/embedding_factory.py +0 -69
- kodit/embedding/embedding_models.py +0 -28
- kodit/embedding/embedding_provider/__init__.py +0 -1
- kodit/embedding/embedding_provider/embedding_provider.py +0 -92
- kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
- kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
- kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
- kodit/embedding/local_vector_search_service.py +0 -87
- kodit/embedding/vector_search_service.py +0 -55
- kodit/enrichment/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/__init__.py +0 -1
- kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
- kodit/enrichment/enrichment_service.py +0 -45
- kodit/indexing/__init__.py +0 -1
- kodit/indexing/fusion.py +0 -67
- kodit/indexing/indexing_models.py +0 -43
- kodit/indexing/indexing_repository.py +0 -216
- kodit/indexing/indexing_service.py +0 -344
- kodit/snippets/__init__.py +0 -1
- kodit/snippets/languages/__init__.py +0 -53
- kodit/snippets/snippets.py +0 -50
- kodit/source/__init__.py +0 -1
- kodit/source/source_factories.py +0 -356
- kodit/source/source_repository.py +0 -169
- kodit/source/source_service.py +0 -150
- kodit/util/__init__.py +0 -1
- kodit-0.2.4.dist-info/RECORD +0 -71
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
- /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/WHEEL +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""Infrastructure implementation of the index repository."""
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from typing import TypeVar
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import delete, func, select
|
|
7
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
|
+
|
|
9
|
+
from kodit.domain.entities import Embedding, File, Index, Snippet, Source
|
|
10
|
+
from kodit.domain.services.indexing_service import IndexRepository
|
|
11
|
+
from kodit.domain.value_objects import IndexView
|
|
12
|
+
|
|
13
|
+
T = TypeVar("T")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SQLAlchemyIndexRepository(IndexRepository):
    """SQLAlchemy implementation of the index repository.

    All statements run on the ``AsyncSession`` passed to the constructor.
    Methods that write (create, update, delete, add) commit the session
    before returning.
    """

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the index repository.

        Args:
            session: The SQLAlchemy async session to use for database operations.

        """
        self.session = session

    async def create_index(self, source_id: int) -> IndexView:
        """Create a new index for a source, or return the existing one.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            The view of the index already stored for the source if there is
            one, otherwise the view of the newly created (and committed) index.

        """
        # Creating an index is idempotent per source: reuse an existing one.
        existing_index = await self.get_index_by_source_id(source_id)
        if existing_index:
            return existing_index

        index = Index(source_id=source_id)
        self.session.add(index)
        await self.session.commit()

        # The view exposes the source URI, so the source row is needed.
        # scalar_one() raises if the source row cannot be found.
        source_query = select(Source).where(Source.id == source_id)
        source_result = await self.session.execute(source_query)
        source = source_result.scalar_one()

        return IndexView(
            id=index.id,
            created_at=index.created_at,
            updated_at=index.updated_at,
            source=source.uri,
            # A freshly created index cannot have snippets yet.
            num_snippets=0,
        )

    async def _get_index_view(self, index: Index, source: Source) -> IndexView:
        """Create an IndexView from Index and Source entities.

        Args:
            index: The index entity.
            source: The source entity the index belongs to.

        Returns:
            The index view, including the current snippet count, which is
            obtained with one additional COUNT query.

        """
        num_snippets = await self.num_snippets_for_index(index.id)
        return IndexView(
            id=index.id,
            created_at=index.created_at,
            updated_at=index.updated_at,
            source=source.uri,
            num_snippets=num_snippets,
        )

    async def get_index_by_id(self, index_id: int) -> IndexView | None:
        """Get an index by its ID.

        Args:
            index_id: The ID of the index to retrieve.

        Returns:
            The index view if found, None otherwise.

        """
        query = (
            select(Index, Source)
            .join(Source, Index.source_id == Source.id)
            .where(Index.id == index_id)
        )
        result = await self.session.execute(query)
        row = result.first()

        if not row:
            return None

        index, source = row
        return await self._get_index_view(index, source)

    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Get an index by its source ID.

        Args:
            source_id: The ID of the source to retrieve an index for.

        Returns:
            The view of the first matching index if found, None otherwise.

        """
        query = (
            select(Index, Source)
            .join(Source, Index.source_id == Source.id)
            .where(Index.source_id == source_id)
        )
        result = await self.session.execute(query)
        row = result.first()

        if not row:
            return None

        index, source = row
        return await self._get_index_view(index, source)

    async def list_indexes(self) -> list[IndexView]:
        """List all indexes.

        Returns:
            A list of index views, one per index that is joined to a source.

        """
        # Inner join on purpose: a FULL OUTER JOIN would also yield rows in
        # which the index (or the source) is None, and building a view from
        # such a row fails when its attributes are read.
        query = select(Index, Source).join(Source, Index.source_id == Source.id)
        result = await self.session.execute(query)
        # Materialise the rows before issuing the per-index count queries.
        rows = result.tuples().all()

        return [await self._get_index_view(index, source) for index, source in rows]

    async def update_index_timestamp(self, index_id: int) -> None:
        """Set ``updated_at`` of an index to the current UTC time.

        Nothing is changed or committed when no index has the given ID.

        Args:
            index_id: The ID of the index to update.

        """
        query = select(Index).where(Index.id == index_id)
        result = await self.session.execute(query)
        index = result.scalar_one_or_none()

        if index:
            index.updated_at = datetime.now(UTC)
            await self.session.commit()

    async def delete_all_snippets(self, index_id: int) -> None:
        """Delete all snippets for an index, together with their embeddings.

        Args:
            index_id: The ID of the index to delete snippets for.

        """
        # Delete the embeddings first, selecting them through a subquery on
        # the snippets of this index. This needs a single DELETE instead of
        # loading every snippet and issuing one DELETE per snippet.
        snippet_ids = select(Snippet.id).where(Snippet.index_id == index_id)
        await self.session.execute(
            delete(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
        )

        # Now delete the snippets themselves.
        await self.session.execute(delete(Snippet).where(Snippet.index_id == index_id))
        await self.session.commit()

    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Get all snippets for an index.

        Args:
            index_id: The ID of the index to get snippets for.

        Returns:
            A list of Snippet entities.

        """
        query = select(Snippet).where(Snippet.index_id == index_id)
        result = await self.session.execute(query)
        return list(result.scalars())

    async def add_snippet(self, snippet: dict) -> None:
        """Add a snippet to the database and commit.

        Args:
            snippet: Mapping with the keys ``file_id``, ``index_id`` and
                ``content`` describing the snippet to add.

        Raises:
            KeyError: If one of the required keys is missing.

        """
        db_snippet = Snippet(
            file_id=snippet["file_id"],
            index_id=snippet["index_id"],
            content=snippet["content"],
        )
        self.session.add(db_snippet)
        await self.session.commit()

    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Update the content of an existing snippet.

        Nothing is changed or committed when no snippet has the given ID.

        Args:
            snippet_id: The ID of the snippet to update.
            content: The new content for the snippet.

        """
        query = select(Snippet).where(Snippet.id == snippet_id)
        result = await self.session.execute(query)
        snippet = result.scalar_one_or_none()

        if snippet:
            # The session tracks the attribute change; commit persists it.
            snippet.content = content
            await self.session.commit()

    async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[dict, dict]]:
        """List snippets by IDs.

        Args:
            ids: List of snippet IDs to retrieve. An ID may occur more than
                once, in which case its result is repeated.

        Returns:
            List of (file, snippet) tuples of plain dicts, in the same order
            as ``ids``.

        Raises:
            ValueError: If any requested ID has no matching snippet.

        """
        if not ids:
            # Nothing requested: skip the round trip to the database.
            return []

        query = (
            select(Snippet, File)
            .where(Snippet.id.in_(ids))
            .join(File, Snippet.file_id == File.id)
        )
        rows = await self.session.execute(query)

        # Index the results by snippet ID so the caller's order can be rebuilt.
        id_to_result = {
            snippet.id: (
                {
                    "id": file.id,
                    "source_id": file.source_id,
                    "mime_type": file.mime_type,
                    "uri": file.uri,
                    "cloned_path": file.cloned_path,
                    "sha256": file.sha256,
                    "size_bytes": file.size_bytes,
                    "extension": file.extension,
                    "created_at": file.created_at,
                    "updated_at": file.updated_at,
                },
                {
                    "id": snippet.id,
                    "file_id": snippet.file_id,
                    "index_id": snippet.index_id,
                    "content": snippet.content,
                    "created_at": snippet.created_at,
                    "updated_at": snippet.updated_at,
                },
            )
            for snippet, file in rows.all()
        }

        # Decide on the IDs that are actually missing rather than on a length
        # comparison: duplicate IDs in ``ids`` make the lengths differ even
        # though every requested snippet was found.
        missing_ids = [
            snippet_id for snippet_id in ids if snippet_id not in id_to_result
        ]
        if missing_ids:
            msg = f"Some IDs are not present: {missing_ids}"
            raise ValueError(msg)

        # Rebuild the list in the same order that it was passed in
        return [id_to_result[i] for i in ids]

    async def num_snippets_for_index(self, index_id: int) -> int:
        """Get the number of snippets for an index.

        Args:
            index_id: The ID of the index.

        Returns:
            The number of snippets.

        """
        # Name the FROM table explicitly instead of relying on it being
        # inferred from the WHERE clause.
        query = (
            select(func.count())
            .select_from(Snippet)
            .where(Snippet.index_id == index_id)
        )
        result = await self.session.execute(query)
        return result.scalar_one()
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Factory for creating indexing services."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
|
+
|
|
7
|
+
from kodit.application.services.indexing_application_service import (
|
|
8
|
+
IndexingApplicationService,
|
|
9
|
+
)
|
|
10
|
+
from kodit.application.services.snippet_application_service import (
|
|
11
|
+
SnippetApplicationService,
|
|
12
|
+
)
|
|
13
|
+
from kodit.domain.services.bm25_service import BM25DomainService
|
|
14
|
+
from kodit.domain.services.indexing_service import IndexingDomainService
|
|
15
|
+
from kodit.domain.services.source_service import SourceService
|
|
16
|
+
from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
|
|
17
|
+
from kodit.infrastructure.embedding.embedding_factory import (
|
|
18
|
+
embedding_domain_service_factory,
|
|
19
|
+
)
|
|
20
|
+
from kodit.infrastructure.enrichment.enrichment_factory import (
|
|
21
|
+
create_enrichment_domain_service,
|
|
22
|
+
)
|
|
23
|
+
from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
|
|
24
|
+
from kodit.infrastructure.indexing.index_repository import SQLAlchemyIndexRepository
|
|
25
|
+
from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
|
|
26
|
+
create_snippet_extraction_domain_service,
|
|
27
|
+
)
|
|
28
|
+
from kodit.infrastructure.sqlalchemy.file_repository import (
|
|
29
|
+
SqlAlchemyFileRepository,
|
|
30
|
+
)
|
|
31
|
+
from kodit.infrastructure.sqlalchemy.snippet_repository import (
|
|
32
|
+
SqlAlchemySnippetRepository,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def create_snippet_application_service(
    session: AsyncSession,
) -> SnippetApplicationService:
    """Create a snippet application service with all dependencies.

    Args:
        session: The database session handed to the snippet and file
            repositories.

    Returns:
        A snippet application service wired to a snippet extraction domain
        service and SQLAlchemy-backed snippet and file repositories.

    """
    # SnippetApplicationService is provided by the module-level import (the
    # return annotation needs it there anyway), so no local import is used.
    return SnippetApplicationService(
        snippet_extraction_service=create_snippet_extraction_domain_service(),
        snippet_repository=SqlAlchemySnippetRepository(session),
        file_repository=SqlAlchemyFileRepository(session),
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def create_indexing_domain_service(session: AsyncSession) -> IndexingDomainService:
    """Build an indexing domain service backed by the given session.

    Args:
        session: The database session used by the index repository.

    Returns:
        An indexing domain service wired to a SQLAlchemy index repository
        and a reciprocal rank fusion service.

    """
    return IndexingDomainService(
        index_repository=SQLAlchemyIndexRepository(session),
        fusion_service=ReciprocalRankFusionService(),
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def create_indexing_application_service(
    app_context: Any,
    session: AsyncSession,
    source_service: SourceService,
    snippet_application_service: SnippetApplicationService,
) -> IndexingApplicationService:
    """Assemble an indexing application service from its collaborators.

    Args:
        app_context: The application context, passed on to the BM25,
            embedding and enrichment factories.
        session: The database session.
        source_service: The source service.
        snippet_application_service: The snippet application service.

    Returns:
        An indexing application service instance.

    """
    # The domain services are built inline, in the order of the keyword
    # arguments below: indexing, BM25, code search, text search, enrichment.
    return IndexingApplicationService(
        indexing_domain_service=create_indexing_domain_service(session),
        source_service=source_service,
        bm25_service=BM25DomainService(
            bm25_repository_factory(app_context, session)
        ),
        code_search_service=embedding_domain_service_factory(
            "code", app_context, session
        ),
        text_search_service=embedding_domain_service_factory(
            "text", app_context, session
        ),
        enrichment_service=create_enrichment_domain_service(app_context),
        snippet_application_service=snippet_application_service,
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Infrastructure services for snippet extraction."""
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Infrastructure implementation for language detection."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from kodit.domain.services.snippet_extraction_service import LanguageDetectionService
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FileSystemLanguageDetectionService(LanguageDetectionService):
    """Language detection that looks only at a file's extension."""

    def __init__(self, language_map: dict[str, str]) -> None:
        """Store the extension-to-language lookup table.

        Args:
            language_map: Mapping of file extensions to programming languages.
                Lookups use the lowercased extension without its leading dot.

        """
        self.language_map = language_map

    async def detect_language(self, file_path: Path) -> str:
        """Resolve the programming language of a file from its suffix.

        Args:
            file_path: Path to the file to detect language for

        Returns:
            The language mapped to the file's extension

        Raises:
            ValueError: If the extension has no entry in the language map

        """
        # Normalise ".PY" -> "py" so the lookup is case-insensitive.
        suffix = file_path.suffix.removeprefix(".").lower()

        if (detected := self.language_map.get(suffix)) is not None:
            return detected

        raise ValueError(f"Unsupported language for file suffix: {suffix}")
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Factory for creating snippet extraction services."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
|
+
|
|
7
|
+
from kodit.domain.enums import SnippetExtractionStrategy
|
|
8
|
+
from kodit.domain.repositories import FileRepository, SnippetRepository
|
|
9
|
+
from kodit.domain.services.snippet_extraction_service import (
|
|
10
|
+
SnippetExtractionDomainService,
|
|
11
|
+
)
|
|
12
|
+
from kodit.infrastructure.snippet_extraction.language_detection_service import (
|
|
13
|
+
FileSystemLanguageDetectionService,
|
|
14
|
+
)
|
|
15
|
+
from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
|
|
16
|
+
FileSystemSnippetQueryProvider,
|
|
17
|
+
)
|
|
18
|
+
from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
|
|
19
|
+
TreeSitterSnippetExtractor,
|
|
20
|
+
)
|
|
21
|
+
from kodit.infrastructure.sqlalchemy.file_repository import SqlAlchemyFileRepository
|
|
22
|
+
from kodit.infrastructure.sqlalchemy.snippet_repository import (
|
|
23
|
+
SqlAlchemySnippetRepository,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def create_snippet_extraction_domain_service() -> SnippetExtractionDomainService:
    """Assemble the snippet extraction domain service.

    Returns:
        A snippet extraction domain service using extension-based language
        detection and a tree-sitter extractor for the method-based strategy

    """
    # File extension (without the leading dot) -> language name.
    extension_to_language = {
        "js": "javascript",
        "jsx": "javascript",
        "ts": "typescript",
        "tsx": "typescript",
        "py": "python",
        "rs": "rust",
        "go": "go",
        "cpp": "cpp",
        "hpp": "cpp",
        "c": "c",
        "h": "c",
        "cs": "csharp",
        "rb": "ruby",
        "java": "java",
        "php": "php",
        "swift": "swift",
        "kt": "kotlin",
    }
    detector = FileSystemLanguageDetectionService(extension_to_language)

    # Query files are read from the "languages" directory next to this module.
    extractor = TreeSitterSnippetExtractor(
        FileSystemSnippetQueryProvider(Path(__file__).parent / "languages")
    )

    return SnippetExtractionDomainService(
        detector,
        {SnippetExtractionStrategy.METHOD_BASED: extractor},
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def create_snippet_repositories(
    session: AsyncSession,
) -> tuple[SnippetRepository, FileRepository]:
    """Build the snippet and file repositories for one session.

    Args:
        session: SQLAlchemy session shared by both repositories

    Returns:
        Tuple of (snippet_repository, file_repository)

    """
    return SqlAlchemySnippetRepository(session), SqlAlchemyFileRepository(session)
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Infrastructure implementation for loading snippet queries from files."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SnippetQueryProvider(ABC):
    """Interface for objects that supply a snippet query per language."""

    @abstractmethod
    async def get_query(self, language: str) -> str:
        """Return the snippet query for the given language.

        Args:
            language: The programming language to get the query for

        Returns:
            The query string for the language

        """
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FileSystemSnippetQueryProvider(SnippetQueryProvider):
    """Infrastructure implementation for loading snippet queries from files."""

    def __init__(self, query_directory: Path) -> None:
        """Initialize the query provider.

        Args:
            query_directory: Directory containing one ``<language>.scm`` query
                file per supported language

        """
        self.query_directory = query_directory

    async def get_query(self, language: str) -> str:
        """Load query from file system.

        Args:
            language: The programming language to get the query for

        Returns:
            The contents of ``<language>.scm`` in the query directory,
            decoded as UTF-8

        Raises:
            FileNotFoundError: If the query file doesn't exist

        """
        query_path = self.query_directory / f"{language}.scm"

        # Read first and translate the failure, instead of checking exists()
        # beforehand: the file could vanish between the check and the read.
        # The encoding is explicit so decoding does not depend on the locale.
        try:
            return query_path.read_text(encoding="utf-8")
        except (FileNotFoundError, NotADirectoryError) as error:
            # NotADirectoryError is included because exists() reports such
            # paths as missing too, so callers keep seeing FileNotFoundError.
            raise FileNotFoundError(f"Query file not found: {query_path}") from error
|