kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +56 -29
  3. kodit/application/services/code_indexing_application_service.py +152 -118
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +282 -0
  10. kodit/domain/value_objects.py +143 -65
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/slicing/__init__.py +1 -0
  24. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  25. kodit/infrastructure/slicing/slicer.py +894 -0
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  27. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  28. kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
  29. kodit/mcp.py +0 -7
  30. kodit/migrations/env.py +1 -1
  31. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
  32. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  33. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  34. kodit/migrations/versions/85155663351e_initial.py +64 -48
  35. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
  39. kodit-0.3.4.dist-info/RECORD +89 -0
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -215
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -286
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  54. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  55. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  56. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  57. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  58. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  59. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  60. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  61. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  62. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
  63. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  64. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  65. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  66. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  67. kodit-0.3.2.dist-info/RECORD +0 -103
  68. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
  69. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,344 @@
1
+ """Mapping between domain Index aggregate and SQLAlchemy entities."""
2
+
3
+ from datetime import UTC, datetime
4
+ from pathlib import Path
5
+
6
+ from pydantic import AnyUrl
7
+ from sqlalchemy import select
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ import kodit.domain.entities as domain_entities
11
+ from kodit.domain.value_objects import (
12
+ FileProcessingStatus,
13
+ SourceType,
14
+ )
15
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
16
+
17
+
18
class IndexMapper:
    """Translate between the domain Index aggregate and its SQLAlchemy rows.

    The ``to_domain_*`` / ``load_*`` methods read through the session given to
    the constructor. The ``from_domain_*`` methods only build row objects; they
    never touch the session, so adding and flushing is left to the caller.
    """

    def __init__(self, session: AsyncSession) -> None:
        """Initialize mapper with database session."""
        self._session = session

    async def to_domain_index(
        self, db_index: db_entities.Index
    ) -> domain_entities.Index:
        """Convert SQLAlchemy Index to domain Index aggregate.

        Loads the full aggregate including Source, WorkingCopy, Files, and Snippets.

        Raises:
            ValueError: If the source row referenced by ``db_index`` is missing.

        """
        db_source = await self._session.get(db_entities.Source, db_index.source_id)
        if not db_source:
            raise ValueError(f"Source not found for index {db_index.id}")

        # The same File objects are shared by the working copy and by each
        # snippet's ``derives_from`` list.
        domain_files = await self._load_domain_files(db_source.id)
        domain_source = self._build_domain_source(db_source, domain_files)
        domain_snippets = await self.load_snippets_for_index(
            db_index.id, domain_files
        )

        return domain_entities.Index(
            id=db_index.id,
            created_at=db_index.created_at,
            updated_at=db_index.updated_at,
            source=domain_source,
            snippets=domain_snippets,
        )

    async def to_domain_source(
        self, db_source: db_entities.Source
    ) -> domain_entities.Source:
        """Convert SQLAlchemy Source to domain Source, including its files."""
        domain_files = await self._load_domain_files(db_source.id)
        return self._build_domain_source(db_source, domain_files)

    async def to_domain_file(self, db_file: db_entities.File) -> domain_entities.File:
        """Convert SQLAlchemy File to domain File, loading its authors."""
        # One query per file: authors are reached through the mapping table.
        authors_stmt = (
            select(db_entities.Author)
            .join(db_entities.AuthorFileMapping)
            .where(db_entities.AuthorFileMapping.file_id == db_file.id)
        )
        db_authors = (await self._session.scalars(authors_stmt)).all()

        domain_authors = [
            domain_entities.Author(id=author.id, name=author.name, email=author.email)
            for author in db_authors
        ]

        return domain_entities.File(
            id=db_file.id,
            created_at=db_file.created_at,
            updated_at=db_file.updated_at,
            uri=AnyUrl(db_file.uri),
            sha256=db_file.sha256,
            authors=domain_authors,
            mime_type=db_file.mime_type,
            file_processing_status=FileProcessingStatus(db_file.file_processing_status),
        )

    async def to_domain_snippet(
        self, db_snippet: db_entities.Snippet, domain_files: list[domain_entities.File]
    ) -> domain_entities.Snippet:
        """Convert SQLAlchemy Snippet to domain Snippet.

        ``derives_from`` holds the first entry of ``domain_files`` whose id equals
        the snippet's ``file_id``, or is empty when no entry matches.
        """
        source_file = next(
            (
                candidate
                for candidate in domain_files
                if candidate.id == db_snippet.file_id
            ),
            None,
        )
        derives_from = [] if source_file is None else [source_file]
        return self._build_domain_snippet(db_snippet, derives_from)

    async def from_domain_index(  # noqa: C901
        self, domain_index: domain_entities.Index
    ) -> tuple[
        db_entities.Index,
        db_entities.Source,
        list[db_entities.File],
        list[db_entities.Author],
    ]:
        """Convert domain Index aggregate to SQLAlchemy entities.

        Returns all the entities that need to be persisted, as a tuple of
        ``(index, source, files, authors)``. Snippets are not part of the result;
        convert them with ``from_domain_snippet``.

        When the domain source has no id yet, ``source_id`` on the index and file
        rows is the placeholder ``0`` and must be replaced by the caller once the
        source has been saved.
        """
        working_copy = domain_index.source.working_copy

        db_source = db_entities.Source(
            uri=str(working_copy.remote_uri),
            cloned_path=str(working_copy.cloned_path),
            source_type=db_entities.SourceType(working_copy.source_type.value),
        )
        if domain_index.source.id:
            db_source.id = domain_index.source.id
        if domain_index.source.created_at:
            db_source.created_at = domain_index.source.created_at
        if domain_index.source.updated_at:
            db_source.updated_at = domain_index.source.updated_at

        # Will be set after source is saved
        db_index = db_entities.Index(source_id=db_source.id or 0)
        if domain_index.id:
            db_index.id = domain_index.id
        if domain_index.created_at:
            db_index.created_at = domain_index.created_at
        if domain_index.updated_at:
            db_index.updated_at = domain_index.updated_at

        db_files = []
        all_authors = []
        # One fallback timestamp for the whole batch, taken once rather than
        # per file.
        now = datetime.now(UTC)

        for domain_file in working_copy.files:
            # NOTE(review): mime_type, cloned_path, size_bytes and extension are
            # written as placeholders here even though to_domain_file reads a
            # mime_type back from the row — confirm the caller fills these in.
            db_file = db_entities.File(
                created_at=domain_file.created_at or now,
                updated_at=domain_file.updated_at or now,
                source_id=db_source.id or 0,  # Will be set after source is saved
                mime_type="",  # Would need to be determined
                uri=str(domain_file.uri),
                # Would need to be determined from working copy + relative path
                cloned_path="",
                sha256=domain_file.sha256,
                size_bytes=0,  # Would need to be determined
                extension="",  # Would need to be determined
                file_processing_status=domain_file.file_processing_status.value,
            )
            if domain_file.id:
                db_file.id = domain_file.id

            db_files.append(db_file)
            all_authors.extend(domain_file.authors)

        # De-duplicate authors on (name, email); the first occurrence wins.
        unique_authors = {}
        for author in all_authors:
            key = (author.name, author.email)
            if key not in unique_authors:
                db_author = db_entities.Author(name=author.name, email=author.email)
                if author.id:
                    db_author.id = author.id
                unique_authors[key] = db_author

        return db_index, db_source, db_files, list(unique_authors.values())

    async def from_domain_snippet(
        self, domain_snippet: domain_entities.Snippet, index_id: int
    ) -> db_entities.Snippet:
        """Convert domain Snippet to SQLAlchemy Snippet.

        Raises:
            ValueError: If the snippet derives from no file, or if the first file
                it derives from has no id.

        """
        if not domain_snippet.derives_from:
            raise ValueError("Snippet must derive from at least one file")

        # Only the first file is recorded on the row, even if there are several.
        file_id = domain_snippet.derives_from[0].id
        if file_id is None:
            raise ValueError("File must have an ID")

        db_snippet = db_entities.Snippet(
            file_id=file_id,
            index_id=index_id,
            content=domain_snippet.original_text(),
            summary=domain_snippet.summary_text(),
        )

        if domain_snippet.id:
            db_snippet.id = domain_snippet.id
        if domain_snippet.created_at:
            db_snippet.created_at = domain_snippet.created_at
        if domain_snippet.updated_at:
            db_snippet.updated_at = domain_snippet.updated_at

        return db_snippet

    async def load_snippets_for_index(
        self, index_id: int, domain_files: list[domain_entities.File]
    ) -> list[domain_entities.Snippet]:
        """Load all snippets for an index and convert to domain entities."""
        stmt = select(db_entities.Snippet).where(
            db_entities.Snippet.index_id == index_id
        )
        db_snippets = (await self._session.scalars(stmt)).all()

        # Index the files once so each snippet is resolved by lookup instead of a
        # scan. setdefault keeps the first file seen for an id, which is the file
        # a front-to-back scan would have found.
        files_by_id: dict[int | None, domain_entities.File] = {}
        for domain_file in domain_files:
            files_by_id.setdefault(domain_file.id, domain_file)

        domain_snippets = []
        for db_snippet in db_snippets:
            source_file = files_by_id.get(db_snippet.file_id)
            derives_from = [] if source_file is None else [source_file]
            domain_snippets.append(
                self._build_domain_snippet(db_snippet, derives_from)
            )

        return domain_snippets

    async def _load_domain_files(self, source_id: int) -> list[domain_entities.File]:
        """Load every File row of a source and convert each to a domain File."""
        files_stmt = select(db_entities.File).where(
            db_entities.File.source_id == source_id
        )
        db_files = (await self._session.scalars(files_stmt)).all()
        return [await self.to_domain_file(db_file) for db_file in db_files]

    @staticmethod
    def _build_domain_source(
        db_source: db_entities.Source, domain_files: list[domain_entities.File]
    ) -> domain_entities.Source:
        """Assemble a domain Source and its WorkingCopy from a Source row."""
        # The working copy has no row of its own; it takes the source's timestamps.
        working_copy = domain_entities.WorkingCopy(
            created_at=db_source.created_at,
            updated_at=db_source.updated_at,
            remote_uri=AnyUrl(db_source.uri),
            cloned_path=Path(db_source.cloned_path),
            source_type=SourceType(db_source.type.value),
            files=domain_files,
        )
        return domain_entities.Source(
            id=db_source.id,
            created_at=db_source.created_at,
            updated_at=db_source.updated_at,
            working_copy=working_copy,
        )

    @staticmethod
    def _build_domain_snippet(
        db_snippet: db_entities.Snippet, derives_from: list[domain_entities.File]
    ) -> domain_entities.Snippet:
        """Build a domain Snippet from a row and its already-resolved files."""
        domain_snippet = domain_entities.Snippet(
            id=db_snippet.id,
            created_at=db_snippet.created_at,
            updated_at=db_snippet.updated_at,
            derives_from=derives_from,
        )

        # Empty or missing text is skipped rather than stored as empty content.
        if db_snippet.content:
            # The second argument is the literal "unknown" (presumably the
            # language, which the row does not store — confirm).
            domain_snippet.add_original_content(db_snippet.content, "unknown")

        if db_snippet.summary:
            domain_snippet.add_summary(db_snippet.summary)

        return domain_snippet
@@ -0,0 +1 @@
1
+ """Slicing infrastructure module."""
@@ -0,0 +1,18 @@
1
+ """Language detection service implementation."""
2
+
3
+ from pathlib import Path
4
+
5
+ from kodit.domain.services.index_service import LanguageDetectionService
6
+
7
+
8
class FileSystemLanguageDetectionService(LanguageDetectionService):
    """Language detection that looks a file's extension up in a fixed table."""

    def __init__(self, language_map: dict[str, str]) -> None:
        """Keep the extension-to-language table used by ``detect_language``."""
        self._language_map = language_map

    async def detect_language(self, file_path: Path) -> str:
        """Return the language mapped to the file's extension, or "unknown"."""
        # Path.suffix carries its leading dot; the lookup uses the bare extension.
        bare_suffix = file_path.suffix.lstrip(".")
        known_languages = self._language_map
        return known_languages.get(bare_suffix, "unknown")