kodit-0.4.2-py3-none-any.whl → kodit-0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic.

Files changed (100):
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,345 +0,0 @@
1
- """Mapping between domain Index aggregate and SQLAlchemy entities."""
2
-
3
- from datetime import UTC, datetime
4
- from pathlib import Path
5
-
6
- from pydantic import AnyUrl
7
- from sqlalchemy import select
8
- from sqlalchemy.ext.asyncio import AsyncSession
9
-
10
- import kodit.domain.entities as domain_entities
11
- from kodit.domain.value_objects import (
12
- FileProcessingStatus,
13
- SourceType,
14
- )
15
- from kodit.infrastructure.sqlalchemy import entities as db_entities
16
-
17
-
18
- # TODO(Phil): Make this a pure mapper without any DB access # noqa: TD003, FIX002
19
- class IndexMapper:
20
- """Mapper for converting between domain Index aggregate and database entities."""
21
-
22
- def __init__(self, session: AsyncSession) -> None:
23
- """Initialize mapper with database session."""
24
- self._session = session
25
-
26
- async def to_domain_index(
27
- self, db_index: db_entities.Index
28
- ) -> domain_entities.Index:
29
- """Convert SQLAlchemy Index to domain Index aggregate.
30
-
31
- Loads the full aggregate including Source, WorkingCopy, Files, and Snippets.
32
- """
33
- # Load the source
34
- db_source = await self._session.get(db_entities.Source, db_index.source_id)
35
- if not db_source:
36
- raise ValueError(f"Source not found for index {db_index.id}")
37
-
38
- # Load files for the source
39
- files_stmt = select(db_entities.File).where(
40
- db_entities.File.source_id == db_source.id
41
- )
42
- db_files = (await self._session.scalars(files_stmt)).all()
43
-
44
- # Convert files to domain
45
- domain_files = []
46
- for db_file in db_files:
47
- # Load authors for this file
48
- authors_stmt = (
49
- select(db_entities.Author)
50
- .join(db_entities.AuthorFileMapping)
51
- .where(db_entities.AuthorFileMapping.file_id == db_file.id)
52
- )
53
- db_authors = (await self._session.scalars(authors_stmt)).all()
54
-
55
- domain_authors = [
56
- domain_entities.Author(
57
- id=author.id, name=author.name, email=author.email
58
- )
59
- for author in db_authors
60
- ]
61
-
62
- domain_file = domain_entities.File(
63
- id=db_file.id,
64
- created_at=db_file.created_at,
65
- updated_at=db_file.updated_at,
66
- uri=AnyUrl(db_file.uri),
67
- sha256=db_file.sha256,
68
- authors=domain_authors,
69
- mime_type=db_file.mime_type,
70
- file_processing_status=FileProcessingStatus(
71
- db_file.file_processing_status
72
- ),
73
- )
74
- domain_files.append(domain_file)
75
-
76
- # Create working copy
77
- working_copy = domain_entities.WorkingCopy(
78
- created_at=db_source.created_at,
79
- updated_at=db_source.updated_at,
80
- remote_uri=AnyUrl(db_source.uri),
81
- cloned_path=Path(db_source.cloned_path),
82
- source_type=SourceType(db_source.type.value),
83
- files=domain_files,
84
- )
85
-
86
- # Create source
87
- domain_source = domain_entities.Source(
88
- id=db_source.id,
89
- created_at=db_source.created_at,
90
- updated_at=db_source.updated_at,
91
- working_copy=working_copy,
92
- )
93
-
94
- # Load snippets for this index
95
- snippets_stmt = select(db_entities.Snippet).where(
96
- db_entities.Snippet.index_id == db_index.id
97
- )
98
- db_snippets = (await self._session.scalars(snippets_stmt)).all()
99
-
100
- domain_snippets = []
101
- for db_snippet in db_snippets:
102
- domain_snippet = await self.to_domain_snippet(db_snippet, domain_files)
103
- domain_snippets.append(domain_snippet)
104
-
105
- # Create index aggregate
106
- return domain_entities.Index(
107
- id=db_index.id,
108
- created_at=db_index.created_at,
109
- updated_at=db_index.updated_at,
110
- source=domain_source,
111
- snippets=domain_snippets,
112
- )
113
-
114
- async def to_domain_source(
115
- self, db_source: db_entities.Source
116
- ) -> domain_entities.Source:
117
- """Convert SQLAlchemy Source to domain Source."""
118
- # Load files for the source
119
- files_stmt = select(db_entities.File).where(
120
- db_entities.File.source_id == db_source.id
121
- )
122
- db_files = (await self._session.scalars(files_stmt)).all()
123
-
124
- # Convert files to domain
125
- domain_files = []
126
- for db_file in db_files:
127
- # Load authors for this file
128
- authors_stmt = (
129
- select(db_entities.Author)
130
- .join(db_entities.AuthorFileMapping)
131
- .where(db_entities.AuthorFileMapping.file_id == db_file.id)
132
- )
133
- db_authors = (await self._session.scalars(authors_stmt)).all()
134
-
135
- domain_authors = [
136
- domain_entities.Author(
137
- id=author.id, name=author.name, email=author.email
138
- )
139
- for author in db_authors
140
- ]
141
-
142
- domain_file = domain_entities.File(
143
- id=db_file.id,
144
- created_at=db_file.created_at,
145
- updated_at=db_file.updated_at,
146
- uri=AnyUrl(db_file.uri),
147
- sha256=db_file.sha256,
148
- authors=domain_authors,
149
- mime_type=db_file.mime_type,
150
- file_processing_status=FileProcessingStatus(
151
- db_file.file_processing_status
152
- ),
153
- )
154
- domain_files.append(domain_file)
155
-
156
- # Create working copy
157
- working_copy = domain_entities.WorkingCopy(
158
- created_at=db_source.created_at,
159
- updated_at=db_source.updated_at,
160
- remote_uri=AnyUrl(db_source.uri),
161
- cloned_path=Path(db_source.cloned_path),
162
- source_type=SourceType(db_source.type.value),
163
- files=domain_files,
164
- )
165
-
166
- # Create source
167
- return domain_entities.Source(
168
- id=db_source.id,
169
- created_at=db_source.created_at,
170
- updated_at=db_source.updated_at,
171
- working_copy=working_copy,
172
- )
173
-
174
- async def to_domain_file(self, db_file: db_entities.File) -> domain_entities.File:
175
- """Convert SQLAlchemy File to domain File."""
176
- # Load authors for this file
177
- authors_stmt = (
178
- select(db_entities.Author)
179
- .join(db_entities.AuthorFileMapping)
180
- .where(db_entities.AuthorFileMapping.file_id == db_file.id)
181
- )
182
- db_authors = (await self._session.scalars(authors_stmt)).all()
183
-
184
- domain_authors = [
185
- domain_entities.Author(id=author.id, name=author.name, email=author.email)
186
- for author in db_authors
187
- ]
188
-
189
- return domain_entities.File(
190
- id=db_file.id,
191
- created_at=db_file.created_at,
192
- updated_at=db_file.updated_at,
193
- uri=AnyUrl(db_file.uri),
194
- sha256=db_file.sha256,
195
- authors=domain_authors,
196
- mime_type=db_file.mime_type,
197
- file_processing_status=FileProcessingStatus(db_file.file_processing_status),
198
- )
199
-
200
- async def to_domain_snippet(
201
- self, db_snippet: db_entities.Snippet, domain_files: list[domain_entities.File]
202
- ) -> domain_entities.Snippet:
203
- """Convert SQLAlchemy Snippet to domain Snippet."""
204
- # Find the file this snippet derives from
205
- derives_from = []
206
- for domain_file in domain_files:
207
- if domain_file.id == db_snippet.file_id:
208
- derives_from.append(domain_file)
209
- break
210
-
211
- # Create domain snippet with original content
212
- domain_snippet = domain_entities.Snippet(
213
- id=db_snippet.id,
214
- created_at=db_snippet.created_at,
215
- updated_at=db_snippet.updated_at,
216
- derives_from=derives_from,
217
- )
218
-
219
- # Add original content
220
- if db_snippet.content:
221
- domain_snippet.add_original_content(db_snippet.content, "unknown")
222
-
223
- # Add summary content if it exists
224
- if db_snippet.summary:
225
- domain_snippet.add_summary(db_snippet.summary)
226
-
227
- return domain_snippet
228
-
229
- async def from_domain_index( # noqa: C901
230
- self, domain_index: domain_entities.Index
231
- ) -> tuple[
232
- db_entities.Index,
233
- db_entities.Source,
234
- list[db_entities.File],
235
- list[db_entities.Author],
236
- ]:
237
- """Convert domain Index aggregate to SQLAlchemy entities.
238
-
239
- Returns all the entities that need to be persisted.
240
- """
241
- # Create source entity
242
- db_source = db_entities.Source(
243
- uri=str(domain_index.source.working_copy.remote_uri),
244
- cloned_path=str(domain_index.source.working_copy.cloned_path),
245
- source_type=db_entities.SourceType(
246
- domain_index.source.working_copy.source_type.value
247
- ),
248
- )
249
- if domain_index.source.id:
250
- db_source.id = domain_index.source.id
251
- if domain_index.source.created_at:
252
- db_source.created_at = domain_index.source.created_at
253
- if domain_index.source.updated_at:
254
- db_source.updated_at = domain_index.source.updated_at
255
-
256
- # Create index entity
257
- # Will be set after source is saved
258
- db_index = db_entities.Index(source_id=db_source.id or 0)
259
- if domain_index.id:
260
- db_index.id = domain_index.id
261
- if domain_index.created_at:
262
- db_index.created_at = domain_index.created_at
263
- if domain_index.updated_at:
264
- db_index.updated_at = domain_index.updated_at
265
-
266
- # Create file entities
267
- db_files = []
268
- all_authors = []
269
-
270
- for domain_file in domain_index.source.working_copy.files:
271
- now = datetime.now(UTC)
272
- db_file = db_entities.File(
273
- created_at=domain_file.created_at or now,
274
- updated_at=domain_file.updated_at or now,
275
- source_id=db_source.id or 0, # Will be set after source is saved
276
- mime_type="", # Would need to be determined
277
- uri=str(domain_file.uri),
278
- # Would need to be determined from working copy + relative path
279
- cloned_path="",
280
- sha256=domain_file.sha256,
281
- size_bytes=0, # Would need to be determined
282
- extension="", # Would need to be determined
283
- file_processing_status=domain_file.file_processing_status.value,
284
- )
285
- if domain_file.id:
286
- db_file.id = domain_file.id
287
-
288
- db_files.append(db_file)
289
- all_authors.extend(domain_file.authors)
290
-
291
- # Create unique author entities
292
- unique_authors = {}
293
- for author in all_authors:
294
- key = (author.name, author.email)
295
- if key not in unique_authors:
296
- db_author = db_entities.Author(name=author.name, email=author.email)
297
- if author.id:
298
- db_author.id = author.id
299
- unique_authors[key] = db_author
300
-
301
- return db_index, db_source, db_files, list(unique_authors.values())
302
-
303
- async def from_domain_snippet(
304
- self, domain_snippet: domain_entities.Snippet, index_id: int
305
- ) -> db_entities.Snippet:
306
- """Convert domain Snippet to SQLAlchemy Snippet."""
307
- # Get file ID from derives_from (use first file if multiple)
308
- if not domain_snippet.derives_from:
309
- raise ValueError("Snippet must derive from at least one file")
310
-
311
- file_id = domain_snippet.derives_from[0].id
312
- if file_id is None:
313
- raise ValueError("File must have an ID")
314
-
315
- db_snippet = db_entities.Snippet(
316
- file_id=file_id,
317
- index_id=index_id,
318
- content=domain_snippet.original_text(),
319
- summary=domain_snippet.summary_text(),
320
- )
321
-
322
- if domain_snippet.id:
323
- db_snippet.id = domain_snippet.id
324
- if domain_snippet.created_at:
325
- db_snippet.created_at = domain_snippet.created_at
326
- if domain_snippet.updated_at:
327
- db_snippet.updated_at = domain_snippet.updated_at
328
-
329
- return db_snippet
330
-
331
- async def load_snippets_for_index(
332
- self, index_id: int, domain_files: list[domain_entities.File]
333
- ) -> list[domain_entities.Snippet]:
334
- """Load all snippets for an index and convert to domain entities."""
335
- stmt = select(db_entities.Snippet).where(
336
- db_entities.Snippet.index_id == index_id
337
- )
338
- db_snippets = (await self._session.scalars(stmt)).all()
339
-
340
- domain_snippets = []
341
- for db_snippet in db_snippets:
342
- domain_snippet = await self.to_domain_snippet(db_snippet, domain_files)
343
- domain_snippets.append(domain_snippet)
344
-
345
- return domain_snippets
@@ -1,73 +0,0 @@
1
- """TQDM progress."""
2
-
3
- from tqdm import tqdm
4
-
5
- from kodit.config import ReportingConfig
6
- from kodit.domain.protocols import ReportingModule
7
- from kodit.domain.value_objects import Progress, ProgressState, ReportingState
8
-
9
-
10
- class TQDMReportingModule(ReportingModule):
11
- """TQDM reporting module."""
12
-
13
- def __init__(self, config: ReportingConfig) -> None:
14
- """Initialize the TQDM reporting module."""
15
- self.config = config
16
- self.pbar = tqdm()
17
-
18
- def on_change(self, step: Progress) -> None:
19
- """On step changed."""
20
- if step.state == ReportingState.COMPLETED:
21
- self.pbar.close()
22
- return
23
-
24
- self.pbar.set_description(step.message)
25
- self.pbar.refresh()
26
- # Update description if message is provided
27
- if step.message:
28
- # Fix the event message to a specific size so it's not jumping around
29
- # If it's too small, add spaces
30
- # If it's too large, truncate
31
- if len(step.message) < 30:
32
- self.pbar.set_description(step.message + " " * (30 - len(step.message)))
33
- else:
34
- self.pbar.set_description(step.message[-30:])
35
- else:
36
- self.pbar.set_description(step.name)
37
-
38
-
39
- class TQDMProgress(Progress):
40
- """TQDM-based progress callback implementation."""
41
-
42
- def __init__(self, config: ReportingConfig | None = None) -> None:
43
- """Initialize with a TQDM progress bar."""
44
- self.config = config or ReportingConfig()
45
- self.pbar = tqdm()
46
-
47
- def on_update(self, state: ProgressState) -> None:
48
- """Update the TQDM progress bar."""
49
- # Update total if it changes
50
- if state.total != self.pbar.total:
51
- self.pbar.total = state.total
52
-
53
- # Update the progress bar
54
- self.pbar.n = state.current
55
- self.pbar.refresh()
56
-
57
- # Update description if message is provided
58
- if state.message:
59
- # Fix the event message to a specific size so it's not jumping around
60
- # If it's too small, add spaces
61
- # If it's too large, truncate
62
- if len(state.message) < 30:
63
- self.pbar.set_description(
64
- state.message + " " * (30 - len(state.message))
65
- )
66
- else:
67
- self.pbar.set_description(state.message[-30:])
68
- else:
69
- self.pbar.set_description(state.operation)
70
-
71
- def on_complete(self) -> None:
72
- """Complete the progress bar."""
73
- self.pbar.close()
@@ -1,18 +0,0 @@
1
- """Language detection service implementation."""
2
-
3
- from pathlib import Path
4
-
5
- from kodit.domain.services.index_service import LanguageDetectionService
6
-
7
-
8
- class FileSystemLanguageDetectionService(LanguageDetectionService):
9
- """Simple file extension based language detection service."""
10
-
11
- def __init__(self, language_map: dict[str, str]) -> None:
12
- """Initialize with a mapping of extensions to languages."""
13
- self._language_map = language_map
14
-
15
- async def detect_language(self, file_path: Path) -> str:
16
- """Detect language based on file extension."""
17
- extension = file_path.suffix.lstrip(".")
18
- return self._language_map.get(extension, "unknown")