kodit 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.5'
32
- __version_tuple__ = version_tuple = (0, 5, 5)
31
+ __version__ = version = '0.5.7'
32
+ __version_tuple__ = version_tuple = (0, 5, 7)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -63,6 +63,8 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
63
63
  )
64
64
  )
65
65
  )
66
+ except StopAsyncIteration:
67
+ pass
66
68
  except Exception as e:
67
69
  raise ValueError("Embedding service is not accessible") from e
68
70
  try:
@@ -50,6 +50,9 @@ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
50
50
  VectorChordBM25Repository,
51
51
  )
52
52
  from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
53
+ from kodit.infrastructure.database_schema.database_schema_detector import (
54
+ DatabaseSchemaDetector,
55
+ )
53
56
  from kodit.infrastructure.embedding.embedding_factory import (
54
57
  embedding_domain_service_factory,
55
58
  )
@@ -255,6 +258,7 @@ class ServerFactory:
255
258
  text_search_service=self.text_search_service(),
256
259
  embedding_repository=self.embedding_repository(),
257
260
  architecture_service=self.architecture_service(),
261
+ database_schema_detector=DatabaseSchemaDetector(),
258
262
  enrichment_v2_repository=self.enrichment_v2_repository(),
259
263
  enricher_service=self.enricher(),
260
264
  enrichment_association_repository=self.enrichment_association_repository(),
@@ -14,6 +14,9 @@ if TYPE_CHECKING:
14
14
  from kodit.application.services.enrichment_query_service import (
15
15
  EnrichmentQueryService,
16
16
  )
17
+ from kodit.domain.enrichments.architecture.database_schema.database_schema import (
18
+ DatabaseSchemaEnrichment,
19
+ )
17
20
  from kodit.domain.enrichments.architecture.physical.physical import (
18
21
  PhysicalArchitectureEnrichment,
19
22
  )
@@ -27,11 +30,20 @@ from kodit.domain.enrichments.enrichment import (
27
30
  EnrichmentAssociation,
28
31
  EnrichmentV2,
29
32
  )
33
+ from kodit.domain.enrichments.history.commit_description.commit_description import (
34
+ CommitDescriptionEnrichment,
35
+ )
30
36
  from kodit.domain.enrichments.request import (
31
37
  EnrichmentRequest as GenericEnrichmentRequest,
32
38
  )
33
39
  from kodit.domain.entities import Task
34
- from kodit.domain.entities.git import GitFile, GitRepo, SnippetV2, TrackingType
40
+ from kodit.domain.entities.git import (
41
+ GitCommit,
42
+ GitFile,
43
+ GitRepo,
44
+ SnippetV2,
45
+ TrackingType,
46
+ )
35
47
  from kodit.domain.factories.git_repo_factory import GitRepoFactory
36
48
  from kodit.domain.protocols import (
37
49
  EnrichmentAssociationRepository,
@@ -63,6 +75,9 @@ from kodit.domain.value_objects import (
63
75
  TaskOperation,
64
76
  TrackableType,
65
77
  )
78
+ from kodit.infrastructure.database_schema.database_schema_detector import (
79
+ DatabaseSchemaDetector,
80
+ )
66
81
  from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
67
82
  from kodit.infrastructure.slicing.slicer import Slicer
68
83
  from kodit.infrastructure.sqlalchemy import entities as db_entities
@@ -82,6 +97,73 @@ You are a professional software developer. You will be given a snippet of code.
82
97
  Please provide a concise explanation of the code.
83
98
  """
84
99
 
100
+ COMMIT_DESCRIPTION_SYSTEM_PROMPT = """
101
+ You are a professional software developer. You will be given a git commit diff.
102
+ Please provide a concise description of what changes were made and why.
103
+ """
104
+
105
+ DATABASE_SCHEMA_SYSTEM_PROMPT = """
106
+ You are an expert database architect and documentation specialist.
107
+ Your task is to create clear, visual documentation of database schemas.
108
+ """
109
+
110
+ DATABASE_SCHEMA_TASK_PROMPT = """
111
+ You will be provided with a database schema discovery report.
112
+ Please create comprehensive database schema documentation.
113
+
114
+ <schema_report>
115
+ {schema_report}
116
+ </schema_report>
117
+
118
+ **Return the following:**
119
+
120
+ ## Entity List
121
+
122
+ For each table/entity, write one line:
123
+ - **[Table Name]**: [brief description of what it stores]
124
+
125
+ ## Mermaid ERD
126
+
127
+ Create a Mermaid Entity Relationship Diagram showing:
128
+ - All entities (tables)
129
+ - Key relationships between entities (if apparent from names or common patterns)
130
+ - Use standard ERD notation
131
+
132
+ Example format:
133
+ ```mermaid
134
+ erDiagram
135
+ User ||--o{{ Order : places
136
+ User {{
137
+ int id PK
138
+ string email
139
+ string name
140
+ }}
141
+ Order {{
142
+ int id PK
143
+ int user_id FK
144
+ datetime created_at
145
+ }}
146
+ ```
147
+
148
+ If specific field details aren't available, show just the entity boxes and
149
+ relationships.
150
+
151
+ ## Key Observations
152
+
153
+ Answer these questions in 1-2 sentences each:
154
+ 1. What is the primary data model pattern (e.g., user-centric,
155
+ event-sourced, multi-tenant)?
156
+ 2. What migration strategy is being used?
157
+ 3. Are there any notable database design patterns or concerns?
158
+
159
+ ## Rules:
160
+ - Be concise and focus on the high-level structure
161
+ - Infer reasonable relationships from table names when explicit information
162
+ isn't available
163
+ - If no database schema is found, state that clearly
164
+ - Keep entity descriptions to 10 words or less
165
+ """
166
+
85
167
 
86
168
  class CommitIndexingApplicationService:
87
169
  """Application service for commit indexing operations."""
@@ -103,6 +185,7 @@ class CommitIndexingApplicationService:
103
185
  text_search_service: EmbeddingDomainService,
104
186
  embedding_repository: SqlAlchemyEmbeddingRepository,
105
187
  architecture_service: PhysicalArchitectureService,
188
+ database_schema_detector: DatabaseSchemaDetector,
106
189
  enricher_service: Enricher,
107
190
  enrichment_v2_repository: EnrichmentV2Repository,
108
191
  enrichment_association_repository: EnrichmentAssociationRepository,
@@ -124,6 +207,7 @@ class CommitIndexingApplicationService:
124
207
  self.text_search_service = text_search_service
125
208
  self.embedding_repository = embedding_repository
126
209
  self.architecture_service = architecture_service
210
+ self.database_schema_detector = database_schema_detector
127
211
  self.enrichment_v2_repository = enrichment_v2_repository
128
212
  self.enrichment_association_repository = enrichment_association_repository
129
213
  self.enricher_service = enricher_service
@@ -191,11 +275,66 @@ class CommitIndexingApplicationService:
191
275
  await self.process_architecture_discovery(repository_id, commit_sha)
192
276
  elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
193
277
  await self.process_api_docs(repository_id, commit_sha)
278
+ elif task.type == TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT:
279
+ await self.process_commit_description(repository_id, commit_sha)
280
+ elif task.type == TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT:
281
+ await self.process_database_schema(repository_id, commit_sha)
194
282
  else:
195
283
  raise ValueError(f"Unknown task type: {task.type}")
196
284
  else:
197
285
  raise ValueError(f"Unknown task type: {task.type}")
198
286
 
287
+ async def _process_files_in_batches(
288
+ self, cloned_path: Path, all_commits: list[GitCommit], batch_size: int = 100
289
+ ) -> int:
290
+ """Process file metadata for all commits in batches to avoid memory exhaustion.
291
+
292
+ This loads file metadata (paths, sizes, blob SHAs) in batches and saves them
293
+ incrementally to avoid holding millions of file objects in memory.
294
+
295
+ Args:
296
+ cloned_path: Path to the cloned repository
297
+ all_commits: List of all commits from scan
298
+ batch_size: Number of commits to process at once (default 100)
299
+
300
+ Returns:
301
+ Total number of files processed
302
+
303
+ """
304
+ total_files = 0
305
+ commit_shas = [commit.commit_sha for commit in all_commits]
306
+ total_batches = (len(commit_shas) + batch_size - 1) // batch_size
307
+
308
+ self._log.info(
309
+ f"Processing files for {len(commit_shas)} commits "
310
+ f"in {total_batches} batches"
311
+ )
312
+
313
+ # Process commits in batches
314
+ for i in range(0, len(commit_shas), batch_size):
315
+ batch = commit_shas[i : i + batch_size]
316
+ batch_num = i // batch_size + 1
317
+
318
+ self._log.debug(
319
+ f"Processing batch {batch_num}/{total_batches} ({len(batch)} commits)"
320
+ )
321
+
322
+ # Get file metadata for this batch of commits
323
+ files = await self.scanner.process_files_for_commits_batch(
324
+ cloned_path, batch
325
+ )
326
+
327
+ # Save file metadata to database immediately
328
+ if files:
329
+ await self.git_file_repository.save_bulk(files)
330
+ total_files += len(files)
331
+ self._log.debug(
332
+ f"Batch {batch_num}: Saved {len(files)} files "
333
+ f"(total so far: {total_files})"
334
+ )
335
+
336
+ return total_files
337
+
199
338
  async def process_clone_repo(self, repository_id: int) -> None:
200
339
  """Clone a repository."""
201
340
  async with self.operation.create_child(
@@ -233,8 +372,11 @@ class CommitIndexingApplicationService:
233
372
  await step.set_current(2, "Saving commits")
234
373
  await self.git_commit_repository.save_bulk(scan_result.all_commits)
235
374
 
236
- await step.set_current(3, "Saving files")
237
- await self.git_file_repository.save_bulk(scan_result.all_files)
375
+ await step.set_current(3, "Processing and saving files in batches")
376
+ total_files = await self._process_files_in_batches(
377
+ repo.cloned_path, scan_result.all_commits
378
+ )
379
+ self._log.info(f"Processed and saved {total_files} total files")
238
380
 
239
381
  await step.set_current(4, "Saving branches")
240
382
  if scan_result.branches:
@@ -798,6 +940,137 @@ class CommitIndexingApplicationService:
798
940
  ]
799
941
  )
800
942
 
943
+ async def process_commit_description(
944
+ self, repository_id: int, commit_sha: str
945
+ ) -> None:
946
+ """Handle COMMIT_DESCRIPTION task - generate commit descriptions."""
947
+ async with self.operation.create_child(
948
+ TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT,
949
+ trackable_type=TrackableType.KODIT_REPOSITORY,
950
+ trackable_id=repository_id,
951
+ ) as step:
952
+ # Check if commit description already exists for this commit
953
+ if await self.enrichment_query_service.has_commit_description_for_commit(
954
+ commit_sha
955
+ ):
956
+ await step.skip("Commit description already exists for commit")
957
+ return
958
+
959
+ # Get repository path
960
+ repo = await self.repo_repository.get(repository_id)
961
+ if not repo.cloned_path:
962
+ raise ValueError(f"Repository {repository_id} has never been cloned")
963
+
964
+ await step.set_total(3)
965
+ await step.set_current(1, "Getting commit diff")
966
+
967
+ # Get the diff for this commit
968
+ diff = await self.scanner.git_adapter.get_commit_diff(
969
+ repo.cloned_path, commit_sha
970
+ )
971
+
972
+ if not diff or len(diff.strip()) == 0:
973
+ await step.skip("No diff found for commit")
974
+ return
975
+
976
+ await step.set_current(2, "Enriching commit description with LLM")
977
+
978
+ # Enrich the diff through the enricher
979
+ enrichment_request = GenericEnrichmentRequest(
980
+ id=commit_sha,
981
+ text=diff,
982
+ system_prompt=COMMIT_DESCRIPTION_SYSTEM_PROMPT,
983
+ )
984
+
985
+ enriched_content = ""
986
+ async for response in self.enricher_service.enrich([enrichment_request]):
987
+ enriched_content = response.text
988
+
989
+ # Create and save commit description enrichment
990
+ enrichment = await self.enrichment_v2_repository.save(
991
+ CommitDescriptionEnrichment(
992
+ content=enriched_content,
993
+ )
994
+ )
995
+ if not enrichment or not enrichment.id:
996
+ raise ValueError(
997
+ f"Failed to save commit description enrichment for commit "
998
+ f"{commit_sha}"
999
+ )
1000
+ await self.enrichment_association_repository.save(
1001
+ CommitEnrichmentAssociation(
1002
+ enrichment_id=enrichment.id,
1003
+ entity_id=commit_sha,
1004
+ )
1005
+ )
1006
+
1007
+ await step.set_current(3, "Commit description enrichment completed")
1008
+
1009
+ async def process_database_schema(
1010
+ self, repository_id: int, commit_sha: str
1011
+ ) -> None:
1012
+ """Handle DATABASE_SCHEMA task - discover and document database schemas."""
1013
+ async with self.operation.create_child(
1014
+ TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT,
1015
+ trackable_type=TrackableType.KODIT_REPOSITORY,
1016
+ trackable_id=repository_id,
1017
+ ) as step:
1018
+ # Check if database schema already exists for this commit
1019
+ if await self.enrichment_query_service.has_database_schema_for_commit(
1020
+ commit_sha
1021
+ ):
1022
+ await step.skip("Database schema already exists for commit")
1023
+ return
1024
+
1025
+ # Get repository path
1026
+ repo = await self.repo_repository.get(repository_id)
1027
+ if not repo.cloned_path:
1028
+ raise ValueError(f"Repository {repository_id} has never been cloned")
1029
+
1030
+ await step.set_total(3)
1031
+ await step.set_current(1, "Discovering database schemas")
1032
+
1033
+ # Discover database schemas
1034
+ schema_report = await self.database_schema_detector.discover_schemas(
1035
+ repo.cloned_path
1036
+ )
1037
+
1038
+ if "No database schemas detected" in schema_report:
1039
+ await step.skip("No database schemas found in repository")
1040
+ return
1041
+
1042
+ await step.set_current(2, "Enriching schema documentation with LLM")
1043
+
1044
+ # Enrich the schema report through the enricher
1045
+ enrichment_request = GenericEnrichmentRequest(
1046
+ id=commit_sha,
1047
+ text=DATABASE_SCHEMA_TASK_PROMPT.format(schema_report=schema_report),
1048
+ system_prompt=DATABASE_SCHEMA_SYSTEM_PROMPT,
1049
+ )
1050
+
1051
+ enriched_content = ""
1052
+ async for response in self.enricher_service.enrich([enrichment_request]):
1053
+ enriched_content = response.text
1054
+
1055
+ # Create and save database schema enrichment
1056
+ enrichment = await self.enrichment_v2_repository.save(
1057
+ DatabaseSchemaEnrichment(
1058
+ content=enriched_content,
1059
+ )
1060
+ )
1061
+ if not enrichment or not enrichment.id:
1062
+ raise ValueError(
1063
+ f"Failed to save database schema enrichment for commit {commit_sha}"
1064
+ )
1065
+ await self.enrichment_association_repository.save(
1066
+ CommitEnrichmentAssociation(
1067
+ enrichment_id=enrichment.id,
1068
+ entity_id=commit_sha,
1069
+ )
1070
+ )
1071
+
1072
+ await step.set_current(3, "Database schema enrichment completed")
1073
+
801
1074
  async def _new_snippets_for_type(
802
1075
  self, all_snippets: list[EnrichmentV2], embedding_type: EmbeddingType
803
1076
  ) -> list[EnrichmentV2]:
@@ -5,6 +5,9 @@ import structlog
5
5
  from kodit.domain.enrichments.architecture.architecture import (
6
6
  ENRICHMENT_TYPE_ARCHITECTURE,
7
7
  )
8
+ from kodit.domain.enrichments.architecture.database_schema.database_schema import (
9
+ ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
10
+ )
8
11
  from kodit.domain.enrichments.architecture.physical.physical import (
9
12
  ENRICHMENT_SUBTYPE_PHYSICAL,
10
13
  )
@@ -14,6 +17,10 @@ from kodit.domain.enrichments.development.snippet.snippet import (
14
17
  ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
15
18
  )
16
19
  from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
20
+ from kodit.domain.enrichments.history.commit_description.commit_description import (
21
+ ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
22
+ )
23
+ from kodit.domain.enrichments.history.history import ENRICHMENT_TYPE_HISTORY
17
24
  from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
18
25
  from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
19
26
  from kodit.domain.protocols import (
@@ -215,6 +222,36 @@ class EnrichmentQueryService:
215
222
  api_docs = await self.get_api_docs_for_commit(commit_sha)
216
223
  return len(api_docs) > 0
217
224
 
225
+ async def get_commit_description_for_commit(
226
+ self, commit_sha: str
227
+ ) -> list[EnrichmentV2]:
228
+ """Get commit description enrichments for a commit."""
229
+ return await self.get_enrichments_for_commit(
230
+ commit_sha,
231
+ enrichment_type=ENRICHMENT_TYPE_HISTORY,
232
+ enrichment_subtype=ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
233
+ )
234
+
235
+ async def has_commit_description_for_commit(self, commit_sha: str) -> bool:
236
+ """Check if a commit has commit description enrichments."""
237
+ commit_descriptions = await self.get_commit_description_for_commit(commit_sha)
238
+ return len(commit_descriptions) > 0
239
+
240
+ async def get_database_schema_for_commit(
241
+ self, commit_sha: str
242
+ ) -> list[EnrichmentV2]:
243
+ """Get database schema enrichments for a commit."""
244
+ return await self.get_enrichments_for_commit(
245
+ commit_sha,
246
+ enrichment_type=ENRICHMENT_TYPE_ARCHITECTURE,
247
+ enrichment_subtype=ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
248
+ )
249
+
250
+ async def has_database_schema_for_commit(self, commit_sha: str) -> bool:
251
+ """Check if a commit has database schema enrichments."""
252
+ database_schemas = await self.get_database_schema_for_commit(commit_sha)
253
+ return len(database_schemas) > 0
254
+
218
255
  async def associations_for_enrichments(
219
256
  self, enrichments: list[EnrichmentV2]
220
257
  ) -> list[EnrichmentAssociation]:
@@ -0,0 +1 @@
1
+ """Database schema enrichments."""
@@ -0,0 +1,17 @@
1
+ """Database schema enrichment domain entity."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from kodit.domain.enrichments.architecture.architecture import ArchitectureEnrichment
6
+
7
+ ENRICHMENT_SUBTYPE_DATABASE_SCHEMA = "database_schema"
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class DatabaseSchemaEnrichment(ArchitectureEnrichment):
12
+ """Enrichment containing database schema information for a commit."""
13
+
14
+ @property
15
+ def subtype(self) -> str | None:
16
+ """Return the enrichment subtype."""
17
+ return ENRICHMENT_SUBTYPE_DATABASE_SCHEMA
@@ -0,0 +1 @@
1
+ """History enrichments."""
@@ -0,0 +1 @@
1
+ """Commit description enrichments."""
@@ -0,0 +1,17 @@
1
+ """Commit description enrichment domain entity."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from kodit.domain.enrichments.history.history import HistoryEnrichment
6
+
7
+ ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION = "commit_description"
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class CommitDescriptionEnrichment(HistoryEnrichment):
12
+ """Enrichment containing a description of what a commit did."""
13
+
14
+ @property
15
+ def subtype(self) -> str | None:
16
+ """Return the enrichment subtype."""
17
+ return ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION
@@ -0,0 +1,18 @@
1
+ """History enrichment domain entity."""
2
+
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+
6
+ from kodit.domain.enrichments.enrichment import CommitEnrichment
7
+
8
+ ENRICHMENT_TYPE_HISTORY = "history"
9
+
10
+
11
+ @dataclass(frozen=True)
12
+ class HistoryEnrichment(CommitEnrichment, ABC):
13
+ """Enrichment containing historical information for a commit."""
14
+
15
+ @property
16
+ def type(self) -> str:
17
+ """Return the enrichment type."""
18
+ return ENRICHMENT_TYPE_HISTORY
kodit/domain/protocols.py CHANGED
@@ -4,6 +4,8 @@ from abc import ABC, abstractmethod
4
4
  from pathlib import Path
5
5
  from typing import Any, Protocol, TypeVar
6
6
 
7
+ from git import Repo
8
+
7
9
  from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
8
10
  from kodit.domain.entities import (
9
11
  Task,
@@ -163,9 +165,16 @@ class GitAdapter(ABC):
163
165
 
164
166
  @abstractmethod
165
167
  async def get_commit_files(
166
- self, local_path: Path, commit_sha: str
168
+ self, local_path: Path, commit_sha: str, repo: Repo
167
169
  ) -> list[dict[str, Any]]:
168
- """Get all files in a specific commit from the git tree."""
170
+ """Get all files in a specific commit from the git tree.
171
+
172
+ Args:
173
+ local_path: Path to the repository
174
+ commit_sha: SHA of the commit to get files for
175
+ repo: Repo object to reuse (avoids creating new Repo per commit)
176
+
177
+ """
169
178
 
170
179
  @abstractmethod
171
180
  async def get_commit_file_data(
@@ -213,6 +222,10 @@ class GitAdapter(ABC):
213
222
  ) -> list[str]:
214
223
  """Get only commit SHAs for a branch (much faster than full commit data)."""
215
224
 
225
+ @abstractmethod
226
+ async def get_commit_diff(self, local_path: Path, commit_sha: str) -> str:
227
+ """Get the diff for a specific commit."""
228
+
216
229
 
217
230
  class SnippetRepositoryV2(ABC):
218
231
  """Repository for snippet operations."""
@@ -1,6 +1,5 @@
1
1
  """Domain services for Git repository scanning and cloning operations."""
2
2
 
3
- import asyncio
4
3
  import shutil
5
4
  from dataclasses import dataclass
6
5
  from datetime import UTC, datetime
@@ -66,51 +65,11 @@ class GitRepositoryScanner:
66
65
  tags = await self._process_tags(cloned_path, commit_cache, repo_id)
67
66
  self._log.info(f"Found {len(tags)} tags")
68
67
 
69
- all_files = await self._process_files(cloned_path, commit_cache)
70
- self._log.info(f"Found {len(all_files)} files")
68
+ # Don't load all files into memory - return empty list
69
+ # Files will be processed in batches by the application service
70
+ self._log.info("Deferring file processing to avoid memory exhaustion")
71
71
 
72
- return self._create_scan_result(branches, commit_cache, tags, all_files)
73
-
74
- async def _process_commits_concurrently(
75
- self,
76
- cloned_path: Path,
77
- commits_batch: list[tuple[str, dict[str, Any]]],
78
- ) -> dict[str, GitCommit]:
79
- """Process a batch of commits concurrently."""
80
- batch_cache = {}
81
-
82
- async def process_single_commit(
83
- commit_sha: str, commit_data: dict[str, Any]
84
- ) -> tuple[str, GitCommit | None]:
85
- git_commit = await self._create_git_commit_from_data(
86
- cloned_path, commit_data
87
- )
88
- return commit_sha, git_commit
89
-
90
- # Process commits concurrently in smaller batches
91
- semaphore = asyncio.Semaphore(50) # Limit concurrent operations
92
-
93
- async def bounded_process(
94
- item: tuple[str, dict[str, Any]],
95
- ) -> tuple[str, GitCommit | None]:
96
- async with semaphore:
97
- return await process_single_commit(item[0], item[1])
98
-
99
- # Process all commits concurrently
100
- results = await asyncio.gather(
101
- *[bounded_process(item) for item in commits_batch],
102
- return_exceptions=True,
103
- )
104
-
105
- # Collect successful results
106
- for result in results:
107
- if isinstance(result, tuple):
108
- # Type narrowing: result is now tuple[str, GitCommit | None]
109
- commit_sha, git_commit = result
110
- if git_commit is not None:
111
- batch_cache[commit_sha] = git_commit
112
-
113
- return batch_cache
72
+ return self._create_scan_result(branches, commit_cache, tags, [], cloned_path)
114
73
 
115
74
  async def _process_branches_bulk(
116
75
  self,
@@ -167,30 +126,6 @@ class GitRepositoryScanner:
167
126
 
168
127
  return branches, commit_cache
169
128
 
170
- async def _create_git_commit_from_data(
171
- self, cloned_path: Path, commit_data: dict[str, Any], repo_id: int | None = None
172
- ) -> GitCommit | None:
173
- """Create GitCommit from pre-fetched commit data."""
174
- commit_sha = commit_data["sha"]
175
-
176
- # Get files for this commit
177
- files_data = await self.git_adapter.get_commit_files(cloned_path, commit_sha)
178
- self._create_git_files(cloned_path, files_data, commit_sha)
179
- author = self._format_author_from_data(commit_data)
180
-
181
- # Cache datetime creation
182
- created_at = datetime.now(UTC)
183
-
184
- return GitCommit(
185
- created_at=created_at,
186
- commit_sha=commit_sha,
187
- repo_id=repo_id or 0, # Use 0 as default if not provided
188
- date=commit_data["date"],
189
- message=commit_data["message"],
190
- parent_commit_sha=commit_data["parent_sha"],
191
- author=author,
192
- )
193
-
194
129
  def _format_author_from_data(self, commit_data: dict[str, Any]) -> str:
195
130
  """Format author string from commit data."""
196
131
  author_name = commit_data.get("author_name", "")
@@ -376,17 +311,18 @@ class GitRepositoryScanner:
376
311
  branches: list[GitBranch],
377
312
  commit_cache: dict[str, GitCommit],
378
313
  tags: list[GitTag],
379
- all_files: list[GitFile],
314
+ all_files: list[GitFile], # noqa: ARG002
315
+ cloned_path: Path | None = None, # noqa: ARG002
380
316
  ) -> RepositoryScanResult:
381
317
  """Create final scan result."""
382
- # Files are loaded on-demand for performance, so total_files is 0 during scan
318
+ # Files list is empty to avoid memory issues - will be processed in batches
383
319
  scan_result = RepositoryScanResult(
384
320
  branches=branches,
385
321
  all_commits=list(commit_cache.values()),
386
322
  scan_timestamp=datetime.now(UTC),
387
- total_files_across_commits=len(all_files),
323
+ total_files_across_commits=0, # Will be updated after batch processing
388
324
  all_tags=tags,
389
- all_files=all_files,
325
+ all_files=[], # Empty - processed in batches to avoid memory exhaustion
390
326
  )
391
327
 
392
328
  self._log.info(
@@ -395,16 +331,35 @@ class GitRepositoryScanner:
395
331
  )
396
332
  return scan_result
397
333
 
398
- async def _process_files(
399
- self, cloned_path: Path, commit_cache: dict[str, GitCommit]
334
+ async def process_files_for_commits_batch(
335
+ self, cloned_path: Path, commit_shas: list[str]
400
336
  ) -> list[GitFile]:
401
- """Process files for a commit."""
337
+ """Process files for a batch of commits.
338
+
339
+ This allows the application service to process files in batches
340
+ to avoid loading millions of files into memory at once.
341
+
342
+ CRITICAL: Reuses a single Repo object to avoid creating 32K+ Repo instances
343
+ which would consume massive memory (1-2 MB each).
344
+ """
345
+ from git import Repo
346
+
347
+ # Open repo once and reuse for all commits in this batch
348
+ repo = Repo(cloned_path)
402
349
  files = []
403
- for commit_sha in commit_cache:
404
- files_data = await self.git_adapter.get_commit_files(
405
- cloned_path, commit_sha
406
- )
407
- files.extend(self._create_git_files(cloned_path, files_data, commit_sha))
350
+
351
+ try:
352
+ for commit_sha in commit_shas:
353
+ files_data = await self.git_adapter.get_commit_files(
354
+ cloned_path, commit_sha, repo=repo
355
+ )
356
+ files.extend(
357
+ self._create_git_files(cloned_path, files_data, commit_sha)
358
+ )
359
+ finally:
360
+ # Explicitly close the repo to free resources
361
+ repo.close()
362
+
408
363
  return files
409
364
 
410
365
 
@@ -614,6 +614,8 @@ class TaskOperation(StrEnum):
614
614
  "kodit.commit.create_architecture_enrichment"
615
615
  )
616
616
  CREATE_PUBLIC_API_DOCS_FOR_COMMIT = "kodit.commit.create_public_api_docs"
617
+ CREATE_COMMIT_DESCRIPTION_FOR_COMMIT = "kodit.commit.create_commit_description"
618
+ CREATE_DATABASE_SCHEMA_FOR_COMMIT = "kodit.commit.create_database_schema"
617
619
 
618
620
  def is_repository_operation(self) -> bool:
619
621
  """Check if the task operation is a repository operation."""
@@ -639,6 +641,8 @@ class PrescribedOperations:
639
641
  TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT,
640
642
  TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
641
643
  TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
644
+ TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT,
645
+ TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT,
642
646
  ]
643
647
  SYNC_REPOSITORY: ClassVar[list[TaskOperation]] = [
644
648
  TaskOperation.SCAN_REPOSITORY,
@@ -346,14 +346,22 @@ class GitPythonAdapter(GitAdapter):
346
346
  )
347
347
 
348
348
  async def get_commit_files(
349
- self, local_path: Path, commit_sha: str
349
+ self, local_path: Path, commit_sha: str, repo: Repo
350
350
  ) -> list[dict[str, Any]]:
351
- """Get all files in a specific commit from the git tree."""
351
+ """Get all files in a specific commit from the git tree.
352
+
353
+ Args:
354
+ local_path: Path to the repository
355
+ commit_sha: SHA of the commit to get files for
356
+ repo: Repo object to reuse (avoids creating new Repo per commit)
357
+
358
+ """
352
359
 
353
360
  def _get_files() -> list[dict[str, Any]]:
354
361
  try:
355
- repo = Repo(local_path)
356
- commit = repo.commit(commit_sha)
362
+ # Use the provided repo object
363
+ _repo = repo
364
+ commit = _repo.commit(commit_sha)
357
365
 
358
366
  files = []
359
367
 
@@ -395,7 +403,11 @@ class GitPythonAdapter(GitAdapter):
395
403
  """Get file metadata for a commit, with files checked out to disk."""
396
404
  await self._checkout_commit(local_path, commit_sha)
397
405
  try:
398
- return await self.get_commit_files(local_path, commit_sha)
406
+ repo = Repo(local_path)
407
+ try:
408
+ return await self.get_commit_files(local_path, commit_sha, repo)
409
+ finally:
410
+ repo.close()
399
411
  finally:
400
412
  await self.restore_to_branch(local_path, "main")
401
413
 
@@ -532,3 +544,42 @@ class GitPythonAdapter(GitAdapter):
532
544
  raise
533
545
 
534
546
  return await asyncio.get_event_loop().run_in_executor(self.executor, _get_tags)
547
+
548
+ async def get_commit_diff(self, local_path: Path, commit_sha: str) -> str:
549
+ """Get the diff for a specific commit."""
550
+
551
+ def _get_diff() -> str:
552
+ try:
553
+ repo = Repo(local_path)
554
+ commit = repo.commit(commit_sha)
555
+
556
+ # If this is the first commit (no parents), show diff against empty tree
557
+ if not commit.parents:
558
+ diffs = commit.diff(None, create_patch=True)
559
+ if not diffs:
560
+ return ""
561
+ first_diff = diffs[0]
562
+ diff_bytes = first_diff.diff
563
+ if isinstance(diff_bytes, bytes):
564
+ return diff_bytes.decode("utf-8")
565
+ return str(diff_bytes) if diff_bytes is not None else ""
566
+
567
+ # For commits with parents, show diff against first parent
568
+ parent = commit.parents[0]
569
+ diffs = parent.diff(commit, create_patch=True)
570
+
571
+ # Combine all diffs into a single string
572
+ diff_text = ""
573
+ for diff in diffs:
574
+ diff_bytes = diff.diff
575
+ if diff_bytes and isinstance(diff_bytes, bytes):
576
+ diff_text += diff_bytes.decode("utf-8")
577
+ except Exception as e:
578
+ self._log.error(
579
+ f"Failed to get diff for commit {commit_sha} in {local_path}: {e}"
580
+ )
581
+ raise
582
+ else:
583
+ return diff_text
584
+
585
+ return await asyncio.get_event_loop().run_in_executor(self.executor, _get_diff)
@@ -0,0 +1 @@
1
+ """Database schema detection infrastructure."""
@@ -0,0 +1,268 @@
1
+ """Database schema detector for discovering database schemas in a repository."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import ClassVar
6
+
7
+
8
class DatabaseSchemaDetector:
    """Detects database schemas from various sources in a repository.

    Scans a repository for migration files, raw SQL schema files and ORM
    model definitions, then renders the combined findings as a markdown
    report via :meth:`discover_schemas`.
    """

    # Glob patterns for common migration layouts (Alembic, Django, Rails,
    # golang-migrate, Flyway, Liquibase). Patterns may overlap; duplicate
    # matches are filtered during detection.
    MIGRATION_PATTERNS: ClassVar[list[str]] = [
        "**/migrations/**/*.sql",
        "**/migrations/**/*.py",
        "**/migrate/**/*.sql",
        "**/migrate/**/*.go",
        "**/db/migrate/**/*.rb",
        "**/alembic/versions/**/*.py",
        "**/liquibase/**/*.xml",
        "**/flyway/**/*.sql",
    ]

    # Glob patterns for raw SQL schema files. Note that "**/*.sql" already
    # subsumes the narrower patterns; detection deduplicates matches.
    SQL_FILE_PATTERNS: ClassVar[list[str]] = [
        "**/*.sql",
        "**/schema/**/*.sql",
        "**/schemas/**/*.sql",
        "**/database/**/*.sql",
        "**/db/**/*.sql",
    ]

    ORM_MODEL_PATTERNS: ClassVar[list[str]] = [
        "**/models/**/*.py",  # SQLAlchemy, Django
        "**/models/**/*.go",  # GORM
        "**/entities/**/*.py",  # SQLAlchemy
        "**/entities/**/*.ts",  # TypeORM
        "**/entities/**/*.js",  # TypeORM/Sequelize
    ]

    # CREATE TABLE [IF NOT EXISTS] `name` / "name" / name
    CREATE_TABLE_PATTERN = re.compile(
        r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?",
        re.IGNORECASE,
    )

    # class Foo(Base) / class Foo(db.Model) / class Foo(Model)
    SQLALCHEMY_MODEL_PATTERN = re.compile(
        r"class\s+(\w+)\s*\([^)]*(?:Base|Model|db\.Model)[^)]*\):",
        re.MULTILINE,
    )

    # type Foo struct { ... gorm.Model ... }
    GORM_MODEL_PATTERN = re.compile(
        r"type\s+(\w+)\s+struct\s*{[^}]*gorm\.Model",
        re.MULTILINE | re.DOTALL,
    )

    # @Entity(...) [export] class Foo
    TYPEORM_ENTITY_PATTERN = re.compile(
        r"@Entity\([^)]*\)\s*(?:export\s+)?class\s+(\w+)",
        re.MULTILINE,
    )

    async def discover_schemas(self, repo_path: Path) -> str:
        """Discover database schemas under *repo_path* and return a report.

        Args:
            repo_path: Root of the repository checkout to scan.

        Returns:
            A markdown report of discovered tables, migrations, SQL files
            and ORM models (or a "nothing found" message).
        """
        findings: dict[str, set[str] | list[str] | list[dict]] = {
            "tables": set(),
            "migration_files": [],
            "sql_files": [],
            "orm_models": [],
        }

        # Detect migration files first so SQL detection can exclude them.
        await self._detect_migrations(repo_path, findings)

        # Detect SQL schema files
        await self._detect_sql_files(repo_path, findings)

        # Detect ORM models
        await self._detect_orm_models(repo_path, findings)

        return self._generate_report(findings)

    async def _detect_migrations(self, repo_path: Path, findings: dict) -> None:
        """Collect migration files and harvest table names from them."""
        seen: set[str] = set()
        for pattern in self.MIGRATION_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                # Overlapping patterns may match the same file twice.
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                findings["migration_files"].append(rel_path)
                # Try to extract table names from migrations
                await self._extract_tables_from_file(file_path, findings)

    async def _detect_sql_files(self, repo_path: Path, findings: dict) -> None:
        """Collect non-migration SQL files and harvest table names."""
        migration_paths = set(findings["migration_files"])
        seen: set[str] = set()

        for pattern in self.SQL_FILE_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                # Skip migrations and files already matched by a broader
                # pattern (e.g. "**/*.sql" subsumes "**/db/**/*.sql").
                if rel_path in migration_paths or rel_path in seen:
                    continue
                seen.add(rel_path)
                findings["sql_files"].append(rel_path)
                await self._extract_tables_from_file(file_path, findings)

    async def _detect_orm_models(self, repo_path: Path, findings: dict) -> None:
        """Collect ORM model files and record their model/entity names."""
        seen: set[str] = set()
        for pattern in self.ORM_MODEL_PATTERNS:
            for file_path in repo_path.glob(pattern):
                if not file_path.is_file():
                    continue
                rel_path = str(file_path.relative_to(repo_path))
                if rel_path in seen:
                    continue
                seen.add(rel_path)
                models = await self._extract_orm_models(file_path)
                if models:
                    findings["orm_models"].append({
                        "file": rel_path,
                        "models": models,
                    })
                    # Model class names double as table/entity names.
                    findings["tables"].update(models)

    async def _extract_tables_from_file(self, file_path: Path, findings: dict) -> None:
        """Extract CREATE TABLE names from an SQL or migration file.

        Unreadable files are silently skipped: this is best-effort
        discovery, not validation.
        """
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")

            # Look for CREATE TABLE statements
            for match in self.CREATE_TABLE_PATTERN.finditer(content):
                findings["tables"].add(match.group(1))

        except (OSError, UnicodeDecodeError):
            pass

    async def _extract_orm_models(self, file_path: Path) -> list[str]:
        """Extract ORM model class names from a model file.

        The file's suffix selects the regex: .py → SQLAlchemy/Django,
        .go → GORM, .ts/.js → TypeORM. Unreadable files yield [].
        """
        models: list[str] = []

        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
            suffix = file_path.suffix

            if suffix == ".py":
                # SQLAlchemy or Django models
                models.extend(
                    match.group(1)
                    for match in self.SQLALCHEMY_MODEL_PATTERN.finditer(content)
                )

            elif suffix == ".go":
                # GORM models
                models.extend(
                    match.group(1)
                    for match in self.GORM_MODEL_PATTERN.finditer(content)
                )

            elif suffix in [".ts", ".js"]:
                # TypeORM entities
                models.extend(
                    match.group(1)
                    for match in self.TYPEORM_ENTITY_PATTERN.finditer(content)
                )

        except (OSError, UnicodeDecodeError):
            pass

        return models

    def _generate_report(self, findings: dict) -> str:  # noqa: PLR0915, C901, PLR0912
        """Render *findings* as a markdown report string."""
        lines: list[str] = []

        # Summary
        lines.append("# Database Schema Discovery Report")
        lines.append("")

        has_findings = (
            findings["tables"]
            or findings["migration_files"]
            or findings["sql_files"]
            or findings["orm_models"]
        )
        if not has_findings:
            lines.append("No database schemas detected in this repository.")
            return "\n".join(lines)

        # Tables/Entities found
        if findings["tables"]:
            lines.append(f"## Detected Tables/Entities ({len(findings['tables'])})")
            lines.append("")
            lines.extend(f"- {table}" for table in sorted(findings["tables"]))
            lines.append("")

        # Migration files (capped at 10 entries to keep the report short)
        if findings["migration_files"]:
            lines.append(f"## Migration Files ({len(findings['migration_files'])})")
            lines.append("")
            lines.append(
                "Database migrations detected, suggesting schema evolution over time:"
            )
            lines.extend(
                f"- {mig_file}" for mig_file in findings["migration_files"][:10]
            )
            if len(findings["migration_files"]) > 10:
                lines.append(f"- ... and {len(findings['migration_files']) - 10} more")
            lines.append("")

        # SQL files (capped at 10)
        if findings["sql_files"]:
            lines.append(f"## SQL Schema Files ({len(findings['sql_files'])})")
            lines.append("")
            lines.extend(f"- {sql_file}" for sql_file in findings["sql_files"][:10])
            if len(findings["sql_files"]) > 10:
                lines.append(f"- ... and {len(findings['sql_files']) - 10} more")
            lines.append("")

        # ORM models (capped at 10 files, 5 model names per file)
        if findings["orm_models"]:
            lines.append(f"## ORM Models ({len(findings['orm_models'])} files)")
            lines.append("")
            lines.append(
                "ORM models detected, suggesting object-relational mapping:"
            )
            for orm_info in findings["orm_models"][:10]:  # Limit to first 10
                model_names = ", ".join(orm_info["models"][:5])
                lines.append(f"- {orm_info['file']}: {model_names}")
                if len(orm_info["models"]) > 5:
                    lines.append(f"  (and {len(orm_info['models']) - 5} more models)")
            if len(findings["orm_models"]) > 10:
                lines.append(f"- ... and {len(findings['orm_models']) - 10} more files")
            lines.append("")

        # Inferred database type
        lines.append("## Inferred Information")
        lines.append("")

        # Framework inference is heuristic: substring checks on the joined
        # path list, first match wins.
        mig_files_str = str(findings.get("migration_files", []))
        mig_files = findings.get("migration_files", [])

        if "alembic" in mig_files_str:
            lines.append("- Migration framework: Alembic (Python/SQLAlchemy)")
        elif "django" in mig_files_str or any(
            "migrations" in f and f.endswith(".py") for f in mig_files
        ):
            lines.append("- Migration framework: Django Migrations")
        elif any(".go" in f for f in mig_files):
            lines.append(
                "- Migration framework: Go-based migrations (golang-migrate)"
            )
        elif "flyway" in mig_files_str:
            lines.append("- Migration framework: Flyway")
        elif "liquibase" in mig_files_str:
            lines.append("- Migration framework: Liquibase")

        if findings["orm_models"]:
            orm_models = findings["orm_models"]
            py_models = sum(1 for m in orm_models if m["file"].endswith(".py"))
            go_models = sum(1 for m in orm_models if m["file"].endswith(".go"))
            ts_models = sum(
                1 for m in orm_models if m["file"].endswith((".ts", ".js"))
            )

            if py_models > 0:
                lines.append("- ORM: Python (likely SQLAlchemy or Django ORM)")
            if go_models > 0:
                lines.append("- ORM: Go (likely GORM)")
            if ts_models > 0:
                lines.append(
                    "- ORM: TypeScript/JavaScript (likely TypeORM or Sequelize)"
                )

        return "\n".join(lines)
@@ -7,6 +7,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
7
7
  from kodit.domain.enrichments.architecture.architecture import (
8
8
  ENRICHMENT_TYPE_ARCHITECTURE,
9
9
  )
10
+ from kodit.domain.enrichments.architecture.database_schema.database_schema import (
11
+ ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
12
+ DatabaseSchemaEnrichment,
13
+ )
10
14
  from kodit.domain.enrichments.architecture.physical.physical import (
11
15
  ENRICHMENT_SUBTYPE_PHYSICAL,
12
16
  PhysicalArchitectureEnrichment,
@@ -19,6 +23,11 @@ from kodit.domain.enrichments.development.snippet.snippet import (
19
23
  SnippetEnrichmentSummary,
20
24
  )
21
25
  from kodit.domain.enrichments.enrichment import EnrichmentV2
26
+ from kodit.domain.enrichments.history.commit_description.commit_description import (
27
+ ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
28
+ CommitDescriptionEnrichment,
29
+ )
30
+ from kodit.domain.enrichments.history.history import ENRICHMENT_TYPE_HISTORY
22
31
  from kodit.domain.enrichments.usage.api_docs import (
23
32
  ENRICHMENT_SUBTYPE_API_DOCS,
24
33
  APIDocEnrichment,
@@ -131,6 +140,26 @@ class SQLAlchemyEnrichmentV2Repository(
131
140
  created_at=db_entity.created_at,
132
141
  updated_at=db_entity.updated_at,
133
142
  )
143
+ if (
144
+ db_entity.type == ENRICHMENT_TYPE_HISTORY
145
+ and db_entity.subtype == ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION
146
+ ):
147
+ return CommitDescriptionEnrichment(
148
+ id=db_entity.id,
149
+ content=db_entity.content,
150
+ created_at=db_entity.created_at,
151
+ updated_at=db_entity.updated_at,
152
+ )
153
+ if (
154
+ db_entity.type == ENRICHMENT_TYPE_ARCHITECTURE
155
+ and db_entity.subtype == ENRICHMENT_SUBTYPE_DATABASE_SCHEMA
156
+ ):
157
+ return DatabaseSchemaEnrichment(
158
+ id=db_entity.id,
159
+ content=db_entity.content,
160
+ created_at=db_entity.created_at,
161
+ updated_at=db_entity.updated_at,
162
+ )
134
163
 
135
164
  raise ValueError(
136
165
  f"Unknown enrichment type: {db_entity.type}/{db_entity.subtype}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.5.5
3
+ Version: 0.5.7
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -1,7 +1,7 @@
1
1
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
2
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=0gHg6pqkExJvz1iV3rjNnM6ZmxmZPlhVrGzZVWp6WuA,704
4
- kodit/app.py,sha256=niIfZiuuDp7mLzrBwQhx_FU7RvKfUALNV5y0o43miss,5802
3
+ kodit/_version.py,sha256=NvV7p6eu_Rli4DWHJnEcpyTUiImNPPDyoDonzzIsNwA,704
4
+ kodit/app.py,sha256=7WxSQcktnpYBmjO1skIjMeBu55rVVRf4lotBEq55pAM,5846
5
5
  kodit/cli.py,sha256=QSTXIUDxZo3anIONY-grZi9_VSehWoS8QoVJZyOmWPQ,3086
6
6
  kodit/cli_utils.py,sha256=umkvt4kWNapk6db6RGz6bmn7oxgDpsW2Vo09MZ37OGg,2430
7
7
  kodit/config.py,sha256=x_67lawaejOenJvl8yMxzXgdIkeWx8Yyc2ISO37GCvc,8031
@@ -13,19 +13,19 @@ kodit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
14
14
  kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
15
15
  kodit/application/factories/reporting_factory.py,sha256=3IpRiAw_olM69db-jbDAtjyGtd6Nh5o8jUJX3-rXCA8,1421
16
- kodit/application/factories/server_factory.py,sha256=Y99haqn_cv9Gci4cC4YRzkfoLUTWtERS9Ghgo5NjGFI,17236
16
+ kodit/application/factories/server_factory.py,sha256=dr0X_zQRUlEybtGZ3NS-kkwTU-K96u2D1Qw5xhWkd88,17409
17
17
  kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
18
18
  kodit/application/services/code_search_application_service.py,sha256=ceyv5TTN-jvlOFOupGa9XwfTTraLNN2GU55kFeulVXY,7763
19
- kodit/application/services/commit_indexing_application_service.py,sha256=SZEWjgFR1dd1yFsWnVxtOUQ_Dh_AA37cXhIsbrmAvd0,34746
20
- kodit/application/services/enrichment_query_service.py,sha256=ICAuMY8iw1LlioXXGDPLCcxeL9kPQYMbNg9YNvOpbXk,13362
19
+ kodit/application/services/commit_indexing_application_service.py,sha256=uRYPkVbiqu1V9bORjQu2yoylskLgCz55vYJ1pODjm94,44690
20
+ kodit/application/services/enrichment_query_service.py,sha256=RMVze-DzS5zAki1iC96Kid7tbg-nHSv0z8eqPsiURqc,15002
21
21
  kodit/application/services/indexing_worker_service.py,sha256=59cZthlzViOVrAWEoZqUTCfLzxx2OO_FOGdM3pYf9Mc,4065
22
22
  kodit/application/services/queue_service.py,sha256=pIHTS8M65FzAhZH5kn54BTiZ43sCbsALYdCFTz9wdqE,2692
23
23
  kodit/application/services/reporting.py,sha256=cwe-S-UpSOE6xSAEhoD1hi4hSWk1bW3YRLJ7463fIvM,3518
24
24
  kodit/application/services/sync_scheduler.py,sha256=hVT3dlmvfbqXKOV_KU5ZQ5gEKBGPJTlvJcF9gP2ZHQM,2853
25
25
  kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
26
26
  kodit/domain/errors.py,sha256=yIsgCjM_yOFIg8l7l-t7jM8pgeAX4cfPq0owf7iz3DA,106
27
- kodit/domain/protocols.py,sha256=YQqL1XO2OWqYhPC8W7nyPrqGW3NKafMEJn2HEGXwHIk,7664
28
- kodit/domain/value_objects.py,sha256=svHQixeLa8fzaJ5NuKl3rBLBVrPfV7VvFd2-U-Vh_Sk,17818
27
+ kodit/domain/protocols.py,sha256=Q6blYD79Tn5LQyNEAioTuPPIdZYXDf46kVpAW2EG2jY,8056
28
+ kodit/domain/value_objects.py,sha256=FW0sTMtcl0Q1qej7vzEg7-Gsv86Z01IbPrDdudsgU3g,18097
29
29
  kodit/domain/enrichments/__init__.py,sha256=UpQMnMEHqaK3u3K-eJZOC28kfBPHALLAjFMdyYBXSPE,33
30
30
  kodit/domain/enrichments/enricher.py,sha256=jnZ5X9RmZA8Acy-RBS2TbEoBg9QSm8AgleqwS9h5WlY,512
31
31
  kodit/domain/enrichments/enrichment.py,sha256=_4lAOFibvSRN-01HB7it61k38IGQsub0gVERqRrhWPs,1776
@@ -33,6 +33,8 @@ kodit/domain/enrichments/request.py,sha256=6zBQhliDcdw8vS4sYPG2mqZSDSbQ5VzY1YQ-4
33
33
  kodit/domain/enrichments/response.py,sha256=NzoMAKgs7c2yo9vvgWjQDo1yO0koKHbbY_SrsqsalAk,205
34
34
  kodit/domain/enrichments/architecture/__init__.py,sha256=hBSliXMuixUZKtF-_zvcgQjnqrdyc4_SjYG2PTRFYpg,39
35
35
  kodit/domain/enrichments/architecture/architecture.py,sha256=_3nF9qdBdcA8rTXPkb1KO2F7OXTcH9SajqNYB-ICaZA,507
36
+ kodit/domain/enrichments/architecture/database_schema/__init__.py,sha256=xS5UGMfHqOXjpr4ZZQup3IUtevJxBDXMH4fO7wuH5vM,35
37
+ kodit/domain/enrichments/architecture/database_schema/database_schema.py,sha256=OOiG5SrN4Jyw_L6_-UVg4DaGWNo5JLiii7e21ZLtDvY,529
36
38
  kodit/domain/enrichments/architecture/physical/__init__.py,sha256=4jc89cGxALWo8d3Xzfb5t-YjcCyDb1dDVGwTqVYBFmc,48
37
39
  kodit/domain/enrichments/architecture/physical/discovery_notes.py,sha256=Wdv41rkUcMgRqXWB5Q9roaGMGFznH4V_I7mELUvDShw,636
38
40
  kodit/domain/enrichments/architecture/physical/formatter.py,sha256=V_JvHsGDPPJ-TqGS-G61P3OS3xe0QpS2NLBEk5jX6Yc,351
@@ -41,6 +43,10 @@ kodit/domain/enrichments/development/__init__.py,sha256=ls7zlKUpSpyLZRl-WTuaow9C
41
43
  kodit/domain/enrichments/development/development.py,sha256=amzcheLEtXbOyhhmjlay_yt1Z2FRyW2CrR8wZWkpC0g,483
42
44
  kodit/domain/enrichments/development/snippet/__init__.py,sha256=M5XVnlDgfqSE5UiAqkQwE1Mbr5Rg8zQpcspHKC3k_xU,34
43
45
  kodit/domain/enrichments/development/snippet/snippet.py,sha256=A1f385Bu3_ZBaDKQrGHZMb6GIiQoo-hORFSw2ca56yQ,791
46
+ kodit/domain/enrichments/history/__init__.py,sha256=OXS0MOFEjD76rBOmLl8yA2L3Q8NYebBkoGhAmgbO2O0,27
47
+ kodit/domain/enrichments/history/history.py,sha256=pdmkU2ZZGFBsZDQ7kKo1hj-GaVKUd0v4Q2Fu15WE2A8,464
48
+ kodit/domain/enrichments/history/commit_description/__init__.py,sha256=j0fVMIkao9RzkLa6JakBPP40KrELl1eb-dfOLvfADMQ,38
49
+ kodit/domain/enrichments/history/commit_description/commit_description.py,sha256=96yKz-YsyWPfUu7zFtnT9AhRe7DjLmky9z0jy7oreFo,518
44
50
  kodit/domain/enrichments/usage/__init__.py,sha256=7W36rvCF6DH-VqW2RiqU6GMlkYYHZy9Wm0DL_3_fbRc,40
45
51
  kodit/domain/enrichments/usage/api_docs.py,sha256=5cvkNXUfAWDb0HJGIViAzIEZDGEnBnWYhkacs4lHCYA,470
46
52
  kodit/domain/enrichments/usage/usage.py,sha256=U_JrxwXWlFtOzCP7fbfMd-NH75W44MwVFliONMzYB4U,453
@@ -51,7 +57,7 @@ kodit/domain/factories/git_repo_factory.py,sha256=EdeQo4HsBi2hVeVvnSnYtFdR3yGVZQ
51
57
  kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
52
58
  kodit/domain/services/bm25_service.py,sha256=-E5k0td2Ucs25qygWkJlY0fl7ZckOUe5xZnKYff3hF8,3631
53
59
  kodit/domain/services/embedding_service.py,sha256=CEcQ2E9XvOcjKNCJEw5soYUNMHJ5LCJGyXzPCl75CPc,4812
54
- kodit/domain/services/git_repository_service.py,sha256=suIBmiBG9OcXUFrw1uiYRidS9yvFEekZU8H-tsY0zs0,16545
60
+ kodit/domain/services/git_repository_service.py,sha256=KtwYF3XKBeNbAHbi-sEdMJ-1jGRy7rmWMZkPpCrh9fw,14980
55
61
  kodit/domain/services/git_service.py,sha256=Lr7kPnnBEa_fWfGA9jpffMK7wcfxQ0wfXgynsbSKSzg,11661
56
62
  kodit/domain/services/physical_architecture_service.py,sha256=0YgoAvbUxT_VwgIh_prftSYnil_XIqNPSoP0g37eIt4,7209
57
63
  kodit/domain/services/task_status_query_service.py,sha256=rI93pTMHeycigQryCWkimXSDzRqx_nJOr07UzPAacPE,736
@@ -89,8 +95,10 @@ kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gA
89
95
  kodit/infrastructure/bm25/local_bm25_repository.py,sha256=YE3pUkPS5n1JNu6oSM_HRBOXM8U04HiY8dMMZCf9CMQ,5197
90
96
  kodit/infrastructure/bm25/vectorchord_bm25_repository.py,sha256=LjbUPj4nPMb9pdEudThUbZTmQjhxvpN314EzKGpXfi0,8621
91
97
  kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
92
- kodit/infrastructure/cloning/git/git_python_adaptor.py,sha256=X0cyoBz3AWeY4lEmAyAqD4i3bXhSBh0ggKng1ERoswI,19944
98
+ kodit/infrastructure/cloning/git/git_python_adaptor.py,sha256=kiiXrjSqdSYT_c_migWff1WEVlJT8JRlgo5m_9T4rrM,21942
93
99
  kodit/infrastructure/cloning/git/working_copy.py,sha256=sPKQN-A1gDVV_QJISNNP4PqxRWxyj5owv5tvWfXMl44,3909
100
+ kodit/infrastructure/database_schema/__init__.py,sha256=jgejYX70fjV69zCuOBiNw3oCQlCKYzxTkjnUUUU7DY0,48
101
+ kodit/infrastructure/database_schema/database_schema_detector.py,sha256=zXU7HqrZU4_EYckloKDbH0gZvZ3_TJG5-Bd5PAkEkXc,10167
94
102
  kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
95
103
  kodit/infrastructure/embedding/embedding_factory.py,sha256=6nP8HKKlNWmDE8ATT5tNQHgPqeTDUMpRuWwn2rsfrOQ,3446
96
104
  kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=urccvadIF-uizmYuzK7ii7hl2HaV7swHCiS8P6n7U18,3507
@@ -134,7 +142,7 @@ kodit/infrastructure/slicing/slicer.py,sha256=EDYkoLf6RsTVloudZUq6LS5X10JJAHWcKW
134
142
  kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
135
143
  kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=OhSIuNEQ725WoxaIpK3jcZvUVPW-b95HKRXr1HjurmI,8824
136
144
  kodit/infrastructure/sqlalchemy/enrichment_association_repository.py,sha256=mjlGH4vkIv1cPfhkZ4SUyGWpMbgeS7QljsK54yQvV4g,2615
137
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py,sha256=9Yiv8I86qOD3YiNcucs6686JtY_8DOQFpEFJmx1_8HM,5177
145
+ kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py,sha256=9C7y6aRzHHkKJYTTAFxU5p0kwJVBjhqHvs-wUZDWsmk,6350
138
146
  kodit/infrastructure/sqlalchemy/entities.py,sha256=kvZqUPCN2TNgovdNAT_0h4Y8zrgFWwkk-OecvcHIz-A,14852
139
147
  kodit/infrastructure/sqlalchemy/git_branch_repository.py,sha256=dW9kBr8aDBXXVmw1zEux2mueiKhTcpG0JxnLuz5yZ3w,3106
140
148
  kodit/infrastructure/sqlalchemy/git_commit_repository.py,sha256=jzYpFV1gjI-Wfgai-hxesglYn6XD384mqIorV1AtNCA,1991
@@ -169,8 +177,8 @@ kodit/utils/dump_config.py,sha256=dd5uPgqh6ATk02Zt59t2JFKR9X17YWjHudV0nE8VktE,11
169
177
  kodit/utils/dump_openapi.py,sha256=EasYOnnpeabwb_sTKQUBrrOLHjPcOFQ7Zx0YKpx9fmM,1239
170
178
  kodit/utils/generate_api_paths.py,sha256=TMtx9v55podDfUmiWaHgJHLtEWLV2sLL-5ejGFMPzAo,3569
171
179
  kodit/utils/path_utils.py,sha256=UB_81rx7Y1G1jalVv2PX8miwaprBbcqEdtoQ3hPT3kU,2451
172
- kodit-0.5.5.dist-info/METADATA,sha256=KprUWaNcvHEDmwR0zjo59ln_jUNOC_d6-AokObNZbk4,7703
173
- kodit-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
174
- kodit-0.5.5.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
175
- kodit-0.5.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
176
- kodit-0.5.5.dist-info/RECORD,,
180
+ kodit-0.5.7.dist-info/METADATA,sha256=Yi8IGWrrk1FLgnC5GiqmBc8V3bJcWz8Fl29-nM8CkcE,7703
181
+ kodit-0.5.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
182
+ kodit-0.5.7.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
183
+ kodit-0.5.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
184
+ kodit-0.5.7.dist-info/RECORD,,
File without changes