kodit-0.5.5-py3-none-any.whl → kodit-0.5.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodit/_version.py +2 -2
- kodit/app.py +2 -0
- kodit/application/factories/server_factory.py +4 -0
- kodit/application/services/commit_indexing_application_service.py +276 -3
- kodit/application/services/enrichment_query_service.py +37 -0
- kodit/domain/enrichments/architecture/database_schema/__init__.py +1 -0
- kodit/domain/enrichments/architecture/database_schema/database_schema.py +17 -0
- kodit/domain/enrichments/history/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/__init__.py +1 -0
- kodit/domain/enrichments/history/commit_description/commit_description.py +17 -0
- kodit/domain/enrichments/history/history.py +18 -0
- kodit/domain/protocols.py +15 -2
- kodit/domain/services/git_repository_service.py +36 -81
- kodit/domain/value_objects.py +4 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +56 -5
- kodit/infrastructure/database_schema/__init__.py +1 -0
- kodit/infrastructure/database_schema/database_schema_detector.py +268 -0
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +29 -0
- {kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/METADATA +1 -1
- {kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/RECORD +23 -15
- {kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/WHEEL +0 -0
- {kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.5.5'
-__version_tuple__ = version_tuple = (0, 5, 5)
+__version__ = version = '0.5.7'
+__version_tuple__ = version_tuple = (0, 5, 7)
 
 __commit_id__ = commit_id = None
kodit/application/factories/server_factory.py
CHANGED

@@ -50,6 +50,9 @@ from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
     VectorChordBM25Repository,
 )
 from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
+from kodit.infrastructure.database_schema.database_schema_detector import (
+    DatabaseSchemaDetector,
+)
 from kodit.infrastructure.embedding.embedding_factory import (
     embedding_domain_service_factory,
 )

@@ -255,6 +258,7 @@ class ServerFactory:
             text_search_service=self.text_search_service(),
             embedding_repository=self.embedding_repository(),
             architecture_service=self.architecture_service(),
+            database_schema_detector=DatabaseSchemaDetector(),
             enrichment_v2_repository=self.enrichment_v2_repository(),
             enricher_service=self.enricher(),
             enrichment_association_repository=self.enrichment_association_repository(),
kodit/application/services/commit_indexing_application_service.py
CHANGED

@@ -14,6 +14,9 @@ if TYPE_CHECKING:
     from kodit.application.services.enrichment_query_service import (
         EnrichmentQueryService,
     )
+    from kodit.domain.enrichments.architecture.database_schema.database_schema import (
+        DatabaseSchemaEnrichment,
+    )
     from kodit.domain.enrichments.architecture.physical.physical import (
         PhysicalArchitectureEnrichment,
     )

@@ -27,11 +30,20 @@ from kodit.domain.enrichments.enrichment import (
     EnrichmentAssociation,
     EnrichmentV2,
 )
+from kodit.domain.enrichments.history.commit_description.commit_description import (
+    CommitDescriptionEnrichment,
+)
 from kodit.domain.enrichments.request import (
     EnrichmentRequest as GenericEnrichmentRequest,
 )
 from kodit.domain.entities import Task
-from kodit.domain.entities.git import
+from kodit.domain.entities.git import (
+    GitCommit,
+    GitFile,
+    GitRepo,
+    SnippetV2,
+    TrackingType,
+)
 from kodit.domain.factories.git_repo_factory import GitRepoFactory
 from kodit.domain.protocols import (
     EnrichmentAssociationRepository,

@@ -63,6 +75,9 @@ from kodit.domain.value_objects import (
     TaskOperation,
     TrackableType,
 )
+from kodit.infrastructure.database_schema.database_schema_detector import (
+    DatabaseSchemaDetector,
+)
 from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
 from kodit.infrastructure.slicing.slicer import Slicer
 from kodit.infrastructure.sqlalchemy import entities as db_entities

@@ -82,6 +97,73 @@ You are a professional software developer. You will be given a snippet of code.
 Please provide a concise explanation of the code.
 """
 
+COMMIT_DESCRIPTION_SYSTEM_PROMPT = """
+You are a professional software developer. You will be given a git commit diff.
+Please provide a concise description of what changes were made and why.
+"""
+
+DATABASE_SCHEMA_SYSTEM_PROMPT = """
+You are an expert database architect and documentation specialist.
+Your task is to create clear, visual documentation of database schemas.
+"""
+
+DATABASE_SCHEMA_TASK_PROMPT = """
+You will be provided with a database schema discovery report.
+Please create comprehensive database schema documentation.
+
+<schema_report>
+{schema_report}
+</schema_report>
+
+**Return the following:**
+
+## Entity List
+
+For each table/entity, write one line:
+- **[Table Name]**: [brief description of what it stores]
+
+## Mermaid ERD
+
+Create a Mermaid Entity Relationship Diagram showing:
+- All entities (tables)
+- Key relationships between entities (if apparent from names or common patterns)
+- Use standard ERD notation
+
+Example format:
+```mermaid
+erDiagram
+    User ||--o{{ Order : places
+    User {{
+        int id PK
+        string email
+        string name
+    }}
+    Order {{
+        int id PK
+        int user_id FK
+        datetime created_at
+    }}
+```
+
+If specific field details aren't available, show just the entity boxes and
+relationships.
+
+## Key Observations
+
+Answer these questions in 1-2 sentences each:
+1. What is the primary data model pattern (e.g., user-centric,
+   event-sourced, multi-tenant)?
+2. What migration strategy is being used?
+3. Are there any notable database design patterns or concerns?
+
+## Rules:
+- Be concise and focus on the high-level structure
+- Infer reasonable relationships from table names when explicit information
+  isn't available
+- If no database schema is found, state that clearly
+- Keep entity descriptions to 10 words or less
+"""
+
 
 class CommitIndexingApplicationService:
     """Application service for commit indexing operations."""

@@ -103,6 +185,7 @@ class CommitIndexingApplicationService:
         text_search_service: EmbeddingDomainService,
         embedding_repository: SqlAlchemyEmbeddingRepository,
         architecture_service: PhysicalArchitectureService,
+        database_schema_detector: DatabaseSchemaDetector,
         enricher_service: Enricher,
         enrichment_v2_repository: EnrichmentV2Repository,
         enrichment_association_repository: EnrichmentAssociationRepository,

@@ -124,6 +207,7 @@ class CommitIndexingApplicationService:
        self.text_search_service = text_search_service
        self.embedding_repository = embedding_repository
        self.architecture_service = architecture_service
+       self.database_schema_detector = database_schema_detector
        self.enrichment_v2_repository = enrichment_v2_repository
        self.enrichment_association_repository = enrichment_association_repository
        self.enricher_service = enricher_service

@@ -191,11 +275,66 @@ class CommitIndexingApplicationService:
                 await self.process_architecture_discovery(repository_id, commit_sha)
             elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
                 await self.process_api_docs(repository_id, commit_sha)
+            elif task.type == TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT:
+                await self.process_commit_description(repository_id, commit_sha)
+            elif task.type == TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT:
+                await self.process_database_schema(repository_id, commit_sha)
             else:
                 raise ValueError(f"Unknown task type: {task.type}")
         else:
             raise ValueError(f"Unknown task type: {task.type}")
 
+    async def _process_files_in_batches(
+        self, cloned_path: Path, all_commits: list[GitCommit], batch_size: int = 100
+    ) -> int:
+        """Process file metadata for all commits in batches to avoid memory exhaustion.
+
+        This loads file metadata (paths, sizes, blob SHAs) in batches and saves them
+        incrementally to avoid holding millions of file objects in memory.
+
+        Args:
+            cloned_path: Path to the cloned repository
+            all_commits: List of all commits from scan
+            batch_size: Number of commits to process at once (default 100)
+
+        Returns:
+            Total number of files processed
+
+        """
+        total_files = 0
+        commit_shas = [commit.commit_sha for commit in all_commits]
+        total_batches = (len(commit_shas) + batch_size - 1) // batch_size
+
+        self._log.info(
+            f"Processing files for {len(commit_shas)} commits "
+            f"in {total_batches} batches"
+        )
+
+        # Process commits in batches
+        for i in range(0, len(commit_shas), batch_size):
+            batch = commit_shas[i : i + batch_size]
+            batch_num = i // batch_size + 1
+
+            self._log.debug(
+                f"Processing batch {batch_num}/{total_batches} ({len(batch)} commits)"
+            )
+
+            # Get file metadata for this batch of commits
+            files = await self.scanner.process_files_for_commits_batch(
+                cloned_path, batch
+            )
+
+            # Save file metadata to database immediately
+            if files:
+                await self.git_file_repository.save_bulk(files)
+                total_files += len(files)
+                self._log.debug(
+                    f"Batch {batch_num}: Saved {len(files)} files "
+                    f"(total so far: {total_files})"
+                )
+
+        return total_files
+
     async def process_clone_repo(self, repository_id: int) -> None:
         """Clone a repository."""
         async with self.operation.create_child(

@@ -233,8 +372,11 @@ class CommitIndexingApplicationService:
             await step.set_current(2, "Saving commits")
             await self.git_commit_repository.save_bulk(scan_result.all_commits)
 
-            await step.set_current(3, "
-            await self.
+            await step.set_current(3, "Processing and saving files in batches")
+            total_files = await self._process_files_in_batches(
+                repo.cloned_path, scan_result.all_commits
+            )
+            self._log.info(f"Processed and saved {total_files} total files")
 
             await step.set_current(4, "Saving branches")
             if scan_result.branches:

@@ -798,6 +940,137 @@ class CommitIndexingApplicationService:
             ]
         )
 
+    async def process_commit_description(
+        self, repository_id: int, commit_sha: str
+    ) -> None:
+        """Handle COMMIT_DESCRIPTION task - generate commit descriptions."""
+        async with self.operation.create_child(
+            TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT,
+            trackable_type=TrackableType.KODIT_REPOSITORY,
+            trackable_id=repository_id,
+        ) as step:
+            # Check if commit description already exists for this commit
+            if await self.enrichment_query_service.has_commit_description_for_commit(
+                commit_sha
+            ):
+                await step.skip("Commit description already exists for commit")
+                return
+
+            # Get repository path
+            repo = await self.repo_repository.get(repository_id)
+            if not repo.cloned_path:
+                raise ValueError(f"Repository {repository_id} has never been cloned")
+
+            await step.set_total(3)
+            await step.set_current(1, "Getting commit diff")
+
+            # Get the diff for this commit
+            diff = await self.scanner.git_adapter.get_commit_diff(
+                repo.cloned_path, commit_sha
+            )
+
+            if not diff or len(diff.strip()) == 0:
+                await step.skip("No diff found for commit")
+                return
+
+            await step.set_current(2, "Enriching commit description with LLM")
+
+            # Enrich the diff through the enricher
+            enrichment_request = GenericEnrichmentRequest(
+                id=commit_sha,
+                text=diff,
+                system_prompt=COMMIT_DESCRIPTION_SYSTEM_PROMPT,
+            )
+
+            enriched_content = ""
+            async for response in self.enricher_service.enrich([enrichment_request]):
+                enriched_content = response.text
+
+            # Create and save commit description enrichment
+            enrichment = await self.enrichment_v2_repository.save(
+                CommitDescriptionEnrichment(
+                    content=enriched_content,
+                )
+            )
+            if not enrichment or not enrichment.id:
+                raise ValueError(
+                    f"Failed to save commit description enrichment for commit "
+                    f"{commit_sha}"
+                )
+            await self.enrichment_association_repository.save(
+                CommitEnrichmentAssociation(
+                    enrichment_id=enrichment.id,
+                    entity_id=commit_sha,
+                )
+            )
+
+            await step.set_current(3, "Commit description enrichment completed")
+
+    async def process_database_schema(
+        self, repository_id: int, commit_sha: str
+    ) -> None:
+        """Handle DATABASE_SCHEMA task - discover and document database schemas."""
+        async with self.operation.create_child(
+            TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT,
+            trackable_type=TrackableType.KODIT_REPOSITORY,
+            trackable_id=repository_id,
+        ) as step:
+            # Check if database schema already exists for this commit
+            if await self.enrichment_query_service.has_database_schema_for_commit(
+                commit_sha
+            ):
+                await step.skip("Database schema already exists for commit")
+                return
+
+            # Get repository path
+            repo = await self.repo_repository.get(repository_id)
+            if not repo.cloned_path:
+                raise ValueError(f"Repository {repository_id} has never been cloned")
+
+            await step.set_total(3)
+            await step.set_current(1, "Discovering database schemas")
+
+            # Discover database schemas
+            schema_report = await self.database_schema_detector.discover_schemas(
+                repo.cloned_path
+            )
+
+            if "No database schemas detected" in schema_report:
+                await step.skip("No database schemas found in repository")
+                return
+
+            await step.set_current(2, "Enriching schema documentation with LLM")
+
+            # Enrich the schema report through the enricher
+            enrichment_request = GenericEnrichmentRequest(
+                id=commit_sha,
+                text=DATABASE_SCHEMA_TASK_PROMPT.format(schema_report=schema_report),
+                system_prompt=DATABASE_SCHEMA_SYSTEM_PROMPT,
+            )
+
+            enriched_content = ""
+            async for response in self.enricher_service.enrich([enrichment_request]):
+                enriched_content = response.text
+
+            # Create and save database schema enrichment
+            enrichment = await self.enrichment_v2_repository.save(
+                DatabaseSchemaEnrichment(
+                    content=enriched_content,
+                )
+            )
+            if not enrichment or not enrichment.id:
+                raise ValueError(
+                    f"Failed to save database schema enrichment for commit {commit_sha}"
+                )
+            await self.enrichment_association_repository.save(
+                CommitEnrichmentAssociation(
+                    enrichment_id=enrichment.id,
+                    entity_id=commit_sha,
+                )
+            )
+
+            await step.set_current(3, "Database schema enrichment completed")
+
     async def _new_snippets_for_type(
         self, all_snippets: list[EnrichmentV2], embedding_type: EmbeddingType
     ) -> list[EnrichmentV2]:
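The new `_process_files_in_batches` method above keeps peak memory proportional to a single batch: it slices the commit list into fixed-size chunks, fetches file metadata for one chunk, persists it immediately, and drops the references before fetching the next. A minimal standalone sketch of that pattern, using placeholder `fetch_files`/`save_bulk` callables rather than kodit's scanner and repository APIs:

```python
import asyncio
from collections.abc import Awaitable, Callable


async def process_in_batches(
    shas: list[str],
    fetch_files: Callable[[list[str]], Awaitable[list[str]]],  # placeholder
    save_bulk: Callable[[list[str]], Awaitable[None]],  # placeholder
    batch_size: int = 100,
) -> int:
    """Fetch and persist per-commit file metadata one batch at a time."""
    total = 0
    for i in range(0, len(shas), batch_size):
        batch = shas[i : i + batch_size]  # at most batch_size commits in flight
        files = await fetch_files(batch)
        if files:
            await save_bulk(files)  # persist immediately so refs can be dropped
            total += len(files)
    return total


async def _demo() -> None:
    async def fetch(batch: list[str]) -> list[str]:
        return [f"{sha}:README.md" for sha in batch]

    async def save(files: list[str]) -> None:
        pass

    # 250 commits -> batches of 100, 100, 50; prints 250
    print(await process_in_batches([f"sha{i}" for i in range(250)], fetch, save))


asyncio.run(_demo())
```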
kodit/application/services/enrichment_query_service.py
CHANGED

@@ -5,6 +5,9 @@ import structlog
 from kodit.domain.enrichments.architecture.architecture import (
     ENRICHMENT_TYPE_ARCHITECTURE,
 )
+from kodit.domain.enrichments.architecture.database_schema.database_schema import (
+    ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
+)
 from kodit.domain.enrichments.architecture.physical.physical import (
     ENRICHMENT_SUBTYPE_PHYSICAL,
 )

@@ -14,6 +17,10 @@ from kodit.domain.enrichments.development.snippet.snippet import (
     ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
 )
 from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
+from kodit.domain.enrichments.history.commit_description.commit_description import (
+    ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
+)
+from kodit.domain.enrichments.history.history import ENRICHMENT_TYPE_HISTORY
 from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
 from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
 from kodit.domain.protocols import (

@@ -215,6 +222,36 @@ class EnrichmentQueryService:
         api_docs = await self.get_api_docs_for_commit(commit_sha)
         return len(api_docs) > 0
 
+    async def get_commit_description_for_commit(
+        self, commit_sha: str
+    ) -> list[EnrichmentV2]:
+        """Get commit description enrichments for a commit."""
+        return await self.get_enrichments_for_commit(
+            commit_sha,
+            enrichment_type=ENRICHMENT_TYPE_HISTORY,
+            enrichment_subtype=ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
+        )
+
+    async def has_commit_description_for_commit(self, commit_sha: str) -> bool:
+        """Check if a commit has commit description enrichments."""
+        commit_descriptions = await self.get_commit_description_for_commit(commit_sha)
+        return len(commit_descriptions) > 0
+
+    async def get_database_schema_for_commit(
+        self, commit_sha: str
+    ) -> list[EnrichmentV2]:
+        """Get database schema enrichments for a commit."""
+        return await self.get_enrichments_for_commit(
+            commit_sha,
+            enrichment_type=ENRICHMENT_TYPE_ARCHITECTURE,
+            enrichment_subtype=ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
+        )
+
+    async def has_database_schema_for_commit(self, commit_sha: str) -> bool:
+        """Check if a commit has database schema enrichments."""
+        database_schemas = await self.get_database_schema_for_commit(commit_sha)
+        return len(database_schemas) > 0
+
     async def associations_for_enrichments(
         self, enrichments: list[EnrichmentV2]
     ) -> list[EnrichmentAssociation]:
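The four methods added above follow the service's existing convention: each `has_*` check is a non-empty test over the matching `get_*` query, which in turn filters `get_enrichments_for_commit` by an enrichment type/subtype pair. Hypothetical usage sketch, assuming an already-wired `EnrichmentQueryService` named `service`:

```python
# Hypothetical usage; `service` is an EnrichmentQueryService instance and
# `sha` a commit SHA that has already been indexed.
async def already_enriched(service, sha: str) -> bool:
    # ("history", "commit_description") and ("architecture", "database_schema")
    # are the type/subtype pairs the new queries filter on.
    has_description = await service.has_commit_description_for_commit(sha)
    has_schema = await service.has_database_schema_for_commit(sha)
    return has_description and has_schema
```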
kodit/domain/enrichments/architecture/database_schema/__init__.py
ADDED

@@ -0,0 +1 @@
+"""Database schema enrichments."""

kodit/domain/enrichments/architecture/database_schema/database_schema.py
ADDED

@@ -0,0 +1,17 @@
+"""Database schema enrichment domain entity."""
+
+from dataclasses import dataclass
+
+from kodit.domain.enrichments.architecture.architecture import ArchitectureEnrichment
+
+ENRICHMENT_SUBTYPE_DATABASE_SCHEMA = "database_schema"
+
+
+@dataclass(frozen=True)
+class DatabaseSchemaEnrichment(ArchitectureEnrichment):
+    """Enrichment containing database schema information for a commit."""
+
+    @property
+    def subtype(self) -> str | None:
+        """Return the enrichment subtype."""
+        return ENRICHMENT_SUBTYPE_DATABASE_SCHEMA

kodit/domain/enrichments/history/__init__.py
ADDED

@@ -0,0 +1 @@
+"""History enrichments."""

kodit/domain/enrichments/history/commit_description/__init__.py
ADDED

@@ -0,0 +1 @@
+"""Commit description enrichments."""

kodit/domain/enrichments/history/commit_description/commit_description.py
ADDED

@@ -0,0 +1,17 @@
+"""Commit description enrichment domain entity."""
+
+from dataclasses import dataclass
+
+from kodit.domain.enrichments.history.history import HistoryEnrichment
+
+ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION = "commit_description"
+
+
+@dataclass(frozen=True)
+class CommitDescriptionEnrichment(HistoryEnrichment):
+    """Enrichment containing a description of what a commit did."""
+
+    @property
+    def subtype(self) -> str | None:
+        """Return the enrichment subtype."""
+        return ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION

kodit/domain/enrichments/history/history.py
ADDED

@@ -0,0 +1,18 @@
+"""History enrichment domain entity."""
+
+from abc import ABC
+from dataclasses import dataclass
+
+from kodit.domain.enrichments.enrichment import CommitEnrichment
+
+ENRICHMENT_TYPE_HISTORY = "history"
+
+
+@dataclass(frozen=True)
+class HistoryEnrichment(CommitEnrichment, ABC):
+    """Enrichment containing historical information for a commit."""
+
+    @property
+    def type(self) -> str:
+        """Return the enrichment type."""
+        return ENRICHMENT_TYPE_HISTORY
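Together these files form a small two-level taxonomy: `HistoryEnrichment` fixes `type` to `"history"` for anything commit-history related, `CommitDescriptionEnrichment` fixes `subtype` to `"commit_description"`, and `DatabaseSchemaEnrichment` hangs the same way off the existing architecture type. A self-contained sketch of the pattern, with a simplified stand-in for kodit's `CommitEnrichment` base (the real base carries ids, timestamps, and more):

```python
from abc import ABC
from dataclasses import dataclass


@dataclass(frozen=True)
class CommitEnrichment(ABC):
    """Simplified stand-in for kodit's commit-enrichment base class."""

    content: str


@dataclass(frozen=True)
class HistoryEnrichment(CommitEnrichment, ABC):
    @property
    def type(self) -> str:
        return "history"  # ENRICHMENT_TYPE_HISTORY


@dataclass(frozen=True)
class CommitDescriptionEnrichment(HistoryEnrichment):
    @property
    def subtype(self) -> str | None:
        return "commit_description"  # ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION


e = CommitDescriptionEnrichment(content="Switched file scanning to batches")
print(e.type, e.subtype)  # -> history commit_description
```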
kodit/domain/protocols.py
CHANGED

@@ -4,6 +4,8 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Protocol, TypeVar
 
+from git import Repo
+
 from kodit.domain.enrichments.enrichment import EnrichmentAssociation, EnrichmentV2
 from kodit.domain.entities import (
     Task,

@@ -163,9 +165,16 @@ class GitAdapter(ABC):
 
     @abstractmethod
     async def get_commit_files(
-        self, local_path: Path, commit_sha: str
+        self, local_path: Path, commit_sha: str, repo: Repo
     ) -> list[dict[str, Any]]:
-        """Get all files in a specific commit from the git tree.
+        """Get all files in a specific commit from the git tree.
+
+        Args:
+            local_path: Path to the repository
+            commit_sha: SHA of the commit to get files for
+            repo: Repo object to reuse (avoids creating new Repo per commit)
+
+        """
 
     @abstractmethod
     async def get_commit_file_data(

@@ -213,6 +222,10 @@ class GitAdapter(ABC):
     ) -> list[str]:
         """Get only commit SHAs for a branch (much faster than full commit data)."""
 
+    @abstractmethod
+    async def get_commit_diff(self, local_path: Path, commit_sha: str) -> str:
+        """Get the diff for a specific commit."""
+
 
 class SnippetRepositoryV2(ABC):
     """Repository for snippet operations."""
kodit/domain/services/git_repository_service.py
CHANGED

@@ -1,6 +1,5 @@
 """Domain services for Git repository scanning and cloning operations."""
 
-import asyncio
 import shutil
 from dataclasses import dataclass
 from datetime import UTC, datetime

@@ -66,51 +65,11 @@ class GitRepositoryScanner:
         tags = await self._process_tags(cloned_path, commit_cache, repo_id)
         self._log.info(f"Found {len(tags)} tags")
 
-
-
+        # Don't load all files into memory - return empty list
+        # Files will be processed in batches by the application service
+        self._log.info("Deferring file processing to avoid memory exhaustion")
 
-        return self._create_scan_result(branches, commit_cache, tags,
-
-    async def _process_commits_concurrently(
-        self,
-        cloned_path: Path,
-        commits_batch: list[tuple[str, dict[str, Any]]],
-    ) -> dict[str, GitCommit]:
-        """Process a batch of commits concurrently."""
-        batch_cache = {}
-
-        async def process_single_commit(
-            commit_sha: str, commit_data: dict[str, Any]
-        ) -> tuple[str, GitCommit | None]:
-            git_commit = await self._create_git_commit_from_data(
-                cloned_path, commit_data
-            )
-            return commit_sha, git_commit
-
-        # Process commits concurrently in smaller batches
-        semaphore = asyncio.Semaphore(50)  # Limit concurrent operations
-
-        async def bounded_process(
-            item: tuple[str, dict[str, Any]],
-        ) -> tuple[str, GitCommit | None]:
-            async with semaphore:
-                return await process_single_commit(item[0], item[1])
-
-        # Process all commits concurrently
-        results = await asyncio.gather(
-            *[bounded_process(item) for item in commits_batch],
-            return_exceptions=True,
-        )
-
-        # Collect successful results
-        for result in results:
-            if isinstance(result, tuple):
-                # Type narrowing: result is now tuple[str, GitCommit | None]
-                commit_sha, git_commit = result
-                if git_commit is not None:
-                    batch_cache[commit_sha] = git_commit
-
-        return batch_cache
+        return self._create_scan_result(branches, commit_cache, tags, [], cloned_path)
 
     async def _process_branches_bulk(
         self,

@@ -167,30 +126,6 @@ class GitRepositoryScanner:
 
         return branches, commit_cache
 
-    async def _create_git_commit_from_data(
-        self, cloned_path: Path, commit_data: dict[str, Any], repo_id: int | None = None
-    ) -> GitCommit | None:
-        """Create GitCommit from pre-fetched commit data."""
-        commit_sha = commit_data["sha"]
-
-        # Get files for this commit
-        files_data = await self.git_adapter.get_commit_files(cloned_path, commit_sha)
-        self._create_git_files(cloned_path, files_data, commit_sha)
-        author = self._format_author_from_data(commit_data)
-
-        # Cache datetime creation
-        created_at = datetime.now(UTC)
-
-        return GitCommit(
-            created_at=created_at,
-            commit_sha=commit_sha,
-            repo_id=repo_id or 0,  # Use 0 as default if not provided
-            date=commit_data["date"],
-            message=commit_data["message"],
-            parent_commit_sha=commit_data["parent_sha"],
-            author=author,
-        )
-
     def _format_author_from_data(self, commit_data: dict[str, Any]) -> str:
         """Format author string from commit data."""
         author_name = commit_data.get("author_name", "")

@@ -376,17 +311,18 @@ class GitRepositoryScanner:
         branches: list[GitBranch],
         commit_cache: dict[str, GitCommit],
         tags: list[GitTag],
-        all_files: list[GitFile],
+        all_files: list[GitFile],  # noqa: ARG002
+        cloned_path: Path | None = None,  # noqa: ARG002
     ) -> RepositoryScanResult:
         """Create final scan result."""
-        # Files
+        # Files list is empty to avoid memory issues - will be processed in batches
         scan_result = RepositoryScanResult(
             branches=branches,
             all_commits=list(commit_cache.values()),
             scan_timestamp=datetime.now(UTC),
-            total_files_across_commits=
+            total_files_across_commits=0,  # Will be updated after batch processing
             all_tags=tags,
-            all_files=
+            all_files=[],  # Empty - processed in batches to avoid memory exhaustion
         )
 
         self._log.info(

@@ -395,16 +331,35 @@ class GitRepositoryScanner:
         )
         return scan_result
 
-    async def
-        self, cloned_path: Path,
+    async def process_files_for_commits_batch(
+        self, cloned_path: Path, commit_shas: list[str]
     ) -> list[GitFile]:
-        """Process files for a
+        """Process files for a batch of commits.
+
+        This allows the application service to process files in batches
+        to avoid loading millions of files into memory at once.
+
+        CRITICAL: Reuses a single Repo object to avoid creating 32K+ Repo instances
+        which would consume massive memory (1-2 MB each).
+        """
+        from git import Repo
+
+        # Open repo once and reuse for all commits in this batch
+        repo = Repo(cloned_path)
         files = []
-
-
-
-
-
+
+        try:
+            for commit_sha in commit_shas:
+                files_data = await self.git_adapter.get_commit_files(
+                    cloned_path, commit_sha, repo=repo
+                )
+                files.extend(
+                    self._create_git_files(cloned_path, files_data, commit_sha)
+                )
+        finally:
+            # Explicitly close the repo to free resources
+            repo.close()
 
         return files
 
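The scanner rewrite above leans on one GitPython property: a `git.Repo` instance caches object-database handles, so a single instance can serve many `repo.commit(sha)` lookups and be closed once, instead of paying the construction cost per commit. A small sketch of that reuse pattern; `commit_messages` is an illustrative helper, not a kodit function:

```python
from pathlib import Path

from git import Repo  # GitPython


def commit_messages(repo_path: Path, shas: list[str]) -> list[str]:
    """Resolve many commits against one shared Repo instance."""
    repo = Repo(repo_path)  # open once per batch, not once per commit
    try:
        return [str(repo.commit(sha).message) for sha in shas]
    finally:
        repo.close()  # release cached pack-file handles
```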
kodit/domain/value_objects.py
CHANGED

@@ -614,6 +614,8 @@ class TaskOperation(StrEnum):
         "kodit.commit.create_architecture_enrichment"
     )
     CREATE_PUBLIC_API_DOCS_FOR_COMMIT = "kodit.commit.create_public_api_docs"
+    CREATE_COMMIT_DESCRIPTION_FOR_COMMIT = "kodit.commit.create_commit_description"
+    CREATE_DATABASE_SCHEMA_FOR_COMMIT = "kodit.commit.create_database_schema"
 
     def is_repository_operation(self) -> bool:
         """Check if the task operation is a repository operation."""

@@ -639,6 +641,8 @@ class PrescribedOperations:
         TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT,
         TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
         TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
+        TaskOperation.CREATE_COMMIT_DESCRIPTION_FOR_COMMIT,
+        TaskOperation.CREATE_DATABASE_SCHEMA_FOR_COMMIT,
     ]
     SYNC_REPOSITORY: ClassVar[list[TaskOperation]] = [
         TaskOperation.SCAN_REPOSITORY,
kodit/infrastructure/cloning/git/git_python_adaptor.py
CHANGED

@@ -346,14 +346,22 @@ class GitPythonAdapter(GitAdapter):
     )
 
     async def get_commit_files(
-        self, local_path: Path, commit_sha: str
+        self, local_path: Path, commit_sha: str, repo: Repo
     ) -> list[dict[str, Any]]:
-        """Get all files in a specific commit from the git tree.
+        """Get all files in a specific commit from the git tree.
+
+        Args:
+            local_path: Path to the repository
+            commit_sha: SHA of the commit to get files for
+            repo: Repo object to reuse (avoids creating new Repo per commit)
+
+        """
 
         def _get_files() -> list[dict[str, Any]]:
             try:
-                repo
-
+                # Use the provided repo object
+                _repo = repo
+                commit = _repo.commit(commit_sha)
 
                 files = []
 

@@ -395,7 +403,11 @@ class GitPythonAdapter(GitAdapter):
         """Get file metadata for a commit, with files checked out to disk."""
         await self._checkout_commit(local_path, commit_sha)
         try:
-
+            repo = Repo(local_path)
+            try:
+                return await self.get_commit_files(local_path, commit_sha, repo)
+            finally:
+                repo.close()
         finally:
             await self.restore_to_branch(local_path, "main")
 

@@ -532,3 +544,42 @@ class GitPythonAdapter(GitAdapter):
             raise
 
         return await asyncio.get_event_loop().run_in_executor(self.executor, _get_tags)
+
+    async def get_commit_diff(self, local_path: Path, commit_sha: str) -> str:
+        """Get the diff for a specific commit."""
+
+        def _get_diff() -> str:
+            try:
+                repo = Repo(local_path)
+                commit = repo.commit(commit_sha)
+
+                # If this is the first commit (no parents), show diff against empty tree
+                if not commit.parents:
+                    diffs = commit.diff(None, create_patch=True)
+                    if not diffs:
+                        return ""
+                    first_diff = diffs[0]
+                    diff_bytes = first_diff.diff
+                    if isinstance(diff_bytes, bytes):
+                        return diff_bytes.decode("utf-8")
+                    return str(diff_bytes) if diff_bytes is not None else ""
+
+                # For commits with parents, show diff against first parent
+                parent = commit.parents[0]
+                diffs = parent.diff(commit, create_patch=True)
+
+                # Combine all diffs into a single string
+                diff_text = ""
+                for diff in diffs:
+                    diff_bytes = diff.diff
+                    if diff_bytes and isinstance(diff_bytes, bytes):
+                        diff_text += diff_bytes.decode("utf-8")
+            except Exception as e:
+                self._log.error(
+                    f"Failed to get diff for commit {commit_sha} in {local_path}: {e}"
+                )
+                raise
+            else:
+                return diff_text
+
+        return await asyncio.get_event_loop().run_in_executor(self.executor, _get_diff)
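For a parentless root commit the adapter above falls back to `commit.diff(None, create_patch=True)`, which in GitPython compares against the working tree; the library also offers an explicit empty-tree sentinel, `NULL_TREE`, a common alternative for that case. A sketch of that variant, offered as an assumption rather than kodit's implementation; note the root-commit patch direction is commit-to-empty-tree, so hunks can look inverted relative to `git show`:

```python
from pathlib import Path

from git import NULL_TREE, Repo  # GitPython


def commit_patch(repo_path: Path, sha: str) -> str:
    """Render a commit's patch text, handling the root commit explicitly."""
    repo = Repo(repo_path)
    try:
        commit = repo.commit(sha)
        if commit.parents:
            # Ordinary commit: diff first parent -> commit, as the adapter does.
            diffs = commit.parents[0].diff(commit, create_patch=True)
        else:
            # Root commit: diff against the empty tree rather than the worktree.
            diffs = commit.diff(NULL_TREE, create_patch=True)
        return "".join(
            d.diff.decode("utf-8", errors="replace")
            for d in diffs
            if isinstance(d.diff, bytes)
        )
    finally:
        repo.close()
```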
kodit/infrastructure/database_schema/__init__.py
ADDED

@@ -0,0 +1 @@
+"""Database schema detection infrastructure."""

kodit/infrastructure/database_schema/database_schema_detector.py
ADDED

@@ -0,0 +1,268 @@
+"""Database schema detector for discovering database schemas in a repository."""
+
+import re
+from pathlib import Path
+from typing import ClassVar
+
+
+class DatabaseSchemaDetector:
+    """Detects database schemas from various sources in a repository."""
+
+    # File patterns to look for
+    MIGRATION_PATTERNS: ClassVar[list[str]] = [
+        "**/migrations/**/*.sql",
+        "**/migrations/**/*.py",
+        "**/migrate/**/*.sql",
+        "**/migrate/**/*.go",
+        "**/db/migrate/**/*.rb",
+        "**/alembic/versions/**/*.py",
+        "**/liquibase/**/*.xml",
+        "**/flyway/**/*.sql",
+    ]
+
+    SQL_FILE_PATTERNS: ClassVar[list[str]] = [
+        "**/*.sql",
+        "**/schema/**/*.sql",
+        "**/schemas/**/*.sql",
+        "**/database/**/*.sql",
+        "**/db/**/*.sql",
+    ]
+
+    ORM_MODEL_PATTERNS: ClassVar[list[str]] = [
+        "**/models/**/*.py",  # SQLAlchemy, Django
+        "**/models/**/*.go",  # GORM
+        "**/entities/**/*.py",  # SQLAlchemy
+        "**/entities/**/*.ts",  # TypeORM
+        "**/entities/**/*.js",  # TypeORM/Sequelize
+    ]
+
+    # Regex patterns for schema detection
+    CREATE_TABLE_PATTERN = re.compile(
+        r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?",
+        re.IGNORECASE,
+    )
+
+    SQLALCHEMY_MODEL_PATTERN = re.compile(
+        r"class\s+(\w+)\s*\([^)]*(?:Base|Model|db\.Model)[^)]*\):",
+        re.MULTILINE,
+    )
+
+    GORM_MODEL_PATTERN = re.compile(
+        r"type\s+(\w+)\s+struct\s*{[^}]*gorm\.Model",
+        re.MULTILINE | re.DOTALL,
+    )
+
+    TYPEORM_ENTITY_PATTERN = re.compile(
+        r"@Entity\([^)]*\)\s*(?:export\s+)?class\s+(\w+)",
+        re.MULTILINE,
+    )
+
+    async def discover_schemas(self, repo_path: Path) -> str:
+        """Discover database schemas and generate a structured report."""
+        findings: dict[str, set[str] | list[str] | list[dict] | None] = {
+            "tables": set(),
+            "migration_files": [],
+            "sql_files": [],
+            "orm_models": [],
+            "orm_type": None,
+        }
+
+        # Detect migration files
+        await self._detect_migrations(repo_path, findings)
+
+        # Detect SQL schema files
+        await self._detect_sql_files(repo_path, findings)
+
+        # Detect ORM models
+        await self._detect_orm_models(repo_path, findings)
+
+        # Generate report
+        return self._generate_report(findings)
+
+    async def _detect_migrations(self, repo_path: Path, findings: dict) -> None:
+        """Detect migration files."""
+        for pattern in self.MIGRATION_PATTERNS:
+            for file_path in repo_path.glob(pattern):
+                if file_path.is_file():
+                    findings["migration_files"].append(str(file_path.relative_to(repo_path)))
+                    # Try to extract table names from migrations
+                    await self._extract_tables_from_file(file_path, findings)
+
+    async def _detect_sql_files(self, repo_path: Path, findings: dict) -> None:
+        """Detect SQL schema files."""
+        migration_paths = set(findings["migration_files"])
+
+        for pattern in self.SQL_FILE_PATTERNS:
+            for file_path in repo_path.glob(pattern):
+                if file_path.is_file():
+                    rel_path = str(file_path.relative_to(repo_path))
+                    # Skip if already counted as migration
+                    if rel_path not in migration_paths:
+                        findings["sql_files"].append(rel_path)
+                        await self._extract_tables_from_file(file_path, findings)
+
+    async def _detect_orm_models(self, repo_path: Path, findings: dict) -> None:
+        """Detect ORM model files."""
+        for pattern in self.ORM_MODEL_PATTERNS:
+            for file_path in repo_path.glob(pattern):
+                if file_path.is_file():
+                    rel_path = str(file_path.relative_to(repo_path))
+                    models = await self._extract_orm_models(file_path)
+                    if models:
+                        findings["orm_models"].append({
+                            "file": rel_path,
+                            "models": models,
+                        })
+                        findings["tables"].update(models)
+
+    async def _extract_tables_from_file(self, file_path: Path, findings: dict) -> None:
+        """Extract table names from SQL or migration files."""
+        try:
+            content = file_path.read_text(encoding="utf-8", errors="ignore")
+
+            # Look for CREATE TABLE statements
+            for match in self.CREATE_TABLE_PATTERN.finditer(content):
+                table_name = match.group(1)
+                findings["tables"].add(table_name)
+
+        except (OSError, UnicodeDecodeError):
+            pass
+
+    async def _extract_orm_models(self, file_path: Path) -> list[str]:
+        """Extract ORM model names from model files."""
+        models: list[str] = []
+
+        try:
+            content = file_path.read_text(encoding="utf-8", errors="ignore")
+            suffix = file_path.suffix
+
+            if suffix == ".py":
+                # SQLAlchemy or Django models
+                models.extend(
+                    match.group(1)
+                    for match in self.SQLALCHEMY_MODEL_PATTERN.finditer(content)
+                )
+
+            elif suffix == ".go":
+                # GORM models
+                models.extend(
+                    match.group(1)
+                    for match in self.GORM_MODEL_PATTERN.finditer(content)
+                )
+
+            elif suffix in [".ts", ".js"]:
+                # TypeORM entities
+                models.extend(
+                    match.group(1)
+                    for match in self.TYPEORM_ENTITY_PATTERN.finditer(content)
+                )
+
+        except (OSError, UnicodeDecodeError):
+            pass
+
+        return models
+
+    def _generate_report(self, findings: dict) -> str:  # noqa: PLR0915, C901, PLR0912
+        """Generate a structured report of database schema findings."""
+        lines = []
+
+        # Summary
+        lines.append("# Database Schema Discovery Report")
+        lines.append("")
+
+        has_findings = (
+            findings["tables"]
+            or findings["migration_files"]
+            or findings["sql_files"]
+            or findings["orm_models"]
+        )
+        if not has_findings:
+            lines.append("No database schemas detected in this repository.")
+            return "\n".join(lines)
+
+        # Tables/Entities found
+        if findings["tables"]:
+            lines.append(f"## Detected Tables/Entities ({len(findings['tables'])})")
+            lines.append("")
+            lines.extend(f"- {table}" for table in sorted(findings["tables"]))
+            lines.append("")
+
+        # Migration files
+        if findings["migration_files"]:
+            lines.append(f"## Migration Files ({len(findings['migration_files'])})")
+            lines.append("")
+            lines.append(
+                "Database migrations detected, suggesting schema evolution over time:"
+            )
+            lines.extend(
+                f"- {mig_file}" for mig_file in findings["migration_files"][:10]
+            )
+            if len(findings["migration_files"]) > 10:
+                lines.append(f"- ... and {len(findings['migration_files']) - 10} more")
+            lines.append("")
+
+        # SQL files
+        if findings["sql_files"]:
+            lines.append(f"## SQL Schema Files ({len(findings['sql_files'])})")
+            lines.append("")
+            lines.extend(f"- {sql_file}" for sql_file in findings["sql_files"][:10])
+            if len(findings["sql_files"]) > 10:
+                lines.append(f"- ... and {len(findings['sql_files']) - 10} more")
+            lines.append("")
+
+        # ORM models
+        if findings["orm_models"]:
+            lines.append(f"## ORM Models ({len(findings['orm_models'])} files)")
+            lines.append("")
+            lines.append(
+                "ORM models detected, suggesting object-relational mapping:"
+            )
+            for orm_info in findings["orm_models"][:10]:  # Limit to first 10
+                model_names = ", ".join(orm_info["models"][:5])
+                lines.append(f"- {orm_info['file']}: {model_names}")
+                if len(orm_info["models"]) > 5:
+                    lines.append(f"  (and {len(orm_info['models']) - 5} more models)")
+            if len(findings["orm_models"]) > 10:
+                lines.append(f"- ... and {len(findings['orm_models']) - 10} more files")
+            lines.append("")
+
+        # Inferred database type
+        lines.append("## Inferred Information")
+        lines.append("")
+
+        mig_files_str = str(findings.get("migration_files", []))
+        mig_files = findings.get("migration_files", [])
+
+        if "alembic" in mig_files_str:
+            lines.append("- Migration framework: Alembic (Python/SQLAlchemy)")
+        elif "django" in mig_files_str or any(
+            "migrations" in f and f.endswith(".py") for f in mig_files
+        ):
+            lines.append("- Migration framework: Django Migrations")
+        elif any(".go" in f for f in mig_files):
+            lines.append(
+                "- Migration framework: Go-based migrations (golang-migrate)"
+            )
+        elif "flyway" in mig_files_str:
+            lines.append("- Migration framework: Flyway")
+        elif "liquibase" in mig_files_str:
+            lines.append("- Migration framework: Liquibase")
+
+        if findings["orm_models"]:
+            orm_models = findings["orm_models"]
+            py_models = sum(1 for m in orm_models if m["file"].endswith(".py"))
+            go_models = sum(1 for m in orm_models if m["file"].endswith(".go"))
+            ts_models = sum(
+                1 for m in orm_models if m["file"].endswith((".ts", ".js"))
+            )
+
+            if py_models > 0:
+                lines.append("- ORM: Python (likely SQLAlchemy or Django ORM)")
+            if go_models > 0:
+                lines.append("- ORM: Go (likely GORM)")
+            if ts_models > 0:
+                lines.append(
+                    "- ORM: TypeScript/JavaScript (likely TypeORM or Sequelize)"
+                )
+
+        return "\n".join(lines)
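Detection here is purely lexical, so the regexes carry the weight; `CREATE_TABLE_PATTERN`, for instance, accepts an optional `IF NOT EXISTS` clause and optional backtick or double-quote identifier quoting, case-insensitively. A quick self-check of that behavior against sample DDL:

```python
import re

# Same pattern as DatabaseSchemaDetector.CREATE_TABLE_PATTERN above.
CREATE_TABLE_PATTERN = re.compile(
    r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?",
    re.IGNORECASE,
)

ddl = """
create table users (id integer primary key);
CREATE TABLE IF NOT EXISTS `orders` (id INTEGER REFERENCES users(id));
CREATE TABLE "line_items" (order_id INTEGER REFERENCES orders(id));
"""

print([m.group(1) for m in CREATE_TABLE_PATTERN.finditer(ddl)])
# -> ['users', 'orders', 'line_items']
```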
kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py
CHANGED

@@ -7,6 +7,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from kodit.domain.enrichments.architecture.architecture import (
     ENRICHMENT_TYPE_ARCHITECTURE,
 )
+from kodit.domain.enrichments.architecture.database_schema.database_schema import (
+    ENRICHMENT_SUBTYPE_DATABASE_SCHEMA,
+    DatabaseSchemaEnrichment,
+)
 from kodit.domain.enrichments.architecture.physical.physical import (
     ENRICHMENT_SUBTYPE_PHYSICAL,
     PhysicalArchitectureEnrichment,

@@ -19,6 +23,11 @@ from kodit.domain.enrichments.development.snippet.snippet import (
     SnippetEnrichmentSummary,
 )
 from kodit.domain.enrichments.enrichment import EnrichmentV2
+from kodit.domain.enrichments.history.commit_description.commit_description import (
+    ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION,
+    CommitDescriptionEnrichment,
+)
+from kodit.domain.enrichments.history.history import ENRICHMENT_TYPE_HISTORY
 from kodit.domain.enrichments.usage.api_docs import (
     ENRICHMENT_SUBTYPE_API_DOCS,
     APIDocEnrichment,

@@ -131,6 +140,26 @@ class SQLAlchemyEnrichmentV2Repository(
                 created_at=db_entity.created_at,
                 updated_at=db_entity.updated_at,
             )
+        if (
+            db_entity.type == ENRICHMENT_TYPE_HISTORY
+            and db_entity.subtype == ENRICHMENT_SUBTYPE_COMMIT_DESCRIPTION
+        ):
+            return CommitDescriptionEnrichment(
+                id=db_entity.id,
+                content=db_entity.content,
+                created_at=db_entity.created_at,
+                updated_at=db_entity.updated_at,
+            )
+        if (
+            db_entity.type == ENRICHMENT_TYPE_ARCHITECTURE
+            and db_entity.subtype == ENRICHMENT_SUBTYPE_DATABASE_SCHEMA
+        ):
+            return DatabaseSchemaEnrichment(
+                id=db_entity.id,
+                content=db_entity.content,
+                created_at=db_entity.created_at,
+                updated_at=db_entity.updated_at,
+            )
 
         raise ValueError(
             f"Unknown enrichment type: {db_entity.type}/{db_entity.subtype}"
{kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/RECORD
CHANGED

@@ -1,7 +1,7 @@
 kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
 kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
-kodit/_version.py,sha256=
-kodit/app.py,sha256=
+kodit/_version.py,sha256=NvV7p6eu_Rli4DWHJnEcpyTUiImNPPDyoDonzzIsNwA,704
+kodit/app.py,sha256=7WxSQcktnpYBmjO1skIjMeBu55rVVRf4lotBEq55pAM,5846
 kodit/cli.py,sha256=QSTXIUDxZo3anIONY-grZi9_VSehWoS8QoVJZyOmWPQ,3086
 kodit/cli_utils.py,sha256=umkvt4kWNapk6db6RGz6bmn7oxgDpsW2Vo09MZ37OGg,2430
 kodit/config.py,sha256=x_67lawaejOenJvl8yMxzXgdIkeWx8Yyc2ISO37GCvc,8031

@@ -13,19 +13,19 @@ kodit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 kodit/application/__init__.py,sha256=mH50wTpgP9dhbKztFsL8Dda9Hi18TSnMVxXtpp4aGOA,35
 kodit/application/factories/__init__.py,sha256=bU5CvEnaBePZ7JbkCOp1MGTNP752bnU2uEqmfy5FdRk,37
 kodit/application/factories/reporting_factory.py,sha256=3IpRiAw_olM69db-jbDAtjyGtd6Nh5o8jUJX3-rXCA8,1421
-kodit/application/factories/server_factory.py,sha256=
+kodit/application/factories/server_factory.py,sha256=dr0X_zQRUlEybtGZ3NS-kkwTU-K96u2D1Qw5xhWkd88,17409
 kodit/application/services/__init__.py,sha256=p5UQNw-H5sxQvs5Etfte93B3cJ1kKW6DNxK34uFvU1E,38
 kodit/application/services/code_search_application_service.py,sha256=ceyv5TTN-jvlOFOupGa9XwfTTraLNN2GU55kFeulVXY,7763
-kodit/application/services/commit_indexing_application_service.py,sha256=
-kodit/application/services/enrichment_query_service.py,sha256=
+kodit/application/services/commit_indexing_application_service.py,sha256=uRYPkVbiqu1V9bORjQu2yoylskLgCz55vYJ1pODjm94,44690
+kodit/application/services/enrichment_query_service.py,sha256=RMVze-DzS5zAki1iC96Kid7tbg-nHSv0z8eqPsiURqc,15002
 kodit/application/services/indexing_worker_service.py,sha256=59cZthlzViOVrAWEoZqUTCfLzxx2OO_FOGdM3pYf9Mc,4065
 kodit/application/services/queue_service.py,sha256=pIHTS8M65FzAhZH5kn54BTiZ43sCbsALYdCFTz9wdqE,2692
 kodit/application/services/reporting.py,sha256=cwe-S-UpSOE6xSAEhoD1hi4hSWk1bW3YRLJ7463fIvM,3518
 kodit/application/services/sync_scheduler.py,sha256=hVT3dlmvfbqXKOV_KU5ZQ5gEKBGPJTlvJcF9gP2ZHQM,2853
 kodit/domain/__init__.py,sha256=TCpg4Xx-oF4mKV91lo4iXqMEfBT1OoRSYnbG-zVWolA,66
 kodit/domain/errors.py,sha256=yIsgCjM_yOFIg8l7l-t7jM8pgeAX4cfPq0owf7iz3DA,106
-kodit/domain/protocols.py,sha256=
-kodit/domain/value_objects.py,sha256=
+kodit/domain/protocols.py,sha256=Q6blYD79Tn5LQyNEAioTuPPIdZYXDf46kVpAW2EG2jY,8056
+kodit/domain/value_objects.py,sha256=FW0sTMtcl0Q1qej7vzEg7-Gsv86Z01IbPrDdudsgU3g,18097
 kodit/domain/enrichments/__init__.py,sha256=UpQMnMEHqaK3u3K-eJZOC28kfBPHALLAjFMdyYBXSPE,33
 kodit/domain/enrichments/enricher.py,sha256=jnZ5X9RmZA8Acy-RBS2TbEoBg9QSm8AgleqwS9h5WlY,512
 kodit/domain/enrichments/enrichment.py,sha256=_4lAOFibvSRN-01HB7it61k38IGQsub0gVERqRrhWPs,1776

@@ -33,6 +33,8 @@ kodit/domain/enrichments/request.py,sha256=6zBQhliDcdw8vS4sYPG2mqZSDSbQ5VzY1YQ-4
 kodit/domain/enrichments/response.py,sha256=NzoMAKgs7c2yo9vvgWjQDo1yO0koKHbbY_SrsqsalAk,205
 kodit/domain/enrichments/architecture/__init__.py,sha256=hBSliXMuixUZKtF-_zvcgQjnqrdyc4_SjYG2PTRFYpg,39
 kodit/domain/enrichments/architecture/architecture.py,sha256=_3nF9qdBdcA8rTXPkb1KO2F7OXTcH9SajqNYB-ICaZA,507
+kodit/domain/enrichments/architecture/database_schema/__init__.py,sha256=xS5UGMfHqOXjpr4ZZQup3IUtevJxBDXMH4fO7wuH5vM,35
+kodit/domain/enrichments/architecture/database_schema/database_schema.py,sha256=OOiG5SrN4Jyw_L6_-UVg4DaGWNo5JLiii7e21ZLtDvY,529
 kodit/domain/enrichments/architecture/physical/__init__.py,sha256=4jc89cGxALWo8d3Xzfb5t-YjcCyDb1dDVGwTqVYBFmc,48
 kodit/domain/enrichments/architecture/physical/discovery_notes.py,sha256=Wdv41rkUcMgRqXWB5Q9roaGMGFznH4V_I7mELUvDShw,636
 kodit/domain/enrichments/architecture/physical/formatter.py,sha256=V_JvHsGDPPJ-TqGS-G61P3OS3xe0QpS2NLBEk5jX6Yc,351

@@ -41,6 +43,10 @@ kodit/domain/enrichments/development/__init__.py,sha256=ls7zlKUpSpyLZRl-WTuaow9C
 kodit/domain/enrichments/development/development.py,sha256=amzcheLEtXbOyhhmjlay_yt1Z2FRyW2CrR8wZWkpC0g,483
 kodit/domain/enrichments/development/snippet/__init__.py,sha256=M5XVnlDgfqSE5UiAqkQwE1Mbr5Rg8zQpcspHKC3k_xU,34
 kodit/domain/enrichments/development/snippet/snippet.py,sha256=A1f385Bu3_ZBaDKQrGHZMb6GIiQoo-hORFSw2ca56yQ,791
+kodit/domain/enrichments/history/__init__.py,sha256=OXS0MOFEjD76rBOmLl8yA2L3Q8NYebBkoGhAmgbO2O0,27
+kodit/domain/enrichments/history/history.py,sha256=pdmkU2ZZGFBsZDQ7kKo1hj-GaVKUd0v4Q2Fu15WE2A8,464
+kodit/domain/enrichments/history/commit_description/__init__.py,sha256=j0fVMIkao9RzkLa6JakBPP40KrELl1eb-dfOLvfADMQ,38
+kodit/domain/enrichments/history/commit_description/commit_description.py,sha256=96yKz-YsyWPfUu7zFtnT9AhRe7DjLmky9z0jy7oreFo,518
 kodit/domain/enrichments/usage/__init__.py,sha256=7W36rvCF6DH-VqW2RiqU6GMlkYYHZy9Wm0DL_3_fbRc,40
 kodit/domain/enrichments/usage/api_docs.py,sha256=5cvkNXUfAWDb0HJGIViAzIEZDGEnBnWYhkacs4lHCYA,470
 kodit/domain/enrichments/usage/usage.py,sha256=U_JrxwXWlFtOzCP7fbfMd-NH75W44MwVFliONMzYB4U,453

@@ -51,7 +57,7 @@ kodit/domain/factories/git_repo_factory.py,sha256=EdeQo4HsBi2hVeVvnSnYtFdR3yGVZQ
 kodit/domain/services/__init__.py,sha256=Q1GhCK_PqKHYwYE4tkwDz5BIyXkJngLBBOHhzvX8nzo,42
 kodit/domain/services/bm25_service.py,sha256=-E5k0td2Ucs25qygWkJlY0fl7ZckOUe5xZnKYff3hF8,3631
 kodit/domain/services/embedding_service.py,sha256=CEcQ2E9XvOcjKNCJEw5soYUNMHJ5LCJGyXzPCl75CPc,4812
-kodit/domain/services/git_repository_service.py,sha256=
+kodit/domain/services/git_repository_service.py,sha256=KtwYF3XKBeNbAHbi-sEdMJ-1jGRy7rmWMZkPpCrh9fw,14980
 kodit/domain/services/git_service.py,sha256=Lr7kPnnBEa_fWfGA9jpffMK7wcfxQ0wfXgynsbSKSzg,11661
 kodit/domain/services/physical_architecture_service.py,sha256=0YgoAvbUxT_VwgIh_prftSYnil_XIqNPSoP0g37eIt4,7209
 kodit/domain/services/task_status_query_service.py,sha256=rI93pTMHeycigQryCWkimXSDzRqx_nJOr07UzPAacPE,736

@@ -89,8 +95,10 @@ kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gA
 kodit/infrastructure/bm25/local_bm25_repository.py,sha256=YE3pUkPS5n1JNu6oSM_HRBOXM8U04HiY8dMMZCf9CMQ,5197
 kodit/infrastructure/bm25/vectorchord_bm25_repository.py,sha256=LjbUPj4nPMb9pdEudThUbZTmQjhxvpN314EzKGpXfi0,8621
 kodit/infrastructure/cloning/git/__init__.py,sha256=20ePcp0qE6BuLsjsv4KYB1DzKhMIMsPXwEqIEZtjTJs,34
-kodit/infrastructure/cloning/git/git_python_adaptor.py,sha256=
+kodit/infrastructure/cloning/git/git_python_adaptor.py,sha256=kiiXrjSqdSYT_c_migWff1WEVlJT8JRlgo5m_9T4rrM,21942
 kodit/infrastructure/cloning/git/working_copy.py,sha256=sPKQN-A1gDVV_QJISNNP4PqxRWxyj5owv5tvWfXMl44,3909
+kodit/infrastructure/database_schema/__init__.py,sha256=jgejYX70fjV69zCuOBiNw3oCQlCKYzxTkjnUUUU7DY0,48
+kodit/infrastructure/database_schema/database_schema_detector.py,sha256=zXU7HqrZU4_EYckloKDbH0gZvZ3_TJG5-Bd5PAkEkXc,10167
 kodit/infrastructure/embedding/__init__.py,sha256=F-8nLlWAerYJ0MOIA4tbXHLan8bW5rRR84vzxx6tRKI,39
 kodit/infrastructure/embedding/embedding_factory.py,sha256=6nP8HKKlNWmDE8ATT5tNQHgPqeTDUMpRuWwn2rsfrOQ,3446
 kodit/infrastructure/embedding/local_vector_search_repository.py,sha256=urccvadIF-uizmYuzK7ii7hl2HaV7swHCiS8P6n7U18,3507

@@ -134,7 +142,7 @@ kodit/infrastructure/slicing/slicer.py,sha256=EDYkoLf6RsTVloudZUq6LS5X10JJAHWcKW
 kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
 kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=OhSIuNEQ725WoxaIpK3jcZvUVPW-b95HKRXr1HjurmI,8824
 kodit/infrastructure/sqlalchemy/enrichment_association_repository.py,sha256=mjlGH4vkIv1cPfhkZ4SUyGWpMbgeS7QljsK54yQvV4g,2615
-kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py,sha256=
+kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py,sha256=9C7y6aRzHHkKJYTTAFxU5p0kwJVBjhqHvs-wUZDWsmk,6350
 kodit/infrastructure/sqlalchemy/entities.py,sha256=kvZqUPCN2TNgovdNAT_0h4Y8zrgFWwkk-OecvcHIz-A,14852
 kodit/infrastructure/sqlalchemy/git_branch_repository.py,sha256=dW9kBr8aDBXXVmw1zEux2mueiKhTcpG0JxnLuz5yZ3w,3106
 kodit/infrastructure/sqlalchemy/git_commit_repository.py,sha256=jzYpFV1gjI-Wfgai-hxesglYn6XD384mqIorV1AtNCA,1991

@@ -169,8 +177,8 @@ kodit/utils/dump_config.py,sha256=dd5uPgqh6ATk02Zt59t2JFKR9X17YWjHudV0nE8VktE,11
 kodit/utils/dump_openapi.py,sha256=EasYOnnpeabwb_sTKQUBrrOLHjPcOFQ7Zx0YKpx9fmM,1239
 kodit/utils/generate_api_paths.py,sha256=TMtx9v55podDfUmiWaHgJHLtEWLV2sLL-5ejGFMPzAo,3569
 kodit/utils/path_utils.py,sha256=UB_81rx7Y1G1jalVv2PX8miwaprBbcqEdtoQ3hPT3kU,2451
-kodit-0.5.5.dist-info/METADATA,sha256=
-kodit-0.5.5.dist-info/WHEEL,sha256=
-kodit-0.5.5.dist-info/entry_points.txt,sha256=
-kodit-0.5.5.dist-info/licenses/LICENSE,sha256=
-kodit-0.5.5.dist-info/RECORD,,
+kodit-0.5.7.dist-info/METADATA,sha256=Yi8IGWrrk1FLgnC5GiqmBc8V3bJcWz8Fl29-nM8CkcE,7703
+kodit-0.5.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kodit-0.5.7.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+kodit-0.5.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kodit-0.5.7.dist-info/RECORD,,
{kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/WHEEL
File without changes

{kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/entry_points.txt
File without changes

{kodit-0.5.5.dist-info → kodit-0.5.7.dist-info}/licenses/LICENSE
File without changes