kodit 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +10 -12
- kodit/application/factories/server_factory.py +78 -11
- kodit/application/services/commit_indexing_application_service.py +188 -31
- kodit/application/services/enrichment_query_service.py +95 -0
- kodit/config.py +3 -3
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/protocols.py +7 -6
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/tracking/__init__.py +1 -0
- kodit/domain/tracking/resolution_service.py +81 -0
- kodit/domain/tracking/trackable.py +21 -0
- kodit/domain/value_objects.py +6 -23
- kodit/infrastructure/api/v1/dependencies.py +15 -0
- kodit/infrastructure/api/v1/routers/commits.py +81 -0
- kodit/infrastructure/api/v1/routers/repositories.py +99 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/snippet_mapper.py +20 -22
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +56 -391
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +46 -38
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
- kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +5 -6
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/METADATA +1 -1
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/RECORD +67 -32
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/WHEEL +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -10,6 +10,7 @@ from sqlalchemy import (
|
|
|
10
10
|
Float,
|
|
11
11
|
ForeignKey,
|
|
12
12
|
ForeignKeyConstraint,
|
|
13
|
+
Index,
|
|
13
14
|
Integer,
|
|
14
15
|
String,
|
|
15
16
|
TypeDecorator,
|
|
@@ -483,44 +484,6 @@ class CommitSnippetV2(Base):
|
|
|
483
484
|
self.snippet_sha = snippet_sha
|
|
484
485
|
|
|
485
486
|
|
|
486
|
-
# Enrichment model for SnippetV2
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
class EnrichmentType(Enum):
|
|
490
|
-
"""Enrichment type enum."""
|
|
491
|
-
|
|
492
|
-
UNKNOWN = "unknown"
|
|
493
|
-
SUMMARIZATION = "summarization"
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
class Enrichment(Base, CommonMixin):
|
|
497
|
-
"""Enrichment model for snippet enrichments."""
|
|
498
|
-
|
|
499
|
-
__tablename__ = "enrichments"
|
|
500
|
-
|
|
501
|
-
snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
|
|
502
|
-
type: Mapped[EnrichmentType] = mapped_column(
|
|
503
|
-
SQLAlchemyEnum(EnrichmentType), index=True
|
|
504
|
-
)
|
|
505
|
-
content: Mapped[str] = mapped_column(UnicodeText)
|
|
506
|
-
|
|
507
|
-
__table_args__ = (
|
|
508
|
-
UniqueConstraint("snippet_sha", "type", name="uix_snippet_enrichment"),
|
|
509
|
-
)
|
|
510
|
-
|
|
511
|
-
def __init__(
|
|
512
|
-
self,
|
|
513
|
-
snippet_sha: str,
|
|
514
|
-
type: EnrichmentType, # noqa: A002
|
|
515
|
-
content: str,
|
|
516
|
-
) -> None:
|
|
517
|
-
"""Initialize enrichment."""
|
|
518
|
-
super().__init__()
|
|
519
|
-
self.snippet_sha = snippet_sha
|
|
520
|
-
self.type = type
|
|
521
|
-
self.content = content
|
|
522
|
-
|
|
523
|
-
|
|
524
487
|
class CommitIndex(Base):
|
|
525
488
|
"""Commit index model."""
|
|
526
489
|
|
|
@@ -559,3 +522,48 @@ class CommitIndex(Base):
|
|
|
559
522
|
self.error_message = error_message
|
|
560
523
|
self.files_processed = files_processed
|
|
561
524
|
self.processing_time_seconds = processing_time_seconds
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
class EnrichmentV2(Base, CommonMixin):
|
|
528
|
+
"""Generic enrichment entity."""
|
|
529
|
+
|
|
530
|
+
__tablename__ = "enrichments_v2"
|
|
531
|
+
|
|
532
|
+
type: Mapped[str] = mapped_column(String, nullable=False, index=True)
|
|
533
|
+
subtype: Mapped[str] = mapped_column(String, nullable=False, index=True)
|
|
534
|
+
content: Mapped[str] = mapped_column(UnicodeText, nullable=False)
|
|
535
|
+
|
|
536
|
+
__table_args__ = (Index("idx_type_subtype", "type", "subtype"),)
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
class EnrichmentAssociation(Base, CommonMixin):
|
|
540
|
+
"""Polymorphic association between enrichments and entities."""
|
|
541
|
+
|
|
542
|
+
__tablename__ = "enrichment_associations"
|
|
543
|
+
|
|
544
|
+
enrichment_id: Mapped[int] = mapped_column(
|
|
545
|
+
ForeignKey("enrichments_v2.id", ondelete="CASCADE"),
|
|
546
|
+
nullable=False,
|
|
547
|
+
index=True,
|
|
548
|
+
)
|
|
549
|
+
entity_type: Mapped[str] = mapped_column(
|
|
550
|
+
String(50),
|
|
551
|
+
nullable=False,
|
|
552
|
+
index=True,
|
|
553
|
+
)
|
|
554
|
+
entity_id: Mapped[str] = mapped_column(
|
|
555
|
+
String(255),
|
|
556
|
+
nullable=False,
|
|
557
|
+
index=True,
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
__table_args__ = (
|
|
561
|
+
UniqueConstraint(
|
|
562
|
+
"entity_type",
|
|
563
|
+
"entity_id",
|
|
564
|
+
"enrichment_id",
|
|
565
|
+
name="uix_entity_enrichment",
|
|
566
|
+
),
|
|
567
|
+
Index("idx_entity_lookup", "entity_type", "entity_id"),
|
|
568
|
+
{"sqlite_autoincrement": True},
|
|
569
|
+
)
|
|
@@ -98,17 +98,28 @@ class SqlAlchemyGitBranchRepository(GitBranchRepository):
|
|
|
98
98
|
|
|
99
99
|
commit_shas = [branch.head_commit_sha for branch in db_branches]
|
|
100
100
|
|
|
101
|
-
# Get all head commits for these branches
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
101
|
+
# Get all head commits for these branches in chunks
|
|
102
|
+
# to avoid parameter limits
|
|
103
|
+
db_commits: list[db_entities.GitCommit] = []
|
|
104
|
+
chunk_size = 1000
|
|
105
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
106
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
107
|
+
commits_stmt = select(db_entities.GitCommit).where(
|
|
108
|
+
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
109
|
+
)
|
|
110
|
+
chunk_commits = (await session.scalars(commits_stmt)).all()
|
|
111
|
+
db_commits.extend(chunk_commits)
|
|
112
|
+
|
|
113
|
+
# Get all files for these commits in chunks
|
|
114
|
+
# to avoid parameter limits
|
|
115
|
+
db_files: list[db_entities.GitCommitFile] = []
|
|
116
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
117
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
118
|
+
files_stmt = select(db_entities.GitCommitFile).where(
|
|
119
|
+
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
120
|
+
)
|
|
121
|
+
chunk_files = (await session.scalars(files_stmt)).all()
|
|
122
|
+
db_files.extend(chunk_files)
|
|
112
123
|
|
|
113
124
|
# Group files by commit SHA
|
|
114
125
|
from kodit.domain.entities.git import GitFile
|
|
@@ -77,11 +77,17 @@ class SqlAlchemyGitCommitRepository(GitCommitRepository):
|
|
|
77
77
|
|
|
78
78
|
commit_shas = [commit.commit_sha for commit in db_commits]
|
|
79
79
|
|
|
80
|
-
# Get all files for these commits
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
80
|
+
# Get all files for these commits in chunks
|
|
81
|
+
# to avoid parameter limits
|
|
82
|
+
db_files: list[db_entities.GitCommitFile] = []
|
|
83
|
+
chunk_size = 1000
|
|
84
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
85
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
86
|
+
files_stmt = select(db_entities.GitCommitFile).where(
|
|
87
|
+
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
88
|
+
)
|
|
89
|
+
chunk_files = (await session.scalars(files_stmt)).all()
|
|
90
|
+
db_files.extend(chunk_files)
|
|
85
91
|
|
|
86
92
|
# Group files by commit SHA
|
|
87
93
|
files_by_commit: dict[str, list[GitFile]] = {}
|
|
@@ -149,13 +155,16 @@ class SqlAlchemyGitCommitRepository(GitCommitRepository):
|
|
|
149
155
|
async with SqlAlchemyUnitOfWork(self.session_factory) as session:
|
|
150
156
|
commit_shas = [commit.commit_sha for commit in commits]
|
|
151
157
|
|
|
152
|
-
# Get existing commits in bulk
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
(
|
|
158
|
-
|
|
158
|
+
# Get existing commits in bulk (chunked to avoid parameter limits)
|
|
159
|
+
existing_commit_shas: set[str] = set()
|
|
160
|
+
chunk_size = 1000
|
|
161
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
162
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
163
|
+
existing_commits_stmt = select(db_entities.GitCommit.commit_sha).where(
|
|
164
|
+
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
165
|
+
)
|
|
166
|
+
chunk_existing = (await session.scalars(existing_commits_stmt)).all()
|
|
167
|
+
existing_commit_shas.update(chunk_existing)
|
|
159
168
|
|
|
160
169
|
# Prepare new commits for bulk insert
|
|
161
170
|
new_commits_data = []
|
|
@@ -176,8 +185,8 @@ class SqlAlchemyGitCommitRepository(GitCommitRepository):
|
|
|
176
185
|
if new_commits_data:
|
|
177
186
|
chunk_size = 1000 # Conservative chunk size for parameter limits
|
|
178
187
|
for i in range(0, len(new_commits_data), chunk_size):
|
|
179
|
-
|
|
180
|
-
stmt = insert(db_entities.GitCommit).values(
|
|
188
|
+
data_chunk = new_commits_data[i : i + chunk_size]
|
|
189
|
+
stmt = insert(db_entities.GitCommit).values(data_chunk)
|
|
181
190
|
await session.execute(stmt)
|
|
182
191
|
|
|
183
192
|
# Bulk save files for new commits
|
|
@@ -219,28 +219,38 @@ class SqlAlchemyGitRepoRepository(GitRepoRepository):
|
|
|
219
219
|
if db_tracking_branch_entity:
|
|
220
220
|
referenced_commit_shas.add(db_tracking_branch_entity.head_commit_sha)
|
|
221
221
|
|
|
222
|
-
# Load only the referenced commits
|
|
222
|
+
# Load only the referenced commits in chunks to avoid parameter limits
|
|
223
223
|
referenced_commits = []
|
|
224
224
|
referenced_files = []
|
|
225
225
|
if referenced_commit_shas:
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
226
|
+
commit_shas_list = list(referenced_commit_shas)
|
|
227
|
+
chunk_size = 1000
|
|
228
|
+
|
|
229
|
+
for i in range(0, len(commit_shas_list), chunk_size):
|
|
230
|
+
chunk = commit_shas_list[i : i + chunk_size]
|
|
231
|
+
chunk_commits = list(
|
|
232
|
+
(
|
|
233
|
+
await session.scalars(
|
|
234
|
+
select(db_entities.GitCommit).where(
|
|
235
|
+
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
236
|
+
)
|
|
231
237
|
)
|
|
232
|
-
)
|
|
233
|
-
)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
238
|
+
).all()
|
|
239
|
+
)
|
|
240
|
+
referenced_commits.extend(chunk_commits)
|
|
241
|
+
|
|
242
|
+
for i in range(0, len(commit_shas_list), chunk_size):
|
|
243
|
+
chunk = commit_shas_list[i : i + chunk_size]
|
|
244
|
+
chunk_files = list(
|
|
245
|
+
(
|
|
246
|
+
await session.scalars(
|
|
247
|
+
select(db_entities.GitCommitFile).where(
|
|
248
|
+
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
249
|
+
)
|
|
240
250
|
)
|
|
241
|
-
)
|
|
242
|
-
)
|
|
243
|
-
|
|
251
|
+
).all()
|
|
252
|
+
)
|
|
253
|
+
referenced_files.extend(chunk_files)
|
|
244
254
|
|
|
245
255
|
return self._mapper.to_domain_git_repo(
|
|
246
256
|
db_repo=db_repo,
|
|
@@ -96,17 +96,28 @@ class SqlAlchemyGitTagRepository(GitTagRepository):
|
|
|
96
96
|
|
|
97
97
|
commit_shas = [tag.target_commit_sha for tag in db_tags]
|
|
98
98
|
|
|
99
|
-
# Get all target commits for these tags
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
99
|
+
# Get all target commits for these tags in chunks
|
|
100
|
+
# to avoid parameter limits
|
|
101
|
+
db_commits: list[db_entities.GitCommit] = []
|
|
102
|
+
chunk_size = 1000
|
|
103
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
104
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
105
|
+
commits_stmt = select(db_entities.GitCommit).where(
|
|
106
|
+
db_entities.GitCommit.commit_sha.in_(chunk)
|
|
107
|
+
)
|
|
108
|
+
chunk_commits = (await session.scalars(commits_stmt)).all()
|
|
109
|
+
db_commits.extend(chunk_commits)
|
|
110
|
+
|
|
111
|
+
# Get all files for these commits in chunks
|
|
112
|
+
# to avoid parameter limits
|
|
113
|
+
db_files: list[db_entities.GitCommitFile] = []
|
|
114
|
+
for i in range(0, len(commit_shas), chunk_size):
|
|
115
|
+
chunk = commit_shas[i : i + chunk_size]
|
|
116
|
+
files_stmt = select(db_entities.GitCommitFile).where(
|
|
117
|
+
db_entities.GitCommitFile.commit_sha.in_(chunk)
|
|
118
|
+
)
|
|
119
|
+
chunk_files = (await session.scalars(files_stmt)).all()
|
|
120
|
+
db_files.extend(chunk_files)
|
|
110
121
|
|
|
111
122
|
# Group files by commit SHA
|
|
112
123
|
files_by_commit: dict[str, list[GitFile]] = {}
|
|
@@ -2,18 +2,36 @@
|
|
|
2
2
|
|
|
3
3
|
import zlib
|
|
4
4
|
from collections.abc import Callable
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import TypedDict
|
|
5
7
|
|
|
6
8
|
from sqlalchemy import delete, insert, select
|
|
7
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
8
10
|
|
|
11
|
+
from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
|
|
9
12
|
from kodit.domain.entities.git import SnippetV2
|
|
10
13
|
from kodit.domain.protocols import SnippetRepositoryV2
|
|
11
14
|
from kodit.domain.value_objects import MultiSearchRequest
|
|
12
15
|
from kodit.infrastructure.mappers.snippet_mapper import SnippetMapper
|
|
13
16
|
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
17
|
+
from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
|
|
18
|
+
EnrichmentV2Repository,
|
|
19
|
+
)
|
|
14
20
|
from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
|
|
15
21
|
|
|
16
22
|
|
|
23
|
+
class _GitFileData(TypedDict):
|
|
24
|
+
"""Type for GitCommitFile creation data."""
|
|
25
|
+
|
|
26
|
+
commit_sha: str
|
|
27
|
+
path: str
|
|
28
|
+
blob_sha: str
|
|
29
|
+
mime_type: str
|
|
30
|
+
size: int
|
|
31
|
+
extension: str
|
|
32
|
+
created_at: datetime
|
|
33
|
+
|
|
34
|
+
|
|
17
35
|
def create_snippet_v2_repository(
|
|
18
36
|
session_factory: Callable[[], AsyncSession],
|
|
19
37
|
) -> SnippetRepositoryV2:
|
|
@@ -27,6 +45,7 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
|
|
|
27
45
|
def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
|
|
28
46
|
"""Initialize the repository."""
|
|
29
47
|
self.session_factory = session_factory
|
|
48
|
+
self._enrichment_repo = EnrichmentV2Repository(session_factory)
|
|
30
49
|
|
|
31
50
|
@property
|
|
32
51
|
def _mapper(self) -> SnippetMapper:
|
|
@@ -112,10 +131,15 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
|
|
|
112
131
|
stmt = insert(db_entities.CommitSnippetV2).values(chunk)
|
|
113
132
|
await session.execute(stmt)
|
|
114
133
|
|
|
115
|
-
async def _bulk_create_file_associations(
|
|
134
|
+
async def _bulk_create_file_associations( # noqa: C901
|
|
116
135
|
self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
|
|
117
136
|
) -> None:
|
|
118
|
-
"""Bulk create snippet-file associations.
|
|
137
|
+
"""Bulk create snippet-file associations.
|
|
138
|
+
|
|
139
|
+
Creates SnippetV2File records linking snippets to GitCommitFile records.
|
|
140
|
+
If a GitCommitFile doesn't exist, it creates it automatically to prevent
|
|
141
|
+
losing file associations during enrichment cycles.
|
|
142
|
+
"""
|
|
119
143
|
# Collect all file paths from all snippets
|
|
120
144
|
file_paths = set()
|
|
121
145
|
for snippet in snippets:
|
|
@@ -150,18 +174,55 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
|
|
|
150
174
|
existing_snippet_files = set(await session.execute(existing_snippet_files_stmt))
|
|
151
175
|
|
|
152
176
|
# Prepare new file associations
|
|
153
|
-
new_file_associations = []
|
|
177
|
+
new_file_associations: list[dict[str, str]] = []
|
|
178
|
+
missing_git_files: list[_GitFileData] = []
|
|
179
|
+
|
|
154
180
|
for snippet in snippets:
|
|
155
181
|
for file in snippet.derives_from:
|
|
156
182
|
association_key = (snippet.sha, file.path)
|
|
157
|
-
if
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
183
|
+
if association_key not in existing_snippet_files:
|
|
184
|
+
if file.path in existing_files_map:
|
|
185
|
+
# GitCommitFile exists, use its blob_sha
|
|
186
|
+
new_file_associations.append({
|
|
187
|
+
"snippet_sha": snippet.sha,
|
|
188
|
+
"blob_sha": existing_files_map[file.path],
|
|
189
|
+
"commit_sha": commit_sha,
|
|
190
|
+
"file_path": file.path,
|
|
191
|
+
})
|
|
192
|
+
else:
|
|
193
|
+
# GitCommitFile doesn't exist - create it and the association
|
|
194
|
+
missing_git_files.append({
|
|
195
|
+
"commit_sha": commit_sha,
|
|
196
|
+
"path": file.path,
|
|
197
|
+
"blob_sha": file.blob_sha,
|
|
198
|
+
"mime_type": file.mime_type,
|
|
199
|
+
"size": file.size,
|
|
200
|
+
"extension": file.extension,
|
|
201
|
+
"created_at": file.created_at,
|
|
202
|
+
})
|
|
203
|
+
new_file_associations.append({
|
|
204
|
+
"snippet_sha": snippet.sha,
|
|
205
|
+
"blob_sha": file.blob_sha,
|
|
206
|
+
"commit_sha": commit_sha,
|
|
207
|
+
"file_path": file.path,
|
|
208
|
+
})
|
|
209
|
+
# Add to map so subsequent snippets can find it
|
|
210
|
+
existing_files_map[file.path] = file.blob_sha
|
|
211
|
+
|
|
212
|
+
# Create missing GitCommitFile records
|
|
213
|
+
if missing_git_files:
|
|
214
|
+
for git_file_data in missing_git_files:
|
|
215
|
+
git_file = db_entities.GitCommitFile(
|
|
216
|
+
commit_sha=git_file_data["commit_sha"],
|
|
217
|
+
path=git_file_data["path"],
|
|
218
|
+
blob_sha=git_file_data["blob_sha"],
|
|
219
|
+
mime_type=git_file_data["mime_type"],
|
|
220
|
+
size=git_file_data["size"],
|
|
221
|
+
extension=git_file_data["extension"],
|
|
222
|
+
created_at=git_file_data["created_at"],
|
|
223
|
+
)
|
|
224
|
+
session.add(git_file)
|
|
225
|
+
await session.flush()
|
|
165
226
|
|
|
166
227
|
# Bulk insert new file associations in chunks to avoid parameter limits
|
|
167
228
|
if new_file_associations:
|
|
@@ -172,70 +233,29 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
|
|
|
172
233
|
await session.execute(stmt)
|
|
173
234
|
|
|
174
235
|
async def _bulk_update_enrichments(
|
|
175
|
-
self, session: AsyncSession, snippets: list[SnippetV2]
|
|
236
|
+
self, session: AsyncSession, snippets: list[SnippetV2] # noqa: ARG002
|
|
176
237
|
) -> None:
|
|
177
|
-
"""Bulk update enrichments for snippets."""
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
)
|
|
188
|
-
existing_enrichments = await session.execute(existing_enrichments_stmt)
|
|
189
|
-
|
|
190
|
-
# Create lookup for existing enrichment hashes
|
|
191
|
-
existing_enrichment_map = {}
|
|
192
|
-
for snippet_sha, enrichment_type, content in existing_enrichments:
|
|
193
|
-
content_hash = self._hash_string(content)
|
|
194
|
-
key = (snippet_sha, enrichment_type)
|
|
195
|
-
existing_enrichment_map[key] = content_hash
|
|
196
|
-
|
|
197
|
-
# Collect enrichments to delete and add
|
|
198
|
-
enrichments_to_delete = []
|
|
199
|
-
enrichments_to_add = []
|
|
238
|
+
"""Bulk update enrichments for snippets using new enrichment_v2."""
|
|
239
|
+
# Collect all enrichments from snippets using list comprehension
|
|
240
|
+
snippet_enrichments = [
|
|
241
|
+
SnippetEnrichment(
|
|
242
|
+
entity_id=snippet.sha,
|
|
243
|
+
content=enrichment.content,
|
|
244
|
+
)
|
|
245
|
+
for snippet in snippets
|
|
246
|
+
for enrichment in snippet.enrichments
|
|
247
|
+
]
|
|
200
248
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
# Content changed, mark for deletion and re-addition
|
|
209
|
-
enrichments_to_delete.append(key)
|
|
210
|
-
enrichments_to_add.append({
|
|
211
|
-
"snippet_sha": snippet.sha,
|
|
212
|
-
"type": db_entities.EnrichmentType(enrichment.type.value),
|
|
213
|
-
"content": enrichment.content,
|
|
214
|
-
})
|
|
215
|
-
else:
|
|
216
|
-
# New enrichment
|
|
217
|
-
enrichments_to_add.append({
|
|
218
|
-
"snippet_sha": snippet.sha,
|
|
219
|
-
"type": db_entities.EnrichmentType(enrichment.type.value),
|
|
220
|
-
"content": enrichment.content,
|
|
221
|
-
})
|
|
222
|
-
|
|
223
|
-
# Bulk delete changed enrichments
|
|
224
|
-
if enrichments_to_delete:
|
|
225
|
-
for snippet_sha, enrichment_type in enrichments_to_delete:
|
|
226
|
-
stmt = delete(db_entities.Enrichment).where(
|
|
227
|
-
db_entities.Enrichment.snippet_sha == snippet_sha,
|
|
228
|
-
db_entities.Enrichment.type == enrichment_type,
|
|
229
|
-
)
|
|
230
|
-
await session.execute(stmt)
|
|
249
|
+
if snippet_enrichments:
|
|
250
|
+
# First delete existing enrichments for these snippets
|
|
251
|
+
snippet_shas = [snippet.sha for snippet in snippets]
|
|
252
|
+
await self._enrichment_repo.bulk_delete_enrichments(
|
|
253
|
+
entity_type="snippet_v2",
|
|
254
|
+
entity_ids=snippet_shas,
|
|
255
|
+
)
|
|
231
256
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
chunk_size = 1000 # Conservative chunk size for parameter limits
|
|
235
|
-
for i in range(0, len(enrichments_to_add), chunk_size):
|
|
236
|
-
chunk = enrichments_to_add[i : i + chunk_size]
|
|
237
|
-
insert_stmt = insert(db_entities.Enrichment).values(chunk)
|
|
238
|
-
await session.execute(insert_stmt)
|
|
257
|
+
# Then save the new enrichments
|
|
258
|
+
await self._enrichment_repo.bulk_save_enrichments(snippet_enrichments)
|
|
239
259
|
|
|
240
260
|
async def _get_or_create_raw_snippet(
|
|
241
261
|
self, session: AsyncSession, commit_sha: str, domain_snippet: SnippetV2
|
|
@@ -281,33 +301,8 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
|
|
|
281
301
|
domain_snippet: SnippetV2,
|
|
282
302
|
) -> None:
|
|
283
303
|
"""Update enrichments if they have changed."""
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
db_entities.Enrichment.snippet_sha == db_snippet.sha
|
|
287
|
-
)
|
|
288
|
-
)
|
|
289
|
-
current_enrichment_shas = {
|
|
290
|
-
self._hash_string(enrichment.content)
|
|
291
|
-
for enrichment in list(current_enrichments)
|
|
292
|
-
}
|
|
293
|
-
for enrichment in domain_snippet.enrichments:
|
|
294
|
-
if self._hash_string(enrichment.content) in current_enrichment_shas:
|
|
295
|
-
continue
|
|
296
|
-
|
|
297
|
-
# If not present, delete the existing enrichment for this type if it exists
|
|
298
|
-
stmt = delete(db_entities.Enrichment).where(
|
|
299
|
-
db_entities.Enrichment.snippet_sha == db_snippet.sha,
|
|
300
|
-
db_entities.Enrichment.type
|
|
301
|
-
== db_entities.EnrichmentType(enrichment.type.value),
|
|
302
|
-
)
|
|
303
|
-
await session.execute(stmt)
|
|
304
|
-
|
|
305
|
-
db_enrichment = db_entities.Enrichment(
|
|
306
|
-
snippet_sha=db_snippet.sha,
|
|
307
|
-
type=db_entities.EnrichmentType(enrichment.type.value),
|
|
308
|
-
content=enrichment.content,
|
|
309
|
-
)
|
|
310
|
-
session.add(db_enrichment)
|
|
304
|
+
# For now, enrichments are not yet implemented with the new schema
|
|
305
|
+
# This method will need to be updated once we migrate to EnrichmentV2
|
|
311
306
|
|
|
312
307
|
async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
|
|
313
308
|
"""Get all snippets for a specific commit."""
|
|
@@ -469,16 +464,16 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
|
|
|
469
464
|
)
|
|
470
465
|
.where(db_entities.SnippetV2File.snippet_sha == db_snippet.sha)
|
|
471
466
|
)
|
|
467
|
+
db_files_list = list(db_files)
|
|
472
468
|
|
|
473
|
-
#
|
|
474
|
-
db_enrichments = await
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
)
|
|
469
|
+
# Get enrichments for this snippet
|
|
470
|
+
db_enrichments = await self._enrichment_repo.enrichments_for_entity_type(
|
|
471
|
+
entity_type="snippet_v2",
|
|
472
|
+
entity_ids=[db_snippet.sha],
|
|
478
473
|
)
|
|
479
474
|
|
|
480
475
|
return self._mapper.to_domain_snippet_v2(
|
|
481
476
|
db_snippet=db_snippet,
|
|
482
|
-
db_files=
|
|
483
|
-
db_enrichments=
|
|
477
|
+
db_files=db_files_list,
|
|
478
|
+
db_enrichments=db_enrichments,
|
|
484
479
|
)
|