kodit-0.5.0-py3-none-any.whl → kodit-0.5.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

This version of kodit has been flagged as potentially problematic.

Files changed (64)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +10 -12
  3. kodit/application/factories/server_factory.py +53 -11
  4. kodit/application/services/commit_indexing_application_service.py +188 -31
  5. kodit/config.py +3 -3
  6. kodit/domain/enrichments/__init__.py +1 -0
  7. kodit/domain/enrichments/architecture/__init__.py +1 -0
  8. kodit/domain/enrichments/architecture/architecture.py +20 -0
  9. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  10. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  11. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  12. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  13. kodit/domain/enrichments/development/__init__.py +1 -0
  14. kodit/domain/enrichments/development/development.py +18 -0
  15. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  16. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  17. kodit/domain/enrichments/enricher.py +17 -0
  18. kodit/domain/enrichments/enrichment.py +39 -0
  19. kodit/domain/enrichments/request.py +12 -0
  20. kodit/domain/enrichments/response.py +11 -0
  21. kodit/domain/enrichments/usage/__init__.py +1 -0
  22. kodit/domain/enrichments/usage/api_docs.py +19 -0
  23. kodit/domain/enrichments/usage/usage.py +18 -0
  24. kodit/domain/protocols.py +7 -6
  25. kodit/domain/services/enrichment_service.py +9 -30
  26. kodit/domain/services/physical_architecture_service.py +182 -0
  27. kodit/domain/value_objects.py +6 -23
  28. kodit/infrastructure/api/v1/routers/commits.py +81 -0
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  30. kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
  31. kodit/infrastructure/enricher/__init__.py +1 -0
  32. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  33. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
  34. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  35. kodit/infrastructure/enricher/null_enricher.py +36 -0
  36. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  37. kodit/infrastructure/mappers/snippet_mapper.py +20 -22
  38. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  39. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  40. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  41. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  42. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  43. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  44. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  45. kodit/infrastructure/slicing/slicer.py +56 -391
  46. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  47. kodit/infrastructure/sqlalchemy/entities.py +46 -38
  48. kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
  49. kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
  50. kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
  51. kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
  52. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
  53. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  54. kodit/utils/dump_config.py +361 -0
  55. kodit/utils/dump_openapi.py +5 -6
  56. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/METADATA +1 -1
  57. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/RECORD +61 -30
  58. kodit/infrastructure/enrichment/__init__.py +0 -1
  59. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  60. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  61. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  62. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  63. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  64. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/sqlalchemy/entities.py

@@ -10,6 +10,7 @@ from sqlalchemy import (
     Float,
     ForeignKey,
     ForeignKeyConstraint,
+    Index,
     Integer,
     String,
     TypeDecorator,
@@ -483,44 +484,6 @@ class CommitSnippetV2(Base):
         self.snippet_sha = snippet_sha


-# Enrichment model for SnippetV2
-
-
-class EnrichmentType(Enum):
-    """Enrichment type enum."""
-
-    UNKNOWN = "unknown"
-    SUMMARIZATION = "summarization"
-
-
-class Enrichment(Base, CommonMixin):
-    """Enrichment model for snippet enrichments."""
-
-    __tablename__ = "enrichments"
-
-    snippet_sha: Mapped[str] = mapped_column(ForeignKey("snippets_v2.sha"), index=True)
-    type: Mapped[EnrichmentType] = mapped_column(
-        SQLAlchemyEnum(EnrichmentType), index=True
-    )
-    content: Mapped[str] = mapped_column(UnicodeText)
-
-    __table_args__ = (
-        UniqueConstraint("snippet_sha", "type", name="uix_snippet_enrichment"),
-    )
-
-    def __init__(
-        self,
-        snippet_sha: str,
-        type: EnrichmentType,  # noqa: A002
-        content: str,
-    ) -> None:
-        """Initialize enrichment."""
-        super().__init__()
-        self.snippet_sha = snippet_sha
-        self.type = type
-        self.content = content
-
-
 class CommitIndex(Base):
     """Commit index model."""

@@ -559,3 +522,48 @@ class CommitIndex(Base):
         self.error_message = error_message
         self.files_processed = files_processed
         self.processing_time_seconds = processing_time_seconds
+
+
+class EnrichmentV2(Base, CommonMixin):
+    """Generic enrichment entity."""
+
+    __tablename__ = "enrichments_v2"
+
+    type: Mapped[str] = mapped_column(String, nullable=False, index=True)
+    subtype: Mapped[str] = mapped_column(String, nullable=False, index=True)
+    content: Mapped[str] = mapped_column(UnicodeText, nullable=False)
+
+    __table_args__ = (Index("idx_type_subtype", "type", "subtype"),)
+
+
+class EnrichmentAssociation(Base, CommonMixin):
+    """Polymorphic association between enrichments and entities."""
+
+    __tablename__ = "enrichment_associations"
+
+    enrichment_id: Mapped[int] = mapped_column(
+        ForeignKey("enrichments_v2.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    entity_type: Mapped[str] = mapped_column(
+        String(50),
+        nullable=False,
+        index=True,
+    )
+    entity_id: Mapped[str] = mapped_column(
+        String(255),
+        nullable=False,
+        index=True,
+    )
+
+    __table_args__ = (
+        UniqueConstraint(
+            "entity_type",
+            "entity_id",
+            "enrichment_id",
+            name="uix_entity_enrichment",
+        ),
+        Index("idx_entity_lookup", "entity_type", "entity_id"),
+        {"sqlite_autoincrement": True},
+    )
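Note on the new schema: enrichments are no longer tied to snippets by a snippet_sha foreign key. Rows in enrichments_v2 carry only type, subtype, and content, and enrichment_associations links them to arbitrary entities by (entity_type, entity_id). A minimal sketch of a lookup under this schema, assuming CommonMixin supplies the integer id column that the association's foreign key references; the helper name is illustrative, not part of kodit:

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from kodit.infrastructure.sqlalchemy import entities as db_entities


async def enrichments_for_snippet(
    session: AsyncSession, snippet_sha: str
) -> list[db_entities.EnrichmentV2]:
    """Illustrative only: fetch enrichments for one snippet via the association table."""
    stmt = (
        select(db_entities.EnrichmentV2)
        .join(
            db_entities.EnrichmentAssociation,
            db_entities.EnrichmentAssociation.enrichment_id
            == db_entities.EnrichmentV2.id,  # id assumed to come from CommonMixin
        )
        .where(
            db_entities.EnrichmentAssociation.entity_type == "snippet_v2",
            db_entities.EnrichmentAssociation.entity_id == snippet_sha,
        )
    )
    return list((await session.scalars(stmt)).all())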
kodit/infrastructure/sqlalchemy/git_branch_repository.py

@@ -98,17 +98,28 @@ class SqlAlchemyGitBranchRepository(GitBranchRepository):

             commit_shas = [branch.head_commit_sha for branch in db_branches]

-            # Get all head commits for these branches
-            commits_stmt = select(db_entities.GitCommit).where(
-                db_entities.GitCommit.commit_sha.in_(commit_shas)
-            )
-            db_commits = (await session.scalars(commits_stmt)).all()
-
-            # Get all files for these commits
-            files_stmt = select(db_entities.GitCommitFile).where(
-                db_entities.GitCommitFile.commit_sha.in_(commit_shas)
-            )
-            db_files = (await session.scalars(files_stmt)).all()
+            # Get all head commits for these branches in chunks
+            # to avoid parameter limits
+            db_commits: list[db_entities.GitCommit] = []
+            chunk_size = 1000
+            for i in range(0, len(commit_shas), chunk_size):
+                chunk = commit_shas[i : i + chunk_size]
+                commits_stmt = select(db_entities.GitCommit).where(
+                    db_entities.GitCommit.commit_sha.in_(chunk)
+                )
+                chunk_commits = (await session.scalars(commits_stmt)).all()
+                db_commits.extend(chunk_commits)
+
+            # Get all files for these commits in chunks
+            # to avoid parameter limits
+            db_files: list[db_entities.GitCommitFile] = []
+            for i in range(0, len(commit_shas), chunk_size):
+                chunk = commit_shas[i : i + chunk_size]
+                files_stmt = select(db_entities.GitCommitFile).where(
+                    db_entities.GitCommitFile.commit_sha.in_(chunk)
+                )
+                chunk_files = (await session.scalars(files_stmt)).all()
+                db_files.extend(chunk_files)

             # Group files by commit SHA
             from kodit.domain.entities.git import GitFile
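The chunked IN (...) rewrite above, repeated in the commit, repo, and tag repositories below, keeps each statement under the bound-parameter limits of the underlying databases (SQLite in particular caps the number of variables per statement). A generic helper along these lines would capture the pattern once; this is a sketch for illustration, not code shipped in this release:

from collections.abc import Sequence
from typing import Any, TypeVar

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

T = TypeVar("T")


async def select_in_chunks(
    session: AsyncSession,
    entity: type[T],
    column: Any,  # the column to filter on, e.g. db_entities.GitCommit.commit_sha
    values: Sequence[str],
    chunk_size: int = 1000,
) -> list[T]:
    """Run SELECT ... WHERE column IN (<chunk>) per chunk and merge the results."""
    results: list[T] = []
    for i in range(0, len(values), chunk_size):
        chunk = values[i : i + chunk_size]
        stmt = select(entity).where(column.in_(chunk))
        results.extend((await session.scalars(stmt)).all())
    return results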
kodit/infrastructure/sqlalchemy/git_commit_repository.py

@@ -77,11 +77,17 @@ class SqlAlchemyGitCommitRepository(GitCommitRepository):

             commit_shas = [commit.commit_sha for commit in db_commits]

-            # Get all files for these commits
-            files_stmt = select(db_entities.GitCommitFile).where(
-                db_entities.GitCommitFile.commit_sha.in_(commit_shas)
-            )
-            db_files = (await session.scalars(files_stmt)).all()
+            # Get all files for these commits in chunks
+            # to avoid parameter limits
+            db_files: list[db_entities.GitCommitFile] = []
+            chunk_size = 1000
+            for i in range(0, len(commit_shas), chunk_size):
+                chunk = commit_shas[i : i + chunk_size]
+                files_stmt = select(db_entities.GitCommitFile).where(
+                    db_entities.GitCommitFile.commit_sha.in_(chunk)
+                )
+                chunk_files = (await session.scalars(files_stmt)).all()
+                db_files.extend(chunk_files)

             # Group files by commit SHA
             files_by_commit: dict[str, list[GitFile]] = {}
@@ -149,13 +155,16 @@ class SqlAlchemyGitCommitRepository(GitCommitRepository):
         async with SqlAlchemyUnitOfWork(self.session_factory) as session:
             commit_shas = [commit.commit_sha for commit in commits]

-            # Get existing commits in bulk
-            existing_commits_stmt = select(db_entities.GitCommit.commit_sha).where(
-                db_entities.GitCommit.commit_sha.in_(commit_shas)
-            )
-            existing_commit_shas = set(
-                (await session.scalars(existing_commits_stmt)).all()
-            )
+            # Get existing commits in bulk (chunked to avoid parameter limits)
+            existing_commit_shas: set[str] = set()
+            chunk_size = 1000
+            for i in range(0, len(commit_shas), chunk_size):
+                chunk = commit_shas[i : i + chunk_size]
+                existing_commits_stmt = select(db_entities.GitCommit.commit_sha).where(
+                    db_entities.GitCommit.commit_sha.in_(chunk)
+                )
+                chunk_existing = (await session.scalars(existing_commits_stmt)).all()
+                existing_commit_shas.update(chunk_existing)

             # Prepare new commits for bulk insert
             new_commits_data = []
@@ -176,8 +185,8 @@ class SqlAlchemyGitCommitRepository(GitCommitRepository):
             if new_commits_data:
                 chunk_size = 1000  # Conservative chunk size for parameter limits
                 for i in range(0, len(new_commits_data), chunk_size):
-                    chunk = new_commits_data[i : i + chunk_size]
-                    stmt = insert(db_entities.GitCommit).values(chunk)
+                    data_chunk = new_commits_data[i : i + chunk_size]
+                    stmt = insert(db_entities.GitCommit).values(data_chunk)
                     await session.execute(stmt)

             # Bulk save files for new commits
kodit/infrastructure/sqlalchemy/git_repository.py

@@ -219,28 +219,38 @@ class SqlAlchemyGitRepoRepository(GitRepoRepository):
             if db_tracking_branch_entity:
                 referenced_commit_shas.add(db_tracking_branch_entity.head_commit_sha)

-            # Load only the referenced commits
+            # Load only the referenced commits in chunks to avoid parameter limits
             referenced_commits = []
             referenced_files = []
             if referenced_commit_shas:
-                referenced_commits = list(
-                    (
-                        await session.scalars(
-                            select(db_entities.GitCommit).where(
-                                db_entities.GitCommit.commit_sha.in_(referenced_commit_shas)
+                commit_shas_list = list(referenced_commit_shas)
+                chunk_size = 1000
+
+                for i in range(0, len(commit_shas_list), chunk_size):
+                    chunk = commit_shas_list[i : i + chunk_size]
+                    chunk_commits = list(
+                        (
+                            await session.scalars(
+                                select(db_entities.GitCommit).where(
+                                    db_entities.GitCommit.commit_sha.in_(chunk)
+                                )
                             )
-                        )
-                    ).all()
-                )
-                referenced_files = list(
-                    (
-                        await session.scalars(
-                            select(db_entities.GitCommitFile).where(
-                                db_entities.GitCommitFile.commit_sha.in_(referenced_commit_shas)
+                        ).all()
+                    )
+                    referenced_commits.extend(chunk_commits)
+
+                for i in range(0, len(commit_shas_list), chunk_size):
+                    chunk = commit_shas_list[i : i + chunk_size]
+                    chunk_files = list(
+                        (
+                            await session.scalars(
+                                select(db_entities.GitCommitFile).where(
+                                    db_entities.GitCommitFile.commit_sha.in_(chunk)
+                                )
                             )
-                        )
-                    ).all()
-                )
+                        ).all()
+                    )
+                    referenced_files.extend(chunk_files)

             return self._mapper.to_domain_git_repo(
                 db_repo=db_repo,
kodit/infrastructure/sqlalchemy/git_tag_repository.py

@@ -96,17 +96,28 @@ class SqlAlchemyGitTagRepository(GitTagRepository):

             commit_shas = [tag.target_commit_sha for tag in db_tags]

-            # Get all target commits for these tags
-            commits_stmt = select(db_entities.GitCommit).where(
-                db_entities.GitCommit.commit_sha.in_(commit_shas)
-            )
-            db_commits = (await session.scalars(commits_stmt)).all()
-
-            # Get all files for these commits
-            files_stmt = select(db_entities.GitCommitFile).where(
-                db_entities.GitCommitFile.commit_sha.in_(commit_shas)
-            )
-            db_files = (await session.scalars(files_stmt)).all()
+            # Get all target commits for these tags in chunks
+            # to avoid parameter limits
+            db_commits: list[db_entities.GitCommit] = []
+            chunk_size = 1000
+            for i in range(0, len(commit_shas), chunk_size):
+                chunk = commit_shas[i : i + chunk_size]
+                commits_stmt = select(db_entities.GitCommit).where(
+                    db_entities.GitCommit.commit_sha.in_(chunk)
+                )
+                chunk_commits = (await session.scalars(commits_stmt)).all()
+                db_commits.extend(chunk_commits)
+
+            # Get all files for these commits in chunks
+            # to avoid parameter limits
+            db_files: list[db_entities.GitCommitFile] = []
+            for i in range(0, len(commit_shas), chunk_size):
+                chunk = commit_shas[i : i + chunk_size]
+                files_stmt = select(db_entities.GitCommitFile).where(
+                    db_entities.GitCommitFile.commit_sha.in_(chunk)
+                )
+                chunk_files = (await session.scalars(files_stmt)).all()
+                db_files.extend(chunk_files)

             # Group files by commit SHA
             files_by_commit: dict[str, list[GitFile]] = {}
kodit/infrastructure/sqlalchemy/snippet_v2_repository.py

@@ -2,18 +2,36 @@

 import zlib
 from collections.abc import Callable
+from datetime import datetime
+from typing import TypedDict

 from sqlalchemy import delete, insert, select
 from sqlalchemy.ext.asyncio import AsyncSession

+from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
 from kodit.domain.entities.git import SnippetV2
 from kodit.domain.protocols import SnippetRepositoryV2
 from kodit.domain.value_objects import MultiSearchRequest
 from kodit.infrastructure.mappers.snippet_mapper import SnippetMapper
 from kodit.infrastructure.sqlalchemy import entities as db_entities
+from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
+    EnrichmentV2Repository,
+)
 from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork


+class _GitFileData(TypedDict):
+    """Type for GitCommitFile creation data."""
+
+    commit_sha: str
+    path: str
+    blob_sha: str
+    mime_type: str
+    size: int
+    extension: str
+    created_at: datetime
+
+
 def create_snippet_v2_repository(
     session_factory: Callable[[], AsyncSession],
 ) -> SnippetRepositoryV2:
@@ -27,6 +45,7 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
     def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
         """Initialize the repository."""
         self.session_factory = session_factory
+        self._enrichment_repo = EnrichmentV2Repository(session_factory)

     @property
     def _mapper(self) -> SnippetMapper:
@@ -112,10 +131,15 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
                 stmt = insert(db_entities.CommitSnippetV2).values(chunk)
                 await session.execute(stmt)

-    async def _bulk_create_file_associations(
+    async def _bulk_create_file_associations(  # noqa: C901
         self, session: AsyncSession, commit_sha: str, snippets: list[SnippetV2]
     ) -> None:
-        """Bulk create snippet-file associations."""
+        """Bulk create snippet-file associations.
+
+        Creates SnippetV2File records linking snippets to GitCommitFile records.
+        If a GitCommitFile doesn't exist, it creates it automatically to prevent
+        losing file associations during enrichment cycles.
+        """
         # Collect all file paths from all snippets
         file_paths = set()
         for snippet in snippets:
@@ -150,18 +174,55 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
         existing_snippet_files = set(await session.execute(existing_snippet_files_stmt))

         # Prepare new file associations
-        new_file_associations = []
+        new_file_associations: list[dict[str, str]] = []
+        missing_git_files: list[_GitFileData] = []
+
         for snippet in snippets:
             for file in snippet.derives_from:
                 association_key = (snippet.sha, file.path)
-                if (association_key not in existing_snippet_files
-                    and file.path in existing_files_map):
-                    new_file_associations.append({
-                        "snippet_sha": snippet.sha,
-                        "blob_sha": existing_files_map[file.path],
-                        "commit_sha": commit_sha,
-                        "file_path": file.path,
-                    })
+                if association_key not in existing_snippet_files:
+                    if file.path in existing_files_map:
+                        # GitCommitFile exists, use its blob_sha
+                        new_file_associations.append({
+                            "snippet_sha": snippet.sha,
+                            "blob_sha": existing_files_map[file.path],
+                            "commit_sha": commit_sha,
+                            "file_path": file.path,
+                        })
+                    else:
+                        # GitCommitFile doesn't exist - create it and the association
+                        missing_git_files.append({
+                            "commit_sha": commit_sha,
+                            "path": file.path,
+                            "blob_sha": file.blob_sha,
+                            "mime_type": file.mime_type,
+                            "size": file.size,
+                            "extension": file.extension,
+                            "created_at": file.created_at,
+                        })
+                        new_file_associations.append({
+                            "snippet_sha": snippet.sha,
+                            "blob_sha": file.blob_sha,
+                            "commit_sha": commit_sha,
+                            "file_path": file.path,
+                        })
+                        # Add to map so subsequent snippets can find it
+                        existing_files_map[file.path] = file.blob_sha
+
+        # Create missing GitCommitFile records
+        if missing_git_files:
+            for git_file_data in missing_git_files:
+                git_file = db_entities.GitCommitFile(
+                    commit_sha=git_file_data["commit_sha"],
+                    path=git_file_data["path"],
+                    blob_sha=git_file_data["blob_sha"],
+                    mime_type=git_file_data["mime_type"],
+                    size=git_file_data["size"],
+                    extension=git_file_data["extension"],
+                    created_at=git_file_data["created_at"],
+                )
+                session.add(git_file)
+            await session.flush()

         # Bulk insert new file associations in chunks to avoid parameter limits
         if new_file_associations:
@@ -172,70 +233,29 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
                 await session.execute(stmt)

     async def _bulk_update_enrichments(
-        self, session: AsyncSession, snippets: list[SnippetV2]
+        self, session: AsyncSession, snippets: list[SnippetV2]  # noqa: ARG002
     ) -> None:
-        """Bulk update enrichments for snippets."""
-        snippet_shas = [snippet.sha for snippet in snippets]
-
-        # Get all existing enrichments for these snippets
-        existing_enrichments_stmt = select(
-            db_entities.Enrichment.snippet_sha,
-            db_entities.Enrichment.type,
-            db_entities.Enrichment.content
-        ).where(
-            db_entities.Enrichment.snippet_sha.in_(snippet_shas)
-        )
-        existing_enrichments = await session.execute(existing_enrichments_stmt)
-
-        # Create lookup for existing enrichment hashes
-        existing_enrichment_map = {}
-        for snippet_sha, enrichment_type, content in existing_enrichments:
-            content_hash = self._hash_string(content)
-            key = (snippet_sha, enrichment_type)
-            existing_enrichment_map[key] = content_hash
-
-        # Collect enrichments to delete and add
-        enrichments_to_delete = []
-        enrichments_to_add = []
+        """Bulk update enrichments for snippets using new enrichment_v2."""
+        # Collect all enrichments from snippets using list comprehension
+        snippet_enrichments = [
+            SnippetEnrichment(
+                entity_id=snippet.sha,
+                content=enrichment.content,
+            )
+            for snippet in snippets
+            for enrichment in snippet.enrichments
+        ]

-        for snippet in snippets:
-            for enrichment in snippet.enrichments:
-                key = (snippet.sha, db_entities.EnrichmentType(enrichment.type.value))
-                new_hash = self._hash_string(enrichment.content)
-
-                if key in existing_enrichment_map:
-                    if existing_enrichment_map[key] != new_hash:
-                        # Content changed, mark for deletion and re-addition
-                        enrichments_to_delete.append(key)
-                        enrichments_to_add.append({
-                            "snippet_sha": snippet.sha,
-                            "type": db_entities.EnrichmentType(enrichment.type.value),
-                            "content": enrichment.content,
-                        })
-                else:
-                    # New enrichment
-                    enrichments_to_add.append({
-                        "snippet_sha": snippet.sha,
-                        "type": db_entities.EnrichmentType(enrichment.type.value),
-                        "content": enrichment.content,
-                    })
-
-        # Bulk delete changed enrichments
-        if enrichments_to_delete:
-            for snippet_sha, enrichment_type in enrichments_to_delete:
-                stmt = delete(db_entities.Enrichment).where(
-                    db_entities.Enrichment.snippet_sha == snippet_sha,
-                    db_entities.Enrichment.type == enrichment_type,
-                )
-                await session.execute(stmt)
+        if snippet_enrichments:
+            # First delete existing enrichments for these snippets
+            snippet_shas = [snippet.sha for snippet in snippets]
+            await self._enrichment_repo.bulk_delete_enrichments(
+                entity_type="snippet_v2",
+                entity_ids=snippet_shas,
+            )

-        # Bulk insert new/updated enrichments in chunks to avoid parameter limits
-        if enrichments_to_add:
-            chunk_size = 1000  # Conservative chunk size for parameter limits
-            for i in range(0, len(enrichments_to_add), chunk_size):
-                chunk = enrichments_to_add[i : i + chunk_size]
-                insert_stmt = insert(db_entities.Enrichment).values(chunk)
-                await session.execute(insert_stmt)
+            # Then save the new enrichments
+            await self._enrichment_repo.bulk_save_enrichments(snippet_enrichments)

     async def _get_or_create_raw_snippet(
         self, session: AsyncSession, commit_sha: str, domain_snippet: SnippetV2
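The rewritten _bulk_update_enrichments and the snippet loading code below delegate to the new EnrichmentV2Repository (kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py in the file list above). A rough interface implied by the calls in this file is sketched here; the signatures and element types are inferred from usage and may not match the concrete class exactly:

from typing import Protocol

from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment


class EnrichmentV2RepositoryProtocol(Protocol):
    """Interface implied by how snippet_v2_repository.py calls the repository (assumed)."""

    async def bulk_save_enrichments(
        self, enrichments: list[SnippetEnrichment]
    ) -> None:
        """Persist enrichments together with their entity associations."""

    async def bulk_delete_enrichments(
        self, entity_type: str, entity_ids: list[str]
    ) -> None:
        """Remove the enrichments associated with the given entity ids."""

    async def enrichments_for_entity_type(
        self, entity_type: str, entity_ids: list[str]
    ) -> list[SnippetEnrichment]:
        """Return the enrichments associated with the given entity ids."""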
@@ -281,33 +301,8 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
         domain_snippet: SnippetV2,
     ) -> None:
         """Update enrichments if they have changed."""
-        current_enrichments = await session.scalars(
-            select(db_entities.Enrichment).where(
-                db_entities.Enrichment.snippet_sha == db_snippet.sha
-            )
-        )
-        current_enrichment_shas = {
-            self._hash_string(enrichment.content)
-            for enrichment in list(current_enrichments)
-        }
-        for enrichment in domain_snippet.enrichments:
-            if self._hash_string(enrichment.content) in current_enrichment_shas:
-                continue
-
-            # If not present, delete the existing enrichment for this type if it exists
-            stmt = delete(db_entities.Enrichment).where(
-                db_entities.Enrichment.snippet_sha == db_snippet.sha,
-                db_entities.Enrichment.type
-                == db_entities.EnrichmentType(enrichment.type.value),
-            )
-            await session.execute(stmt)
-
-            db_enrichment = db_entities.Enrichment(
-                snippet_sha=db_snippet.sha,
-                type=db_entities.EnrichmentType(enrichment.type.value),
-                content=enrichment.content,
-            )
-            session.add(db_enrichment)
+        # For now, enrichments are not yet implemented with the new schema
+        # This method will need to be updated once we migrate to EnrichmentV2

     async def get_snippets_for_commit(self, commit_sha: str) -> list[SnippetV2]:
         """Get all snippets for a specific commit."""
@@ -469,16 +464,16 @@ class SqlAlchemySnippetRepositoryV2(SnippetRepositoryV2):
                 )
                 .where(db_entities.SnippetV2File.snippet_sha == db_snippet.sha)
             )
+            db_files_list = list(db_files)

-            # Enrichments related to this snippet
-            db_enrichments = await session.scalars(
-                select(db_entities.Enrichment).where(
-                    db_entities.Enrichment.snippet_sha == db_snippet.sha
-                )
+            # Get enrichments for this snippet
+            db_enrichments = await self._enrichment_repo.enrichments_for_entity_type(
+                entity_type="snippet_v2",
+                entity_ids=[db_snippet.sha],
             )

             return self._mapper.to_domain_snippet_v2(
                 db_snippet=db_snippet,
-                db_files=list(db_files),
-                db_enrichments=list(db_enrichments),
+                db_files=db_files_list,
+                db_enrichments=db_enrichments,
             )