kodit 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +10 -12
  3. kodit/application/factories/server_factory.py +78 -11
  4. kodit/application/services/commit_indexing_application_service.py +188 -31
  5. kodit/application/services/enrichment_query_service.py +95 -0
  6. kodit/config.py +3 -3
  7. kodit/domain/enrichments/__init__.py +1 -0
  8. kodit/domain/enrichments/architecture/__init__.py +1 -0
  9. kodit/domain/enrichments/architecture/architecture.py +20 -0
  10. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  11. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  12. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  13. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  14. kodit/domain/enrichments/development/__init__.py +1 -0
  15. kodit/domain/enrichments/development/development.py +18 -0
  16. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  17. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  18. kodit/domain/enrichments/enricher.py +17 -0
  19. kodit/domain/enrichments/enrichment.py +39 -0
  20. kodit/domain/enrichments/request.py +12 -0
  21. kodit/domain/enrichments/response.py +11 -0
  22. kodit/domain/enrichments/usage/__init__.py +1 -0
  23. kodit/domain/enrichments/usage/api_docs.py +19 -0
  24. kodit/domain/enrichments/usage/usage.py +18 -0
  25. kodit/domain/protocols.py +7 -6
  26. kodit/domain/services/enrichment_service.py +9 -30
  27. kodit/domain/services/physical_architecture_service.py +182 -0
  28. kodit/domain/tracking/__init__.py +1 -0
  29. kodit/domain/tracking/resolution_service.py +81 -0
  30. kodit/domain/tracking/trackable.py +21 -0
  31. kodit/domain/value_objects.py +6 -23
  32. kodit/infrastructure/api/v1/dependencies.py +15 -0
  33. kodit/infrastructure/api/v1/routers/commits.py +81 -0
  34. kodit/infrastructure/api/v1/routers/repositories.py +99 -0
  35. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  36. kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
  37. kodit/infrastructure/enricher/__init__.py +1 -0
  38. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  39. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
  40. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  41. kodit/infrastructure/enricher/null_enricher.py +36 -0
  42. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  43. kodit/infrastructure/mappers/snippet_mapper.py +20 -22
  44. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  45. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  46. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  47. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  48. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  49. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  50. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  51. kodit/infrastructure/slicing/slicer.py +56 -391
  52. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  53. kodit/infrastructure/sqlalchemy/entities.py +46 -38
  54. kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
  55. kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
  56. kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
  57. kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
  58. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
  59. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  60. kodit/utils/dump_config.py +361 -0
  61. kodit/utils/dump_openapi.py +5 -6
  62. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/METADATA +1 -1
  63. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/RECORD +67 -32
  64. kodit/infrastructure/enrichment/__init__.py +0 -1
  65. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  66. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  67. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  68. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/WHEEL +0 -0
  69. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.0'
32
- __version_tuple__ = version_tuple = (0, 5, 0)
31
+ __version__ = version = '0.5.2'
32
+ __version_tuple__ = version_tuple = (0, 5, 2)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -14,12 +14,8 @@ from kodit.application.factories.server_factory import ServerFactory
14
14
  from kodit.application.services.indexing_worker_service import IndexingWorkerService
15
15
  from kodit.application.services.sync_scheduler import SyncSchedulerService
16
16
  from kodit.config import AppContext
17
- from kodit.domain.value_objects import (
18
- Document,
19
- EnrichmentIndexRequest,
20
- EnrichmentRequest,
21
- IndexRequest,
22
- )
17
+ from kodit.domain.enrichments.request import EnrichmentRequest
18
+ from kodit.domain.value_objects import Document, IndexRequest
23
19
  from kodit.infrastructure.api.v1.routers.commits import router as commits_router
24
20
  from kodit.infrastructure.api.v1.routers.queue import router as queue_router
25
21
  from kodit.infrastructure.api.v1.routers.repositories import (
@@ -71,12 +67,14 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
71
67
  raise ValueError("Embedding service is not accessible") from e
72
68
  try:
73
69
  await anext(
74
- _server_factory.enrichment_service().enrich_documents(
75
- EnrichmentIndexRequest(
76
- requests=[
77
- EnrichmentRequest(snippet_id="1", text="def hello(): pass")
78
- ]
79
- )
70
+ _server_factory.enricher().enrich(
71
+ [
72
+ EnrichmentRequest(
73
+ id="1",
74
+ text="def hello(): pass",
75
+ system_prompt="Explain this code",
76
+ )
77
+ ]
80
78
  )
81
79
  )
82
80
  except Exception as e:
@@ -1,6 +1,7 @@
1
1
  """Create a big object that contains all the application services."""
2
2
 
3
3
  from collections.abc import Callable
4
+ from typing import TYPE_CHECKING
4
5
 
5
6
  from sqlalchemy.ext.asyncio import AsyncSession
6
7
 
@@ -11,10 +12,17 @@ from kodit.application.services.code_search_application_service import (
11
12
  from kodit.application.services.commit_indexing_application_service import (
12
13
  CommitIndexingApplicationService,
13
14
  )
15
+ from kodit.application.services.enrichment_query_service import (
16
+ EnrichmentQueryService,
17
+ )
14
18
  from kodit.application.services.queue_service import QueueService
15
19
  from kodit.application.services.reporting import ProgressTracker
16
20
  from kodit.application.services.sync_scheduler import SyncSchedulerService
17
21
  from kodit.config import AppContext
22
+ from kodit.domain.enrichments.architecture.physical.formatter import (
23
+ PhysicalArchitectureFormatter,
24
+ )
25
+ from kodit.domain.enrichments.enricher import Enricher
18
26
  from kodit.domain.protocols import (
19
27
  FusionService,
20
28
  GitAdapter,
@@ -27,11 +35,14 @@ from kodit.domain.protocols import (
27
35
  )
28
36
  from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
29
37
  from kodit.domain.services.embedding_service import EmbeddingDomainService
30
- from kodit.domain.services.enrichment_service import EnrichmentDomainService
31
38
  from kodit.domain.services.git_repository_service import (
32
39
  GitRepositoryScanner,
33
40
  RepositoryCloner,
34
41
  )
42
+ from kodit.domain.services.physical_architecture_service import (
43
+ PhysicalArchitectureService,
44
+ )
45
+ from kodit.domain.tracking.resolution_service import TrackableResolutionService
35
46
  from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
36
47
  from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
37
48
  VectorChordBM25Repository,
@@ -40,17 +51,23 @@ from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
40
51
  from kodit.infrastructure.embedding.embedding_factory import (
41
52
  embedding_domain_service_factory,
42
53
  )
43
- from kodit.infrastructure.enrichment.enrichment_factory import (
44
- enrichment_domain_service_factory,
54
+ from kodit.infrastructure.enricher.enricher_factory import (
55
+ enricher_domain_service_factory,
45
56
  )
46
57
 
47
58
  # InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
48
59
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
60
+ from kodit.infrastructure.physical_architecture.formatters.narrative_formatter import (
61
+ NarrativeFormatter,
62
+ )
49
63
  from kodit.infrastructure.slicing.slicer import Slicer
50
64
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
51
65
  SqlAlchemyEmbeddingRepository,
52
66
  create_embedding_repository,
53
67
  )
68
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
69
+ EnrichmentV2Repository,
70
+ )
54
71
  from kodit.infrastructure.sqlalchemy.git_branch_repository import (
55
72
  create_git_branch_repository,
56
73
  )
@@ -69,6 +86,9 @@ from kodit.infrastructure.sqlalchemy.task_status_repository import (
69
86
  )
70
87
  from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
71
88
 
89
+ if TYPE_CHECKING:
90
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
91
+
72
92
 
73
93
  class ServerFactory:
74
94
  """Factory for creating server application services."""
@@ -90,6 +110,7 @@ class ServerFactory:
90
110
  CommitIndexingApplicationService | None
91
111
  ) = None
92
112
  self._enrichment_service: EnrichmentDomainService | None = None
113
+ self._enricher_service: Enricher | None = None
93
114
  self._task_status_repository: TaskStatusRepository | None = None
94
115
  self._operation: ProgressTracker | None = None
95
116
  self._queue_service: QueueService | None = None
@@ -107,6 +128,33 @@ class ServerFactory:
107
128
  self._git_commit_repository: GitCommitRepository | None = None
108
129
  self._git_branch_repository: GitBranchRepository | None = None
109
130
  self._git_tag_repository: GitTagRepository | None = None
131
+ self._architecture_service: PhysicalArchitectureService | None = None
132
+ self._enrichment_v2_repository: EnrichmentV2Repository | None = None
133
+ self._architecture_formatter: PhysicalArchitectureFormatter | None = None
134
+ self._trackable_resolution_service: TrackableResolutionService | None = None
135
+ self._enrichment_query_service: EnrichmentQueryService | None = None
136
+
137
+ def architecture_formatter(self) -> PhysicalArchitectureFormatter:
138
+ """Create a PhysicalArchitectureFormatter instance."""
139
+ if not self._architecture_formatter:
140
+ self._architecture_formatter = NarrativeFormatter()
141
+ return self._architecture_formatter
142
+
143
+ def architecture_service(self) -> PhysicalArchitectureService:
144
+ """Create a PhysicalArchitectureService instance."""
145
+ if not self._architecture_service:
146
+ self._architecture_service = PhysicalArchitectureService(
147
+ formatter=self.architecture_formatter()
148
+ )
149
+ return self._architecture_service
150
+
151
+ def enrichment_v2_repository(self) -> EnrichmentV2Repository:
152
+ """Create a EnrichmentV2Repository instance."""
153
+ if not self._enrichment_v2_repository:
154
+ self._enrichment_v2_repository = EnrichmentV2Repository(
155
+ session_factory=self.session_factory
156
+ )
157
+ return self._enrichment_v2_repository
110
158
 
111
159
  def queue_service(self) -> QueueService:
112
160
  """Create a QueueService instance."""
@@ -190,8 +238,10 @@ class ServerFactory:
190
238
  bm25_service=self.bm25_service(),
191
239
  code_search_service=self.code_search_service(),
192
240
  text_search_service=self.text_search_service(),
193
- enrichment_service=self.enrichment_service(),
194
241
  embedding_repository=self.embedding_repository(),
242
+ architecture_service=self.architecture_service(),
243
+ enrichment_v2_repository=self.enrichment_v2_repository(),
244
+ enricher_service=self.enricher(),
195
245
  )
196
246
  )
197
247
 
@@ -242,13 +292,11 @@ class ServerFactory:
242
292
  )
243
293
  return self._snippet_v2_repository
244
294
 
245
- def enrichment_service(self) -> EnrichmentDomainService:
246
- """Create a EnrichmentDomainService instance."""
247
- if not self._enrichment_service:
248
- self._enrichment_service = enrichment_domain_service_factory(
249
- self.app_context
250
- )
251
- return self._enrichment_service
295
+ def enricher(self) -> Enricher:
296
+ """Create a EnricherDomainService instance."""
297
+ if not self._enricher_service:
298
+ self._enricher_service = enricher_domain_service_factory(self.app_context)
299
+ return self._enricher_service
252
300
 
253
301
  def sync_scheduler_service(self) -> SyncSchedulerService:
254
302
  """Create a SyncSchedulerService instance."""
@@ -309,3 +357,22 @@ class ServerFactory:
309
357
  session_factory=self.session_factory
310
358
  )
311
359
  return self._git_tag_repository
360
+
361
+ def trackable_resolution_service(self) -> TrackableResolutionService:
362
+ """Create a TrackableResolutionService instance."""
363
+ if not self._trackable_resolution_service:
364
+ self._trackable_resolution_service = TrackableResolutionService(
365
+ commit_repo=self.git_commit_repository(),
366
+ branch_repo=self.git_branch_repository(),
367
+ tag_repo=self.git_tag_repository(),
368
+ )
369
+ return self._trackable_resolution_service
370
+
371
+ def enrichment_query_service(self) -> EnrichmentQueryService:
372
+ """Create a EnrichmentQueryService instance."""
373
+ if not self._enrichment_query_service:
374
+ self._enrichment_query_service = EnrichmentQueryService(
375
+ trackable_resolution=self.trackable_resolution_service(),
376
+ enrichment_repo=self.enrichment_v2_repository(),
377
+ )
378
+ return self._enrichment_query_service
@@ -8,6 +8,15 @@ from pydantic import AnyUrl
8
8
 
9
9
  from kodit.application.services.queue_service import QueueService
10
10
  from kodit.application.services.reporting import ProgressTracker
11
+ from kodit.domain.enrichments.architecture.physical.physical import (
12
+ PhysicalArchitectureEnrichment,
13
+ )
14
+ from kodit.domain.enrichments.enricher import Enricher
15
+ from kodit.domain.enrichments.request import (
16
+ EnrichmentRequest as GenericEnrichmentRequest,
17
+ )
18
+ from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
19
+ from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
11
20
  from kodit.domain.entities import Task
12
21
  from kodit.domain.entities.git import GitFile, GitRepo, SnippetV2
13
22
  from kodit.domain.factories.git_repo_factory import GitRepoFactory
@@ -20,17 +29,19 @@ from kodit.domain.protocols import (
20
29
  )
21
30
  from kodit.domain.services.bm25_service import BM25DomainService
22
31
  from kodit.domain.services.embedding_service import EmbeddingDomainService
23
- from kodit.domain.services.enrichment_service import EnrichmentDomainService
24
32
  from kodit.domain.services.git_repository_service import (
25
33
  GitRepositoryScanner,
26
34
  RepositoryCloner,
27
35
  )
36
+ from kodit.domain.services.physical_architecture_service import (
37
+ ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
38
+ ARCHITECTURE_ENRICHMENT_TASK_PROMPT,
39
+ PhysicalArchitectureService,
40
+ )
28
41
  from kodit.domain.value_objects import (
29
42
  DeleteRequest,
30
43
  Document,
31
44
  Enrichment,
32
- EnrichmentIndexRequest,
33
- EnrichmentRequest,
34
45
  EnrichmentType,
35
46
  IndexRequest,
36
47
  LanguageMapping,
@@ -39,12 +50,21 @@ from kodit.domain.value_objects import (
39
50
  TaskOperation,
40
51
  TrackableType,
41
52
  )
53
+ from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
42
54
  from kodit.infrastructure.slicing.slicer import Slicer
43
55
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
44
56
  SqlAlchemyEmbeddingRepository,
45
57
  )
58
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
59
+ EnrichmentV2Repository,
60
+ )
46
61
  from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
47
62
 
63
+ SUMMARIZATION_SYSTEM_PROMPT = """
64
+ You are a professional software developer. You will be given a snippet of code.
65
+ Please provide a concise explanation of the code.
66
+ """
67
+
48
68
 
49
69
  class CommitIndexingApplicationService:
50
70
  """Application service for commit indexing operations."""
@@ -65,8 +85,10 @@ class CommitIndexingApplicationService:
65
85
  bm25_service: BM25DomainService,
66
86
  code_search_service: EmbeddingDomainService,
67
87
  text_search_service: EmbeddingDomainService,
68
- enrichment_service: EnrichmentDomainService,
69
88
  embedding_repository: SqlAlchemyEmbeddingRepository,
89
+ architecture_service: PhysicalArchitectureService,
90
+ enrichment_v2_repository: EnrichmentV2Repository,
91
+ enricher_service: Enricher,
70
92
  ) -> None:
71
93
  """Initialize the commit indexing application service.
72
94
 
@@ -92,8 +114,10 @@ class CommitIndexingApplicationService:
92
114
  self.bm25_service = bm25_service
93
115
  self.code_search_service = code_search_service
94
116
  self.text_search_service = text_search_service
95
- self.enrichment_service = enrichment_service
96
117
  self.embedding_repository = embedding_repository
118
+ self.architecture_service = architecture_service
119
+ self.enrichment_v2_repository = enrichment_v2_repository
120
+ self.enricher_service = enricher_service
97
121
  self._log = structlog.get_logger(__name__)
98
122
 
99
123
  async def create_git_repository(self, remote_uri: AnyUrl) -> GitRepo:
@@ -153,6 +177,10 @@ class CommitIndexingApplicationService:
153
177
  await self.process_enrich(repository_id, commit_sha)
154
178
  elif task.type == TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT:
155
179
  await self.process_summary_embeddings(repository_id, commit_sha)
180
+ elif task.type == TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT:
181
+ await self.process_architecture_discovery(repository_id, commit_sha)
182
+ elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
183
+ await self.process_api_docs(repository_id, commit_sha)
156
184
  else:
157
185
  raise ValueError(f"Unknown task type: {task.type}")
158
186
  else:
@@ -245,16 +273,14 @@ class CommitIndexingApplicationService:
245
273
  snippets = await self.snippet_repository.get_snippets_for_commit(
246
274
  commit_sha
247
275
  )
248
- all_snippet_ids.extend([
249
- snippet.id for snippet in snippets if snippet.id
250
- ])
276
+ all_snippet_ids.extend(
277
+ [snippet.id for snippet in snippets if snippet.id]
278
+ )
251
279
 
252
280
  # Step 2: Delete from BM25 and embedding indices
253
281
  if all_snippet_ids:
254
282
  # Convert to strings as DeleteRequest expects list[str]
255
- snippet_id_strings = [
256
- str(snippet_id) for snippet_id in all_snippet_ids
257
- ]
283
+ snippet_id_strings = [str(snippet_id) for snippet_id in all_snippet_ids]
258
284
  delete_request = DeleteRequest(snippet_ids=snippet_id_strings)
259
285
  await self.bm25_service.delete_documents(delete_request)
260
286
 
@@ -264,20 +290,27 @@ class CommitIndexingApplicationService:
264
290
  snippet_id
265
291
  )
266
292
 
267
- # Step 3: Delete snippet associations for all commits
293
+ # Step 3: Delete enrichments for all commits
294
+ if commit_shas:
295
+ await self.enrichment_v2_repository.bulk_delete_enrichments(
296
+ entity_type="git_commit",
297
+ entity_ids=commit_shas,
298
+ )
299
+
300
+ # Step 4: Delete snippet associations for all commits
268
301
  for commit_sha in commit_shas:
269
302
  await self.snippet_repository.delete_snippets_for_commit(commit_sha)
270
303
 
271
- # Step 4: Delete branches (they reference commits via head_commit_sha)
304
+ # Step 5: Delete branches (they reference commits via head_commit_sha)
272
305
  await self.git_branch_repository.delete_by_repo_id(repository_id)
273
306
 
274
- # Step 5: Delete tags (they reference commits via target_commit_sha)
307
+ # Step 6: Delete tags (they reference commits via target_commit_sha)
275
308
  await self.git_tag_repository.delete_by_repo_id(repository_id)
276
309
 
277
- # Step 6: Delete commits and their files
310
+ # Step 7: Delete commits and their files
278
311
  await self.git_commit_repository.delete_by_repo_id(repository_id)
279
312
 
280
- # Step 7: Finally delete the repository
313
+ # Step 8: Finally delete the repository
281
314
  await self.repo_repository.delete(repo.sanitized_remote_uri)
282
315
 
283
316
  async def process_snippets_for_commit(
@@ -302,11 +335,7 @@ class CommitIndexingApplicationService:
302
335
  if not repo.cloned_path:
303
336
  raise ValueError(f"Repository {repository_id} has never been cloned")
304
337
 
305
- # Ensure we're on the specific commit for file access
306
- await self.scanner.git_adapter.checkout_commit(repo.cloned_path, commit_sha)
307
-
308
- # Get files directly from Git adapter for this specific commit
309
- files_data = await self.scanner.git_adapter.get_commit_files(
338
+ files_data = await self.scanner.git_adapter.get_commit_file_data(
310
339
  repo.cloned_path, commit_sha
311
340
  )
312
341
 
@@ -456,18 +485,18 @@ class CommitIndexingApplicationService:
456
485
  if snippet.id
457
486
  }
458
487
 
459
- enrichment_request = EnrichmentIndexRequest(
460
- requests=[
461
- EnrichmentRequest(snippet_id=snippet_id, text=snippet.content)
462
- for snippet_id, snippet in snippet_map.items()
463
- ]
464
- )
488
+ enrichment_requests = [
489
+ GenericEnrichmentRequest(
490
+ id=snippet_id,
491
+ text=snippet.content,
492
+ system_prompt=SUMMARIZATION_SYSTEM_PROMPT,
493
+ )
494
+ for snippet_id, snippet in snippet_map.items()
495
+ ]
465
496
 
466
497
  processed = 0
467
- async for result in self.enrichment_service.enrich_documents(
468
- enrichment_request
469
- ):
470
- snippet = snippet_map[result.snippet_id]
498
+ async for result in self.enricher_service.enrich(enrichment_requests):
499
+ snippet = snippet_map[result.id]
471
500
  snippet.enrichments.append(
472
501
  Enrichment(type=EnrichmentType.SUMMARIZATION, content=result.text)
473
502
  )
@@ -526,6 +555,134 @@ class CommitIndexingApplicationService:
526
555
  processed += len(result)
527
556
  await step.set_current(processed, "Creating text embeddings for commit")
528
557
 
558
+ async def process_architecture_discovery(
559
+ self, repository_id: int, commit_sha: str
560
+ ) -> None:
561
+ """Handle ARCHITECTURE_DISCOVERY task - discover physical architecture."""
562
+ async with self.operation.create_child(
563
+ TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
564
+ trackable_type=TrackableType.KODIT_REPOSITORY,
565
+ trackable_id=repository_id,
566
+ ) as step:
567
+ await step.set_total(3)
568
+
569
+ # Check if architecture enrichment already exists for this commit
570
+ enrichment_repo = self.enrichment_v2_repository
571
+ existing_enrichments = await enrichment_repo.enrichments_for_entity_type(
572
+ entity_type="git_commit",
573
+ entity_ids=[commit_sha],
574
+ )
575
+
576
+ # Check if architecture enrichment already exists
577
+ has_architecture = any(
578
+ enrichment.type == "architecture" for enrichment in existing_enrichments
579
+ )
580
+
581
+ if has_architecture:
582
+ await step.skip("Architecture enrichment already exists for commit")
583
+ return
584
+
585
+ # Get repository path
586
+ repo = await self.repo_repository.get_by_id(repository_id)
587
+ if not repo.cloned_path:
588
+ raise ValueError(f"Repository {repository_id} has never been cloned")
589
+
590
+ await step.set_current(1, "Discovering physical architecture")
591
+
592
+ # Discover architecture
593
+ architecture_narrative = (
594
+ await self.architecture_service.discover_architecture(repo.cloned_path)
595
+ )
596
+
597
+ await step.set_current(2, "Enriching architecture notes with LLM")
598
+
599
+ # Enrich the architecture narrative through the enricher
600
+ enrichment_request = GenericEnrichmentRequest(
601
+ id=commit_sha,
602
+ text=ARCHITECTURE_ENRICHMENT_TASK_PROMPT.format(
603
+ architecture_narrative=architecture_narrative,
604
+ ),
605
+ system_prompt=ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
606
+ )
607
+
608
+ enriched_content = ""
609
+ async for response in self.enricher_service.enrich([enrichment_request]):
610
+ enriched_content = response.text
611
+
612
+ # Create and save architecture enrichment with enriched content
613
+ architecture_enrichment = PhysicalArchitectureEnrichment(
614
+ entity_id=commit_sha,
615
+ content=enriched_content,
616
+ )
617
+
618
+ await self.enrichment_v2_repository.bulk_save_enrichments(
619
+ [architecture_enrichment]
620
+ )
621
+
622
+ await step.set_current(3, "Architecture enrichment completed")
623
+
624
+ async def process_api_docs(self, repository_id: int, commit_sha: str) -> None:
625
+ """Handle API_DOCS task - generate API documentation."""
626
+ async with self.operation.create_child(
627
+ TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
628
+ trackable_type=TrackableType.KODIT_REPOSITORY,
629
+ trackable_id=repository_id,
630
+ ) as step:
631
+ # Check if API docs already exist for this commit
632
+ existing_enrichments = (
633
+ await self.enrichment_v2_repository.enrichments_for_entity_type(
634
+ entity_type="git_commit",
635
+ entity_ids=[commit_sha],
636
+ )
637
+ )
638
+
639
+ has_api_docs = any(
640
+ e.type == ENRICHMENT_TYPE_USAGE
641
+ and e.subtype == ENRICHMENT_SUBTYPE_API_DOCS
642
+ for e in existing_enrichments
643
+ )
644
+
645
+ if has_api_docs:
646
+ await step.skip("API docs already exist for commit")
647
+ return
648
+
649
+ # Get repository for metadata
650
+ repo = await self.repo_repository.get_by_id(repository_id)
651
+ if not repo:
652
+ raise ValueError(f"Repository {repository_id} not found")
653
+ str(repo.sanitized_remote_uri)
654
+
655
+ commit = await self.git_commit_repository.get_by_sha(commit_sha)
656
+
657
+ # Group files by language
658
+ lang_files_map: dict[str, list[GitFile]] = defaultdict(list)
659
+ for file in commit.files:
660
+ try:
661
+ lang = LanguageMapping.get_language_for_extension(file.extension)
662
+ except ValueError:
663
+ continue
664
+ lang_files_map[lang].append(file)
665
+
666
+ all_enrichments = []
667
+ extractor = APIDocExtractor()
668
+
669
+ await step.set_total(len(lang_files_map))
670
+ for i, (lang, lang_files) in enumerate(lang_files_map.items()):
671
+ await step.set_current(i, f"Extracting API docs for {lang}")
672
+ enrichments = extractor.extract_api_docs(
673
+ lang_files,
674
+ lang,
675
+ commit_sha,
676
+ include_private=False,
677
+ )
678
+ all_enrichments.extend(enrichments)
679
+
680
+ # Save all enrichments
681
+ if all_enrichments:
682
+ await self.enrichment_v2_repository.bulk_save_enrichments(
683
+ all_enrichments
684
+ )
685
+
529
686
  async def _new_snippets_for_type(
530
687
  self, all_snippets: list[SnippetV2], embedding_type: EmbeddingType
531
688
  ) -> list[SnippetV2]:
@@ -0,0 +1,95 @@
1
+ """Application service for querying enrichments."""
2
+
3
+ import structlog
4
+
5
+ from kodit.domain.enrichments.enrichment import EnrichmentV2
6
+ from kodit.domain.tracking.resolution_service import TrackableResolutionService
7
+ from kodit.domain.tracking.trackable import Trackable
8
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
9
+ EnrichmentV2Repository,
10
+ )
11
+
12
+
13
+ class EnrichmentQueryService:
14
+ """Finds the latest commit with enrichments for a trackable.
15
+
16
+ Orchestrates domain services and repositories to fulfill the use case.
17
+ """
18
+
19
+ def __init__(
20
+ self,
21
+ trackable_resolution: TrackableResolutionService,
22
+ enrichment_repo: EnrichmentV2Repository,
23
+ ) -> None:
24
+ """Initialize the enrichment query service."""
25
+ self.trackable_resolution = trackable_resolution
26
+ self.enrichment_repo = enrichment_repo
27
+ self.log = structlog.get_logger(__name__)
28
+
29
+ async def find_latest_enriched_commit(
30
+ self,
31
+ trackable: Trackable,
32
+ enrichment_type: str | None = None,
33
+ max_commits_to_check: int = 100,
34
+ ) -> str | None:
35
+ """Find the most recent commit with enrichments.
36
+
37
+ Args:
38
+ trackable: What to track (branch, tag, or commit)
39
+ enrichment_type: Optional filter for specific enrichment type
40
+ max_commits_to_check: How far back in history to search
41
+
42
+ Returns:
43
+ Commit SHA of the most recent commit with enrichments, or None
44
+
45
+ """
46
+ # Get candidate commits from the trackable
47
+ candidate_commits = await self.trackable_resolution.resolve_to_commits(
48
+ trackable, max_commits_to_check
49
+ )
50
+
51
+ if not candidate_commits:
52
+ return None
53
+
54
+ # Check which commits have enrichments
55
+ enrichments = await self.enrichment_repo.enrichments_for_entity_type(
56
+ entity_type="git_commit",
57
+ entity_ids=candidate_commits,
58
+ )
59
+
60
+ # Filter by type if specified
61
+ if enrichment_type:
62
+ enrichments = [e for e in enrichments if e.type == enrichment_type]
63
+
64
+ # Find the first commit (newest) that has enrichments
65
+ for commit_sha in candidate_commits:
66
+ if any(e.entity_id == commit_sha for e in enrichments):
67
+ return commit_sha
68
+
69
+ return None
70
+
71
+ async def get_enrichments_for_commit(
72
+ self,
73
+ commit_sha: str,
74
+ enrichment_type: str | None = None,
75
+ ) -> list[EnrichmentV2]:
76
+ """Get all enrichments for a specific commit.
77
+
78
+ Args:
79
+ commit_sha: The commit SHA to get enrichments for
80
+ enrichment_type: Optional filter for specific enrichment type
81
+
82
+ Returns:
83
+ List of enrichments for the commit
84
+
85
+ """
86
+ enrichments = await self.enrichment_repo.enrichments_for_entity_type(
87
+ entity_type="git_commit",
88
+ entity_ids=[commit_sha],
89
+ )
90
+
91
+ # Filter by type if specified
92
+ if enrichment_type:
93
+ enrichments = [e for e in enrichments if e.type == enrichment_type]
94
+
95
+ return enrichments
kodit/config.py CHANGED
@@ -66,9 +66,9 @@ class Endpoint(BaseModel):
66
66
  default=None,
67
67
  description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
68
68
  )
69
- timeout: float | None = Field(
70
- default=None,
71
- description="Request timeout in seconds (default: 30.0)",
69
+ timeout: float = Field(
70
+ default=60,
71
+ description="Request timeout in seconds",
72
72
  )
73
73
  extra_params: dict[str, Any] | None = Field(
74
74
  default=None,
@@ -0,0 +1 @@
1
+ """Enrichment domain package."""
@@ -0,0 +1 @@
1
+ """Architecture enrichment package."""
@@ -0,0 +1,20 @@
1
+ """Architecture enrichment domain entity."""
2
+
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+
6
+ from kodit.domain.enrichments.enrichment import (
7
+ CommitEnrichment,
8
+ )
9
+
10
+ ENRICHMENT_TYPE_ARCHITECTURE = "architecture"
11
+
12
+
13
+ @dataclass
14
+ class ArchitectureEnrichment(CommitEnrichment, ABC):
15
+ """Enrichment containing physical architecture discovery for a commit."""
16
+
17
+ @property
18
+ def type(self) -> str:
19
+ """Return the enrichment type."""
20
+ return ENRICHMENT_TYPE_ARCHITECTURE
@@ -0,0 +1 @@
1
+ """Physical architecture enrichment package."""