kodit 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (64)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +10 -12
  3. kodit/application/factories/server_factory.py +53 -11
  4. kodit/application/services/commit_indexing_application_service.py +188 -31
  5. kodit/config.py +3 -3
  6. kodit/domain/enrichments/__init__.py +1 -0
  7. kodit/domain/enrichments/architecture/__init__.py +1 -0
  8. kodit/domain/enrichments/architecture/architecture.py +20 -0
  9. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  10. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  11. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  12. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  13. kodit/domain/enrichments/development/__init__.py +1 -0
  14. kodit/domain/enrichments/development/development.py +18 -0
  15. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  16. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  17. kodit/domain/enrichments/enricher.py +17 -0
  18. kodit/domain/enrichments/enrichment.py +39 -0
  19. kodit/domain/enrichments/request.py +12 -0
  20. kodit/domain/enrichments/response.py +11 -0
  21. kodit/domain/enrichments/usage/__init__.py +1 -0
  22. kodit/domain/enrichments/usage/api_docs.py +19 -0
  23. kodit/domain/enrichments/usage/usage.py +18 -0
  24. kodit/domain/protocols.py +7 -6
  25. kodit/domain/services/enrichment_service.py +9 -30
  26. kodit/domain/services/physical_architecture_service.py +182 -0
  27. kodit/domain/value_objects.py +6 -23
  28. kodit/infrastructure/api/v1/routers/commits.py +81 -0
  29. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  30. kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
  31. kodit/infrastructure/enricher/__init__.py +1 -0
  32. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  33. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
  34. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  35. kodit/infrastructure/enricher/null_enricher.py +36 -0
  36. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  37. kodit/infrastructure/mappers/snippet_mapper.py +20 -22
  38. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  39. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  40. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  41. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  42. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  43. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  44. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  45. kodit/infrastructure/slicing/slicer.py +56 -391
  46. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  47. kodit/infrastructure/sqlalchemy/entities.py +46 -38
  48. kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
  49. kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
  50. kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
  51. kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
  52. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
  53. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  54. kodit/utils/dump_config.py +361 -0
  55. kodit/utils/dump_openapi.py +5 -6
  56. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/METADATA +1 -1
  57. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/RECORD +61 -30
  58. kodit/infrastructure/enrichment/__init__.py +0 -1
  59. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  60. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  61. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  62. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  63. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  64. {kodit-0.5.0.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.0'
32
- __version_tuple__ = version_tuple = (0, 5, 0)
31
+ __version__ = version = '0.5.1'
32
+ __version_tuple__ = version_tuple = (0, 5, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
kodit/app.py CHANGED
@@ -14,12 +14,8 @@ from kodit.application.factories.server_factory import ServerFactory
14
14
  from kodit.application.services.indexing_worker_service import IndexingWorkerService
15
15
  from kodit.application.services.sync_scheduler import SyncSchedulerService
16
16
  from kodit.config import AppContext
17
- from kodit.domain.value_objects import (
18
- Document,
19
- EnrichmentIndexRequest,
20
- EnrichmentRequest,
21
- IndexRequest,
22
- )
17
+ from kodit.domain.enrichments.request import EnrichmentRequest
18
+ from kodit.domain.value_objects import Document, IndexRequest
23
19
  from kodit.infrastructure.api.v1.routers.commits import router as commits_router
24
20
  from kodit.infrastructure.api.v1.routers.queue import router as queue_router
25
21
  from kodit.infrastructure.api.v1.routers.repositories import (
@@ -71,12 +67,14 @@ async def app_lifespan(_: FastAPI) -> AsyncIterator[AppLifespanState]:
71
67
  raise ValueError("Embedding service is not accessible") from e
72
68
  try:
73
69
  await anext(
74
- _server_factory.enrichment_service().enrich_documents(
75
- EnrichmentIndexRequest(
76
- requests=[
77
- EnrichmentRequest(snippet_id="1", text="def hello(): pass")
78
- ]
79
- )
70
+ _server_factory.enricher().enrich(
71
+ [
72
+ EnrichmentRequest(
73
+ id="1",
74
+ text="def hello(): pass",
75
+ system_prompt="Explain this code",
76
+ )
77
+ ]
80
78
  )
81
79
  )
82
80
  except Exception as e:
kodit/application/factories/server_factory.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Create a big object that contains all the application services."""
2
2
 
3
3
  from collections.abc import Callable
4
+ from typing import TYPE_CHECKING
4
5
 
5
6
  from sqlalchemy.ext.asyncio import AsyncSession
6
7
 
@@ -15,6 +16,10 @@ from kodit.application.services.queue_service import QueueService
15
16
  from kodit.application.services.reporting import ProgressTracker
16
17
  from kodit.application.services.sync_scheduler import SyncSchedulerService
17
18
  from kodit.config import AppContext
19
+ from kodit.domain.enrichments.architecture.physical.formatter import (
20
+ PhysicalArchitectureFormatter,
21
+ )
22
+ from kodit.domain.enrichments.enricher import Enricher
18
23
  from kodit.domain.protocols import (
19
24
  FusionService,
20
25
  GitAdapter,
@@ -27,11 +32,13 @@ from kodit.domain.protocols import (
27
32
  )
28
33
  from kodit.domain.services.bm25_service import BM25DomainService, BM25Repository
29
34
  from kodit.domain.services.embedding_service import EmbeddingDomainService
30
- from kodit.domain.services.enrichment_service import EnrichmentDomainService
31
35
  from kodit.domain.services.git_repository_service import (
32
36
  GitRepositoryScanner,
33
37
  RepositoryCloner,
34
38
  )
39
+ from kodit.domain.services.physical_architecture_service import (
40
+ PhysicalArchitectureService,
41
+ )
35
42
  from kodit.infrastructure.bm25.local_bm25_repository import LocalBM25Repository
36
43
  from kodit.infrastructure.bm25.vectorchord_bm25_repository import (
37
44
  VectorChordBM25Repository,
@@ -40,17 +47,23 @@ from kodit.infrastructure.cloning.git.git_python_adaptor import GitPythonAdapter
40
47
  from kodit.infrastructure.embedding.embedding_factory import (
41
48
  embedding_domain_service_factory,
42
49
  )
43
- from kodit.infrastructure.enrichment.enrichment_factory import (
44
- enrichment_domain_service_factory,
50
+ from kodit.infrastructure.enricher.enricher_factory import (
51
+ enricher_domain_service_factory,
45
52
  )
46
53
 
47
54
  # InMemoryGitTagRepository removed - now handled by InMemoryGitRepoRepository
48
55
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
56
+ from kodit.infrastructure.physical_architecture.formatters.narrative_formatter import (
57
+ NarrativeFormatter,
58
+ )
49
59
  from kodit.infrastructure.slicing.slicer import Slicer
50
60
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
51
61
  SqlAlchemyEmbeddingRepository,
52
62
  create_embedding_repository,
53
63
  )
64
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
65
+ EnrichmentV2Repository,
66
+ )
54
67
  from kodit.infrastructure.sqlalchemy.git_branch_repository import (
55
68
  create_git_branch_repository,
56
69
  )
@@ -69,6 +82,9 @@ from kodit.infrastructure.sqlalchemy.task_status_repository import (
69
82
  )
70
83
  from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
71
84
 
85
+ if TYPE_CHECKING:
86
+ from kodit.domain.services.enrichment_service import EnrichmentDomainService
87
+
72
88
 
73
89
  class ServerFactory:
74
90
  """Factory for creating server application services."""
@@ -90,6 +106,7 @@ class ServerFactory:
90
106
  CommitIndexingApplicationService | None
91
107
  ) = None
92
108
  self._enrichment_service: EnrichmentDomainService | None = None
109
+ self._enricher_service: Enricher | None = None
93
110
  self._task_status_repository: TaskStatusRepository | None = None
94
111
  self._operation: ProgressTracker | None = None
95
112
  self._queue_service: QueueService | None = None
@@ -107,6 +124,31 @@ class ServerFactory:
107
124
  self._git_commit_repository: GitCommitRepository | None = None
108
125
  self._git_branch_repository: GitBranchRepository | None = None
109
126
  self._git_tag_repository: GitTagRepository | None = None
127
+ self._architecture_service: PhysicalArchitectureService | None = None
128
+ self._enrichment_v2_repository: EnrichmentV2Repository | None = None
129
+ self._architecture_formatter: PhysicalArchitectureFormatter | None = None
130
+
131
+ def architecture_formatter(self) -> PhysicalArchitectureFormatter:
132
+ """Create a PhysicalArchitectureFormatter instance."""
133
+ if not self._architecture_formatter:
134
+ self._architecture_formatter = NarrativeFormatter()
135
+ return self._architecture_formatter
136
+
137
+ def architecture_service(self) -> PhysicalArchitectureService:
138
+ """Create a PhysicalArchitectureService instance."""
139
+ if not self._architecture_service:
140
+ self._architecture_service = PhysicalArchitectureService(
141
+ formatter=self.architecture_formatter()
142
+ )
143
+ return self._architecture_service
144
+
145
+ def enrichment_v2_repository(self) -> EnrichmentV2Repository:
146
+ """Create a EnrichmentV2Repository instance."""
147
+ if not self._enrichment_v2_repository:
148
+ self._enrichment_v2_repository = EnrichmentV2Repository(
149
+ session_factory=self.session_factory
150
+ )
151
+ return self._enrichment_v2_repository
110
152
 
111
153
  def queue_service(self) -> QueueService:
112
154
  """Create a QueueService instance."""
@@ -190,8 +232,10 @@ class ServerFactory:
190
232
  bm25_service=self.bm25_service(),
191
233
  code_search_service=self.code_search_service(),
192
234
  text_search_service=self.text_search_service(),
193
- enrichment_service=self.enrichment_service(),
194
235
  embedding_repository=self.embedding_repository(),
236
+ architecture_service=self.architecture_service(),
237
+ enrichment_v2_repository=self.enrichment_v2_repository(),
238
+ enricher_service=self.enricher(),
195
239
  )
196
240
  )
197
241
 
@@ -242,13 +286,11 @@ class ServerFactory:
242
286
  )
243
287
  return self._snippet_v2_repository
244
288
 
245
- def enrichment_service(self) -> EnrichmentDomainService:
246
- """Create a EnrichmentDomainService instance."""
247
- if not self._enrichment_service:
248
- self._enrichment_service = enrichment_domain_service_factory(
249
- self.app_context
250
- )
251
- return self._enrichment_service
289
+ def enricher(self) -> Enricher:
290
+ """Create a EnricherDomainService instance."""
291
+ if not self._enricher_service:
292
+ self._enricher_service = enricher_domain_service_factory(self.app_context)
293
+ return self._enricher_service
252
294
 
253
295
  def sync_scheduler_service(self) -> SyncSchedulerService:
254
296
  """Create a SyncSchedulerService instance."""
kodit/application/services/commit_indexing_application_service.py CHANGED
@@ -8,6 +8,15 @@ from pydantic import AnyUrl
8
8
 
9
9
  from kodit.application.services.queue_service import QueueService
10
10
  from kodit.application.services.reporting import ProgressTracker
11
+ from kodit.domain.enrichments.architecture.physical.physical import (
12
+ PhysicalArchitectureEnrichment,
13
+ )
14
+ from kodit.domain.enrichments.enricher import Enricher
15
+ from kodit.domain.enrichments.request import (
16
+ EnrichmentRequest as GenericEnrichmentRequest,
17
+ )
18
+ from kodit.domain.enrichments.usage.api_docs import ENRICHMENT_SUBTYPE_API_DOCS
19
+ from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
11
20
  from kodit.domain.entities import Task
12
21
  from kodit.domain.entities.git import GitFile, GitRepo, SnippetV2
13
22
  from kodit.domain.factories.git_repo_factory import GitRepoFactory
@@ -20,17 +29,19 @@ from kodit.domain.protocols import (
20
29
  )
21
30
  from kodit.domain.services.bm25_service import BM25DomainService
22
31
  from kodit.domain.services.embedding_service import EmbeddingDomainService
23
- from kodit.domain.services.enrichment_service import EnrichmentDomainService
24
32
  from kodit.domain.services.git_repository_service import (
25
33
  GitRepositoryScanner,
26
34
  RepositoryCloner,
27
35
  )
36
+ from kodit.domain.services.physical_architecture_service import (
37
+ ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
38
+ ARCHITECTURE_ENRICHMENT_TASK_PROMPT,
39
+ PhysicalArchitectureService,
40
+ )
28
41
  from kodit.domain.value_objects import (
29
42
  DeleteRequest,
30
43
  Document,
31
44
  Enrichment,
32
- EnrichmentIndexRequest,
33
- EnrichmentRequest,
34
45
  EnrichmentType,
35
46
  IndexRequest,
36
47
  LanguageMapping,
@@ -39,12 +50,21 @@ from kodit.domain.value_objects import (
39
50
  TaskOperation,
40
51
  TrackableType,
41
52
  )
53
+ from kodit.infrastructure.slicing.api_doc_extractor import APIDocExtractor
42
54
  from kodit.infrastructure.slicing.slicer import Slicer
43
55
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
44
56
  SqlAlchemyEmbeddingRepository,
45
57
  )
58
+ from kodit.infrastructure.sqlalchemy.enrichment_v2_repository import (
59
+ EnrichmentV2Repository,
60
+ )
46
61
  from kodit.infrastructure.sqlalchemy.entities import EmbeddingType
47
62
 
63
+ SUMMARIZATION_SYSTEM_PROMPT = """
64
+ You are a professional software developer. You will be given a snippet of code.
65
+ Please provide a concise explanation of the code.
66
+ """
67
+
48
68
 
49
69
  class CommitIndexingApplicationService:
50
70
  """Application service for commit indexing operations."""
@@ -65,8 +85,10 @@ class CommitIndexingApplicationService:
65
85
  bm25_service: BM25DomainService,
66
86
  code_search_service: EmbeddingDomainService,
67
87
  text_search_service: EmbeddingDomainService,
68
- enrichment_service: EnrichmentDomainService,
69
88
  embedding_repository: SqlAlchemyEmbeddingRepository,
89
+ architecture_service: PhysicalArchitectureService,
90
+ enrichment_v2_repository: EnrichmentV2Repository,
91
+ enricher_service: Enricher,
70
92
  ) -> None:
71
93
  """Initialize the commit indexing application service.
72
94
 
@@ -92,8 +114,10 @@ class CommitIndexingApplicationService:
92
114
  self.bm25_service = bm25_service
93
115
  self.code_search_service = code_search_service
94
116
  self.text_search_service = text_search_service
95
- self.enrichment_service = enrichment_service
96
117
  self.embedding_repository = embedding_repository
118
+ self.architecture_service = architecture_service
119
+ self.enrichment_v2_repository = enrichment_v2_repository
120
+ self.enricher_service = enricher_service
97
121
  self._log = structlog.get_logger(__name__)
98
122
 
99
123
  async def create_git_repository(self, remote_uri: AnyUrl) -> GitRepo:
@@ -153,6 +177,10 @@ class CommitIndexingApplicationService:
153
177
  await self.process_enrich(repository_id, commit_sha)
154
178
  elif task.type == TaskOperation.CREATE_SUMMARY_EMBEDDINGS_FOR_COMMIT:
155
179
  await self.process_summary_embeddings(repository_id, commit_sha)
180
+ elif task.type == TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT:
181
+ await self.process_architecture_discovery(repository_id, commit_sha)
182
+ elif task.type == TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT:
183
+ await self.process_api_docs(repository_id, commit_sha)
156
184
  else:
157
185
  raise ValueError(f"Unknown task type: {task.type}")
158
186
  else:
@@ -245,16 +273,14 @@ class CommitIndexingApplicationService:
245
273
  snippets = await self.snippet_repository.get_snippets_for_commit(
246
274
  commit_sha
247
275
  )
248
- all_snippet_ids.extend([
249
- snippet.id for snippet in snippets if snippet.id
250
- ])
276
+ all_snippet_ids.extend(
277
+ [snippet.id for snippet in snippets if snippet.id]
278
+ )
251
279
 
252
280
  # Step 2: Delete from BM25 and embedding indices
253
281
  if all_snippet_ids:
254
282
  # Convert to strings as DeleteRequest expects list[str]
255
- snippet_id_strings = [
256
- str(snippet_id) for snippet_id in all_snippet_ids
257
- ]
283
+ snippet_id_strings = [str(snippet_id) for snippet_id in all_snippet_ids]
258
284
  delete_request = DeleteRequest(snippet_ids=snippet_id_strings)
259
285
  await self.bm25_service.delete_documents(delete_request)
260
286
 
@@ -264,20 +290,27 @@ class CommitIndexingApplicationService:
264
290
  snippet_id
265
291
  )
266
292
 
267
- # Step 3: Delete snippet associations for all commits
293
+ # Step 3: Delete enrichments for all commits
294
+ if commit_shas:
295
+ await self.enrichment_v2_repository.bulk_delete_enrichments(
296
+ entity_type="git_commit",
297
+ entity_ids=commit_shas,
298
+ )
299
+
300
+ # Step 4: Delete snippet associations for all commits
268
301
  for commit_sha in commit_shas:
269
302
  await self.snippet_repository.delete_snippets_for_commit(commit_sha)
270
303
 
271
- # Step 4: Delete branches (they reference commits via head_commit_sha)
304
+ # Step 5: Delete branches (they reference commits via head_commit_sha)
272
305
  await self.git_branch_repository.delete_by_repo_id(repository_id)
273
306
 
274
- # Step 5: Delete tags (they reference commits via target_commit_sha)
307
+ # Step 6: Delete tags (they reference commits via target_commit_sha)
275
308
  await self.git_tag_repository.delete_by_repo_id(repository_id)
276
309
 
277
- # Step 6: Delete commits and their files
310
+ # Step 7: Delete commits and their files
278
311
  await self.git_commit_repository.delete_by_repo_id(repository_id)
279
312
 
280
- # Step 7: Finally delete the repository
313
+ # Step 8: Finally delete the repository
281
314
  await self.repo_repository.delete(repo.sanitized_remote_uri)
282
315
 
283
316
  async def process_snippets_for_commit(
@@ -302,11 +335,7 @@ class CommitIndexingApplicationService:
302
335
  if not repo.cloned_path:
303
336
  raise ValueError(f"Repository {repository_id} has never been cloned")
304
337
 
305
- # Ensure we're on the specific commit for file access
306
- await self.scanner.git_adapter.checkout_commit(repo.cloned_path, commit_sha)
307
-
308
- # Get files directly from Git adapter for this specific commit
309
- files_data = await self.scanner.git_adapter.get_commit_files(
338
+ files_data = await self.scanner.git_adapter.get_commit_file_data(
310
339
  repo.cloned_path, commit_sha
311
340
  )
312
341
 
@@ -456,18 +485,18 @@ class CommitIndexingApplicationService:
456
485
  if snippet.id
457
486
  }
458
487
 
459
- enrichment_request = EnrichmentIndexRequest(
460
- requests=[
461
- EnrichmentRequest(snippet_id=snippet_id, text=snippet.content)
462
- for snippet_id, snippet in snippet_map.items()
463
- ]
464
- )
488
+ enrichment_requests = [
489
+ GenericEnrichmentRequest(
490
+ id=snippet_id,
491
+ text=snippet.content,
492
+ system_prompt=SUMMARIZATION_SYSTEM_PROMPT,
493
+ )
494
+ for snippet_id, snippet in snippet_map.items()
495
+ ]
465
496
 
466
497
  processed = 0
467
- async for result in self.enrichment_service.enrich_documents(
468
- enrichment_request
469
- ):
470
- snippet = snippet_map[result.snippet_id]
498
+ async for result in self.enricher_service.enrich(enrichment_requests):
499
+ snippet = snippet_map[result.id]
471
500
  snippet.enrichments.append(
472
501
  Enrichment(type=EnrichmentType.SUMMARIZATION, content=result.text)
473
502
  )
@@ -526,6 +555,134 @@ class CommitIndexingApplicationService:
526
555
  processed += len(result)
527
556
  await step.set_current(processed, "Creating text embeddings for commit")
528
557
 
558
+ async def process_architecture_discovery(
559
+ self, repository_id: int, commit_sha: str
560
+ ) -> None:
561
+ """Handle ARCHITECTURE_DISCOVERY task - discover physical architecture."""
562
+ async with self.operation.create_child(
563
+ TaskOperation.CREATE_ARCHITECTURE_ENRICHMENT_FOR_COMMIT,
564
+ trackable_type=TrackableType.KODIT_REPOSITORY,
565
+ trackable_id=repository_id,
566
+ ) as step:
567
+ await step.set_total(3)
568
+
569
+ # Check if architecture enrichment already exists for this commit
570
+ enrichment_repo = self.enrichment_v2_repository
571
+ existing_enrichments = await enrichment_repo.enrichments_for_entity_type(
572
+ entity_type="git_commit",
573
+ entity_ids=[commit_sha],
574
+ )
575
+
576
+ # Check if architecture enrichment already exists
577
+ has_architecture = any(
578
+ enrichment.type == "architecture" for enrichment in existing_enrichments
579
+ )
580
+
581
+ if has_architecture:
582
+ await step.skip("Architecture enrichment already exists for commit")
583
+ return
584
+
585
+ # Get repository path
586
+ repo = await self.repo_repository.get_by_id(repository_id)
587
+ if not repo.cloned_path:
588
+ raise ValueError(f"Repository {repository_id} has never been cloned")
589
+
590
+ await step.set_current(1, "Discovering physical architecture")
591
+
592
+ # Discover architecture
593
+ architecture_narrative = (
594
+ await self.architecture_service.discover_architecture(repo.cloned_path)
595
+ )
596
+
597
+ await step.set_current(2, "Enriching architecture notes with LLM")
598
+
599
+ # Enrich the architecture narrative through the enricher
600
+ enrichment_request = GenericEnrichmentRequest(
601
+ id=commit_sha,
602
+ text=ARCHITECTURE_ENRICHMENT_TASK_PROMPT.format(
603
+ architecture_narrative=architecture_narrative,
604
+ ),
605
+ system_prompt=ARCHITECTURE_ENRICHMENT_SYSTEM_PROMPT,
606
+ )
607
+
608
+ enriched_content = ""
609
+ async for response in self.enricher_service.enrich([enrichment_request]):
610
+ enriched_content = response.text
611
+
612
+ # Create and save architecture enrichment with enriched content
613
+ architecture_enrichment = PhysicalArchitectureEnrichment(
614
+ entity_id=commit_sha,
615
+ content=enriched_content,
616
+ )
617
+
618
+ await self.enrichment_v2_repository.bulk_save_enrichments(
619
+ [architecture_enrichment]
620
+ )
621
+
622
+ await step.set_current(3, "Architecture enrichment completed")
623
+
624
+ async def process_api_docs(self, repository_id: int, commit_sha: str) -> None:
625
+ """Handle API_DOCS task - generate API documentation."""
626
+ async with self.operation.create_child(
627
+ TaskOperation.CREATE_PUBLIC_API_DOCS_FOR_COMMIT,
628
+ trackable_type=TrackableType.KODIT_REPOSITORY,
629
+ trackable_id=repository_id,
630
+ ) as step:
631
+ # Check if API docs already exist for this commit
632
+ existing_enrichments = (
633
+ await self.enrichment_v2_repository.enrichments_for_entity_type(
634
+ entity_type="git_commit",
635
+ entity_ids=[commit_sha],
636
+ )
637
+ )
638
+
639
+ has_api_docs = any(
640
+ e.type == ENRICHMENT_TYPE_USAGE
641
+ and e.subtype == ENRICHMENT_SUBTYPE_API_DOCS
642
+ for e in existing_enrichments
643
+ )
644
+
645
+ if has_api_docs:
646
+ await step.skip("API docs already exist for commit")
647
+ return
648
+
649
+ # Get repository for metadata
650
+ repo = await self.repo_repository.get_by_id(repository_id)
651
+ if not repo:
652
+ raise ValueError(f"Repository {repository_id} not found")
653
+ str(repo.sanitized_remote_uri)
654
+
655
+ commit = await self.git_commit_repository.get_by_sha(commit_sha)
656
+
657
+ # Group files by language
658
+ lang_files_map: dict[str, list[GitFile]] = defaultdict(list)
659
+ for file in commit.files:
660
+ try:
661
+ lang = LanguageMapping.get_language_for_extension(file.extension)
662
+ except ValueError:
663
+ continue
664
+ lang_files_map[lang].append(file)
665
+
666
+ all_enrichments = []
667
+ extractor = APIDocExtractor()
668
+
669
+ await step.set_total(len(lang_files_map))
670
+ for i, (lang, lang_files) in enumerate(lang_files_map.items()):
671
+ await step.set_current(i, f"Extracting API docs for {lang}")
672
+ enrichments = extractor.extract_api_docs(
673
+ lang_files,
674
+ lang,
675
+ commit_sha,
676
+ include_private=False,
677
+ )
678
+ all_enrichments.extend(enrichments)
679
+
680
+ # Save all enrichments
681
+ if all_enrichments:
682
+ await self.enrichment_v2_repository.bulk_save_enrichments(
683
+ all_enrichments
684
+ )
685
+
529
686
  async def _new_snippets_for_type(
530
687
  self, all_snippets: list[SnippetV2], embedding_type: EmbeddingType
531
688
  ) -> list[SnippetV2]:
kodit/config.py CHANGED
@@ -66,9 +66,9 @@ class Endpoint(BaseModel):
66
66
  default=None,
67
67
  description="Unix socket path for local communication (e.g., /tmp/openai.sock)",
68
68
  )
69
- timeout: float | None = Field(
70
- default=None,
71
- description="Request timeout in seconds (default: 30.0)",
69
+ timeout: float = Field(
70
+ default=60,
71
+ description="Request timeout in seconds",
72
72
  )
73
73
  extra_params: dict[str, Any] | None = Field(
74
74
  default=None,
kodit/domain/enrichments/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Enrichment domain package."""
kodit/domain/enrichments/architecture/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Architecture enrichment package."""
kodit/domain/enrichments/architecture/architecture.py ADDED
@@ -0,0 +1,20 @@
1
+ """Architecture enrichment domain entity."""
2
+
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+
6
+ from kodit.domain.enrichments.enrichment import (
7
+ CommitEnrichment,
8
+ )
9
+
10
+ ENRICHMENT_TYPE_ARCHITECTURE = "architecture"
11
+
12
+
13
+ @dataclass
14
+ class ArchitectureEnrichment(CommitEnrichment, ABC):
15
+ """Enrichment containing physical architecture discovery for a commit."""
16
+
17
+ @property
18
+ def type(self) -> str:
19
+ """Return the enrichment type."""
20
+ return ENRICHMENT_TYPE_ARCHITECTURE
kodit/domain/enrichments/architecture/physical/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Physical architecture enrichment package."""
kodit/domain/enrichments/architecture/physical/discovery_notes.py ADDED
@@ -0,0 +1,14 @@
1
+ """Physical architecture domain value objects."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
+ @dataclass
7
+ class ArchitectureDiscoveryNotes:
8
+ """Rich, narrative observations about repository architecture for LLM consumption.""" # noqa: E501
9
+
10
+ repository_context: str # High-level overview and discovery scope
11
+ component_observations: list[str] # Detailed findings about each component
12
+ connection_observations: list[str] # How components interact and communicate
13
+ infrastructure_observations: list[str] # Deployment, config, operational patterns
14
+ discovery_metadata: str # Methodology, confidence, limitations, timestamp
kodit/domain/enrichments/architecture/physical/formatter.py ADDED
@@ -0,0 +1,11 @@
1
+ """Physical architecture formatter protocol."""
2
+
3
+ from typing import Any, Protocol
4
+
5
+
6
+ class PhysicalArchitectureFormatter(Protocol):
7
+ """Formatter for converting architecture discovery notes to LLM-optimized text."""
8
+
9
+ def format_for_llm(self, notes: Any) -> str:
10
+ """Format architecture discovery notes for LLM consumption."""
11
+ ...
kodit/domain/enrichments/architecture/physical/physical.py ADDED
@@ -0,0 +1,17 @@
1
+ """Physical architecture enrichment domain entity."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from kodit.domain.enrichments.architecture.architecture import ArchitectureEnrichment
6
+
7
+ ENRICHMENT_SUBTYPE_PHYSICAL = "physical"
8
+
9
+
10
+ @dataclass
11
+ class PhysicalArchitectureEnrichment(ArchitectureEnrichment):
12
+ """Enrichment containing physical architecture discovery for a commit."""
13
+
14
+ @property
15
+ def subtype(self) -> str | None:
16
+ """Return the enrichment subtype."""
17
+ return ENRICHMENT_SUBTYPE_PHYSICAL
kodit/domain/enrichments/development/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Development enrichment package."""
kodit/domain/enrichments/development/development.py ADDED
@@ -0,0 +1,18 @@
1
+ """Development enrichment domain entity."""
2
+
3
+ from abc import ABC
4
+ from dataclasses import dataclass
5
+
6
+ from kodit.domain.enrichments.enrichment import CommitEnrichment
7
+
8
+ ENRICHMENT_TYPE_DEVELOPMENT = "development"
9
+
10
+
11
+ @dataclass
12
+ class DevelopmentEnrichment(CommitEnrichment, ABC):
13
+ """Enrichment containing development discovery for a commit."""
14
+
15
+ @property
16
+ def type(self) -> str:
17
+ """Return the enrichment type."""
18
+ return ENRICHMENT_TYPE_DEVELOPMENT
kodit/domain/enrichments/development/snippet/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Snippet enrichment package."""
kodit/domain/enrichments/development/snippet/snippet.py ADDED
@@ -0,0 +1,21 @@
1
+ """Snippet enrichment domain entity."""
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from kodit.domain.enrichments.development.development import DevelopmentEnrichment
6
+
7
+ ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY = "snippet_summary"
8
+
9
+
10
+ @dataclass
11
+ class SnippetEnrichment(DevelopmentEnrichment):
12
+ """Enrichment specific to code snippets."""
13
+
14
+ @property
15
+ def subtype(self) -> str | None:
16
+ """Return the enrichment subtype."""
17
+ return ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY
18
+
19
+ def entity_type_key(self) -> str:
20
+ """Return the entity type key this enrichment is for."""
21
+ return "snippet_v2"
kodit/domain/enrichments/enricher.py ADDED
@@ -0,0 +1,17 @@
1
+ """Enricher interface."""
2
+
3
+ from collections.abc import AsyncGenerator
4
+ from typing import Protocol
5
+
6
+ from kodit.domain.enrichments.request import EnrichmentRequest
7
+ from kodit.domain.enrichments.response import EnrichmentResponse
8
+
9
+
10
+ class Enricher(Protocol):
11
+ """Interface for text enrichment with custom prompts."""
12
+
13
+ def enrich(
14
+ self, requests: list[EnrichmentRequest]
15
+ ) -> AsyncGenerator[EnrichmentResponse, None]:
16
+ """Enrich a list of requests with custom system prompts."""
17
+ ...