kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic; more details are linked from the original diff page.

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -14,59 +14,79 @@ def create_embedding_repository(
14
14
  session_factory: Callable[[], AsyncSession],
15
15
  ) -> "SqlAlchemyEmbeddingRepository":
16
16
  """Create an embedding repository."""
17
- uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
18
- return SqlAlchemyEmbeddingRepository(uow)
17
+ return SqlAlchemyEmbeddingRepository(session_factory=session_factory)
19
18
 
20
19
 
21
20
  class SqlAlchemyEmbeddingRepository:
22
21
  """SQLAlchemy implementation of embedding repository."""
23
22
 
24
- def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
23
+ def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
25
24
  """Initialize the SQLAlchemy embedding repository."""
26
- self.uow = uow
25
+ self.session_factory = session_factory
27
26
 
28
27
  async def create_embedding(self, embedding: Embedding) -> None:
29
28
  """Create a new embedding record in the database."""
30
- async with self.uow:
31
- self.uow.session.add(embedding)
29
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
30
+ session.add(embedding)
32
31
 
33
32
  async def get_embedding_by_snippet_id_and_type(
34
33
  self, snippet_id: int, embedding_type: EmbeddingType
35
34
  ) -> Embedding | None:
36
35
  """Get an embedding by its snippet ID and type."""
37
- async with self.uow:
36
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
38
37
  query = select(Embedding).where(
39
38
  Embedding.snippet_id == snippet_id,
40
39
  Embedding.type == embedding_type,
41
40
  )
42
- result = await self.uow.session.execute(query)
41
+ result = await session.execute(query)
43
42
  return result.scalar_one_or_none()
44
43
 
45
44
  async def list_embeddings_by_type(
46
45
  self, embedding_type: EmbeddingType
47
46
  ) -> list[Embedding]:
48
47
  """List all embeddings of a given type."""
49
- async with self.uow:
48
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
50
49
  query = select(Embedding).where(Embedding.type == embedding_type)
51
- result = await self.uow.session.execute(query)
50
+ result = await session.execute(query)
52
51
  return list(result.scalars())
53
52
 
54
- async def delete_embeddings_by_snippet_id(self, snippet_id: int) -> None:
53
+ async def delete_embeddings_by_snippet_id(self, snippet_id: str) -> None:
55
54
  """Delete all embeddings for a snippet."""
56
- async with self.uow:
55
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
57
56
  query = select(Embedding).where(Embedding.snippet_id == snippet_id)
58
- result = await self.uow.session.execute(query)
57
+ result = await session.execute(query)
59
58
  embeddings = result.scalars().all()
60
59
  for embedding in embeddings:
61
- await self.uow.session.delete(embedding)
60
+ await session.delete(embedding)
61
+
62
+ async def list_embeddings_by_snippet_ids_and_type(
63
+ self, snippet_ids: list[str], embedding_type: EmbeddingType
64
+ ) -> list[Embedding]:
65
+ """Get all embeddings for the given snippet IDs."""
66
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
67
+ query = select(Embedding).where(
68
+ Embedding.snippet_id.in_(snippet_ids),
69
+ Embedding.type == embedding_type,
70
+ )
71
+ result = await session.execute(query)
72
+ return list(result.scalars())
73
+
74
+ async def get_embeddings_by_snippet_ids(
75
+ self, snippet_ids: list[str]
76
+ ) -> list[Embedding]:
77
+ """Get all embeddings for the given snippet IDs."""
78
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
79
+ query = select(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
80
+ result = await session.execute(query)
81
+ return list(result.scalars())
62
82
 
63
83
  async def list_semantic_results(
64
84
  self,
65
85
  embedding_type: EmbeddingType,
66
86
  embedding: list[float],
67
87
  top_k: int = 10,
68
- snippet_ids: list[int] | None = None,
69
- ) -> list[tuple[int, float]]:
88
+ snippet_ids: list[str] | None = None,
89
+ ) -> list[tuple[str, float]]:
70
90
  """List semantic results using cosine similarity.
71
91
 
72
92
  This implementation fetches all embeddings of the given type and computes
@@ -97,8 +117,8 @@ class SqlAlchemyEmbeddingRepository:
97
117
  return self._get_top_k_results(similarities, embeddings, top_k)
98
118
 
99
119
  async def _list_embedding_values(
100
- self, embedding_type: EmbeddingType, snippet_ids: list[int] | None = None
101
- ) -> list[tuple[int, list[float]]]:
120
+ self, embedding_type: EmbeddingType, snippet_ids: list[str] | None = None
121
+ ) -> list[tuple[str, list[float]]]:
102
122
  """List all embeddings of a given type from the database.
103
123
 
104
124
  Args:
@@ -109,7 +129,7 @@ class SqlAlchemyEmbeddingRepository:
109
129
  List of (snippet_id, embedding) tuples
110
130
 
111
131
  """
112
- async with self.uow:
132
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
113
133
  query = select(Embedding.snippet_id, Embedding.embedding).where(
114
134
  Embedding.type == embedding_type
115
135
  )
@@ -118,11 +138,11 @@ class SqlAlchemyEmbeddingRepository:
118
138
  if snippet_ids is not None:
119
139
  query = query.where(Embedding.snippet_id.in_(snippet_ids))
120
140
 
121
- rows = await self.uow.session.execute(query)
141
+ rows = await session.execute(query)
122
142
  return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
123
143
 
124
144
  def _prepare_vectors(
125
- self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
145
+ self, embeddings: list[tuple[str, list[float]]], query_embedding: list[float]
126
146
  ) -> tuple[np.ndarray, np.ndarray]:
127
147
  """Convert embeddings to numpy arrays.
128
148
 
@@ -191,9 +211,9 @@ class SqlAlchemyEmbeddingRepository:
191
211
  def _get_top_k_results(
192
212
  self,
193
213
  similarities: np.ndarray,
194
- embeddings: list[tuple[int, list[float]]],
214
+ embeddings: list[tuple[str, list[float]]],
195
215
  top_k: int,
196
- ) -> list[tuple[int, float]]:
216
+ ) -> list[tuple[str, float]]:
197
217
  """Get top-k results by similarity score.
198
218
 
199
219
  Args:
@@ -0,0 +1,118 @@
1
+ """EnrichmentV2 repository."""
2
+
3
+ from collections.abc import Callable, Sequence
4
+
5
+ import structlog
6
+ from sqlalchemy import delete, select
7
+ from sqlalchemy.ext.asyncio import AsyncSession
8
+
9
+ from kodit.domain.enrichments.enrichment import EnrichmentV2
10
+ from kodit.infrastructure.mappers.enrichment_mapper import EnrichmentMapper
11
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
12
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
13
+
14
+
15
+ class EnrichmentV2Repository:
16
+ """Repository for managing enrichments and their associations."""
17
+
18
+ def __init__(
19
+ self,
20
+ session_factory: Callable[[], AsyncSession],
21
+ ) -> None:
22
+ """Initialize the repository."""
23
+ self.session_factory = session_factory
24
+ self.mapper = EnrichmentMapper()
25
+ self.log = structlog.get_logger(__name__)
26
+
27
+ async def enrichments_for_entity_type(
28
+ self,
29
+ entity_type: str,
30
+ entity_ids: list[str],
31
+ ) -> list[EnrichmentV2]:
32
+ """Get all enrichments for multiple entities of the same type."""
33
+ if not entity_ids:
34
+ return []
35
+
36
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
37
+ stmt = (
38
+ select(
39
+ db_entities.EnrichmentV2,
40
+ db_entities.EnrichmentAssociation.entity_id,
41
+ )
42
+ .join(db_entities.EnrichmentAssociation)
43
+ .where(
44
+ db_entities.EnrichmentAssociation.entity_type == entity_type,
45
+ db_entities.EnrichmentAssociation.entity_id.in_(entity_ids),
46
+ )
47
+ )
48
+
49
+ result = await session.execute(stmt)
50
+ rows = result.all()
51
+
52
+ return [
53
+ self.mapper.to_domain(db_enrichment, entity_type, entity_id)
54
+ for db_enrichment, entity_id in rows
55
+ ]
56
+
57
+ async def bulk_save_enrichments(
58
+ self,
59
+ enrichments: Sequence[EnrichmentV2],
60
+ ) -> None:
61
+ """Bulk save enrichments with their associations."""
62
+ if not enrichments:
63
+ return
64
+
65
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
66
+ enrichment_records = []
67
+ for enrichment in enrichments:
68
+ db_enrichment = db_entities.EnrichmentV2(
69
+ type=enrichment.type,
70
+ subtype=enrichment.subtype,
71
+ content=enrichment.content,
72
+ )
73
+ session.add(db_enrichment)
74
+ enrichment_records.append((enrichment, db_enrichment))
75
+
76
+ await session.flush()
77
+
78
+ for enrichment, db_enrichment in enrichment_records:
79
+ db_association = db_entities.EnrichmentAssociation(
80
+ enrichment_id=db_enrichment.id,
81
+ entity_type=enrichment.entity_type_key(),
82
+ entity_id=enrichment.entity_id,
83
+ )
84
+ session.add(db_association)
85
+
86
+ async def bulk_delete_enrichments(
87
+ self,
88
+ entity_type: str,
89
+ entity_ids: list[str],
90
+ ) -> None:
91
+ """Bulk delete enrichments for multiple entities of the same type."""
92
+ if not entity_ids:
93
+ return
94
+
95
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
96
+ stmt = select(db_entities.EnrichmentAssociation.enrichment_id).where(
97
+ db_entities.EnrichmentAssociation.entity_type == entity_type,
98
+ db_entities.EnrichmentAssociation.entity_id.in_(entity_ids),
99
+ )
100
+ result = await session.execute(stmt)
101
+ enrichment_ids = result.scalars().all()
102
+
103
+ if enrichment_ids:
104
+ await session.execute(
105
+ delete(db_entities.EnrichmentV2).where(
106
+ db_entities.EnrichmentV2.id.in_(enrichment_ids)
107
+ )
108
+ )
109
+
110
+ async def delete_enrichment(self, enrichment_id: int) -> bool:
111
+ """Delete a specific enrichment by ID."""
112
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
113
+ result = await session.execute(
114
+ delete(db_entities.EnrichmentV2).where(
115
+ db_entities.EnrichmentV2.id == enrichment_id
116
+ )
117
+ )
118
+ return result.rowcount > 0