kodit-0.4.3-py3-none-any.whl → kodit-0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit has been flagged as potentially problematic. (The link to further details was not preserved in this text export.)

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
@@ -1,9 +1,9 @@
1
1
  """VectorChord BM25 repository implementation."""
2
2
 
3
- from typing import Any
3
+ from collections.abc import Callable
4
4
 
5
5
  import structlog
6
- from sqlalchemy import Result, TextClause, bindparam, text
6
+ from sqlalchemy import bindparam, text
7
7
  from sqlalchemy.ext.asyncio import AsyncSession
8
8
 
9
9
  from kodit.domain.services.bm25_service import BM25Repository
@@ -13,6 +13,7 @@ from kodit.domain.value_objects import (
13
13
  SearchRequest,
14
14
  SearchResult,
15
15
  )
16
+ from kodit.infrastructure.sqlalchemy.unit_of_work import SqlAlchemyUnitOfWork
16
17
 
17
18
  TABLE_NAME = "vectorchord_bm25_documents"
18
19
  INDEX_NAME = f"{TABLE_NAME}_idx"
@@ -29,13 +30,17 @@ SET search_path TO
29
30
  CREATE_BM25_TABLE = f"""
30
31
  CREATE TABLE IF NOT EXISTS {TABLE_NAME} (
31
32
  id SERIAL PRIMARY KEY,
32
- snippet_id BIGINT NOT NULL,
33
+ snippet_id VARCHAR(255) NOT NULL,
33
34
  passage TEXT NOT NULL,
34
35
  embedding bm25vector,
35
36
  UNIQUE(snippet_id)
36
37
  )
37
38
  """
38
-
39
+ CHECK_EXISTING_IDS = f"""
40
+ SELECT snippet_id
41
+ FROM {TABLE_NAME}
42
+ WHERE snippet_id = ANY(:snippet_ids)
43
+ """ # noqa: S608
39
44
  CREATE_BM25_INDEX = f"""
40
45
  CREATE INDEX IF NOT EXISTS {INDEX_NAME}
41
46
  ON {TABLE_NAME}
@@ -103,14 +108,14 @@ WHERE snippet_id IN :snippet_ids
103
108
  class VectorChordBM25Repository(BM25Repository):
104
109
  """VectorChord BM25 repository implementation."""
105
110
 
106
- def __init__(self, session: AsyncSession) -> None:
111
+ def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
107
112
  """Initialize the VectorChord BM25 repository.
108
113
 
109
114
  Args:
110
115
  session: The SQLAlchemy async session to use for database operations
111
116
 
112
117
  """
113
- self.__session = session
118
+ self.session_factory = session_factory
114
119
  self._initialized = False
115
120
  self.log = structlog.get_logger(__name__)
116
121
 
@@ -127,41 +132,39 @@ class VectorChordBM25Repository(BM25Repository):
127
132
 
128
133
  async def _create_extensions(self) -> None:
129
134
  """Create the necessary extensions."""
130
- await self.__session.execute(text(CREATE_VCHORD_EXTENSION))
131
- await self.__session.execute(text(CREATE_PG_TOKENIZER))
132
- await self.__session.execute(text(CREATE_VCHORD_BM25))
133
- await self.__session.execute(text(SET_SEARCH_PATH))
134
- await self._commit()
135
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
136
+ await session.execute(text(CREATE_VCHORD_EXTENSION))
137
+ await session.execute(text(CREATE_PG_TOKENIZER))
138
+ await session.execute(text(CREATE_VCHORD_BM25))
139
+ await session.execute(text(SET_SEARCH_PATH))
135
140
 
136
141
  async def _create_tokenizer_if_not_exists(self) -> None:
137
142
  """Create the tokenizer if it doesn't exist."""
138
- # Check if tokenizer exists in the catalog
139
- result = await self.__session.execute(text(TOKENIZER_NAME_CHECK_QUERY))
140
- if result.scalar_one_or_none() is None:
141
- # Tokenizer doesn't exist, create it
142
- await self.__session.execute(text(LOAD_TOKENIZER))
143
- await self._commit()
143
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
144
+ # Check if tokenizer exists in the catalog
145
+ result = await session.execute(text(TOKENIZER_NAME_CHECK_QUERY))
146
+ if result.scalar_one_or_none() is None:
147
+ # Tokenizer doesn't exist, create it
148
+ await session.execute(text(LOAD_TOKENIZER))
144
149
 
145
150
  async def _create_tables(self) -> None:
146
151
  """Create the necessary tables in the correct order."""
147
- await self.__session.execute(text(CREATE_BM25_TABLE))
148
- await self.__session.execute(text(CREATE_BM25_INDEX))
149
- await self._commit()
150
-
151
- async def _execute(
152
- self, query: TextClause, param_list: list[Any] | dict[str, Any] | None = None
153
- ) -> Result:
154
- """Execute a query."""
155
- if not self._initialized:
156
- await self._initialize()
157
- return await self.__session.execute(query, param_list)
158
-
159
- async def _commit(self) -> None:
160
- """Commit the session."""
161
- await self.__session.commit()
152
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
153
+ await session.execute(text(CREATE_BM25_TABLE))
154
+ await session.execute(text(CREATE_BM25_INDEX))
155
+
156
+ async def _get_existing_ids(self, snippet_ids: list[str]) -> set[int]:
157
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
158
+ result = await session.execute(
159
+ text(CHECK_EXISTING_IDS), {"snippet_ids": snippet_ids}
160
+ )
161
+ return {row[0] for row in result.fetchall()}
162
162
 
163
163
  async def index_documents(self, request: IndexRequest) -> None:
164
164
  """Index documents for BM25 search."""
165
+ if not self._initialized:
166
+ await self._initialize()
167
+
165
168
  # Filter out any documents that don't have a snippet_id or text
166
169
  valid_documents = [
167
170
  doc
@@ -173,21 +176,35 @@ class VectorChordBM25Repository(BM25Repository):
173
176
  self.log.warning("Corpus is empty, skipping bm25 index")
174
177
  return
175
178
 
176
- # Execute inserts
177
- await self._execute(
178
- text(INSERT_QUERY),
179
- [
180
- {"snippet_id": doc.snippet_id, "passage": doc.text}
181
- for doc in valid_documents
182
- ],
179
+ # Filter out documents that have already been indexed
180
+ existing_ids = await self._get_existing_ids(
181
+ [doc.snippet_id for doc in valid_documents]
183
182
  )
183
+ valid_documents = [
184
+ doc for doc in valid_documents if doc.snippet_id not in existing_ids
185
+ ]
184
186
 
185
- # Tokenize the new documents with schema qualification
186
- await self._execute(text(UPDATE_QUERY))
187
- await self._commit()
187
+ if not valid_documents:
188
+ self.log.info("No new documents to index")
189
+ return
190
+
191
+ # Execute inserts
192
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
193
+ await session.execute(
194
+ text(INSERT_QUERY),
195
+ [
196
+ {"snippet_id": doc.snippet_id, "passage": doc.text}
197
+ for doc in valid_documents
198
+ ],
199
+ )
200
+
201
+ # Tokenize the new documents with schema qualification
202
+ await session.execute(text(UPDATE_QUERY))
188
203
 
189
204
  async def search(self, request: SearchRequest) -> list[SearchResult]:
190
205
  """Search documents using BM25."""
206
+ if not self._initialized:
207
+ await self._initialize()
191
208
  if not request.query or request.query.strip() == "":
192
209
  return []
193
210
 
@@ -203,22 +220,21 @@ class VectorChordBM25Repository(BM25Repository):
203
220
  limit=request.top_k,
204
221
  )
205
222
 
206
- try:
207
- result = await self._execute(sql)
223
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
224
+ result = await session.execute(sql)
208
225
  rows = result.mappings().all()
209
226
 
210
227
  return [
211
228
  SearchResult(snippet_id=row["snippet_id"], score=row["bm25_score"])
212
229
  for row in rows
213
230
  ]
214
- except Exception as e:
215
- msg = f"Error during BM25 search: {e}"
216
- raise RuntimeError(msg) from e
217
231
 
218
232
  async def delete_documents(self, request: DeleteRequest) -> None:
219
233
  """Delete documents from the index."""
220
- await self._execute(
221
- text(DELETE_QUERY).bindparams(bindparam("snippet_ids", expanding=True)),
222
- {"snippet_ids": request.snippet_ids},
223
- )
224
- await self._commit()
234
+ if not self._initialized:
235
+ await self._initialize()
236
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
237
+ await session.execute(
238
+ text(DELETE_QUERY).bindparams(bindparam("snippet_ids", expanding=True)),
239
+ {"snippet_ids": request.snippet_ids},
240
+ )