kodit 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (100)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +59 -24
  3. kodit/application/factories/reporting_factory.py +16 -7
  4. kodit/application/factories/server_factory.py +311 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +543 -0
  7. kodit/application/services/indexing_worker_service.py +13 -46
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +70 -54
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -763
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +3 -96
  14. kodit/database.py +38 -1
  15. kodit/domain/entities/__init__.py +276 -0
  16. kodit/domain/entities/git.py +190 -0
  17. kodit/domain/factories/__init__.py +1 -0
  18. kodit/domain/factories/git_repo_factory.py +76 -0
  19. kodit/domain/protocols.py +270 -46
  20. kodit/domain/services/bm25_service.py +5 -1
  21. kodit/domain/services/embedding_service.py +3 -0
  22. kodit/domain/services/git_repository_service.py +429 -0
  23. kodit/domain/services/git_service.py +300 -0
  24. kodit/domain/services/task_status_query_service.py +19 -0
  25. kodit/domain/value_objects.py +113 -147
  26. kodit/infrastructure/api/client/__init__.py +0 -2
  27. kodit/infrastructure/api/v1/__init__.py +0 -4
  28. kodit/infrastructure/api/v1/dependencies.py +105 -44
  29. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  30. kodit/infrastructure/api/v1/routers/commits.py +271 -0
  31. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  32. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  33. kodit/infrastructure/api/v1/routers/search.py +31 -14
  34. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  35. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  36. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  37. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  38. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  39. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  40. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  41. kodit/infrastructure/api/v1/schemas/task_status.py +41 -0
  42. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  43. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  44. kodit/infrastructure/cloning/git/git_python_adaptor.py +467 -0
  45. kodit/infrastructure/cloning/git/working_copy.py +10 -3
  46. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  47. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  48. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  49. kodit/infrastructure/enrichment/litellm_enrichment_provider.py +19 -26
  50. kodit/infrastructure/enrichment/local_enrichment_provider.py +41 -30
  51. kodit/infrastructure/indexing/fusion_service.py +1 -1
  52. kodit/infrastructure/mappers/git_mapper.py +193 -0
  53. kodit/infrastructure/mappers/snippet_mapper.py +106 -0
  54. kodit/infrastructure/mappers/task_mapper.py +5 -44
  55. kodit/infrastructure/mappers/task_status_mapper.py +85 -0
  56. kodit/infrastructure/reporting/db_progress.py +23 -0
  57. kodit/infrastructure/reporting/log_progress.py +13 -38
  58. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  59. kodit/infrastructure/slicing/slicer.py +32 -31
  60. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  61. kodit/infrastructure/sqlalchemy/entities.py +428 -131
  62. kodit/infrastructure/sqlalchemy/git_branch_repository.py +263 -0
  63. kodit/infrastructure/sqlalchemy/git_commit_repository.py +337 -0
  64. kodit/infrastructure/sqlalchemy/git_repository.py +252 -0
  65. kodit/infrastructure/sqlalchemy/git_tag_repository.py +257 -0
  66. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +484 -0
  67. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  68. kodit/infrastructure/sqlalchemy/task_status_repository.py +91 -0
  69. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  70. kodit/mcp.py +12 -26
  71. kodit/migrations/env.py +1 -1
  72. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  73. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  74. kodit/migrations/versions/b9cd1c3fd762_add_task_status.py +77 -0
  75. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  76. kodit/py.typed +0 -0
  77. kodit/utils/dump_openapi.py +7 -4
  78. kodit/utils/path_utils.py +29 -0
  79. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/METADATA +3 -3
  80. kodit-0.5.0.dist-info/RECORD +137 -0
  81. kodit/application/factories/code_indexing_factory.py +0 -193
  82. kodit/application/services/auto_indexing_service.py +0 -103
  83. kodit/application/services/code_indexing_application_service.py +0 -393
  84. kodit/domain/entities.py +0 -323
  85. kodit/domain/services/index_query_service.py +0 -70
  86. kodit/domain/services/index_service.py +0 -267
  87. kodit/infrastructure/api/client/index_client.py +0 -57
  88. kodit/infrastructure/api/v1/routers/indexes.py +0 -119
  89. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  90. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  91. kodit/infrastructure/cloning/__init__.py +0 -1
  92. kodit/infrastructure/cloning/metadata.py +0 -98
  93. kodit/infrastructure/mappers/index_mapper.py +0 -345
  94. kodit/infrastructure/reporting/tdqm_progress.py +0 -73
  95. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  96. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  97. kodit-0.4.2.dist-info/RECORD +0 -119
  98. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/WHEEL +0 -0
  99. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/entry_points.txt +0 -0
  100. {kodit-0.4.2.dist-info → kodit-0.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -14,59 +14,79 @@ def create_embedding_repository(
14
14
  session_factory: Callable[[], AsyncSession],
15
15
  ) -> "SqlAlchemyEmbeddingRepository":
16
16
  """Create an embedding repository."""
17
- uow = SqlAlchemyUnitOfWork(session_factory=session_factory)
18
- return SqlAlchemyEmbeddingRepository(uow)
17
+ return SqlAlchemyEmbeddingRepository(session_factory=session_factory)
19
18
 
20
19
 
21
20
  class SqlAlchemyEmbeddingRepository:
22
21
  """SQLAlchemy implementation of embedding repository."""
23
22
 
24
- def __init__(self, uow: SqlAlchemyUnitOfWork) -> None:
23
+ def __init__(self, session_factory: Callable[[], AsyncSession]) -> None:
25
24
  """Initialize the SQLAlchemy embedding repository."""
26
- self.uow = uow
25
+ self.session_factory = session_factory
27
26
 
28
27
  async def create_embedding(self, embedding: Embedding) -> None:
29
28
  """Create a new embedding record in the database."""
30
- async with self.uow:
31
- self.uow.session.add(embedding)
29
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
30
+ session.add(embedding)
32
31
 
33
32
  async def get_embedding_by_snippet_id_and_type(
34
33
  self, snippet_id: int, embedding_type: EmbeddingType
35
34
  ) -> Embedding | None:
36
35
  """Get an embedding by its snippet ID and type."""
37
- async with self.uow:
36
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
38
37
  query = select(Embedding).where(
39
38
  Embedding.snippet_id == snippet_id,
40
39
  Embedding.type == embedding_type,
41
40
  )
42
- result = await self.uow.session.execute(query)
41
+ result = await session.execute(query)
43
42
  return result.scalar_one_or_none()
44
43
 
45
44
  async def list_embeddings_by_type(
46
45
  self, embedding_type: EmbeddingType
47
46
  ) -> list[Embedding]:
48
47
  """List all embeddings of a given type."""
49
- async with self.uow:
48
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
50
49
  query = select(Embedding).where(Embedding.type == embedding_type)
51
- result = await self.uow.session.execute(query)
50
+ result = await session.execute(query)
52
51
  return list(result.scalars())
53
52
 
54
- async def delete_embeddings_by_snippet_id(self, snippet_id: int) -> None:
53
+ async def delete_embeddings_by_snippet_id(self, snippet_id: str) -> None:
55
54
  """Delete all embeddings for a snippet."""
56
- async with self.uow:
55
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
57
56
  query = select(Embedding).where(Embedding.snippet_id == snippet_id)
58
- result = await self.uow.session.execute(query)
57
+ result = await session.execute(query)
59
58
  embeddings = result.scalars().all()
60
59
  for embedding in embeddings:
61
- await self.uow.session.delete(embedding)
60
+ await session.delete(embedding)
61
+
62
+ async def list_embeddings_by_snippet_ids_and_type(
63
+ self, snippet_ids: list[str], embedding_type: EmbeddingType
64
+ ) -> list[Embedding]:
65
+ """Get all embeddings for the given snippet IDs."""
66
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
67
+ query = select(Embedding).where(
68
+ Embedding.snippet_id.in_(snippet_ids),
69
+ Embedding.type == embedding_type,
70
+ )
71
+ result = await session.execute(query)
72
+ return list(result.scalars())
73
+
74
+ async def get_embeddings_by_snippet_ids(
75
+ self, snippet_ids: list[str]
76
+ ) -> list[Embedding]:
77
+ """Get all embeddings for the given snippet IDs."""
78
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
79
+ query = select(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
80
+ result = await session.execute(query)
81
+ return list(result.scalars())
62
82
 
63
83
  async def list_semantic_results(
64
84
  self,
65
85
  embedding_type: EmbeddingType,
66
86
  embedding: list[float],
67
87
  top_k: int = 10,
68
- snippet_ids: list[int] | None = None,
69
- ) -> list[tuple[int, float]]:
88
+ snippet_ids: list[str] | None = None,
89
+ ) -> list[tuple[str, float]]:
70
90
  """List semantic results using cosine similarity.
71
91
 
72
92
  This implementation fetches all embeddings of the given type and computes
@@ -97,8 +117,8 @@ class SqlAlchemyEmbeddingRepository:
97
117
  return self._get_top_k_results(similarities, embeddings, top_k)
98
118
 
99
119
  async def _list_embedding_values(
100
- self, embedding_type: EmbeddingType, snippet_ids: list[int] | None = None
101
- ) -> list[tuple[int, list[float]]]:
120
+ self, embedding_type: EmbeddingType, snippet_ids: list[str] | None = None
121
+ ) -> list[tuple[str, list[float]]]:
102
122
  """List all embeddings of a given type from the database.
103
123
 
104
124
  Args:
@@ -109,7 +129,7 @@ class SqlAlchemyEmbeddingRepository:
109
129
  List of (snippet_id, embedding) tuples
110
130
 
111
131
  """
112
- async with self.uow:
132
+ async with SqlAlchemyUnitOfWork(self.session_factory) as session:
113
133
  query = select(Embedding.snippet_id, Embedding.embedding).where(
114
134
  Embedding.type == embedding_type
115
135
  )
@@ -118,11 +138,11 @@ class SqlAlchemyEmbeddingRepository:
118
138
  if snippet_ids is not None:
119
139
  query = query.where(Embedding.snippet_id.in_(snippet_ids))
120
140
 
121
- rows = await self.uow.session.execute(query)
141
+ rows = await session.execute(query)
122
142
  return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
123
143
 
124
144
  def _prepare_vectors(
125
- self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
145
+ self, embeddings: list[tuple[str, list[float]]], query_embedding: list[float]
126
146
  ) -> tuple[np.ndarray, np.ndarray]:
127
147
  """Convert embeddings to numpy arrays.
128
148
 
@@ -191,9 +211,9 @@ class SqlAlchemyEmbeddingRepository:
191
211
  def _get_top_k_results(
192
212
  self,
193
213
  similarities: np.ndarray,
194
- embeddings: list[tuple[int, list[float]]],
214
+ embeddings: list[tuple[str, list[float]]],
195
215
  top_k: int,
196
- ) -> list[tuple[int, float]]:
216
+ ) -> list[tuple[str, float]]:
197
217
  """Get top-k results by similarity score.
198
218
 
199
219
  Args: