kodit 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (57)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +148 -119
  4. kodit/cli.py +49 -52
  5. kodit/domain/entities.py +268 -189
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +225 -92
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/log.py +4 -1
  32. kodit/mcp.py +1 -13
  33. kodit/migrations/env.py +1 -1
  34. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  35. kodit/migrations/versions/4552eb3f23ce_add_summary.py +34 -0
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  39. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/RECORD +42 -45
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -211
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -273
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  54. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -251
  55. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  56. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  57. {kodit-0.3.1.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,251 +0,0 @@
1
- """SQLAlchemy implementation of snippet repository."""
2
-
3
- from collections.abc import Sequence
4
- from pathlib import Path
5
-
6
- from sqlalchemy import delete, or_, select
7
- from sqlalchemy.ext.asyncio import AsyncSession
8
-
9
- from kodit.domain.entities import (
10
- Author,
11
- AuthorFileMapping,
12
- Embedding,
13
- File,
14
- Snippet,
15
- Source,
16
- )
17
- from kodit.domain.repositories import SnippetRepository
18
- from kodit.domain.value_objects import (
19
- LanguageMapping,
20
- MultiSearchRequest,
21
- SnippetListItem,
22
- )
23
-
24
-
25
- class SqlAlchemySnippetRepository(SnippetRepository):
26
- """SQLAlchemy implementation of snippet repository."""
27
-
28
- def __init__(self, session: AsyncSession) -> None:
29
- """Initialize the SQLAlchemy snippet repository.
30
-
31
- Args:
32
- session: The SQLAlchemy async session to use for database operations
33
-
34
- """
35
- self.session = session
36
-
37
- async def get(self, id: int) -> Snippet | None: # noqa: A002
38
- """Get a snippet by ID."""
39
- return await self.session.get(Snippet, id)
40
-
41
- async def save(self, entity: Snippet) -> Snippet:
42
- """Save entity."""
43
- self.session.add(entity)
44
- return entity
45
-
46
- async def delete(self, id: int) -> None: # noqa: A002
47
- """Delete entity by ID."""
48
- snippet = await self.get(id)
49
- if snippet:
50
- await self.session.delete(snippet)
51
-
52
- async def list(self) -> Sequence[Snippet]:
53
- """List all entities."""
54
- return (await self.session.scalars(select(Snippet))).all()
55
-
56
- async def get_by_id(self, snippet_id: int) -> Snippet | None:
57
- """Get a snippet by ID.
58
-
59
- Args:
60
- snippet_id: The ID of the snippet to retrieve
61
-
62
- Returns:
63
- The Snippet instance if found, None otherwise
64
-
65
- """
66
- query = select(Snippet).where(Snippet.id == snippet_id)
67
- result = await self.session.execute(query)
68
- return result.scalar_one_or_none()
69
-
70
- async def get_by_index(self, index_id: int) -> Sequence[Snippet]:
71
- """Get all snippets for an index.
72
-
73
- Args:
74
- index_id: The ID of the index to get snippets for
75
-
76
- Returns:
77
- A list of Snippet instances
78
-
79
- """
80
- query = select(Snippet).where(Snippet.index_id == index_id)
81
- result = await self.session.execute(query)
82
- return list(result.scalars())
83
-
84
- async def delete_by_index(self, index_id: int) -> None:
85
- """Delete all snippets for an index.
86
-
87
- Args:
88
- index_id: The ID of the index to delete snippets for
89
-
90
- """
91
- # First get all snippets for this index
92
- snippets = await self.get_by_index(index_id)
93
-
94
- # Delete all embeddings for these snippets, if there are any
95
- for snippet in snippets:
96
- query = delete(Embedding).where(Embedding.snippet_id == snippet.id)
97
- await self.session.execute(query)
98
-
99
- # Now delete the snippets
100
- query = delete(Snippet).where(Snippet.index_id == index_id)
101
- await self.session.execute(query)
102
-
103
- async def list_snippets(
104
- self, file_path: str | None = None, source_uri: str | None = None
105
- ) -> Sequence[SnippetListItem]:
106
- """List snippets with optional filtering by file path and source URI.
107
-
108
- Args:
109
- file_path: Optional file or directory path to filter by. Can be relative
110
- (uri) or absolute (cloned_path).
111
- source_uri: Optional source URI to filter by. If None, returns snippets from
112
- all sources.
113
-
114
- Returns:
115
- A sequence of SnippetListItem instances matching the criteria
116
-
117
- """
118
- # Build the base query
119
- query = (
120
- select(
121
- Snippet,
122
- File.cloned_path,
123
- Source.cloned_path.label("source_cloned_path"),
124
- Source.uri.label("source_uri"),
125
- )
126
- .join(File, Snippet.file_id == File.id)
127
- .join(Source, File.source_id == Source.id)
128
- )
129
-
130
- # Apply filters
131
- if file_path is not None:
132
- query = query.where(
133
- or_(
134
- File.cloned_path.like(f"%{file_path}%"),
135
- File.uri.like(f"%{file_path}%"),
136
- )
137
- )
138
-
139
- if source_uri is not None:
140
- query = query.where(Source.uri == source_uri)
141
-
142
- result = await self.session.execute(query)
143
- return [
144
- SnippetListItem(
145
- id=snippet.id,
146
- file_path=self._get_relative_path(file_cloned_path, source_cloned_path),
147
- content=snippet.content,
148
- source_uri=source_uri_val,
149
- )
150
- for (
151
- snippet,
152
- file_cloned_path,
153
- source_cloned_path,
154
- source_uri_val,
155
- ) in result.all()
156
- ]
157
-
158
- def _get_relative_path(self, file_path: str, source_path: str) -> str:
159
- """Calculate the relative path of a file from the source root.
160
-
161
- Args:
162
- file_path: The full path to the file
163
- source_path: The full path to the source root
164
-
165
- Returns:
166
- The relative path from the source root
167
-
168
- """
169
- try:
170
- file_path_obj = Path(file_path)
171
- source_path_obj = Path(source_path)
172
- return str(file_path_obj.relative_to(source_path_obj))
173
- except ValueError:
174
- # If the file is not relative to the source, return the filename
175
- return Path(file_path).name
176
-
177
- async def search(self, request: MultiSearchRequest) -> Sequence[SnippetListItem]:
178
- """Search snippets with filters.
179
-
180
- Args:
181
- request: The search request containing queries and optional filters.
182
-
183
- Returns:
184
- A sequence of SnippetListItem instances matching the search criteria.
185
-
186
- """
187
- # Build the base query with joins
188
- query = (
189
- select(
190
- Snippet,
191
- File.cloned_path,
192
- Source.cloned_path.label("source_cloned_path"),
193
- Source.uri.label("source_uri"),
194
- )
195
- .join(File, Snippet.file_id == File.id)
196
- .join(Source, File.source_id == Source.id)
197
- )
198
-
199
- # Apply filters if provided
200
- if request.filters:
201
- filters = request.filters
202
-
203
- # Language filter (using file extension)
204
- if filters.language:
205
- extensions = LanguageMapping.get_extensions_with_fallback(
206
- filters.language
207
- )
208
- query = query.where(File.extension.in_(extensions))
209
-
210
- # Author filter
211
- if filters.author:
212
- query = (
213
- query.join(AuthorFileMapping, File.id == AuthorFileMapping.file_id)
214
- .join(Author, AuthorFileMapping.author_id == Author.id)
215
- .where(Author.name.ilike(f"%{filters.author}%"))
216
- )
217
-
218
- # Date filters
219
- if filters.created_after:
220
- query = query.where(Snippet.created_at >= filters.created_after)
221
-
222
- if filters.created_before:
223
- query = query.where(Snippet.created_at <= filters.created_before)
224
-
225
- # Source repository filter
226
- if filters.source_repo:
227
- query = query.where(Source.uri.like(f"%{filters.source_repo}%"))
228
-
229
- # Only apply top_k limit if there are no search queries
230
- # This ensures that when used for pre-filtering (with search queries),
231
- # all matching snippets are returned for the search services to consider
232
- if request.top_k and not any(
233
- [request.keywords, request.code_query, request.text_query]
234
- ):
235
- query = query.limit(request.top_k)
236
-
237
- result = await self.session.execute(query)
238
- return [
239
- SnippetListItem(
240
- id=snippet.id,
241
- file_path=self._get_relative_path(file_cloned_path, source_cloned_path),
242
- content=snippet.content,
243
- source_uri=source_uri_val,
244
- )
245
- for (
246
- snippet,
247
- file_cloned_path,
248
- source_cloned_path,
249
- source_uri_val,
250
- ) in result.all()
251
- ]
File without changes