kodit 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (55)
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +142 -116
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +150 -60
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/mcp.py +0 -7
  32. kodit/migrations/env.py +1 -1
  33. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  34. kodit/utils/__init__.py +1 -0
  35. kodit/utils/path_utils.py +54 -0
  36. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  37. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/RECORD +40 -44
  38. kodit/domain/enums.py +0 -9
  39. kodit/domain/repositories.py +0 -128
  40. kodit/domain/services/ignore_service.py +0 -45
  41. kodit/domain/services/indexing_service.py +0 -204
  42. kodit/domain/services/snippet_extraction_service.py +0 -89
  43. kodit/domain/services/snippet_service.py +0 -215
  44. kodit/domain/services/source_service.py +0 -85
  45. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  46. kodit/infrastructure/cloning/folder/factory.py +0 -128
  47. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  48. kodit/infrastructure/cloning/git/factory.py +0 -153
  49. kodit/infrastructure/indexing/index_repository.py +0 -286
  50. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  51. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  52. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  53. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  54. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  55. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,259 +0,0 @@
1
- """SQLAlchemy implementation of snippet repository."""
2
-
3
- import builtins
4
- from collections.abc import Sequence
5
- from pathlib import Path
6
- from typing import Any
7
-
8
- from sqlalchemy import delete, or_, select
9
- from sqlalchemy.ext.asyncio import AsyncSession
10
-
11
- from kodit.domain.entities import (
12
- Author,
13
- AuthorFileMapping,
14
- Embedding,
15
- File,
16
- Snippet,
17
- Source,
18
- )
19
- from kodit.domain.repositories import SnippetRepository
20
- from kodit.domain.value_objects import (
21
- LanguageMapping,
22
- MultiSearchRequest,
23
- SnippetWithContext,
24
- )
25
-
26
-
27
- class SqlAlchemySnippetRepository(SnippetRepository):
28
- """SQLAlchemy implementation of snippet repository."""
29
-
30
- def __init__(self, session: AsyncSession) -> None:
31
- """Initialize the SQLAlchemy snippet repository.
32
-
33
- Args:
34
- session: The SQLAlchemy async session to use for database operations
35
-
36
- """
37
- self.session = session
38
-
39
- async def get(self, id: int) -> Snippet | None: # noqa: A002
40
- """Get a snippet by ID."""
41
- return await self.session.get(Snippet, id)
42
-
43
- async def save(self, entity: Snippet) -> Snippet:
44
- """Save entity."""
45
- self.session.add(entity)
46
- return entity
47
-
48
- async def delete(self, id: int) -> None: # noqa: A002
49
- """Delete entity by ID."""
50
- snippet = await self.get(id)
51
- if snippet:
52
- await self.session.delete(snippet)
53
-
54
- async def list(self) -> Sequence[Snippet]:
55
- """List all entities."""
56
- return (await self.session.scalars(select(Snippet))).all()
57
-
58
- async def get_by_id(self, snippet_id: int) -> Snippet | None:
59
- """Get a snippet by ID.
60
-
61
- Args:
62
- snippet_id: The ID of the snippet to retrieve
63
-
64
- Returns:
65
- The Snippet instance if found, None otherwise
66
-
67
- """
68
- query = select(Snippet).where(Snippet.id == snippet_id)
69
- result = await self.session.execute(query)
70
- return result.scalar_one_or_none()
71
-
72
- async def get_by_index(self, index_id: int) -> Sequence[Snippet]:
73
- """Get all snippets for an index.
74
-
75
- Args:
76
- index_id: The ID of the index to get snippets for
77
-
78
- Returns:
79
- A list of Snippet instances
80
-
81
- """
82
- query = select(Snippet).where(Snippet.index_id == index_id)
83
- result = await self.session.execute(query)
84
- return list(result.scalars())
85
-
86
- async def delete_by_index(self, index_id: int) -> None:
87
- """Delete all snippets for an index.
88
-
89
- Args:
90
- index_id: The ID of the index to delete snippets for
91
-
92
- """
93
- # First get all snippets for this index
94
- snippets = await self.get_by_index(index_id)
95
-
96
- # Delete all embeddings for these snippets, if there are any
97
- for snippet in snippets:
98
- query = delete(Embedding).where(Embedding.snippet_id == snippet.id)
99
- await self.session.execute(query)
100
-
101
- # Now delete the snippets
102
- query = delete(Snippet).where(Snippet.index_id == index_id)
103
- await self.session.execute(query)
104
-
105
- async def list_snippets(
106
- self, file_path: str | None = None, source_uri: str | None = None
107
- ) -> Sequence[SnippetWithContext]:
108
- """List snippets with optional filtering by file path and source URI.
109
-
110
- Args:
111
- file_path: Optional file or directory path to filter by. Can be relative
112
- (uri) or absolute (cloned_path).
113
- source_uri: Optional source URI to filter by. If None, returns snippets from
114
- all sources.
115
-
116
- Returns:
117
- A sequence of SnippetWithContext instances matching the criteria
118
-
119
- """
120
- # Build the base query with joins for all required entities
121
- query = self._build_base_query()
122
-
123
- # Apply filters
124
- if file_path is not None:
125
- query = query.where(
126
- or_(
127
- File.cloned_path.like(f"%{file_path}%"),
128
- File.uri.like(f"%{file_path}%"),
129
- )
130
- )
131
-
132
- if source_uri is not None:
133
- query = query.where(Source.uri == source_uri)
134
-
135
- result = await self.session.execute(query)
136
- return self._process_results(result)
137
-
138
- def _get_relative_path(self, file_path: str, source_path: str) -> str:
139
- """Calculate the relative path of a file from the source root.
140
-
141
- Args:
142
- file_path: The full path to the file
143
- source_path: The full path to the source root
144
-
145
- Returns:
146
- The relative path from the source root
147
-
148
- """
149
- try:
150
- file_path_obj = Path(file_path)
151
- source_path_obj = Path(source_path)
152
- return str(file_path_obj.relative_to(source_path_obj))
153
- except ValueError:
154
- # If the file is not relative to the source, return the filename
155
- return Path(file_path).name
156
-
157
- def _apply_filters(self, query: Any, filters: Any) -> Any:
158
- """Apply filters to the query.
159
-
160
- Args:
161
- query: The base query to apply filters to
162
- filters: The filters to apply
163
-
164
- Returns:
165
- The modified query with filters applied
166
-
167
- """
168
- if not filters:
169
- return query
170
-
171
- # Language filter (using file extension)
172
- if filters.language:
173
- extensions = LanguageMapping.get_extensions_with_fallback(filters.language)
174
- query = query.where(File.extension.in_(extensions))
175
-
176
- # Author filter
177
- if filters.author:
178
- query = query.where(Author.name.ilike(f"%{filters.author}%"))
179
-
180
- # Date filters
181
- if filters.created_after:
182
- query = query.where(Snippet.created_at >= filters.created_after)
183
-
184
- if filters.created_before:
185
- query = query.where(Snippet.created_at <= filters.created_before)
186
-
187
- # Source repository filter
188
- if filters.source_repo:
189
- query = query.where(Source.uri.like(f"%{filters.source_repo}%"))
190
-
191
- return query
192
-
193
- def _build_base_query(self) -> Any:
194
- """Build the base query with joins for all required entities.
195
-
196
- Returns:
197
- The base query with joins
198
-
199
- """
200
- return (
201
- select(Snippet, File, Source, Author)
202
- .join(File, Snippet.file_id == File.id)
203
- .join(Source, File.source_id == Source.id)
204
- .outerjoin(AuthorFileMapping, AuthorFileMapping.file_id == File.id)
205
- .outerjoin(Author, AuthorFileMapping.author_id == Author.id)
206
- )
207
-
208
- def _process_results(self, result: Any) -> builtins.list[SnippetWithContext]:
209
- """Process query results into SnippetWithContext objects.
210
-
211
- Args:
212
- result: The query result
213
-
214
- Returns:
215
- List of SnippetWithContext objects
216
-
217
- """
218
- # Group results by snippet ID and collect authors
219
- id_to_result: dict[int, SnippetWithContext] = {}
220
- for snippet, file, source, author in result.all():
221
- if snippet.id not in id_to_result:
222
- id_to_result[snippet.id] = SnippetWithContext(
223
- snippet=snippet,
224
- file=file,
225
- source=source,
226
- authors=[],
227
- )
228
- # Add author if it exists (outer join might return None)
229
- if author is not None:
230
- id_to_result[snippet.id].authors.append(author)
231
-
232
- return list(id_to_result.values())
233
-
234
- async def search(self, request: MultiSearchRequest) -> Sequence[SnippetWithContext]:
235
- """Search snippets with filters.
236
-
237
- Args:
238
- request: The search request containing queries and optional filters.
239
-
240
- Returns:
241
- A sequence of SnippetWithContext instances matching the search criteria.
242
-
243
- """
244
- # Build the base query with joins for all required entities
245
- query = self._build_base_query()
246
-
247
- # Apply filters if provided
248
- query = self._apply_filters(query, request.filters)
249
-
250
- # Only apply top_k limit if there are no search queries
251
- # This ensures that when used for pre-filtering (with search queries),
252
- # all matching snippets are returned for the search services to consider
253
- if request.top_k and not any(
254
- [request.keywords, request.code_query, request.text_query]
255
- ):
256
- query = query.limit(request.top_k)
257
-
258
- result = await self.session.execute(query)
259
- return self._process_results(result)
File without changes