kodit 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (55):
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +77 -28
  3. kodit/application/services/code_indexing_application_service.py +142 -116
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +323 -0
  10. kodit/domain/value_objects.py +150 -60
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/snippet_extraction/factories.py +13 -0
  24. kodit/infrastructure/snippet_extraction/language_detection_service.py +1 -1
  25. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -1
  26. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +1 -1
  27. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  28. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  29. kodit/infrastructure/sqlalchemy/file_repository.py +1 -1
  30. kodit/infrastructure/sqlalchemy/index_repository.py +550 -0
  31. kodit/mcp.py +0 -7
  32. kodit/migrations/env.py +1 -1
  33. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +34 -0
  34. kodit/utils/__init__.py +1 -0
  35. kodit/utils/path_utils.py +54 -0
  36. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/METADATA +1 -1
  37. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/RECORD +40 -44
  38. kodit/domain/enums.py +0 -9
  39. kodit/domain/repositories.py +0 -128
  40. kodit/domain/services/ignore_service.py +0 -45
  41. kodit/domain/services/indexing_service.py +0 -204
  42. kodit/domain/services/snippet_extraction_service.py +0 -89
  43. kodit/domain/services/snippet_service.py +0 -215
  44. kodit/domain/services/source_service.py +0 -85
  45. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  46. kodit/infrastructure/cloning/folder/factory.py +0 -128
  47. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  48. kodit/infrastructure/cloning/git/factory.py +0 -153
  49. kodit/infrastructure/indexing/index_repository.py +0 -286
  50. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  51. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  52. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  53. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/WHEEL +0 -0
  54. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/entry_points.txt +0 -0
  55. {kodit-0.3.2.dist-info → kodit-0.3.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,286 +0,0 @@
1
- """Infrastructure implementation of the index repository."""
2
-
3
- from datetime import UTC, datetime
4
- from typing import TypeVar
5
-
6
- from sqlalchemy import delete, func, select
7
- from sqlalchemy.ext.asyncio import AsyncSession
8
-
9
- from kodit.domain.entities import (
10
- Author,
11
- AuthorFileMapping,
12
- Embedding,
13
- File,
14
- Index,
15
- Snippet,
16
- Source,
17
- )
18
- from kodit.domain.services.indexing_service import IndexRepository
19
- from kodit.domain.value_objects import (
20
- IndexView,
21
- SnippetWithContext,
22
- )
23
-
24
- T = TypeVar("T")
25
-
26
-
27
class SQLAlchemyIndexRepository(IndexRepository):
    """SQLAlchemy implementation of the index repository."""

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the index repository.

        Args:
            session: The SQLAlchemy async session to use for database operations.

        """
        self.session = session

    async def create_index(self, source_id: int) -> IndexView:
        """Create a new index for a source, or return the existing one.

        Args:
            source_id: The ID of the source to create an index for.

        Returns:
            The view of the index that already exists for the source if there
            is one, otherwise the view of the newly created index.

        Raises:
            sqlalchemy.exc.NoResultFound: If no source has the given ID.

        """
        # Creation is idempotent per source: reuse an existing index.
        existing_index = await self.get_index_by_source_id(source_id)
        if existing_index:
            return existing_index

        index = Index(source_id=source_id)
        self.session.add(index)
        # Flush explicitly so the generated primary key is available below
        # even when the session is configured with autoflush disabled.
        await self.session.flush()

        # Get source for the view
        source_query = select(Source).where(Source.id == source_id)
        source_result = await self.session.execute(source_query)
        source = source_result.scalar_one()

        return IndexView(
            id=index.id,
            created_at=index.created_at,
            updated_at=index.updated_at,
            source=source.uri,
            num_snippets=0,
        )

    async def _get_index_view(self, index: Index, source: Source) -> IndexView:
        """Create an IndexView from Index and Source entities.

        Args:
            index: The index entity
            source: The source entity

        Returns:
            The index view, including the current snippet count of the index.

        """
        num_snippets = await self.num_snippets_for_index(index.id)
        return IndexView(
            id=index.id,
            created_at=index.created_at,
            updated_at=index.updated_at,
            source=source.uri,
            num_snippets=num_snippets,
        )

    async def get_index_by_id(self, index_id: int) -> IndexView | None:
        """Get an index by its ID.

        Args:
            index_id: The ID of the index to retrieve.

        Returns:
            The index view if found, None otherwise.

        """
        query = (
            select(Index, Source)
            .join(Source, Index.source_id == Source.id)
            .where(Index.id == index_id)
        )
        result = await self.session.execute(query)
        row = result.first()

        if not row:
            return None

        index, source = row
        return await self._get_index_view(index, source)

    async def get_index_by_source_id(self, source_id: int) -> IndexView | None:
        """Get an index by its source ID.

        Args:
            source_id: The ID of the source to retrieve an index for.

        Returns:
            The index view if found, None otherwise.

        """
        query = (
            select(Index, Source)
            .join(Source, Index.source_id == Source.id)
            .where(Index.source_id == source_id)
        )
        result = await self.session.execute(query)
        row = result.first()

        if not row:
            return None

        index, source = row
        return await self._get_index_view(index, source)

    async def list_indexes(self) -> list[IndexView]:
        """List all indexes.

        Returns:
            A list of index views, one per index that has a matching source.

        """
        # Inner join on purpose: a FULL OUTER JOIN also yields rows where the
        # index (or the source) is None, e.g. a source that has not been
        # indexed yet, and building a view from such a row raises
        # AttributeError.
        query = select(Index, Source).join(Source, Index.source_id == Source.id)
        result = await self.session.execute(query)

        indexes = []
        for index, source in result.tuples():
            index_view = await self._get_index_view(index, source)
            indexes.append(index_view)

        return indexes

    async def update_index_timestamp(self, index_id: int) -> None:
        """Update the timestamp of an index.

        Does nothing when no index has the given ID.

        Args:
            index_id: The ID of the index to update.

        """
        query = select(Index).where(Index.id == index_id)
        result = await self.session.execute(query)
        index = result.scalar_one_or_none()

        if index:
            index.updated_at = datetime.now(UTC)

    async def delete_all_snippets(self, index_id: int) -> None:
        """Delete all snippets for an index, together with their embeddings.

        Args:
            index_id: The ID of the index to delete snippets for.

        """
        # Delete the embeddings first, selecting them through a subquery so
        # the work is a single statement instead of one DELETE per snippet.
        snippet_ids = select(Snippet.id).where(Snippet.index_id == index_id)
        await self.session.execute(
            delete(Embedding).where(Embedding.snippet_id.in_(snippet_ids))
        )

        # Now delete the snippets
        await self.session.execute(delete(Snippet).where(Snippet.index_id == index_id))

    async def get_snippets_for_index(self, index_id: int) -> list[Snippet]:
        """Get all snippets for an index.

        Args:
            index_id: The ID of the index to get snippets for.

        Returns:
            A list of Snippet entities.

        """
        query = select(Snippet).where(Snippet.index_id == index_id)
        result = await self.session.execute(query)
        return list(result.scalars())

    async def add_snippet(self, snippet: dict) -> None:
        """Add a snippet to the database.

        Args:
            snippet: The snippet to add. Must contain the keys ``file_id``,
                ``index_id`` and ``content``; ``summary`` is optional and
                defaults to an empty string.

        Raises:
            KeyError: If a required key is missing from ``snippet``.

        """
        db_snippet = Snippet(
            file_id=snippet["file_id"],
            index_id=snippet["index_id"],
            content=snippet["content"],
            summary=snippet.get("summary", ""),
        )
        self.session.add(db_snippet)

    async def update_snippet_content(self, snippet_id: int, content: str) -> None:
        """Update the content of an existing snippet.

        Does nothing when no snippet has the given ID.

        Args:
            snippet_id: The ID of the snippet to update.
            content: The new content for the snippet.

        """
        query = select(Snippet).where(Snippet.id == snippet_id)
        result = await self.session.execute(query)
        snippet = result.scalar_one_or_none()

        if snippet:
            # SQLAlchemy will automatically track this change
            snippet.content = content

    async def list_snippets_by_ids(self, ids: list[int]) -> list[SnippetWithContext]:
        """List snippets by IDs, with their file, source and authors.

        Args:
            ids: The snippet IDs to look up. Duplicates are allowed.

        Returns:
            One entry per element of ``ids``, in the same order.

        Raises:
            ValueError: If any of the requested IDs does not exist.

        """
        query = (
            select(Snippet, File, Source, Author)
            .where(Snippet.id.in_(ids))
            .join(File, Snippet.file_id == File.id)
            .join(Source, File.source_id == Source.id)
            .outerjoin(AuthorFileMapping, AuthorFileMapping.file_id == File.id)
            .outerjoin(Author, AuthorFileMapping.author_id == Author.id)
        )
        rows = await self.session.execute(query)

        # Group results by snippet ID and collect authors: the outer joins
        # produce one row per (snippet, author) pair.
        id_to_result: dict[int, SnippetWithContext] = {}
        for snippet, file, source, author in rows.all():
            if snippet.id not in id_to_result:
                id_to_result[snippet.id] = SnippetWithContext(
                    snippet=snippet,
                    file=file,
                    source=source,
                    authors=[],
                )
            # Add author if it exists (outer join might return None)
            if author is not None:
                id_to_result[snippet.id].authors.append(author)

        # Check that all IDs are present. Test the missing list itself rather
        # than comparing lengths, so repeated IDs in the request are not
        # mistaken for missing ones.
        missing_ids = [
            snippet_id for snippet_id in ids if snippet_id not in id_to_result
        ]
        if missing_ids:
            msg = f"Some IDs are not present: {missing_ids}"
            raise ValueError(msg)

        # Rebuild the list in the same order that it was passed in
        return [id_to_result[i] for i in ids]

    async def num_snippets_for_index(self, index_id: int) -> int:
        """Get the number of snippets for an index.

        Args:
            index_id: The ID of the index.

        Returns:
            The number of snippets.

        """
        # Name the table explicitly instead of relying on FROM inference
        # from the WHERE clause.
        query = (
            select(func.count())
            .select_from(Snippet)
            .where(Snippet.index_id == index_id)
        )
        result = await self.session.execute(query)
        return result.scalar_one()
@@ -1,37 +0,0 @@
1
- """Factory for creating snippet domain service."""
2
-
3
- from sqlalchemy.ext.asyncio import AsyncSession
4
-
5
- from kodit.domain.services.snippet_service import SnippetDomainService
6
- from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
7
- create_snippet_extraction_domain_service,
8
- )
9
- from kodit.infrastructure.sqlalchemy.file_repository import SqlAlchemyFileRepository
10
- from kodit.infrastructure.sqlalchemy.snippet_repository import (
11
- SqlAlchemySnippetRepository,
12
- )
13
-
14
-
15
def snippet_domain_service_factory(session: AsyncSession) -> SnippetDomainService:
    """Build a snippet domain service wired to the given database session.

    Args:
        session: The database session shared by the snippet and file
            repositories.

    Returns:
        A snippet domain service with its extraction service and both
        repositories already attached.

    """
    # Keyword arguments are evaluated left to right, so the extraction
    # service is created first, then the snippet and file repositories.
    return SnippetDomainService(
        snippet_extraction_service=create_snippet_extraction_domain_service(),
        snippet_repository=SqlAlchemySnippetRepository(session),
        file_repository=SqlAlchemyFileRepository(session),
    )
@@ -1,133 +0,0 @@
1
- """SQLAlchemy repository."""
2
-
3
- from collections.abc import Sequence
4
- from typing import cast
5
-
6
- from sqlalchemy import select
7
- from sqlalchemy.ext.asyncio import AsyncSession
8
-
9
- from kodit.domain.entities import Author, AuthorFileMapping, File, Source, SourceType
10
- from kodit.domain.repositories import AuthorRepository, SourceRepository
11
-
12
-
13
class SqlAlchemySourceRepository(SourceRepository):
    """Source repository backed by a SQLAlchemy async session."""

    def __init__(self, session: AsyncSession) -> None:
        """Keep the session used for every database operation."""
        self._session = session

    async def get(self, id: int) -> Source | None:  # noqa: A002
        """Return the source with the given ID, or None when absent."""
        return await self._session.get(Source, id)

    async def save(self, entity: Source) -> Source:
        """Add the source to the session and hand it back."""
        self._session.add(entity)
        return entity

    async def delete(self, id: int) -> None:  # noqa: A002
        """Delete the source with the given ID; do nothing when absent."""
        found = await self.get(id)
        if not found:
            return
        await self._session.delete(found)

    async def list(self) -> Sequence[Source]:
        """Return every stored source."""
        sources = await self._session.scalars(select(Source))
        return sources.all()

    async def get_by_uri(self, uri: str) -> Source | None:
        """Return a source whose URI equals ``uri``, or None when absent."""
        by_uri = select(Source).where(Source.uri == uri)
        match = await self._session.scalar(by_uri)
        return cast("Source | None", match)

    async def list_by_type(
        self, source_type: SourceType | None = None
    ) -> Sequence[Source]:
        """Return sources of one type, or all sources when no type is given."""
        if source_type is None:
            query = select(Source)
        else:
            query = select(Source).where(Source.type == source_type)
        sources = await self._session.scalars(query)
        return sources.all()

    async def create_file(self, file: File) -> File:
        """Add a new file record to the session and hand it back."""
        self._session.add(file)
        return file

    async def upsert_author(self, author: Author) -> Author:
        """Return the stored author with the same name and email, else add it."""
        # An author counts as existing only when name AND email both match.
        lookup = select(Author).where(
            Author.name == author.name, Author.email == author.email
        )
        stored = cast("Author | None", await self._session.scalar(lookup))

        if not stored:
            # Nothing matched: register the author that was passed in.
            self._session.add(author)
            return author

        return stored

    async def upsert_author_file_mapping(
        self, mapping: AuthorFileMapping
    ) -> AuthorFileMapping:
        """Return the stored mapping for the same author and file, else add it."""
        # A mapping counts as existing when author_id and file_id both match.
        lookup = select(AuthorFileMapping).where(
            AuthorFileMapping.author_id == mapping.author_id,
            AuthorFileMapping.file_id == mapping.file_id,
        )
        stored = cast("AuthorFileMapping | None", await self._session.scalar(lookup))

        if not stored:
            # Nothing matched: register the mapping that was passed in.
            self._session.add(mapping)
            return mapping

        return stored
93
-
94
-
95
class SqlAlchemyAuthorRepository(AuthorRepository):
    """Author repository backed by a SQLAlchemy async session."""

    def __init__(self, session: AsyncSession) -> None:
        """Keep the session used for every database operation."""
        self._session = session

    async def get(self, id: int) -> Author | None:  # noqa: A002
        """Return the author with the given ID, or None when absent."""
        return await self._session.get(Author, id)

    async def save(self, entity: Author) -> Author:
        """Add the author to the session and hand it back."""
        self._session.add(entity)
        return entity

    async def delete(self, id: int) -> None:  # noqa: A002
        """Delete the author with the given ID; do nothing when absent."""
        found = await self.get(id)
        if not found:
            return
        await self._session.delete(found)

    async def list(self) -> Sequence[Author]:
        """Return every stored author."""
        authors = await self._session.scalars(select(Author))
        return authors.all()

    async def get_by_name(self, name: str) -> Author | None:
        """Return an author whose name equals ``name``, or None when absent."""
        by_name = select(Author).where(Author.name == name)
        match = await self._session.scalar(by_name)
        return cast("Author | None", match)

    async def get_by_email(self, email: str) -> Author | None:
        """Return an author whose email equals ``email``, or None when absent."""
        by_email = select(Author).where(Author.email == email)
        match = await self._session.scalar(by_email)
        return cast("Author | None", match)