kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +56 -29
- kodit/application/services/code_indexing_application_service.py +152 -118
- kodit/cli.py +14 -41
- kodit/domain/entities.py +268 -197
- kodit/domain/protocols.py +61 -0
- kodit/domain/services/embedding_service.py +1 -1
- kodit/domain/services/index_query_service.py +66 -0
- kodit/domain/services/index_service.py +282 -0
- kodit/domain/value_objects.py +143 -65
- kodit/infrastructure/cloning/git/working_copy.py +17 -8
- kodit/infrastructure/cloning/metadata.py +37 -67
- kodit/infrastructure/embedding/embedding_factory.py +1 -1
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
- kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
- kodit/infrastructure/git/git_utils.py +1 -63
- kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
- kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/__init__.py +1 -0
- kodit/infrastructure/mappers/index_mapper.py +344 -0
- kodit/infrastructure/slicing/__init__.py +1 -0
- kodit/infrastructure/slicing/language_detection_service.py +18 -0
- kodit/infrastructure/slicing/slicer.py +894 -0
- kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
- kodit/infrastructure/sqlalchemy/entities.py +203 -0
- kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
- kodit/mcp.py +0 -7
- kodit/migrations/env.py +1 -1
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
- kodit/migrations/versions/85155663351e_initial.py +64 -48
- kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
- kodit/utils/__init__.py +1 -0
- kodit/utils/path_utils.py +54 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
- kodit-0.3.4.dist-info/RECORD +89 -0
- kodit/domain/enums.py +0 -9
- kodit/domain/repositories.py +0 -128
- kodit/domain/services/ignore_service.py +0 -45
- kodit/domain/services/indexing_service.py +0 -204
- kodit/domain/services/snippet_extraction_service.py +0 -89
- kodit/domain/services/snippet_service.py +0 -215
- kodit/domain/services/source_service.py +0 -85
- kodit/infrastructure/cloning/folder/__init__.py +0 -1
- kodit/infrastructure/cloning/folder/factory.py +0 -128
- kodit/infrastructure/cloning/folder/working_copy.py +0 -38
- kodit/infrastructure/cloning/git/factory.py +0 -153
- kodit/infrastructure/indexing/index_repository.py +0 -286
- kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
- kodit/infrastructure/snippet_extraction/__init__.py +0 -1
- kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
- kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
- kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
- kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
- kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
- kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
- kodit/infrastructure/sqlalchemy/repository.py +0 -133
- kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
- kodit-0.3.2.dist-info/RECORD +0 -103
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
from sqlalchemy import select
|
|
5
5
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
6
|
|
|
7
|
-
from kodit.
|
|
7
|
+
from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class SqlAlchemyEmbeddingRepository:
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""SQLAlchemy entities."""
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
from git import Actor
|
|
7
|
+
from sqlalchemy import (
|
|
8
|
+
DateTime,
|
|
9
|
+
ForeignKey,
|
|
10
|
+
Integer,
|
|
11
|
+
String,
|
|
12
|
+
UnicodeText,
|
|
13
|
+
UniqueConstraint,
|
|
14
|
+
)
|
|
15
|
+
from sqlalchemy import Enum as SQLAlchemyEnum
|
|
16
|
+
from sqlalchemy.ext.asyncio import AsyncAttrs
|
|
17
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
18
|
+
from sqlalchemy.types import JSON
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Base(AsyncAttrs, DeclarativeBase):
    """Declarative base class that every ORM model in this module extends.

    Combines SQLAlchemy's ``DeclarativeBase`` with the ``AsyncAttrs`` mixin.
    """
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(UTC)


class CommonMixin:
    """Mixin supplying the columns shared by all tables.

    Attributes:
        id: Auto-incrementing integer primary key.
        created_at: Timezone-aware timestamp; defaults to the current UTC time.
        updated_at: Timezone-aware timestamp; defaults to the current UTC time
            and is refreshed through ``onupdate``.

    """

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utc_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utc_now,
        onupdate=_utc_now,
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SourceType(Enum):
    """Enumerates the kinds of source a ``Source`` row can describe."""

    # Default used by the ``Source.type`` column when nothing is specified.
    UNKNOWN = 0
    # NOTE(review): presumably a plain directory on disk -- confirm with callers.
    FOLDER = 1
    # NOTE(review): presumably a git repository -- confirm with callers.
    GIT = 2
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Source(Base, CommonMixin):
    """ORM model for a tracked code source, stored in the ``sources`` table.

    Attributes:
        id: The unique identifier for the source.
        created_at: Timestamp when the source was created.
        updated_at: Timestamp when the source was last updated.
        uri: The URI of the source; unique across all rows.
        cloned_path: Path to a copy of the source on the local filesystem.
        type: The ``SourceType`` of the source; defaults to ``UNKNOWN``.

    """

    __tablename__ = "sources"
    uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
    type: Mapped[SourceType] = mapped_column(
        SQLAlchemyEnum(SourceType),
        default=SourceType.UNKNOWN,
        index=True,
    )

    def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
        """Create a source with an explicitly typed constructor signature.

        Args:
            uri: The URI of the source.
            cloned_path: Path to the local copy of the source.
            source_type: Stored in the ``type`` column.

        """
        super().__init__()
        self.uri, self.cloned_path = uri, cloned_path
        self.type = source_type
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class Author(Base, CommonMixin):
    """ORM model for an author, stored in the ``authors`` table.

    Each ``(name, email)`` pair may appear at most once; this is enforced by
    the ``uix_author`` unique constraint.

    Attributes:
        name: The author's name (up to 255 characters, indexed).
        email: The author's email address (up to 255 characters, indexed).

    """

    __tablename__ = "authors"

    __table_args__ = (UniqueConstraint("name", "email", name="uix_author"),)

    name: Mapped[str] = mapped_column(String(255), index=True)
    email: Mapped[str] = mapped_column(String(255), index=True)

    @classmethod
    def from_actor(cls, actor: Actor) -> "Author":
        """Create an Author from a GitPython ``Actor``.

        Args:
            actor: The git actor whose name and email are copied.

        Returns:
            A new ``Author`` instance carrying the actor's name and email,
            with a missing name or email replaced by an empty string.

        """
        # GitPython declares ``Actor.name`` and ``Actor.email`` as optional,
        # whereas both columns here are non-nullable strings. Fall back to ""
        # so an actor with a missing field does not produce a NULL value.
        return cls(name=actor.name or "", email=actor.email or "")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class AuthorFileMapping(Base, CommonMixin):
    """Association row linking one author to one file.

    Stored in the ``author_file_mappings`` table. A given
    ``(author_id, file_id)`` pair may appear only once, enforced by the
    ``uix_author_file_mapping`` unique constraint.

    Attributes:
        author_id: Foreign key to ``authors.id``.
        file_id: Foreign key to ``files.id``.

    """

    __tablename__ = "author_file_mappings"

    __table_args__ = (
        UniqueConstraint(
            "author_id",
            "file_id",
            name="uix_author_file_mapping",
        ),
    )

    author_id: Mapped[int] = mapped_column(
        ForeignKey("authors.id"),
        index=True,
    )
    file_id: Mapped[int] = mapped_column(
        ForeignKey("files.id"),
        index=True,
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class File(Base, CommonMixin):
    """ORM model for a single file of a source, stored in the ``files`` table.

    Attributes:
        source_id: Foreign key to ``sources.id``.
        mime_type: MIME type string; defaults to ``""``.
        uri: URI of the file; defaults to ``""``.
        cloned_path: Path of the file inside the local copy.
        sha256: SHA-256 digest column (64 characters); defaults to ``""``.
        size_bytes: File size in bytes; defaults to ``0``.
        extension: File extension; defaults to ``""``.
        file_processing_status: Integer status value; defaults to ``0``.
            NOTE(review): the meaning of each value is defined outside this
            module -- confirm against the code that writes it.

    """

    __tablename__ = "files"

    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
    mime_type: Mapped[str] = mapped_column(String(255), default="", index=True)
    uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
    sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
    size_bytes: Mapped[int] = mapped_column(Integer, default=0)
    extension: Mapped[str] = mapped_column(String(255), default="", index=True)
    file_processing_status: Mapped[int] = mapped_column(Integer, default=0)

    def __init__(  # noqa: PLR0913
        self,
        created_at: datetime,
        updated_at: datetime,
        source_id: int,
        mime_type: str,
        uri: str,
        cloned_path: str,
        sha256: str,
        size_bytes: int,
        extension: str,
        file_processing_status: int,
    ) -> None:
        """Create a file row with an explicitly typed constructor signature.

        Every argument is copied to the attribute of the same name. Unlike
        the column defaults, the timestamps must be supplied by the caller.
        """
        super().__init__()

        # Timestamps are provided explicitly rather than left to the defaults.
        self.created_at, self.updated_at = created_at, updated_at

        # Ownership and location.
        self.source_id = source_id
        self.uri = uri
        self.cloned_path = cloned_path

        # Content metadata.
        self.mime_type = mime_type
        self.sha256 = sha256
        self.size_bytes = size_bytes
        self.extension = extension

        self.file_processing_status = file_processing_status
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class EmbeddingType(Enum):
    """Enumerates the kinds of embedding an ``Embedding`` row can hold."""

    # Numbering starts at 1; there is no zero-valued member.
    CODE = 1
    TEXT = 2
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Embedding(Base, CommonMixin):
    """ORM model for a stored embedding, kept in the ``embeddings`` table.

    Attributes:
        snippet_id: Foreign key to ``snippets.id``.
        type: The ``EmbeddingType`` of this embedding.
        embedding: The vector itself, persisted as a JSON list of floats.

    """

    __tablename__ = "embeddings"

    snippet_id: Mapped[int] = mapped_column(
        ForeignKey("snippets.id"),
        index=True,
    )
    type: Mapped[EmbeddingType] = mapped_column(
        SQLAlchemyEnum(EmbeddingType),
        index=True,
    )
    embedding: Mapped[list[float]] = mapped_column(JSON)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class Index(Base, CommonMixin):
    """ORM model for an index over a source, stored in the ``indexes`` table.

    Attributes:
        source_id: Foreign key to ``sources.id``. The column is unique, so a
            source can have at most one index row.

    """

    __tablename__ = "indexes"

    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id"),
        unique=True,
        index=True,
    )

    def __init__(self, source_id: int) -> None:
        """Create an index row for the source identified by ``source_id``."""
        super().__init__()
        self.source_id = source_id
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class Snippet(Base, CommonMixin):
    """ORM model for a code snippet, stored in the ``snippets`` table.

    Attributes:
        file_id: Foreign key to ``files.id``.
        index_id: Foreign key to ``indexes.id``.
        content: The snippet text; defaults to ``""``.
        summary: A summary of the snippet; defaults to ``""``.

    """

    __tablename__ = "snippets"

    file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
    index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
    content: Mapped[str] = mapped_column(UnicodeText, default="")
    summary: Mapped[str] = mapped_column(UnicodeText, default="")

    def __init__(
        self,
        file_id: int,
        index_id: int,
        content: str,
        summary: str = "",
    ) -> None:
        """Create a snippet row.

        Args:
            file_id: Id of the file the snippet belongs to.
            index_id: Id of the index the snippet belongs to.
            content: The snippet text.
            summary: Optional summary text; empty when omitted.

        """
        super().__init__()
        # Foreign keys first, then the text payload.
        self.file_id, self.index_id = file_id, index_id
        self.content, self.summary = content, summary
|