kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70) hide show
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +56 -29
  3. kodit/application/services/code_indexing_application_service.py +152 -118
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +282 -0
  10. kodit/domain/value_objects.py +143 -65
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/slicing/__init__.py +1 -0
  24. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  25. kodit/infrastructure/slicing/slicer.py +894 -0
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  27. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  28. kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
  29. kodit/mcp.py +0 -7
  30. kodit/migrations/env.py +1 -1
  31. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
  32. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  33. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  34. kodit/migrations/versions/85155663351e_initial.py +64 -48
  35. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
  39. kodit-0.3.4.dist-info/RECORD +89 -0
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -215
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -286
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  54. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  55. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  56. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  57. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  58. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  59. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  60. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  61. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  62. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
  63. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  64. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  65. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  66. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  67. kodit-0.3.2.dist-info/RECORD +0 -103
  68. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
  69. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ import numpy as np
4
4
  from sqlalchemy import select
5
5
  from sqlalchemy.ext.asyncio import AsyncSession
6
6
 
7
- from kodit.domain.entities import Embedding, EmbeddingType
7
+ from kodit.infrastructure.sqlalchemy.entities import Embedding, EmbeddingType
8
8
 
9
9
 
10
10
  class SqlAlchemyEmbeddingRepository:
@@ -0,0 +1,203 @@
1
+ """SQLAlchemy entities."""
2
+
3
+ from datetime import UTC, datetime
4
+ from enum import Enum
5
+
6
+ from git import Actor
7
+ from sqlalchemy import (
8
+ DateTime,
9
+ ForeignKey,
10
+ Integer,
11
+ String,
12
+ UnicodeText,
13
+ UniqueConstraint,
14
+ )
15
+ from sqlalchemy import Enum as SQLAlchemyEnum
16
+ from sqlalchemy.ext.asyncio import AsyncAttrs
17
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
18
+ from sqlalchemy.types import JSON
19
+
20
+
21
class Base(AsyncAttrs, DeclarativeBase):
    """Declarative base from which every ORM model in this module derives.

    Combines SQLAlchemy's ``AsyncAttrs`` mixin with ``DeclarativeBase`` and
    declares no columns of its own.
    """
23
+
24
+
25
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(UTC)


class CommonMixin:
    """Columns shared by every model: an integer key and two timestamps."""

    # Auto-incrementing integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)

    # Defaults to the current UTC time when no value is supplied on insert.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utc_now,
    )

    # Same insert default as created_at, and refreshed to the current UTC
    # time whenever the row is updated.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=_utc_now,
        onupdate=_utc_now,
    )
37
+
38
+
39
class SourceType(Enum):
    """Kinds of code source that a ``Source`` row can record in its ``type``."""

    # Column default used by ``Source.type``.
    UNKNOWN = 0
    # NOTE(review): presumably a source read from a local directory - confirm.
    FOLDER = 1
    # NOTE(review): presumably a source cloned from a Git repository - confirm.
    GIT = 2
45
+
46
+
47
class Source(Base, CommonMixin):
    """ORM model for a tracked code source, stored in the ``sources`` table.

    Attributes:
        id: The unique identifier for the source.
        created_at: Timestamp when the source was created.
        updated_at: Timestamp when the source was last updated.
        uri: The URI of the source; unique across all rows.
        cloned_path: Path to a copy of the source on the local filesystem.
        type: The kind of source, as a ``SourceType`` member.

    """

    __tablename__ = "sources"
    uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
    type: Mapped[SourceType] = mapped_column(
        SQLAlchemyEnum(SourceType), default=SourceType.UNKNOWN, index=True
    )

    def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
        """Initialize a new Source instance for typing purposes.

        Args:
            uri: The URI of the source.
            cloned_path: Path to the local copy of the source.
            source_type: The kind of source; stored on the ``type`` attribute.

        """
        super().__init__()
        self.uri = uri
        self.cloned_path = cloned_path
        # The parameter is named ``source_type`` but the column is ``type``.
        self.type = source_type
75
+
76
+
77
class Author(Base, CommonMixin):
    """ORM model for an author, stored in the ``authors`` table.

    A given (name, email) pair may appear only once, enforced by the
    ``uix_author`` unique constraint.
    """

    __tablename__ = "authors"
    __table_args__ = (
        UniqueConstraint("name", "email", name="uix_author"),
    )

    name: Mapped[str] = mapped_column(String(255), index=True)
    email: Mapped[str] = mapped_column(String(255), index=True)

    @staticmethod
    def from_actor(actor: Actor) -> "Author":
        """Build a new ``Author`` from a ``git.Actor``.

        Args:
            actor: The actor whose ``name`` and ``email`` are copied.

        Returns:
            A new ``Author`` carrying the actor's name and email.

        """
        # NOTE(review): an Actor's name/email may be None while both columns
        # are typed as non-optional str - confirm callers always supply values.
        author_name = actor.name
        author_email = actor.email
        return Author(name=author_name, email=author_email)
91
+
92
+
93
class AuthorFileMapping(Base, CommonMixin):
    """Association row linking one author to one file.

    Stored in the ``author_file_mappings`` table. Each (author_id, file_id)
    pair may appear only once, enforced by ``uix_author_file_mapping``.
    """

    __tablename__ = "author_file_mappings"
    __table_args__ = (
        UniqueConstraint(
            "author_id",
            "file_id",
            name="uix_author_file_mapping",
        ),
    )

    # Foreign key to ``authors.id``.
    author_id: Mapped[int] = mapped_column(
        ForeignKey("authors.id"),
        index=True,
    )
    # Foreign key to ``files.id``.
    file_id: Mapped[int] = mapped_column(
        ForeignKey("files.id"),
        index=True,
    )
104
+
105
+
106
class File(Base, CommonMixin):
    """ORM model for a file that belongs to a source (``files`` table)."""

    __tablename__ = "files"

    # Foreign key to the owning row in ``sources``.
    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"))
    mime_type: Mapped[str] = mapped_column(
        String(255), default="", index=True
    )
    uri: Mapped[str] = mapped_column(String(1024), default="", index=True)
    cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
    # 64 characters wide.
    sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
    size_bytes: Mapped[int] = mapped_column(Integer, default=0)
    extension: Mapped[str] = mapped_column(
        String(255), default="", index=True
    )
    # Integer status code defaulting to 0; the meaning of each value is not
    # defined in this module.
    file_processing_status: Mapped[int] = mapped_column(Integer, default=0)

    def __init__(  # noqa: PLR0913
        self,
        created_at: datetime,
        updated_at: datetime,
        source_id: int,
        mime_type: str,
        uri: str,
        cloned_path: str,
        sha256: str,
        size_bytes: int,
        extension: str,
        file_processing_status: int,
    ) -> None:
        """Initialize a new File instance for typing purposes.

        Every argument is stored unchanged on the attribute of the same name.
        The timestamps are passed explicitly by the caller.
        """
        super().__init__()

        # Timestamps supplied by the caller.
        self.created_at = created_at
        self.updated_at = updated_at

        # Ownership and location.
        self.source_id = source_id
        self.mime_type = mime_type
        self.uri = uri
        self.cloned_path = cloned_path

        # Content metadata and processing state.
        self.sha256 = sha256
        self.size_bytes = size_bytes
        self.extension = extension
        self.file_processing_status = file_processing_status
145
+
146
+
147
class EmbeddingType(Enum):
    """Kinds of embedding that an ``Embedding`` row can record in its ``type``."""

    # NOTE(review): presumably an embedding of snippet code - confirm.
    CODE = 1
    # NOTE(review): presumably an embedding of descriptive text - confirm.
    TEXT = 2
152
+
153
+
154
class Embedding(Base, CommonMixin):
    """ORM model for one embedding vector of a snippet (``embeddings`` table)."""

    __tablename__ = "embeddings"

    # Foreign key to ``snippets.id``.
    snippet_id: Mapped[int] = mapped_column(
        ForeignKey("snippets.id"),
        index=True,
    )
    # Which kind of embedding this row holds.
    type: Mapped[EmbeddingType] = mapped_column(
        SQLAlchemyEnum(EmbeddingType),
        index=True,
    )
    # The vector itself, stored in a JSON column.
    embedding: Mapped[list[float]] = mapped_column(JSON)
164
+
165
+
166
class Index(Base, CommonMixin):
    """ORM model for an index built over a source (``indexes`` table)."""

    __tablename__ = "indexes"

    # Foreign key to ``sources.id``; unique, so a source has at most one index.
    source_id: Mapped[int] = mapped_column(
        ForeignKey("sources.id"),
        unique=True,
        index=True,
    )

    def __init__(self, source_id: int) -> None:
        """Initialize the index.

        Args:
            source_id: Primary key of the source this index belongs to.

        """
        super().__init__()
        self.source_id = source_id
179
+
180
+
181
class Snippet(Base, CommonMixin):
    """ORM model for a snippet tied to a file and an index (``snippets`` table)."""

    __tablename__ = "snippets"

    # Foreign key to ``files.id``.
    file_id: Mapped[int] = mapped_column(
        ForeignKey("files.id"),
        index=True,
    )
    # Foreign key to ``indexes.id``.
    index_id: Mapped[int] = mapped_column(
        ForeignKey("indexes.id"),
        index=True,
    )
    # Both text columns default to the empty string.
    content: Mapped[str] = mapped_column(UnicodeText, default="")
    summary: Mapped[str] = mapped_column(UnicodeText, default="")

    def __init__(
        self,
        file_id: int,
        index_id: int,
        content: str,
        summary: str = "",
    ) -> None:
        """Initialize the snippet.

        Args:
            file_id: Primary key of the file the snippet belongs to.
            index_id: Primary key of the index the snippet belongs to.
            content: The snippet text.
            summary: Optional summary text; empty when omitted.

        """
        super().__init__()
        self.file_id = file_id
        self.index_id = index_id
        self.content = content
        self.summary = summary