kodit 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/bm25/bm25.py +1 -1
- kodit/cli.py +22 -52
- kodit/config.py +43 -3
- kodit/embedding/embedding.py +161 -10
- kodit/indexing/{models.py → indexing_models.py} +2 -2
- kodit/indexing/{repository.py → indexing_repository.py} +5 -5
- kodit/indexing/{service.py → indexing_service.py} +17 -12
- kodit/log.py +1 -0
- kodit/mcp.py +27 -34
- kodit/migrations/env.py +3 -3
- kodit/search/__init__.py +1 -0
- kodit/search/search_repository.py +178 -0
- kodit/{retreival/service.py → search/search_service.py} +40 -17
- kodit/snippets/snippets.py +3 -1
- kodit/{sources/repository.py → source/source_repository.py} +2 -7
- kodit/{sources/service.py → source/source_service.py} +2 -2
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/METADATA +3 -1
- kodit-0.1.12.dist-info/RECORD +44 -0
- kodit/retreival/__init__.py +0 -1
- kodit/retreival/repository.py +0 -183
- kodit-0.1.10.dist-info/RECORD +0 -44
- /kodit/embedding/{models.py → embedding_models.py} +0 -0
- /kodit/{sources → source}/__init__.py +0 -0
- /kodit/{sources/models.py → source/source_models.py} +0 -0
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/WHEEL +0 -0
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/entry_points.txt +0 -0
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/licenses/LICENSE +0 -0
kodit/retreival/repository.py
DELETED
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
"""Repository for retrieving code snippets and search results.
|
|
2
|
-
|
|
3
|
-
This module provides the RetrievalRepository class which handles all database operations
|
|
4
|
-
related to searching and retrieving code snippets, including string-based searches
|
|
5
|
-
and their associated file information.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import math
|
|
9
|
-
from typing import Any, TypeVar
|
|
10
|
-
|
|
11
|
-
import pydantic
|
|
12
|
-
from sqlalchemy import (
|
|
13
|
-
ColumnElement,
|
|
14
|
-
Float,
|
|
15
|
-
cast,
|
|
16
|
-
desc,
|
|
17
|
-
func,
|
|
18
|
-
literal,
|
|
19
|
-
select,
|
|
20
|
-
)
|
|
21
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
22
|
-
from sqlalchemy.orm import Mapped
|
|
23
|
-
|
|
24
|
-
from kodit.embedding.models import Embedding, EmbeddingType
|
|
25
|
-
from kodit.indexing.models import Snippet
|
|
26
|
-
from kodit.sources.models import File
|
|
27
|
-
|
|
28
|
-
# Generic type variable. NOTE(review): nothing in the visible part of this
# module references it; confirm whether it is still needed.
T = TypeVar("T")
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class RetrievalResult(pydantic.BaseModel):
    """A single search hit returned by the retrieval repository.

    Pairs a snippet with the URI of the file it belongs to, plus a relevance
    score for the hit.
    """

    # Identifier of the matching snippet.
    id: int
    # URI of the file that contains the snippet.
    uri: str
    # Text content of the snippet.
    content: str
    # Relevance score assigned to this hit.
    score: float
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class RetrievalRepository:
    """Repository for retrieving code snippets and search results.

    Wraps a SQLAlchemy async session and exposes the SELECT queries used for
    search: substring matching on snippet content, snippet ID listing and
    lookup, and ranking of stored embeddings by cosine similarity.
    """

    # Escape character used when building LIKE patterns from caller input.
    _LIKE_ESCAPE = "\\"

    def __init__(self, session: AsyncSession) -> None:
        """Initialize the retrieval repository.

        Args:
            session: The SQLAlchemy async session to use for database operations.

        """
        self.session = session

    @staticmethod
    def _escape_like(text: str) -> str:
        """Escape LIKE wildcards so that ``text`` is matched literally.

        Args:
            text: Raw text that will be embedded in a LIKE/ILIKE pattern.

        Returns:
            ``text`` with the escape character, ``%`` and ``_`` each prefixed
            by the escape character.

        """
        escape = RetrievalRepository._LIKE_ESCAPE
        # The escape character itself must be doubled first, otherwise the
        # escapes added for "%" and "_" would be escaped a second time.
        return (
            text.replace(escape, escape * 2)
            .replace("%", escape + "%")
            .replace("_", escape + "_")
        )

    async def string_search(self, query: str) -> list[RetrievalResult]:
        """Search for snippets containing the given query string.

        Performs an ``ILIKE`` substring match against snippet contents and
        returns at most 10 matches. The query applies no ordering, so which
        rows are returned when more than 10 match is up to the database.

        Args:
            query: The string to search for within snippet contents. It is
                matched literally: ``%`` and ``_`` are not wildcards.

        Returns:
            A list of RetrievalResult objects containing the matching snippets
            and the URIs of their files. Every result has a score of 1.0.

        """
        pattern = f"%{self._escape_like(query)}%"
        search_query = (
            select(Snippet, File)
            .join(File, Snippet.file_id == File.id)
            .where(Snippet.content.ilike(pattern, escape=self._LIKE_ESCAPE))
            .limit(10)
        )
        rows = await self.session.execute(search_query)

        return [
            RetrievalResult(
                id=snippet.id,
                uri=file.uri,
                content=snippet.content,
                score=1.0,
            )
            for snippet, file in rows.all()
        ]

    async def list_snippet_ids(self) -> list[int]:
        """List all snippet IDs.

        Returns:
            A list with the ID of every snippet.

        """
        query = select(Snippet.id)
        rows = await self.session.execute(query)
        return list(rows.scalars().all())

    async def list_snippets_by_ids(self, ids: list[int]) -> list[RetrievalResult]:
        """List snippets by IDs.

        Args:
            ids: The snippet IDs to fetch.

        Returns:
            A list of snippets in the same order as the input IDs. IDs that
            have no matching snippet are skipped rather than raising.
            Every result has a score of 1.0.

        """
        if not ids:
            # Nothing to look up; avoid a pointless round trip.
            return []

        query = (
            select(Snippet, File)
            .where(Snippet.id.in_(ids))
            .join(File, Snippet.file_id == File.id)
        )
        rows = await self.session.execute(query)

        # Create a dictionary for O(1) lookup of results by ID
        id_to_result = {
            snippet.id: RetrievalResult(
                id=snippet.id,
                uri=file.uri,
                content=snippet.content,
                score=1.0,
            )
            for snippet, file in rows.all()
        }

        # Return results in the same order as input IDs, tolerating IDs that
        # the query did not return.
        return [id_to_result[i] for i in ids if i in id_to_result]

    async def list_semantic_results(
        self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
    ) -> list[tuple[int, float]]:
        """Rank stored embeddings by cosine similarity to a query embedding.

        Args:
            embedding_type: Only stored embeddings of this type are considered.
            embedding: The query vector to compare against.
            top_k: Maximum number of results to return.

        Returns:
            A list of ``(snippet_id, cosine_similarity)`` tuples ordered from
            most to least similar, containing at most ``top_k`` entries.

        """
        cosine_similarity = cosine_similarity_json(Embedding.embedding, embedding)

        query = (
            select(Embedding, cosine_similarity)
            .where(Embedding.type == embedding_type)
            .order_by(desc(cosine_similarity))
            .limit(top_k)
        )
        rows = await self.session.execute(query)
        return [(row.snippet_id, similarity) for row, similarity in rows.all()]
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def cosine_similarity_json(
    col: Mapped[Any], query_vec: list[float]
) -> ColumnElement[Any]:
    """Build a SQL expression for the cosine similarity of ``col`` and a vector.

    Intended for a *fixed-length* vector stored as a JSON array in SQLite:
    each element is read with ``json_extract`` and the arithmetic is done
    entirely inside the SQL statement. Only the norm of the query vector is
    computed in Python.

    Args:
        col: The column containing the JSON array of floats
        query_vec: The query vector to compare against

    Returns:
        A SQLAlchemy expression labelled ``cosine_similarity``

    """

    def component(index: int) -> ColumnElement[Any]:
        # The index-th element of the stored JSON array, cast to a float.
        return cast(func.json_extract(col, f"$[{index}]"), Float)

    # The query norm does not depend on the row, so it is computed up front.
    query_norm = math.sqrt(sum(value * value for value in query_vec))

    # Dot product: stored element times the matching query element.
    dot_terms = [
        component(position) * literal(float(value))
        for position, value in enumerate(query_vec)
    ]
    dot_product = sum(dot_terms)

    # Norm of the stored vector over the same number of elements as the query.
    squared_terms = [
        component(position) * component(position)
        for position in range(len(query_vec))
    ]
    stored_norm = func.sqrt(sum(squared_terms))

    denominator = stored_norm * literal(query_norm)
    return (dot_product / denominator).label("cosine_similarity")
|
kodit-0.1.10.dist-info/RECORD
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
|
-
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=HsFzfK6RsoG-sFr1kLh3t-q2dq2wNylFvX6VW_rx5vM,513
|
|
4
|
-
kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
|
|
5
|
-
kodit/cli.py,sha256=qEQy_Sd64cEV5KzYsKlGLyMxFQ4fFi-as4QO8CRrKYo,8978
|
|
6
|
-
kodit/config.py,sha256=hQshTMW_8jpk94zP-1JaxowgmW_LrT534ipHFaRUGMw,3006
|
|
7
|
-
kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
|
|
8
|
-
kodit/log.py,sha256=PhyzQktEyyHaNr78W0wmL-RSRuq311DQ-d0l-EKTGmQ,5417
|
|
9
|
-
kodit/mcp.py,sha256=qp16vRb0TY46-xQy179iWgYebr6Ju_Z91ZSzZnWPHuk,4771
|
|
10
|
-
kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
|
|
11
|
-
kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
|
|
12
|
-
kodit/bm25/bm25.py,sha256=NtlcLrgqJja11qDGKz_U6tuYWaS9sfbyS-TcA__rBKs,2284
|
|
13
|
-
kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
|
|
14
|
-
kodit/embedding/embedding.py,sha256=X2Fa-eXhQwp__QFj9yxIhvlCAiYVQSaZ2y18ZtG5_1Y,1810
|
|
15
|
-
kodit/embedding/models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
|
|
16
|
-
kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
|
|
17
|
-
kodit/indexing/models.py,sha256=sZIhGwvL4Dw0QTWFxrjfWctSLkAoDT6fv5DlGz8-Fr8,1258
|
|
18
|
-
kodit/indexing/repository.py,sha256=eIaIbqNs9Z3XTVymZ5Zl5uPWveqiEXNo0JTa-y-Tl24,5430
|
|
19
|
-
kodit/indexing/service.py,sha256=hhQ_6vI7J7LnNgOLbsO4B07TOJvEePqqFviiqr3TL_M,6579
|
|
20
|
-
kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
|
|
21
|
-
kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
|
|
22
|
-
kodit/migrations/env.py,sha256=bzB6vod_tO-X2F_G671FwYSAn0pyhNw8M1kG4MgidO8,2444
|
|
23
|
-
kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
|
|
24
|
-
kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
|
|
25
|
-
kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
|
|
26
|
-
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
27
|
-
kodit/retreival/__init__.py,sha256=33PhJU-3gtsqYq6A1UkaLNKbev_Zee9Lq6dYC59-CsA,69
|
|
28
|
-
kodit/retreival/repository.py,sha256=ZXHUYJrsmHCII9PUgYzLfN0EhiyWw7eJ3_rKCvMrSpY,5465
|
|
29
|
-
kodit/retreival/service.py,sha256=gGp74jnqhyCDF5vKOrN2dJKDnhlfR4HZaxADSrjTb4s,3778
|
|
30
|
-
kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
|
|
31
|
-
kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
|
|
32
|
-
kodit/snippets/snippets.py,sha256=QumvhltWoxXw41SyKb-RbSvAr3m6V3lUy9n0AI8jcto,1409
|
|
33
|
-
kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
|
|
34
|
-
kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
|
|
35
|
-
kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
|
|
36
|
-
kodit/sources/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
|
|
37
|
-
kodit/sources/models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
|
|
38
|
-
kodit/sources/repository.py,sha256=mGJrHWH6Uo8YABdoojHFbzaf_jW-2ywJpAHIa1gnc3U,3401
|
|
39
|
-
kodit/sources/service.py,sha256=aV_qiqkU2kMBNPvye5_v4NnZiK-lJ64rQdmFtBtsQaY,9243
|
|
40
|
-
kodit-0.1.10.dist-info/METADATA,sha256=wi-_Yl0ZPw898Mc1QjtvNQRl5-4xkdfBUlf6isC7Wr0,2288
|
|
41
|
-
kodit-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
42
|
-
kodit-0.1.10.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
43
|
-
kodit-0.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
44
|
-
kodit-0.1.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|