kodit 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/bm25/bm25.py +1 -1
- kodit/cli.py +22 -52
- kodit/config.py +43 -3
- kodit/embedding/embedding.py +161 -10
- kodit/indexing/{models.py → indexing_models.py} +2 -2
- kodit/indexing/{repository.py → indexing_repository.py} +5 -5
- kodit/indexing/{service.py → indexing_service.py} +17 -12
- kodit/log.py +1 -0
- kodit/mcp.py +27 -34
- kodit/migrations/env.py +3 -3
- kodit/search/__init__.py +1 -0
- kodit/search/search_repository.py +178 -0
- kodit/{retreival/service.py → search/search_service.py} +40 -17
- kodit/snippets/snippets.py +3 -1
- kodit/{sources/repository.py → source/source_repository.py} +2 -7
- kodit/{sources/service.py → source/source_service.py} +2 -2
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/METADATA +3 -1
- kodit-0.1.12.dist-info/RECORD +44 -0
- kodit/retreival/__init__.py +0 -1
- kodit/retreival/repository.py +0 -183
- kodit-0.1.10.dist-info/RECORD +0 -44
- /kodit/embedding/{models.py → embedding_models.py} +0 -0
- /kodit/{sources → source}/__init__.py +0 -0
- /kodit/{sources/models.py → source/source_models.py} +0 -0
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/WHEEL +0 -0
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/entry_points.txt +0 -0
- {kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/licenses/LICENSE +0 -0
kodit/mcp.py
CHANGED
@@ -12,10 +12,11 @@ from pydantic import Field
 from sqlalchemy.ext.asyncio import AsyncSession

 from kodit._version import version
-from kodit.config import
+from kodit.config import AppContext
 from kodit.database import Database
-from kodit.
-from kodit.
+from kodit.embedding.embedding import embedding_factory
+from kodit.search.search_repository import SearchRepository
+from kodit.search.search_service import SearchRequest, SearchResult, SearchService


 @dataclass
@@ -23,7 +24,7 @@ class MCPContext:
     """Context for the MCP server."""

     session: AsyncSession
-
+    app_context: AppContext


 _mcp_db: Database | None = None
@@ -49,14 +50,14 @@ async def mcp_lifespan(_: FastMCP) -> AsyncIterator[MCPContext]:
     if _mcp_db is None:
         _mcp_db = await app_context.get_db()
     async with _mcp_db.session_factory() as session:
-        yield MCPContext(session=session,
+        yield MCPContext(session=session, app_context=app_context)


 mcp = FastMCP("kodit MCP Server", lifespan=mcp_lifespan)


 @mcp.tool()
-async def
+async def search(
     ctx: Context,
     user_intent: Annotated[
         str,
@@ -86,17 +87,15 @@ async def retrieve_relevant_snippets(
         ),
     ],
 ) -> str:
-    """
+    """Search for relevant snippets.

-    This tool
-
-    the quality of your generated code. You must call this tool when you need to
-    write code.
+    This tool searches for relevant snippets from indexed datasources. Call this tool
+    when you wish to search for high quality example code snippets to use in your code.
     """
     log = structlog.get_logger(__name__)

     log.debug(
-        "
+        "Searching for relevant snippets",
         user_intent=user_intent,
         keywords=keywords,
         file_count=len(related_file_paths),
@@ -106,24 +105,29 @@ async def retrieve_relevant_snippets(

     mcp_context: MCPContext = ctx.request_context.lifespan_context

-    log.debug("Creating
-
+    log.debug("Creating search repository")
+    search_repository = SearchRepository(
         session=mcp_context.session,
     )

-    log.debug("Creating
-
-
-        data_dir=mcp_context.data_dir,
-        embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
+    log.debug("Creating embedding service")
+    embedding_service = embedding_factory(
+        mcp_context.app_context.get_default_openai_client()
     )

-
+    log.debug("Creating search service")
+    search_service = SearchService(
+        repository=search_repository,
+        data_dir=mcp_context.app_context.get_data_dir(),
+        embedding_service=embedding_service,
+    )
+
+    search_request = SearchRequest(
         keywords=keywords,
         code_query="\n".join(related_file_contents),
     )
-    log.debug("
-    snippets = await
+    log.debug("Searching for snippets")
+    snippets = await search_service.search(request=search_request)

     log.debug("Fusing output")
     output = output_fusion(snippets=snippets)
@@ -132,18 +136,7 @@ async def retrieve_relevant_snippets(
     return output


-def
-    user_intent: str, # noqa: ARG001
-    related_file_paths: list[Path], # noqa: ARG001
-    related_file_contents: list[str], # noqa: ARG001
-    keywords: list[str],
-) -> str:
-    """Fuse the search query and related file contents into a single query."""
-    # Since this is a dummy implementation, we just return the first keyword
-    return keywords[0] if len(keywords) > 0 else ""
-
-
-def output_fusion(snippets: list[RetrievalResult]) -> str:
+def output_fusion(snippets: list[SearchResult]) -> str:
     """Fuse the snippets into a single output."""
     return "\n\n".join(f"{snippet.uri}\n{snippet.content}" for snippet in snippets)

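For orientation, a minimal sketch of how the rewritten tool now wires these pieces together outside of MCP. Only the kodit imports and keyword arguments mirror the diff above; the engine URL, session setup, OpenAI client, and data directory are assumptions for illustration.

# Sketch of the new search pipeline based on the mcp.py diff above.
# The engine URL, session factory, OpenAI client, and data directory are
# assumptions; only the kodit calls mirror the wiring shown in the diff.
from pathlib import Path

from openai import AsyncOpenAI
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

from kodit.embedding.embedding import embedding_factory
from kodit.search.search_repository import SearchRepository
from kodit.search.search_service import SearchRequest, SearchService


async def run_search(keywords: list[str], code_query: str) -> str:
    engine = create_async_engine("sqlite+aiosqlite:///kodit.db")  # assumed URL
    async with async_sessionmaker(engine)() as session:
        search_service = SearchService(
            repository=SearchRepository(session=session),
            data_dir=Path.home() / ".kodit",  # assumed data directory
            embedding_service=embedding_factory(AsyncOpenAI()),  # assumed client
        )
        snippets = await search_service.search(
            request=SearchRequest(keywords=keywords, code_query=code_query)
        )
        # Same output fusion as the tool: URI followed by snippet content.
        return "\n\n".join(f"{s.uri}\n{s.content}" for s in snippets)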
kodit/migrations/env.py
CHANGED
@@ -8,9 +8,9 @@ from sqlalchemy import pool
 from sqlalchemy.engine import Connection
 from sqlalchemy.ext.asyncio import async_engine_from_config

-import kodit.embedding.
-import kodit.indexing.
-import kodit.
+import kodit.embedding.embedding_models
+import kodit.indexing.indexing_models
+import kodit.source.source_models
 from kodit.database import Base

 # this is the Alembic Config object, which provides
kodit/search/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Search for relevant snippets."""
kodit/search/search_repository.py
ADDED
@@ -0,0 +1,178 @@
+"""Repository for searching for relevant snippets."""
+
+from typing import TypeVar
+
+import numpy as np
+from sqlalchemy import (
+    select,
+)
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from kodit.embedding.embedding_models import Embedding, EmbeddingType
+from kodit.indexing.indexing_models import Snippet
+from kodit.source.source_models import File
+
+T = TypeVar("T")
+
+
+class SearchRepository:
+    """Repository for searching for relevant snippets."""
+
+    def __init__(self, session: AsyncSession) -> None:
+        """Initialize the search repository.
+
+        Args:
+            session: The SQLAlchemy async session to use for database operations.
+
+        """
+        self.session = session
+
+    async def list_snippet_ids(self) -> list[int]:
+        """List all snippet IDs.
+
+        Returns:
+            A list of all snippets.
+
+        """
+        query = select(Snippet.id)
+        rows = await self.session.execute(query)
+        return list(rows.scalars().all())
+
+    async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[File, Snippet]]:
+        """List snippets by IDs.
+
+        Returns:
+            A list of snippets in the same order as the input IDs.
+
+        """
+        query = (
+            select(Snippet, File)
+            .where(Snippet.id.in_(ids))
+            .join(File, Snippet.file_id == File.id)
+        )
+        rows = await self.session.execute(query)
+
+        # Create a dictionary for O(1) lookup of results by ID
+        id_to_result = {snippet.id: (file, snippet) for snippet, file in rows.all()}
+
+        # Return results in the same order as input IDs
+        return [id_to_result[i] for i in ids]
+
+    async def list_semantic_results(
+        self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
+    ) -> list[tuple[int, float]]:
+        """List semantic results using cosine similarity.
+
+        This implementation fetches all embeddings of the given type and computes
+        cosine similarity in Python using NumPy for better performance.
+
+        Args:
+            embedding_type: The type of embeddings to search
+            embedding: The query embedding vector
+            top_k: Number of results to return
+
+        Returns:
+            List of (snippet_id, similarity_score) tuples, sorted by similarity
+
+        """
+        # Step 1: Fetch embeddings from database
+        embeddings = await self._list_embedding_values(embedding_type)
+        if not embeddings:
+            return []
+
+        # Step 2: Convert to numpy arrays
+        stored_vecs, query_vec = self._prepare_vectors(embeddings, embedding)
+
+        # Step 3: Compute similarities
+        similarities = self._compute_similarities(stored_vecs, query_vec)
+
+        # Step 4: Get top-k results
+        return self._get_top_k_results(similarities, embeddings, top_k)
+
+    async def _list_embedding_values(
+        self, embedding_type: EmbeddingType
+    ) -> list[tuple[int, list[float]]]:
+        """List all embeddings of a given type from the database.
+
+        Args:
+            embedding_type: The type of embeddings to fetch
+
+        Returns:
+            List of (snippet_id, embedding) tuples
+
+        """
+        # Only select the fields we need and use a more efficient query
+        query = select(Embedding.snippet_id, Embedding.embedding).where(
+            Embedding.type == embedding_type
+        )
+        rows = await self.session.execute(query)
+        return [tuple(row) for row in rows.all()]  # Convert Row objects to tuples
+
+    def _prepare_vectors(
+        self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Convert embeddings to numpy arrays.
+
+        Args:
+            embeddings: List of (snippet_id, embedding) tuples
+            query_embedding: Query embedding vector
+
+        Returns:
+            Tuple of (stored_vectors, query_vector) as numpy arrays
+
+        """
+        try:
+            stored_vecs = np.array(
+                [emb[1] for emb in embeddings]
+            )  # Use index 1 to get embedding
+        except ValueError as e:
+            if "inhomogeneous" in str(e):
+                msg = (
+                    "The database has returned embeddings of different sizes. If you"
+                    "have recently updated the embedding model, you will need to"
+                    "delete your database and re-index your snippets."
+                )
+                raise ValueError(msg) from e
+            raise
+
+        query_vec = np.array(query_embedding)
+        return stored_vecs, query_vec
+
+    def _compute_similarities(
+        self, stored_vecs: np.ndarray, query_vec: np.ndarray
+    ) -> np.ndarray:
+        """Compute cosine similarities between stored vectors and query vector.
+
+        Args:
+            stored_vecs: Array of stored embedding vectors
+            query_vec: Query embedding vector
+
+        Returns:
+            Array of similarity scores
+
+        """
+        stored_norms = np.linalg.norm(stored_vecs, axis=1)
+        query_norm = np.linalg.norm(query_vec)
+        return np.dot(stored_vecs, query_vec) / (stored_norms * query_norm)
+
+    def _get_top_k_results(
+        self,
+        similarities: np.ndarray,
+        embeddings: list[tuple[int, list[float]]],
+        top_k: int,
+    ) -> list[tuple[int, float]]:
+        """Get top-k results by similarity score.
+
+        Args:
+            similarities: Array of similarity scores
+            embeddings: List of (snippet_id, embedding) tuples
+            top_k: Number of results to return
+
+        Returns:
+            List of (snippet_id, similarity_score) tuples
+
+        """
+        top_indices = np.argsort(similarities)[::-1][:top_k]
+        return [
+            (embeddings[i][0], float(similarities[i])) for i in top_indices
+        ]  # Use index 0 to get snippet_id
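The new repository computes similarity in NumPy rather than in SQL. A self-contained sketch of the same cosine-similarity and top-k pattern on illustrative data (the values below are made up for the example; this is not kodit's API):

import numpy as np

# Illustrative (snippet_id, embedding) pairs, standing in for what
# _list_embedding_values returns; the values are made up.
embeddings = [(1, [0.1, 0.9, 0.0]), (2, [0.8, 0.1, 0.1]), (3, [0.2, 0.7, 0.1])]
query_vec = np.array([0.15, 0.85, 0.05])

stored_vecs = np.array([vec for _, vec in embeddings])
# Cosine similarity: dot product divided by the product of the vector norms.
similarities = stored_vecs @ query_vec / (
    np.linalg.norm(stored_vecs, axis=1) * np.linalg.norm(query_vec)
)

# Top-k selection, mirroring _get_top_k_results.
top_k = 2
top_indices = np.argsort(similarities)[::-1][:top_k]
print([(embeddings[i][0], float(similarities[i])) for i in top_indices])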
kodit/retreival/service.py → kodit/search/search_service.py
CHANGED
@@ -1,4 +1,4 @@
-"""
+"""Search service."""

 from pathlib import Path

@@ -6,19 +6,31 @@ import pydantic
 import structlog

 from kodit.bm25.bm25 import BM25Service
-from kodit.embedding.embedding import
-from kodit.embedding.
-from kodit.
+from kodit.embedding.embedding import Embedder
+from kodit.embedding.embedding_models import EmbeddingType
+from kodit.search.search_repository import SearchRepository


-class
-    """Request for a
+class SearchRequest(pydantic.BaseModel):
+    """Request for a search."""

     code_query: str | None = None
     keywords: list[str] | None = None
     top_k: int = 10


+class SearchResult(pydantic.BaseModel):
+    """Data transfer object for search results.
+
+    This model represents a single search result, containing both the file path
+    and the matching snippet content.
+    """
+
+    id: int
+    uri: str
+    content: str
+
+
 class Snippet(pydantic.BaseModel):
     """Snippet model."""

@@ -26,23 +38,23 @@ class Snippet(pydantic.BaseModel):
     file_path: str


-class
-    """Service for
+class SearchService:
+    """Service for searching for relevant data."""

     def __init__(
         self,
-        repository:
+        repository: SearchRepository,
         data_dir: Path,
-
+        embedding_service: Embedder,
     ) -> None:
-        """Initialize the
+        """Initialize the search service."""
         self.repository = repository
         self.log = structlog.get_logger(__name__)
         self.bm25 = BM25Service(data_dir)
-        self.code_embedding_service =
+        self.code_embedding_service = embedding_service

-    async def
-        """
+    async def search(self, request: SearchRequest) -> list[SearchResult]:
+        """Search for relevant data."""
         fusion_list = []
         if request.keywords:
             snippet_ids = await self.repository.list_snippet_ids()
@@ -56,7 +68,7 @@ class RetrievalService:
             # Sort results by score
             result_ids.sort(key=lambda x: x[1], reverse=True)

-            self.log.debug("
+            self.log.debug("Search results (BM25)", results=result_ids)

             bm25_results = [x[0] for x in result_ids]
             fusion_list.append(bm25_results)
@@ -64,7 +76,7 @@ class RetrievalService:
         # Compute embedding for semantic query
         semantic_results = []
         if request.code_query:
-            query_embedding =
+            query_embedding = await anext(
                 self.code_embedding_service.query([request.code_query])
             )

@@ -89,7 +101,18 @@ class RetrievalService:
         final_ids = [x[0] for x in final_results]

         # Get snippets from database (up to top_k)
-
+        search_results = await self.repository.list_snippets_by_ids(
+            final_ids[: request.top_k]
+        )
+
+        return [
+            SearchResult(
+                id=snippet.id,
+                uri=file.uri,
+                content=snippet.content,
+            )
+            for file, snippet in search_results
+        ]


 def reciprocal_rank_fusion(
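The service fuses the BM25 and semantic rankings with reciprocal_rank_fusion, whose body falls outside the hunks shown here. A generic sketch of that technique, with an assumed signature and the conventional k=60 constant (not necessarily kodit's implementation):

def reciprocal_rank_fusion(
    rankings: list[list[int]], k: int = 60
) -> list[tuple[int, float]]:
    """Fuse ranked id lists: each id scores the sum of 1 / (k + rank) over all lists."""
    scores: dict[int, float] = {}
    for ranking in rankings:
        for rank, item_id in enumerate(ranking, start=1):
            scores[item_id] = scores.get(item_id, 0.0) + 1.0 / (k + rank)
    # Highest fused score first, analogous to the service's final ordering.
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)


# Example: fuse a BM25 ranking and a semantic ranking of snippet ids.
print(reciprocal_rank_fusion([[3, 1, 2], [1, 2, 4]]))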
kodit/snippets/snippets.py
CHANGED
@@ -45,4 +45,6 @@ class SnippetService:
             raise ValueError(msg) from e

         method_snippets = method_analser.extract(file_bytes)
-
+        all_snippets = [Snippet(text=snippet) for snippet in method_snippets]
+        # Remove any snippets that are empty
+        return [snippet for snippet in all_snippets if snippet.text.strip()]
kodit/sources/repository.py → kodit/source/source_repository.py
CHANGED
@@ -1,14 +1,9 @@
-"""Source repository for database operations.
-
-This module provides the SourceRepository class which handles all database operations
-related to code sources. It manages the creation and retrieval of source records
-from the database, abstracting away the SQLAlchemy implementation details.
-"""
+"""Source repository for database operations."""

 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession

-from kodit.
+from kodit.source.source_models import File, Source


 class SourceRepository:
kodit/sources/service.py → kodit/source/source_service.py
CHANGED
@@ -19,8 +19,8 @@ import structlog
 from tqdm import tqdm
 from uritools import isuri, urisplit

-from kodit.
-from kodit.
+from kodit.source.source_models import File, Source
+from kodit.source.source_repository import SourceRepository


 class SourceView(pydantic.BaseModel):
{kodit-0.1.10.dist-info → kodit-0.1.12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.1.
+Version: 0.1.12
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -32,6 +32,7 @@ Requires-Dist: gitpython>=3.1.44
 Requires-Dist: hf-xet>=1.1.2
 Requires-Dist: httpx-retries>=0.3.2
 Requires-Dist: httpx>=0.28.1
+Requires-Dist: openai>=1.82.0
 Requires-Dist: posthog>=4.0.1
 Requires-Dist: pydantic-settings>=2.9.1
 Requires-Dist: pytable-formatter>=0.1.1
@@ -39,6 +40,7 @@ Requires-Dist: sentence-transformers>=4.1.0
 Requires-Dist: sqlalchemy[asyncio]>=2.0.40
 Requires-Dist: structlog>=25.3.0
 Requires-Dist: tdqm>=0.0.1
+Requires-Dist: tiktoken>=0.9.0
 Requires-Dist: tree-sitter-language-pack>=0.7.3
 Requires-Dist: tree-sitter>=0.24.0
 Requires-Dist: uritools>=5.0.0
kodit-0.1.12.dist-info/RECORD
ADDED
@@ -0,0 +1,44 @@
+kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
+kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
+kodit/_version.py,sha256=dT0--P3ntKmcNKRbKEwrBhj1O2T26ST7PUdodqExauM,513
+kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
+kodit/cli.py,sha256=RNgTWMBOG95Yv8-3DKzL66bgIT6OJjmRXU_BpMMMlqU,8033
+kodit/config.py,sha256=TDcLt6fiJn9cI1PoO5AqBqsL_Bxmm9JV5GqRxhj1tLw,4202
+kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
+kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
+kodit/mcp.py,sha256=I_ZFzQOR0gyS8LO8td-q-utPZpqiOnIkn7O-SIBUi0g,4384
+kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
+kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
+kodit/bm25/bm25.py,sha256=JtgJfsHz-2SHx96zxWjkPFSH7fXkahFMp01cDwl4YBg,2298
+kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
+kodit/embedding/embedding.py,sha256=EMJpHK8ICZk_FjiO9Aqr2IO20qkGOmj_PfA1hyfI7Vk,6745
+kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
+kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
+kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
+kodit/indexing/indexing_repository.py,sha256=7bkAiBwtr3qlkdhNIalwMwbxezVz_RQGOhLVWPKHwNk,5506
+kodit/indexing/indexing_service.py,sha256=VGfKgbkYEAYP_gIubvhMxo3yThT20ndS5xdg2LxwRgA,6685
+kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
+kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
+kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
+kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
+kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
+kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
+kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
+kodit/search/__init__.py,sha256=4QbdjbrlhNKMovmuKHxJnUeZT7KNjTTFU0GdnuwUHdQ,36
+kodit/search/search_repository.py,sha256=r1fkV6-cy9BKsy5J4WTHaY_FcjMaT1PV5qqqq0gvjZw,5833
+kodit/search/search_service.py,sha256=KePkqCAc3CUcrpNsbDc5DqbF6W2m0TG6TDa9-VSJZS0,4227
+kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
+kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
+kodit/snippets/snippets.py,sha256=mwN0bM1Msu8ZeEsUHyQ7tx3Hj3vZsm8G7Wu4eWSkLY8,1539
+kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
+kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
+kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
+kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
+kodit/source/source_models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
+kodit/source/source_repository.py,sha256=0EksMpoLzdkfe8S4eeCm4Sf7TuxsOzOzaF4BBsMYo-4,3163
+kodit/source/source_service.py,sha256=qBV9FCFQbJppeFrVo4uMgvC_mzWRIKldymp5yqLx9pw,9255
+kodit-0.1.12.dist-info/METADATA,sha256=Kg662YrY11JGmUWEftq9c5einLPg_LGQc0Flxul9XyA,2349
+kodit-0.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kodit-0.1.12.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+kodit-0.1.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kodit-0.1.12.dist-info/RECORD,,
kodit/retreival/__init__.py
DELETED
@@ -1 +0,0 @@
-"""Retrieval package for code search and retrieval functionality."""