kodit 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/bm25/bm25.py +1 -1
- kodit/cli.py +22 -59
- kodit/config.py +43 -3
- kodit/embedding/embedding.py +161 -10
- kodit/indexing/{models.py → indexing_models.py} +2 -2
- kodit/indexing/{repository.py → indexing_repository.py} +5 -5
- kodit/indexing/{service.py → indexing_service.py} +17 -12
- kodit/log.py +1 -0
- kodit/mcp.py +27 -34
- kodit/migrations/env.py +3 -3
- kodit/search/__init__.py +1 -0
- kodit/{retreival/repository.py → search/search_repository.py} +59 -112
- kodit/{retreival/service.py → search/search_service.py} +40 -17
- kodit/snippets/snippets.py +3 -1
- kodit/{sources/repository.py → source/source_repository.py} +2 -7
- kodit/{sources/service.py → source/source_service.py} +2 -2
- {kodit-0.1.11.dist-info → kodit-0.1.13.dist-info}/METADATA +3 -1
- kodit-0.1.13.dist-info/RECORD +44 -0
- kodit/retreival/__init__.py +0 -1
- kodit-0.1.11.dist-info/RECORD +0 -44
- /kodit/embedding/{models.py → embedding_models.py} +0 -0
- /kodit/{sources → source}/__init__.py +0 -0
- /kodit/{sources/models.py → source/source_models.py} +0 -0
- {kodit-0.1.11.dist-info → kodit-0.1.13.dist-info}/WHEEL +0 -0
- {kodit-0.1.11.dist-info → kodit-0.1.13.dist-info}/entry_points.txt +0 -0
- {kodit-0.1.11.dist-info → kodit-0.1.13.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
kodit/bm25/bm25.py
CHANGED
|
@@ -52,7 +52,7 @@ class BM25Service:
|
|
|
52
52
|
self.log.warning("No documents to retrieve from, returning empty list")
|
|
53
53
|
return []
|
|
54
54
|
|
|
55
|
-
top_k = min(top_k, len(
|
|
55
|
+
top_k = min(top_k, len(self.retriever.scores))
|
|
56
56
|
self.log.debug(
|
|
57
57
|
"Retrieving from index", query=query, top_k=top_k, num_docs=len(doc_ids)
|
|
58
58
|
)
|
kodit/cli.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
|
-
import os
|
|
4
3
|
import signal
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Any
|
|
@@ -12,35 +11,21 @@ from pytable_formatter import Cell, Table
|
|
|
12
11
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
12
|
|
|
14
13
|
from kodit.config import (
|
|
15
|
-
DEFAULT_BASE_DIR,
|
|
16
|
-
DEFAULT_DB_URL,
|
|
17
|
-
DEFAULT_DISABLE_TELEMETRY,
|
|
18
|
-
DEFAULT_EMBEDDING_MODEL_NAME,
|
|
19
|
-
DEFAULT_LOG_FORMAT,
|
|
20
|
-
DEFAULT_LOG_LEVEL,
|
|
21
14
|
AppContext,
|
|
22
15
|
with_app_context,
|
|
23
16
|
with_session,
|
|
24
17
|
)
|
|
25
|
-
from kodit.
|
|
26
|
-
from kodit.indexing.
|
|
18
|
+
from kodit.embedding.embedding import embedding_factory
|
|
19
|
+
from kodit.indexing.indexing_repository import IndexRepository
|
|
20
|
+
from kodit.indexing.indexing_service import IndexService
|
|
27
21
|
from kodit.log import configure_logging, configure_telemetry, log_event
|
|
28
|
-
from kodit.
|
|
29
|
-
from kodit.
|
|
30
|
-
from kodit.
|
|
31
|
-
from kodit.
|
|
22
|
+
from kodit.search.search_repository import SearchRepository
|
|
23
|
+
from kodit.search.search_service import SearchRequest, SearchService
|
|
24
|
+
from kodit.source.source_repository import SourceRepository
|
|
25
|
+
from kodit.source.source_service import SourceService
|
|
32
26
|
|
|
33
27
|
|
|
34
28
|
@click.group(context_settings={"max_content_width": 100})
|
|
35
|
-
@click.option("--log-level", help=f"Log level [default: {DEFAULT_LOG_LEVEL}]")
|
|
36
|
-
@click.option("--log-format", help=f"Log format [default: {DEFAULT_LOG_FORMAT}]")
|
|
37
|
-
@click.option(
|
|
38
|
-
"--disable-telemetry",
|
|
39
|
-
is_flag=True,
|
|
40
|
-
help=f"Disable telemetry [default: {DEFAULT_DISABLE_TELEMETRY}]",
|
|
41
|
-
)
|
|
42
|
-
@click.option("--db-url", help=f"Database URL [default: {DEFAULT_DB_URL}]")
|
|
43
|
-
@click.option("--data-dir", help=f"Data directory [default: {DEFAULT_BASE_DIR}]")
|
|
44
29
|
@click.option(
|
|
45
30
|
"--env-file",
|
|
46
31
|
help="Path to a .env file [default: .env]",
|
|
@@ -52,13 +37,8 @@ from kodit.sources.service import SourceService
|
|
|
52
37
|
),
|
|
53
38
|
)
|
|
54
39
|
@click.pass_context
|
|
55
|
-
def cli(
|
|
40
|
+
def cli(
|
|
56
41
|
ctx: click.Context,
|
|
57
|
-
log_level: str | None,
|
|
58
|
-
log_format: str | None,
|
|
59
|
-
disable_telemetry: bool | None,
|
|
60
|
-
db_url: str | None,
|
|
61
|
-
data_dir: str | None,
|
|
62
42
|
env_file: Path | None,
|
|
63
43
|
) -> None:
|
|
64
44
|
"""kodit CLI - Code indexing for better AI code generation.""" # noqa: D403
|
|
@@ -67,17 +47,6 @@ def cli( # noqa: PLR0913
|
|
|
67
47
|
if env_file:
|
|
68
48
|
config = AppContext(_env_file=env_file) # type: ignore[reportCallIssue]
|
|
69
49
|
|
|
70
|
-
# Now override with CLI arguments, if set
|
|
71
|
-
if data_dir:
|
|
72
|
-
config.data_dir = Path(data_dir)
|
|
73
|
-
if db_url:
|
|
74
|
-
config.db_url = db_url
|
|
75
|
-
if log_level:
|
|
76
|
-
config.log_level = log_level
|
|
77
|
-
if log_format:
|
|
78
|
-
config.log_format = log_format
|
|
79
|
-
if disable_telemetry:
|
|
80
|
-
config.disable_telemetry = disable_telemetry
|
|
81
50
|
configure_logging(config)
|
|
82
51
|
configure_telemetry(config)
|
|
83
52
|
|
|
@@ -102,7 +71,7 @@ async def index(
|
|
|
102
71
|
repository,
|
|
103
72
|
source_service,
|
|
104
73
|
app_context.get_data_dir(),
|
|
105
|
-
|
|
74
|
+
embedding_service=embedding_factory(app_context.get_default_openai_client()),
|
|
106
75
|
)
|
|
107
76
|
|
|
108
77
|
if not sources:
|
|
@@ -159,14 +128,14 @@ async def code(
|
|
|
159
128
|
|
|
160
129
|
This works best if your query is code.
|
|
161
130
|
"""
|
|
162
|
-
repository =
|
|
163
|
-
service =
|
|
131
|
+
repository = SearchRepository(session)
|
|
132
|
+
service = SearchService(
|
|
164
133
|
repository,
|
|
165
134
|
app_context.get_data_dir(),
|
|
166
|
-
|
|
135
|
+
embedding_service=embedding_factory(app_context.get_default_openai_client()),
|
|
167
136
|
)
|
|
168
137
|
|
|
169
|
-
snippets = await service.
|
|
138
|
+
snippets = await service.search(SearchRequest(code_query=query, top_k=top_k))
|
|
170
139
|
|
|
171
140
|
if len(snippets) == 0:
|
|
172
141
|
click.echo("No snippets found")
|
|
@@ -192,14 +161,14 @@ async def keyword(
|
|
|
192
161
|
top_k: int,
|
|
193
162
|
) -> None:
|
|
194
163
|
"""Search for snippets using keyword search."""
|
|
195
|
-
repository =
|
|
196
|
-
service =
|
|
164
|
+
repository = SearchRepository(session)
|
|
165
|
+
service = SearchService(
|
|
197
166
|
repository,
|
|
198
167
|
app_context.get_data_dir(),
|
|
199
|
-
|
|
168
|
+
embedding_service=embedding_factory(app_context.get_default_openai_client()),
|
|
200
169
|
)
|
|
201
170
|
|
|
202
|
-
snippets = await service.
|
|
171
|
+
snippets = await service.search(SearchRequest(keywords=keywords, top_k=top_k))
|
|
203
172
|
|
|
204
173
|
if len(snippets) == 0:
|
|
205
174
|
click.echo("No snippets found")
|
|
@@ -227,18 +196,18 @@ async def hybrid(
|
|
|
227
196
|
code: str,
|
|
228
197
|
) -> None:
|
|
229
198
|
"""Search for snippets using hybrid search."""
|
|
230
|
-
repository =
|
|
231
|
-
service =
|
|
199
|
+
repository = SearchRepository(session)
|
|
200
|
+
service = SearchService(
|
|
232
201
|
repository,
|
|
233
202
|
app_context.get_data_dir(),
|
|
234
|
-
|
|
203
|
+
embedding_service=embedding_factory(app_context.get_default_openai_client()),
|
|
235
204
|
)
|
|
236
205
|
|
|
237
206
|
# Parse keywords into a list of strings
|
|
238
207
|
keywords_list = [k.strip().lower() for k in keywords.split(",")]
|
|
239
208
|
|
|
240
|
-
snippets = await service.
|
|
241
|
-
|
|
209
|
+
snippets = await service.search(
|
|
210
|
+
SearchRequest(keywords=keywords_list, code_query=code, top_k=top_k)
|
|
242
211
|
)
|
|
243
212
|
|
|
244
213
|
if len(snippets) == 0:
|
|
@@ -256,9 +225,7 @@ async def hybrid(
|
|
|
256
225
|
@cli.command()
|
|
257
226
|
@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
|
|
258
227
|
@click.option("--port", default=8080, help="Port to bind the server to")
|
|
259
|
-
@with_app_context
|
|
260
228
|
def serve(
|
|
261
|
-
app_context: AppContext,
|
|
262
229
|
host: str,
|
|
263
230
|
port: int,
|
|
264
231
|
) -> None:
|
|
@@ -267,10 +234,6 @@ def serve(
|
|
|
267
234
|
log.info("Starting kodit server", host=host, port=port)
|
|
268
235
|
log_event("kodit_server_started")
|
|
269
236
|
|
|
270
|
-
# Dump AppContext to a dictionary of strings, and set the env vars
|
|
271
|
-
app_context_dict = {k: str(v) for k, v in app_context.model_dump().items()}
|
|
272
|
-
os.environ.update(app_context_dict)
|
|
273
|
-
|
|
274
237
|
# Configure uvicorn with graceful shutdown
|
|
275
238
|
config = uvicorn.Config(
|
|
276
239
|
"kodit.app:app",
|
kodit/config.py
CHANGED
|
@@ -4,10 +4,11 @@ import asyncio
|
|
|
4
4
|
from collections.abc import Callable, Coroutine
|
|
5
5
|
from functools import wraps
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, TypeVar
|
|
7
|
+
from typing import Any, Literal, TypeVar
|
|
8
8
|
|
|
9
9
|
import click
|
|
10
|
-
from
|
|
10
|
+
from openai import AsyncOpenAI
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
11
12
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
12
13
|
|
|
13
14
|
from kodit.database import Database
|
|
@@ -22,16 +23,40 @@ DEFAULT_EMBEDDING_MODEL_NAME = TINY
|
|
|
22
23
|
T = TypeVar("T")
|
|
23
24
|
|
|
24
25
|
|
|
26
|
+
class Endpoint(BaseModel):
    """Connection settings for a single AI service endpoint."""

    # Service kind; "openai" is the only value the Literal admits.
    type: Literal["openai"] = "openai"
    # Credential for the service; left as None when nothing was supplied.
    api_key: str | None = None
    # Root URL of the service API; left as None when nothing was supplied.
    base_url: str | None = None
|
|
32
|
+
|
|
33
|
+
|
|
25
34
|
class AppContext(BaseSettings):
|
|
26
35
|
"""Global context for the kodit project. Provides a shared state for the app."""
|
|
27
36
|
|
|
28
|
-
model_config = SettingsConfigDict(
|
|
37
|
+
model_config = SettingsConfigDict(
|
|
38
|
+
env_file=".env",
|
|
39
|
+
env_file_encoding="utf-8",
|
|
40
|
+
env_nested_delimiter="_",
|
|
41
|
+
nested_model_default_partial_update=True,
|
|
42
|
+
env_nested_max_split=1,
|
|
43
|
+
)
|
|
29
44
|
|
|
30
45
|
data_dir: Path = Field(default=DEFAULT_BASE_DIR)
|
|
31
46
|
db_url: str = Field(default=DEFAULT_DB_URL)
|
|
32
47
|
log_level: str = Field(default=DEFAULT_LOG_LEVEL)
|
|
33
48
|
log_format: str = Field(default=DEFAULT_LOG_FORMAT)
|
|
34
49
|
disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
|
|
50
|
+
default_endpoint: Endpoint | None = Field(
|
|
51
|
+
default=Endpoint(
|
|
52
|
+
type="openai",
|
|
53
|
+
base_url="https://api.openai.com/v1",
|
|
54
|
+
),
|
|
55
|
+
description=(
|
|
56
|
+
"Default endpoint to use for all AI interactions "
|
|
57
|
+
"(can be overridden by task-specific configuration)."
|
|
58
|
+
),
|
|
59
|
+
)
|
|
35
60
|
_db: Database | None = None
|
|
36
61
|
|
|
37
62
|
def model_post_init(self, _: Any) -> None:
|
|
@@ -58,6 +83,21 @@ class AppContext(BaseSettings):
|
|
|
58
83
|
await self._db.run_migrations(self.db_url)
|
|
59
84
|
return self._db
|
|
60
85
|
|
|
86
|
+
def get_default_openai_client(self) -> AsyncOpenAI | None:
    """Build an OpenAI client from the default endpoint, when usable.

    Returns:
        An ``AsyncOpenAI`` client created from the default endpoint's
        ``api_key`` and ``base_url``, or ``None`` when the endpoint is
        missing, is not of type ``"openai"``, or lacks either value.

    """
    cfg = self.default_endpoint
    # Any missing piece means no remote client can be built.
    if not cfg or cfg.type != "openai" or not cfg.api_key or not cfg.base_url:
        return None
    return AsyncOpenAI(api_key=cfg.api_key, base_url=cfg.base_url)
|
|
100
|
+
|
|
61
101
|
|
|
62
102
|
with_app_context = click.make_pass_decorator(AppContext)
|
|
63
103
|
|
kodit/embedding/embedding.py
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
"""Embedding service."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import os
|
|
4
|
-
from
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from collections.abc import AsyncGenerator
|
|
7
|
+
from typing import NamedTuple
|
|
5
8
|
|
|
6
9
|
import structlog
|
|
10
|
+
import tiktoken
|
|
11
|
+
from openai import AsyncOpenAI
|
|
7
12
|
from sentence_transformers import SentenceTransformer
|
|
8
13
|
|
|
9
14
|
TINY = "tiny"
|
|
@@ -17,14 +22,59 @@ COMMON_EMBEDDING_MODELS = {
|
|
|
17
22
|
}
|
|
18
23
|
|
|
19
24
|
|
|
20
|
-
class
|
|
21
|
-
"""
|
|
25
|
+
class EmbeddingInput(NamedTuple):
    """One piece of text to embed, tagged with a caller-supplied id."""

    # Identifier copied onto the EmbeddingOutput produced for this text.
    id: int
    # The text to embed.
    text: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class EmbeddingOutput(NamedTuple):
    """One embedding vector, tagged with the id of the input it came from."""

    # Identifier taken from the corresponding EmbeddingInput.
    id: int
    # The embedding vector as plain Python floats.
    embedding: list[float]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Embedder(ABC):
    """Abstract interface implemented by every embedding backend."""

    @abstractmethod
    def embed(
        self, data: list[EmbeddingInput]
    ) -> AsyncGenerator[EmbeddingOutput, None]:
        """Embed a list of (id, text) inputs.

        Callers may pass a very large list in one call. Implementations are
        expected to batch the requests, and may parallelize them, in whatever
        way suits the underlying embedding service.

        Each output carries the id of the input it belongs to, because
        parallel processing may deliver results out of order.
        """

    @abstractmethod
    def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
        """Embed query strings and yield one vector per embedded string."""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def embedding_factory(openai_client: AsyncOpenAI | None = None) -> Embedder:
    """Pick an embedder implementation.

    Args:
        openai_client: An OpenAI client to embed with, or ``None`` to use
            the local model.

    Returns:
        An ``OpenAIEmbedder`` wrapping ``openai_client`` when one is given,
        otherwise a ``LocalEmbedder`` using the ``TINY`` model.

    """
    if openai_client is None:
        # No remote client supplied: fall back to the local model.
        return LocalEmbedder(model_name=TINY)
    return OpenAIEmbedder(openai_client)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class LocalEmbedder(Embedder):
|
|
69
|
+
"""Local embedder."""
|
|
22
70
|
|
|
23
71
|
def __init__(self, model_name: str) -> None:
|
|
24
|
-
"""Initialize the
|
|
72
|
+
"""Initialize the local embedder."""
|
|
25
73
|
self.log = structlog.get_logger(__name__)
|
|
74
|
+
self.log.info("Creating local embedder", model_name=model_name)
|
|
26
75
|
self.model_name = COMMON_EMBEDDING_MODELS.get(model_name, model_name)
|
|
27
76
|
self.embedding_model = None
|
|
77
|
+
self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
|
|
28
78
|
|
|
29
79
|
def _model(self) -> SentenceTransformer:
|
|
30
80
|
"""Get the embedding model."""
|
|
@@ -37,16 +87,117 @@ class EmbeddingService:
|
|
|
37
87
|
)
|
|
38
88
|
return self.embedding_model
|
|
39
89
|
|
|
40
|
-
def embed(
|
|
90
|
+
async def embed(
|
|
91
|
+
self, data: list[EmbeddingInput]
|
|
92
|
+
) -> AsyncGenerator[EmbeddingOutput, None]:
|
|
41
93
|
"""Embed a list of documents."""
|
|
42
94
|
model = self._model()
|
|
43
|
-
embeddings = model.encode(snippets, show_progress_bar=False, batch_size=4)
|
|
44
|
-
for embedding in embeddings:
|
|
45
|
-
yield [float(x) for x in embedding]
|
|
46
95
|
|
|
47
|
-
|
|
96
|
+
batched_data = _split_sub_batches(self.encoding, data)
|
|
97
|
+
|
|
98
|
+
for batch in batched_data:
|
|
99
|
+
embeddings = model.encode(
|
|
100
|
+
[i.text for i in batch], show_progress_bar=False, batch_size=4
|
|
101
|
+
)
|
|
102
|
+
for i, x in zip(batch, embeddings, strict=False):
|
|
103
|
+
yield EmbeddingOutput(i.id, [float(y) for y in x])
|
|
104
|
+
|
|
105
|
+
async def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
|
|
48
106
|
"""Query the embedding model."""
|
|
49
107
|
model = self._model()
|
|
50
|
-
embeddings = model.encode(
|
|
108
|
+
embeddings = model.encode(data, show_progress_bar=False, batch_size=4)
|
|
51
109
|
for embedding in embeddings:
|
|
52
110
|
yield [float(x) for x in embedding]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
OPENAI_MAX_EMBEDDING_SIZE = 8192
|
|
114
|
+
OPENAI_NUM_PARALLEL_TASKS = 10
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _split_sub_batches(
    encoding: tiktoken.Encoding, data: list[EmbeddingInput]
) -> list[list[EmbeddingInput]]:
    """Group embedding inputs into sub-batches that fit the token budget.

    Inputs are visited once, in order. Inputs whose text is empty or only
    whitespace are dropped silently. Inputs that alone exceed
    ``OPENAI_MAX_EMBEDDING_SIZE`` tokens are dropped with a warning. The
    remaining inputs are packed greedily so that the summed token count of
    each sub-batch stays within ``OPENAI_MAX_EMBEDDING_SIZE``.

    Args:
        encoding: Tokenizer used to measure the length of each text.
        data: Inputs to partition; the list itself is not modified.

    Returns:
        The sub-batches, preserving the relative order of the kept inputs.
        Empty when nothing is kept.

    """
    log = structlog.get_logger(__name__)
    batches: list[list[EmbeddingInput]] = []
    current_batch: list[EmbeddingInput] = []
    current_tokens = 0

    for item in data:
        if not item.text.strip():
            continue  # Nothing to embed for blank text.

        # Source code may literally contain special-token markers such as
        # "<|endoftext|>". With the default arguments encode() raises on
        # those, so treat them as ordinary text when counting tokens.
        item_tokens = len(encoding.encode(item.text, disallowed_special=()))

        if item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
            log.warning("Skipping too long snippet", snippet=item)
            continue

        # Close the running batch when this item would push it over budget.
        if current_batch and current_tokens + item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
            batches.append(current_batch)
            current_batch = []
            current_tokens = 0

        current_batch.append(item)
        current_tokens += item_tokens

    if current_batch:
        batches.append(current_batch)

    return batches
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class OpenAIEmbedder(Embedder):
    """Embedder that calls the embeddings endpoint of an OpenAI client."""

    def __init__(
        self, openai_client: AsyncOpenAI, model_name: str = "text-embedding-3-small"
    ) -> None:
        """Initialize the OpenAI embedder.

        Args:
            openai_client: Client used for every embeddings request.
            model_name: Embedding model to request; also selects the
                tokenizer used to size batches.

        """
        self.log = structlog.get_logger(__name__)
        self.log.info("Creating OpenAI embedder", model_name=model_name)
        self.openai_client = openai_client
        # Remember the model so requests use the same one the tokenizer was
        # chosen for, instead of a hard-coded name.
        self.model_name = model_name
        self.encoding = tiktoken.encoding_for_model(model_name)

    async def embed(
        self,
        data: list[EmbeddingInput],
    ) -> AsyncGenerator[EmbeddingOutput, None]:
        """Embed a list of documents.

        The inputs are split into token-bounded sub-batches which are sent
        concurrently, at most ``OPENAI_NUM_PARALLEL_TASKS`` at a time.
        Results are yielded as batches complete, so they may arrive in a
        different order than ``data``; match them up through their ``id``.

        A batch whose request fails is logged and contributes no outputs.
        """
        # Split the inputs into sub-batches that each fit the token budget.
        batched_data = _split_sub_batches(self.encoding, data)

        # Limit the number of requests that are in flight at once.
        sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)

        async def process_batch(batch: list[EmbeddingInput]) -> list[EmbeddingOutput]:
            async with sem:
                try:
                    response = await self.openai_client.embeddings.create(
                        model=self.model_name,
                        input=[i.text for i in batch],
                    )
                    return [
                        EmbeddingOutput(i.id, x.embedding)
                        for i, x in zip(batch, response.data, strict=False)
                    ]
                except Exception as e:
                    # Best effort: one failed batch must not abort the rest.
                    self.log.exception("Error embedding batch", error=str(e))
                    return []

        # Create tasks for all batches
        tasks = [process_batch(batch) for batch in batched_data]

        # Process all batches and yield results as they complete
        for task in asyncio.as_completed(tasks):
            embeddings = await task
            for e in embeddings:
                yield e

    async def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
        """Embed query strings and yield their vectors in input order.

        ``embed`` may deliver results out of order, so the vectors are
        collected by input position first and yielded sorted by position.
        Strings for which ``embed`` produced no output are omitted.
        """
        by_position: dict[int, list[float]] = {}
        async for e in self.embed(
            [EmbeddingInput(i, text) for i, text in enumerate(data)]
        ):
            by_position[e.id] = e.embedding

        for position in sorted(by_position):
            yield by_position[position]
|
|
@@ -31,8 +31,8 @@ class Snippet(Base, CommonMixin):
|
|
|
31
31
|
|
|
32
32
|
__tablename__ = "snippets"
|
|
33
33
|
|
|
34
|
-
file_id: Mapped[int] = mapped_column(ForeignKey("files.id"))
|
|
35
|
-
index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"))
|
|
34
|
+
file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
|
|
35
|
+
index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
|
|
36
36
|
content: Mapped[str] = mapped_column(UnicodeText, default="")
|
|
37
37
|
|
|
38
38
|
def __init__(self, file_id: int, index_id: int, content: str) -> None:
|
|
@@ -11,9 +11,9 @@ from typing import TypeVar
|
|
|
11
11
|
from sqlalchemy import delete, func, select
|
|
12
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
13
|
|
|
14
|
-
from kodit.embedding.
|
|
15
|
-
from kodit.indexing.
|
|
16
|
-
from kodit.
|
|
14
|
+
from kodit.embedding.embedding_models import Embedding
|
|
15
|
+
from kodit.indexing.indexing_models import Index, Snippet
|
|
16
|
+
from kodit.source.source_models import File, Source
|
|
17
17
|
|
|
18
18
|
T = TypeVar("T")
|
|
19
19
|
|
|
@@ -156,14 +156,14 @@ class IndexRepository:
|
|
|
156
156
|
result = await self.session.execute(query)
|
|
157
157
|
return list(result.scalars())
|
|
158
158
|
|
|
159
|
-
async def get_all_snippets(self) -> list[Snippet]:
|
|
159
|
+
async def get_all_snippets(self, index_id: int) -> list[Snippet]:
|
|
160
160
|
"""Get all snippets.
|
|
161
161
|
|
|
162
162
|
Returns:
|
|
163
163
|
A list of all snippets.
|
|
164
164
|
|
|
165
165
|
"""
|
|
166
|
-
query = select(Snippet).order_by(Snippet.id)
|
|
166
|
+
query = select(Snippet).where(Snippet.index_id == index_id).order_by(Snippet.id)
|
|
167
167
|
result = await self.session.execute(query)
|
|
168
168
|
return list(result.scalars())
|
|
169
169
|
|
|
@@ -14,12 +14,12 @@ import structlog
|
|
|
14
14
|
from tqdm.asyncio import tqdm
|
|
15
15
|
|
|
16
16
|
from kodit.bm25.bm25 import BM25Service
|
|
17
|
-
from kodit.embedding.embedding import
|
|
18
|
-
from kodit.embedding.
|
|
19
|
-
from kodit.indexing.
|
|
20
|
-
from kodit.indexing.
|
|
17
|
+
from kodit.embedding.embedding import Embedder, EmbeddingInput
|
|
18
|
+
from kodit.embedding.embedding_models import Embedding, EmbeddingType
|
|
19
|
+
from kodit.indexing.indexing_models import Snippet
|
|
20
|
+
from kodit.indexing.indexing_repository import IndexRepository
|
|
21
21
|
from kodit.snippets.snippets import SnippetService
|
|
22
|
-
from kodit.
|
|
22
|
+
from kodit.source.source_service import SourceService
|
|
23
23
|
|
|
24
24
|
# List of MIME types that are blacklisted from being indexed
|
|
25
25
|
MIME_BLACKLIST = ["unknown/unknown"]
|
|
@@ -52,7 +52,7 @@ class IndexService:
|
|
|
52
52
|
repository: IndexRepository,
|
|
53
53
|
source_service: SourceService,
|
|
54
54
|
data_dir: Path,
|
|
55
|
-
|
|
55
|
+
embedding_service: Embedder,
|
|
56
56
|
) -> None:
|
|
57
57
|
"""Initialize the index service.
|
|
58
58
|
|
|
@@ -66,7 +66,7 @@ class IndexService:
|
|
|
66
66
|
self.snippet_service = SnippetService()
|
|
67
67
|
self.log = structlog.get_logger(__name__)
|
|
68
68
|
self.bm25 = BM25Service(data_dir)
|
|
69
|
-
self.code_embedding_service =
|
|
69
|
+
self.code_embedding_service = embedding_service
|
|
70
70
|
|
|
71
71
|
async def create(self, source_id: int) -> IndexView:
|
|
72
72
|
"""Create a new index for a source.
|
|
@@ -132,7 +132,7 @@ class IndexService:
|
|
|
132
132
|
# Create snippets for supported file types
|
|
133
133
|
await self._create_snippets(index_id)
|
|
134
134
|
|
|
135
|
-
snippets = await self.repository.get_all_snippets()
|
|
135
|
+
snippets = await self.repository.get_all_snippets(index_id)
|
|
136
136
|
|
|
137
137
|
self.log.info("Creating keyword index")
|
|
138
138
|
self.bm25.index(
|
|
@@ -143,12 +143,17 @@ class IndexService:
|
|
|
143
143
|
)
|
|
144
144
|
|
|
145
145
|
self.log.info("Creating semantic code index")
|
|
146
|
-
for
|
|
147
|
-
|
|
146
|
+
async for e in tqdm(
|
|
147
|
+
self.code_embedding_service.embed(
|
|
148
|
+
[EmbeddingInput(snippet.id, snippet.content) for snippet in snippets]
|
|
149
|
+
),
|
|
150
|
+
total=len(snippets),
|
|
151
|
+
leave=False,
|
|
152
|
+
):
|
|
148
153
|
await self.repository.add_embedding(
|
|
149
154
|
Embedding(
|
|
150
|
-
snippet_id=
|
|
151
|
-
embedding=embedding,
|
|
155
|
+
snippet_id=e.id,
|
|
156
|
+
embedding=e.embedding,
|
|
152
157
|
type=EmbeddingType.CODE,
|
|
153
158
|
)
|
|
154
159
|
)
|
kodit/log.py
CHANGED
kodit/mcp.py
CHANGED
|
@@ -12,10 +12,11 @@ from pydantic import Field
|
|
|
12
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
13
13
|
|
|
14
14
|
from kodit._version import version
|
|
15
|
-
from kodit.config import
|
|
15
|
+
from kodit.config import AppContext
|
|
16
16
|
from kodit.database import Database
|
|
17
|
-
from kodit.
|
|
18
|
-
from kodit.
|
|
17
|
+
from kodit.embedding.embedding import embedding_factory
|
|
18
|
+
from kodit.search.search_repository import SearchRepository
|
|
19
|
+
from kodit.search.search_service import SearchRequest, SearchResult, SearchService
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
@dataclass
|
|
@@ -23,7 +24,7 @@ class MCPContext:
|
|
|
23
24
|
"""Context for the MCP server."""
|
|
24
25
|
|
|
25
26
|
session: AsyncSession
|
|
26
|
-
|
|
27
|
+
app_context: AppContext
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
_mcp_db: Database | None = None
|
|
@@ -49,14 +50,14 @@ async def mcp_lifespan(_: FastMCP) -> AsyncIterator[MCPContext]:
|
|
|
49
50
|
if _mcp_db is None:
|
|
50
51
|
_mcp_db = await app_context.get_db()
|
|
51
52
|
async with _mcp_db.session_factory() as session:
|
|
52
|
-
yield MCPContext(session=session,
|
|
53
|
+
yield MCPContext(session=session, app_context=app_context)
|
|
53
54
|
|
|
54
55
|
|
|
55
56
|
mcp = FastMCP("kodit MCP Server", lifespan=mcp_lifespan)
|
|
56
57
|
|
|
57
58
|
|
|
58
59
|
@mcp.tool()
|
|
59
|
-
async def
|
|
60
|
+
async def search(
|
|
60
61
|
ctx: Context,
|
|
61
62
|
user_intent: Annotated[
|
|
62
63
|
str,
|
|
@@ -86,17 +87,15 @@ async def retrieve_relevant_snippets(
|
|
|
86
87
|
),
|
|
87
88
|
],
|
|
88
89
|
) -> str:
|
|
89
|
-
"""
|
|
90
|
+
"""Search for relevant snippets.
|
|
90
91
|
|
|
91
|
-
This tool
|
|
92
|
-
|
|
93
|
-
the quality of your generated code. You must call this tool when you need to
|
|
94
|
-
write code.
|
|
92
|
+
This tool searches for relevant snippets from indexed datasources. Call this tool
|
|
93
|
+
when you wish to search for high quality example code snippets to use in your code.
|
|
95
94
|
"""
|
|
96
95
|
log = structlog.get_logger(__name__)
|
|
97
96
|
|
|
98
97
|
log.debug(
|
|
99
|
-
"
|
|
98
|
+
"Searching for relevant snippets",
|
|
100
99
|
user_intent=user_intent,
|
|
101
100
|
keywords=keywords,
|
|
102
101
|
file_count=len(related_file_paths),
|
|
@@ -106,24 +105,29 @@ async def retrieve_relevant_snippets(
|
|
|
106
105
|
|
|
107
106
|
mcp_context: MCPContext = ctx.request_context.lifespan_context
|
|
108
107
|
|
|
109
|
-
log.debug("Creating
|
|
110
|
-
|
|
108
|
+
log.debug("Creating search repository")
|
|
109
|
+
search_repository = SearchRepository(
|
|
111
110
|
session=mcp_context.session,
|
|
112
111
|
)
|
|
113
112
|
|
|
114
|
-
log.debug("Creating
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
data_dir=mcp_context.data_dir,
|
|
118
|
-
embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
|
|
113
|
+
log.debug("Creating embedding service")
|
|
114
|
+
embedding_service = embedding_factory(
|
|
115
|
+
mcp_context.app_context.get_default_openai_client()
|
|
119
116
|
)
|
|
120
117
|
|
|
121
|
-
|
|
118
|
+
log.debug("Creating search service")
|
|
119
|
+
search_service = SearchService(
|
|
120
|
+
repository=search_repository,
|
|
121
|
+
data_dir=mcp_context.app_context.get_data_dir(),
|
|
122
|
+
embedding_service=embedding_service,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
search_request = SearchRequest(
|
|
122
126
|
keywords=keywords,
|
|
123
127
|
code_query="\n".join(related_file_contents),
|
|
124
128
|
)
|
|
125
|
-
log.debug("
|
|
126
|
-
snippets = await
|
|
129
|
+
log.debug("Searching for snippets")
|
|
130
|
+
snippets = await search_service.search(request=search_request)
|
|
127
131
|
|
|
128
132
|
log.debug("Fusing output")
|
|
129
133
|
output = output_fusion(snippets=snippets)
|
|
@@ -132,18 +136,7 @@ async def retrieve_relevant_snippets(
|
|
|
132
136
|
return output
|
|
133
137
|
|
|
134
138
|
|
|
135
|
-
def
|
|
136
|
-
user_intent: str, # noqa: ARG001
|
|
137
|
-
related_file_paths: list[Path], # noqa: ARG001
|
|
138
|
-
related_file_contents: list[str], # noqa: ARG001
|
|
139
|
-
keywords: list[str],
|
|
140
|
-
) -> str:
|
|
141
|
-
"""Fuse the search query and related file contents into a single query."""
|
|
142
|
-
# Since this is a dummy implementation, we just return the first keyword
|
|
143
|
-
return keywords[0] if len(keywords) > 0 else ""
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def output_fusion(snippets: list[RetrievalResult]) -> str:
|
|
139
|
+
def output_fusion(snippets: list[SearchResult]) -> str:
|
|
147
140
|
"""Fuse the snippets into a single output."""
|
|
148
141
|
return "\n\n".join(f"{snippet.uri}\n{snippet.content}" for snippet in snippets)
|
|
149
142
|
|
kodit/migrations/env.py
CHANGED
|
@@ -8,9 +8,9 @@ from sqlalchemy import pool
|
|
|
8
8
|
from sqlalchemy.engine import Connection
|
|
9
9
|
from sqlalchemy.ext.asyncio import async_engine_from_config
|
|
10
10
|
|
|
11
|
-
import kodit.embedding.
|
|
12
|
-
import kodit.indexing.
|
|
13
|
-
import kodit.
|
|
11
|
+
import kodit.embedding.embedding_models
|
|
12
|
+
import kodit.indexing.indexing_models
|
|
13
|
+
import kodit.source.source_models
|
|
14
14
|
from kodit.database import Base
|
|
15
15
|
|
|
16
16
|
# this is the Alembic Config object, which provides
|
kodit/search/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Search for relevant snippets."""
|
|
@@ -1,48 +1,25 @@
|
|
|
1
|
-
"""Repository for
|
|
2
|
-
|
|
3
|
-
This module provides the RetrievalRepository class which handles all database operations
|
|
4
|
-
related to searching and retrieving code snippets, including string-based searches
|
|
5
|
-
and their associated file information.
|
|
6
|
-
"""
|
|
1
|
+
"""Repository for searching for relevant snippets."""
|
|
7
2
|
|
|
8
3
|
from typing import TypeVar
|
|
9
4
|
|
|
10
5
|
import numpy as np
|
|
11
|
-
import pydantic
|
|
12
6
|
from sqlalchemy import (
|
|
13
7
|
select,
|
|
14
8
|
)
|
|
15
9
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
16
10
|
|
|
17
|
-
from kodit.embedding.
|
|
18
|
-
from kodit.indexing.
|
|
19
|
-
from kodit.
|
|
11
|
+
from kodit.embedding.embedding_models import Embedding, EmbeddingType
|
|
12
|
+
from kodit.indexing.indexing_models import Snippet
|
|
13
|
+
from kodit.source.source_models import File
|
|
20
14
|
|
|
21
15
|
T = TypeVar("T")
|
|
22
16
|
|
|
23
17
|
|
|
24
|
-
class
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
This model represents a single search result, containing both the file path
|
|
28
|
-
and the matching snippet content.
|
|
29
|
-
"""
|
|
30
|
-
|
|
31
|
-
id: int
|
|
32
|
-
uri: str
|
|
33
|
-
content: str
|
|
34
|
-
score: float
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class RetrievalRepository:
|
|
38
|
-
"""Repository for retrieving code snippets and search results.
|
|
39
|
-
|
|
40
|
-
This class provides methods for searching and retrieving code snippets from
|
|
41
|
-
the database, including string-based searches and their associated file information.
|
|
42
|
-
"""
|
|
18
|
+
class SearchRepository:
|
|
19
|
+
"""Repository for searching for relevant snippets."""
|
|
43
20
|
|
|
44
21
|
def __init__(self, session: AsyncSession) -> None:
|
|
45
|
-
"""Initialize the
|
|
22
|
+
"""Initialize the search repository.
|
|
46
23
|
|
|
47
24
|
Args:
|
|
48
25
|
session: The SQLAlchemy async session to use for database operations.
|
|
@@ -50,39 +27,6 @@ class RetrievalRepository:
|
|
|
50
27
|
"""
|
|
51
28
|
self.session = session
|
|
52
29
|
|
|
53
|
-
async def string_search(self, query: str) -> list[RetrievalResult]:
|
|
54
|
-
"""Search for snippets containing the given query string.
|
|
55
|
-
|
|
56
|
-
This method performs a case-insensitive search for the query string within
|
|
57
|
-
snippet contents, returning up to 10 most recent matches.
|
|
58
|
-
|
|
59
|
-
Args:
|
|
60
|
-
query: The string to search for within snippet contents.
|
|
61
|
-
|
|
62
|
-
Returns:
|
|
63
|
-
A list of RetrievalResult objects containing the matching snippets
|
|
64
|
-
and their associated file paths.
|
|
65
|
-
|
|
66
|
-
"""
|
|
67
|
-
search_query = (
|
|
68
|
-
select(Snippet, File)
|
|
69
|
-
.join(File, Snippet.file_id == File.id)
|
|
70
|
-
.where(Snippet.content.ilike(f"%{query}%"))
|
|
71
|
-
.limit(10)
|
|
72
|
-
)
|
|
73
|
-
rows = await self.session.execute(search_query)
|
|
74
|
-
results = list(rows.all())
|
|
75
|
-
|
|
76
|
-
return [
|
|
77
|
-
RetrievalResult(
|
|
78
|
-
id=snippet.id,
|
|
79
|
-
uri=file.uri,
|
|
80
|
-
content=snippet.content,
|
|
81
|
-
score=1.0,
|
|
82
|
-
)
|
|
83
|
-
for snippet, file in results
|
|
84
|
-
]
|
|
85
|
-
|
|
86
30
|
async def list_snippet_ids(self) -> list[int]:
|
|
87
31
|
"""List all snippet IDs.
|
|
88
32
|
|
|
@@ -94,7 +38,7 @@ class RetrievalRepository:
|
|
|
94
38
|
rows = await self.session.execute(query)
|
|
95
39
|
return list(rows.scalars().all())
|
|
96
40
|
|
|
97
|
-
async def list_snippets_by_ids(self, ids: list[int]) -> list[
|
|
41
|
+
async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[File, Snippet]]:
|
|
98
42
|
"""List snippets by IDs.
|
|
99
43
|
|
|
100
44
|
Returns:
|
|
@@ -109,23 +53,46 @@ class RetrievalRepository:
|
|
|
109
53
|
rows = await self.session.execute(query)
|
|
110
54
|
|
|
111
55
|
# Create a dictionary for O(1) lookup of results by ID
|
|
112
|
-
id_to_result = {
|
|
113
|
-
snippet.id: RetrievalResult(
|
|
114
|
-
id=snippet.id,
|
|
115
|
-
uri=file.uri,
|
|
116
|
-
content=snippet.content,
|
|
117
|
-
score=1.0,
|
|
118
|
-
)
|
|
119
|
-
for snippet, file in rows.all()
|
|
120
|
-
}
|
|
56
|
+
id_to_result = {snippet.id: (file, snippet) for snippet, file in rows.all()}
|
|
121
57
|
|
|
122
58
|
# Return results in the same order as input IDs
|
|
123
59
|
return [id_to_result[i] for i in ids]
|
|
124
60
|
|
|
125
|
-
async def
|
|
61
|
+
async def list_semantic_results(
    self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
) -> list[tuple[int, float]]:
    """Score stored embeddings against a query vector with cosine similarity.

    The embeddings of the requested type are loaded from the database and
    the similarity is computed in Python with NumPy.

    Args:
        embedding_type: The type of embeddings to search
        embedding: The query embedding vector
        top_k: Number of results to return

    Returns:
        List of (snippet_id, similarity_score) tuples, sorted by similarity

    """
    stored = await self._list_embedding_values(embedding_type)
    if not stored:
        # Nothing stored for this embedding type, so there is nothing to rank.
        return []

    # Turn the rows and the query into arrays, score them, keep the best k.
    matrix, query = self._prepare_vectors(stored, embedding)
    scores = self._compute_similarities(matrix, query)
    return self._get_top_k_results(scores, stored, top_k)
|
|
91
|
+
|
|
92
|
+
async def _list_embedding_values(
|
|
126
93
|
self, embedding_type: EmbeddingType
|
|
127
94
|
) -> list[tuple[int, list[float]]]:
|
|
128
|
-
"""
|
|
95
|
+
"""List all embeddings of a given type from the database.
|
|
129
96
|
|
|
130
97
|
Args:
|
|
131
98
|
embedding_type: The type of embeddings to fetch
|
|
@@ -141,7 +108,7 @@ class RetrievalRepository:
|
|
|
141
108
|
rows = await self.session.execute(query)
|
|
142
109
|
return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
|
|
143
110
|
|
|
144
|
-
def
|
|
111
|
+
def _prepare_vectors(
|
|
145
112
|
self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
|
|
146
113
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
147
114
|
"""Convert embeddings to numpy arrays.
|
|
@@ -154,13 +121,24 @@ class RetrievalRepository:
|
|
|
154
121
|
Tuple of (stored_vectors, query_vector) as numpy arrays
|
|
155
122
|
|
|
156
123
|
"""
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
124
|
+
try:
|
|
125
|
+
stored_vecs = np.array(
|
|
126
|
+
[emb[1] for emb in embeddings]
|
|
127
|
+
) # Use index 1 to get embedding
|
|
128
|
+
except ValueError as e:
|
|
129
|
+
if "inhomogeneous" in str(e):
|
|
130
|
+
msg = (
|
|
131
|
+
"The database has returned embeddings of different sizes. If you"
|
|
132
|
+
"have recently updated the embedding model, you will need to"
|
|
133
|
+
"delete your database and re-index your snippets."
|
|
134
|
+
)
|
|
135
|
+
raise ValueError(msg) from e
|
|
136
|
+
raise
|
|
137
|
+
|
|
160
138
|
query_vec = np.array(query_embedding)
|
|
161
139
|
return stored_vecs, query_vec
|
|
162
140
|
|
|
163
|
-
def
|
|
141
|
+
def _compute_similarities(
|
|
164
142
|
self, stored_vecs: np.ndarray, query_vec: np.ndarray
|
|
165
143
|
) -> np.ndarray:
|
|
166
144
|
"""Compute cosine similarities between stored vectors and query vector.
|
|
@@ -177,7 +155,7 @@ class RetrievalRepository:
|
|
|
177
155
|
query_norm = np.linalg.norm(query_vec)
|
|
178
156
|
return np.dot(stored_vecs, query_vec) / (stored_norms * query_norm)
|
|
179
157
|
|
|
180
|
-
def
|
|
158
|
+
def _get_top_k_results(
|
|
181
159
|
self,
|
|
182
160
|
similarities: np.ndarray,
|
|
183
161
|
embeddings: list[tuple[int, list[float]]],
|
|
@@ -198,34 +176,3 @@ class RetrievalRepository:
|
|
|
198
176
|
return [
|
|
199
177
|
(embeddings[i][0], float(similarities[i])) for i in top_indices
|
|
200
178
|
] # Use index 0 to get snippet_id
|
|
201
|
-
|
|
202
|
-
async def list_semantic_results(
|
|
203
|
-
self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
|
|
204
|
-
) -> list[tuple[int, float]]:
|
|
205
|
-
"""List semantic results using cosine similarity.
|
|
206
|
-
|
|
207
|
-
This implementation fetches all embeddings of the given type and computes
|
|
208
|
-
cosine similarity in Python using NumPy for better performance.
|
|
209
|
-
|
|
210
|
-
Args:
|
|
211
|
-
embedding_type: The type of embeddings to search
|
|
212
|
-
embedding: The query embedding vector
|
|
213
|
-
top_k: Number of results to return
|
|
214
|
-
|
|
215
|
-
Returns:
|
|
216
|
-
List of (snippet_id, similarity_score) tuples, sorted by similarity
|
|
217
|
-
|
|
218
|
-
"""
|
|
219
|
-
# Step 1: Fetch embeddings from database
|
|
220
|
-
embeddings = await self.fetch_embeddings(embedding_type)
|
|
221
|
-
if not embeddings:
|
|
222
|
-
return []
|
|
223
|
-
|
|
224
|
-
# Step 2: Convert to numpy arrays
|
|
225
|
-
stored_vecs, query_vec = self.prepare_vectors(embeddings, embedding)
|
|
226
|
-
|
|
227
|
-
# Step 3: Compute similarities
|
|
228
|
-
similarities = self.compute_similarities(stored_vecs, query_vec)
|
|
229
|
-
|
|
230
|
-
# Step 4: Get top-k results
|
|
231
|
-
return self.get_top_k_results(similarities, embeddings, top_k)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Search service."""
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
@@ -6,19 +6,31 @@ import pydantic
|
|
|
6
6
|
import structlog
|
|
7
7
|
|
|
8
8
|
from kodit.bm25.bm25 import BM25Service
|
|
9
|
-
from kodit.embedding.embedding import
|
|
10
|
-
from kodit.embedding.
|
|
11
|
-
from kodit.
|
|
9
|
+
from kodit.embedding.embedding import Embedder
|
|
10
|
+
from kodit.embedding.embedding_models import EmbeddingType
|
|
11
|
+
from kodit.search.search_repository import SearchRepository
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class
|
|
15
|
-
"""Request for a
|
|
14
|
+
class SearchRequest(pydantic.BaseModel):
    """Request for a search.

    Both query fields are optional; SearchService.search only runs the
    keyword branch when ``keywords`` is set and only computes a query
    embedding when ``code_query`` is set.
    """

    # Code text that is embedded for the semantic part of the search.
    code_query: str | None = None
    # Keywords used for the BM25 part of the search.
    keywords: list[str] | None = None
    # Cap on how many fused result IDs are looked up and returned.
    top_k: int = 10
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
class SearchResult(pydantic.BaseModel):
    """Data transfer object for search results.

    This model represents a single search result, containing both the file path
    and the matching snippet content.
    """

    # ID of the matching snippet (filled from snippet.id).
    id: int
    # URI of the file the snippet belongs to (filled from file.uri).
    uri: str
    # Text of the snippet (filled from snippet.content).
    content: str
|
|
32
|
+
|
|
33
|
+
|
|
22
34
|
class Snippet(pydantic.BaseModel):
|
|
23
35
|
"""Snippet model."""
|
|
24
36
|
|
|
@@ -26,23 +38,23 @@ class Snippet(pydantic.BaseModel):
|
|
|
26
38
|
file_path: str
|
|
27
39
|
|
|
28
40
|
|
|
29
|
-
class
|
|
30
|
-
"""Service for
|
|
41
|
+
class SearchService:
|
|
42
|
+
"""Service for searching for relevant data."""
|
|
31
43
|
|
|
32
44
|
def __init__(
    self,
    repository: SearchRepository,
    data_dir: Path,
    embedding_service: Embedder,
) -> None:
    """Store the collaborators the search service works with.

    Args:
        repository: Repository used for snippet and embedding lookups.
        data_dir: Directory passed to ``BM25Service``.
        embedding_service: Embedder kept as ``code_embedding_service``.

    """
    self.log = structlog.get_logger(__name__)
    self.repository = repository
    self.code_embedding_service = embedding_service
    self.bm25 = BM25Service(data_dir)
|
|
43
55
|
|
|
44
|
-
async def
|
|
45
|
-
"""
|
|
56
|
+
async def search(self, request: SearchRequest) -> list[SearchResult]:
|
|
57
|
+
"""Search for relevant data."""
|
|
46
58
|
fusion_list = []
|
|
47
59
|
if request.keywords:
|
|
48
60
|
snippet_ids = await self.repository.list_snippet_ids()
|
|
@@ -56,7 +68,7 @@ class RetrievalService:
|
|
|
56
68
|
# Sort results by score
|
|
57
69
|
result_ids.sort(key=lambda x: x[1], reverse=True)
|
|
58
70
|
|
|
59
|
-
self.log.debug("
|
|
71
|
+
self.log.debug("Search results (BM25)", results=result_ids)
|
|
60
72
|
|
|
61
73
|
bm25_results = [x[0] for x in result_ids]
|
|
62
74
|
fusion_list.append(bm25_results)
|
|
@@ -64,7 +76,7 @@ class RetrievalService:
|
|
|
64
76
|
# Compute embedding for semantic query
|
|
65
77
|
semantic_results = []
|
|
66
78
|
if request.code_query:
|
|
67
|
-
query_embedding =
|
|
79
|
+
query_embedding = await anext(
|
|
68
80
|
self.code_embedding_service.query([request.code_query])
|
|
69
81
|
)
|
|
70
82
|
|
|
@@ -89,7 +101,18 @@ class RetrievalService:
|
|
|
89
101
|
final_ids = [x[0] for x in final_results]
|
|
90
102
|
|
|
91
103
|
# Get snippets from database (up to top_k)
|
|
92
|
-
|
|
104
|
+
search_results = await self.repository.list_snippets_by_ids(
|
|
105
|
+
final_ids[: request.top_k]
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
return [
|
|
109
|
+
SearchResult(
|
|
110
|
+
id=snippet.id,
|
|
111
|
+
uri=file.uri,
|
|
112
|
+
content=snippet.content,
|
|
113
|
+
)
|
|
114
|
+
for file, snippet in search_results
|
|
115
|
+
]
|
|
93
116
|
|
|
94
117
|
|
|
95
118
|
def reciprocal_rank_fusion(
|
kodit/snippets/snippets.py
CHANGED
|
@@ -45,4 +45,6 @@ class SnippetService:
|
|
|
45
45
|
raise ValueError(msg) from e
|
|
46
46
|
|
|
47
47
|
method_snippets = method_analser.extract(file_bytes)
|
|
48
|
-
|
|
48
|
+
all_snippets = [Snippet(text=snippet) for snippet in method_snippets]
|
|
49
|
+
# Remove any snippets that are empty
|
|
50
|
+
return [snippet for snippet in all_snippets if snippet.text.strip()]
|
|
@@ -1,14 +1,9 @@
|
|
|
1
|
-
"""Source repository for database operations.
|
|
2
|
-
|
|
3
|
-
This module provides the SourceRepository class which handles all database operations
|
|
4
|
-
related to code sources. It manages the creation and retrieval of source records
|
|
5
|
-
from the database, abstracting away the SQLAlchemy implementation details.
|
|
6
|
-
"""
|
|
1
|
+
"""Source repository for database operations."""
|
|
7
2
|
|
|
8
3
|
from sqlalchemy import func, select
|
|
9
4
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
5
|
|
|
11
|
-
from kodit.
|
|
6
|
+
from kodit.source.source_models import File, Source
|
|
12
7
|
|
|
13
8
|
|
|
14
9
|
class SourceRepository:
|
|
@@ -19,8 +19,8 @@ import structlog
|
|
|
19
19
|
from tqdm import tqdm
|
|
20
20
|
from uritools import isuri, urisplit
|
|
21
21
|
|
|
22
|
-
from kodit.
|
|
23
|
-
from kodit.
|
|
22
|
+
from kodit.source.source_models import File, Source
|
|
23
|
+
from kodit.source.source_repository import SourceRepository
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class SourceView(pydantic.BaseModel):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kodit
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.13
|
|
4
4
|
Summary: Code indexing for better AI code generation
|
|
5
5
|
Project-URL: Homepage, https://docs.helixml.tech/kodit/
|
|
6
6
|
Project-URL: Documentation, https://docs.helixml.tech/kodit/
|
|
@@ -32,6 +32,7 @@ Requires-Dist: gitpython>=3.1.44
|
|
|
32
32
|
Requires-Dist: hf-xet>=1.1.2
|
|
33
33
|
Requires-Dist: httpx-retries>=0.3.2
|
|
34
34
|
Requires-Dist: httpx>=0.28.1
|
|
35
|
+
Requires-Dist: openai>=1.82.0
|
|
35
36
|
Requires-Dist: posthog>=4.0.1
|
|
36
37
|
Requires-Dist: pydantic-settings>=2.9.1
|
|
37
38
|
Requires-Dist: pytable-formatter>=0.1.1
|
|
@@ -39,6 +40,7 @@ Requires-Dist: sentence-transformers>=4.1.0
|
|
|
39
40
|
Requires-Dist: sqlalchemy[asyncio]>=2.0.40
|
|
40
41
|
Requires-Dist: structlog>=25.3.0
|
|
41
42
|
Requires-Dist: tdqm>=0.0.1
|
|
43
|
+
Requires-Dist: tiktoken>=0.9.0
|
|
42
44
|
Requires-Dist: tree-sitter-language-pack>=0.7.3
|
|
43
45
|
Requires-Dist: tree-sitter>=0.24.0
|
|
44
46
|
Requires-Dist: uritools>=5.0.0
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
|
+
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
+
kodit/_version.py,sha256=Ln0urWB3R3JaxFwIIvoej0v08KbDCO89NUBxWx-zj0U,513
|
|
4
|
+
kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
|
|
5
|
+
kodit/cli.py,sha256=VLoXFS1xJnQ0TLy3_cO8-B9tCb4NJHiYPfzZtHxpgRY,7784
|
|
6
|
+
kodit/config.py,sha256=TDcLt6fiJn9cI1PoO5AqBqsL_Bxmm9JV5GqRxhj1tLw,4202
|
|
7
|
+
kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
|
|
8
|
+
kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
|
|
9
|
+
kodit/mcp.py,sha256=I_ZFzQOR0gyS8LO8td-q-utPZpqiOnIkn7O-SIBUi0g,4384
|
|
10
|
+
kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
|
|
11
|
+
kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
|
|
12
|
+
kodit/bm25/bm25.py,sha256=JtgJfsHz-2SHx96zxWjkPFSH7fXkahFMp01cDwl4YBg,2298
|
|
13
|
+
kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
|
|
14
|
+
kodit/embedding/embedding.py,sha256=EMJpHK8ICZk_FjiO9Aqr2IO20qkGOmj_PfA1hyfI7Vk,6745
|
|
15
|
+
kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
|
|
16
|
+
kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
|
|
17
|
+
kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
|
|
18
|
+
kodit/indexing/indexing_repository.py,sha256=7bkAiBwtr3qlkdhNIalwMwbxezVz_RQGOhLVWPKHwNk,5506
|
|
19
|
+
kodit/indexing/indexing_service.py,sha256=VGfKgbkYEAYP_gIubvhMxo3yThT20ndS5xdg2LxwRgA,6685
|
|
20
|
+
kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
|
|
21
|
+
kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
|
|
22
|
+
kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
|
|
23
|
+
kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
|
|
24
|
+
kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
|
|
25
|
+
kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
|
|
26
|
+
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
27
|
+
kodit/search/__init__.py,sha256=4QbdjbrlhNKMovmuKHxJnUeZT7KNjTTFU0GdnuwUHdQ,36
|
|
28
|
+
kodit/search/search_repository.py,sha256=r1fkV6-cy9BKsy5J4WTHaY_FcjMaT1PV5qqqq0gvjZw,5833
|
|
29
|
+
kodit/search/search_service.py,sha256=KePkqCAc3CUcrpNsbDc5DqbF6W2m0TG6TDa9-VSJZS0,4227
|
|
30
|
+
kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
|
|
31
|
+
kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
|
|
32
|
+
kodit/snippets/snippets.py,sha256=mwN0bM1Msu8ZeEsUHyQ7tx3Hj3vZsm8G7Wu4eWSkLY8,1539
|
|
33
|
+
kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
|
|
34
|
+
kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
|
|
35
|
+
kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
|
|
36
|
+
kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
|
|
37
|
+
kodit/source/source_models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
|
|
38
|
+
kodit/source/source_repository.py,sha256=0EksMpoLzdkfe8S4eeCm4Sf7TuxsOzOzaF4BBsMYo-4,3163
|
|
39
|
+
kodit/source/source_service.py,sha256=qBV9FCFQbJppeFrVo4uMgvC_mzWRIKldymp5yqLx9pw,9255
|
|
40
|
+
kodit-0.1.13.dist-info/METADATA,sha256=Od1OTG0tkd0Cf82juR2DGKBQ8l1RwHQ5VLgtiIW5qeA,2349
|
|
41
|
+
kodit-0.1.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
42
|
+
kodit-0.1.13.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
43
|
+
kodit-0.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
44
|
+
kodit-0.1.13.dist-info/RECORD,,
|
kodit/retreival/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
"""Retrieval package for code search and retrieval functionality."""
|
kodit-0.1.11.dist-info/RECORD
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
|
-
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=xfwL5IZGNNwnNDAQtGFjpvlNxqYn3U9IM9B98Du9pJw,513
|
|
4
|
-
kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
|
|
5
|
-
kodit/cli.py,sha256=qEQy_Sd64cEV5KzYsKlGLyMxFQ4fFi-as4QO8CRrKYo,8978
|
|
6
|
-
kodit/config.py,sha256=hQshTMW_8jpk94zP-1JaxowgmW_LrT534ipHFaRUGMw,3006
|
|
7
|
-
kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
|
|
8
|
-
kodit/log.py,sha256=PhyzQktEyyHaNr78W0wmL-RSRuq311DQ-d0l-EKTGmQ,5417
|
|
9
|
-
kodit/mcp.py,sha256=qp16vRb0TY46-xQy179iWgYebr6Ju_Z91ZSzZnWPHuk,4771
|
|
10
|
-
kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
|
|
11
|
-
kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
|
|
12
|
-
kodit/bm25/bm25.py,sha256=NtlcLrgqJja11qDGKz_U6tuYWaS9sfbyS-TcA__rBKs,2284
|
|
13
|
-
kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
|
|
14
|
-
kodit/embedding/embedding.py,sha256=X2Fa-eXhQwp__QFj9yxIhvlCAiYVQSaZ2y18ZtG5_1Y,1810
|
|
15
|
-
kodit/embedding/models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
|
|
16
|
-
kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
|
|
17
|
-
kodit/indexing/models.py,sha256=sZIhGwvL4Dw0QTWFxrjfWctSLkAoDT6fv5DlGz8-Fr8,1258
|
|
18
|
-
kodit/indexing/repository.py,sha256=eIaIbqNs9Z3XTVymZ5Zl5uPWveqiEXNo0JTa-y-Tl24,5430
|
|
19
|
-
kodit/indexing/service.py,sha256=hhQ_6vI7J7LnNgOLbsO4B07TOJvEePqqFviiqr3TL_M,6579
|
|
20
|
-
kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
|
|
21
|
-
kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
|
|
22
|
-
kodit/migrations/env.py,sha256=bzB6vod_tO-X2F_G671FwYSAn0pyhNw8M1kG4MgidO8,2444
|
|
23
|
-
kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
|
|
24
|
-
kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
|
|
25
|
-
kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
|
|
26
|
-
kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
|
|
27
|
-
kodit/retreival/__init__.py,sha256=33PhJU-3gtsqYq6A1UkaLNKbev_Zee9Lq6dYC59-CsA,69
|
|
28
|
-
kodit/retreival/repository.py,sha256=XHkkeUsnXSrrcthJOL9FXgivn5kkaPnC9Qci6ebwjZc,7294
|
|
29
|
-
kodit/retreival/service.py,sha256=gGp74jnqhyCDF5vKOrN2dJKDnhlfR4HZaxADSrjTb4s,3778
|
|
30
|
-
kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
|
|
31
|
-
kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
|
|
32
|
-
kodit/snippets/snippets.py,sha256=QumvhltWoxXw41SyKb-RbSvAr3m6V3lUy9n0AI8jcto,1409
|
|
33
|
-
kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
|
|
34
|
-
kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
|
|
35
|
-
kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
|
|
36
|
-
kodit/sources/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
|
|
37
|
-
kodit/sources/models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
|
|
38
|
-
kodit/sources/repository.py,sha256=mGJrHWH6Uo8YABdoojHFbzaf_jW-2ywJpAHIa1gnc3U,3401
|
|
39
|
-
kodit/sources/service.py,sha256=aV_qiqkU2kMBNPvye5_v4NnZiK-lJ64rQdmFtBtsQaY,9243
|
|
40
|
-
kodit-0.1.11.dist-info/METADATA,sha256=yUO645VYUiVrJMRtwNB71O-6qvC94nS7_ILQ8eQEvoY,2288
|
|
41
|
-
kodit-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
42
|
-
kodit-0.1.11.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
43
|
-
kodit-0.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
44
|
-
kodit-0.1.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|