kodit 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kodit might be problematic.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.11'
21
- __version_tuple__ = version_tuple = (0, 1, 11)
20
+ __version__ = version = '0.1.13'
21
+ __version_tuple__ = version_tuple = (0, 1, 13)
kodit/bm25/bm25.py CHANGED
@@ -52,7 +52,7 @@ class BM25Service:
52
52
  self.log.warning("No documents to retrieve from, returning empty list")
53
53
  return []
54
54
 
55
- top_k = min(top_k, len(doc_ids))
55
+ top_k = min(top_k, len(self.retriever.scores))
56
56
  self.log.debug(
57
57
  "Retrieving from index", query=query, top_k=top_k, num_docs=len(doc_ids)
58
58
  )
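
The fix above clamps top_k to the number of documents the bm25s retriever actually holds rather than to the number of requested doc IDs, so a search can never ask the index for more results than it contains. A minimal sketch of that guard, with an illustrative helper name that is not part of kodit's API:

    # Illustrative only: never request more results than the index can return.
    def clamp_top_k(requested: int, available: int) -> int:
        return max(0, min(requested, available))

    assert clamp_top_k(10, 3) == 3  # an index of 3 documents serves at most 3 results
    assert clamp_top_k(2, 3) == 2   # otherwise the caller's request is honoured
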
kodit/cli.py CHANGED
@@ -1,6 +1,5 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
- import os
4
3
  import signal
5
4
  from pathlib import Path
6
5
  from typing import Any
@@ -12,35 +11,21 @@ from pytable_formatter import Cell, Table
12
11
  from sqlalchemy.ext.asyncio import AsyncSession
13
12
 
14
13
  from kodit.config import (
15
- DEFAULT_BASE_DIR,
16
- DEFAULT_DB_URL,
17
- DEFAULT_DISABLE_TELEMETRY,
18
- DEFAULT_EMBEDDING_MODEL_NAME,
19
- DEFAULT_LOG_FORMAT,
20
- DEFAULT_LOG_LEVEL,
21
14
  AppContext,
22
15
  with_app_context,
23
16
  with_session,
24
17
  )
25
- from kodit.indexing.repository import IndexRepository
26
- from kodit.indexing.service import IndexService
18
+ from kodit.embedding.embedding import embedding_factory
19
+ from kodit.indexing.indexing_repository import IndexRepository
20
+ from kodit.indexing.indexing_service import IndexService
27
21
  from kodit.log import configure_logging, configure_telemetry, log_event
28
- from kodit.retreival.repository import RetrievalRepository
29
- from kodit.retreival.service import RetrievalRequest, RetrievalService
30
- from kodit.sources.repository import SourceRepository
31
- from kodit.sources.service import SourceService
22
+ from kodit.search.search_repository import SearchRepository
23
+ from kodit.search.search_service import SearchRequest, SearchService
24
+ from kodit.source.source_repository import SourceRepository
25
+ from kodit.source.source_service import SourceService
32
26
 
33
27
 
34
28
  @click.group(context_settings={"max_content_width": 100})
35
- @click.option("--log-level", help=f"Log level [default: {DEFAULT_LOG_LEVEL}]")
36
- @click.option("--log-format", help=f"Log format [default: {DEFAULT_LOG_FORMAT}]")
37
- @click.option(
38
- "--disable-telemetry",
39
- is_flag=True,
40
- help=f"Disable telemetry [default: {DEFAULT_DISABLE_TELEMETRY}]",
41
- )
42
- @click.option("--db-url", help=f"Database URL [default: {DEFAULT_DB_URL}]")
43
- @click.option("--data-dir", help=f"Data directory [default: {DEFAULT_BASE_DIR}]")
44
29
  @click.option(
45
30
  "--env-file",
46
31
  help="Path to a .env file [default: .env]",
@@ -52,13 +37,8 @@ from kodit.sources.service import SourceService
52
37
  ),
53
38
  )
54
39
  @click.pass_context
55
- def cli( # noqa: PLR0913
40
+ def cli(
56
41
  ctx: click.Context,
57
- log_level: str | None,
58
- log_format: str | None,
59
- disable_telemetry: bool | None,
60
- db_url: str | None,
61
- data_dir: str | None,
62
42
  env_file: Path | None,
63
43
  ) -> None:
64
44
  """kodit CLI - Code indexing for better AI code generation.""" # noqa: D403
@@ -67,17 +47,6 @@ def cli( # noqa: PLR0913
67
47
  if env_file:
68
48
  config = AppContext(_env_file=env_file) # type: ignore[reportCallIssue]
69
49
 
70
- # Now override with CLI arguments, if set
71
- if data_dir:
72
- config.data_dir = Path(data_dir)
73
- if db_url:
74
- config.db_url = db_url
75
- if log_level:
76
- config.log_level = log_level
77
- if log_format:
78
- config.log_format = log_format
79
- if disable_telemetry:
80
- config.disable_telemetry = disable_telemetry
81
50
  configure_logging(config)
82
51
  configure_telemetry(config)
83
52
 
@@ -102,7 +71,7 @@ async def index(
102
71
  repository,
103
72
  source_service,
104
73
  app_context.get_data_dir(),
105
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
74
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
106
75
  )
107
76
 
108
77
  if not sources:
@@ -159,14 +128,14 @@ async def code(
159
128
 
160
129
  This works best if your query is code.
161
130
  """
162
- repository = RetrievalRepository(session)
163
- service = RetrievalService(
131
+ repository = SearchRepository(session)
132
+ service = SearchService(
164
133
  repository,
165
134
  app_context.get_data_dir(),
166
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
135
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
167
136
  )
168
137
 
169
- snippets = await service.retrieve(RetrievalRequest(code_query=query, top_k=top_k))
138
+ snippets = await service.search(SearchRequest(code_query=query, top_k=top_k))
170
139
 
171
140
  if len(snippets) == 0:
172
141
  click.echo("No snippets found")
@@ -192,14 +161,14 @@ async def keyword(
192
161
  top_k: int,
193
162
  ) -> None:
194
163
  """Search for snippets using keyword search."""
195
- repository = RetrievalRepository(session)
196
- service = RetrievalService(
164
+ repository = SearchRepository(session)
165
+ service = SearchService(
197
166
  repository,
198
167
  app_context.get_data_dir(),
199
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
168
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
200
169
  )
201
170
 
202
- snippets = await service.retrieve(RetrievalRequest(keywords=keywords, top_k=top_k))
171
+ snippets = await service.search(SearchRequest(keywords=keywords, top_k=top_k))
203
172
 
204
173
  if len(snippets) == 0:
205
174
  click.echo("No snippets found")
@@ -227,18 +196,18 @@ async def hybrid(
227
196
  code: str,
228
197
  ) -> None:
229
198
  """Search for snippets using hybrid search."""
230
- repository = RetrievalRepository(session)
231
- service = RetrievalService(
199
+ repository = SearchRepository(session)
200
+ service = SearchService(
232
201
  repository,
233
202
  app_context.get_data_dir(),
234
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
203
+ embedding_service=embedding_factory(app_context.get_default_openai_client()),
235
204
  )
236
205
 
237
206
  # Parse keywords into a list of strings
238
207
  keywords_list = [k.strip().lower() for k in keywords.split(",")]
239
208
 
240
- snippets = await service.retrieve(
241
- RetrievalRequest(keywords=keywords_list, code_query=code, top_k=top_k)
209
+ snippets = await service.search(
210
+ SearchRequest(keywords=keywords_list, code_query=code, top_k=top_k)
242
211
  )
243
212
 
244
213
  if len(snippets) == 0:
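
The search commands above (code, keyword, hybrid) now share one wiring pattern: the repository and data directory come from the session and app context, and the embedder is produced by embedding_factory, which falls back to a local model when no OpenAI client is configured. A sketch of that shared construction, using a hypothetical helper name:

    from sqlalchemy.ext.asyncio import AsyncSession

    from kodit.config import AppContext
    from kodit.embedding.embedding import embedding_factory
    from kodit.search.search_repository import SearchRepository
    from kodit.search.search_service import SearchService


    def build_search_service(app_context: AppContext, session: AsyncSession) -> SearchService:
        # Mirrors the construction each search command performs.
        return SearchService(
            SearchRepository(session),
            app_context.get_data_dir(),
            embedding_service=embedding_factory(app_context.get_default_openai_client()),
        )
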
@@ -256,9 +225,7 @@ async def hybrid(
256
225
  @cli.command()
257
226
  @click.option("--host", default="127.0.0.1", help="Host to bind the server to")
258
227
  @click.option("--port", default=8080, help="Port to bind the server to")
259
- @with_app_context
260
228
  def serve(
261
- app_context: AppContext,
262
229
  host: str,
263
230
  port: int,
264
231
  ) -> None:
@@ -267,10 +234,6 @@ def serve(
267
234
  log.info("Starting kodit server", host=host, port=port)
268
235
  log_event("kodit_server_started")
269
236
 
270
- # Dump AppContext to a dictionary of strings, and set the env vars
271
- app_context_dict = {k: str(v) for k, v in app_context.model_dump().items()}
272
- os.environ.update(app_context_dict)
273
-
274
237
  # Configure uvicorn with graceful shutdown
275
238
  config = uvicorn.Config(
276
239
  "kodit.app:app",
kodit/config.py CHANGED
@@ -4,10 +4,11 @@ import asyncio
4
4
  from collections.abc import Callable, Coroutine
5
5
  from functools import wraps
6
6
  from pathlib import Path
7
- from typing import Any, TypeVar
7
+ from typing import Any, Literal, TypeVar
8
8
 
9
9
  import click
10
- from pydantic import Field
10
+ from openai import AsyncOpenAI
11
+ from pydantic import BaseModel, Field
11
12
  from pydantic_settings import BaseSettings, SettingsConfigDict
12
13
 
13
14
  from kodit.database import Database
@@ -22,16 +23,40 @@ DEFAULT_EMBEDDING_MODEL_NAME = TINY
22
23
  T = TypeVar("T")
23
24
 
24
25
 
26
+ class Endpoint(BaseModel):
27
+ """Endpoint provides configuration for an AI service."""
28
+
29
+ type: Literal["openai"] = Field(default="openai")
30
+ api_key: str | None = None
31
+ base_url: str | None = None
32
+
33
+
25
34
  class AppContext(BaseSettings):
26
35
  """Global context for the kodit project. Provides a shared state for the app."""
27
36
 
28
- model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
37
+ model_config = SettingsConfigDict(
38
+ env_file=".env",
39
+ env_file_encoding="utf-8",
40
+ env_nested_delimiter="_",
41
+ nested_model_default_partial_update=True,
42
+ env_nested_max_split=1,
43
+ )
29
44
 
30
45
  data_dir: Path = Field(default=DEFAULT_BASE_DIR)
31
46
  db_url: str = Field(default=DEFAULT_DB_URL)
32
47
  log_level: str = Field(default=DEFAULT_LOG_LEVEL)
33
48
  log_format: str = Field(default=DEFAULT_LOG_FORMAT)
34
49
  disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
50
+ default_endpoint: Endpoint | None = Field(
51
+ default=Endpoint(
52
+ type="openai",
53
+ base_url="https://api.openai.com/v1",
54
+ ),
55
+ description=(
56
+ "Default endpoint to use for all AI interactions "
57
+ "(can be overridden by task-specific configuration)."
58
+ ),
59
+ )
35
60
  _db: Database | None = None
36
61
 
37
62
  def model_post_init(self, _: Any) -> None:
@@ -58,6 +83,21 @@ class AppContext(BaseSettings):
58
83
  await self._db.run_migrations(self.db_url)
59
84
  return self._db
60
85
 
86
+ def get_default_openai_client(self) -> AsyncOpenAI | None:
87
+ """Get the default OpenAI client, if it is configured."""
88
+ endpoint = self.default_endpoint
89
+ if not (
90
+ endpoint
91
+ and endpoint.type == "openai"
92
+ and endpoint.api_key
93
+ and endpoint.base_url
94
+ ):
95
+ return None
96
+ return AsyncOpenAI(
97
+ api_key=endpoint.api_key,
98
+ base_url=endpoint.base_url,
99
+ )
100
+
61
101
 
62
102
  with_app_context = click.make_pass_decorator(AppContext)
63
103
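
With the nested Endpoint model in place, an OpenAI-backed embedder is only used when the endpoint is fully specified; get_default_openai_client() returns None otherwise and embedding_factory falls back to the local model. A sketch under those assumptions (values are placeholders; in practice they come from the environment or a .env file, with env_nested_delimiter="_" mapping nested fields to flat variable names):

    from kodit.config import AppContext, Endpoint
    from kodit.embedding.embedding import embedding_factory

    # Placeholder values for illustration only.
    app_context = AppContext(
        default_endpoint=Endpoint(
            type="openai",
            api_key="sk-example",
            base_url="https://api.openai.com/v1",
        )
    )

    client = app_context.get_default_openai_client()  # AsyncOpenAI, or None if incomplete
    embedder = embedding_factory(client)              # OpenAIEmbedder if client, else LocalEmbedder
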
 
kodit/embedding/embedding.py CHANGED
@@ -1,9 +1,14 @@
1
1
  """Embedding service."""
2
2
 
3
+ import asyncio
3
4
  import os
4
- from collections.abc import Generator
5
+ from abc import ABC, abstractmethod
6
+ from collections.abc import AsyncGenerator
7
+ from typing import NamedTuple
5
8
 
6
9
  import structlog
10
+ import tiktoken
11
+ from openai import AsyncOpenAI
7
12
  from sentence_transformers import SentenceTransformer
8
13
 
9
14
  TINY = "tiny"
@@ -17,14 +22,59 @@ COMMON_EMBEDDING_MODELS = {
17
22
  }
18
23
 
19
24
 
20
- class EmbeddingService:
21
- """Service for embeddings."""
25
+ class EmbeddingInput(NamedTuple):
26
+ """Input for embedding."""
27
+
28
+ id: int
29
+ text: str
30
+
31
+
32
+ class EmbeddingOutput(NamedTuple):
33
+ """Output for embedding."""
34
+
35
+ id: int
36
+ embedding: list[float]
37
+
38
+
39
+ class Embedder(ABC):
40
+ """Embedder interface."""
41
+
42
+ @abstractmethod
43
+ def embed(
44
+ self, data: list[EmbeddingInput]
45
+ ) -> AsyncGenerator[EmbeddingOutput, None]:
46
+ """Embed a list of documents.
47
+
48
+ The embedding service accepts a massive list of id,strings to embed. Behind the
49
+ scenes it batches up requests and parallelizes them for performance according to
50
+ the specifics of the embedding service.
51
+
52
+ The id reference is required because the parallelization may return results out
53
+ of order.
54
+ """
55
+
56
+ @abstractmethod
57
+ def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
58
+ """Query the embedding model."""
59
+
60
+
61
+ def embedding_factory(openai_client: AsyncOpenAI | None = None) -> Embedder:
62
+ """Create an embedding service."""
63
+ if openai_client is not None:
64
+ return OpenAIEmbedder(openai_client)
65
+ return LocalEmbedder(model_name=TINY)
66
+
67
+
68
+ class LocalEmbedder(Embedder):
69
+ """Local embedder."""
22
70
 
23
71
  def __init__(self, model_name: str) -> None:
24
- """Initialize the embedding service."""
72
+ """Initialize the local embedder."""
25
73
  self.log = structlog.get_logger(__name__)
74
+ self.log.info("Creating local embedder", model_name=model_name)
26
75
  self.model_name = COMMON_EMBEDDING_MODELS.get(model_name, model_name)
27
76
  self.embedding_model = None
77
+ self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
28
78
 
29
79
  def _model(self) -> SentenceTransformer:
30
80
  """Get the embedding model."""
@@ -37,16 +87,117 @@ class EmbeddingService:
37
87
  )
38
88
  return self.embedding_model
39
89
 
40
- def embed(self, snippets: list[str]) -> Generator[list[float], None, None]:
90
+ async def embed(
91
+ self, data: list[EmbeddingInput]
92
+ ) -> AsyncGenerator[EmbeddingOutput, None]:
41
93
  """Embed a list of documents."""
42
94
  model = self._model()
43
- embeddings = model.encode(snippets, show_progress_bar=False, batch_size=4)
44
- for embedding in embeddings:
45
- yield [float(x) for x in embedding]
46
95
 
47
- def query(self, query: list[str]) -> Generator[list[float], None, None]:
96
+ batched_data = _split_sub_batches(self.encoding, data)
97
+
98
+ for batch in batched_data:
99
+ embeddings = model.encode(
100
+ [i.text for i in batch], show_progress_bar=False, batch_size=4
101
+ )
102
+ for i, x in zip(batch, embeddings, strict=False):
103
+ yield EmbeddingOutput(i.id, [float(y) for y in x])
104
+
105
+ async def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
48
106
  """Query the embedding model."""
49
107
  model = self._model()
50
- embeddings = model.encode(query, show_progress_bar=False, batch_size=4)
108
+ embeddings = model.encode(data, show_progress_bar=False, batch_size=4)
51
109
  for embedding in embeddings:
52
110
  yield [float(x) for x in embedding]
111
+
112
+
113
+ OPENAI_MAX_EMBEDDING_SIZE = 8192
114
+ OPENAI_NUM_PARALLEL_TASKS = 10
115
+
116
+
117
+ def _split_sub_batches(
118
+ encoding: tiktoken.Encoding, data: list[EmbeddingInput]
119
+ ) -> list[list[EmbeddingInput]]:
120
+ """Split a list of strings into smaller sub-batches."""
121
+ log = structlog.get_logger(__name__)
122
+ result = []
123
+ data_to_process = [s for s in data if s.text.strip()] # Filter out empty strings
124
+
125
+ while data_to_process:
126
+ next_batch = []
127
+ current_tokens = 0
128
+
129
+ while data_to_process:
130
+ next_item = data_to_process[0]
131
+ item_tokens = len(encoding.encode(next_item.text))
132
+
133
+ if item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
134
+ log.warning("Skipping too long snippet", snippet=data_to_process.pop(0))
135
+ continue
136
+
137
+ if current_tokens + item_tokens > OPENAI_MAX_EMBEDDING_SIZE:
138
+ break
139
+
140
+ next_batch.append(data_to_process.pop(0))
141
+ current_tokens += item_tokens
142
+
143
+ if next_batch:
144
+ result.append(next_batch)
145
+
146
+ return result
147
+
148
+
149
+ class OpenAIEmbedder(Embedder):
150
+ """OpenAI embedder."""
151
+
152
+ def __init__(
153
+ self, openai_client: AsyncOpenAI, model_name: str = "text-embedding-3-small"
154
+ ) -> None:
155
+ """Initialize the OpenAI embedder."""
156
+ self.log = structlog.get_logger(__name__)
157
+ self.log.info("Creating OpenAI embedder", model_name=model_name)
158
+ self.openai_client = openai_client
159
+ self.encoding = tiktoken.encoding_for_model(model_name)
160
+ self.log = structlog.get_logger(__name__)
161
+
162
+ async def embed(
163
+ self,
164
+ data: list[EmbeddingInput],
165
+ ) -> AsyncGenerator[EmbeddingOutput, None]:
166
+ """Embed a list of documents."""
167
+ # First split the list into a list of list where each sublist has fewer than
168
+ # max tokens.
169
+ batched_data = _split_sub_batches(self.encoding, data)
170
+
171
+ # Process batches in parallel with a semaphore to limit concurrent requests
172
+ sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)
173
+
174
+ async def process_batch(batch: list[EmbeddingInput]) -> list[EmbeddingOutput]:
175
+ async with sem:
176
+ try:
177
+ response = await self.openai_client.embeddings.create(
178
+ model="text-embedding-3-small",
179
+ input=[i.text for i in batch],
180
+ )
181
+ return [
182
+ EmbeddingOutput(i.id, x.embedding)
183
+ for i, x in zip(batch, response.data, strict=False)
184
+ ]
185
+ except Exception as e:
186
+ self.log.exception("Error embedding batch", error=str(e))
187
+ return []
188
+
189
+ # Create tasks for all batches
190
+ tasks = [process_batch(batch) for batch in batched_data]
191
+
192
+ # Process all batches and yield results as they complete
193
+ for task in asyncio.as_completed(tasks):
194
+ embeddings = await task
195
+ for e in embeddings:
196
+ yield e
197
+
198
+ async def query(self, data: list[str]) -> AsyncGenerator[list[float], None]:
199
+ """Query the embedding model."""
200
+ async for e in self.embed(
201
+ [EmbeddingInput(i, text) for i, text in enumerate(data)]
202
+ ):
203
+ yield e.embedding
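
The rewritten embedding module is organised around the async Embedder interface: every input carries an id so results can be matched back to their snippet even when batches finish out of order, _split_sub_batches keeps each request under OPENAI_MAX_EMBEDDING_SIZE tokens (counted with tiktoken), and OpenAI batches run behind a semaphore of OPENAI_NUM_PARALLEL_TASKS concurrent tasks. A small consumption sketch (uses the local embedder, which downloads a sentence-transformers model on first run):

    import asyncio

    from kodit.embedding.embedding import EmbeddingInput, embedding_factory


    async def main() -> None:
        embedder = embedding_factory(None)  # no OpenAI client, so LocalEmbedder is used
        inputs = [
            EmbeddingInput(1, "def add(a, b):\n    return a + b"),
            EmbeddingInput(2, "class Greeter:\n    def hello(self):\n        return 'hi'"),
        ]
        by_id = {}
        async for out in embedder.embed(inputs):
            by_id[out.id] = out.embedding  # order of results is not guaranteed
        print({snippet_id: len(vec) for snippet_id, vec in by_id.items()})


    asyncio.run(main())
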
kodit/indexing/models.py → kodit/indexing/indexing_models.py RENAMED
@@ -31,8 +31,8 @@ class Snippet(Base, CommonMixin):
31
31
 
32
32
  __tablename__ = "snippets"
33
33
 
34
- file_id: Mapped[int] = mapped_column(ForeignKey("files.id"))
35
- index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"))
34
+ file_id: Mapped[int] = mapped_column(ForeignKey("files.id"), index=True)
35
+ index_id: Mapped[int] = mapped_column(ForeignKey("indexes.id"), index=True)
36
36
  content: Mapped[str] = mapped_column(UnicodeText, default="")
37
37
 
38
38
  def __init__(self, file_id: int, index_id: int, content: str) -> None:
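
Adding index=True to the two foreign-key columns makes SQLAlchemy attach secondary indexes to the snippets table, which supports the new per-index queries such as get_all_snippets(index_id) further down. A quick way to see what was registered (the ix_* names assume SQLAlchemy's default index naming; the real names are whatever the project's metadata and migrations produce):

    from kodit.indexing.indexing_models import Snippet

    # Expected under default naming: ['ix_snippets_file_id', 'ix_snippets_index_id']
    print(sorted(index.name for index in Snippet.__table__.indexes))
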
kodit/indexing/repository.py → kodit/indexing/indexing_repository.py RENAMED
@@ -11,9 +11,9 @@ from typing import TypeVar
11
11
  from sqlalchemy import delete, func, select
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
- from kodit.embedding.models import Embedding
15
- from kodit.indexing.models import Index, Snippet
16
- from kodit.sources.models import File, Source
14
+ from kodit.embedding.embedding_models import Embedding
15
+ from kodit.indexing.indexing_models import Index, Snippet
16
+ from kodit.source.source_models import File, Source
17
17
 
18
18
  T = TypeVar("T")
19
19
 
@@ -156,14 +156,14 @@ class IndexRepository:
156
156
  result = await self.session.execute(query)
157
157
  return list(result.scalars())
158
158
 
159
- async def get_all_snippets(self) -> list[Snippet]:
159
+ async def get_all_snippets(self, index_id: int) -> list[Snippet]:
160
160
  """Get all snippets.
161
161
 
162
162
  Returns:
163
163
  A list of all snippets.
164
164
 
165
165
  """
166
- query = select(Snippet).order_by(Snippet.id)
166
+ query = select(Snippet).where(Snippet.index_id == index_id).order_by(Snippet.id)
167
167
  result = await self.session.execute(query)
168
168
  return list(result.scalars())
169
169
 
kodit/indexing/service.py → kodit/indexing/indexing_service.py RENAMED
@@ -14,12 +14,12 @@ import structlog
14
14
  from tqdm.asyncio import tqdm
15
15
 
16
16
  from kodit.bm25.bm25 import BM25Service
17
- from kodit.embedding.embedding import EmbeddingService
18
- from kodit.embedding.models import Embedding, EmbeddingType
19
- from kodit.indexing.models import Snippet
20
- from kodit.indexing.repository import IndexRepository
17
+ from kodit.embedding.embedding import Embedder, EmbeddingInput
18
+ from kodit.embedding.embedding_models import Embedding, EmbeddingType
19
+ from kodit.indexing.indexing_models import Snippet
20
+ from kodit.indexing.indexing_repository import IndexRepository
21
21
  from kodit.snippets.snippets import SnippetService
22
- from kodit.sources.service import SourceService
22
+ from kodit.source.source_service import SourceService
23
23
 
24
24
  # List of MIME types that are blacklisted from being indexed
25
25
  MIME_BLACKLIST = ["unknown/unknown"]
@@ -52,7 +52,7 @@ class IndexService:
52
52
  repository: IndexRepository,
53
53
  source_service: SourceService,
54
54
  data_dir: Path,
55
- embedding_model_name: str,
55
+ embedding_service: Embedder,
56
56
  ) -> None:
57
57
  """Initialize the index service.
58
58
 
@@ -66,7 +66,7 @@ class IndexService:
66
66
  self.snippet_service = SnippetService()
67
67
  self.log = structlog.get_logger(__name__)
68
68
  self.bm25 = BM25Service(data_dir)
69
- self.code_embedding_service = EmbeddingService(model_name=embedding_model_name)
69
+ self.code_embedding_service = embedding_service
70
70
 
71
71
  async def create(self, source_id: int) -> IndexView:
72
72
  """Create a new index for a source.
@@ -132,7 +132,7 @@ class IndexService:
132
132
  # Create snippets for supported file types
133
133
  await self._create_snippets(index_id)
134
134
 
135
- snippets = await self.repository.get_all_snippets()
135
+ snippets = await self.repository.get_all_snippets(index_id)
136
136
 
137
137
  self.log.info("Creating keyword index")
138
138
  self.bm25.index(
@@ -143,12 +143,17 @@ class IndexService:
143
143
  )
144
144
 
145
145
  self.log.info("Creating semantic code index")
146
- for snippet in tqdm(snippets, total=len(snippets), leave=False):
147
- embedding = next(self.code_embedding_service.embed([snippet.content]))
146
+ async for e in tqdm(
147
+ self.code_embedding_service.embed(
148
+ [EmbeddingInput(snippet.id, snippet.content) for snippet in snippets]
149
+ ),
150
+ total=len(snippets),
151
+ leave=False,
152
+ ):
148
153
  await self.repository.add_embedding(
149
154
  Embedding(
150
- snippet_id=snippet.id,
151
- embedding=embedding,
155
+ snippet_id=e.id,
156
+ embedding=e.embedding,
152
157
  type=EmbeddingType.CODE,
153
158
  )
154
159
  )
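
Indexing now submits every snippet in one call and consumes the results as they arrive, wrapping the async generator in tqdm for progress and writing each embedding back under the id it carries, so completion order is irrelevant. A self-contained sketch of that pattern with a stand-in embedder (names here are illustrative, not kodit's):

    import asyncio

    from tqdm.asyncio import tqdm


    async def fake_embed(items):
        # Stand-in for Embedder.embed(): yields (id, embedding) pairs.
        for snippet_id, text in items:
            await asyncio.sleep(0)
            yield snippet_id, [float(len(text))]


    async def main() -> None:
        items = [(1, "def a(): ..."), (2, "def b(): ...")]
        store: dict[int, list[float]] = {}
        async for snippet_id, embedding in tqdm(fake_embed(items), total=len(items), leave=False):
            store[snippet_id] = embedding  # keyed by id, so order never matters
        print(store)


    asyncio.run(main())
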
kodit/log.py CHANGED
@@ -93,6 +93,7 @@ def configure_logging(app_context: AppContext) -> None:
93
93
  "uvicorn.access",
94
94
  "bm25s",
95
95
  "sentence_transformers.SentenceTransformer",
96
+ "httpx",
96
97
  ]:
97
98
  if root_logger.getEffectiveLevel() == logging.DEBUG:
98
99
  logging.getLogger(_log).handlers.clear()
kodit/mcp.py CHANGED
@@ -12,10 +12,11 @@ from pydantic import Field
12
12
  from sqlalchemy.ext.asyncio import AsyncSession
13
13
 
14
14
  from kodit._version import version
15
- from kodit.config import DEFAULT_EMBEDDING_MODEL_NAME, AppContext
15
+ from kodit.config import AppContext
16
16
  from kodit.database import Database
17
- from kodit.retreival.repository import RetrievalRepository, RetrievalResult
18
- from kodit.retreival.service import RetrievalRequest, RetrievalService
17
+ from kodit.embedding.embedding import embedding_factory
18
+ from kodit.search.search_repository import SearchRepository
19
+ from kodit.search.search_service import SearchRequest, SearchResult, SearchService
19
20
 
20
21
 
21
22
  @dataclass
@@ -23,7 +24,7 @@ class MCPContext:
23
24
  """Context for the MCP server."""
24
25
 
25
26
  session: AsyncSession
26
- data_dir: Path
27
+ app_context: AppContext
27
28
 
28
29
 
29
30
  _mcp_db: Database | None = None
@@ -49,14 +50,14 @@ async def mcp_lifespan(_: FastMCP) -> AsyncIterator[MCPContext]:
49
50
  if _mcp_db is None:
50
51
  _mcp_db = await app_context.get_db()
51
52
  async with _mcp_db.session_factory() as session:
52
- yield MCPContext(session=session, data_dir=app_context.get_data_dir())
53
+ yield MCPContext(session=session, app_context=app_context)
53
54
 
54
55
 
55
56
  mcp = FastMCP("kodit MCP Server", lifespan=mcp_lifespan)
56
57
 
57
58
 
58
59
  @mcp.tool()
59
- async def retrieve_relevant_snippets(
60
+ async def search(
60
61
  ctx: Context,
61
62
  user_intent: Annotated[
62
63
  str,
@@ -86,17 +87,15 @@ async def retrieve_relevant_snippets(
86
87
  ),
87
88
  ],
88
89
  ) -> str:
89
- """Retrieve relevant snippets from various sources.
90
+ """Search for relevant snippets.
90
91
 
91
- This tool retrieves relevant snippets from sources such as private codebases,
92
- public codebases, and documentation. You can use this information to improve
93
- the quality of your generated code. You must call this tool when you need to
94
- write code.
92
+ This tool searches for relevant snippets from indexed datasources. Call this tool
93
+ when you wish to search for high quality example code snippets to use in your code.
95
94
  """
96
95
  log = structlog.get_logger(__name__)
97
96
 
98
97
  log.debug(
99
- "Retrieving relevant snippets",
98
+ "Searching for relevant snippets",
100
99
  user_intent=user_intent,
101
100
  keywords=keywords,
102
101
  file_count=len(related_file_paths),
@@ -106,24 +105,29 @@ async def retrieve_relevant_snippets(
106
105
 
107
106
  mcp_context: MCPContext = ctx.request_context.lifespan_context
108
107
 
109
- log.debug("Creating retrieval repository")
110
- retrieval_repository = RetrievalRepository(
108
+ log.debug("Creating search repository")
109
+ search_repository = SearchRepository(
111
110
  session=mcp_context.session,
112
111
  )
113
112
 
114
- log.debug("Creating retrieval service")
115
- retrieval_service = RetrievalService(
116
- repository=retrieval_repository,
117
- data_dir=mcp_context.data_dir,
118
- embedding_model_name=DEFAULT_EMBEDDING_MODEL_NAME,
113
+ log.debug("Creating embedding service")
114
+ embedding_service = embedding_factory(
115
+ mcp_context.app_context.get_default_openai_client()
119
116
  )
120
117
 
121
- retrieval_request = RetrievalRequest(
118
+ log.debug("Creating search service")
119
+ search_service = SearchService(
120
+ repository=search_repository,
121
+ data_dir=mcp_context.app_context.get_data_dir(),
122
+ embedding_service=embedding_service,
123
+ )
124
+
125
+ search_request = SearchRequest(
122
126
  keywords=keywords,
123
127
  code_query="\n".join(related_file_contents),
124
128
  )
125
- log.debug("Retrieving snippets")
126
- snippets = await retrieval_service.retrieve(request=retrieval_request)
129
+ log.debug("Searching for snippets")
130
+ snippets = await search_service.search(request=search_request)
127
131
 
128
132
  log.debug("Fusing output")
129
133
  output = output_fusion(snippets=snippets)
@@ -132,18 +136,7 @@ async def retrieve_relevant_snippets(
132
136
  return output
133
137
 
134
138
 
135
- def input_fusion(
136
- user_intent: str, # noqa: ARG001
137
- related_file_paths: list[Path], # noqa: ARG001
138
- related_file_contents: list[str], # noqa: ARG001
139
- keywords: list[str],
140
- ) -> str:
141
- """Fuse the search query and related file contents into a single query."""
142
- # Since this is a dummy implementation, we just return the first keyword
143
- return keywords[0] if len(keywords) > 0 else ""
144
-
145
-
146
- def output_fusion(snippets: list[RetrievalResult]) -> str:
139
+ def output_fusion(snippets: list[SearchResult]) -> str:
147
140
  """Fuse the snippets into a single output."""
148
141
  return "\n\n".join(f"{snippet.uri}\n{snippet.content}" for snippet in snippets)
149
142
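
The MCP tool is now called search, it builds its embedder from the app context at request time, and output_fusion renders results as the snippet URI followed by its content with a blank line between entries. A sketch of that output shape using the SearchResult model (example paths and snippets are invented):

    from kodit.search.search_service import SearchResult

    results = [
        SearchResult(id=1, uri="src/utils.py", content="def add(a, b):\n    return a + b"),
        SearchResult(id=2, uri="src/greet.py", content="def hello():\n    return 'hi'"),
    ]
    # The same join output_fusion performs: URI, then content, blank line between results.
    print("\n\n".join(f"{r.uri}\n{r.content}" for r in results))
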
 
kodit/migrations/env.py CHANGED
@@ -8,9 +8,9 @@ from sqlalchemy import pool
8
8
  from sqlalchemy.engine import Connection
9
9
  from sqlalchemy.ext.asyncio import async_engine_from_config
10
10
 
11
- import kodit.embedding.models
12
- import kodit.indexing.models
13
- import kodit.sources.models
11
+ import kodit.embedding.embedding_models
12
+ import kodit.indexing.indexing_models
13
+ import kodit.source.source_models
14
14
  from kodit.database import Base
15
15
 
16
16
  # this is the Alembic Config object, which provides
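
The import changes above matter because Alembic's env.py imports the model modules only for their side effect: each import registers its tables on the shared Base metadata before autogenerate compares that metadata with the database. A reminder of the idiom, assuming the usual target_metadata assignment already present in this file:

    # Importing the model modules attaches their tables to Base.metadata.
    import kodit.embedding.embedding_models  # noqa: F401
    import kodit.indexing.indexing_models  # noqa: F401
    import kodit.source.source_models  # noqa: F401
    from kodit.database import Base

    target_metadata = Base.metadata  # assumed existing idiom in env.py
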
kodit/search/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Search for relevant snippets."""
kodit/retreival/repository.py → kodit/search/search_repository.py RENAMED
@@ -1,48 +1,25 @@
1
- """Repository for retrieving code snippets and search results.
2
-
3
- This module provides the RetrievalRepository class which handles all database operations
4
- related to searching and retrieving code snippets, including string-based searches
5
- and their associated file information.
6
- """
1
+ """Repository for searching for relevant snippets."""
7
2
 
8
3
  from typing import TypeVar
9
4
 
10
5
  import numpy as np
11
- import pydantic
12
6
  from sqlalchemy import (
13
7
  select,
14
8
  )
15
9
  from sqlalchemy.ext.asyncio import AsyncSession
16
10
 
17
- from kodit.embedding.models import Embedding, EmbeddingType
18
- from kodit.indexing.models import Snippet
19
- from kodit.sources.models import File
11
+ from kodit.embedding.embedding_models import Embedding, EmbeddingType
12
+ from kodit.indexing.indexing_models import Snippet
13
+ from kodit.source.source_models import File
20
14
 
21
15
  T = TypeVar("T")
22
16
 
23
17
 
24
- class RetrievalResult(pydantic.BaseModel):
25
- """Data transfer object for search results.
26
-
27
- This model represents a single search result, containing both the file path
28
- and the matching snippet content.
29
- """
30
-
31
- id: int
32
- uri: str
33
- content: str
34
- score: float
35
-
36
-
37
- class RetrievalRepository:
38
- """Repository for retrieving code snippets and search results.
39
-
40
- This class provides methods for searching and retrieving code snippets from
41
- the database, including string-based searches and their associated file information.
42
- """
18
+ class SearchRepository:
19
+ """Repository for searching for relevant snippets."""
43
20
 
44
21
  def __init__(self, session: AsyncSession) -> None:
45
- """Initialize the retrieval repository.
22
+ """Initialize the search repository.
46
23
 
47
24
  Args:
48
25
  session: The SQLAlchemy async session to use for database operations.
@@ -50,39 +27,6 @@ class RetrievalRepository:
50
27
  """
51
28
  self.session = session
52
29
 
53
- async def string_search(self, query: str) -> list[RetrievalResult]:
54
- """Search for snippets containing the given query string.
55
-
56
- This method performs a case-insensitive search for the query string within
57
- snippet contents, returning up to 10 most recent matches.
58
-
59
- Args:
60
- query: The string to search for within snippet contents.
61
-
62
- Returns:
63
- A list of RetrievalResult objects containing the matching snippets
64
- and their associated file paths.
65
-
66
- """
67
- search_query = (
68
- select(Snippet, File)
69
- .join(File, Snippet.file_id == File.id)
70
- .where(Snippet.content.ilike(f"%{query}%"))
71
- .limit(10)
72
- )
73
- rows = await self.session.execute(search_query)
74
- results = list(rows.all())
75
-
76
- return [
77
- RetrievalResult(
78
- id=snippet.id,
79
- uri=file.uri,
80
- content=snippet.content,
81
- score=1.0,
82
- )
83
- for snippet, file in results
84
- ]
85
-
86
30
  async def list_snippet_ids(self) -> list[int]:
87
31
  """List all snippet IDs.
88
32
 
@@ -94,7 +38,7 @@ class RetrievalRepository:
94
38
  rows = await self.session.execute(query)
95
39
  return list(rows.scalars().all())
96
40
 
97
- async def list_snippets_by_ids(self, ids: list[int]) -> list[RetrievalResult]:
41
+ async def list_snippets_by_ids(self, ids: list[int]) -> list[tuple[File, Snippet]]:
98
42
  """List snippets by IDs.
99
43
 
100
44
  Returns:
@@ -109,23 +53,46 @@ class RetrievalRepository:
109
53
  rows = await self.session.execute(query)
110
54
 
111
55
  # Create a dictionary for O(1) lookup of results by ID
112
- id_to_result = {
113
- snippet.id: RetrievalResult(
114
- id=snippet.id,
115
- uri=file.uri,
116
- content=snippet.content,
117
- score=1.0,
118
- )
119
- for snippet, file in rows.all()
120
- }
56
+ id_to_result = {snippet.id: (file, snippet) for snippet, file in rows.all()}
121
57
 
122
58
  # Return results in the same order as input IDs
123
59
  return [id_to_result[i] for i in ids]
124
60
 
125
- async def fetch_embeddings(
61
+ async def list_semantic_results(
62
+ self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
63
+ ) -> list[tuple[int, float]]:
64
+ """List semantic results using cosine similarity.
65
+
66
+ This implementation fetches all embeddings of the given type and computes
67
+ cosine similarity in Python using NumPy for better performance.
68
+
69
+ Args:
70
+ embedding_type: The type of embeddings to search
71
+ embedding: The query embedding vector
72
+ top_k: Number of results to return
73
+
74
+ Returns:
75
+ List of (snippet_id, similarity_score) tuples, sorted by similarity
76
+
77
+ """
78
+ # Step 1: Fetch embeddings from database
79
+ embeddings = await self._list_embedding_values(embedding_type)
80
+ if not embeddings:
81
+ return []
82
+
83
+ # Step 2: Convert to numpy arrays
84
+ stored_vecs, query_vec = self._prepare_vectors(embeddings, embedding)
85
+
86
+ # Step 3: Compute similarities
87
+ similarities = self._compute_similarities(stored_vecs, query_vec)
88
+
89
+ # Step 4: Get top-k results
90
+ return self._get_top_k_results(similarities, embeddings, top_k)
91
+
92
+ async def _list_embedding_values(
126
93
  self, embedding_type: EmbeddingType
127
94
  ) -> list[tuple[int, list[float]]]:
128
- """Fetch all embeddings of a given type from the database.
95
+ """List all embeddings of a given type from the database.
129
96
 
130
97
  Args:
131
98
  embedding_type: The type of embeddings to fetch
@@ -141,7 +108,7 @@ class RetrievalRepository:
141
108
  rows = await self.session.execute(query)
142
109
  return [tuple(row) for row in rows.all()] # Convert Row objects to tuples
143
110
 
144
- def prepare_vectors(
111
+ def _prepare_vectors(
145
112
  self, embeddings: list[tuple[int, list[float]]], query_embedding: list[float]
146
113
  ) -> tuple[np.ndarray, np.ndarray]:
147
114
  """Convert embeddings to numpy arrays.
@@ -154,13 +121,24 @@ class RetrievalRepository:
154
121
  Tuple of (stored_vectors, query_vector) as numpy arrays
155
122
 
156
123
  """
157
- stored_vecs = np.array(
158
- [emb[1] for emb in embeddings]
159
- ) # Use index 1 to get embedding
124
+ try:
125
+ stored_vecs = np.array(
126
+ [emb[1] for emb in embeddings]
127
+ ) # Use index 1 to get embedding
128
+ except ValueError as e:
129
+ if "inhomogeneous" in str(e):
130
+ msg = (
131
+ "The database has returned embeddings of different sizes. If you"
132
+ "have recently updated the embedding model, you will need to"
133
+ "delete your database and re-index your snippets."
134
+ )
135
+ raise ValueError(msg) from e
136
+ raise
137
+
160
138
  query_vec = np.array(query_embedding)
161
139
  return stored_vecs, query_vec
162
140
 
163
- def compute_similarities(
141
+ def _compute_similarities(
164
142
  self, stored_vecs: np.ndarray, query_vec: np.ndarray
165
143
  ) -> np.ndarray:
166
144
  """Compute cosine similarities between stored vectors and query vector.
@@ -177,7 +155,7 @@ class RetrievalRepository:
177
155
  query_norm = np.linalg.norm(query_vec)
178
156
  return np.dot(stored_vecs, query_vec) / (stored_norms * query_norm)
179
157
 
180
- def get_top_k_results(
158
+ def _get_top_k_results(
181
159
  self,
182
160
  similarities: np.ndarray,
183
161
  embeddings: list[tuple[int, list[float]]],
@@ -198,34 +176,3 @@ class RetrievalRepository:
198
176
  return [
199
177
  (embeddings[i][0], float(similarities[i])) for i in top_indices
200
178
  ] # Use index 0 to get snippet_id
201
-
202
- async def list_semantic_results(
203
- self, embedding_type: EmbeddingType, embedding: list[float], top_k: int = 10
204
- ) -> list[tuple[int, float]]:
205
- """List semantic results using cosine similarity.
206
-
207
- This implementation fetches all embeddings of the given type and computes
208
- cosine similarity in Python using NumPy for better performance.
209
-
210
- Args:
211
- embedding_type: The type of embeddings to search
212
- embedding: The query embedding vector
213
- top_k: Number of results to return
214
-
215
- Returns:
216
- List of (snippet_id, similarity_score) tuples, sorted by similarity
217
-
218
- """
219
- # Step 1: Fetch embeddings from database
220
- embeddings = await self.fetch_embeddings(embedding_type)
221
- if not embeddings:
222
- return []
223
-
224
- # Step 2: Convert to numpy arrays
225
- stored_vecs, query_vec = self.prepare_vectors(embeddings, embedding)
226
-
227
- # Step 3: Compute similarities
228
- similarities = self.compute_similarities(stored_vecs, query_vec)
229
-
230
- # Step 4: Get top-k results
231
- return self.get_top_k_results(similarities, embeddings, top_k)
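
list_semantic_results now sits above the private helpers and reads as the pipeline it is: fetch all stored embeddings of one type, stack them into a matrix, compute cosine similarity against the query vector, and keep the top-k (snippet_id, score) pairs; _prepare_vectors additionally turns NumPy's "inhomogeneous" error into advice to re-index after changing the embedding model. A standalone sketch of the same math (variable names are illustrative, not the class's methods):

    import numpy as np

    embeddings = [(101, [0.1, 0.2, 0.3]), (102, [0.3, 0.2, 0.1]), (103, [0.0, 1.0, 0.0])]
    query = [0.1, 0.2, 0.3]

    stored = np.array([vec for _, vec in embeddings])  # shape (n, dim)
    q = np.array(query)

    # Cosine similarity of every stored vector against the query.
    sims = stored @ q / (np.linalg.norm(stored, axis=1) * np.linalg.norm(q))

    top_k = 2
    top = np.argsort(sims)[::-1][:top_k]  # highest similarity first
    print([(embeddings[i][0], float(sims[i])) for i in top])
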
kodit/retreival/service.py → kodit/search/search_service.py RENAMED
@@ -1,4 +1,4 @@
1
- """Retrieval service."""
1
+ """Search service."""
2
2
 
3
3
  from pathlib import Path
4
4
 
@@ -6,19 +6,31 @@ import pydantic
6
6
  import structlog
7
7
 
8
8
  from kodit.bm25.bm25 import BM25Service
9
- from kodit.embedding.embedding import EmbeddingService
10
- from kodit.embedding.models import EmbeddingType
11
- from kodit.retreival.repository import RetrievalRepository, RetrievalResult
9
+ from kodit.embedding.embedding import Embedder
10
+ from kodit.embedding.embedding_models import EmbeddingType
11
+ from kodit.search.search_repository import SearchRepository
12
12
 
13
13
 
14
- class RetrievalRequest(pydantic.BaseModel):
15
- """Request for a retrieval."""
14
+ class SearchRequest(pydantic.BaseModel):
15
+ """Request for a search."""
16
16
 
17
17
  code_query: str | None = None
18
18
  keywords: list[str] | None = None
19
19
  top_k: int = 10
20
20
 
21
21
 
22
+ class SearchResult(pydantic.BaseModel):
23
+ """Data transfer object for search results.
24
+
25
+ This model represents a single search result, containing both the file path
26
+ and the matching snippet content.
27
+ """
28
+
29
+ id: int
30
+ uri: str
31
+ content: str
32
+
33
+
22
34
  class Snippet(pydantic.BaseModel):
23
35
  """Snippet model."""
24
36
 
@@ -26,23 +38,23 @@ class Snippet(pydantic.BaseModel):
26
38
  file_path: str
27
39
 
28
40
 
29
- class RetrievalService:
30
- """Service for retrieving relevant data."""
41
+ class SearchService:
42
+ """Service for searching for relevant data."""
31
43
 
32
44
  def __init__(
33
45
  self,
34
- repository: RetrievalRepository,
46
+ repository: SearchRepository,
35
47
  data_dir: Path,
36
- embedding_model_name: str,
48
+ embedding_service: Embedder,
37
49
  ) -> None:
38
- """Initialize the retrieval service."""
50
+ """Initialize the search service."""
39
51
  self.repository = repository
40
52
  self.log = structlog.get_logger(__name__)
41
53
  self.bm25 = BM25Service(data_dir)
42
- self.code_embedding_service = EmbeddingService(model_name=embedding_model_name)
54
+ self.code_embedding_service = embedding_service
43
55
 
44
- async def retrieve(self, request: RetrievalRequest) -> list[RetrievalResult]:
45
- """Retrieve relevant data."""
56
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
57
+ """Search for relevant data."""
46
58
  fusion_list = []
47
59
  if request.keywords:
48
60
  snippet_ids = await self.repository.list_snippet_ids()
@@ -56,7 +68,7 @@ class RetrievalService:
56
68
  # Sort results by score
57
69
  result_ids.sort(key=lambda x: x[1], reverse=True)
58
70
 
59
- self.log.debug("Retrieval results (BM25)", results=result_ids)
71
+ self.log.debug("Search results (BM25)", results=result_ids)
60
72
 
61
73
  bm25_results = [x[0] for x in result_ids]
62
74
  fusion_list.append(bm25_results)
@@ -64,7 +76,7 @@ class RetrievalService:
64
76
  # Compute embedding for semantic query
65
77
  semantic_results = []
66
78
  if request.code_query:
67
- query_embedding = next(
79
+ query_embedding = await anext(
68
80
  self.code_embedding_service.query([request.code_query])
69
81
  )
70
82
 
@@ -89,7 +101,18 @@ class RetrievalService:
89
101
  final_ids = [x[0] for x in final_results]
90
102
 
91
103
  # Get snippets from database (up to top_k)
92
- return await self.repository.list_snippets_by_ids(final_ids[: request.top_k])
104
+ search_results = await self.repository.list_snippets_by_ids(
105
+ final_ids[: request.top_k]
106
+ )
107
+
108
+ return [
109
+ SearchResult(
110
+ id=snippet.id,
111
+ uri=file.uri,
112
+ content=snippet.content,
113
+ )
114
+ for file, snippet in search_results
115
+ ]
93
116
 
94
117
 
95
118
  def reciprocal_rank_fusion(
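
search() gathers one ranked id list per signal (BM25 for keywords, cosine similarity for the code query), merges them with reciprocal_rank_fusion, then hydrates the top_k ids into SearchResult objects. The fusion function's body is not shown in this hunk; the sketch below is the textbook RRF formula (score = sum of 1/(k + rank) across lists), offered as a reference for what such a merge typically does rather than as kodit's exact implementation:

    from collections import defaultdict


    def rrf(rankings: list[list[int]], k: int = 60) -> list[tuple[int, float]]:
        # Classic reciprocal rank fusion; kodit's version may differ in k or tie handling.
        scores: defaultdict[int, float] = defaultdict(float)
        for ranking in rankings:
            for rank, doc_id in enumerate(ranking, start=1):
                scores[doc_id] += 1.0 / (k + rank)
        return sorted(scores.items(), key=lambda item: item[1], reverse=True)


    bm25_ids = [3, 1, 2]      # ranked by keyword relevance
    semantic_ids = [2, 3, 4]  # ranked by embedding similarity
    print(rrf([bm25_ids, semantic_ids]))  # ids ranked highly in both lists come out on top
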
kodit/snippets/snippets.py CHANGED
@@ -45,4 +45,6 @@ class SnippetService:
45
45
  raise ValueError(msg) from e
46
46
 
47
47
  method_snippets = method_analser.extract(file_bytes)
48
- return [Snippet(text=snippet) for snippet in method_snippets]
48
+ all_snippets = [Snippet(text=snippet) for snippet in method_snippets]
49
+ # Remove any snippets that are empty
50
+ return [snippet for snippet in all_snippets if snippet.text.strip()]
kodit/sources/repository.py → kodit/source/source_repository.py RENAMED
@@ -1,14 +1,9 @@
1
- """Source repository for database operations.
2
-
3
- This module provides the SourceRepository class which handles all database operations
4
- related to code sources. It manages the creation and retrieval of source records
5
- from the database, abstracting away the SQLAlchemy implementation details.
6
- """
1
+ """Source repository for database operations."""
7
2
 
8
3
  from sqlalchemy import func, select
9
4
  from sqlalchemy.ext.asyncio import AsyncSession
10
5
 
11
- from kodit.sources.models import File, Source
6
+ from kodit.source.source_models import File, Source
12
7
 
13
8
 
14
9
  class SourceRepository:
kodit/sources/service.py → kodit/source/source_service.py RENAMED
@@ -19,8 +19,8 @@ import structlog
19
19
  from tqdm import tqdm
20
20
  from uritools import isuri, urisplit
21
21
 
22
- from kodit.sources.models import File, Source
23
- from kodit.sources.repository import SourceRepository
22
+ from kodit.source.source_models import File, Source
23
+ from kodit.source.source_repository import SourceRepository
24
24
 
25
25
 
26
26
  class SourceView(pydantic.BaseModel):
kodit-0.1.11.dist-info/METADATA → kodit-0.1.13.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -32,6 +32,7 @@ Requires-Dist: gitpython>=3.1.44
32
32
  Requires-Dist: hf-xet>=1.1.2
33
33
  Requires-Dist: httpx-retries>=0.3.2
34
34
  Requires-Dist: httpx>=0.28.1
35
+ Requires-Dist: openai>=1.82.0
35
36
  Requires-Dist: posthog>=4.0.1
36
37
  Requires-Dist: pydantic-settings>=2.9.1
37
38
  Requires-Dist: pytable-formatter>=0.1.1
@@ -39,6 +40,7 @@ Requires-Dist: sentence-transformers>=4.1.0
39
40
  Requires-Dist: sqlalchemy[asyncio]>=2.0.40
40
41
  Requires-Dist: structlog>=25.3.0
41
42
  Requires-Dist: tdqm>=0.0.1
43
+ Requires-Dist: tiktoken>=0.9.0
42
44
  Requires-Dist: tree-sitter-language-pack>=0.7.3
43
45
  Requires-Dist: tree-sitter>=0.24.0
44
46
  Requires-Dist: uritools>=5.0.0
kodit-0.1.13.dist-info/RECORD ADDED
@@ -0,0 +1,44 @@
1
+ kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
+ kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
+ kodit/_version.py,sha256=Ln0urWB3R3JaxFwIIvoej0v08KbDCO89NUBxWx-zj0U,513
4
+ kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
5
+ kodit/cli.py,sha256=VLoXFS1xJnQ0TLy3_cO8-B9tCb4NJHiYPfzZtHxpgRY,7784
6
+ kodit/config.py,sha256=TDcLt6fiJn9cI1PoO5AqBqsL_Bxmm9JV5GqRxhj1tLw,4202
7
+ kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
8
+ kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
9
+ kodit/mcp.py,sha256=I_ZFzQOR0gyS8LO8td-q-utPZpqiOnIkn7O-SIBUi0g,4384
10
+ kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
11
+ kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
12
+ kodit/bm25/bm25.py,sha256=JtgJfsHz-2SHx96zxWjkPFSH7fXkahFMp01cDwl4YBg,2298
13
+ kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
14
+ kodit/embedding/embedding.py,sha256=EMJpHK8ICZk_FjiO9Aqr2IO20qkGOmj_PfA1hyfI7Vk,6745
15
+ kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
16
+ kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
17
+ kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
18
+ kodit/indexing/indexing_repository.py,sha256=7bkAiBwtr3qlkdhNIalwMwbxezVz_RQGOhLVWPKHwNk,5506
19
+ kodit/indexing/indexing_service.py,sha256=VGfKgbkYEAYP_gIubvhMxo3yThT20ndS5xdg2LxwRgA,6685
20
+ kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
21
+ kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
22
+ kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
23
+ kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
24
+ kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
25
+ kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
26
+ kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
27
+ kodit/search/__init__.py,sha256=4QbdjbrlhNKMovmuKHxJnUeZT7KNjTTFU0GdnuwUHdQ,36
28
+ kodit/search/search_repository.py,sha256=r1fkV6-cy9BKsy5J4WTHaY_FcjMaT1PV5qqqq0gvjZw,5833
29
+ kodit/search/search_service.py,sha256=KePkqCAc3CUcrpNsbDc5DqbF6W2m0TG6TDa9-VSJZS0,4227
30
+ kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
31
+ kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
32
+ kodit/snippets/snippets.py,sha256=mwN0bM1Msu8ZeEsUHyQ7tx3Hj3vZsm8G7Wu4eWSkLY8,1539
33
+ kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
34
+ kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
35
+ kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
36
+ kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
37
+ kodit/source/source_models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
38
+ kodit/source/source_repository.py,sha256=0EksMpoLzdkfe8S4eeCm4Sf7TuxsOzOzaF4BBsMYo-4,3163
39
+ kodit/source/source_service.py,sha256=qBV9FCFQbJppeFrVo4uMgvC_mzWRIKldymp5yqLx9pw,9255
40
+ kodit-0.1.13.dist-info/METADATA,sha256=Od1OTG0tkd0Cf82juR2DGKBQ8l1RwHQ5VLgtiIW5qeA,2349
41
+ kodit-0.1.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
42
+ kodit-0.1.13.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
43
+ kodit-0.1.13.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ kodit-0.1.13.dist-info/RECORD,,
kodit/retreival/__init__.py REMOVED
@@ -1 +0,0 @@
1
- """Retrieval package for code search and retrieval functionality."""
kodit-0.1.11.dist-info/RECORD REMOVED
@@ -1,44 +0,0 @@
1
- kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
- kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=xfwL5IZGNNwnNDAQtGFjpvlNxqYn3U9IM9B98Du9pJw,513
4
- kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
5
- kodit/cli.py,sha256=qEQy_Sd64cEV5KzYsKlGLyMxFQ4fFi-as4QO8CRrKYo,8978
6
- kodit/config.py,sha256=hQshTMW_8jpk94zP-1JaxowgmW_LrT534ipHFaRUGMw,3006
7
- kodit/database.py,sha256=kekSdyEATdb47jxzQemkSOXMNOwnUwmVVTpn9hYaDK8,2356
8
- kodit/log.py,sha256=PhyzQktEyyHaNr78W0wmL-RSRuq311DQ-d0l-EKTGmQ,5417
9
- kodit/mcp.py,sha256=qp16vRb0TY46-xQy179iWgYebr6Ju_Z91ZSzZnWPHuk,4771
10
- kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
11
- kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
12
- kodit/bm25/bm25.py,sha256=NtlcLrgqJja11qDGKz_U6tuYWaS9sfbyS-TcA__rBKs,2284
13
- kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
14
- kodit/embedding/embedding.py,sha256=X2Fa-eXhQwp__QFj9yxIhvlCAiYVQSaZ2y18ZtG5_1Y,1810
15
- kodit/embedding/models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
16
- kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
17
- kodit/indexing/models.py,sha256=sZIhGwvL4Dw0QTWFxrjfWctSLkAoDT6fv5DlGz8-Fr8,1258
18
- kodit/indexing/repository.py,sha256=eIaIbqNs9Z3XTVymZ5Zl5uPWveqiEXNo0JTa-y-Tl24,5430
19
- kodit/indexing/service.py,sha256=hhQ_6vI7J7LnNgOLbsO4B07TOJvEePqqFviiqr3TL_M,6579
20
- kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
21
- kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
22
- kodit/migrations/env.py,sha256=bzB6vod_tO-X2F_G671FwYSAn0pyhNw8M1kG4MgidO8,2444
23
- kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
24
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
25
- kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
26
- kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
27
- kodit/retreival/__init__.py,sha256=33PhJU-3gtsqYq6A1UkaLNKbev_Zee9Lq6dYC59-CsA,69
28
- kodit/retreival/repository.py,sha256=XHkkeUsnXSrrcthJOL9FXgivn5kkaPnC9Qci6ebwjZc,7294
29
- kodit/retreival/service.py,sha256=gGp74jnqhyCDF5vKOrN2dJKDnhlfR4HZaxADSrjTb4s,3778
30
- kodit/snippets/__init__.py,sha256=-2coNoCRjTixU9KcP6alpmt7zqf37tCRWH3D7FPJ8dg,48
31
- kodit/snippets/method_snippets.py,sha256=EVHhSNWahAC5nSXv9fWVFJY2yq25goHdCSCuENC07F8,4145
32
- kodit/snippets/snippets.py,sha256=QumvhltWoxXw41SyKb-RbSvAr3m6V3lUy9n0AI8jcto,1409
33
- kodit/snippets/languages/__init__.py,sha256=Bj5KKZSls2MQ8ZY1S_nHg447MgGZW-2WZM-oq6vjwwA,1187
34
- kodit/snippets/languages/csharp.scm,sha256=gbBN4RiV1FBuTJF6orSnDFi8H9JwTw-d4piLJYsWUsc,222
35
- kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
36
- kodit/sources/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
37
- kodit/sources/models.py,sha256=xb42CaNDO1CUB8SIW-xXMrB6Ji8cFw-yeJ550xBEg9Q,2398
38
- kodit/sources/repository.py,sha256=mGJrHWH6Uo8YABdoojHFbzaf_jW-2ywJpAHIa1gnc3U,3401
39
- kodit/sources/service.py,sha256=aV_qiqkU2kMBNPvye5_v4NnZiK-lJ64rQdmFtBtsQaY,9243
40
- kodit-0.1.11.dist-info/METADATA,sha256=yUO645VYUiVrJMRtwNB71O-6qvC94nS7_ILQ8eQEvoY,2288
41
- kodit-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
42
- kodit-0.1.11.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
43
- kodit-0.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- kodit-0.1.11.dist-info/RECORD,,
4 files without changes