kodit 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kodit might be problematic.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.2.3'
- __version_tuple__ = version_tuple = (0, 2, 3)
+ __version__ = version = '0.2.4'
+ __version_tuple__ = version_tuple = (0, 2, 4)
kodit/embedding/embedding_factory.py CHANGED
@@ -3,6 +3,7 @@
  from sqlalchemy.ext.asyncio import AsyncSession

  from kodit.config import AppContext, Endpoint
+ from kodit.embedding.embedding_models import EmbeddingType
  from kodit.embedding.embedding_provider.local_embedding_provider import (
      CODE,
      LocalEmbeddingProvider,
@@ -54,9 +55,14 @@ def embedding_factory(
          return VectorChordVectorSearchService(task_name, session, embedding_provider)
      if app_context.default_search.provider == "sqlite":
          log_event("kodit.database", {"provider": "sqlite"})
+         if task_name == "code":
+             embedding_type = EmbeddingType.CODE
+         elif task_name == "text":
+             embedding_type = EmbeddingType.TEXT
          return LocalVectorSearchService(
              embedding_repository=embedding_repository,
              embedding_provider=embedding_provider,
+             embedding_type=embedding_type,
          )

      msg = f"Invalid semantic search provider: {app_context.default_search.provider}"
kodit/embedding/embedding_provider/embedding_provider.py CHANGED
@@ -1,6 +1,8 @@
  """Embedding provider."""

  from abc import ABC, abstractmethod
+ from collections.abc import AsyncGenerator
+ from dataclasses import dataclass

  import structlog
  import tiktoken
@@ -10,11 +12,29 @@ OPENAI_MAX_EMBEDDING_SIZE = 8192
  Vector = list[float]


+ @dataclass
+ class EmbeddingRequest:
+     """Embedding request."""
+
+     id: int
+     text: str
+
+
+ @dataclass
+ class EmbeddingResponse:
+     """Embedding response."""
+
+     id: int
+     embedding: Vector
+
+
  class EmbeddingProvider(ABC):
      """Embedding provider."""

      @abstractmethod
-     async def embed(self, data: list[str]) -> list[Vector]:
+     def embed(
+         self, data: list[EmbeddingRequest]
+     ) -> AsyncGenerator[list[EmbeddingResponse], None]:
          """Embed a list of strings.

          The embedding provider is responsible for embedding a list of strings into a
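With this change, embed is an async generator that yields batches of EmbeddingResponse instead of returning one list. A minimal consumer sketch, assuming only the two dataclasses above (collect_embeddings is an illustrative name):

    async def collect_embeddings(provider, requests):
        # Accumulate id -> vector as batches stream out of the provider; the
        # ids carried on each response make completion order irrelevant.
        vectors = {}
        async for batch in provider.embed(requests):
            for response in batch:
                vectors[response.id] = response.embedding
        return vectors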
@@ -25,13 +45,13 @@ class EmbeddingProvider(ABC):

  def split_sub_batches(
      encoding: tiktoken.Encoding,
-     data: list[str],
+     data: list[EmbeddingRequest],
      max_context_window: int = OPENAI_MAX_EMBEDDING_SIZE,
- ) -> list[list[str]]:
+ ) -> list[list[EmbeddingRequest]]:
      """Split a list of strings into smaller sub-batches."""
      log = structlog.get_logger(__name__)
      result = []
-     data_to_process = [s for s in data if s.strip()]  # Filter out empty strings
+     data_to_process = [s for s in data if s.text.strip()]  # Filter out empty strings

      while data_to_process:
          next_batch = []
@@ -39,18 +59,26 @@ def split_sub_batches(

          while data_to_process:
              next_item = data_to_process[0]
-             item_tokens = len(encoding.encode(next_item, disallowed_special=()))
+             item_tokens = len(encoding.encode(next_item.text, disallowed_special=()))

              if item_tokens > max_context_window:
-                 # Loop around trying to truncate the snippet until it fits in the max
-                 # embedding size
-                 while item_tokens > max_context_window:
-                     next_item = next_item[:-1]
-                     item_tokens = len(encoding.encode(next_item, disallowed_special=()))
-
-                 data_to_process[0] = next_item
-
-                 log.warning("Truncated snippet", snippet=next_item)
+                 # Optimise truncation by operating on tokens directly instead of
+                 # removing one character at a time and repeatedly re-encoding.
+                 tokens = encoding.encode(next_item.text, disallowed_special=())
+                 if len(tokens) > max_context_window:
+                     # Keep only the first *max_context_window* tokens.
+                     tokens = tokens[:max_context_window]
+                     # Convert back to text. This requires only one decode call and
+                     # guarantees that the resulting string fits the token budget.
+                     next_item.text = encoding.decode(tokens)
+                     item_tokens = max_context_window  # We know the exact size now
+
+                 data_to_process[0] = next_item
+
+                 log.warning(
+                     "Truncated snippet because it was too long to embed",
+                     snippet=next_item.text[:100] + "...",
+                 )

              if current_tokens + item_tokens > max_context_window:
                  break
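The 0.2.3 code re-encoded the snippet after dropping one character at a time, which is quadratic in the overflow; 0.2.4 slices the token list once and decodes once. A standalone sketch of that token-budget truncation, assuming tiktoken is available (cl100k_base is the encoding tiktoken maps text-embedding-3-small to):

    import tiktoken


    def truncate_to_token_budget(text: str, budget: int) -> str:
        enc = tiktoken.get_encoding("cl100k_base")
        tokens = enc.encode(text, disallowed_special=())
        if len(tokens) <= budget:
            return text
        # One slice plus one decode, rather than one re-encode per dropped char.
        return enc.decode(tokens[:budget])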
kodit/embedding/embedding_provider/hash_embedding_provider.py CHANGED
@@ -3,10 +3,12 @@
  import asyncio
  import hashlib
  import math
- from collections.abc import Generator, Sequence
+ from collections.abc import AsyncGenerator, Generator, Sequence

  from kodit.embedding.embedding_provider.embedding_provider import (
      EmbeddingProvider,
+     EmbeddingRequest,
+     EmbeddingResponse,
      Vector,
  )

@@ -31,27 +33,34 @@ class HashEmbeddingProvider(EmbeddingProvider):
          self.dim = dim
          self.batch_size = batch_size

-     async def embed(self, data: list[str]) -> list[Vector]:
+     async def embed(
+         self, data: list[EmbeddingRequest]
+     ) -> AsyncGenerator[list[EmbeddingResponse], None]:
          """Embed every string in *data*, preserving order.

          Work is sliced into *batch_size* chunks and scheduled concurrently
          (still CPU-bound, but enough to cooperate with an asyncio loop).
          """
          if not data:
-             return []
+             yield []

          async def _embed_chunk(chunk: Sequence[str]) -> list[Vector]:
              return [self._string_to_vector(text) for text in chunk]

          tasks = [
              asyncio.create_task(_embed_chunk(chunk))
-             for chunk in self._chunked(data, self.batch_size)
+             for chunk in self._chunked([i.text for i in data], self.batch_size)
          ]

-         vectors: list[Vector] = []
          for task in tasks:
-             vectors.extend(await task)
-         return vectors
+             result = await task
+             yield [
+                 EmbeddingResponse(
+                     id=item.id,
+                     embedding=embedding,
+                 )
+                 for item, embedding in zip(data, result, strict=True)
+             ]

      @staticmethod
      def _chunked(seq: Sequence[str], size: int) -> Generator[Sequence[str], None, None]:
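HashEmbeddingProvider is a deterministic test double: a digest of the text is stretched into a fixed-dimension vector. A minimal sketch of the idea under assumed names (the package's actual _string_to_vector may differ):

    import hashlib


    def string_to_vector(text: str, dim: int = 16) -> list[float]:
        # Chain SHA-256 digests until there are enough bytes, then map each
        # byte into [0, 1]. The same text always yields the same vector.
        buf = b""
        counter = 0
        while len(buf) < dim:
            buf += hashlib.sha256(f"{counter}:{text}".encode()).digest()
            counter += 1
        return [b / 255 for b in buf[:dim]]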
kodit/embedding/embedding_provider/local_embedding_provider.py CHANGED
@@ -3,20 +3,24 @@
  from __future__ import annotations

  import os
+ from time import time
  from typing import TYPE_CHECKING

  import structlog
- import tiktoken
- from tqdm import tqdm

  from kodit.embedding.embedding_provider.embedding_provider import (
      EmbeddingProvider,
-     Vector,
+     EmbeddingRequest,
+     EmbeddingResponse,
      split_sub_batches,
  )

  if TYPE_CHECKING:
+     from collections.abc import AsyncGenerator
+
      from sentence_transformers import SentenceTransformer
+     from tiktoken import Encoding
+

  TINY = "tiny"
  CODE = "code"
@@ -36,8 +40,22 @@ class LocalEmbeddingProvider(EmbeddingProvider):
          """Initialize the local embedder."""
          self.log = structlog.get_logger(__name__)
          self.model_name = COMMON_EMBEDDING_MODELS.get(model_name, model_name)
+         self.encoding_name = "text-embedding-3-small"
          self.embedding_model = None
-         self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
+         self.encoding = None
+
+     def _encoding(self) -> Encoding:
+         if self.encoding is None:
+             from tiktoken import encoding_for_model
+
+             start_time = time()
+             self.encoding = encoding_for_model(self.encoding_name)
+             self.log.debug(
+                 "Encoding loaded",
+                 model_name=self.encoding_name,
+                 duration=time() - start_time,
+             )
+         return self.encoding

      def _model(self) -> SentenceTransformer:
          """Get the embedding model."""
@@ -45,20 +63,34 @@ class LocalEmbeddingProvider(EmbeddingProvider):
          os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Avoid warnings
          from sentence_transformers import SentenceTransformer

+         start_time = time()
          self.embedding_model = SentenceTransformer(
              self.model_name,
              trust_remote_code=True,
          )
+         self.log.debug(
+             "Model loaded",
+             model_name=self.model_name,
+             duration=time() - start_time,
+         )
          return self.embedding_model

-     async def embed(self, data: list[str]) -> list[Vector]:
+     async def embed(
+         self, data: list[EmbeddingRequest]
+     ) -> AsyncGenerator[list[EmbeddingResponse], None]:
          """Embed a list of strings."""
          model = self._model()

-         batched_data = split_sub_batches(self.encoding, data)
+         batched_data = split_sub_batches(self._encoding(), data)

-         results: list[Vector] = []
-         for batch in tqdm(batched_data, total=len(batched_data), leave=False):
-             embeddings = model.encode(batch, show_progress_bar=False, batch_size=4)
-             results.extend([[float(x) for x in embedding] for embedding in embeddings])
-         return results
+         for batch in batched_data:
+             embeddings = model.encode(
+                 [i.text for i in batch], show_progress_bar=False, batch_size=4
+             )
+             yield [
+                 EmbeddingResponse(
+                     id=item.id,
+                     embedding=[float(x) for x in embedding],
+                 )
+                 for item, embedding in zip(batch, embeddings, strict=True)
+             ]
kodit/embedding/embedding_provider/openai_embedding_provider.py CHANGED
@@ -1,6 +1,7 @@
  """OpenAI embedding service."""

  import asyncio
+ from collections.abc import AsyncGenerator

  import structlog
  import tiktoken
@@ -8,7 +9,8 @@ from openai import AsyncOpenAI

  from kodit.embedding.embedding_provider.embedding_provider import (
      EmbeddingProvider,
-     Vector,
+     EmbeddingRequest,
+     EmbeddingResponse,
      split_sub_batches,
  )

@@ -31,7 +33,9 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
              "text-embedding-3-small"
          )  # Sensible default

-     async def embed(self, data: list[str]) -> list[Vector]:
+     async def embed(
+         self, data: list[EmbeddingRequest]
+     ) -> AsyncGenerator[list[EmbeddingResponse], None]:
          """Embed a list of documents."""
          # First split the list into a list of list where each sublist has fewer than
          # max tokens.
@@ -40,38 +44,30 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
          # Process batches in parallel with a semaphore to limit concurrent requests
          sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)

-         # Create a list of tuples with a temporary id for each batch
-         # We need to do this so that we can return the results in the same order as the
-         # input data
-         input_data = [(i, batch) for i, batch in enumerate(batched_data)]
-
          async def process_batch(
-             data: tuple[int, list[str]],
-         ) -> tuple[int, list[Vector]]:
-             batch_id, batch = data
+             data: list[EmbeddingRequest],
+         ) -> list[EmbeddingResponse]:
              async with sem:
                  try:
                      response = await self.openai_client.embeddings.create(
                          model=self.model_name,
-                         input=batch,
+                         input=[i.text for i in data],
                      )
-                     return batch_id, [
-                         [float(x) for x in embedding.embedding]
-                         for embedding in response.data
+                     return [
+                         EmbeddingResponse(
+                             id=item.id,
+                             embedding=embedding.embedding,
+                         )
+                         for item, embedding in zip(data, response.data, strict=True)
                      ]
                  except Exception as e:
                      self.log.exception("Error embedding batch", error=str(e))
-                     return batch_id, []
+                     return []

          # Create tasks for all batches
-         tasks = [process_batch(batch) for batch in input_data]
+         tasks = [process_batch(batch) for batch in batched_data]

          # Process all batches and yield results as they complete
-         results: list[tuple[int, list[Vector]]] = []
          for task in asyncio.as_completed(tasks):
              result = await task
-             results.append(result)
-
-             # Output in the same order as the input data
-             ordered_results = [result for _, result in sorted(results, key=lambda x: x[0])]
-             return [item for sublist in ordered_results for item in sublist]
+             yield result
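Because every request now carries its own id, results can be yielded in completion order and the 0.2.3 sort-by-temporary-id step disappears. A standalone sketch of the semaphore-plus-as_completed pattern (bounded_stream is an illustrative name):

    import asyncio
    from collections.abc import AsyncGenerator, Awaitable


    async def bounded_stream(
        coros: list[Awaitable], limit: int = 10
    ) -> AsyncGenerator[object, None]:
        # At most `limit` coroutines run at once; results are yielded as each
        # finishes, in completion order rather than submission order.
        sem = asyncio.Semaphore(limit)

        async def bounded(coro: Awaitable) -> object:
            async with sem:
                return await coro

        for task in asyncio.as_completed([bounded(c) for c in coros]):
            yield await task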
kodit/embedding/local_vector_search_service.py CHANGED
@@ -1,12 +1,18 @@
  """Local vector search."""

+ from collections.abc import AsyncGenerator
+
  import structlog
  import tiktoken

  from kodit.embedding.embedding_models import Embedding, EmbeddingType
- from kodit.embedding.embedding_provider.embedding_provider import EmbeddingProvider
+ from kodit.embedding.embedding_provider.embedding_provider import (
+     EmbeddingProvider,
+     EmbeddingRequest,
+ )
  from kodit.embedding.embedding_repository import EmbeddingRepository
  from kodit.embedding.vector_search_service import (
+     IndexResult,
      VectorSearchRequest,
      VectorSearchResponse,
      VectorSearchService,
@@ -20,35 +26,62 @@ class LocalVectorSearchService(VectorSearchService):
          self,
          embedding_repository: EmbeddingRepository,
          embedding_provider: EmbeddingProvider,
+         embedding_type: EmbeddingType = EmbeddingType.CODE,
      ) -> None:
          """Initialize the local embedder."""
          self.log = structlog.get_logger(__name__)
          self.embedding_repository = embedding_repository
          self.embedding_provider = embedding_provider
          self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
+         self.embedding_type = embedding_type

-     async def index(self, data: list[VectorSearchRequest]) -> None:
+     async def index(
+         self, data: list[VectorSearchRequest]
+     ) -> AsyncGenerator[list[IndexResult], None]:
          """Embed a list of documents."""
          if not data or len(data) == 0:
-             self.log.warning("Embedding data is empty, skipping embedding")
              return

-         embeddings = await self.embedding_provider.embed([i.text for i in data])
-         for i, x in zip(data, embeddings, strict=False):
-             await self.embedding_repository.create_embedding(
-                 Embedding(
-                     snippet_id=i.snippet_id,
-                     embedding=[float(y) for y in x],
-                     type=EmbeddingType.CODE,
+         requests = [EmbeddingRequest(id=doc.snippet_id, text=doc.text) for doc in data]
+
+         async for batch in self.embedding_provider.embed(requests):
+             for result in batch:
+                 await self.embedding_repository.create_embedding(
+                     Embedding(
+                         snippet_id=result.id,
+                         embedding=result.embedding,
+                         type=self.embedding_type,
+                     )
                  )
-             )
+                 yield [IndexResult(snippet_id=result.id)]

      async def retrieve(self, query: str, top_k: int = 10) -> list[VectorSearchResponse]:
          """Query the embedding model."""
-         embedding = (await self.embedding_provider.embed([query]))[0]
+         # Build a single-item request and collect its embedding.
+         req = EmbeddingRequest(id=0, text=query)
+         embedding_vec: list[float] | None = None
+         async for batch in self.embedding_provider.embed([req]):
+             if batch:
+                 embedding_vec = [float(v) for v in batch[0].embedding]
+                 break
+
+         if not embedding_vec:
+             return []
+
          results = await self.embedding_repository.list_semantic_results(
-             EmbeddingType.CODE, [float(x) for x in embedding], top_k
+             self.embedding_type, embedding_vec, top_k
          )
          return [
              VectorSearchResponse(snippet_id, score) for snippet_id, score in results
          ]
+
+     async def has_embedding(
+         self, snippet_id: int, embedding_type: EmbeddingType
+     ) -> bool:
+         """Check if a snippet has an embedding."""
+         return (
+             await self.embedding_repository.get_embedding_by_snippet_id_and_type(
+                 snippet_id, embedding_type
+             )
+             is not None
+         )
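The new has_embedding hook lets callers skip snippets that are already indexed. A minimal usage sketch, assuming a LocalVectorSearchService instance (pending_snippets is an illustrative helper):

    async def pending_snippets(service, snippets, embedding_type):
        # Keep only snippets with no stored embedding of this type, so `index`
        # is not re-run over work that is already done.
        return [
            s for s in snippets
            if not await service.has_embedding(s.id, embedding_type)
        ]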
kodit/embedding/vector_search_service.py CHANGED
@@ -1,8 +1,11 @@
  """Embedding service."""

  from abc import ABC, abstractmethod
+ from collections.abc import AsyncGenerator
  from typing import NamedTuple

+ from kodit.embedding.embedding_models import EmbeddingType
+

  class VectorSearchResponse(NamedTuple):
      """Embedding result."""
@@ -18,11 +21,19 @@ class VectorSearchRequest(NamedTuple):
      text: str


+ class IndexResult(NamedTuple):
+     """Result of indexing."""
+
+     snippet_id: int
+
+
  class VectorSearchService(ABC):
      """Semantic search service interface."""

      @abstractmethod
-     async def index(self, data: list[VectorSearchRequest]) -> None:
+     def index(
+         self, data: list[VectorSearchRequest]
+     ) -> AsyncGenerator[list[IndexResult], None]:
          """Embed a list of documents.

          The embedding service accepts a massive list of id,strings to embed. Behind the
@@ -36,3 +47,9 @@ class VectorSearchService(ABC):
      @abstractmethod
      async def retrieve(self, query: str, top_k: int = 10) -> list[VectorSearchResponse]:
          """Query the embedding model."""
+
+     @abstractmethod
+     async def has_embedding(
+         self, snippet_id: int, embedding_type: EmbeddingType
+     ) -> bool:
+         """Check if a snippet has an embedding."""
kodit/embedding/vectorchord_vector_search_service.py CHANGED
@@ -1,13 +1,19 @@
  """Vectorchord vector search."""

+ from collections.abc import AsyncGenerator
  from typing import Any, Literal

  import structlog
  from sqlalchemy import Result, TextClause, text
  from sqlalchemy.ext.asyncio import AsyncSession

- from kodit.embedding.embedding_provider.embedding_provider import EmbeddingProvider
+ from kodit.embedding.embedding_models import EmbeddingType
+ from kodit.embedding.embedding_provider.embedding_provider import (
+     EmbeddingProvider,
+     EmbeddingRequest,
+ )
  from kodit.embedding.vector_search_service import (
+     IndexResult,
      VectorSearchRequest,
      VectorSearchResponse,
      VectorSearchService,
@@ -52,6 +58,10 @@ ORDER BY score ASC
  LIMIT :top_k;
  """

+ CHECK_VCHORD_EMBEDDING_EXISTS = """
+ SELECT EXISTS(SELECT 1 FROM {TABLE_NAME} WHERE snippet_id = :snippet_id)
+ """
+
  TaskName = Literal["code", "text"]


@@ -89,7 +99,15 @@ class VectorChordVectorSearchService(VectorSearchService):

      async def _create_tables(self) -> None:
          """Create the necessary tables."""
-         vector_dim = (await self.embedding_provider.embed(["dimension"]))[0]
+         req = EmbeddingRequest(id=0, text="dimension")
+         vector_dim: list[float] | None = None
+         async for batch in self.embedding_provider.embed([req]):
+             if batch:
+                 vector_dim = batch[0].embedding
+                 break
+         if vector_dim is None:
+             msg = "Failed to obtain embedding dimension from provider"
+             raise RuntimeError(msg)
          await self._session.execute(
              text(
                  f"""CREATE TABLE IF NOT EXISTS {self.table_name} (
@@ -130,31 +148,48 @@ class VectorChordVectorSearchService(VectorSearchService):
          """Commit the session."""
          await self._session.commit()

-     async def index(self, data: list[VectorSearchRequest]) -> None:
+     async def index(
+         self, data: list[VectorSearchRequest]
+     ) -> AsyncGenerator[list[IndexResult], None]:
          """Embed a list of documents."""
          if not data or len(data) == 0:
              self.log.warning("Embedding data is empty, skipping embedding")
              return

-         embeddings = await self.embedding_provider.embed([doc.text for doc in data])
-         # Execute inserts
-         await self._execute(
-             text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
-             [
-                 {"snippet_id": doc.snippet_id, "embedding": str(embedding)}
-                 for doc, embedding in zip(data, embeddings, strict=True)
-             ],
-         )
-         await self._commit()
+         requests = [EmbeddingRequest(id=doc.snippet_id, text=doc.text) for doc in data]
+
+         async for batch in self.embedding_provider.embed(requests):
+             await self._execute(
+                 text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
+                 [
+                     {
+                         "snippet_id": result.id,
+                         "embedding": str(result.embedding),
+                     }
+                     for result in batch
+                 ],
+             )
+             await self._commit()
+             yield [IndexResult(snippet_id=result.id) for result in batch]

      async def retrieve(self, query: str, top_k: int = 10) -> list[VectorSearchResponse]:
          """Query the embedding model."""
-         embedding = await self.embedding_provider.embed([query])
-         if len(embedding) == 0 or len(embedding[0]) == 0:
+         from kodit.embedding.embedding_provider.embedding_provider import (
+             EmbeddingRequest,
+         )
+
+         req = EmbeddingRequest(id=0, text=query)
+         embedding_vec: list[float] | None = None
+         async for batch in self.embedding_provider.embed([req]):
+             if batch:
+                 embedding_vec = batch[0].embedding
+                 break
+
+         if not embedding_vec:
              return []
          result = await self._execute(
              text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
-             {"query": str(embedding[0]), "top_k": top_k},
+             {"query": str(embedding_vec), "top_k": top_k},
          )
          rows = result.mappings().all()

@@ -162,3 +197,15 @@ class VectorChordVectorSearchService(VectorSearchService):
              VectorSearchResponse(snippet_id=row["snippet_id"], score=row["score"])
              for row in rows
          ]
+
+     async def has_embedding(
+         self,
+         snippet_id: int,
+         embedding_type: EmbeddingType,  # noqa: ARG002
+     ) -> bool:
+         """Check if a snippet has an embedding."""
+         result = await self._execute(
+             text(CHECK_VCHORD_EMBEDDING_EXISTS.format(TABLE_NAME=self.table_name)),
+             {"snippet_id": snippet_id},
+         )
+         return result.scalar_one()
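The existence check mirrors CHECK_VCHORD_EMBEDDING_EXISTS: an EXISTS subquery stops at the first matching row instead of counting them. A minimal SQLAlchemy sketch under assumed table and column names:

    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import AsyncSession


    async def row_exists(session: AsyncSession, table: str, snippet_id: int) -> bool:
        # EXISTS short-circuits on the first hit, so this stays cheap on large
        # tables; the table name is interpolated, the id is a bound parameter.
        query = text(
            f"SELECT EXISTS(SELECT 1 FROM {table} WHERE snippet_id = :snippet_id)"
        )
        result = await session.execute(query, {"snippet_id": snippet_id})
        return bool(result.scalar_one())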
kodit/enrichment/enrichment_provider/enrichment_provider.py CHANGED
@@ -1,6 +1,8 @@
  """Enrichment provider."""

  from abc import ABC, abstractmethod
+ from collections.abc import AsyncGenerator
+ from dataclasses import dataclass

  ENRICHMENT_SYSTEM_PROMPT = """
  You are a professional software developer. You will be given a snippet of code.
@@ -8,9 +10,27 @@ Please provide a concise explanation of the code.
  """


+ @dataclass
+ class EnrichmentRequest:
+     """Enrichment request."""
+
+     snippet_id: int
+     text: str
+
+
+ @dataclass
+ class EnrichmentResponse:
+     """Enrichment response."""
+
+     snippet_id: int
+     text: str
+
+
  class EnrichmentProvider(ABC):
      """Enrichment provider."""

      @abstractmethod
-     async def enrich(self, data: list[str]) -> list[str]:
+     def enrich(
+         self, data: list[EnrichmentRequest]
+     ) -> AsyncGenerator[EnrichmentResponse, None]:
          """Enrich a list of strings."""
kodit/enrichment/enrichment_provider/local_enrichment_provider.py CHANGED
@@ -1,15 +1,19 @@
  """Local embedding service."""

  import os
+ from collections.abc import AsyncGenerator

  import structlog
  import tiktoken
- from tqdm import tqdm

- from kodit.embedding.embedding_provider.embedding_provider import split_sub_batches
+ from kodit.embedding.embedding_provider.embedding_provider import (
+     EmbeddingRequest,
+ )
  from kodit.enrichment.enrichment_provider.enrichment_provider import (
      ENRICHMENT_SYSTEM_PROMPT,
      EnrichmentProvider,
+     EnrichmentRequest,
+     EnrichmentResponse,
  )

  DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
@@ -32,11 +36,16 @@ class LocalEnrichmentProvider(EnrichmentProvider):
          self.tokenizer = None
          self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")

-     async def enrich(self, data: list[str]) -> list[str]:
+     async def enrich(
+         self, data: list[EnrichmentRequest]
+     ) -> AsyncGenerator[EnrichmentResponse, None]:
          """Enrich a list of strings."""
+         # Remove empty snippets
+         data = [snippet for snippet in data if snippet.text]
+
          if not data or len(data) == 0:
              self.log.warning("Data is empty, skipping enrichment")
-             return []
+             return

          from transformers.models.auto.modeling_auto import (
              AutoModelForCausalLM,
@@ -57,36 +66,38 @@ class LocalEnrichmentProvider(EnrichmentProvider):
          )

          # Prepare prompts
-         prompts = [
-             self.tokenizer.apply_chat_template(
-                 [
-                     {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
-                     {"role": "user", "content": snippet},
-                 ],
-                 tokenize=False,
-                 add_generation_prompt=True,
-                 enable_thinking=False,
+         prompts: list[EmbeddingRequest] = [
+             EmbeddingRequest(
+                 id=snippet.snippet_id,
+                 text=self.tokenizer.apply_chat_template(
+                     [
+                         {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
+                         {"role": "user", "content": snippet.text},
+                     ],
+                     tokenize=False,
+                     add_generation_prompt=True,
+                     enable_thinking=False,
+                 ),
              )
              for snippet in data
          ]

-         # Batch prompts using split_sub_batches
-         batched_prompts = split_sub_batches(
-             self.encoding, prompts, max_context_window=self.context_window
-         )
-         results = []
-         for batch in tqdm(batched_prompts, leave=False, total=len(batched_prompts)):
+         for prompt in prompts:
              model_inputs = self.tokenizer(
-                 batch, return_tensors="pt", padding=True, truncation=True
+                 prompt.text,
+                 return_tensors="pt",
+                 padding=True,
+                 truncation=True,
              ).to(self.model.device)
              generated_ids = self.model.generate(
                  **model_inputs, max_new_tokens=self.context_window
              )
-             # For each prompt in the batch, decode only the generated part
-             for i, input_ids in enumerate(model_inputs["input_ids"]):
-                 output_ids = generated_ids[i][len(input_ids) :].tolist()
-                 content = self.tokenizer.decode(
-                     output_ids, skip_special_tokens=True
-                 ).strip("\n")
-                 results.append(content)
-         return results
+             input_ids = model_inputs["input_ids"][0]
+             output_ids = generated_ids[0][len(input_ids) :].tolist()
+             content = self.tokenizer.decode(output_ids, skip_special_tokens=True).strip(
+                 "\n"
+             )
+             yield EnrichmentResponse(
+                 snippet_id=prompt.id,
+                 text=content,
+             )
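The rewritten loop decodes only the tokens generated after the prompt, so the echoed input never appears in the enrichment text. A standalone sketch of that slice-then-decode step, assuming a Hugging Face tokenizer/model pair is already loaded (decode_continuation is an illustrative name):

    def decode_continuation(tokenizer, model, prompt: str, max_new_tokens: int = 256) -> str:
        # generate() returns prompt tokens followed by new tokens; slicing at
        # the prompt length keeps only the model's continuation.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        generated = model.generate(**inputs, max_new_tokens=max_new_tokens)
        prompt_len = inputs["input_ids"].shape[1]
        return tokenizer.decode(
            generated[0][prompt_len:], skip_special_tokens=True
        ).strip("\n")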
kodit/enrichment/enrichment_provider/openai_enrichment_provider.py CHANGED
@@ -1,15 +1,17 @@
  """OpenAI embedding service."""

  import asyncio
+ from collections.abc import AsyncGenerator

  import structlog
  import tiktoken
  from openai import AsyncOpenAI
- from tqdm import tqdm

  from kodit.enrichment.enrichment_provider.enrichment_provider import (
      ENRICHMENT_SYSTEM_PROMPT,
      EnrichmentProvider,
+     EnrichmentRequest,
+     EnrichmentResponse,
  )

  OPENAI_NUM_PARALLEL_TASKS = 10
@@ -29,25 +31,24 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
          self.model_name = model_name
          self.encoding = tiktoken.encoding_for_model("gpt-4o-mini")  # Approximation

-     async def enrich(self, data: list[str]) -> list[str]:
+     async def enrich(
+         self, data: list[EnrichmentRequest]
+     ) -> AsyncGenerator[EnrichmentResponse, None]:
          """Enrich a list of documents."""
          if not data or len(data) == 0:
              self.log.warning("Data is empty, skipping enrichment")
-             return []
+             return

          # Process batches in parallel with a semaphore to limit concurrent requests
          sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)

-         # Create a list of tuples with a temporary id for each snippet
-         # We need to do this so that we can return the results in the same order as the
-         # input data
-         input_data = [(i, snippet) for i, snippet in enumerate(data)]
-
-         async def process_data(data: tuple[int, str]) -> tuple[int, str]:
-             snippet_id, snippet = data
-             if not snippet:
-                 return snippet_id, ""
+         async def process_data(data: EnrichmentRequest) -> EnrichmentResponse:
              async with sem:
+                 if not data.text:
+                     return EnrichmentResponse(
+                         snippet_id=data.snippet_id,
+                         text="",
+                     )
                  try:
                      response = await self.openai_client.chat.completions.create(
                          model=self.model_name,
@@ -56,26 +57,23 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
                                  "role": "system",
                                  "content": ENRICHMENT_SYSTEM_PROMPT,
                              },
-                             {"role": "user", "content": snippet},
+                             {"role": "user", "content": data.text},
                          ],
                      )
-                     return snippet_id, response.choices[0].message.content or ""
+                     return EnrichmentResponse(
+                         snippet_id=data.snippet_id,
+                         text=response.choices[0].message.content or "",
+                     )
                  except Exception as e:
                      self.log.exception("Error enriching data", error=str(e))
-                     return snippet_id, ""
+                     return EnrichmentResponse(
+                         snippet_id=data.snippet_id,
+                         text="",
+                     )

          # Create tasks for all data
-         tasks = [process_data(snippet) for snippet in input_data]
+         tasks = [process_data(snippet) for snippet in data]

          # Process all data and yield results as they complete
-         results: list[tuple[int, str]] = []
-         for task in tqdm(
-             asyncio.as_completed(tasks),
-             total=len(tasks),
-             leave=False,
-         ):
-             result = await task
-             results.append(result)
-
-         # Output in the same order as the input data
-         return [result for _, result in sorted(results, key=lambda x: x[0])]
+         for task in asyncio.as_completed(tasks):
+             yield await task
kodit/enrichment/enrichment_service.py CHANGED
@@ -1,24 +1,34 @@
  """Enrichment service."""

  from abc import ABC, abstractmethod
+ from collections.abc import AsyncGenerator

- from kodit.enrichment.enrichment_provider.enrichment_provider import EnrichmentProvider
+ from kodit.enrichment.enrichment_provider.enrichment_provider import (
+     EnrichmentProvider,
+     EnrichmentRequest,
+     EnrichmentResponse,
+ )


  class EnrichmentService(ABC):
      """Enrichment service."""

      @abstractmethod
-     async def enrich(self, data: list[str]) -> list[str]:
+     def enrich(
+         self, data: list[EnrichmentRequest]
+     ) -> AsyncGenerator[EnrichmentResponse, None]:
          """Enrich a list of strings."""


  class NullEnrichmentService(EnrichmentService):
      """Null enrichment service."""

-     async def enrich(self, data: list[str]) -> list[str]:
+     async def enrich(
+         self, data: list[EnrichmentRequest]
+     ) -> AsyncGenerator[EnrichmentResponse, None]:
          """Enrich a list of strings."""
-         return [""] * len(data)
+         for request in data:
+             yield EnrichmentResponse(snippet_id=request.snippet_id, text="")


  class LLMEnrichmentService(EnrichmentService):
@@ -28,6 +38,8 @@ class LLMEnrichmentService(EnrichmentService):
          """Initialize the enrichment service."""
          self.enrichment_provider = enrichment_provider

-     async def enrich(self, data: list[str]) -> list[str]:
-         """Enrich a list of strings."""
-         return await self.enrichment_provider.enrich(data)
+     def enrich(
+         self, data: list[EnrichmentRequest]
+     ) -> AsyncGenerator[EnrichmentResponse, None]:
+         """Enrich a list of snippets."""
+         return self.enrichment_provider.enrich(data)
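LLMEnrichmentService.enrich becomes a plain def that returns the provider's async generator unchanged. Awaiting it, as 0.2.3 did, would no longer work, because calling an async generator function yields a generator rather than an awaitable. A minimal sketch with illustrative names:

    from collections.abc import AsyncGenerator


    async def numbers() -> AsyncGenerator[int, None]:
        for n in range(3):
            yield n


    def delegate() -> AsyncGenerator[int, None]:
        # No await here: the call returns the generator itself, and the
        # caller drives it with `async for`.
        return numbers()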
kodit/indexing/indexing_service.py CHANGED
@@ -22,6 +22,7 @@ from kodit.embedding.vector_search_service import (
      VectorSearchRequest,
      VectorSearchService,
  )
+ from kodit.enrichment.enrichment_provider.enrichment_provider import EnrichmentRequest
  from kodit.enrichment.enrichment_service import EnrichmentService
  from kodit.indexing.fusion import FusionRequest, reciprocal_rank_fusion
  from kodit.indexing.indexing_models import Snippet
@@ -200,37 +201,42 @@ class IndexService:
          )

          self.log.info("Creating semantic code index")
-         with Spinner():
-             await self.code_search_service.index(
+         with tqdm(total=len(snippets), leave=False) as pbar:
+             async for result in self.code_search_service.index(
                  [
                      VectorSearchRequest(snippet.id, snippet.content)
                      for snippet in snippets
                  ]
-             )
+             ):
+                 pbar.update(len(result))

          self.log.info("Enriching snippets", num_snippets=len(snippets))
-         enriched_contents = await self.enrichment_service.enrich(
-             [snippet.content for snippet in snippets]
-         )
+         enriched_contents = []
+         with tqdm(total=len(snippets), leave=False) as pbar:
+             async for result in self.enrichment_service.enrich(
+                 [
+                     EnrichmentRequest(snippet_id=snippet.id, text=snippet.content)
+                     for snippet in snippets
+                 ]
+             ):
+                 snippet = next(s for s in snippets if s.id == result.snippet_id)
+                 if snippet:
+                     snippet.content = (
+                         result.text + "\n\n```\n" + snippet.content + "\n```"
+                     )
+                     await self.repository.add_snippet(snippet)
+                 enriched_contents.append(result)
+                 pbar.update(1)

          self.log.info("Creating semantic text index")
-         with Spinner():
-             await self.text_search_service.index(
+         with tqdm(total=len(snippets), leave=False) as pbar:
+             async for result in self.text_search_service.index(
                  [
-                     VectorSearchRequest(snippet.id, enriched_content)
-                     for snippet, enriched_content in zip(
-                         snippets, enriched_contents, strict=True
-                     )
+                     VectorSearchRequest(snippet.id, snippet.content)
+                     for snippet in snippets
                  ]
-             )
-             # Add the enriched text back to the snippets and write to the database
-             for snippet, enriched_content in zip(
-                 snippets, enriched_contents, strict=True
              ):
-                 snippet.content = (
-                     enriched_content + "\n\n```\n" + snippet.content + "\n```"
-                 )
-                 await self.repository.add_snippet(snippet)
+                 pbar.update(len(result))

          # Update index timestamp
          await self.repository.update_index_timestamp(index)
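The spinner gives way to a tqdm bar that advances as each batch of IndexResult streams back. A standalone sketch of driving an async generator of batches with a progress bar, assuming tqdm is installed (consume_with_progress is an illustrative name):

    from tqdm import tqdm


    async def consume_with_progress(batches, total: int) -> list:
        # Each yielded batch advances the bar by its length, so progress is
        # visible while indexing streams instead of only after it finishes.
        results = []
        with tqdm(total=total, leave=False) as pbar:
            async for batch in batches:
                results.extend(batch)
                pbar.update(len(batch))
        return results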
kodit-0.2.3.dist-info/METADATA → kodit-0.2.4.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: kodit
- Version: 0.2.3
+ Version: 0.2.4
  Summary: Code indexing for better AI code generation
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
kodit-0.2.3.dist-info/RECORD → kodit-0.2.4.dist-info/RECORD
@@ -1,6 +1,6 @@
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
- kodit/_version.py,sha256=wD8hnA5gV5UmPkQnpT3xR6V2csgj9K5NEADogbLK79M,511
+ kodit/_version.py,sha256=1LUN_sRKOiFInoB6AlW6TYoQMCh1Z4KutwcHNvHcfB0,511
  kodit/app.py,sha256=qKBWJ0VNSY_M6G3VFfAQ0133q5bnS99cUFD0p396taw,1032
  kodit/cli.py,sha256=wKFXGUMX-fDLooaK-3po2TBpNNRBwgSD7BRbUddg-_M,11562
  kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
@@ -14,29 +14,29 @@ kodit/bm25/keyword_search_service.py,sha256=aBbWQKgQmi2re3EIHdXFS00n7Wj3b2D0pZsL
  kodit/bm25/local_bm25.py,sha256=nokrd_xAeqXi3m68X5P1R5KBhRRB1E2L_J6Zgm26PCg,3869
  kodit/bm25/vectorchord_bm25.py,sha256=0p_FgliaoevB8GLSmzWnV3zUjdcWgCgOKIpLURr7Qfo,6549
  kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
- kodit/embedding/embedding_factory.py,sha256=-WuXNleQ_mqdw1E4TczjtOawNeaXKAiDPFqN_XX7Mmg,2419
+ kodit/embedding/embedding_factory.py,sha256=lFcgqsDxw8L5mygq-TppQ2wtoIA2p2OL7XmtOyX8Omw,2683
  kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
  kodit/embedding/embedding_repository.py,sha256=-ux3scpBzel8c0pMH9fNOEsSXFIzl-IfgaWrkTb1szo,6907
- kodit/embedding/local_vector_search_service.py,sha256=dgMi8hQNUbYEgHnEYmLIpon4yLduoNUpu7k7VP6sOHI,2042
- kodit/embedding/vector_search_service.py,sha256=pQJ129QjGrAWOXzqkywmgtDRpy8_gtzYgkivyqF9Vrs,1009
- kodit/embedding/vectorchord_vector_search_service.py,sha256=TKNR3HgWHwwWtJ1SsvSaj_BXLJ_uw6Bdr_tpaePMeAA,5383
+ kodit/embedding/local_vector_search_service.py,sha256=yZm0ahQQKhfYZ943yxKHp04cairmzgGBUNi5PB_GDbo,3002
+ kodit/embedding/vector_search_service.py,sha256=frN9baAlqFmsY3xiv1ZeSgsfhK9FzKPkVR55MEvMV4I,1416
+ kodit/embedding/vectorchord_vector_search_service.py,sha256=JQeIl9mtR4E_izOoFD_4ZRfENHNfwoKr16pQkkGoK3o,6884
  kodit/embedding/embedding_provider/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
- kodit/embedding/embedding_provider/embedding_provider.py,sha256=T6AKMWwzEJ1vDe-cEIg-qxdjhUEZ0PKs9YqQMWaLaQ0,1928
- kodit/embedding/embedding_provider/hash_embedding_provider.py,sha256=nAhlhh8j8PqqCCbhVl26Y8ntFBm2vJBCtB4X04g5Wwg,2638
- kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=WP8lw6XG7v1_5Mw4_rhIOETooYRsxhkwmFaXCqCouQU,1977
- kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256=-phz5FKYM_tI3Q4_3SPzjzIOK3k92Uk52TAOTmoVoWI,2722
+ kodit/embedding/embedding_provider/embedding_provider.py,sha256=WDHifrsQOnpXwIDzSfau32Eq8z8BF3XNeVYd6X989uc,2841
+ kodit/embedding/embedding_provider/hash_embedding_provider.py,sha256=AhGize94EoScyQMhCjo26zlO0eP_m3F_1qvrVmB6MTE,2941
+ kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=kqbGd7TW6BUsOq_f_IzPCsD7z8LsFieTOZ7saY11I8o,2877
+ kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256=2FTIL34yVstf0NTJNSi-sjk38OJd4Aa66TH5FMPJul0,2425
  kodit/enrichment/__init__.py,sha256=vBEolHpKaHUhfINX0dSGyAPlvgpLNAer9YzFtdvCB24,18
  kodit/enrichment/enrichment_factory.py,sha256=AAzvxgjo-FQU5aAm9Zla4DAwUMKGrcw8mQwJsMhIsHY,1566
- kodit/enrichment/enrichment_service.py,sha256=87Sd3gGbEMJYb_wVrHG8L1yGIZmQNR7foUS4_y94azI,977
+ kodit/enrichment/enrichment_service.py,sha256=z7VrrQ-Jhb-oO26rQCaqlpmkGRlDQGAu7qVsI0cwHak,1310
  kodit/enrichment/enrichment_provider/__init__.py,sha256=klf8iuLVWX4iRz-DZQauFFNAoJC5CByczh48TBZPW-o,27
- kodit/enrichment/enrichment_provider/enrichment_provider.py,sha256=E0H5rq3OENM0yYbA8K_3nSnj5lUHCpoIOqpWLo-2MVU,413
- kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=RqwUD0BnwRQ8zlkFNkaKq8d58r33k2jIdnSdf6zla1w,3325
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=0Yw7h9RXptoI4bKuqJSKIRQXPUUhNV7eACavgoy_T8s,2874
+ kodit/enrichment/enrichment_provider/enrichment_provider.py,sha256=kiDgg2G8G85K4KqwCQKHE_ANybANURPO6NbASf4yAr0,751
+ kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=JipvgZwfNvHaECqLJRmQo3W10yb9rOVSrV2U0Jpp4d8,3456
+ kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=xAzbGHJHqGxZxa3yPvHAcPgjOMzQ05qLes0XW6OIdYc,2758
  kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
  kodit/indexing/fusion.py,sha256=TZb4fPAedXdEUXzwzOofW98QIOymdbclBOP1KOijuEk,1674
  kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
  kodit/indexing/indexing_repository.py,sha256=dqOS0pxKM6bUjMXWqYukAK8XdiD36OnskFASgZRXRQM,6955
- kodit/indexing/indexing_service.py,sha256=3hW7vbFyabLEkLU-PRoGR49yVLewANdOKlye4GhR-tw,11467
+ kodit/indexing/indexing_service.py,sha256=UD7RKQRkAlpmepl20vcdEgQapwEA2kDJQBmn4_kGWwU,11841
  kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
  kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
  kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
@@ -64,8 +64,8 @@ kodit/source/source_repository.py,sha256=eme0C3pRqwFZ1ZSbqq4Z6SV9CC6AvRmiOjy3eHQ
  kodit/source/source_service.py,sha256=E1KPG7TrorqdreJVHxZPx8CVLncOxGEvZ5uDQ6yZugo,5050
  kodit/util/__init__.py,sha256=bPu6CtqDWCRGU7VgW2_aiQrCBi8G89FS6k1PjvDajJ0,37
  kodit/util/spinner.py,sha256=R9bzrHtBiIH6IfLbmsIVHL53s8vg-tqW4lwGGALu4dw,1932
- kodit-0.2.3.dist-info/METADATA,sha256=ccZ0bl5PCGFNC30XSr-4ljL-JrkTgTfYlqZUlUPYba8,5867
- kodit-0.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- kodit-0.2.3.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
- kodit-0.2.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- kodit-0.2.3.dist-info/RECORD,,
+ kodit-0.2.4.dist-info/METADATA,sha256=PLQQVNKVnMyyliP9TEapeXUuog_N1bTFlup6F89B7NU,5867
+ kodit-0.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ kodit-0.2.4.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+ kodit-0.2.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ kodit-0.2.4.dist-info/RECORD,,