kodit 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodit/_version.py +2 -2
- kodit/bm25/local_bm25.py +39 -17
- kodit/bm25/vectorchord_bm25.py +4 -1
- kodit/config.py +21 -24
- kodit/embedding/embedding_factory.py +20 -6
- kodit/embedding/embedding_provider/embedding_provider.py +8 -4
- kodit/embedding/embedding_provider/local_embedding_provider.py +8 -2
- kodit/embedding/embedding_provider/openai_embedding_provider.py +3 -1
- kodit/embedding/local_vector_search_service.py +4 -0
- kodit/embedding/vectorchord_vector_search_service.py +10 -2
- kodit/enrichment/enrichment_factory.py +23 -7
- kodit/enrichment/enrichment_provider/local_enrichment_provider.py +53 -24
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +5 -1
- kodit/indexing/indexing_service.py +4 -0
- kodit/migrations/versions/42e836b21102_add_authors.py +64 -0
- kodit/source/git.py +16 -0
- kodit/source/ignore.py +53 -0
- kodit/source/source_models.py +40 -2
- kodit/source/source_repository.py +51 -16
- kodit/source/source_service.py +101 -51
- {kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/METADATA +4 -1
- {kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/RECORD +25 -22
- {kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/WHEEL +0 -0
- {kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
kodit/bm25/local_bm25.py
CHANGED
@@ -1,13 +1,14 @@
 """Locally hosted BM25 service primarily for use with SQLite."""

+from __future__ import annotations
+
 import json
 from pathlib import Path
+from typing import TYPE_CHECKING

 import aiofiles
-import bm25s
 import Stemmer
 import structlog
-from bm25s.tokenization import Tokenized

 from kodit.bm25.keyword_search_service import (
     BM25Document,
@@ -15,6 +16,11 @@ from kodit.bm25.keyword_search_service import (
     KeywordSearchProvider,
 )

+if TYPE_CHECKING:
+    import bm25s
+    from bm25s.tokenization import Tokenized
+
+
 SNIPPET_IDS_FILE = "snippet_ids.jsonl"


@@ -26,19 +32,28 @@ class BM25Service(KeywordSearchProvider):
         self.log = structlog.get_logger(__name__)
         self.index_path = data_dir / "bm25s_index"
         self.snippet_ids: list[int] = []
-        try:
-            self.log.debug("Loading BM25 index")
-            self.retriever = bm25s.BM25.load(self.index_path, mmap=True)
-            with Path(self.index_path / SNIPPET_IDS_FILE).open() as f:
-                self.snippet_ids = json.load(f)
-        except FileNotFoundError:
-            self.log.debug("BM25 index not found, creating new index")
-            self.retriever = bm25s.BM25()
-
         self.stemmer = Stemmer.Stemmer("english")
+        self.__retriever: bm25s.BM25 | None = None
+
+    def _retriever(self) -> bm25s.BM25:
+        """Get the BM25 retriever."""
+        if self.__retriever is None:
+            import bm25s
+
+            try:
+                self.log.debug("Loading BM25 index")
+                self.__retriever = bm25s.BM25.load(self.index_path, mmap=True)
+                with Path(self.index_path / SNIPPET_IDS_FILE).open() as f:
+                    self.snippet_ids = json.load(f)
+            except FileNotFoundError:
+                self.log.debug("BM25 index not found, creating new index")
+                self.__retriever = bm25s.BM25()
+        return self.__retriever

     def _tokenize(self, corpus: list[str]) -> list[list[str]] | Tokenized:
-
+        from bm25s import tokenize
+
+        return tokenize(
             corpus,
             stopwords="en",
             stemmer=self.stemmer,
@@ -49,10 +64,13 @@ class BM25Service(KeywordSearchProvider):
     async def index(self, corpus: list[BM25Document]) -> None:
         """Index a new corpus."""
         self.log.debug("Indexing corpus")
+        if not corpus or len(corpus) == 0:
+            self.log.warning("Corpus is empty, skipping bm25 index")
+            return
+
         vocab = self._tokenize([doc.text for doc in corpus])
-        self.
-        self.
-        self.retriever.save(self.index_path)
+        self._retriever().index(vocab, show_progress=False)
+        self._retriever().save(self.index_path)
         self.snippet_ids = self.snippet_ids + [doc.snippet_id for doc in corpus]
         async with aiofiles.open(self.index_path / SNIPPET_IDS_FILE, "w") as f:
             await f.write(json.dumps(self.snippet_ids))
@@ -63,8 +81,12 @@ class BM25Service(KeywordSearchProvider):
             self.log.warning("Top k is 0, returning empty list")
             return []

+        # Check that the index has data
+        if not hasattr(self._retriever(), "scores"):
+            return []
+
         # Get the number of documents in the index
-        num_docs = self.
+        num_docs = self._retriever().scores["num_docs"]
         if num_docs == 0:
             return []

@@ -80,7 +102,7 @@ class BM25Service(KeywordSearchProvider):

         self.log.debug("Query tokens", query_tokens=query_tokens)

-        results, scores = self.
+        results, scores = self._retriever().retrieve(
             query_tokens=query_tokens,
             corpus=self.snippet_ids,
             k=top_k,
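The recurring pattern in this release is deferring heavy imports (bm25s here, sentence-transformers and transformers in the files below) to first use, so that importing kodit itself stays cheap. A minimal sketch of the pattern, assuming a stand-in dependency called heavy_library rather than the real bm25s API:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from heavy_library import HeavyIndex  # type-checking only, never imported at runtime


    class Service:
        def __init__(self) -> None:
            # Nothing expensive happens at construction time.
            self.__index: HeavyIndex | None = None

        def _index(self) -> HeavyIndex:
            """Import and build the index on first use, then cache it."""
            if self.__index is None:
                from heavy_library import HeavyIndex  # deferred until first use

                self.__index = HeavyIndex()
            return self.__index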
kodit/bm25/vectorchord_bm25.py
CHANGED
@@ -2,6 +2,7 @@

 from typing import Any

+import structlog
 from sqlalchemy import Result, TextClause, bindparam, text
 from sqlalchemy.ext.asyncio import AsyncSession

@@ -93,6 +94,7 @@ class VectorChordBM25(KeywordSearchProvider):
         """Initialize the VectorChord BM25."""
         self.__session = session
         self._initialized = False
+        self.log = structlog.get_logger(__name__)

     async def _initialize(self) -> None:
         """Initialize the VectorChord environment."""
@@ -149,7 +151,8 @@ class VectorChordBM25(KeywordSearchProvider):
             if doc.snippet_id is not None and doc.text is not None and doc.text != ""
         ]

-        if not corpus:
+        if not corpus or len(corpus) == 0:
+            self.log.warning("Corpus is empty, skipping bm25 index")
             return

         # Execute inserts
kodit/config.py
CHANGED
@@ -1,16 +1,20 @@
 """Global configuration for the kodit project."""

+from __future__ import annotations
+
 import asyncio
-from collections.abc import Callable, Coroutine
 from functools import wraps
 from pathlib import Path
-from typing import Any, Literal, TypeVar
+from typing import TYPE_CHECKING, Any, Literal, TypeVar

 import click
-from openai import AsyncOpenAI
 from pydantic import BaseModel, Field
 from pydantic_settings import BaseSettings, SettingsConfigDict

+if TYPE_CHECKING:
+    from collections.abc import Callable, Coroutine
+
+
 from kodit.database import Database

 DEFAULT_BASE_DIR = Path.home() / ".kodit"
@@ -20,13 +24,16 @@ DEFAULT_LOG_FORMAT = "pretty"
 DEFAULT_DISABLE_TELEMETRY = False
 T = TypeVar("T")

+EndpointType = Literal["openai"]
+

 class Endpoint(BaseModel):
     """Endpoint provides configuration for an AI service."""

-    type:
-    api_key: str | None = None
+    type: EndpointType | None = None
     base_url: str | None = None
+    model: str | None = None
+    api_key: str | None = None


 class Search(BaseModel):
@@ -52,15 +59,20 @@ class AppContext(BaseSettings):
     log_format: str = Field(default=DEFAULT_LOG_FORMAT)
     disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
     default_endpoint: Endpoint | None = Field(
-        default=
-            type="openai",
-            base_url="https://api.openai.com/v1",
-        ),
+        default=None,
         description=(
             "Default endpoint to use for all AI interactions "
             "(can be overridden by task-specific configuration)."
         ),
     )
+    embedding_endpoint: Endpoint | None = Field(
+        default=None,
+        description="Endpoint to use for embedding.",
+    )
+    enrichment_endpoint: Endpoint | None = Field(
+        default=None,
+        description="Endpoint to use for enrichment.",
+    )
     default_search: Search = Field(
         default=Search(),
     )
@@ -90,21 +102,6 @@ class AppContext(BaseSettings):
         await self._db.run_migrations(self.db_url)
         return self._db

-    def get_default_openai_client(self) -> AsyncOpenAI | None:
-        """Get the default OpenAI client, if it is configured."""
-        endpoint = self.default_endpoint
-        if not (
-            endpoint
-            and endpoint.type == "openai"
-            and endpoint.api_key
-            and endpoint.base_url
-        ):
-            return None
-        return AsyncOpenAI(
-            api_key=endpoint.api_key,
-            base_url=endpoint.base_url,
-        )
-

 with_app_context = click.make_pass_decorator(AppContext)
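The Endpoint model gains model and per-task override fields, and the default endpoint is no longer silently OpenAI. A usage sketch of the new configuration surface; key values are placeholders:

    from kodit.config import AppContext, Endpoint

    ctx = AppContext(
        default_endpoint=Endpoint(
            type="openai",
            base_url="https://api.openai.com/v1",
            model="text-embedding-3-small",
            api_key="sk-...",  # placeholder
        ),
        # Task-specific override: enrichment uses a chat model instead.
        enrichment_endpoint=Endpoint(type="openai", model="gpt-4o-mini", api_key="sk-..."),
    )

The factories below resolve a task's endpoint as task-specific first, then the default, then fall back to the local providers.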
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
4
4
|
|
|
5
|
-
from kodit.config import AppContext
|
|
5
|
+
from kodit.config import AppContext, Endpoint
|
|
6
6
|
from kodit.embedding.embedding_provider.local_embedding_provider import (
|
|
7
7
|
CODE,
|
|
8
8
|
LocalEmbeddingProvider,
|
|
@@ -16,19 +16,33 @@ from kodit.embedding.vector_search_service import (
|
|
|
16
16
|
VectorSearchService,
|
|
17
17
|
)
|
|
18
18
|
from kodit.embedding.vectorchord_vector_search_service import (
|
|
19
|
+
TaskName,
|
|
19
20
|
VectorChordVectorSearchService,
|
|
20
21
|
)
|
|
21
22
|
|
|
22
23
|
|
|
24
|
+
def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
|
|
25
|
+
"""Get the endpoint configuration for the embedding service."""
|
|
26
|
+
return app_context.embedding_endpoint or app_context.default_endpoint or None
|
|
27
|
+
|
|
28
|
+
|
|
23
29
|
def embedding_factory(
|
|
24
|
-
task_name:
|
|
30
|
+
task_name: TaskName, app_context: AppContext, session: AsyncSession
|
|
25
31
|
) -> VectorSearchService:
|
|
26
32
|
"""Create an embedding service."""
|
|
27
33
|
embedding_repository = EmbeddingRepository(session=session)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
if
|
|
31
|
-
|
|
34
|
+
endpoint = _get_endpoint_configuration(app_context)
|
|
35
|
+
|
|
36
|
+
if endpoint and endpoint.type == "openai":
|
|
37
|
+
from openai import AsyncOpenAI
|
|
38
|
+
|
|
39
|
+
embedding_provider = OpenAIEmbeddingProvider(
|
|
40
|
+
openai_client=AsyncOpenAI(
|
|
41
|
+
api_key=endpoint.api_key or "default",
|
|
42
|
+
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
43
|
+
),
|
|
44
|
+
model_name=endpoint.model or "text-embedding-3-small",
|
|
45
|
+
)
|
|
32
46
|
else:
|
|
33
47
|
embedding_provider = LocalEmbeddingProvider(CODE)
|
|
34
48
|
|
|
@@ -23,7 +23,11 @@ class EmbeddingProvider(ABC):
|
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
def split_sub_batches(
|
|
26
|
+
def split_sub_batches(
|
|
27
|
+
encoding: tiktoken.Encoding,
|
|
28
|
+
data: list[str],
|
|
29
|
+
max_context_window: int = OPENAI_MAX_EMBEDDING_SIZE,
|
|
30
|
+
) -> list[list[str]]:
|
|
27
31
|
"""Split a list of strings into smaller sub-batches."""
|
|
28
32
|
log = structlog.get_logger(__name__)
|
|
29
33
|
result = []
|
|
@@ -37,10 +41,10 @@ def split_sub_batches(encoding: tiktoken.Encoding, data: list[str]) -> list[list
|
|
|
37
41
|
next_item = data_to_process[0]
|
|
38
42
|
item_tokens = len(encoding.encode(next_item))
|
|
39
43
|
|
|
40
|
-
if item_tokens >
|
|
44
|
+
if item_tokens > max_context_window:
|
|
41
45
|
# Loop around trying to truncate the snippet until it fits in the max
|
|
42
46
|
# embedding size
|
|
43
|
-
while item_tokens >
|
|
47
|
+
while item_tokens > max_context_window:
|
|
44
48
|
next_item = next_item[:-1]
|
|
45
49
|
item_tokens = len(encoding.encode(next_item))
|
|
46
50
|
|
|
@@ -48,7 +52,7 @@ def split_sub_batches(encoding: tiktoken.Encoding, data: list[str]) -> list[list
|
|
|
48
52
|
|
|
49
53
|
log.warning("Truncated snippet", snippet=next_item)
|
|
50
54
|
|
|
51
|
-
if current_tokens + item_tokens >
|
|
55
|
+
if current_tokens + item_tokens > max_context_window:
|
|
52
56
|
break
|
|
53
57
|
|
|
54
58
|
next_batch.append(data_to_process.pop(0))
|
|
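The batching helper now takes the token budget as a parameter instead of hard-coding the OpenAI embedding limit, which is what lets the local enrichment provider below reuse it with a 2048-token window. A usage sketch; the documents are illustrative:

    import tiktoken

    from kodit.embedding.embedding_provider.embedding_provider import split_sub_batches

    encoding = tiktoken.encoding_for_model("text-embedding-3-small")
    docs = ["def add(a, b):\n    return a + b"] * 100

    # Each returned batch fits inside the given token budget; a single item
    # longer than the budget is truncated until it fits.
    batches = split_sub_batches(encoding, docs, max_context_window=2048)
    print(len(batches), "batches")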
@@ -1,10 +1,12 @@
|
|
|
1
1
|
"""Local embedding service."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import os
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
4
7
|
|
|
5
8
|
import structlog
|
|
6
9
|
import tiktoken
|
|
7
|
-
from sentence_transformers import SentenceTransformer
|
|
8
10
|
from tqdm import tqdm
|
|
9
11
|
|
|
10
12
|
from kodit.embedding.embedding_provider.embedding_provider import (
|
|
@@ -13,6 +15,9 @@ from kodit.embedding.embedding_provider.embedding_provider import (
|
|
|
13
15
|
split_sub_batches,
|
|
14
16
|
)
|
|
15
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from sentence_transformers import SentenceTransformer
|
|
20
|
+
|
|
16
21
|
TINY = "tiny"
|
|
17
22
|
CODE = "code"
|
|
18
23
|
TEST = "test"
|
|
@@ -38,10 +43,11 @@ class LocalEmbeddingProvider(EmbeddingProvider):
|
|
|
38
43
|
"""Get the embedding model."""
|
|
39
44
|
if self.embedding_model is None:
|
|
40
45
|
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Avoid warnings
|
|
46
|
+
from sentence_transformers import SentenceTransformer
|
|
47
|
+
|
|
41
48
|
self.embedding_model = SentenceTransformer(
|
|
42
49
|
self.model_name,
|
|
43
50
|
trust_remote_code=True,
|
|
44
|
-
device="cpu", # Force CPU so we don't have to install accelerate, etc.
|
|
45
51
|
)
|
|
46
52
|
return self.embedding_model
|
|
47
53
|
|
|
@@ -27,7 +27,9 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
|
|
|
27
27
|
self.log = structlog.get_logger(__name__)
|
|
28
28
|
self.openai_client = openai_client
|
|
29
29
|
self.model_name = model_name
|
|
30
|
-
self.encoding = tiktoken.encoding_for_model(
|
|
30
|
+
self.encoding = tiktoken.encoding_for_model(
|
|
31
|
+
"text-embedding-3-small"
|
|
32
|
+
) # Sensible default
|
|
31
33
|
|
|
32
34
|
async def embed(self, data: list[str]) -> list[Vector]:
|
|
33
35
|
"""Embed a list of documents."""
|
|
@@ -29,6 +29,10 @@ class LocalVectorSearchService(VectorSearchService):
|
|
|
29
29
|
|
|
30
30
|
async def index(self, data: list[VectorSearchRequest]) -> None:
|
|
31
31
|
"""Embed a list of documents."""
|
|
32
|
+
if not data or len(data) == 0:
|
|
33
|
+
self.log.warning("Embedding data is empty, skipping embedding")
|
|
34
|
+
return
|
|
35
|
+
|
|
32
36
|
embeddings = await self.embedding_provider.embed([i.text for i in data])
|
|
33
37
|
for i, x in zip(data, embeddings, strict=False):
|
|
34
38
|
await self.embedding_repository.create_embedding(
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Vectorchord vector search."""
|
|
2
2
|
|
|
3
|
-
from typing import Any
|
|
3
|
+
from typing import Any, Literal
|
|
4
4
|
|
|
5
|
+
import structlog
|
|
5
6
|
from sqlalchemy import Result, TextClause, text
|
|
6
7
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
8
|
|
|
@@ -51,13 +52,15 @@ ORDER BY score ASC
|
|
|
51
52
|
LIMIT :top_k;
|
|
52
53
|
"""
|
|
53
54
|
|
|
55
|
+
TaskName = Literal["code", "text"]
|
|
56
|
+
|
|
54
57
|
|
|
55
58
|
class VectorChordVectorSearchService(VectorSearchService):
|
|
56
59
|
"""VectorChord vector search."""
|
|
57
60
|
|
|
58
61
|
def __init__(
|
|
59
62
|
self,
|
|
60
|
-
task_name:
|
|
63
|
+
task_name: TaskName,
|
|
61
64
|
session: AsyncSession,
|
|
62
65
|
embedding_provider: EmbeddingProvider,
|
|
63
66
|
) -> None:
|
|
@@ -67,6 +70,7 @@ class VectorChordVectorSearchService(VectorSearchService):
|
|
|
67
70
|
self._initialized = False
|
|
68
71
|
self.table_name = f"vectorchord_{task_name}_embeddings"
|
|
69
72
|
self.index_name = f"{self.table_name}_idx"
|
|
73
|
+
self.log = structlog.get_logger(__name__)
|
|
70
74
|
|
|
71
75
|
async def _initialize(self) -> None:
|
|
72
76
|
"""Initialize the VectorChord environment."""
|
|
@@ -128,6 +132,10 @@ class VectorChordVectorSearchService(VectorSearchService):
|
|
|
128
132
|
|
|
129
133
|
async def index(self, data: list[VectorSearchRequest]) -> None:
|
|
130
134
|
"""Embed a list of documents."""
|
|
135
|
+
if not data or len(data) == 0:
|
|
136
|
+
self.log.warning("Embedding data is empty, skipping embedding")
|
|
137
|
+
return
|
|
138
|
+
|
|
131
139
|
embeddings = await self.embedding_provider.embed([doc.text for doc in data])
|
|
132
140
|
# Execute inserts
|
|
133
141
|
await self._execute(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""Embedding service."""
|
|
2
2
|
|
|
3
|
-
from kodit.config import AppContext
|
|
3
|
+
from kodit.config import AppContext, Endpoint
|
|
4
4
|
from kodit.enrichment.enrichment_provider.local_enrichment_provider import (
|
|
5
5
|
LocalEnrichmentProvider,
|
|
6
6
|
)
|
|
@@ -13,11 +13,27 @@ from kodit.enrichment.enrichment_service import (
|
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
|
|
17
|
+
"""Get the endpoint configuration for the enrichment service."""
|
|
18
|
+
return app_context.enrichment_endpoint or app_context.default_endpoint or None
|
|
19
|
+
|
|
20
|
+
|
|
16
21
|
def enrichment_factory(app_context: AppContext) -> EnrichmentService:
|
|
17
|
-
"""Create an
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
+
"""Create an enrichment service."""
|
|
23
|
+
endpoint = _get_endpoint_configuration(app_context)
|
|
24
|
+
endpoint = app_context.enrichment_endpoint or app_context.default_endpoint or None
|
|
25
|
+
|
|
26
|
+
if endpoint and endpoint.type == "openai":
|
|
27
|
+
from openai import AsyncOpenAI
|
|
28
|
+
|
|
29
|
+
enrichment_provider = OpenAIEnrichmentProvider(
|
|
30
|
+
openai_client=AsyncOpenAI(
|
|
31
|
+
api_key=endpoint.api_key or "default",
|
|
32
|
+
base_url=endpoint.base_url or "https://api.openai.com/v1",
|
|
33
|
+
),
|
|
34
|
+
model_name=endpoint.model or "gpt-4o-mini",
|
|
35
|
+
)
|
|
36
|
+
else:
|
|
37
|
+
enrichment_provider = LocalEnrichmentProvider()
|
|
22
38
|
|
|
23
|
-
return LLMEnrichmentService(
|
|
39
|
+
return LLMEnrichmentService(enrichment_provider=enrichment_provider)
|
|
@@ -3,61 +3,90 @@
|
|
|
3
3
|
import os
|
|
4
4
|
|
|
5
5
|
import structlog
|
|
6
|
-
|
|
7
|
-
from
|
|
6
|
+
import tiktoken
|
|
7
|
+
from tqdm import tqdm
|
|
8
8
|
|
|
9
|
+
from kodit.embedding.embedding_provider.embedding_provider import split_sub_batches
|
|
9
10
|
from kodit.enrichment.enrichment_provider.enrichment_provider import (
|
|
10
11
|
ENRICHMENT_SYSTEM_PROMPT,
|
|
11
12
|
EnrichmentProvider,
|
|
12
13
|
)
|
|
13
14
|
|
|
15
|
+
DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
|
|
16
|
+
DEFAULT_CONTEXT_WINDOW_SIZE = 2048 # Small so it works even on low-powered devices
|
|
17
|
+
|
|
14
18
|
|
|
15
19
|
class LocalEnrichmentProvider(EnrichmentProvider):
|
|
16
20
|
"""Local embedder."""
|
|
17
21
|
|
|
18
|
-
def __init__(
|
|
22
|
+
def __init__(
|
|
23
|
+
self,
|
|
24
|
+
model_name: str = DEFAULT_ENRICHMENT_MODEL,
|
|
25
|
+
context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
|
|
26
|
+
) -> None:
|
|
19
27
|
"""Initialize the local enrichment provider."""
|
|
20
28
|
self.log = structlog.get_logger(__name__)
|
|
21
29
|
self.model_name = model_name
|
|
30
|
+
self.context_window = context_window
|
|
22
31
|
self.model = None
|
|
23
32
|
self.tokenizer = None
|
|
33
|
+
self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
|
|
24
34
|
|
|
25
35
|
async def enrich(self, data: list[str]) -> list[str]:
|
|
26
36
|
"""Enrich a list of strings."""
|
|
37
|
+
if not data or len(data) == 0:
|
|
38
|
+
self.log.warning("Data is empty, skipping enrichment")
|
|
39
|
+
return []
|
|
40
|
+
|
|
41
|
+
from transformers.models.auto.modeling_auto import (
|
|
42
|
+
AutoModelForCausalLM,
|
|
43
|
+
)
|
|
44
|
+
from transformers.models.auto.tokenization_auto import AutoTokenizer
|
|
45
|
+
|
|
27
46
|
if self.tokenizer is None:
|
|
28
|
-
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
47
|
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
|
48
|
+
self.model_name, padding_side="left"
|
|
49
|
+
)
|
|
29
50
|
if self.model is None:
|
|
30
51
|
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Avoid warnings
|
|
31
52
|
self.model = AutoModelForCausalLM.from_pretrained(
|
|
32
53
|
self.model_name,
|
|
33
54
|
torch_dtype="auto",
|
|
34
55
|
trust_remote_code=True,
|
|
56
|
+
device_map="auto",
|
|
35
57
|
)
|
|
36
58
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
text = self.tokenizer.apply_chat_template(
|
|
45
|
-
messages,
|
|
59
|
+
# Prepare prompts
|
|
60
|
+
prompts = [
|
|
61
|
+
self.tokenizer.apply_chat_template(
|
|
62
|
+
[
|
|
63
|
+
{"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
|
|
64
|
+
{"role": "user", "content": snippet},
|
|
65
|
+
],
|
|
46
66
|
tokenize=False,
|
|
47
67
|
add_generation_prompt=True,
|
|
48
68
|
enable_thinking=False,
|
|
49
69
|
)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
)
|
|
70
|
+
for snippet in data
|
|
71
|
+
]
|
|
53
72
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
73
|
+
# Batch prompts using split_sub_batches
|
|
74
|
+
batched_prompts = split_sub_batches(
|
|
75
|
+
self.encoding, prompts, max_context_window=self.context_window
|
|
76
|
+
)
|
|
77
|
+
results = []
|
|
78
|
+
for batch in tqdm(batched_prompts, leave=False, total=len(batched_prompts)):
|
|
79
|
+
model_inputs = self.tokenizer(
|
|
80
|
+
batch, return_tensors="pt", padding=True, truncation=True
|
|
81
|
+
).to(self.model.device)
|
|
82
|
+
generated_ids = self.model.generate(
|
|
83
|
+
**model_inputs, max_new_tokens=self.context_window
|
|
59
84
|
)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
85
|
+
# For each prompt in the batch, decode only the generated part
|
|
86
|
+
for i, input_ids in enumerate(model_inputs["input_ids"]):
|
|
87
|
+
output_ids = generated_ids[i][len(input_ids) :].tolist()
|
|
88
|
+
content = self.tokenizer.decode(
|
|
89
|
+
output_ids, skip_special_tokens=True
|
|
90
|
+
).strip("\n")
|
|
91
|
+
results.append(content)
|
|
63
92
|
return results
|
|
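The decode step relies on left padding: every prompt ends at the same position, so slicing off len(input_ids) leaves exactly the newly generated tokens. A stand-alone sketch of that trick, using a small public model as a stand-in for Qwen3-0.6B:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2", padding_side="left")
    tok.pad_token = tok.pad_token or tok.eos_token  # GPT-2 has no pad token
    model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

    inputs = tok(["Hello", "A somewhat longer prompt"], return_tensors="pt", padding=True)
    out = model.generate(**inputs, max_new_tokens=8, pad_token_id=tok.pad_token_id)

    # Slice off the (padded) prompt to decode only the generated suffix per row.
    for i, input_ids in enumerate(inputs["input_ids"]):
        print(tok.decode(out[i][len(input_ids):], skip_special_tokens=True))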
@@ -27,10 +27,14 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
|
|
|
27
27
|
self.log = structlog.get_logger(__name__)
|
|
28
28
|
self.openai_client = openai_client
|
|
29
29
|
self.model_name = model_name
|
|
30
|
-
self.encoding = tiktoken.encoding_for_model(
|
|
30
|
+
self.encoding = tiktoken.encoding_for_model("gpt-4o-mini") # Approximation
|
|
31
31
|
|
|
32
32
|
async def enrich(self, data: list[str]) -> list[str]:
|
|
33
33
|
"""Enrich a list of documents."""
|
|
34
|
+
if not data or len(data) == 0:
|
|
35
|
+
self.log.warning("Data is empty, skipping enrichment")
|
|
36
|
+
return []
|
|
37
|
+
|
|
34
38
|
# Process batches in parallel with a semaphore to limit concurrent requests
|
|
35
39
|
sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)
|
|
36
40
|
|
|
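A minimal sketch of the semaphore-bounded fan-out that comment refers to; the limit and the fake request are placeholders for OPENAI_NUM_PARALLEL_TASKS and the real API call:

    import asyncio

    LIMIT = 8  # stand-in for OPENAI_NUM_PARALLEL_TASKS

    async def fake_request(batch: list[str]) -> list[str]:
        await asyncio.sleep(0.01)  # stands in for one API call
        return [s.upper() for s in batch]

    async def enrich_all(batches: list[list[str]]) -> list[list[str]]:
        sem = asyncio.Semaphore(LIMIT)

        async def bounded(batch: list[str]) -> list[str]:
            async with sem:  # at most LIMIT requests in flight at once
                return await fake_request(batch)

        return await asyncio.gather(*(bounded(b) for b in batches))

    print(asyncio.run(enrich_all([["a"], ["b"], ["c"]])))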
@@ -289,6 +289,10 @@ class IndexService:
|
|
|
289
289
|
|
|
290
290
|
"""
|
|
291
291
|
files = await self.repository.files_for_index(index_id)
|
|
292
|
+
if not files:
|
|
293
|
+
self.log.warning("No files to create snippets for")
|
|
294
|
+
return
|
|
295
|
+
|
|
292
296
|
for file in tqdm(files, total=len(files), leave=False):
|
|
293
297
|
# Skip unsupported file types
|
|
294
298
|
if file.mime_type in MIME_BLACKLIST:
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# ruff: noqa
|
|
2
|
+
"""add authors
|
|
3
|
+
|
|
4
|
+
Revision ID: 42e836b21102
|
|
5
|
+
Revises: c3f5137d30f5
|
|
6
|
+
Create Date: 2025-06-13 14:48:50.152940
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Sequence, Union
|
|
11
|
+
|
|
12
|
+
from alembic import op
|
|
13
|
+
import sqlalchemy as sa
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# revision identifiers, used by Alembic.
|
|
17
|
+
revision: str = '42e836b21102'
|
|
18
|
+
down_revision: Union[str, None] = 'c3f5137d30f5'
|
|
19
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
20
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def upgrade() -> None:
|
|
24
|
+
"""Upgrade schema."""
|
|
25
|
+
# ### commands auto generated by Alembic - please adjust! ###
|
|
26
|
+
op.create_table('authors',
|
|
27
|
+
sa.Column('name', sa.String(length=255), nullable=False),
|
|
28
|
+
sa.Column('email', sa.String(length=255), nullable=False),
|
|
29
|
+
sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
|
|
30
|
+
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
|
|
31
|
+
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
|
|
32
|
+
sa.PrimaryKeyConstraint('id')
|
|
33
|
+
)
|
|
34
|
+
op.create_index(op.f('ix_authors_email'), 'authors', ['email'], unique=True)
|
|
35
|
+
op.create_index(op.f('ix_authors_name'), 'authors', ['name'], unique=True)
|
|
36
|
+
op.create_table('author_file_mappings',
|
|
37
|
+
sa.Column('author_id', sa.Integer(), nullable=False),
|
|
38
|
+
sa.Column('file_id', sa.Integer(), nullable=False),
|
|
39
|
+
sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
|
|
40
|
+
sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
|
|
41
|
+
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
|
|
42
|
+
sa.ForeignKeyConstraint(['author_id'], ['authors.id'], ),
|
|
43
|
+
sa.ForeignKeyConstraint(['file_id'], ['files.id'], ),
|
|
44
|
+
sa.PrimaryKeyConstraint('id')
|
|
45
|
+
)
|
|
46
|
+
op.add_column('files', sa.Column('extension', sa.String(length=255), nullable=False))
|
|
47
|
+
op.create_index(op.f('ix_files_extension'), 'files', ['extension'], unique=False)
|
|
48
|
+
op.add_column('sources', sa.Column('type', sa.Enum('UNKNOWN', 'FOLDER', 'GIT', name='sourcetype'), nullable=False))
|
|
49
|
+
op.create_index(op.f('ix_sources_type'), 'sources', ['type'], unique=False)
|
|
50
|
+
# ### end Alembic commands ###
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def downgrade() -> None:
|
|
54
|
+
"""Downgrade schema."""
|
|
55
|
+
# ### commands auto generated by Alembic - please adjust! ###
|
|
56
|
+
op.drop_index(op.f('ix_sources_type'), table_name='sources')
|
|
57
|
+
op.drop_column('sources', 'type')
|
|
58
|
+
op.drop_index(op.f('ix_files_extension'), table_name='files')
|
|
59
|
+
op.drop_column('files', 'extension')
|
|
60
|
+
op.drop_table('author_file_mappings')
|
|
61
|
+
op.drop_index(op.f('ix_authors_name'), table_name='authors')
|
|
62
|
+
op.drop_index(op.f('ix_authors_email'), table_name='authors')
|
|
63
|
+
op.drop_table('authors')
|
|
64
|
+
# ### end Alembic commands ###
|
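kodit normally applies pending revisions itself (via AppContext's database setup), but the migration can also be applied programmatically. A sketch, assuming an alembic.ini that points at kodit's migration environment:

    from alembic import command
    from alembic.config import Config

    cfg = Config("alembic.ini")  # assumed configuration path
    command.upgrade(cfg, "42e836b21102")  # or "head" for all pending revisions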
kodit/source/git.py
ADDED
@@ -0,0 +1,16 @@
+"""Git utilities."""
+
+import tempfile
+
+import git
+
+
+def is_valid_clone_target(target: str) -> bool:
+    """Return True if the target is clonable."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        try:
+            git.Repo.clone_from(target, temp_dir)
+        except git.GitCommandError:
+            return False
+        else:
+            return True
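Usage sketch; the URL is a placeholder. Note the check performs a real throwaway clone into a temporary directory, so for remote targets it costs a full network round-trip:

    from kodit.source.git import is_valid_clone_target

    if is_valid_clone_target("https://github.com/example/repo.git"):
        print("clonable")
    else:
        print("not a git target")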
kodit/source/ignore.py
ADDED
@@ -0,0 +1,53 @@
+"""Ignore patterns."""
+
+from pathlib import Path
+
+import git
+import pathspec
+
+from kodit.source.git import is_valid_clone_target
+
+
+class IgnorePatterns:
+    """Ignore patterns."""
+
+    def __init__(self, base_dir: Path) -> None:
+        """Initialize the ignore patterns."""
+        if not base_dir.is_dir():
+            msg = f"Base directory is not a directory: {base_dir}"
+            raise ValueError(msg)
+
+        self.base_dir = base_dir
+
+        # Check if the base_dir is a valid git repository
+        self.git_repo = None
+        if is_valid_clone_target(str(base_dir)):
+            self.git_repo = git.Repo(base_dir)
+
+    def should_ignore(self, path: Path) -> bool:
+        """Check if a path should be ignored."""
+        if path.is_dir():
+            return False
+
+        # Get the path relative to the base_dir
+        relative_path = path.relative_to(self.base_dir)
+
+        # If this file is _part_ of a .git directory, then it should be ignored
+        if relative_path.as_posix().startswith(".git"):
+            return True
+
+        # If it is a git repository, then we need to check if the file is ignored
+        if self.git_repo and len(self.git_repo.ignored(path)) > 0:
+            return True
+
+        # If the repo has a .noindex file
+        noindex_path = Path(self.base_dir / ".noindex")
+        if noindex_path.exists():
+            with noindex_path.open() as f:
+                patterns = [line.strip() for line in f if line.strip()]
+            if patterns:
+                spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
+                if spec.match_file(relative_path.as_posix()):
+                    return True
+
+        return False
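A usage sketch mirroring how source_service.py filters files; the path is a placeholder for a cloned repository:

    from pathlib import Path

    from kodit.source.ignore import IgnorePatterns

    clone_path = Path("/tmp/example-clone")  # placeholder
    ignore = IgnorePatterns(clone_path)
    files = [f for f in clone_path.rglob("*") if not ignore.should_ignore(f)]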
kodit/source/source_models.py
CHANGED
@@ -5,7 +5,10 @@ It includes models for tracking different types of sources (git repositories and
 folders) and their relationships.
 """

-
+import datetime
+from enum import Enum as EnumType
+
+from sqlalchemy import Enum, ForeignKey, Integer, String
 from sqlalchemy.orm import Mapped, mapped_column

 from kodit.database import Base, CommonMixin
@@ -14,6 +17,14 @@ from kodit.database import Base, CommonMixin
 __all__ = ["File", "Source"]


+class SourceType(EnumType):
+    """The type of source."""
+
+    UNKNOWN = 0
+    FOLDER = 1
+    GIT = 2
+
+
 class Source(Base, CommonMixin):
     """Base model for tracking code sources.

@@ -32,12 +43,34 @@ class Source(Base, CommonMixin):
     __tablename__ = "sources"
     uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
     cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
+    type: Mapped[SourceType] = mapped_column(
+        Enum(SourceType), default=SourceType.UNKNOWN, index=True
+    )

-    def __init__(self, uri: str, cloned_path: str) -> None:
+    def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
         """Initialize a new Source instance for typing purposes."""
         super().__init__()
         self.uri = uri
         self.cloned_path = cloned_path
+        self.type = source_type
+
+
+class Author(Base, CommonMixin):
+    """Author model."""
+
+    __tablename__ = "authors"
+
+    name: Mapped[str] = mapped_column(String(255), index=True, unique=True)
+    email: Mapped[str] = mapped_column(String(255), index=True, unique=True)
+
+
+class AuthorFileMapping(Base, CommonMixin):
+    """Author file mapping model."""
+
+    __tablename__ = "author_file_mappings"
+
+    author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"))
+    file_id: Mapped[int] = mapped_column(ForeignKey("files.id"))


 class File(Base, CommonMixin):
@@ -51,9 +84,12 @@ class File(Base, CommonMixin):
     cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
     sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
     size_bytes: Mapped[int] = mapped_column(Integer, default=0)
+    extension: Mapped[str] = mapped_column(String(255), default="", index=True)

     def __init__(  # noqa: PLR0913
         self,
+        created_at: datetime.datetime,
+        updated_at: datetime.datetime,
         source_id: int,
         cloned_path: str,
         mime_type: str = "",
@@ -63,6 +99,8 @@ class File(Base, CommonMixin):
     ) -> None:
         """Initialize a new File instance for typing purposes."""
         super().__init__()
+        self.created_at = created_at
+        self.updated_at = updated_at
         self.source_id = source_id
         self.cloned_path = cloned_path
         self.mime_type = mime_type
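Sources must now carry an explicit type, and UNKNOWN is rejected at creation time by the repository below. A construction sketch; the URI and path are placeholders:

    from kodit.source.source_models import Source, SourceType

    source = Source(
        uri="https://example.com/repo.git",  # placeholder
        cloned_path="/tmp/clone",  # placeholder
        source_type=SourceType.GIT,  # UNKNOWN raises in create_source
    )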
kodit/source/source_repository.py
CHANGED

@@ -3,7 +3,13 @@
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession

-from kodit.source.source_models import
+from kodit.source.source_models import (
+    Author,
+    AuthorFileMapping,
+    File,
+    Source,
+    SourceType,
+)


 class SourceRepository:
@@ -22,22 +28,12 @@ class SourceRepository:
         self.session = session

     async def create_source(self, source: Source) -> Source:
-        """
+        """Add a new source to the database."""
+        # Validate the source
+        if source.type == SourceType.UNKNOWN:
+            msg = "Source type is required"
+            raise ValueError(msg)

-        This method creates both a Source record and a linked FolderSource record
-        in a single transaction.
-
-        Args:
-            path: The absolute path of the folder to create a source for.
-
-        Returns:
-            The created Source model instance.
-
-        Note:
-            This method commits the transaction to ensure the source.id is available
-            for creating the linked FolderSource record.
-
-        """
         self.session.add(source)
         await self.session.commit()
         return source
@@ -52,6 +48,12 @@ class SourceRepository:
         await self.session.commit()
         return file

+    async def list_files_for_source(self, source_id: int) -> list[File]:
+        """List all files for a source."""
+        query = select(File).where(File.source_id == source_id)
+        result = await self.session.execute(query)
+        return list(result.scalars())
+
     async def num_files_for_source(self, source_id: int) -> int:
         """Get the number of files for a source.

@@ -103,3 +105,36 @@ class SourceRepository:
         query = select(Source).where(Source.id == source_id)
         result = await self.session.execute(query)
         return result.scalar_one_or_none()
+
+    async def get_or_create_author(self, name: str, email: str) -> Author:
+        """Get or create an author by name and email.
+
+        Args:
+            name: The name of the author.
+            email: The email of the author.
+
+        """
+        query = select(Author).where(Author.name == name, Author.email == email)
+        result = await self.session.execute(query)
+        author = result.scalar_one_or_none()
+        if not author:
+            author = Author(name=name, email=email)
+            self.session.add(author)
+            await self.session.commit()
+        return author
+
+    async def get_or_create_author_file_mapping(
+        self, author_id: int, file_id: int
+    ) -> AuthorFileMapping:
+        """Create a new author file mapping record in the database."""
+        query = select(AuthorFileMapping).where(
+            AuthorFileMapping.author_id == author_id,
+            AuthorFileMapping.file_id == file_id,
+        )
+        result = await self.session.execute(query)
+        mapping = result.scalar_one_or_none()
+        if not mapping:
+            mapping = AuthorFileMapping(author_id=author_id, file_id=file_id)
+            self.session.add(mapping)
+            await self.session.commit()
+        return mapping
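Both helpers follow a read-then-insert get-or-create pattern (so they assume a single writer rather than a database upsert). A usage sketch; the engine URL and names are placeholders:

    import asyncio

    from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine

    from kodit.source.source_repository import SourceRepository

    async def main() -> None:
        engine = create_async_engine("sqlite+aiosqlite:///kodit.db")  # placeholder URL
        async with AsyncSession(engine) as session:
            repo = SourceRepository(session)
            author = await repo.get_or_create_author("Ada Lovelace", "ada@example.com")
            # Idempotent: a second call returns the existing row instead of inserting.
            same = await repo.get_or_create_author("Ada Lovelace", "ada@example.com")
            assert author.id == same.id

    asyncio.run(main())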
kodit/source/source_service.py
CHANGED
@@ -8,7 +8,8 @@ source management.

 import mimetypes
 import shutil
-
+import tempfile
+from datetime import UTC, datetime
 from hashlib import sha256
 from pathlib import Path

@@ -17,9 +18,15 @@ import git
 import pydantic
 import structlog
 from tqdm import tqdm
-from uritools import isuri, urisplit

-from kodit.source.
+from kodit.source.git import is_valid_clone_target
+from kodit.source.ignore import IgnorePatterns
+from kodit.source.source_models import (
+    Author,
+    File,
+    Source,
+    SourceType,
+)
 from kodit.source.source_repository import SourceRepository


@@ -82,39 +89,16 @@ class SourceService:
         )

     async def create(self, uri_or_path_like: str) -> SourceView:
-        """Create a new source from a URI.
+        """Create a new source from a URI or path."""
+        # If it's possible to clone it, then do so
+        if is_valid_clone_target(uri_or_path_like):
+            return await self._create_git_source(uri_or_path_like)

-
-            uri: The URI of the source to create. Can be a git-like URI or a local
-                directory.
-
-        Raises:
-            ValueError: If the source type is not supported or if the folder doesn't
-                exist.
-
-        """
+        # Otherwise just treat it as a directory
         if Path(uri_or_path_like).is_dir():
             return await self._create_folder_source(Path(uri_or_path_like))
-
-
-        if parsed.scheme == "file":
-            return await self._create_folder_source(Path(parsed.path))
-        if parsed.scheme in ("git", "http", "https") and parsed.path.endswith(
-            ".git"
-        ):
-            return await self._create_git_source(uri_or_path_like)
-
-        # Try adding a .git suffix, sometimes people just pass the url
-        if not uri_or_path_like.endswith(".git"):
-            uri_or_path_like = uri_or_path_like + ".git"
-        try:
-            return await self._create_git_source(uri_or_path_like)
-        except git.GitCommandError:
-            raise
-        except ValueError:
-            pass
-
-        msg = f"Unsupported source type: {uri_or_path_like}"
+
+        msg = f"Unsupported source: {uri_or_path_like}"
         raise ValueError(msg)

     async def _create_folder_source(self, directory: Path) -> SourceView:
@@ -159,7 +143,11 @@ class SourceService:
         )

         source = await self.repository.create_source(
-            Source(
+            Source(
+                uri=directory.as_uri(),
+                cloned_path=str(clone_path),
+                source_type=SourceType.FOLDER,
+            ),
         )

         # Add all files to the source
@@ -168,7 +156,7 @@ class SourceService:

         # Process each file in the source directory
         for path in tqdm(clone_path.rglob("*"), total=file_count, leave=False):
-            await self._process_file(source
+            await self._process_file(source, path.absolute())

         return SourceView(
             id=source.id,
@@ -188,7 +176,13 @@ class SourceService:
             ValueError: If the repository cloning fails.

         """
-
+        self.log.debug("Normalising git uri", uri=uri)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            git.Repo.clone_from(uri, temp_dir)
+            remote = git.Repo(temp_dir).remote()
+            uri = remote.url
+
+        self.log.debug("Checking if source already exists", uri=uri)
         source = await self.repository.get_source_by_uri(uri)

         if source:
@@ -208,18 +202,27 @@ class SourceService:
             msg = f"Failed to clone repository: {e}"
             raise ValueError(msg) from e

+        self.log.debug("Creating source", uri=uri, clone_path=str(clone_path))
         source = await self.repository.create_source(
-            Source(
+            Source(
+                uri=uri,
+                cloned_path=str(clone_path),
+                source_type=SourceType.GIT,
+            ),
         )

-        #
-
-
+        # Get the ignore patterns for this source
+        ignore_patterns = IgnorePatterns(clone_path)
+
+        # Get all files that are not ignored
+        files = [
+            f for f in clone_path.rglob("*") if not ignore_patterns.should_ignore(f)
+        ]

         # Process each file in the source directory
-        self.log.info("Inspecting files", source_id=source.id)
-        for path in tqdm(
-            await self._process_file(source
+        self.log.info("Inspecting files", source_id=source.id, num_files=len(files))
+        for path in tqdm(files, total=len(files), leave=False):
+            await self._process_file(source, path.absolute())

         return SourceView(
             id=source.id,
@@ -231,32 +234,79 @@ class SourceService:

     async def _process_file(
         self,
-
-
+        source: Source,
+        cloned_file: Path,
     ) -> None:
         """Process a single file for indexing."""
-        if not
+        if not cloned_file.is_file():
             return

-
+        # If this file exists in a git repository, pull out the file's metadata
+        authors: list[Author] = []
+        first_modified_at: datetime | None = None
+        last_modified_at: datetime | None = None
+        if source.type == SourceType.GIT:
+            # Get the git repository
+            git_repo = git.Repo(source.cloned_path)
+
+            # Get the last commit that touched this file
+            commits = list(
+                git_repo.iter_commits(
+                    paths=str(cloned_file),
+                    all=True,
+                )
+            )
+            if len(commits) > 0:
+                last_modified_at = commits[0].committed_datetime
+                first_modified_at = commits[-1].committed_datetime
+
+            # Get the file's blame
+            blames = git_repo.blame("HEAD", str(cloned_file))
+
+            # Extract the blame's authors
+            actors = [
+                commit.author
+                for blame in blames or []
+                for commit in blame
+                if isinstance(commit, git.Commit)
+            ]
+
+            # Get or create the authors in the database
+            for actor in actors:
+                if actor.name or actor.email:
+                    author = await self.repository.get_or_create_author(
+                        actor.name or "", actor.email or ""
+                    )
+                    authors.append(author)
+
+        # Create the file record
+        async with aiofiles.open(cloned_file, "rb") as f:
             content = await f.read()
-            mime_type = mimetypes.guess_type(
+            mime_type = mimetypes.guess_type(cloned_file)
             sha = sha256(content).hexdigest()

         # Create file record
         file = File(
-
-
+            created_at=first_modified_at or datetime.now(UTC),
+            updated_at=last_modified_at or datetime.now(UTC),
+            source_id=source.id,
+            cloned_path=str(cloned_file),
             mime_type=mime_type[0]
             if mime_type and mime_type[0]
             else "application/octet-stream",
-            uri=
+            uri=cloned_file.as_uri(),
             sha256=sha,
             size_bytes=len(content),
         )

         await self.repository.create_file(file)

+        # Create mapping of authors to the file
+        for author in authors:
+            await self.repository.get_or_create_author_file_mapping(
+                author_id=author.id, file_id=file.id
+            )
+
     async def list_sources(self) -> list[SourceView]:
         """List all available sources.

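A stand-alone sketch of the GitPython calls _process_file now makes to recover per-file history and authorship; the repository path and file are placeholders:

    import git

    repo = git.Repo("/tmp/example-clone")  # placeholder path
    path = "README.md"  # placeholder file

    commits = list(repo.iter_commits(paths=path, all=True))
    if commits:
        print("last touched:", commits[0].committed_datetime)
        print("first touched:", commits[-1].committed_datetime)

    # blame() yields (commit, lines) pairs; collecting commit.author gives one
    # entry per blame hunk, which the service then de-duplicates in the DB.
    authors = {c.author.email: c.author.name for c, _lines in repo.blame("HEAD", path)}
    print(authors)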
{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.2.0
+Version: 0.2.2
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -18,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Code Generators
 Requires-Python: >=3.12
+Requires-Dist: accelerate>=1.7.0
 Requires-Dist: aiofiles>=24.1.0
 Requires-Dist: aiosqlite>=0.20.0
 Requires-Dist: alembic>=1.15.2
@@ -35,6 +36,7 @@ Requires-Dist: hf-xet>=1.1.2
 Requires-Dist: httpx-retries>=0.3.2
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: openai>=1.82.0
+Requires-Dist: pathspec>=0.12.1
 Requires-Dist: posthog>=4.0.1
 Requires-Dist: pydantic-settings>=2.9.1
 Requires-Dist: pytable-formatter>=0.1.1
@@ -92,6 +94,7 @@ code. This index is used to build a snippet library, ready for ingestion into an
 - Build comprehensive snippet libraries for LLM ingestion
 - Support for multiple codebase types and languages
 - Efficient indexing and search capabilities
+- Privacy first: respects .gitignore and .noindex files.

 ### MCP Server

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/RECORD
CHANGED

@@ -1,9 +1,9 @@
 kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
 kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
-kodit/_version.py,sha256=
+kodit/_version.py,sha256=OjGGK5TcHVG44Y62aAqeJH4CskkZoY9ydbHOtCDew50,511
 kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
 kodit/cli.py,sha256=i7eEt0FdIQGEfXKFte-8fBcZZGE8BPXBp40aGwJDQGI,11323
-kodit/config.py,sha256=
+kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
 kodit/database.py,sha256=WB1KpVxUYPgiJGU0gJa2hqytYB8wJEJ5z3WayhWzNMU,2403
 kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
 kodit/mcp.py,sha256=QruyPskWB0_x59pkfj5BBeXuR13GMny5TAZEa2j4U9s,5752
@@ -11,36 +11,37 @@ kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
 kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
 kodit/bm25/keyword_search_factory.py,sha256=rp-wx3DJsc2KlELK1V337EyeYvmwnMQwUqOo1WVPSmg,631
 kodit/bm25/keyword_search_service.py,sha256=aBbWQKgQmi2re3EIHdXFS00n7Wj3b2D0pZsLZ4qmHfE,754
-kodit/bm25/local_bm25.py,sha256=
-kodit/bm25/vectorchord_bm25.py,sha256=
+kodit/bm25/local_bm25.py,sha256=nokrd_xAeqXi3m68X5P1R5KBhRRB1E2L_J6Zgm26PCg,3869
+kodit/bm25/vectorchord_bm25.py,sha256=0p_FgliaoevB8GLSmzWnV3zUjdcWgCgOKIpLURr7Qfo,6549
 kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
-kodit/embedding/embedding_factory.py,sha256=
+kodit/embedding/embedding_factory.py,sha256=UbrTl3NEqBBH3ecvRG7vGW5wuvUMbWJEWbAAFALOGqs,2141
 kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
 kodit/embedding/embedding_repository.py,sha256=-ux3scpBzel8c0pMH9fNOEsSXFIzl-IfgaWrkTb1szo,6907
-kodit/embedding/local_vector_search_service.py,sha256=
+kodit/embedding/local_vector_search_service.py,sha256=dgMi8hQNUbYEgHnEYmLIpon4yLduoNUpu7k7VP6sOHI,2042
 kodit/embedding/vector_search_service.py,sha256=pQJ129QjGrAWOXzqkywmgtDRpy8_gtzYgkivyqF9Vrs,1009
-kodit/embedding/vectorchord_vector_search_service.py,sha256=
+kodit/embedding/vectorchord_vector_search_service.py,sha256=TKNR3HgWHwwWtJ1SsvSaj_BXLJ_uw6Bdr_tpaePMeAA,5383
 kodit/embedding/embedding_provider/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
-kodit/embedding/embedding_provider/embedding_provider.py,sha256=
+kodit/embedding/embedding_provider/embedding_provider.py,sha256=IC7fZaZ_ze-DxpxKfK44pRDwHWUQhVIqVKKQ3alO5Qc,1882
 kodit/embedding/embedding_provider/hash_embedding_provider.py,sha256=nAhlhh8j8PqqCCbhVl26Y8ntFBm2vJBCtB4X04g5Wwg,2638
-kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=
-kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256
+kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=WP8lw6XG7v1_5Mw4_rhIOETooYRsxhkwmFaXCqCouQU,1977
+kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256=-phz5FKYM_tI3Q4_3SPzjzIOK3k92Uk52TAOTmoVoWI,2722
 kodit/enrichment/__init__.py,sha256=vBEolHpKaHUhfINX0dSGyAPlvgpLNAer9YzFtdvCB24,18
-kodit/enrichment/enrichment_factory.py,sha256=
+kodit/enrichment/enrichment_factory.py,sha256=JbWFNciB6Yf79SFVjG9UhLgCcrXZ1rIJrenU8QmNLBE,1411
 kodit/enrichment/enrichment_service.py,sha256=87Sd3gGbEMJYb_wVrHG8L1yGIZmQNR7foUS4_y94azI,977
 kodit/enrichment/enrichment_provider/__init__.py,sha256=klf8iuLVWX4iRz-DZQauFFNAoJC5CByczh48TBZPW-o,27
 kodit/enrichment/enrichment_provider/enrichment_provider.py,sha256=E0H5rq3OENM0yYbA8K_3nSnj5lUHCpoIOqpWLo-2MVU,413
-kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=
-kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=
+kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=RqwUD0BnwRQ8zlkFNkaKq8d58r33k2jIdnSdf6zla1w,3325
+kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=0Yw7h9RXptoI4bKuqJSKIRQXPUUhNV7eACavgoy_T8s,2874
 kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
 kodit/indexing/fusion.py,sha256=TZb4fPAedXdEUXzwzOofW98QIOymdbclBOP1KOijuEk,1674
 kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
 kodit/indexing/indexing_repository.py,sha256=dqOS0pxKM6bUjMXWqYukAK8XdiD36OnskFASgZRXRQM,6955
-kodit/indexing/indexing_service.py,sha256=
+kodit/indexing/indexing_service.py,sha256=79BZ4yaSJqADkivzjsq1bDCBtbfWikVRC7Fjlp1HmZw,10885
 kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
 kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
 kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
 kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
+kodit/migrations/versions/42e836b21102_add_authors.py,sha256=KmXlHb_y8bIa_ABNU67zZi13r0DAfHA9G8tjQNkdITM,2638
 kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
 kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
 kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
@@ -55,13 +56,15 @@ kodit/snippets/languages/javascript.scm,sha256=Ini5TsVNmcBKQ8aL46a5Id9ut0g9Udmvm
 kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
 kodit/snippets/languages/typescript.scm,sha256=U-ujbbv4tylbUBj9wuhL-e5cW6hmgPCNs4xrIX3r_hE,448
 kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
-kodit/source/
-kodit/source/
-kodit/source/
+kodit/source/git.py,sha256=CpNczc06SbxpzfQKq76lZFzuol10ZJvTRSzeXW9DFUs,363
+kodit/source/ignore.py,sha256=W7cuIrYlgfu3S1qyoIepXe8PqYmtFv61Tt5RO8cbZbg,1701
+kodit/source/source_models.py,sha256=lCaaoukLlMHuRWJBuYM2nkNKGtFASgbk7ZXq8kp4H5c,3519
+kodit/source/source_repository.py,sha256=4L-W0uE4LOB9LQlefk5f2sgHlsJjj8t33USPxU0na40,4448
+kodit/source/source_service.py,sha256=v-lY-7tsNFCyXo9yCUo7Q00NOWYKGiDB_M2-Hr8hp3U,11391
 kodit/util/__init__.py,sha256=bPu6CtqDWCRGU7VgW2_aiQrCBi8G89FS6k1PjvDajJ0,37
 kodit/util/spinner.py,sha256=R9bzrHtBiIH6IfLbmsIVHL53s8vg-tqW4lwGGALu4dw,1932
-kodit-0.2.
-kodit-0.2.
-kodit-0.2.
-kodit-0.2.
-kodit-0.2.
+kodit-0.2.2.dist-info/METADATA,sha256=UU1curOx-XMql_IiXty-eoz-MJrd5QdlzfCj7ZoSzhg,5857
+kodit-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kodit-0.2.2.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+kodit-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kodit-0.2.2.dist-info/RECORD,,

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/WHEEL
File without changes

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/entry_points.txt
File without changes

{kodit-0.2.0.dist-info → kodit-0.2.2.dist-info}/licenses/LICENSE
File without changes