kodit 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kodit might be problematic.
- kodit/_version.py +2 -2
- kodit/bm25/local_bm25.py +8 -0
- kodit/bm25/vectorchord_bm25.py +4 -1
- kodit/config.py +14 -24
- kodit/embedding/embedding_factory.py +20 -6
- kodit/embedding/embedding_provider/openai_embedding_provider.py +3 -1
- kodit/embedding/local_vector_search_service.py +4 -0
- kodit/embedding/vectorchord_vector_search_service.py +10 -2
- kodit/enrichment/enrichment_factory.py +23 -7
- kodit/enrichment/enrichment_provider/local_enrichment_provider.py +4 -0
- kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +5 -1
- kodit/indexing/indexing_service.py +4 -0
- kodit/migrations/versions/42e836b21102_add_authors.py +64 -0
- kodit/source/git.py +16 -0
- kodit/source/ignore.py +53 -0
- kodit/source/source_models.py +40 -2
- kodit/source/source_repository.py +51 -16
- kodit/source/source_service.py +99 -32
- {kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/METADATA +3 -1
- {kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/RECORD +23 -20
- {kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/WHEEL +0 -0
- {kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
kodit/bm25/local_bm25.py
CHANGED
@@ -64,6 +64,10 @@ class BM25Service(KeywordSearchProvider):
     async def index(self, corpus: list[BM25Document]) -> None:
         """Index a new corpus."""
         self.log.debug("Indexing corpus")
+        if not corpus or len(corpus) == 0:
+            self.log.warning("Corpus is empty, skipping bm25 index")
+            return
+
         vocab = self._tokenize([doc.text for doc in corpus])
         self._retriever().index(vocab, show_progress=False)
         self._retriever().save(self.index_path)
@@ -77,6 +81,10 @@ class BM25Service(KeywordSearchProvider):
             self.log.warning("Top k is 0, returning empty list")
             return []
 
+        # Check that the index has data
+        if not hasattr(self._retriever(), "scores"):
+            return []
+
         # Get the number of documents in the index
         num_docs = self._retriever().scores["num_docs"]
         if num_docs == 0:
kodit/bm25/vectorchord_bm25.py
CHANGED
@@ -2,6 +2,7 @@
 
 from typing import Any
 
+import structlog
 from sqlalchemy import Result, TextClause, bindparam, text
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -93,6 +94,7 @@ class VectorChordBM25(KeywordSearchProvider):
         """Initialize the VectorChord BM25."""
         self.__session = session
         self._initialized = False
+        self.log = structlog.get_logger(__name__)
 
     async def _initialize(self) -> None:
         """Initialize the VectorChord environment."""
@@ -149,7 +151,8 @@ class VectorChordBM25(KeywordSearchProvider):
             if doc.snippet_id is not None and doc.text is not None and doc.text != ""
         ]
 
-        if not corpus:
+        if not corpus or len(corpus) == 0:
+            self.log.warning("Corpus is empty, skipping bm25 index")
             return
 
         # Execute inserts
kodit/config.py
CHANGED
@@ -14,7 +14,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 if TYPE_CHECKING:
     from collections.abc import Callable, Coroutine
 
-    from openai import AsyncOpenAI
 
     from kodit.database import Database
 
@@ -25,13 +24,16 @@ DEFAULT_LOG_FORMAT = "pretty"
 DEFAULT_DISABLE_TELEMETRY = False
 T = TypeVar("T")
 
+EndpointType = Literal["openai"]
+
 
 class Endpoint(BaseModel):
     """Endpoint provides configuration for an AI service."""
 
-    type:
-    api_key: str | None = None
+    type: EndpointType | None = None
     base_url: str | None = None
+    model: str | None = None
+    api_key: str | None = None
 
 
 class Search(BaseModel):
@@ -57,15 +59,20 @@ class AppContext(BaseSettings):
     log_format: str = Field(default=DEFAULT_LOG_FORMAT)
     disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
     default_endpoint: Endpoint | None = Field(
-        default=
-            type="openai",
-            base_url="https://api.openai.com/v1",
-        ),
+        default=None,
         description=(
             "Default endpoint to use for all AI interactions "
            "(can be overridden by task-specific configuration)."
         ),
     )
+    embedding_endpoint: Endpoint | None = Field(
+        default=None,
+        description="Endpoint to use for embedding.",
+    )
+    enrichment_endpoint: Endpoint | None = Field(
+        default=None,
+        description="Endpoint to use for enrichment.",
+    )
     default_search: Search = Field(
         default=Search(),
     )
@@ -95,23 +102,6 @@
         await self._db.run_migrations(self.db_url)
         return self._db
 
-    def get_default_openai_client(self) -> AsyncOpenAI | None:
-        """Get the default OpenAI client, if it is configured."""
-        from openai import AsyncOpenAI
-
-        endpoint = self.default_endpoint
-        if not (
-            endpoint
-            and endpoint.type == "openai"
-            and endpoint.api_key
-            and endpoint.base_url
-        ):
-            return None
-        return AsyncOpenAI(
-            api_key=endpoint.api_key,
-            base_url=endpoint.base_url,
-        )
-
 
 with_app_context = click.make_pass_decorator(AppContext)
 
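Note: with `get_default_openai_client` removed, endpoints are now plain configuration that the factories interpret. A minimal construction sketch using only the fields visible in the diff above; the key, URL, and model values are placeholders, not defaults shipped by kodit:

```python
from kodit.config import AppContext, Endpoint

# Hypothetical values; only the field names come from the 0.2.2 models above.
ctx = AppContext(
    embedding_endpoint=Endpoint(
        type="openai",
        base_url="https://api.openai.com/v1",
        model="text-embedding-3-small",
        api_key="sk-example",
    ),
    # enrichment_endpoint is left unset here, so enrichment falls back to
    # default_endpoint (also unset) and then to the local provider.
)
```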
kodit/embedding/embedding_factory.py
CHANGED
@@ -2,7 +2,7 @@
 
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from kodit.config import AppContext
+from kodit.config import AppContext, Endpoint
 from kodit.embedding.embedding_provider.local_embedding_provider import (
     CODE,
     LocalEmbeddingProvider,
@@ -16,19 +16,33 @@ from kodit.embedding.vector_search_service import (
     VectorSearchService,
 )
 from kodit.embedding.vectorchord_vector_search_service import (
+    TaskName,
     VectorChordVectorSearchService,
 )
 
 
+def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
+    """Get the endpoint configuration for the embedding service."""
+    return app_context.embedding_endpoint or app_context.default_endpoint or None
+
+
 def embedding_factory(
-    task_name:
+    task_name: TaskName, app_context: AppContext, session: AsyncSession
 ) -> VectorSearchService:
     """Create an embedding service."""
     embedding_repository = EmbeddingRepository(session=session)
-
-
-    if
-
+    endpoint = _get_endpoint_configuration(app_context)
+
+    if endpoint and endpoint.type == "openai":
+        from openai import AsyncOpenAI
+
+        embedding_provider = OpenAIEmbeddingProvider(
+            openai_client=AsyncOpenAI(
+                api_key=endpoint.api_key or "default",
+                base_url=endpoint.base_url or "https://api.openai.com/v1",
+            ),
+            model_name=endpoint.model or "text-embedding-3-small",
+        )
     else:
         embedding_provider = LocalEmbeddingProvider(CODE)
 
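A rough usage sketch of the new factory signature; the session and app-context wiring is assumed and not shown in this diff:

```python
from sqlalchemy.ext.asyncio import AsyncSession

from kodit.config import AppContext
from kodit.embedding.embedding_factory import embedding_factory


async def build_code_embeddings(app_context: AppContext, session: AsyncSession):
    # "code" is one of the TaskName literals ("code" | "text") introduced in 0.2.2.
    # OpenAI is used only when an endpoint of type "openai" resolves; otherwise
    # the local embedding provider is chosen.
    return embedding_factory(task_name="code", app_context=app_context, session=session)
```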
kodit/embedding/embedding_provider/openai_embedding_provider.py
CHANGED
@@ -27,7 +27,9 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
         self.log = structlog.get_logger(__name__)
         self.openai_client = openai_client
         self.model_name = model_name
-        self.encoding = tiktoken.encoding_for_model(
+        self.encoding = tiktoken.encoding_for_model(
+            "text-embedding-3-small"
+        )  # Sensible default
 
     async def embed(self, data: list[str]) -> list[Vector]:
         """Embed a list of documents."""
kodit/embedding/local_vector_search_service.py
CHANGED
@@ -29,6 +29,10 @@ class LocalVectorSearchService(VectorSearchService):
 
     async def index(self, data: list[VectorSearchRequest]) -> None:
         """Embed a list of documents."""
+        if not data or len(data) == 0:
+            self.log.warning("Embedding data is empty, skipping embedding")
+            return
+
         embeddings = await self.embedding_provider.embed([i.text for i in data])
         for i, x in zip(data, embeddings, strict=False):
             await self.embedding_repository.create_embedding(
kodit/embedding/vectorchord_vector_search_service.py
CHANGED
@@ -1,7 +1,8 @@
 """Vectorchord vector search."""
 
-from typing import Any
+from typing import Any, Literal
 
+import structlog
 from sqlalchemy import Result, TextClause, text
 from sqlalchemy.ext.asyncio import AsyncSession
 
@@ -51,13 +52,15 @@ ORDER BY score ASC
 LIMIT :top_k;
 """
 
+TaskName = Literal["code", "text"]
+
 
 class VectorChordVectorSearchService(VectorSearchService):
     """VectorChord vector search."""
 
     def __init__(
         self,
-        task_name:
+        task_name: TaskName,
         session: AsyncSession,
         embedding_provider: EmbeddingProvider,
     ) -> None:
@@ -67,6 +70,7 @@ class VectorChordVectorSearchService(VectorSearchService):
         self._initialized = False
         self.table_name = f"vectorchord_{task_name}_embeddings"
         self.index_name = f"{self.table_name}_idx"
+        self.log = structlog.get_logger(__name__)
 
     async def _initialize(self) -> None:
         """Initialize the VectorChord environment."""
@@ -128,6 +132,10 @@ class VectorChordVectorSearchService(VectorSearchService):
 
     async def index(self, data: list[VectorSearchRequest]) -> None:
         """Embed a list of documents."""
+        if not data or len(data) == 0:
+            self.log.warning("Embedding data is empty, skipping embedding")
+            return
+
         embeddings = await self.embedding_provider.embed([doc.text for doc in data])
         # Execute inserts
         await self._execute(
kodit/enrichment/enrichment_factory.py
CHANGED
@@ -1,6 +1,6 @@
 """Embedding service."""
 
-from kodit.config import AppContext
+from kodit.config import AppContext, Endpoint
 from kodit.enrichment.enrichment_provider.local_enrichment_provider import (
     LocalEnrichmentProvider,
 )
@@ -13,11 +13,27 @@ from kodit.enrichment.enrichment_service import (
 )
 
 
+def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
+    """Get the endpoint configuration for the enrichment service."""
+    return app_context.enrichment_endpoint or app_context.default_endpoint or None
+
+
 def enrichment_factory(app_context: AppContext) -> EnrichmentService:
-    """Create an
-
-
-
-
+    """Create an enrichment service."""
+    endpoint = _get_endpoint_configuration(app_context)
+    endpoint = app_context.enrichment_endpoint or app_context.default_endpoint or None
+
+    if endpoint and endpoint.type == "openai":
+        from openai import AsyncOpenAI
+
+        enrichment_provider = OpenAIEnrichmentProvider(
+            openai_client=AsyncOpenAI(
+                api_key=endpoint.api_key or "default",
+                base_url=endpoint.base_url or "https://api.openai.com/v1",
+            ),
+            model_name=endpoint.model or "gpt-4o-mini",
+        )
+    else:
+        enrichment_provider = LocalEnrichmentProvider()
 
-    return LLMEnrichmentService(
+    return LLMEnrichmentService(enrichment_provider=enrichment_provider)
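A small sketch of how the enrichment side now resolves providers; it assumes an AppContext can be built from defaults or the environment, which this diff does not show:

```python
from kodit.config import AppContext, Endpoint
from kodit.enrichment.enrichment_factory import enrichment_factory

# No endpoint configured anywhere: the factory returns an LLMEnrichmentService
# backed by LocalEnrichmentProvider.
local_service = enrichment_factory(AppContext())

# An "openai"-type enrichment endpoint selects OpenAIEnrichmentProvider instead
# (key and model values here are placeholders).
remote_service = enrichment_factory(
    AppContext(
        enrichment_endpoint=Endpoint(type="openai", api_key="sk-example", model="gpt-4o-mini"),
    )
)
```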
kodit/enrichment/enrichment_provider/local_enrichment_provider.py
CHANGED
@@ -34,6 +34,10 @@ class LocalEnrichmentProvider(EnrichmentProvider):
 
     async def enrich(self, data: list[str]) -> list[str]:
         """Enrich a list of strings."""
+        if not data or len(data) == 0:
+            self.log.warning("Data is empty, skipping enrichment")
+            return []
+
         from transformers.models.auto.modeling_auto import (
             AutoModelForCausalLM,
         )
kodit/enrichment/enrichment_provider/openai_enrichment_provider.py
CHANGED
@@ -27,10 +27,14 @@ class OpenAIEnrichmentProvider(EnrichmentProvider):
         self.log = structlog.get_logger(__name__)
         self.openai_client = openai_client
         self.model_name = model_name
-        self.encoding = tiktoken.encoding_for_model(
+        self.encoding = tiktoken.encoding_for_model("gpt-4o-mini")  # Approximation
 
     async def enrich(self, data: list[str]) -> list[str]:
         """Enrich a list of documents."""
+        if not data or len(data) == 0:
+            self.log.warning("Data is empty, skipping enrichment")
+            return []
+
         # Process batches in parallel with a semaphore to limit concurrent requests
         sem = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)
 
kodit/indexing/indexing_service.py
CHANGED
@@ -289,6 +289,10 @@ class IndexService:
 
         """
         files = await self.repository.files_for_index(index_id)
+        if not files:
+            self.log.warning("No files to create snippets for")
+            return
+
         for file in tqdm(files, total=len(files), leave=False):
             # Skip unsupported file types
             if file.mime_type in MIME_BLACKLIST:
kodit/migrations/versions/42e836b21102_add_authors.py
ADDED
@@ -0,0 +1,64 @@
+# ruff: noqa
+"""add authors
+
+Revision ID: 42e836b21102
+Revises: c3f5137d30f5
+Create Date: 2025-06-13 14:48:50.152940
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '42e836b21102'
+down_revision: Union[str, None] = 'c3f5137d30f5'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('authors',
+    sa.Column('name', sa.String(length=255), nullable=False),
+    sa.Column('email', sa.String(length=255), nullable=False),
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(op.f('ix_authors_email'), 'authors', ['email'], unique=True)
+    op.create_index(op.f('ix_authors_name'), 'authors', ['name'], unique=True)
+    op.create_table('author_file_mappings',
+    sa.Column('author_id', sa.Integer(), nullable=False),
+    sa.Column('file_id', sa.Integer(), nullable=False),
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
+    sa.ForeignKeyConstraint(['author_id'], ['authors.id'], ),
+    sa.ForeignKeyConstraint(['file_id'], ['files.id'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.add_column('files', sa.Column('extension', sa.String(length=255), nullable=False))
+    op.create_index(op.f('ix_files_extension'), 'files', ['extension'], unique=False)
+    op.add_column('sources', sa.Column('type', sa.Enum('UNKNOWN', 'FOLDER', 'GIT', name='sourcetype'), nullable=False))
+    op.create_index(op.f('ix_sources_type'), 'sources', ['type'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_index(op.f('ix_sources_type'), table_name='sources')
+    op.drop_column('sources', 'type')
+    op.drop_index(op.f('ix_files_extension'), table_name='files')
+    op.drop_column('files', 'extension')
+    op.drop_table('author_file_mappings')
+    op.drop_index(op.f('ix_authors_name'), table_name='authors')
+    op.drop_index(op.f('ix_authors_email'), table_name='authors')
+    op.drop_table('authors')
+    # ### end Alembic commands ###
kodit/source/git.py
ADDED
@@ -0,0 +1,16 @@
+"""Git utilities."""
+
+import tempfile
+
+import git
+
+
+def is_valid_clone_target(target: str) -> bool:
+    """Return True if the target is clonable."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        try:
+            git.Repo.clone_from(target, temp_dir)
+        except git.GitCommandError:
+            return False
+        else:
+            return True
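The helper performs a throwaway clone into a temporary directory, so it works for both remote URLs and local paths (a real network clone can be slow). A short usage sketch; the URL and path are placeholders:

```python
from kodit.source.git import is_valid_clone_target

# True for anything git can clone, remote or local.
print(is_valid_clone_target("https://github.com/example/repo.git"))
# False for a plain, non-git directory.
print(is_valid_clone_target("/tmp/not-a-repo"))
```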
kodit/source/ignore.py
ADDED
@@ -0,0 +1,53 @@
+"""Ignore patterns."""
+
+from pathlib import Path
+
+import git
+import pathspec
+
+from kodit.source.git import is_valid_clone_target
+
+
+class IgnorePatterns:
+    """Ignore patterns."""
+
+    def __init__(self, base_dir: Path) -> None:
+        """Initialize the ignore patterns."""
+        if not base_dir.is_dir():
+            msg = f"Base directory is not a directory: {base_dir}"
+            raise ValueError(msg)
+
+        self.base_dir = base_dir
+
+        # Check if the base_dir is a valid git repository
+        self.git_repo = None
+        if is_valid_clone_target(str(base_dir)):
+            self.git_repo = git.Repo(base_dir)
+
+    def should_ignore(self, path: Path) -> bool:
+        """Check if a path should be ignored."""
+        if path.is_dir():
+            return False
+
+        # Get the path relative to the base_dir
+        relative_path = path.relative_to(self.base_dir)
+
+        # If this file is _part_ of a .git directory, then it should be ignored
+        if relative_path.as_posix().startswith(".git"):
+            return True
+
+        # If it is a git repository, then we need to check if the file is ignored
+        if self.git_repo and len(self.git_repo.ignored(path)) > 0:
+            return True
+
+        # If the repo has a .noindex file
+        noindex_path = Path(self.base_dir / ".noindex")
+        if noindex_path.exists():
+            with noindex_path.open() as f:
+                patterns = [line.strip() for line in f if line.strip()]
+            if patterns:
+                spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
+                if spec.match_file(relative_path.as_posix()):
+                    return True
+
+        return False
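A sketch of how the new ignore logic composes .gitignore handling (via GitPython's `ignored`) with an optional `.noindex` file of gitwildmatch patterns. The directory layout below is made up purely for illustration:

```python
from pathlib import Path

from kodit.source.ignore import IgnorePatterns

# Hypothetical checkout directory.
repo_dir = Path("/tmp/example-checkout")
(repo_dir / "deploy").mkdir(parents=True, exist_ok=True)
# .noindex takes gitwildmatch patterns, one per line.
(repo_dir / ".noindex").write_text("*.env\nsecrets/\n")

patterns = IgnorePatterns(repo_dir)
print(patterns.should_ignore(repo_dir / "src" / "main.py"))      # False
print(patterns.should_ignore(repo_dir / "deploy" / "prod.env"))  # True, matches *.env
```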
kodit/source/source_models.py
CHANGED
@@ -5,7 +5,10 @@ It includes models for tracking different types of sources (git repositories and
 folders) and their relationships.
 """
 
-
+import datetime
+from enum import Enum as EnumType
+
+from sqlalchemy import Enum, ForeignKey, Integer, String
 from sqlalchemy.orm import Mapped, mapped_column
 
 from kodit.database import Base, CommonMixin
@@ -14,6 +17,14 @@ from kodit.database import Base, CommonMixin
 __all__ = ["File", "Source"]
 
 
+class SourceType(EnumType):
+    """The type of source."""
+
+    UNKNOWN = 0
+    FOLDER = 1
+    GIT = 2
+
+
 class Source(Base, CommonMixin):
     """Base model for tracking code sources.
 
@@ -32,12 +43,34 @@ class Source(Base, CommonMixin):
     __tablename__ = "sources"
     uri: Mapped[str] = mapped_column(String(1024), index=True, unique=True)
     cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
+    type: Mapped[SourceType] = mapped_column(
+        Enum(SourceType), default=SourceType.UNKNOWN, index=True
+    )
 
-    def __init__(self, uri: str, cloned_path: str) -> None:
+    def __init__(self, uri: str, cloned_path: str, source_type: SourceType) -> None:
         """Initialize a new Source instance for typing purposes."""
         super().__init__()
         self.uri = uri
         self.cloned_path = cloned_path
+        self.type = source_type
+
+
+class Author(Base, CommonMixin):
+    """Author model."""
+
+    __tablename__ = "authors"
+
+    name: Mapped[str] = mapped_column(String(255), index=True, unique=True)
+    email: Mapped[str] = mapped_column(String(255), index=True, unique=True)
+
+
+class AuthorFileMapping(Base, CommonMixin):
+    """Author file mapping model."""
+
+    __tablename__ = "author_file_mappings"
+
+    author_id: Mapped[int] = mapped_column(ForeignKey("authors.id"))
+    file_id: Mapped[int] = mapped_column(ForeignKey("files.id"))
 
 
 class File(Base, CommonMixin):
@@ -51,9 +84,12 @@ class File(Base, CommonMixin):
     cloned_path: Mapped[str] = mapped_column(String(1024), index=True)
     sha256: Mapped[str] = mapped_column(String(64), default="", index=True)
     size_bytes: Mapped[int] = mapped_column(Integer, default=0)
+    extension: Mapped[str] = mapped_column(String(255), default="", index=True)
 
     def __init__(  # noqa: PLR0913
         self,
+        created_at: datetime.datetime,
+        updated_at: datetime.datetime,
         source_id: int,
         cloned_path: str,
         mime_type: str = "",
@@ -63,6 +99,8 @@ class File(Base, CommonMixin):
     ) -> None:
         """Initialize a new File instance for typing purposes."""
         super().__init__()
+        self.created_at = created_at
+        self.updated_at = updated_at
         self.source_id = source_id
         self.cloned_path = cloned_path
         self.mime_type = mime_type
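Constructing the updated model now requires an explicit source type (UNKNOWN is rejected by the repository, see the next file). A quick sketch with placeholder values; only the constructor signature comes from the diff:

```python
from kodit.source.source_models import Source, SourceType

# Placeholder URI and path for illustration.
source = Source(
    uri="https://github.com/example/repo.git",
    cloned_path="/tmp/clones/repo",
    source_type=SourceType.GIT,
)
```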
kodit/source/source_repository.py
CHANGED
@@ -3,7 +3,13 @@
 from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from kodit.source.source_models import
+from kodit.source.source_models import (
+    Author,
+    AuthorFileMapping,
+    File,
+    Source,
+    SourceType,
+)
 
 
 class SourceRepository:
@@ -22,22 +28,12 @@ class SourceRepository:
         self.session = session
 
     async def create_source(self, source: Source) -> Source:
-        """
+        """Add a new source to the database."""
+        # Validate the source
+        if source.type == SourceType.UNKNOWN:
+            msg = "Source type is required"
+            raise ValueError(msg)
 
-        This method creates both a Source record and a linked FolderSource record
-        in a single transaction.
-
-        Args:
-            path: The absolute path of the folder to create a source for.
-
-        Returns:
-            The created Source model instance.
-
-        Note:
-            This method commits the transaction to ensure the source.id is available
-            for creating the linked FolderSource record.
-
-        """
         self.session.add(source)
         await self.session.commit()
         return source
@@ -52,6 +48,12 @@ class SourceRepository:
         await self.session.commit()
         return file
 
+    async def list_files_for_source(self, source_id: int) -> list[File]:
+        """List all files for a source."""
+        query = select(File).where(File.source_id == source_id)
+        result = await self.session.execute(query)
+        return list(result.scalars())
+
     async def num_files_for_source(self, source_id: int) -> int:
         """Get the number of files for a source.
 
@@ -103,3 +105,36 @@ class SourceRepository:
         query = select(Source).where(Source.id == source_id)
         result = await self.session.execute(query)
         return result.scalar_one_or_none()
+
+    async def get_or_create_author(self, name: str, email: str) -> Author:
+        """Get or create an author by name and email.
+
+        Args:
+            name: The name of the author.
+            email: The email of the author.
+
+        """
+        query = select(Author).where(Author.name == name, Author.email == email)
+        result = await self.session.execute(query)
+        author = result.scalar_one_or_none()
+        if not author:
+            author = Author(name=name, email=email)
+            self.session.add(author)
+            await self.session.commit()
+        return author
+
+    async def get_or_create_author_file_mapping(
+        self, author_id: int, file_id: int
+    ) -> AuthorFileMapping:
+        """Create a new author file mapping record in the database."""
+        query = select(AuthorFileMapping).where(
+            AuthorFileMapping.author_id == author_id,
+            AuthorFileMapping.file_id == file_id,
+        )
+        result = await self.session.execute(query)
+        mapping = result.scalar_one_or_none()
+        if not mapping:
+            mapping = AuthorFileMapping(author_id=author_id, file_id=file_id)
+            self.session.add(mapping)
+            await self.session.commit()
+        return mapping
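A hedged sketch of the new author bookkeeping from an async context; the session, names, and IDs here are illustrative only:

```python
from sqlalchemy.ext.asyncio import AsyncSession

from kodit.source.source_repository import SourceRepository


async def record_author(session: AsyncSession, file_id: int) -> None:
    # Idempotent: both helpers return the existing row if one already matches.
    repo = SourceRepository(session)
    author = await repo.get_or_create_author("Ada Lovelace", "ada@example.com")
    await repo.get_or_create_author_file_mapping(author_id=author.id, file_id=file_id)
```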
kodit/source/source_service.py
CHANGED
@@ -8,7 +8,8 @@ source management.
 
 import mimetypes
 import shutil
-
+import tempfile
+from datetime import UTC, datetime
 from hashlib import sha256
 from pathlib import Path
 
@@ -17,9 +18,15 @@ import git
 import pydantic
 import structlog
 from tqdm import tqdm
-from uritools import isuri, urisplit
 
-from kodit.source.
+from kodit.source.git import is_valid_clone_target
+from kodit.source.ignore import IgnorePatterns
+from kodit.source.source_models import (
+    Author,
+    File,
+    Source,
+    SourceType,
+)
 from kodit.source.source_repository import SourceRepository
 
 
@@ -83,19 +90,13 @@ class SourceService:
 
     async def create(self, uri_or_path_like: str) -> SourceView:
         """Create a new source from a URI or path."""
+        # If it's possible to clone it, then do so
+        if is_valid_clone_target(uri_or_path_like):
+            return await self._create_git_source(uri_or_path_like)
+
+        # Otherwise just treat it as a directory
         if Path(uri_or_path_like).is_dir():
             return await self._create_folder_source(Path(uri_or_path_like))
-        if isuri(uri_or_path_like):
-            parsed = urisplit(uri_or_path_like)
-            if parsed.scheme == "file":
-                return await self._create_folder_source(Path(parsed.path))
-            if parsed.scheme in ("git", "http", "https") and parsed.path.endswith(
-                ".git"
-            ):
-                return await self._create_git_source(uri_or_path_like)
-            if not uri_or_path_like.endswith(".git"):
-                uri_or_path_like = uri_or_path_like.strip("/") + ".git"
-            return await self._create_git_source(uri_or_path_like)
 
         msg = f"Unsupported source: {uri_or_path_like}"
         raise ValueError(msg)
@@ -142,7 +143,11 @@ class SourceService:
         )
 
         source = await self.repository.create_source(
-            Source(
+            Source(
+                uri=directory.as_uri(),
+                cloned_path=str(clone_path),
+                source_type=SourceType.FOLDER,
+            ),
         )
 
         # Add all files to the source
@@ -151,7 +156,7 @@ class SourceService:
 
         # Process each file in the source directory
         for path in tqdm(clone_path.rglob("*"), total=file_count, leave=False):
-            await self._process_file(source
+            await self._process_file(source, path.absolute())
 
         return SourceView(
             id=source.id,
@@ -171,7 +176,13 @@ class SourceService:
             ValueError: If the repository cloning fails.
 
         """
-
+        self.log.debug("Normalising git uri", uri=uri)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            git.Repo.clone_from(uri, temp_dir)
+            remote = git.Repo(temp_dir).remote()
+            uri = remote.url
+
+        self.log.debug("Checking if source already exists", uri=uri)
         source = await self.repository.get_source_by_uri(uri)
 
         if source:
@@ -191,18 +202,27 @@ class SourceService:
             msg = f"Failed to clone repository: {e}"
             raise ValueError(msg) from e
 
+        self.log.debug("Creating source", uri=uri, clone_path=str(clone_path))
         source = await self.repository.create_source(
-            Source(
+            Source(
+                uri=uri,
+                cloned_path=str(clone_path),
+                source_type=SourceType.GIT,
+            ),
         )
 
-        #
-
-
+        # Get the ignore patterns for this source
+        ignore_patterns = IgnorePatterns(clone_path)
+
+        # Get all files that are not ignored
+        files = [
+            f for f in clone_path.rglob("*") if not ignore_patterns.should_ignore(f)
+        ]
 
         # Process each file in the source directory
-        self.log.info("Inspecting files", source_id=source.id)
-        for path in tqdm(
-            await self._process_file(source
+        self.log.info("Inspecting files", source_id=source.id, num_files=len(files))
+        for path in tqdm(files, total=len(files), leave=False):
+            await self._process_file(source, path.absolute())
 
         return SourceView(
             id=source.id,
@@ -214,32 +234,79 @@ class SourceService:
 
     async def _process_file(
         self,
-
-
+        source: Source,
+        cloned_file: Path,
     ) -> None:
         """Process a single file for indexing."""
-        if not
+        if not cloned_file.is_file():
             return
 
-
+        # If this file exists in a git repository, pull out the file's metadata
+        authors: list[Author] = []
+        first_modified_at: datetime | None = None
+        last_modified_at: datetime | None = None
+        if source.type == SourceType.GIT:
+            # Get the git repository
+            git_repo = git.Repo(source.cloned_path)
+
+            # Get the last commit that touched this file
+            commits = list(
+                git_repo.iter_commits(
+                    paths=str(cloned_file),
+                    all=True,
+                )
+            )
+            if len(commits) > 0:
+                last_modified_at = commits[0].committed_datetime
+                first_modified_at = commits[-1].committed_datetime
+
+            # Get the file's blame
+            blames = git_repo.blame("HEAD", str(cloned_file))
+
+            # Extract the blame's authors
+            actors = [
+                commit.author
+                for blame in blames or []
+                for commit in blame
+                if isinstance(commit, git.Commit)
+            ]
+
+            # Get or create the authors in the database
+            for actor in actors:
+                if actor.name or actor.email:
+                    author = await self.repository.get_or_create_author(
+                        actor.name or "", actor.email or ""
+                    )
+                    authors.append(author)
+
+        # Create the file record
+        async with aiofiles.open(cloned_file, "rb") as f:
             content = await f.read()
-        mime_type = mimetypes.guess_type(
+        mime_type = mimetypes.guess_type(cloned_file)
         sha = sha256(content).hexdigest()
 
         # Create file record
         file = File(
-
-
+            created_at=first_modified_at or datetime.now(UTC),
+            updated_at=last_modified_at or datetime.now(UTC),
+            source_id=source.id,
+            cloned_path=str(cloned_file),
             mime_type=mime_type[0]
             if mime_type and mime_type[0]
             else "application/octet-stream",
-            uri=
+            uri=cloned_file.as_uri(),
             sha256=sha,
             size_bytes=len(content),
         )
 
         await self.repository.create_file(file)
 
+        # Create mapping of authors to the file
+        for author in authors:
+            await self.repository.get_or_create_author_file_mapping(
+                author_id=author.id, file_id=file.id
+            )
+
     async def list_sources(self) -> list[SourceView]:
         """List all available sources.
 
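The new `_process_file` path derives file timestamps and authorship from git history and blame. A standalone GitPython sketch of the same metadata extraction; the helper name and paths are hypothetical:

```python
from pathlib import Path

import git


def file_git_metadata(repo_path: Path, file_path: Path):
    """Collect what kodit now records per file: first/last commit dates and blame authors."""
    repo = git.Repo(repo_path)
    # Commits that touched this file, newest first.
    commits = list(repo.iter_commits(paths=str(file_path), all=True))
    last_modified = commits[0].committed_datetime if commits else None
    first_modified = commits[-1].committed_datetime if commits else None
    # Blame entries are (commit, lines) pairs; keep only the commit authors.
    authors = {
        (commit.author.name, commit.author.email)
        for blame in repo.blame("HEAD", str(file_path)) or []
        for commit in blame
        if isinstance(commit, git.Commit)
    }
    return first_modified, last_modified, authors
```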
{kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kodit
-Version: 0.2.1
+Version: 0.2.2
 Summary: Code indexing for better AI code generation
 Project-URL: Homepage, https://docs.helixml.tech/kodit/
 Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -36,6 +36,7 @@ Requires-Dist: hf-xet>=1.1.2
 Requires-Dist: httpx-retries>=0.3.2
 Requires-Dist: httpx>=0.28.1
 Requires-Dist: openai>=1.82.0
+Requires-Dist: pathspec>=0.12.1
 Requires-Dist: posthog>=4.0.1
 Requires-Dist: pydantic-settings>=2.9.1
 Requires-Dist: pytable-formatter>=0.1.1
@@ -93,6 +94,7 @@ code. This index is used to build a snippet library, ready for ingestion into an
 - Build comprehensive snippet libraries for LLM ingestion
 - Support for multiple codebase types and languages
 - Efficient indexing and search capabilities
+- Privacy first: respects .gitignore and .noindex files.
 
 ### MCP Server
 
{kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
 kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
 kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
-kodit/_version.py,sha256=
+kodit/_version.py,sha256=OjGGK5TcHVG44Y62aAqeJH4CskkZoY9ydbHOtCDew50,511
 kodit/app.py,sha256=Mr5BFHOHx5zppwjC4XPWVvHjwgl1yrKbUjTWXKubJQM,891
 kodit/cli.py,sha256=i7eEt0FdIQGEfXKFte-8fBcZZGE8BPXBp40aGwJDQGI,11323
-kodit/config.py,sha256=
+kodit/config.py,sha256=3yh7hfLSILjZK_qJMhcExwRcrWJ0b5Eb1JjjOvMPJZo,4146
 kodit/database.py,sha256=WB1KpVxUYPgiJGU0gJa2hqytYB8wJEJ5z3WayhWzNMU,2403
 kodit/log.py,sha256=HU1OmuxO4FcVw61k4WW7Y4WM7BrDaeplw1PcBHhuIZY,5434
 kodit/mcp.py,sha256=QruyPskWB0_x59pkfj5BBeXuR13GMny5TAZEa2j4U9s,5752
@@ -11,36 +11,37 @@ kodit/middleware.py,sha256=I6FOkqG9-8RH5kR1-0ZoQWfE4qLCB8lZYv8H_OCH29o,2714
 kodit/bm25/__init__.py,sha256=j8zyriNWhbwE5Lbybzg1hQAhANlU9mKHWw4beeUR6og,19
 kodit/bm25/keyword_search_factory.py,sha256=rp-wx3DJsc2KlELK1V337EyeYvmwnMQwUqOo1WVPSmg,631
 kodit/bm25/keyword_search_service.py,sha256=aBbWQKgQmi2re3EIHdXFS00n7Wj3b2D0pZsLZ4qmHfE,754
-kodit/bm25/local_bm25.py,sha256=
-kodit/bm25/vectorchord_bm25.py,sha256=
+kodit/bm25/local_bm25.py,sha256=nokrd_xAeqXi3m68X5P1R5KBhRRB1E2L_J6Zgm26PCg,3869
+kodit/bm25/vectorchord_bm25.py,sha256=0p_FgliaoevB8GLSmzWnV3zUjdcWgCgOKIpLURr7Qfo,6549
 kodit/embedding/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
-kodit/embedding/embedding_factory.py,sha256=
+kodit/embedding/embedding_factory.py,sha256=UbrTl3NEqBBH3ecvRG7vGW5wuvUMbWJEWbAAFALOGqs,2141
 kodit/embedding/embedding_models.py,sha256=rN90vSs86dYiqoawcp8E9jtwY31JoJXYfaDlsJK7uqc,656
 kodit/embedding/embedding_repository.py,sha256=-ux3scpBzel8c0pMH9fNOEsSXFIzl-IfgaWrkTb1szo,6907
-kodit/embedding/local_vector_search_service.py,sha256=
+kodit/embedding/local_vector_search_service.py,sha256=dgMi8hQNUbYEgHnEYmLIpon4yLduoNUpu7k7VP6sOHI,2042
 kodit/embedding/vector_search_service.py,sha256=pQJ129QjGrAWOXzqkywmgtDRpy8_gtzYgkivyqF9Vrs,1009
-kodit/embedding/vectorchord_vector_search_service.py,sha256=
+kodit/embedding/vectorchord_vector_search_service.py,sha256=TKNR3HgWHwwWtJ1SsvSaj_BXLJ_uw6Bdr_tpaePMeAA,5383
 kodit/embedding/embedding_provider/__init__.py,sha256=h9NXzDA1r-K23nvBajBV-RJzHJN0p3UJ7UQsmdnOoRw,24
 kodit/embedding/embedding_provider/embedding_provider.py,sha256=IC7fZaZ_ze-DxpxKfK44pRDwHWUQhVIqVKKQ3alO5Qc,1882
 kodit/embedding/embedding_provider/hash_embedding_provider.py,sha256=nAhlhh8j8PqqCCbhVl26Y8ntFBm2vJBCtB4X04g5Wwg,2638
 kodit/embedding/embedding_provider/local_embedding_provider.py,sha256=WP8lw6XG7v1_5Mw4_rhIOETooYRsxhkwmFaXCqCouQU,1977
-kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256
+kodit/embedding/embedding_provider/openai_embedding_provider.py,sha256=-phz5FKYM_tI3Q4_3SPzjzIOK3k92Uk52TAOTmoVoWI,2722
 kodit/enrichment/__init__.py,sha256=vBEolHpKaHUhfINX0dSGyAPlvgpLNAer9YzFtdvCB24,18
-kodit/enrichment/enrichment_factory.py,sha256=
+kodit/enrichment/enrichment_factory.py,sha256=JbWFNciB6Yf79SFVjG9UhLgCcrXZ1rIJrenU8QmNLBE,1411
 kodit/enrichment/enrichment_service.py,sha256=87Sd3gGbEMJYb_wVrHG8L1yGIZmQNR7foUS4_y94azI,977
 kodit/enrichment/enrichment_provider/__init__.py,sha256=klf8iuLVWX4iRz-DZQauFFNAoJC5CByczh48TBZPW-o,27
 kodit/enrichment/enrichment_provider/enrichment_provider.py,sha256=E0H5rq3OENM0yYbA8K_3nSnj5lUHCpoIOqpWLo-2MVU,413
-kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=
-kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=
+kodit/enrichment/enrichment_provider/local_enrichment_provider.py,sha256=RqwUD0BnwRQ8zlkFNkaKq8d58r33k2jIdnSdf6zla1w,3325
+kodit/enrichment/enrichment_provider/openai_enrichment_provider.py,sha256=0Yw7h9RXptoI4bKuqJSKIRQXPUUhNV7eACavgoy_T8s,2874
 kodit/indexing/__init__.py,sha256=cPyi2Iej3G1JFWlWr7X80_UrsMaTu5W5rBwgif1B3xo,75
 kodit/indexing/fusion.py,sha256=TZb4fPAedXdEUXzwzOofW98QIOymdbclBOP1KOijuEk,1674
 kodit/indexing/indexing_models.py,sha256=6NX9HVcj6Pu9ePwHC7n-PWSyAgukpJq0nCNmUIigtbo,1282
 kodit/indexing/indexing_repository.py,sha256=dqOS0pxKM6bUjMXWqYukAK8XdiD36OnskFASgZRXRQM,6955
-kodit/indexing/indexing_service.py,sha256=
+kodit/indexing/indexing_service.py,sha256=79BZ4yaSJqADkivzjsq1bDCBtbfWikVRC7Fjlp1HmZw,10885
 kodit/migrations/README,sha256=ISVtAOvqvKk_5ThM5ioJE-lMkvf9IbknFUFVU_vPma4,58
 kodit/migrations/__init__.py,sha256=lP5MuwlyWRMO6UcDWnQcQ3G-GYHcFb6rl9gYPHJ1sjo,40
 kodit/migrations/env.py,sha256=w1M7OZh-ZeR2dPHS0ByXAUxQjfZQ8xIzMseWuzLDTWw,2469
 kodit/migrations/script.py.mako,sha256=zWziKtiwYKEWuwPV_HBNHwa9LCT45_bi01-uSNFaOOE,703
+kodit/migrations/versions/42e836b21102_add_authors.py,sha256=KmXlHb_y8bIa_ABNU67zZi13r0DAfHA9G8tjQNkdITM,2638
 kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py,sha256=-61qol9PfQKILCDQRA5jEaats9aGZs9Wdtp-j-38SF4,1644
 kodit/migrations/versions/85155663351e_initial.py,sha256=Cg7zlF871o9ShV5rQMQ1v7hRV7fI59veDY9cjtTrs-8,3306
 kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD4MiqRIdo,36
@@ -55,13 +56,15 @@ kodit/snippets/languages/javascript.scm,sha256=Ini5TsVNmcBKQ8aL46a5Id9ut0g9Udmvm
 kodit/snippets/languages/python.scm,sha256=ee85R9PBzwye3IMTE7-iVoKWd_ViU3EJISTyrFGrVeo,429
 kodit/snippets/languages/typescript.scm,sha256=U-ujbbv4tylbUBj9wuhL-e5cW6hmgPCNs4xrIX3r_hE,448
 kodit/source/__init__.py,sha256=1NTZyPdjThVQpZO1Mp1ColVsS7sqYanOVLqnoqV9Ipo,83
-kodit/source/
-kodit/source/
-kodit/source/
+kodit/source/git.py,sha256=CpNczc06SbxpzfQKq76lZFzuol10ZJvTRSzeXW9DFUs,363
+kodit/source/ignore.py,sha256=W7cuIrYlgfu3S1qyoIepXe8PqYmtFv61Tt5RO8cbZbg,1701
+kodit/source/source_models.py,sha256=lCaaoukLlMHuRWJBuYM2nkNKGtFASgbk7ZXq8kp4H5c,3519
+kodit/source/source_repository.py,sha256=4L-W0uE4LOB9LQlefk5f2sgHlsJjj8t33USPxU0na40,4448
+kodit/source/source_service.py,sha256=v-lY-7tsNFCyXo9yCUo7Q00NOWYKGiDB_M2-Hr8hp3U,11391
 kodit/util/__init__.py,sha256=bPu6CtqDWCRGU7VgW2_aiQrCBi8G89FS6k1PjvDajJ0,37
 kodit/util/spinner.py,sha256=R9bzrHtBiIH6IfLbmsIVHL53s8vg-tqW4lwGGALu4dw,1932
-kodit-0.2.
-kodit-0.2.
-kodit-0.2.
-kodit-0.2.
-kodit-0.2.
+kodit-0.2.2.dist-info/METADATA,sha256=UU1curOx-XMql_IiXty-eoz-MJrd5QdlzfCj7ZoSzhg,5857
+kodit-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kodit-0.2.2.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
+kodit-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kodit-0.2.2.dist-info/RECORD,,
{kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/WHEEL
File without changes
{kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/entry_points.txt
File without changes
{kodit-0.2.1.dist-info → kodit-0.2.2.dist-info}/licenses/LICENSE
File without changes