kodit 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/pypi.yaml +0 -1
- {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/test.yaml +3 -0
- kodit-0.1.5/.vscode/launch.json +15 -0
- {kodit-0.1.4 → kodit-0.1.5}/.vscode/settings.json +1 -1
- {kodit-0.1.4 → kodit-0.1.5}/PKG-INFO +6 -2
- {kodit-0.1.4 → kodit-0.1.5}/docs/_index.md +2 -1
- {kodit-0.1.4 → kodit-0.1.5}/pyproject.toml +6 -2
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/_version.py +2 -2
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/env.py +0 -2
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/app.py +8 -8
- kodit-0.1.5/src/kodit/bm25/__init__.py +1 -0
- kodit-0.1.5/src/kodit/bm25/bm25.py +71 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/cli.py +87 -33
- kodit-0.1.5/src/kodit/config.py +89 -0
- kodit-0.1.5/src/kodit/database.py +72 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/repository.py +11 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/service.py +26 -16
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/logging.py +20 -18
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/mcp.py +16 -4
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/retreival/repository.py +32 -0
- kodit-0.1.5/src/kodit/retreival/service.py +68 -0
- kodit-0.1.5/src/kodit/snippets/__init__.py +1 -0
- kodit-0.1.5/src/kodit/snippets/languages/__init__.py +53 -0
- kodit-0.1.5/src/kodit/snippets/languages/csharp.scm +12 -0
- kodit-0.1.5/src/kodit/snippets/languages/python.scm +22 -0
- kodit-0.1.5/src/kodit/snippets/method_snippets.py +120 -0
- kodit-0.1.5/src/kodit/snippets/snippets.py +48 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/service.py +3 -5
- kodit-0.1.5/tests/kodit/cli_test.py +51 -0
- kodit-0.1.5/tests/kodit/e2e.py +145 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/indexing/test_service.py +8 -5
- kodit-0.1.5/tests/kodit/mcp_test.py +27 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/retreival/test_service.py +25 -5
- kodit-0.1.5/tests/kodit/snippets/__init__.py +0 -0
- kodit-0.1.5/tests/kodit/snippets/csharp.cs +44 -0
- kodit-0.1.5/tests/kodit/snippets/detect_language_test.py +87 -0
- kodit-0.1.5/tests/kodit/snippets/method_extraction_test.py +108 -0
- kodit-0.1.5/tests/kodit/snippets/python.py +24 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/sources/test_service.py +2 -2
- kodit-0.1.5/tests/smoke.sh +36 -0
- {kodit-0.1.4 → kodit-0.1.5}/uv.lock +428 -92
- kodit-0.1.4/src/kodit/config.py +0 -5
- kodit-0.1.4/src/kodit/database.py +0 -91
- kodit-0.1.4/src/kodit/retreival/service.py +0 -30
- kodit-0.1.4/src/kodit/sse.py +0 -61
- kodit-0.1.4/tests/kodit/cli_test.py +0 -19
- kodit-0.1.4/tests/kodit/mcp_test.py +0 -109
- kodit-0.1.4/tests/smoke.sh +0 -20
- {kodit-0.1.4 → kodit-0.1.5}/.cursor/rules/kodit.mdc +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/CODE_OF_CONDUCT.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/CONTRIBUTING.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/docker.yaml +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/docs.yaml +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/pypi-test.yaml +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.gitignore +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/.python-version +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/Dockerfile +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/LICENSE +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/README.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/alembic.ini +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/docs/developer/index.md +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/.gitignore +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/README +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/script.py.mako +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/versions/85155663351e_initial.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/versions/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/models.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/middleware.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/retreival/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/models.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/repository.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/conftest.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/indexing/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/retreival/__init__.py +0 -0
- {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/sources/__init__.py +0 -0
|
@@ -14,6 +14,7 @@ permissions:
|
|
|
14
14
|
jobs:
|
|
15
15
|
test:
|
|
16
16
|
runs-on: ubuntu-latest
|
|
17
|
+
timeout-minutes: 10
|
|
17
18
|
steps:
|
|
18
19
|
- name: Checkout code
|
|
19
20
|
uses: actions/checkout@v4
|
|
@@ -44,6 +45,7 @@ jobs:
|
|
|
44
45
|
|
|
45
46
|
build-package:
|
|
46
47
|
runs-on: ubuntu-latest
|
|
48
|
+
timeout-minutes: 10
|
|
47
49
|
steps:
|
|
48
50
|
- name: Checkout code
|
|
49
51
|
uses: actions/checkout@v4
|
|
@@ -67,6 +69,7 @@ jobs:
|
|
|
67
69
|
test-package:
|
|
68
70
|
needs: build-package
|
|
69
71
|
runs-on: ubuntu-latest
|
|
72
|
+
timeout-minutes: 10
|
|
70
73
|
steps:
|
|
71
74
|
- uses: actions/checkout@v4
|
|
72
75
|
with:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kodit
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Code indexing for better AI code generation
|
|
5
5
|
Project-URL: Homepage, https://docs.helixml.tech/kodit/
|
|
6
6
|
Project-URL: Documentation, https://docs.helixml.tech/kodit/
|
|
@@ -22,18 +22,22 @@ Requires-Dist: aiosqlite>=0.20.0
|
|
|
22
22
|
Requires-Dist: alembic>=1.15.2
|
|
23
23
|
Requires-Dist: asgi-correlation-id>=4.3.4
|
|
24
24
|
Requires-Dist: better-exceptions>=0.3.3
|
|
25
|
+
Requires-Dist: bm25s[core]>=0.2.12
|
|
25
26
|
Requires-Dist: click>=8.1.8
|
|
26
27
|
Requires-Dist: colorama>=0.4.6
|
|
27
28
|
Requires-Dist: dotenv>=0.9.9
|
|
28
29
|
Requires-Dist: fastapi[standard]>=0.115.12
|
|
30
|
+
Requires-Dist: fastmcp>=2.3.3
|
|
29
31
|
Requires-Dist: httpx-retries>=0.3.2
|
|
30
32
|
Requires-Dist: httpx>=0.28.1
|
|
31
|
-
Requires-Dist: mcp>=1.6.0
|
|
32
33
|
Requires-Dist: posthog>=4.0.1
|
|
34
|
+
Requires-Dist: pydantic-settings>=2.9.1
|
|
33
35
|
Requires-Dist: pytable-formatter>=0.1.1
|
|
34
36
|
Requires-Dist: sqlalchemy[asyncio]>=2.0.40
|
|
35
37
|
Requires-Dist: structlog>=25.3.0
|
|
36
38
|
Requires-Dist: tdqm>=0.0.1
|
|
39
|
+
Requires-Dist: tree-sitter-language-pack>=0.7.3
|
|
40
|
+
Requires-Dist: tree-sitter>=0.24.0
|
|
37
41
|
Requires-Dist: uritools>=5.0.0
|
|
38
42
|
Description-Content-Type: text/markdown
|
|
39
43
|
|
|
@@ -87,6 +87,7 @@ description:
|
|
|
87
87
|
globs:
|
|
88
88
|
alwaysApply: true
|
|
89
89
|
---
|
|
90
|
-
You MUST use the code-search MCP tool and always include any file context the user has provided in the chat
|
|
90
|
+
You MUST use the code-search MCP tool and always include any file context the user has provided in the chat.
|
|
91
|
+
```
|
|
91
92
|
|
|
92
93
|
Alternatively, you can browse to the cursor settings and set this prompt globally.
|
|
@@ -29,7 +29,6 @@ dependencies = [
|
|
|
29
29
|
"fastapi[standard]>=0.115.12",
|
|
30
30
|
"httpx-retries>=0.3.2",
|
|
31
31
|
"httpx>=0.28.1",
|
|
32
|
-
"mcp>=1.6.0",
|
|
33
32
|
"structlog>=25.3.0",
|
|
34
33
|
"posthog>=4.0.1",
|
|
35
34
|
"sqlalchemy[asyncio]>=2.0.40",
|
|
@@ -39,6 +38,11 @@ dependencies = [
|
|
|
39
38
|
"aiofiles>=24.1.0",
|
|
40
39
|
"tdqm>=0.0.1",
|
|
41
40
|
"uritools>=5.0.0",
|
|
41
|
+
"tree-sitter-language-pack>=0.7.3",
|
|
42
|
+
"tree-sitter>=0.24.0",
|
|
43
|
+
"fastmcp>=2.3.3",
|
|
44
|
+
"pydantic-settings>=2.9.1",
|
|
45
|
+
"bm25s[core]>=0.2.12",
|
|
42
46
|
]
|
|
43
47
|
|
|
44
48
|
[dependency-groups]
|
|
@@ -104,7 +108,7 @@ ignore = [
|
|
|
104
108
|
"PGH004", # If I've disabled all, I mean disable all
|
|
105
109
|
]
|
|
106
110
|
select = ["ALL"]
|
|
107
|
-
exclude = []
|
|
111
|
+
exclude = ["./tests/*"]
|
|
108
112
|
|
|
109
113
|
[[tool.uv.index]]
|
|
110
114
|
name = "pypi"
|
|
@@ -66,8 +66,6 @@ async def run_async_migrations() -> None:
|
|
|
66
66
|
prefix="sqlalchemy.",
|
|
67
67
|
poolclass=pool.NullPool,
|
|
68
68
|
)
|
|
69
|
-
log = structlog.get_logger(__name__)
|
|
70
|
-
log.debug("Running migrations on %s", connectable.url)
|
|
71
69
|
|
|
72
70
|
async with connectable.connect() as connection:
|
|
73
71
|
await connection.run_sync(do_run_migrations)
|
|
@@ -5,14 +5,10 @@ from fastapi import FastAPI
|
|
|
5
5
|
|
|
6
6
|
from kodit.mcp import mcp
|
|
7
7
|
from kodit.middleware import logging_middleware
|
|
8
|
-
from kodit.sse import create_sse_server
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
sse_app = create_sse_server(mcp)
|
|
14
|
-
for route in sse_app.routes:
|
|
15
|
-
app.router.routes.append(route)
|
|
9
|
+
# See https://gofastmcp.com/deployment/asgi#fastapi-integration
|
|
10
|
+
mcp_app = mcp.sse_app()
|
|
11
|
+
app = FastAPI(title="kodit API", lifespan=mcp_app.router.lifespan_context)
|
|
16
12
|
|
|
17
13
|
# Add middleware
|
|
18
14
|
app.middleware("http")(logging_middleware)
|
|
@@ -22,4 +18,8 @@ app.add_middleware(CorrelationIdMiddleware)
|
|
|
22
18
|
@app.get("/")
|
|
23
19
|
async def root() -> dict[str, str]:
|
|
24
20
|
"""Return a welcome message for the kodit API."""
|
|
25
|
-
return {"message": "
|
|
21
|
+
return {"message": "Hello, World!"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Add mcp routes last, otherwise previous routes aren't added
|
|
25
|
+
app.mount("", mcp_app)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""BM25 module."""
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""BM25 service."""
|
|
2
|
+
|
|
3
|
+
import bm25s
|
|
4
|
+
import Stemmer
|
|
5
|
+
import structlog
|
|
6
|
+
from bm25s.tokenization import Tokenized
|
|
7
|
+
|
|
8
|
+
from kodit.config import Config
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BM25Service:
|
|
12
|
+
"""Service for BM25."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, config: Config) -> None:
|
|
15
|
+
"""Initialize the BM25 service."""
|
|
16
|
+
self.log = structlog.get_logger(__name__)
|
|
17
|
+
self.index_path = config.get_data_dir() / "bm25s_index"
|
|
18
|
+
try:
|
|
19
|
+
self.log.debug("Loading BM25 index")
|
|
20
|
+
self.retriever = bm25s.BM25.load(self.index_path, mmap=True)
|
|
21
|
+
except FileNotFoundError:
|
|
22
|
+
self.log.debug("BM25 index not found, creating new index")
|
|
23
|
+
self.retriever = bm25s.BM25()
|
|
24
|
+
|
|
25
|
+
self.stemmer = Stemmer.Stemmer("english")
|
|
26
|
+
|
|
27
|
+
def _tokenize(self, corpus: list[str]) -> list[list[str]] | Tokenized:
|
|
28
|
+
return bm25s.tokenize(
|
|
29
|
+
corpus,
|
|
30
|
+
stopwords="en",
|
|
31
|
+
stemmer=self.stemmer,
|
|
32
|
+
return_ids=False,
|
|
33
|
+
show_progress=True,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
def index(self, corpus: list[str]) -> None:
|
|
37
|
+
"""Index a new corpus."""
|
|
38
|
+
self.log.debug("Indexing corpus")
|
|
39
|
+
vocab = self._tokenize(corpus)
|
|
40
|
+
self.retriever = bm25s.BM25()
|
|
41
|
+
self.retriever.index(vocab)
|
|
42
|
+
self.retriever.save(self.index_path)
|
|
43
|
+
|
|
44
|
+
def retrieve(
|
|
45
|
+
self, doc_ids: list[int], query: str, top_k: int = 2
|
|
46
|
+
) -> list[tuple[int, float]]:
|
|
47
|
+
"""Retrieve from the index."""
|
|
48
|
+
if top_k == 0:
|
|
49
|
+
self.log.warning("Top k is 0, returning empty list")
|
|
50
|
+
return []
|
|
51
|
+
if len(doc_ids) == 0:
|
|
52
|
+
self.log.warning("No documents to retrieve from, returning empty list")
|
|
53
|
+
return []
|
|
54
|
+
|
|
55
|
+
top_k = min(top_k, len(doc_ids))
|
|
56
|
+
self.log.debug(
|
|
57
|
+
"Retrieving from index", query=query, top_k=top_k, num_docs=len(doc_ids)
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
query_tokens = self._tokenize([query])
|
|
61
|
+
|
|
62
|
+
self.log.debug("Query tokens", query_tokens=query_tokens)
|
|
63
|
+
|
|
64
|
+
results, scores = self.retriever.retrieve(
|
|
65
|
+
query_tokens=query_tokens, corpus=doc_ids, k=top_k
|
|
66
|
+
)
|
|
67
|
+
self.log.debug("Raw results", results=results, scores=scores)
|
|
68
|
+
return [
|
|
69
|
+
(int(result), float(score))
|
|
70
|
+
for result, score in zip(results[0], scores[0], strict=False)
|
|
71
|
+
]
|
|
@@ -1,41 +1,74 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import signal
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
4
7
|
|
|
5
8
|
import click
|
|
6
9
|
import structlog
|
|
7
10
|
import uvicorn
|
|
8
|
-
from dotenv import dotenv_values
|
|
9
11
|
from pytable_formatter import Table
|
|
10
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
11
13
|
|
|
12
|
-
from kodit.
|
|
14
|
+
from kodit.config import (
|
|
15
|
+
DEFAULT_BASE_DIR,
|
|
16
|
+
DEFAULT_DB_URL,
|
|
17
|
+
DEFAULT_DISABLE_TELEMETRY,
|
|
18
|
+
DEFAULT_LOG_FORMAT,
|
|
19
|
+
DEFAULT_LOG_LEVEL,
|
|
20
|
+
get_config,
|
|
21
|
+
reset_config,
|
|
22
|
+
with_session,
|
|
23
|
+
)
|
|
13
24
|
from kodit.indexing.repository import IndexRepository
|
|
14
25
|
from kodit.indexing.service import IndexService
|
|
15
|
-
from kodit.logging import
|
|
26
|
+
from kodit.logging import configure_logging, configure_telemetry, log_event
|
|
16
27
|
from kodit.retreival.repository import RetrievalRepository
|
|
17
28
|
from kodit.retreival.service import RetrievalRequest, RetrievalService
|
|
18
29
|
from kodit.sources.repository import SourceRepository
|
|
19
30
|
from kodit.sources.service import SourceService
|
|
20
31
|
|
|
21
|
-
env_vars = dict(dotenv_values())
|
|
22
|
-
os.environ.update(env_vars)
|
|
23
32
|
|
|
24
|
-
|
|
25
|
-
@click.
|
|
26
|
-
@click.option("--log-
|
|
27
|
-
@click.option(
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
@click.group(context_settings={"max_content_width": 100})
|
|
34
|
+
@click.option("--log-level", help=f"Log level [default: {DEFAULT_LOG_LEVEL}]")
|
|
35
|
+
@click.option("--log-format", help=f"Log format [default: {DEFAULT_LOG_FORMAT}]")
|
|
36
|
+
@click.option(
|
|
37
|
+
"--disable-telemetry",
|
|
38
|
+
is_flag=True,
|
|
39
|
+
help=f"Disable telemetry [default: {DEFAULT_DISABLE_TELEMETRY}]",
|
|
40
|
+
)
|
|
41
|
+
@click.option("--db-url", help=f"Database URL [default: {DEFAULT_DB_URL}]")
|
|
42
|
+
@click.option("--data-dir", help=f"Data directory [default: {DEFAULT_BASE_DIR}]")
|
|
43
|
+
@click.option("--env-file", help="Path to a .env file [default: .env]")
|
|
44
|
+
def cli( # noqa: PLR0913
|
|
45
|
+
log_level: str | None,
|
|
46
|
+
log_format: str | None,
|
|
47
|
+
disable_telemetry: bool | None,
|
|
48
|
+
db_url: str | None,
|
|
49
|
+
data_dir: str | None,
|
|
50
|
+
env_file: str | None,
|
|
33
51
|
) -> None:
|
|
34
52
|
"""kodit CLI - Code indexing for better AI code generation.""" # noqa: D403
|
|
35
|
-
|
|
53
|
+
# First check if env-file is set and reload config if it is
|
|
54
|
+
if env_file:
|
|
55
|
+
reset_config()
|
|
56
|
+
get_config(env_file)
|
|
57
|
+
|
|
58
|
+
# Override global config with cli args, if set
|
|
59
|
+
config = get_config()
|
|
60
|
+
if data_dir:
|
|
61
|
+
config.data_dir = Path(data_dir)
|
|
62
|
+
if db_url:
|
|
63
|
+
config.db_url = db_url
|
|
64
|
+
if log_level:
|
|
65
|
+
config.log_level = log_level
|
|
66
|
+
if log_format:
|
|
67
|
+
config.log_format = log_format
|
|
36
68
|
if disable_telemetry:
|
|
37
|
-
|
|
38
|
-
|
|
69
|
+
config.disable_telemetry = disable_telemetry
|
|
70
|
+
configure_logging(config)
|
|
71
|
+
configure_telemetry(config)
|
|
39
72
|
|
|
40
73
|
|
|
41
74
|
@cli.group()
|
|
@@ -48,7 +81,7 @@ def sources() -> None:
|
|
|
48
81
|
async def list_sources(session: AsyncSession) -> None:
|
|
49
82
|
"""List all code sources."""
|
|
50
83
|
repository = SourceRepository(session)
|
|
51
|
-
service = SourceService(repository)
|
|
84
|
+
service = SourceService(get_config().get_clone_dir(), repository)
|
|
52
85
|
sources = await service.list_sources()
|
|
53
86
|
|
|
54
87
|
# Define headers and data
|
|
@@ -66,7 +99,7 @@ async def list_sources(session: AsyncSession) -> None:
|
|
|
66
99
|
async def create_source(session: AsyncSession, uri: str) -> None:
|
|
67
100
|
"""Add a new code source."""
|
|
68
101
|
repository = SourceRepository(session)
|
|
69
|
-
service = SourceService(repository)
|
|
102
|
+
service = SourceService(get_config().get_clone_dir(), repository)
|
|
70
103
|
source = await service.create(uri)
|
|
71
104
|
click.echo(f"Source created: {source.id}")
|
|
72
105
|
|
|
@@ -82,9 +115,9 @@ def indexes() -> None:
|
|
|
82
115
|
async def create_index(session: AsyncSession, source_id: int) -> None:
|
|
83
116
|
"""Create an index for a source."""
|
|
84
117
|
source_repository = SourceRepository(session)
|
|
85
|
-
source_service = SourceService(source_repository)
|
|
118
|
+
source_service = SourceService(get_config().get_clone_dir(), source_repository)
|
|
86
119
|
repository = IndexRepository(session)
|
|
87
|
-
service = IndexService(repository, source_service)
|
|
120
|
+
service = IndexService(get_config(), repository, source_service)
|
|
88
121
|
index = await service.create(source_id)
|
|
89
122
|
click.echo(f"Index created: {index.id}")
|
|
90
123
|
|
|
@@ -94,9 +127,9 @@ async def create_index(session: AsyncSession, source_id: int) -> None:
|
|
|
94
127
|
async def list_indexes(session: AsyncSession) -> None:
|
|
95
128
|
"""List all indexes."""
|
|
96
129
|
source_repository = SourceRepository(session)
|
|
97
|
-
source_service = SourceService(source_repository)
|
|
130
|
+
source_service = SourceService(get_config().get_clone_dir(), source_repository)
|
|
98
131
|
repository = IndexRepository(session)
|
|
99
|
-
service = IndexService(repository, source_service)
|
|
132
|
+
service = IndexService(get_config(), repository, source_service)
|
|
100
133
|
indexes = await service.list_indexes()
|
|
101
134
|
|
|
102
135
|
# Define headers and data
|
|
@@ -127,48 +160,69 @@ async def list_indexes(session: AsyncSession) -> None:
|
|
|
127
160
|
async def run_index(session: AsyncSession, index_id: int) -> None:
|
|
128
161
|
"""Run an index."""
|
|
129
162
|
source_repository = SourceRepository(session)
|
|
130
|
-
source_service = SourceService(source_repository)
|
|
163
|
+
source_service = SourceService(get_config().get_clone_dir(), source_repository)
|
|
131
164
|
repository = IndexRepository(session)
|
|
132
|
-
service = IndexService(repository, source_service)
|
|
165
|
+
service = IndexService(get_config(), repository, source_service)
|
|
133
166
|
await service.run(index_id)
|
|
134
167
|
|
|
135
168
|
|
|
136
169
|
@cli.command()
|
|
137
170
|
@click.argument("query")
|
|
171
|
+
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
138
172
|
@with_session
|
|
139
|
-
async def retrieve(session: AsyncSession, query: str) -> None:
|
|
173
|
+
async def retrieve(session: AsyncSession, query: str, top_k: int) -> None:
|
|
140
174
|
"""Retrieve snippets from the database."""
|
|
141
175
|
repository = RetrievalRepository(session)
|
|
142
|
-
service = RetrievalService(repository)
|
|
143
|
-
|
|
176
|
+
service = RetrievalService(get_config(), repository)
|
|
177
|
+
# Temporary request while we don't have all search capabilities
|
|
178
|
+
snippets = await service.retrieve(
|
|
179
|
+
RetrievalRequest(keywords=query.split(","), top_k=top_k)
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
if len(snippets) == 0:
|
|
183
|
+
click.echo("No snippets found")
|
|
184
|
+
return
|
|
144
185
|
|
|
145
186
|
for snippet in snippets:
|
|
187
|
+
click.echo("-" * 80)
|
|
146
188
|
click.echo(f"{snippet.uri}")
|
|
147
189
|
click.echo(snippet.content)
|
|
190
|
+
click.echo("-" * 80)
|
|
148
191
|
click.echo()
|
|
149
192
|
|
|
150
193
|
|
|
151
194
|
@cli.command()
|
|
152
195
|
@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
|
|
153
196
|
@click.option("--port", default=8080, help="Port to bind the server to")
|
|
154
|
-
@click.option("--reload", is_flag=True, help="Enable auto-reload for development")
|
|
155
197
|
def serve(
|
|
156
198
|
host: str,
|
|
157
199
|
port: int,
|
|
158
|
-
reload: bool, # noqa: FBT001
|
|
159
200
|
) -> None:
|
|
160
201
|
"""Start the kodit server, which hosts the MCP server and the kodit API."""
|
|
161
202
|
log = structlog.get_logger(__name__)
|
|
162
|
-
log.info("Starting kodit server", host=host, port=port, reload=reload)
|
|
203
|
+
log.info("Starting kodit server", host=host, port=port)
|
|
163
204
|
log_event("kodit_server_started")
|
|
164
|
-
|
|
205
|
+
os.environ["HELLO"] = "WORLD"
|
|
206
|
+
|
|
207
|
+
# Configure uvicorn with graceful shutdown
|
|
208
|
+
config = uvicorn.Config(
|
|
165
209
|
"kodit.app:app",
|
|
166
210
|
host=host,
|
|
167
211
|
port=port,
|
|
168
|
-
reload=reload,
|
|
212
|
+
reload=False,
|
|
169
213
|
log_config=None, # Setting to None forces uvicorn to use our structlog setup
|
|
170
214
|
access_log=False, # Using own middleware for access logging
|
|
215
|
+
timeout_graceful_shutdown=0, # The mcp server does not shutdown cleanly, force
|
|
171
216
|
)
|
|
217
|
+
server = uvicorn.Server(config)
|
|
218
|
+
|
|
219
|
+
def handle_sigint(signum: int, frame: Any) -> None:
|
|
220
|
+
"""Handle SIGINT (Ctrl+C)."""
|
|
221
|
+
log.info("Received shutdown signal, force killing MCP connections")
|
|
222
|
+
server.handle_exit(signum, frame)
|
|
223
|
+
|
|
224
|
+
signal.signal(signal.SIGINT, handle_sigint)
|
|
225
|
+
server.run()
|
|
172
226
|
|
|
173
227
|
|
|
174
228
|
@cli.command()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Global configuration for the kodit project."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, TypeVar
|
|
8
|
+
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
11
|
+
|
|
12
|
+
from kodit.database import Database
|
|
13
|
+
|
|
14
|
+
DEFAULT_BASE_DIR = Path.home() / ".kodit"
|
|
15
|
+
DEFAULT_DB_URL = f"sqlite+aiosqlite:///{DEFAULT_BASE_DIR}/kodit.db"
|
|
16
|
+
DEFAULT_LOG_LEVEL = "INFO"
|
|
17
|
+
DEFAULT_LOG_FORMAT = "pretty"
|
|
18
|
+
DEFAULT_DISABLE_TELEMETRY = False
|
|
19
|
+
T = TypeVar("T")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Config(BaseSettings):
|
|
23
|
+
"""Global configuration for the kodit project."""
|
|
24
|
+
|
|
25
|
+
model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
|
|
26
|
+
|
|
27
|
+
data_dir: Path = Field(default=DEFAULT_BASE_DIR)
|
|
28
|
+
db_url: str = Field(default=DEFAULT_DB_URL)
|
|
29
|
+
log_level: str = Field(default=DEFAULT_LOG_LEVEL)
|
|
30
|
+
log_format: str = Field(default=DEFAULT_LOG_FORMAT)
|
|
31
|
+
disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
|
|
32
|
+
_db: Database | None = None
|
|
33
|
+
|
|
34
|
+
def model_post_init(self, _: Any) -> None:
|
|
35
|
+
"""Post-initialization hook."""
|
|
36
|
+
# Call this to ensure the data dir exists for the default db location
|
|
37
|
+
self.get_data_dir()
|
|
38
|
+
|
|
39
|
+
def get_data_dir(self) -> Path:
|
|
40
|
+
"""Get the data directory."""
|
|
41
|
+
self.data_dir.mkdir(parents=True, exist_ok=True)
|
|
42
|
+
return self.data_dir
|
|
43
|
+
|
|
44
|
+
def get_clone_dir(self) -> Path:
|
|
45
|
+
"""Get the clone directory."""
|
|
46
|
+
clone_dir = self.get_data_dir() / "clones"
|
|
47
|
+
clone_dir.mkdir(parents=True, exist_ok=True)
|
|
48
|
+
return clone_dir
|
|
49
|
+
|
|
50
|
+
def get_db(self, *, run_migrations: bool = True) -> Database:
|
|
51
|
+
"""Get the database."""
|
|
52
|
+
if self._db is None:
|
|
53
|
+
self._db = Database(self.db_url, run_migrations=run_migrations)
|
|
54
|
+
return self._db
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Global config instance for mcp Apps
|
|
58
|
+
config = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_config(env_file: str | None = None) -> Config:
|
|
62
|
+
"""Get the global config instance."""
|
|
63
|
+
global config # noqa: PLW0603
|
|
64
|
+
if config is None:
|
|
65
|
+
config = Config(_env_file=env_file)
|
|
66
|
+
return config
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def reset_config() -> None:
|
|
70
|
+
"""Reset the global config instance."""
|
|
71
|
+
global config # noqa: PLW0603
|
|
72
|
+
config = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def with_session(func: Callable[..., T]) -> Callable[..., T]:
|
|
76
|
+
"""Provide an async session to CLI commands."""
|
|
77
|
+
|
|
78
|
+
@wraps(func)
|
|
79
|
+
def wrapper(*args: Any, **kwargs: Any) -> T:
|
|
80
|
+
# Create DB connection before starting event loop
|
|
81
|
+
db = get_config().get_db()
|
|
82
|
+
|
|
83
|
+
async def _run() -> T:
|
|
84
|
+
async with db.get_session() as session:
|
|
85
|
+
return await func(session, *args, **kwargs)
|
|
86
|
+
|
|
87
|
+
return asyncio.run(_run())
|
|
88
|
+
|
|
89
|
+
return wrapper
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Database configuration for kodit."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncGenerator
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
from alembic import command
|
|
10
|
+
from alembic.config import Config as AlembicConfig
|
|
11
|
+
from sqlalchemy import DateTime
|
|
12
|
+
from sqlalchemy.ext.asyncio import (
|
|
13
|
+
AsyncAttrs,
|
|
14
|
+
AsyncSession,
|
|
15
|
+
async_sessionmaker,
|
|
16
|
+
create_async_engine,
|
|
17
|
+
)
|
|
18
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
|
19
|
+
|
|
20
|
+
from kodit import alembic
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Base(AsyncAttrs, DeclarativeBase):
|
|
24
|
+
"""Base class for all models."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CommonMixin:
|
|
28
|
+
"""Common mixin for all models."""
|
|
29
|
+
|
|
30
|
+
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
|
31
|
+
created_at: Mapped[datetime] = mapped_column(
|
|
32
|
+
DateTime, default=lambda: datetime.now(UTC)
|
|
33
|
+
)
|
|
34
|
+
updated_at: Mapped[datetime] = mapped_column(
|
|
35
|
+
DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Database:
|
|
40
|
+
"""Database class for kodit."""
|
|
41
|
+
|
|
42
|
+
def __init__(self, db_url: str, *, run_migrations: bool = True) -> None:
|
|
43
|
+
"""Initialize the database."""
|
|
44
|
+
self.log = structlog.get_logger(__name__)
|
|
45
|
+
if run_migrations:
|
|
46
|
+
self._run_migrations(db_url)
|
|
47
|
+
db_engine = create_async_engine(db_url, echo=False)
|
|
48
|
+
self.db_session_factory = async_sessionmaker(
|
|
49
|
+
db_engine,
|
|
50
|
+
class_=AsyncSession,
|
|
51
|
+
expire_on_commit=False,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
@asynccontextmanager
|
|
55
|
+
async def get_session(self) -> AsyncGenerator[AsyncSession, None]:
|
|
56
|
+
"""Get a database session."""
|
|
57
|
+
async with self.db_session_factory() as session:
|
|
58
|
+
try:
|
|
59
|
+
yield session
|
|
60
|
+
finally:
|
|
61
|
+
await session.close()
|
|
62
|
+
|
|
63
|
+
def _run_migrations(self, db_url: str) -> None:
|
|
64
|
+
"""Run any pending migrations."""
|
|
65
|
+
# Create Alembic configuration and run migrations
|
|
66
|
+
alembic_cfg = AlembicConfig()
|
|
67
|
+
alembic_cfg.set_main_option(
|
|
68
|
+
"script_location", str(Path(alembic.__file__).parent)
|
|
69
|
+
)
|
|
70
|
+
alembic_cfg.set_main_option("sqlalchemy.url", db_url)
|
|
71
|
+
self.log.debug("Running migrations", db_url=db_url)
|
|
72
|
+
command.upgrade(alembic_cfg, "head")
|
|
@@ -130,3 +130,14 @@ class IndexRepository:
|
|
|
130
130
|
query = select(Snippet).where(Snippet.index_id == index_id)
|
|
131
131
|
result = await self.session.execute(query)
|
|
132
132
|
return list(result.scalars())
|
|
133
|
+
|
|
134
|
+
async def get_all_snippets(self) -> list[Snippet]:
|
|
135
|
+
"""Get all snippets.
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
A list of all snippets.
|
|
139
|
+
|
|
140
|
+
"""
|
|
141
|
+
query = select(Snippet).order_by(Snippet.id)
|
|
142
|
+
result = await self.session.execute(query)
|
|
143
|
+
return list(result.scalars())
|