kodit 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit-0.1.5/.cursor/rules/kodit.mdc +6 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/workflows/pypi.yaml +19 -3
- {kodit-0.1.3 → kodit-0.1.5}/.github/workflows/test.yaml +3 -0
- kodit-0.1.5/.vscode/launch.json +15 -0
- {kodit-0.1.3 → kodit-0.1.5}/.vscode/settings.json +1 -1
- {kodit-0.1.3 → kodit-0.1.5}/PKG-INFO +6 -2
- kodit-0.1.5/docs/_index.md +93 -0
- kodit-0.1.5/docs/developer/index.md +30 -0
- {kodit-0.1.3 → kodit-0.1.5}/pyproject.toml +6 -2
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/_version.py +2 -2
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/alembic/env.py +0 -2
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/app.py +8 -8
- kodit-0.1.5/src/kodit/bm25/__init__.py +1 -0
- kodit-0.1.5/src/kodit/bm25/bm25.py +71 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/cli.py +87 -35
- kodit-0.1.5/src/kodit/config.py +89 -0
- kodit-0.1.5/src/kodit/database.py +72 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/indexing/repository.py +11 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/indexing/service.py +26 -17
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/logging.py +20 -18
- kodit-0.1.5/src/kodit/mcp.py +122 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/retreival/repository.py +32 -0
- kodit-0.1.5/src/kodit/retreival/service.py +68 -0
- kodit-0.1.5/src/kodit/snippets/__init__.py +1 -0
- kodit-0.1.5/src/kodit/snippets/languages/__init__.py +53 -0
- kodit-0.1.5/src/kodit/snippets/languages/csharp.scm +12 -0
- kodit-0.1.5/src/kodit/snippets/languages/python.scm +22 -0
- kodit-0.1.5/src/kodit/snippets/method_snippets.py +120 -0
- kodit-0.1.5/src/kodit/snippets/snippets.py +48 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/sources/service.py +3 -5
- kodit-0.1.5/tests/kodit/cli_test.py +51 -0
- kodit-0.1.5/tests/kodit/e2e.py +145 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/indexing/test_service.py +8 -5
- kodit-0.1.5/tests/kodit/mcp_test.py +27 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/retreival/test_service.py +25 -5
- kodit-0.1.5/tests/kodit/snippets/__init__.py +0 -0
- kodit-0.1.5/tests/kodit/snippets/csharp.cs +44 -0
- kodit-0.1.5/tests/kodit/snippets/detect_language_test.py +87 -0
- kodit-0.1.5/tests/kodit/snippets/method_extraction_test.py +108 -0
- kodit-0.1.5/tests/kodit/snippets/python.py +24 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/sources/test_service.py +2 -2
- kodit-0.1.5/tests/smoke.sh +36 -0
- {kodit-0.1.3 → kodit-0.1.5}/uv.lock +428 -92
- kodit-0.1.3/docs/_index.md +0 -53
- kodit-0.1.3/docs/developer/index.md +0 -17
- kodit-0.1.3/src/kodit/config.py +0 -5
- kodit-0.1.3/src/kodit/database.py +0 -89
- kodit-0.1.3/src/kodit/mcp.py +0 -51
- kodit-0.1.3/src/kodit/retreival/service.py +0 -30
- kodit-0.1.3/src/kodit/sse.py +0 -61
- kodit-0.1.3/tests/kodit/cli_test.py +0 -19
- kodit-0.1.3/tests/kodit/mcp_test.py +0 -66
- kodit-0.1.3/tests/smoke.sh +0 -20
- {kodit-0.1.3 → kodit-0.1.5}/.github/CODE_OF_CONDUCT.md +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/CONTRIBUTING.md +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/workflows/docker.yaml +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/workflows/docs.yaml +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.github/workflows/pypi-test.yaml +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.gitignore +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/.python-version +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/Dockerfile +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/LICENSE +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/README.md +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/alembic.ini +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/.gitignore +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/alembic/README +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/alembic/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/alembic/script.py.mako +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/alembic/versions/85155663351e_initial.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/alembic/versions/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/indexing/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/indexing/models.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/middleware.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/retreival/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/sources/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/sources/models.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/src/kodit/sources/repository.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/conftest.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/indexing/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/retreival/__init__.py +0 -0
- {kodit-0.1.3 → kodit-0.1.5}/tests/kodit/sources/__init__.py +0 -0
|
@@ -28,10 +28,26 @@ jobs:
|
|
|
28
28
|
steps:
|
|
29
29
|
- name: Wait for PyPI release to be available
|
|
30
30
|
run: |
|
|
31
|
+
set -x
|
|
32
|
+
|
|
33
|
+
REPO_NAME=${{ github.event.repository.name }}
|
|
34
|
+
REPO_TAG=${{ github.event.release.tag_name }}
|
|
35
|
+
|
|
36
|
+
# Get the first letter of the repo name
|
|
37
|
+
REPO_NAME_FIRST_LETTER=${REPO_NAME:0:1}
|
|
38
|
+
|
|
39
|
+
# Initialize counter for 60 second timeout
|
|
40
|
+
count=0
|
|
31
41
|
while true; do
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
42
|
+
if curl -sfL https://pypi.io/packages/source/${REPO_NAME_FIRST_LETTER}/${REPO_NAME}/${REPO_NAME}-${REPO_TAG}.tar.gz > /dev/null; then
|
|
43
|
+
break
|
|
44
|
+
fi
|
|
45
|
+
sleep 1
|
|
46
|
+
((count++))
|
|
47
|
+
if [ $count -ge 60 ]; then
|
|
48
|
+
echo "Timeout reached after 60 seconds"
|
|
49
|
+
exit 1
|
|
50
|
+
fi
|
|
35
51
|
done
|
|
36
52
|
- uses: mislav/bump-homebrew-formula-action@v3
|
|
37
53
|
with:
|
|
@@ -14,6 +14,7 @@ permissions:
|
|
|
14
14
|
jobs:
|
|
15
15
|
test:
|
|
16
16
|
runs-on: ubuntu-latest
|
|
17
|
+
timeout-minutes: 10
|
|
17
18
|
steps:
|
|
18
19
|
- name: Checkout code
|
|
19
20
|
uses: actions/checkout@v4
|
|
@@ -44,6 +45,7 @@ jobs:
|
|
|
44
45
|
|
|
45
46
|
build-package:
|
|
46
47
|
runs-on: ubuntu-latest
|
|
48
|
+
timeout-minutes: 10
|
|
47
49
|
steps:
|
|
48
50
|
- name: Checkout code
|
|
49
51
|
uses: actions/checkout@v4
|
|
@@ -67,6 +69,7 @@ jobs:
|
|
|
67
69
|
test-package:
|
|
68
70
|
needs: build-package
|
|
69
71
|
runs-on: ubuntu-latest
|
|
72
|
+
timeout-minutes: 10
|
|
70
73
|
steps:
|
|
71
74
|
- uses: actions/checkout@v4
|
|
72
75
|
with:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kodit
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Code indexing for better AI code generation
|
|
5
5
|
Project-URL: Homepage, https://docs.helixml.tech/kodit/
|
|
6
6
|
Project-URL: Documentation, https://docs.helixml.tech/kodit/
|
|
@@ -22,18 +22,22 @@ Requires-Dist: aiosqlite>=0.20.0
|
|
|
22
22
|
Requires-Dist: alembic>=1.15.2
|
|
23
23
|
Requires-Dist: asgi-correlation-id>=4.3.4
|
|
24
24
|
Requires-Dist: better-exceptions>=0.3.3
|
|
25
|
+
Requires-Dist: bm25s[core]>=0.2.12
|
|
25
26
|
Requires-Dist: click>=8.1.8
|
|
26
27
|
Requires-Dist: colorama>=0.4.6
|
|
27
28
|
Requires-Dist: dotenv>=0.9.9
|
|
28
29
|
Requires-Dist: fastapi[standard]>=0.115.12
|
|
30
|
+
Requires-Dist: fastmcp>=2.3.3
|
|
29
31
|
Requires-Dist: httpx-retries>=0.3.2
|
|
30
32
|
Requires-Dist: httpx>=0.28.1
|
|
31
|
-
Requires-Dist: mcp>=1.6.0
|
|
32
33
|
Requires-Dist: posthog>=4.0.1
|
|
34
|
+
Requires-Dist: pydantic-settings>=2.9.1
|
|
33
35
|
Requires-Dist: pytable-formatter>=0.1.1
|
|
34
36
|
Requires-Dist: sqlalchemy[asyncio]>=2.0.40
|
|
35
37
|
Requires-Dist: structlog>=25.3.0
|
|
36
38
|
Requires-Dist: tdqm>=0.0.1
|
|
39
|
+
Requires-Dist: tree-sitter-language-pack>=0.7.3
|
|
40
|
+
Requires-Dist: tree-sitter>=0.24.0
|
|
37
41
|
Requires-Dist: uritools>=5.0.0
|
|
38
42
|
Description-Content-Type: text/markdown
|
|
39
43
|
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "kodit: Code Indexing MCP Server"
|
|
3
|
+
linkTitle: kodit Docs
|
|
4
|
+
cascade:
|
|
5
|
+
type: docs
|
|
6
|
+
menu:
|
|
7
|
+
main:
|
|
8
|
+
name: kodit Docs
|
|
9
|
+
weight: 3
|
|
10
|
+
# next: /helix/getting-started
|
|
11
|
+
weight: 1
|
|
12
|
+
aliases:
|
|
13
|
+
- /coda
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
Please choose your preferred installation method. They all ultimately install the kodit
|
|
19
|
+
cli, which contains the kodit MCP server and other tools to manage your data sources.
|
|
20
|
+
|
|
21
|
+
### Docker
|
|
22
|
+
|
|
23
|
+
```sh
|
|
24
|
+
docker run -it --rm registry.helix.ml/helix/kodit:latest
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Always replace latest with a specific version.
|
|
28
|
+
|
|
29
|
+
### pipx
|
|
30
|
+
|
|
31
|
+
```sh
|
|
32
|
+
pipx install kodit
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### homebrew
|
|
36
|
+
|
|
37
|
+
```sh
|
|
38
|
+
brew install helixml/kodit/kodit
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### uv
|
|
42
|
+
|
|
43
|
+
```sh
|
|
44
|
+
uv tool install kodit
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### pip
|
|
48
|
+
|
|
49
|
+
Use this if you want to use kodit as a python library:
|
|
50
|
+
|
|
51
|
+
```sh
|
|
52
|
+
pip install kodit
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Usage
|
|
56
|
+
|
|
57
|
+
Kodit has two key parts. A configuration CLI to manage what gets indexed and an MCP
|
|
58
|
+
server to expose your code to an AI coding assistant.
|
|
59
|
+
|
|
60
|
+
1. Add a source: `kodit sources create /path/to/your/code`
|
|
61
|
+
2. Create an index on that source: `kodit indexes create 1`
|
|
62
|
+
3. Test retrieval on your index: `kodit retrieve "test"`
|
|
63
|
+
4. Start an MCP server: `kodit serve`
|
|
64
|
+
|
|
65
|
+
Now browse to your AI coding assistant and add the MCP server. You will also need to
|
|
66
|
+
tell your assistant to use this server in coding tasks, otherwise it won't get called!
|
|
67
|
+
|
|
68
|
+
### Integration with Cursor
|
|
69
|
+
|
|
70
|
+
Add the following to `.cursor/mcp.json`:
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"mcpServers": {
|
|
75
|
+
"code-search": {
|
|
76
|
+
"url": "http://localhost:8080/sse"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Now add the following prompt to `.cursor/rules/kodit.mdc` in your project directory:
|
|
83
|
+
|
|
84
|
+
```markdown
|
|
85
|
+
---
|
|
86
|
+
description:
|
|
87
|
+
globs:
|
|
88
|
+
alwaysApply: true
|
|
89
|
+
---
|
|
90
|
+
You MUST use the code-search MCP tool and always include any file context the user has provided in the chat.
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Alternatively, you can browse to the cursor settings and set this prompt globally.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Kodit Developer Documentation"
|
|
3
|
+
linkTitle: Developer Docs
|
|
4
|
+
# next: /helix/getting-started
|
|
5
|
+
weight: 99
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Database
|
|
9
|
+
|
|
10
|
+
All database operations are handled by SQLAlchemy and Alembic.
|
|
11
|
+
|
|
12
|
+
### Creating a Database Migration
|
|
13
|
+
|
|
14
|
+
1. Make changes to your models
|
|
15
|
+
2. Ensure the model is referenced in [alembic's env.py](src/kodit/alembic/env.py)
|
|
16
|
+
3. Run `alembic revision --autogenerate -m "your message"`
|
|
17
|
+
4. The new migration will be applied when you next run a kodit command
|
|
18
|
+
|
|
19
|
+
## Releasing
|
|
20
|
+
|
|
21
|
+
Performing a release is designed to be fully automated. If you spot opportunities to
|
|
22
|
+
improve the CI to help performing an automated release, please do so.
|
|
23
|
+
|
|
24
|
+
1. Create a new release in GitHub.
|
|
25
|
+
2. Set the version number. Use patch versions for bugfixes or minor small improvements.
|
|
26
|
+
Use minor versions when adding significant new functionality. Use major versions for
|
|
27
|
+
overhauls.
|
|
28
|
+
3. Generate the release notes. <- this could be improved, because we use a strict
|
|
29
|
+
pr/commit naming structure.
|
|
30
|
+
4. Wait for all jobs to succeed, then you should be able to brew install, pipx install, etc.
|
|
@@ -29,7 +29,6 @@ dependencies = [
|
|
|
29
29
|
"fastapi[standard]>=0.115.12",
|
|
30
30
|
"httpx-retries>=0.3.2",
|
|
31
31
|
"httpx>=0.28.1",
|
|
32
|
-
"mcp>=1.6.0",
|
|
33
32
|
"structlog>=25.3.0",
|
|
34
33
|
"posthog>=4.0.1",
|
|
35
34
|
"sqlalchemy[asyncio]>=2.0.40",
|
|
@@ -39,6 +38,11 @@ dependencies = [
|
|
|
39
38
|
"aiofiles>=24.1.0",
|
|
40
39
|
"tdqm>=0.0.1",
|
|
41
40
|
"uritools>=5.0.0",
|
|
41
|
+
"tree-sitter-language-pack>=0.7.3",
|
|
42
|
+
"tree-sitter>=0.24.0",
|
|
43
|
+
"fastmcp>=2.3.3",
|
|
44
|
+
"pydantic-settings>=2.9.1",
|
|
45
|
+
"bm25s[core]>=0.2.12",
|
|
42
46
|
]
|
|
43
47
|
|
|
44
48
|
[dependency-groups]
|
|
@@ -104,7 +108,7 @@ ignore = [
|
|
|
104
108
|
"PGH004", # If I've disabled all, I mean disable all
|
|
105
109
|
]
|
|
106
110
|
select = ["ALL"]
|
|
107
|
-
exclude = []
|
|
111
|
+
exclude = ["./tests/*"]
|
|
108
112
|
|
|
109
113
|
[[tool.uv.index]]
|
|
110
114
|
name = "pypi"
|
|
@@ -66,8 +66,6 @@ async def run_async_migrations() -> None:
|
|
|
66
66
|
prefix="sqlalchemy.",
|
|
67
67
|
poolclass=pool.NullPool,
|
|
68
68
|
)
|
|
69
|
-
log = structlog.get_logger(__name__)
|
|
70
|
-
log.debug("Running migrations on %s", connectable.url)
|
|
71
69
|
|
|
72
70
|
async with connectable.connect() as connection:
|
|
73
71
|
await connection.run_sync(do_run_migrations)
|
|
@@ -5,14 +5,10 @@ from fastapi import FastAPI
|
|
|
5
5
|
|
|
6
6
|
from kodit.mcp import mcp
|
|
7
7
|
from kodit.middleware import logging_middleware
|
|
8
|
-
from kodit.sse import create_sse_server
|
|
9
8
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
sse_app = create_sse_server(mcp)
|
|
14
|
-
for route in sse_app.routes:
|
|
15
|
-
app.router.routes.append(route)
|
|
9
|
+
# See https://gofastmcp.com/deployment/asgi#fastapi-integration
|
|
10
|
+
mcp_app = mcp.sse_app()
|
|
11
|
+
app = FastAPI(title="kodit API", lifespan=mcp_app.router.lifespan_context)
|
|
16
12
|
|
|
17
13
|
# Add middleware
|
|
18
14
|
app.middleware("http")(logging_middleware)
|
|
@@ -22,4 +18,8 @@ app.add_middleware(CorrelationIdMiddleware)
|
|
|
22
18
|
@app.get("/")
|
|
23
19
|
async def root() -> dict[str, str]:
|
|
24
20
|
"""Return a welcome message for the kodit API."""
|
|
25
|
-
return {"message": "
|
|
21
|
+
return {"message": "Hello, World!"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Add mcp routes last, otherwise previous routes aren't added
|
|
25
|
+
app.mount("", mcp_app)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""BM25 module."""
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""BM25 service."""
|
|
2
|
+
|
|
3
|
+
import bm25s
|
|
4
|
+
import Stemmer
|
|
5
|
+
import structlog
|
|
6
|
+
from bm25s.tokenization import Tokenized
|
|
7
|
+
|
|
8
|
+
from kodit.config import Config
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BM25Service:
|
|
12
|
+
"""Service for BM25."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, config: Config) -> None:
|
|
15
|
+
"""Initialize the BM25 service."""
|
|
16
|
+
self.log = structlog.get_logger(__name__)
|
|
17
|
+
self.index_path = config.get_data_dir() / "bm25s_index"
|
|
18
|
+
try:
|
|
19
|
+
self.log.debug("Loading BM25 index")
|
|
20
|
+
self.retriever = bm25s.BM25.load(self.index_path, mmap=True)
|
|
21
|
+
except FileNotFoundError:
|
|
22
|
+
self.log.debug("BM25 index not found, creating new index")
|
|
23
|
+
self.retriever = bm25s.BM25()
|
|
24
|
+
|
|
25
|
+
self.stemmer = Stemmer.Stemmer("english")
|
|
26
|
+
|
|
27
|
+
def _tokenize(self, corpus: list[str]) -> list[list[str]] | Tokenized:
|
|
28
|
+
return bm25s.tokenize(
|
|
29
|
+
corpus,
|
|
30
|
+
stopwords="en",
|
|
31
|
+
stemmer=self.stemmer,
|
|
32
|
+
return_ids=False,
|
|
33
|
+
show_progress=True,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
def index(self, corpus: list[str]) -> None:
|
|
37
|
+
"""Index a new corpus."""
|
|
38
|
+
self.log.debug("Indexing corpus")
|
|
39
|
+
vocab = self._tokenize(corpus)
|
|
40
|
+
self.retriever = bm25s.BM25()
|
|
41
|
+
self.retriever.index(vocab)
|
|
42
|
+
self.retriever.save(self.index_path)
|
|
43
|
+
|
|
44
|
+
def retrieve(
|
|
45
|
+
self, doc_ids: list[int], query: str, top_k: int = 2
|
|
46
|
+
) -> list[tuple[int, float]]:
|
|
47
|
+
"""Retrieve from the index."""
|
|
48
|
+
if top_k == 0:
|
|
49
|
+
self.log.warning("Top k is 0, returning empty list")
|
|
50
|
+
return []
|
|
51
|
+
if len(doc_ids) == 0:
|
|
52
|
+
self.log.warning("No documents to retrieve from, returning empty list")
|
|
53
|
+
return []
|
|
54
|
+
|
|
55
|
+
top_k = min(top_k, len(doc_ids))
|
|
56
|
+
self.log.debug(
|
|
57
|
+
"Retrieving from index", query=query, top_k=top_k, num_docs=len(doc_ids)
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
query_tokens = self._tokenize([query])
|
|
61
|
+
|
|
62
|
+
self.log.debug("Query tokens", query_tokens=query_tokens)
|
|
63
|
+
|
|
64
|
+
results, scores = self.retriever.retrieve(
|
|
65
|
+
query_tokens=query_tokens, corpus=doc_ids, k=top_k
|
|
66
|
+
)
|
|
67
|
+
self.log.debug("Raw results", results=results, scores=scores)
|
|
68
|
+
return [
|
|
69
|
+
(int(result), float(score))
|
|
70
|
+
for result, score in zip(results[0], scores[0], strict=False)
|
|
71
|
+
]
|
|
@@ -1,41 +1,74 @@
|
|
|
1
1
|
"""Command line interface for kodit."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import signal
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
4
7
|
|
|
5
8
|
import click
|
|
6
9
|
import structlog
|
|
7
10
|
import uvicorn
|
|
8
|
-
from dotenv import dotenv_values
|
|
9
11
|
from pytable_formatter import Table
|
|
10
12
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
11
13
|
|
|
12
|
-
from kodit.
|
|
14
|
+
from kodit.config import (
|
|
15
|
+
DEFAULT_BASE_DIR,
|
|
16
|
+
DEFAULT_DB_URL,
|
|
17
|
+
DEFAULT_DISABLE_TELEMETRY,
|
|
18
|
+
DEFAULT_LOG_FORMAT,
|
|
19
|
+
DEFAULT_LOG_LEVEL,
|
|
20
|
+
get_config,
|
|
21
|
+
reset_config,
|
|
22
|
+
with_session,
|
|
23
|
+
)
|
|
13
24
|
from kodit.indexing.repository import IndexRepository
|
|
14
25
|
from kodit.indexing.service import IndexService
|
|
15
|
-
from kodit.logging import
|
|
26
|
+
from kodit.logging import configure_logging, configure_telemetry, log_event
|
|
16
27
|
from kodit.retreival.repository import RetrievalRepository
|
|
17
28
|
from kodit.retreival.service import RetrievalRequest, RetrievalService
|
|
18
29
|
from kodit.sources.repository import SourceRepository
|
|
19
30
|
from kodit.sources.service import SourceService
|
|
20
31
|
|
|
21
|
-
env_vars = dict(dotenv_values())
|
|
22
|
-
os.environ.update(env_vars)
|
|
23
32
|
|
|
24
|
-
|
|
25
|
-
@click.
|
|
26
|
-
@click.option("--log-
|
|
27
|
-
@click.option(
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
@click.group(context_settings={"max_content_width": 100})
|
|
34
|
+
@click.option("--log-level", help=f"Log level [default: {DEFAULT_LOG_LEVEL}]")
|
|
35
|
+
@click.option("--log-format", help=f"Log format [default: {DEFAULT_LOG_FORMAT}]")
|
|
36
|
+
@click.option(
|
|
37
|
+
"--disable-telemetry",
|
|
38
|
+
is_flag=True,
|
|
39
|
+
help=f"Disable telemetry [default: {DEFAULT_DISABLE_TELEMETRY}]",
|
|
40
|
+
)
|
|
41
|
+
@click.option("--db-url", help=f"Database URL [default: {DEFAULT_DB_URL}]")
|
|
42
|
+
@click.option("--data-dir", help=f"Data directory [default: {DEFAULT_BASE_DIR}]")
|
|
43
|
+
@click.option("--env-file", help="Path to a .env file [default: .env]")
|
|
44
|
+
def cli( # noqa: PLR0913
|
|
45
|
+
log_level: str | None,
|
|
46
|
+
log_format: str | None,
|
|
47
|
+
disable_telemetry: bool | None,
|
|
48
|
+
db_url: str | None,
|
|
49
|
+
data_dir: str | None,
|
|
50
|
+
env_file: str | None,
|
|
33
51
|
) -> None:
|
|
34
52
|
"""kodit CLI - Code indexing for better AI code generation.""" # noqa: D403
|
|
35
|
-
|
|
53
|
+
# First check if env-file is set and reload config if it is
|
|
54
|
+
if env_file:
|
|
55
|
+
reset_config()
|
|
56
|
+
get_config(env_file)
|
|
57
|
+
|
|
58
|
+
# Override global config with cli args, if set
|
|
59
|
+
config = get_config()
|
|
60
|
+
if data_dir:
|
|
61
|
+
config.data_dir = Path(data_dir)
|
|
62
|
+
if db_url:
|
|
63
|
+
config.db_url = db_url
|
|
64
|
+
if log_level:
|
|
65
|
+
config.log_level = log_level
|
|
66
|
+
if log_format:
|
|
67
|
+
config.log_format = log_format
|
|
36
68
|
if disable_telemetry:
|
|
37
|
-
|
|
38
|
-
|
|
69
|
+
config.disable_telemetry = disable_telemetry
|
|
70
|
+
configure_logging(config)
|
|
71
|
+
configure_telemetry(config)
|
|
39
72
|
|
|
40
73
|
|
|
41
74
|
@cli.group()
|
|
@@ -48,7 +81,7 @@ def sources() -> None:
|
|
|
48
81
|
async def list_sources(session: AsyncSession) -> None:
|
|
49
82
|
"""List all code sources."""
|
|
50
83
|
repository = SourceRepository(session)
|
|
51
|
-
service = SourceService(repository)
|
|
84
|
+
service = SourceService(get_config().get_clone_dir(), repository)
|
|
52
85
|
sources = await service.list_sources()
|
|
53
86
|
|
|
54
87
|
# Define headers and data
|
|
@@ -66,7 +99,7 @@ async def list_sources(session: AsyncSession) -> None:
|
|
|
66
99
|
async def create_source(session: AsyncSession, uri: str) -> None:
|
|
67
100
|
"""Add a new code source."""
|
|
68
101
|
repository = SourceRepository(session)
|
|
69
|
-
service = SourceService(repository)
|
|
102
|
+
service = SourceService(get_config().get_clone_dir(), repository)
|
|
70
103
|
source = await service.create(uri)
|
|
71
104
|
click.echo(f"Source created: {source.id}")
|
|
72
105
|
|
|
@@ -82,9 +115,9 @@ def indexes() -> None:
|
|
|
82
115
|
async def create_index(session: AsyncSession, source_id: int) -> None:
|
|
83
116
|
"""Create an index for a source."""
|
|
84
117
|
source_repository = SourceRepository(session)
|
|
85
|
-
source_service = SourceService(source_repository)
|
|
118
|
+
source_service = SourceService(get_config().get_clone_dir(), source_repository)
|
|
86
119
|
repository = IndexRepository(session)
|
|
87
|
-
service = IndexService(repository, source_service)
|
|
120
|
+
service = IndexService(get_config(), repository, source_service)
|
|
88
121
|
index = await service.create(source_id)
|
|
89
122
|
click.echo(f"Index created: {index.id}")
|
|
90
123
|
|
|
@@ -94,9 +127,9 @@ async def create_index(session: AsyncSession, source_id: int) -> None:
|
|
|
94
127
|
async def list_indexes(session: AsyncSession) -> None:
|
|
95
128
|
"""List all indexes."""
|
|
96
129
|
source_repository = SourceRepository(session)
|
|
97
|
-
source_service = SourceService(source_repository)
|
|
130
|
+
source_service = SourceService(get_config().get_clone_dir(), source_repository)
|
|
98
131
|
repository = IndexRepository(session)
|
|
99
|
-
service = IndexService(repository, source_service)
|
|
132
|
+
service = IndexService(get_config(), repository, source_service)
|
|
100
133
|
indexes = await service.list_indexes()
|
|
101
134
|
|
|
102
135
|
# Define headers and data
|
|
@@ -104,7 +137,6 @@ async def list_indexes(session: AsyncSession) -> None:
|
|
|
104
137
|
"ID",
|
|
105
138
|
"Created At",
|
|
106
139
|
"Updated At",
|
|
107
|
-
"Source URI",
|
|
108
140
|
"Num Snippets",
|
|
109
141
|
]
|
|
110
142
|
data = [
|
|
@@ -112,7 +144,6 @@ async def list_indexes(session: AsyncSession) -> None:
|
|
|
112
144
|
index.id,
|
|
113
145
|
index.created_at,
|
|
114
146
|
index.updated_at,
|
|
115
|
-
index.source_uri,
|
|
116
147
|
index.num_snippets,
|
|
117
148
|
]
|
|
118
149
|
for index in indexes
|
|
@@ -129,48 +160,69 @@ async def list_indexes(session: AsyncSession) -> None:
|
|
|
129
160
|
async def run_index(session: AsyncSession, index_id: int) -> None:
|
|
130
161
|
"""Run an index."""
|
|
131
162
|
source_repository = SourceRepository(session)
|
|
132
|
-
source_service = SourceService(source_repository)
|
|
163
|
+
source_service = SourceService(get_config().get_clone_dir(), source_repository)
|
|
133
164
|
repository = IndexRepository(session)
|
|
134
|
-
service = IndexService(repository, source_service)
|
|
165
|
+
service = IndexService(get_config(), repository, source_service)
|
|
135
166
|
await service.run(index_id)
|
|
136
167
|
|
|
137
168
|
|
|
138
169
|
@cli.command()
|
|
139
170
|
@click.argument("query")
|
|
171
|
+
@click.option("--top-k", default=10, help="Number of snippets to retrieve")
|
|
140
172
|
@with_session
|
|
141
|
-
async def retrieve(session: AsyncSession, query: str) -> None:
|
|
173
|
+
async def retrieve(session: AsyncSession, query: str, top_k: int) -> None:
|
|
142
174
|
"""Retrieve snippets from the database."""
|
|
143
175
|
repository = RetrievalRepository(session)
|
|
144
|
-
service = RetrievalService(repository)
|
|
145
|
-
|
|
176
|
+
service = RetrievalService(get_config(), repository)
|
|
177
|
+
# Temporary request while we don't have all search capabilities
|
|
178
|
+
snippets = await service.retrieve(
|
|
179
|
+
RetrievalRequest(keywords=query.split(","), top_k=top_k)
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
if len(snippets) == 0:
|
|
183
|
+
click.echo("No snippets found")
|
|
184
|
+
return
|
|
146
185
|
|
|
147
186
|
for snippet in snippets:
|
|
187
|
+
click.echo("-" * 80)
|
|
148
188
|
click.echo(f"{snippet.uri}")
|
|
149
189
|
click.echo(snippet.content)
|
|
190
|
+
click.echo("-" * 80)
|
|
150
191
|
click.echo()
|
|
151
192
|
|
|
152
193
|
|
|
153
194
|
@cli.command()
|
|
154
195
|
@click.option("--host", default="127.0.0.1", help="Host to bind the server to")
|
|
155
196
|
@click.option("--port", default=8080, help="Port to bind the server to")
|
|
156
|
-
@click.option("--reload", is_flag=True, help="Enable auto-reload for development")
|
|
157
197
|
def serve(
|
|
158
198
|
host: str,
|
|
159
199
|
port: int,
|
|
160
|
-
reload: bool, # noqa: FBT001
|
|
161
200
|
) -> None:
|
|
162
201
|
"""Start the kodit server, which hosts the MCP server and the kodit API."""
|
|
163
202
|
log = structlog.get_logger(__name__)
|
|
164
|
-
log.info("Starting kodit server", host=host, port=port
|
|
203
|
+
log.info("Starting kodit server", host=host, port=port)
|
|
165
204
|
log_event("kodit_server_started")
|
|
166
|
-
|
|
205
|
+
os.environ["HELLO"] = "WORLD"
|
|
206
|
+
|
|
207
|
+
# Configure uvicorn with graceful shutdown
|
|
208
|
+
config = uvicorn.Config(
|
|
167
209
|
"kodit.app:app",
|
|
168
210
|
host=host,
|
|
169
211
|
port=port,
|
|
170
|
-
reload=
|
|
212
|
+
reload=False,
|
|
171
213
|
log_config=None, # Setting to None forces uvicorn to use our structlog setup
|
|
172
214
|
access_log=False, # Using own middleware for access logging
|
|
215
|
+
timeout_graceful_shutdown=0, # The mcp server does not shutdown cleanly, force
|
|
173
216
|
)
|
|
217
|
+
server = uvicorn.Server(config)
|
|
218
|
+
|
|
219
|
+
def handle_sigint(signum: int, frame: Any) -> None:
|
|
220
|
+
"""Handle SIGINT (Ctrl+C)."""
|
|
221
|
+
log.info("Received shutdown signal, force killing MCP connections")
|
|
222
|
+
server.handle_exit(signum, frame)
|
|
223
|
+
|
|
224
|
+
signal.signal(signal.SIGINT, handle_sigint)
|
|
225
|
+
server.run()
|
|
174
226
|
|
|
175
227
|
|
|
176
228
|
@cli.command()
|