kodit 0.1.4__tar.gz → 0.1.5__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (84)
  1. {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/pypi.yaml +0 -1
  2. {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/test.yaml +3 -0
  3. kodit-0.1.5/.vscode/launch.json +15 -0
  4. {kodit-0.1.4 → kodit-0.1.5}/.vscode/settings.json +1 -1
  5. {kodit-0.1.4 → kodit-0.1.5}/PKG-INFO +6 -2
  6. {kodit-0.1.4 → kodit-0.1.5}/docs/_index.md +2 -1
  7. {kodit-0.1.4 → kodit-0.1.5}/pyproject.toml +6 -2
  8. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/_version.py +2 -2
  9. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/env.py +0 -2
  10. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/app.py +8 -8
  11. kodit-0.1.5/src/kodit/bm25/__init__.py +1 -0
  12. kodit-0.1.5/src/kodit/bm25/bm25.py +71 -0
  13. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/cli.py +87 -33
  14. kodit-0.1.5/src/kodit/config.py +89 -0
  15. kodit-0.1.5/src/kodit/database.py +72 -0
  16. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/repository.py +11 -0
  17. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/service.py +26 -16
  18. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/logging.py +20 -18
  19. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/mcp.py +16 -4
  20. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/retreival/repository.py +32 -0
  21. kodit-0.1.5/src/kodit/retreival/service.py +68 -0
  22. kodit-0.1.5/src/kodit/snippets/__init__.py +1 -0
  23. kodit-0.1.5/src/kodit/snippets/languages/__init__.py +53 -0
  24. kodit-0.1.5/src/kodit/snippets/languages/csharp.scm +12 -0
  25. kodit-0.1.5/src/kodit/snippets/languages/python.scm +22 -0
  26. kodit-0.1.5/src/kodit/snippets/method_snippets.py +120 -0
  27. kodit-0.1.5/src/kodit/snippets/snippets.py +48 -0
  28. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/service.py +3 -5
  29. kodit-0.1.5/tests/kodit/cli_test.py +51 -0
  30. kodit-0.1.5/tests/kodit/e2e.py +145 -0
  31. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/indexing/test_service.py +8 -5
  32. kodit-0.1.5/tests/kodit/mcp_test.py +27 -0
  33. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/retreival/test_service.py +25 -5
  34. kodit-0.1.5/tests/kodit/snippets/__init__.py +0 -0
  35. kodit-0.1.5/tests/kodit/snippets/csharp.cs +44 -0
  36. kodit-0.1.5/tests/kodit/snippets/detect_language_test.py +87 -0
  37. kodit-0.1.5/tests/kodit/snippets/method_extraction_test.py +108 -0
  38. kodit-0.1.5/tests/kodit/snippets/python.py +24 -0
  39. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/sources/test_service.py +2 -2
  40. kodit-0.1.5/tests/smoke.sh +36 -0
  41. {kodit-0.1.4 → kodit-0.1.5}/uv.lock +428 -92
  42. kodit-0.1.4/src/kodit/config.py +0 -5
  43. kodit-0.1.4/src/kodit/database.py +0 -91
  44. kodit-0.1.4/src/kodit/retreival/service.py +0 -30
  45. kodit-0.1.4/src/kodit/sse.py +0 -61
  46. kodit-0.1.4/tests/kodit/cli_test.py +0 -19
  47. kodit-0.1.4/tests/kodit/mcp_test.py +0 -109
  48. kodit-0.1.4/tests/smoke.sh +0 -20
  49. {kodit-0.1.4 → kodit-0.1.5}/.cursor/rules/kodit.mdc +0 -0
  50. {kodit-0.1.4 → kodit-0.1.5}/.github/CODE_OF_CONDUCT.md +0 -0
  51. {kodit-0.1.4 → kodit-0.1.5}/.github/CONTRIBUTING.md +0 -0
  52. {kodit-0.1.4 → kodit-0.1.5}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  53. {kodit-0.1.4 → kodit-0.1.5}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  54. {kodit-0.1.4 → kodit-0.1.5}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  55. {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/docker.yaml +0 -0
  56. {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/docs.yaml +0 -0
  57. {kodit-0.1.4 → kodit-0.1.5}/.github/workflows/pypi-test.yaml +0 -0
  58. {kodit-0.1.4 → kodit-0.1.5}/.gitignore +0 -0
  59. {kodit-0.1.4 → kodit-0.1.5}/.python-version +0 -0
  60. {kodit-0.1.4 → kodit-0.1.5}/Dockerfile +0 -0
  61. {kodit-0.1.4 → kodit-0.1.5}/LICENSE +0 -0
  62. {kodit-0.1.4 → kodit-0.1.5}/README.md +0 -0
  63. {kodit-0.1.4 → kodit-0.1.5}/alembic.ini +0 -0
  64. {kodit-0.1.4 → kodit-0.1.5}/docs/developer/index.md +0 -0
  65. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/.gitignore +0 -0
  66. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/__init__.py +0 -0
  67. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/README +0 -0
  68. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/__init__.py +0 -0
  69. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/script.py.mako +0 -0
  70. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/versions/85155663351e_initial.py +0 -0
  71. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/alembic/versions/__init__.py +0 -0
  72. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/__init__.py +0 -0
  73. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/indexing/models.py +0 -0
  74. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/middleware.py +0 -0
  75. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/retreival/__init__.py +0 -0
  76. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/__init__.py +0 -0
  77. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/models.py +0 -0
  78. {kodit-0.1.4 → kodit-0.1.5}/src/kodit/sources/repository.py +0 -0
  79. {kodit-0.1.4 → kodit-0.1.5}/tests/__init__.py +0 -0
  80. {kodit-0.1.4 → kodit-0.1.5}/tests/conftest.py +0 -0
  81. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/__init__.py +0 -0
  82. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/indexing/__init__.py +0 -0
  83. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/retreival/__init__.py +0 -0
  84. {kodit-0.1.4 → kodit-0.1.5}/tests/kodit/sources/__init__.py +0 -0
@@ -32,7 +32,6 @@ jobs:
32
32
 
33
33
  REPO_NAME=${{ github.event.repository.name }}
34
34
  REPO_TAG=${{ github.event.release.tag_name }}
35
- REPO_TAG=0.1.3
36
35
 
37
36
  # Get the first letter of the repo name
38
37
  REPO_NAME_FIRST_LETTER=${REPO_NAME:0:1}
@@ -14,6 +14,7 @@ permissions:
14
14
  jobs:
15
15
  test:
16
16
  runs-on: ubuntu-latest
17
+ timeout-minutes: 10
17
18
  steps:
18
19
  - name: Checkout code
19
20
  uses: actions/checkout@v4
@@ -44,6 +45,7 @@ jobs:
44
45
 
45
46
  build-package:
46
47
  runs-on: ubuntu-latest
48
+ timeout-minutes: 10
47
49
  steps:
48
50
  - name: Checkout code
49
51
  uses: actions/checkout@v4
@@ -67,6 +69,7 @@ jobs:
67
69
  test-package:
68
70
  needs: build-package
69
71
  runs-on: ubuntu-latest
72
+ timeout-minutes: 10
70
73
  steps:
71
74
  - uses: actions/checkout@v4
72
75
  with:
@@ -0,0 +1,15 @@
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "name": "Python Debugger: Module",
6
+ "type": "debugpy",
7
+ "request": "launch",
8
+ "module": "src.kodit.cli",
9
+ "args": [
10
+ "retrieve",
11
+ "hello",
12
+ ]
13
+ }
14
+ ]
15
+ }
@@ -24,5 +24,5 @@
24
24
  "python.analysis.autoImportCompletions": true,
25
25
  "python.analysis.completeFunctionParens": true,
26
26
  "python.analysis.inlayHints.functionReturnTypes": true,
27
- "python.analysis.typeCheckingMode": "strict",
27
+ "python.analysis.typeCheckingMode": "standard",
28
28
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -22,18 +22,22 @@ Requires-Dist: aiosqlite>=0.20.0
22
22
  Requires-Dist: alembic>=1.15.2
23
23
  Requires-Dist: asgi-correlation-id>=4.3.4
24
24
  Requires-Dist: better-exceptions>=0.3.3
25
+ Requires-Dist: bm25s[core]>=0.2.12
25
26
  Requires-Dist: click>=8.1.8
26
27
  Requires-Dist: colorama>=0.4.6
27
28
  Requires-Dist: dotenv>=0.9.9
28
29
  Requires-Dist: fastapi[standard]>=0.115.12
30
+ Requires-Dist: fastmcp>=2.3.3
29
31
  Requires-Dist: httpx-retries>=0.3.2
30
32
  Requires-Dist: httpx>=0.28.1
31
- Requires-Dist: mcp>=1.6.0
32
33
  Requires-Dist: posthog>=4.0.1
34
+ Requires-Dist: pydantic-settings>=2.9.1
33
35
  Requires-Dist: pytable-formatter>=0.1.1
34
36
  Requires-Dist: sqlalchemy[asyncio]>=2.0.40
35
37
  Requires-Dist: structlog>=25.3.0
36
38
  Requires-Dist: tdqm>=0.0.1
39
+ Requires-Dist: tree-sitter-language-pack>=0.7.3
40
+ Requires-Dist: tree-sitter>=0.24.0
37
41
  Requires-Dist: uritools>=5.0.0
38
42
  Description-Content-Type: text/markdown
39
43
 
@@ -87,6 +87,7 @@ description:
87
87
  globs:
88
88
  alwaysApply: true
89
89
  ---
90
- You MUST use the code-search MCP tool and always include any file context the user has provided in the chat.```
90
+ You MUST use the code-search MCP tool and always include any file context the user has provided in the chat.
91
+ ```
91
92
 
92
93
  Alternatively, you can browse to the cursor settings and set this prompt globally.
@@ -29,7 +29,6 @@ dependencies = [
29
29
  "fastapi[standard]>=0.115.12",
30
30
  "httpx-retries>=0.3.2",
31
31
  "httpx>=0.28.1",
32
- "mcp>=1.6.0",
33
32
  "structlog>=25.3.0",
34
33
  "posthog>=4.0.1",
35
34
  "sqlalchemy[asyncio]>=2.0.40",
@@ -39,6 +38,11 @@ dependencies = [
39
38
  "aiofiles>=24.1.0",
40
39
  "tdqm>=0.0.1",
41
40
  "uritools>=5.0.0",
41
+ "tree-sitter-language-pack>=0.7.3",
42
+ "tree-sitter>=0.24.0",
43
+ "fastmcp>=2.3.3",
44
+ "pydantic-settings>=2.9.1",
45
+ "bm25s[core]>=0.2.12",
42
46
  ]
43
47
 
44
48
  [dependency-groups]
@@ -104,7 +108,7 @@ ignore = [
104
108
  "PGH004", # If I've disabled all, I mean disable all
105
109
  ]
106
110
  select = ["ALL"]
107
- exclude = []
111
+ exclude = ["./tests/*"]
108
112
 
109
113
  [[tool.uv.index]]
110
114
  name = "pypi"
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.4'
21
- __version_tuple__ = version_tuple = (0, 1, 4)
20
+ __version__ = version = '0.1.5'
21
+ __version_tuple__ = version_tuple = (0, 1, 5)
@@ -66,8 +66,6 @@ async def run_async_migrations() -> None:
66
66
  prefix="sqlalchemy.",
67
67
  poolclass=pool.NullPool,
68
68
  )
69
- log = structlog.get_logger(__name__)
70
- log.debug("Running migrations on %s", connectable.url)
71
69
 
72
70
  async with connectable.connect() as connection:
73
71
  await connection.run_sync(do_run_migrations)
@@ -5,14 +5,10 @@ from fastapi import FastAPI
5
5
 
6
6
  from kodit.mcp import mcp
7
7
  from kodit.middleware import logging_middleware
8
- from kodit.sse import create_sse_server
9
8
 
10
- app = FastAPI(title="kodit API")
11
-
12
- # Get the SSE routes from the Starlette app hosting the MCP server
13
- sse_app = create_sse_server(mcp)
14
- for route in sse_app.routes:
15
- app.router.routes.append(route)
9
+ # See https://gofastmcp.com/deployment/asgi#fastapi-integration
10
+ mcp_app = mcp.sse_app()
11
+ app = FastAPI(title="kodit API", lifespan=mcp_app.router.lifespan_context)
16
12
 
17
13
  # Add middleware
18
14
  app.middleware("http")(logging_middleware)
@@ -22,4 +18,8 @@ app.add_middleware(CorrelationIdMiddleware)
22
18
  @app.get("/")
23
19
  async def root() -> dict[str, str]:
24
20
  """Return a welcome message for the kodit API."""
25
- return {"message": "Welcome to kodit API"}
21
+ return {"message": "Hello, World!"}
22
+
23
+
24
+ # Add mcp routes last, otherwise previous routes aren't added
25
+ app.mount("", mcp_app)
@@ -0,0 +1 @@
1
+ """BM25 module."""
@@ -0,0 +1,71 @@
1
+ """BM25 service."""
2
+
3
+ import bm25s
4
+ import Stemmer
5
+ import structlog
6
+ from bm25s.tokenization import Tokenized
7
+
8
+ from kodit.config import Config
9
+
10
+
11
+ class BM25Service:
12
+ """Service for BM25."""
13
+
14
+ def __init__(self, config: Config) -> None:
15
+ """Initialize the BM25 service."""
16
+ self.log = structlog.get_logger(__name__)
17
+ self.index_path = config.get_data_dir() / "bm25s_index"
18
+ try:
19
+ self.log.debug("Loading BM25 index")
20
+ self.retriever = bm25s.BM25.load(self.index_path, mmap=True)
21
+ except FileNotFoundError:
22
+ self.log.debug("BM25 index not found, creating new index")
23
+ self.retriever = bm25s.BM25()
24
+
25
+ self.stemmer = Stemmer.Stemmer("english")
26
+
27
+ def _tokenize(self, corpus: list[str]) -> list[list[str]] | Tokenized:
28
+ return bm25s.tokenize(
29
+ corpus,
30
+ stopwords="en",
31
+ stemmer=self.stemmer,
32
+ return_ids=False,
33
+ show_progress=True,
34
+ )
35
+
36
+ def index(self, corpus: list[str]) -> None:
37
+ """Index a new corpus."""
38
+ self.log.debug("Indexing corpus")
39
+ vocab = self._tokenize(corpus)
40
+ self.retriever = bm25s.BM25()
41
+ self.retriever.index(vocab)
42
+ self.retriever.save(self.index_path)
43
+
44
+ def retrieve(
45
+ self, doc_ids: list[int], query: str, top_k: int = 2
46
+ ) -> list[tuple[int, float]]:
47
+ """Retrieve from the index."""
48
+ if top_k == 0:
49
+ self.log.warning("Top k is 0, returning empty list")
50
+ return []
51
+ if len(doc_ids) == 0:
52
+ self.log.warning("No documents to retrieve from, returning empty list")
53
+ return []
54
+
55
+ top_k = min(top_k, len(doc_ids))
56
+ self.log.debug(
57
+ "Retrieving from index", query=query, top_k=top_k, num_docs=len(doc_ids)
58
+ )
59
+
60
+ query_tokens = self._tokenize([query])
61
+
62
+ self.log.debug("Query tokens", query_tokens=query_tokens)
63
+
64
+ results, scores = self.retriever.retrieve(
65
+ query_tokens=query_tokens, corpus=doc_ids, k=top_k
66
+ )
67
+ self.log.debug("Raw results", results=results, scores=scores)
68
+ return [
69
+ (int(result), float(score))
70
+ for result, score in zip(results[0], scores[0], strict=False)
71
+ ]
@@ -1,41 +1,74 @@
1
1
  """Command line interface for kodit."""
2
2
 
3
3
  import os
4
+ import signal
5
+ from pathlib import Path
6
+ from typing import Any
4
7
 
5
8
  import click
6
9
  import structlog
7
10
  import uvicorn
8
- from dotenv import dotenv_values
9
11
  from pytable_formatter import Table
10
12
  from sqlalchemy.ext.asyncio import AsyncSession
11
13
 
12
- from kodit.database import configure_database, with_session
14
+ from kodit.config import (
15
+ DEFAULT_BASE_DIR,
16
+ DEFAULT_DB_URL,
17
+ DEFAULT_DISABLE_TELEMETRY,
18
+ DEFAULT_LOG_FORMAT,
19
+ DEFAULT_LOG_LEVEL,
20
+ get_config,
21
+ reset_config,
22
+ with_session,
23
+ )
13
24
  from kodit.indexing.repository import IndexRepository
14
25
  from kodit.indexing.service import IndexService
15
- from kodit.logging import LogFormat, configure_logging, disable_posthog, log_event
26
+ from kodit.logging import configure_logging, configure_telemetry, log_event
16
27
  from kodit.retreival.repository import RetrievalRepository
17
28
  from kodit.retreival.service import RetrievalRequest, RetrievalService
18
29
  from kodit.sources.repository import SourceRepository
19
30
  from kodit.sources.service import SourceService
20
31
 
21
- env_vars = dict(dotenv_values())
22
- os.environ.update(env_vars)
23
32
 
24
-
25
- @click.group(context_settings={"auto_envvar_prefix": "KODIT", "show_default": True})
26
- @click.option("--log-level", default="INFO", help="Log level")
27
- @click.option("--log-format", default=LogFormat.PRETTY, help="Log format")
28
- @click.option("--disable-telemetry", is_flag=True, help="Disable telemetry")
29
- def cli(
30
- log_level: str,
31
- log_format: LogFormat,
32
- disable_telemetry: bool, # noqa: FBT001
33
+ @click.group(context_settings={"max_content_width": 100})
34
+ @click.option("--log-level", help=f"Log level [default: {DEFAULT_LOG_LEVEL}]")
35
+ @click.option("--log-format", help=f"Log format [default: {DEFAULT_LOG_FORMAT}]")
36
+ @click.option(
37
+ "--disable-telemetry",
38
+ is_flag=True,
39
+ help=f"Disable telemetry [default: {DEFAULT_DISABLE_TELEMETRY}]",
40
+ )
41
+ @click.option("--db-url", help=f"Database URL [default: {DEFAULT_DB_URL}]")
42
+ @click.option("--data-dir", help=f"Data directory [default: {DEFAULT_BASE_DIR}]")
43
+ @click.option("--env-file", help="Path to a .env file [default: .env]")
44
+ def cli( # noqa: PLR0913
45
+ log_level: str | None,
46
+ log_format: str | None,
47
+ disable_telemetry: bool | None,
48
+ db_url: str | None,
49
+ data_dir: str | None,
50
+ env_file: str | None,
33
51
  ) -> None:
34
52
  """kodit CLI - Code indexing for better AI code generation.""" # noqa: D403
35
- configure_logging(log_level, log_format)
53
+ # First check if env-file is set and reload config if it is
54
+ if env_file:
55
+ reset_config()
56
+ get_config(env_file)
57
+
58
+ # Override global config with cli args, if set
59
+ config = get_config()
60
+ if data_dir:
61
+ config.data_dir = Path(data_dir)
62
+ if db_url:
63
+ config.db_url = db_url
64
+ if log_level:
65
+ config.log_level = log_level
66
+ if log_format:
67
+ config.log_format = log_format
36
68
  if disable_telemetry:
37
- disable_posthog()
38
- configure_database()
69
+ config.disable_telemetry = disable_telemetry
70
+ configure_logging(config)
71
+ configure_telemetry(config)
39
72
 
40
73
 
41
74
  @cli.group()
@@ -48,7 +81,7 @@ def sources() -> None:
48
81
  async def list_sources(session: AsyncSession) -> None:
49
82
  """List all code sources."""
50
83
  repository = SourceRepository(session)
51
- service = SourceService(repository)
84
+ service = SourceService(get_config().get_clone_dir(), repository)
52
85
  sources = await service.list_sources()
53
86
 
54
87
  # Define headers and data
@@ -66,7 +99,7 @@ async def list_sources(session: AsyncSession) -> None:
66
99
  async def create_source(session: AsyncSession, uri: str) -> None:
67
100
  """Add a new code source."""
68
101
  repository = SourceRepository(session)
69
- service = SourceService(repository)
102
+ service = SourceService(get_config().get_clone_dir(), repository)
70
103
  source = await service.create(uri)
71
104
  click.echo(f"Source created: {source.id}")
72
105
 
@@ -82,9 +115,9 @@ def indexes() -> None:
82
115
  async def create_index(session: AsyncSession, source_id: int) -> None:
83
116
  """Create an index for a source."""
84
117
  source_repository = SourceRepository(session)
85
- source_service = SourceService(source_repository)
118
+ source_service = SourceService(get_config().get_clone_dir(), source_repository)
86
119
  repository = IndexRepository(session)
87
- service = IndexService(repository, source_service)
120
+ service = IndexService(get_config(), repository, source_service)
88
121
  index = await service.create(source_id)
89
122
  click.echo(f"Index created: {index.id}")
90
123
 
@@ -94,9 +127,9 @@ async def create_index(session: AsyncSession, source_id: int) -> None:
94
127
  async def list_indexes(session: AsyncSession) -> None:
95
128
  """List all indexes."""
96
129
  source_repository = SourceRepository(session)
97
- source_service = SourceService(source_repository)
130
+ source_service = SourceService(get_config().get_clone_dir(), source_repository)
98
131
  repository = IndexRepository(session)
99
- service = IndexService(repository, source_service)
132
+ service = IndexService(get_config(), repository, source_service)
100
133
  indexes = await service.list_indexes()
101
134
 
102
135
  # Define headers and data
@@ -127,48 +160,69 @@ async def list_indexes(session: AsyncSession) -> None:
127
160
  async def run_index(session: AsyncSession, index_id: int) -> None:
128
161
  """Run an index."""
129
162
  source_repository = SourceRepository(session)
130
- source_service = SourceService(source_repository)
163
+ source_service = SourceService(get_config().get_clone_dir(), source_repository)
131
164
  repository = IndexRepository(session)
132
- service = IndexService(repository, source_service)
165
+ service = IndexService(get_config(), repository, source_service)
133
166
  await service.run(index_id)
134
167
 
135
168
 
136
169
  @cli.command()
137
170
  @click.argument("query")
171
+ @click.option("--top-k", default=10, help="Number of snippets to retrieve")
138
172
  @with_session
139
- async def retrieve(session: AsyncSession, query: str) -> None:
173
+ async def retrieve(session: AsyncSession, query: str, top_k: int) -> None:
140
174
  """Retrieve snippets from the database."""
141
175
  repository = RetrievalRepository(session)
142
- service = RetrievalService(repository)
143
- snippets = await service.retrieve(RetrievalRequest(query=query))
176
+ service = RetrievalService(get_config(), repository)
177
+ # Temporary request while we don't have all search capabilities
178
+ snippets = await service.retrieve(
179
+ RetrievalRequest(keywords=query.split(","), top_k=top_k)
180
+ )
181
+
182
+ if len(snippets) == 0:
183
+ click.echo("No snippets found")
184
+ return
144
185
 
145
186
  for snippet in snippets:
187
+ click.echo("-" * 80)
146
188
  click.echo(f"{snippet.uri}")
147
189
  click.echo(snippet.content)
190
+ click.echo("-" * 80)
148
191
  click.echo()
149
192
 
150
193
 
151
194
  @cli.command()
152
195
  @click.option("--host", default="127.0.0.1", help="Host to bind the server to")
153
196
  @click.option("--port", default=8080, help="Port to bind the server to")
154
- @click.option("--reload", is_flag=True, help="Enable auto-reload for development")
155
197
  def serve(
156
198
  host: str,
157
199
  port: int,
158
- reload: bool, # noqa: FBT001
159
200
  ) -> None:
160
201
  """Start the kodit server, which hosts the MCP server and the kodit API."""
161
202
  log = structlog.get_logger(__name__)
162
- log.info("Starting kodit server", host=host, port=port, reload=reload)
203
+ log.info("Starting kodit server", host=host, port=port)
163
204
  log_event("kodit_server_started")
164
- uvicorn.run(
205
+ os.environ["HELLO"] = "WORLD"
206
+
207
+ # Configure uvicorn with graceful shutdown
208
+ config = uvicorn.Config(
165
209
  "kodit.app:app",
166
210
  host=host,
167
211
  port=port,
168
- reload=reload,
212
+ reload=False,
169
213
  log_config=None, # Setting to None forces uvicorn to use our structlog setup
170
214
  access_log=False, # Using own middleware for access logging
215
+ timeout_graceful_shutdown=0, # The mcp server does not shutdown cleanly, force
171
216
  )
217
+ server = uvicorn.Server(config)
218
+
219
+ def handle_sigint(signum: int, frame: Any) -> None:
220
+ """Handle SIGINT (Ctrl+C)."""
221
+ log.info("Received shutdown signal, force killing MCP connections")
222
+ server.handle_exit(signum, frame)
223
+
224
+ signal.signal(signal.SIGINT, handle_sigint)
225
+ server.run()
172
226
 
173
227
 
174
228
  @cli.command()
@@ -0,0 +1,89 @@
1
+ """Global configuration for the kodit project."""
2
+
3
+ import asyncio
4
+ from collections.abc import Callable
5
+ from functools import wraps
6
+ from pathlib import Path
7
+ from typing import Any, TypeVar
8
+
9
+ from pydantic import Field
10
+ from pydantic_settings import BaseSettings, SettingsConfigDict
11
+
12
+ from kodit.database import Database
13
+
14
+ DEFAULT_BASE_DIR = Path.home() / ".kodit"
15
+ DEFAULT_DB_URL = f"sqlite+aiosqlite:///{DEFAULT_BASE_DIR}/kodit.db"
16
+ DEFAULT_LOG_LEVEL = "INFO"
17
+ DEFAULT_LOG_FORMAT = "pretty"
18
+ DEFAULT_DISABLE_TELEMETRY = False
19
+ T = TypeVar("T")
20
+
21
+
22
+ class Config(BaseSettings):
23
+ """Global configuration for the kodit project."""
24
+
25
+ model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
26
+
27
+ data_dir: Path = Field(default=DEFAULT_BASE_DIR)
28
+ db_url: str = Field(default=DEFAULT_DB_URL)
29
+ log_level: str = Field(default=DEFAULT_LOG_LEVEL)
30
+ log_format: str = Field(default=DEFAULT_LOG_FORMAT)
31
+ disable_telemetry: bool = Field(default=DEFAULT_DISABLE_TELEMETRY)
32
+ _db: Database | None = None
33
+
34
+ def model_post_init(self, _: Any) -> None:
35
+ """Post-initialization hook."""
36
+ # Call this to ensure the data dir exists for the default db location
37
+ self.get_data_dir()
38
+
39
+ def get_data_dir(self) -> Path:
40
+ """Get the data directory."""
41
+ self.data_dir.mkdir(parents=True, exist_ok=True)
42
+ return self.data_dir
43
+
44
+ def get_clone_dir(self) -> Path:
45
+ """Get the clone directory."""
46
+ clone_dir = self.get_data_dir() / "clones"
47
+ clone_dir.mkdir(parents=True, exist_ok=True)
48
+ return clone_dir
49
+
50
+ def get_db(self, *, run_migrations: bool = True) -> Database:
51
+ """Get the database."""
52
+ if self._db is None:
53
+ self._db = Database(self.db_url, run_migrations=run_migrations)
54
+ return self._db
55
+
56
+
57
+ # Global config instance for mcp Apps
58
+ config = None
59
+
60
+
61
+ def get_config(env_file: str | None = None) -> Config:
62
+ """Get the global config instance."""
63
+ global config # noqa: PLW0603
64
+ if config is None:
65
+ config = Config(_env_file=env_file)
66
+ return config
67
+
68
+
69
+ def reset_config() -> None:
70
+ """Reset the global config instance."""
71
+ global config # noqa: PLW0603
72
+ config = None
73
+
74
+
75
+ def with_session(func: Callable[..., T]) -> Callable[..., T]:
76
+ """Provide an async session to CLI commands."""
77
+
78
+ @wraps(func)
79
+ def wrapper(*args: Any, **kwargs: Any) -> T:
80
+ # Create DB connection before starting event loop
81
+ db = get_config().get_db()
82
+
83
+ async def _run() -> T:
84
+ async with db.get_session() as session:
85
+ return await func(session, *args, **kwargs)
86
+
87
+ return asyncio.run(_run())
88
+
89
+ return wrapper
@@ -0,0 +1,72 @@
1
+ """Database configuration for kodit."""
2
+
3
+ from collections.abc import AsyncGenerator
4
+ from contextlib import asynccontextmanager
5
+ from datetime import UTC, datetime
6
+ from pathlib import Path
7
+
8
+ import structlog
9
+ from alembic import command
10
+ from alembic.config import Config as AlembicConfig
11
+ from sqlalchemy import DateTime
12
+ from sqlalchemy.ext.asyncio import (
13
+ AsyncAttrs,
14
+ AsyncSession,
15
+ async_sessionmaker,
16
+ create_async_engine,
17
+ )
18
+ from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
19
+
20
+ from kodit import alembic
21
+
22
+
23
+ class Base(AsyncAttrs, DeclarativeBase):
24
+ """Base class for all models."""
25
+
26
+
27
+ class CommonMixin:
28
+ """Common mixin for all models."""
29
+
30
+ id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
31
+ created_at: Mapped[datetime] = mapped_column(
32
+ DateTime, default=lambda: datetime.now(UTC)
33
+ )
34
+ updated_at: Mapped[datetime] = mapped_column(
35
+ DateTime, default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)
36
+ )
37
+
38
+
39
+ class Database:
40
+ """Database class for kodit."""
41
+
42
+ def __init__(self, db_url: str, *, run_migrations: bool = True) -> None:
43
+ """Initialize the database."""
44
+ self.log = structlog.get_logger(__name__)
45
+ if run_migrations:
46
+ self._run_migrations(db_url)
47
+ db_engine = create_async_engine(db_url, echo=False)
48
+ self.db_session_factory = async_sessionmaker(
49
+ db_engine,
50
+ class_=AsyncSession,
51
+ expire_on_commit=False,
52
+ )
53
+
54
+ @asynccontextmanager
55
+ async def get_session(self) -> AsyncGenerator[AsyncSession, None]:
56
+ """Get a database session."""
57
+ async with self.db_session_factory() as session:
58
+ try:
59
+ yield session
60
+ finally:
61
+ await session.close()
62
+
63
+ def _run_migrations(self, db_url: str) -> None:
64
+ """Run any pending migrations."""
65
+ # Create Alembic configuration and run migrations
66
+ alembic_cfg = AlembicConfig()
67
+ alembic_cfg.set_main_option(
68
+ "script_location", str(Path(alembic.__file__).parent)
69
+ )
70
+ alembic_cfg.set_main_option("sqlalchemy.url", db_url)
71
+ self.log.debug("Running migrations", db_url=db_url)
72
+ command.upgrade(alembic_cfg, "head")
@@ -130,3 +130,14 @@ class IndexRepository:
130
130
  query = select(Snippet).where(Snippet.index_id == index_id)
131
131
  result = await self.session.execute(query)
132
132
  return list(result.scalars())
133
+
134
+ async def get_all_snippets(self) -> list[Snippet]:
135
+ """Get all snippets.
136
+
137
+ Returns:
138
+ A list of all snippets.
139
+
140
+ """
141
+ query = select(Snippet).order_by(Snippet.id)
142
+ result = await self.session.execute(query)
143
+ return list(result.scalars())