kodit 0.1.16__tar.gz → 0.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (128) hide show
  1. {kodit-0.1.16 → kodit-0.1.18}/.github/workflows/docker.yaml +26 -1
  2. {kodit-0.1.16 → kodit-0.1.18}/Dockerfile +2 -2
  3. kodit-0.1.18/PKG-INFO +152 -0
  4. kodit-0.1.18/README.md +101 -0
  5. kodit-0.1.18/docs/demos/_index.md +13 -0
  6. kodit-0.1.18/docs/demos/knock-knock-auth/index.md +333 -0
  7. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/_version.py +2 -2
  8. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/vectorchord_vector_search_service.py +2 -0
  9. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/indexing/indexing_repository.py +22 -24
  10. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/indexing/indexing_service.py +10 -6
  11. kodit-0.1.18/src/kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +44 -0
  12. kodit-0.1.18/src/kodit/snippets/languages/javascript.scm +24 -0
  13. kodit-0.1.18/src/kodit/snippets/languages/typescript.scm +25 -0
  14. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/source/source_models.py +4 -4
  15. kodit-0.1.18/tests/kodit/indexing/indexing_repository_test.py +96 -0
  16. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/indexing/indexing_service_test.py +7 -4
  17. kodit-0.1.18/tests/kodit/snippets/javascript.js +74 -0
  18. kodit-0.1.18/tests/kodit/snippets/knock-knock-server.py +89 -0
  19. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/snippets/method_extraction_test.py +62 -0
  20. kodit-0.1.18/tests/kodit/snippets/typescript.tsx +73 -0
  21. kodit-0.1.16/PKG-INFO +0 -91
  22. kodit-0.1.16/README.md +0 -40
  23. {kodit-0.1.16 → kodit-0.1.18}/.cursor/rules/kodit.mdc +0 -0
  24. {kodit-0.1.16 → kodit-0.1.18}/.github/CODE_OF_CONDUCT.md +0 -0
  25. {kodit-0.1.16 → kodit-0.1.18}/.github/CONTRIBUTING.md +0 -0
  26. {kodit-0.1.16 → kodit-0.1.18}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  27. {kodit-0.1.16 → kodit-0.1.18}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  28. {kodit-0.1.16 → kodit-0.1.18}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  29. {kodit-0.1.16 → kodit-0.1.18}/.github/dependabot.yml +0 -0
  30. {kodit-0.1.16 → kodit-0.1.18}/.github/workflows/docs.yaml +0 -0
  31. {kodit-0.1.16 → kodit-0.1.18}/.github/workflows/pull_request.yaml +0 -0
  32. {kodit-0.1.16 → kodit-0.1.18}/.github/workflows/pypi-test.yaml +0 -0
  33. {kodit-0.1.16 → kodit-0.1.18}/.github/workflows/pypi.yaml +0 -0
  34. {kodit-0.1.16 → kodit-0.1.18}/.github/workflows/test.yaml +0 -0
  35. {kodit-0.1.16 → kodit-0.1.18}/.gitignore +0 -0
  36. {kodit-0.1.16 → kodit-0.1.18}/.python-version +0 -0
  37. {kodit-0.1.16 → kodit-0.1.18}/.vscode/launch.json +0 -0
  38. {kodit-0.1.16 → kodit-0.1.18}/.vscode/settings.json +0 -0
  39. {kodit-0.1.16 → kodit-0.1.18}/LICENSE +0 -0
  40. {kodit-0.1.16 → kodit-0.1.18}/alembic.ini +0 -0
  41. {kodit-0.1.16 → kodit-0.1.18}/docs/_index.md +0 -0
  42. {kodit-0.1.16 → kodit-0.1.18}/docs/developer/index.md +0 -0
  43. {kodit-0.1.16 → kodit-0.1.18}/pyproject.toml +0 -0
  44. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/.gitignore +0 -0
  45. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/__init__.py +0 -0
  46. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/app.py +0 -0
  47. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/bm25/__init__.py +0 -0
  48. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/bm25/keyword_search_factory.py +0 -0
  49. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/bm25/keyword_search_service.py +0 -0
  50. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/bm25/local_bm25.py +0 -0
  51. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/bm25/vectorchord_bm25.py +0 -0
  52. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/cli.py +0 -0
  53. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/config.py +0 -0
  54. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/database.py +0 -0
  55. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/__init__.py +0 -0
  56. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_factory.py +0 -0
  57. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_models.py +0 -0
  58. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_provider/__init__.py +0 -0
  59. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_provider/embedding_provider.py +0 -0
  60. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -0
  61. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_provider/local_embedding_provider.py +0 -0
  62. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -0
  63. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/embedding_repository.py +0 -0
  64. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/local_vector_search_service.py +0 -0
  65. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/embedding/vector_search_service.py +0 -0
  66. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/__init__.py +0 -0
  67. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/enrichment_factory.py +0 -0
  68. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/enrichment_provider/__init__.py +0 -0
  69. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -0
  70. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/enrichment_provider/local_enrichment_provider.py +0 -0
  71. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -0
  72. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/enrichment/enrichment_service.py +0 -0
  73. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/indexing/__init__.py +0 -0
  74. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/indexing/fusion.py +0 -0
  75. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/indexing/indexing_models.py +0 -0
  76. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/log.py +0 -0
  77. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/mcp.py +0 -0
  78. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/middleware.py +0 -0
  79. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/README +0 -0
  80. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/__init__.py +0 -0
  81. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/env.py +0 -0
  82. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/script.py.mako +0 -0
  83. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +0 -0
  84. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/versions/85155663351e_initial.py +0 -0
  85. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/migrations/versions/__init__.py +0 -0
  86. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/__init__.py +0 -0
  87. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/languages/__init__.py +0 -0
  88. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/languages/csharp.scm +0 -0
  89. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/languages/go.scm +0 -0
  90. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/languages/python.scm +0 -0
  91. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/method_snippets.py +0 -0
  92. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/snippets/snippets.py +0 -0
  93. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/source/__init__.py +0 -0
  94. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/source/source_repository.py +0 -0
  95. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/source/source_service.py +0 -0
  96. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/util/__init__.py +0 -0
  97. {kodit-0.1.16 → kodit-0.1.18}/src/kodit/util/spinner.py +0 -0
  98. {kodit-0.1.16 → kodit-0.1.18}/tests/__init__.py +0 -0
  99. {kodit-0.1.16 → kodit-0.1.18}/tests/conftest.py +0 -0
  100. {kodit-0.1.16 → kodit-0.1.18}/tests/experiments/cline-prompt-regression-tests/cline_prompt.txt +0 -0
  101. {kodit-0.1.16 → kodit-0.1.18}/tests/experiments/cline-prompt-regression-tests/cline_prompt_test.py +0 -0
  102. {kodit-0.1.16 → kodit-0.1.18}/tests/experiments/embedding.py +0 -0
  103. {kodit-0.1.16 → kodit-0.1.18}/tests/experiments/similarity_test.py +0 -0
  104. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/__init__.py +0 -0
  105. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/bm25/local_bm25_test.py +0 -0
  106. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/bm25/vectorchord_repository_test.py +0 -0
  107. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/cli_test.py +0 -0
  108. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/e2e.py +0 -0
  109. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/embedding/__init__.py +0 -0
  110. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/embedding/embedding_provider/local_embedding_provider_test.py +0 -0
  111. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/embedding/embedding_provider/openai_embedding_provider_test.py +0 -0
  112. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/embedding/local_vector_search_service_test.py +0 -0
  113. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/embedding/vectorchord_vector_search_service_test.py +0 -0
  114. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/enrichment/__init__.py +0 -0
  115. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/enrichment/enrichment_provider/__init__.py +0 -0
  116. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/enrichment/enrichment_provider/openai_enrichment_provider_test.py +0 -0
  117. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/indexing/__init__.py +0 -0
  118. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/mcp_test.py +0 -0
  119. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/snippets/__init__.py +0 -0
  120. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/snippets/csharp.cs +0 -0
  121. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/snippets/detect_language_test.py +0 -0
  122. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/snippets/golang.go +0 -0
  123. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/snippets/python.py +0 -0
  124. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/source/__init__.py +0 -0
  125. {kodit-0.1.16 → kodit-0.1.18}/tests/kodit/source/source_service_test.py +0 -0
  126. {kodit-0.1.16 → kodit-0.1.18}/tests/performance/similarity.py +0 -0
  127. {kodit-0.1.16 → kodit-0.1.18}/tests/smoke.sh +0 -0
  128. {kodit-0.1.16 → kodit-0.1.18}/uv.lock +0 -0
@@ -2,8 +2,30 @@ name: Publish Docker image
2
2
 
3
3
  on:
4
4
  push:
5
+ pull_request_target:
6
+ types: [opened, synchronize, reopened]
7
+
8
+ env:
9
+ TEST_TAG: user/app:test
5
10
 
6
11
  jobs:
12
+ test-build:
13
+ name: Test and build Docker image
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - name: Check out the repo
17
+ uses: actions/checkout@v4
18
+
19
+ - name: Build and push Docker image
20
+ id: push
21
+ uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
22
+ with:
23
+ context: .
24
+ file: ./Dockerfile
25
+ push: false
26
+ load: true
27
+ tags: ${{ env.TEST_TAG }}
28
+
7
29
  push_to_registry:
8
30
  name: Push Docker image to registry
9
31
  runs-on: ubuntu-latest
@@ -14,6 +36,9 @@ jobs:
14
36
  contents: read
15
37
  attestations: write
16
38
  id-token: write
39
+ needs: test-build
40
+ # Only run on pushes, or on pull requests opened from a branch of this repository (not from forks)
41
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository
17
42
  steps:
18
43
  - name: Check out the repo
19
44
  uses: actions/checkout@v4
@@ -46,4 +71,4 @@ jobs:
46
71
  with:
47
72
  subject-name: ${{ vars.REGISTRY }}/${{ vars.REGISTRY_ORG }}/${{ github.event.repository.name }}
48
73
  subject-digest: ${{ steps.push.outputs.digest }}
49
- push-to-registry: true
74
+ push-to-registry: true
@@ -1,5 +1,5 @@
1
1
  # syntax=docker/dockerfile:1.9
2
- FROM python:3.13.3-slim-bookworm AS build
2
+ FROM python:3.13.4-slim-bookworm AS build
3
3
 
4
4
  # The following does not work in Podman unless you build in Docker
5
5
  # compatibility mode: <https://github.com/containers/podman/issues/8477>
@@ -60,7 +60,7 @@ RUN --mount=type=cache,target=/root/.cache \
60
60
 
61
61
  ##########################################################################
62
62
 
63
- FROM python:3.13.3-slim-bookworm
63
+ FROM python:3.13.4-slim-bookworm
64
64
  SHELL ["sh", "-exc"]
65
65
 
66
66
  ENV PATH=/app/bin:$PATH
kodit-0.1.18/PKG-INFO ADDED
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: kodit
3
+ Version: 0.1.18
4
+ Summary: Code indexing for better AI code generation
5
+ Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
+ Project-URL: Documentation, https://docs.helixml.tech/kodit/
7
+ Project-URL: Repository, https://github.com/helixml/kodit.git
8
+ Project-URL: Issues, https://github.com/helixml/kodit/issues
9
+ Project-URL: Changelog, https://github.com/helixml/kodit/releases
10
+ Author-email: "Helix.ML" <founders@helix.ml>
11
+ Maintainer-email: "Helix.ML" <founders@helix.ml>
12
+ License-Expression: Apache-2.0
13
+ License-File: LICENSE
14
+ Keywords: ai,indexing,mcp,rag
15
+ Classifier: Development Status :: 2 - Pre-Alpha
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Code Generators
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: aiofiles>=24.1.0
22
+ Requires-Dist: aiosqlite>=0.20.0
23
+ Requires-Dist: alembic>=1.15.2
24
+ Requires-Dist: asgi-correlation-id>=4.3.4
25
+ Requires-Dist: asyncpg>=0.30.0
26
+ Requires-Dist: better-exceptions>=0.3.3
27
+ Requires-Dist: bm25s[core]>=0.2.12
28
+ Requires-Dist: click>=8.1.8
29
+ Requires-Dist: colorama>=0.4.6
30
+ Requires-Dist: dotenv>=0.9.9
31
+ Requires-Dist: fastapi[standard]>=0.115.12
32
+ Requires-Dist: fastmcp>=2.3.3
33
+ Requires-Dist: gitpython>=3.1.44
34
+ Requires-Dist: hf-xet>=1.1.2
35
+ Requires-Dist: httpx-retries>=0.3.2
36
+ Requires-Dist: httpx>=0.28.1
37
+ Requires-Dist: openai>=1.82.0
38
+ Requires-Dist: posthog>=4.0.1
39
+ Requires-Dist: pydantic-settings>=2.9.1
40
+ Requires-Dist: pytable-formatter>=0.1.1
41
+ Requires-Dist: sentence-transformers>=4.1.0
42
+ Requires-Dist: sqlalchemy[asyncio]>=2.0.40
43
+ Requires-Dist: structlog>=25.3.0
44
+ Requires-Dist: tdqm>=0.0.1
45
+ Requires-Dist: tiktoken>=0.9.0
46
+ Requires-Dist: transformers>=4.51.3
47
+ Requires-Dist: tree-sitter-language-pack>=0.7.3
48
+ Requires-Dist: tree-sitter>=0.24.0
49
+ Requires-Dist: uritools>=5.0.0
50
+ Description-Content-Type: text/markdown
51
+
52
+ <p align="center">
53
+ <a href="https://docs.helix.ml/kodit/"><img src="https://docs.helix.ml/images/helix-kodit-logo.png" alt="Helix Kodit Logo" width="300"></a>
54
+ </p>
55
+
56
+ <h1 align="center">
57
+ Kodit: A Code Indexing MCP Server
58
+ </h1>
59
+
60
+ <p align="center">
61
+ Kodit connects your AI coding assistant to external codebases to provide accurate and up-to-date snippets of code.
62
+ </p>
63
+
64
+ <div align="center">
65
+
66
+ [![Documentation](https://img.shields.io/badge/Documentation-6B46C1?style=for-the-badge&logo=readthedocs&logoColor=white)](https://docs.helix.ml/kodit/)
67
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=for-the-badge)](./LICENSE)
68
+ [![Discussions](https://img.shields.io/badge/Discussions-181717?style=for-the-badge&logo=github&logoColor=white)](https://github.com/helixml/kodit/discussions)
69
+
70
+ </div>
71
+
72
+ **Helix Kodit** is an **MCP server** that connects your AI coding assistant to external codebases. It can:
73
+
74
+ - Improve your AI-assisted code by providing canonical examples direct from the source
75
+ - Index local and public codebases
76
+ - Integrate with any AI coding assistant via MCP
77
+ - Search using keyword and semantic search
78
+ - Integrate with any OpenAI-compatible or custom API/model
79
+
80
+ If you're an engineer working with AI-powered coding assistants, Kodit helps by
81
+ providing relevant and up-to-date examples of your task so that LLMs make fewer mistakes
82
+ and produce fewer hallucinations.
83
+
84
+ ## ✨ Features
85
+
86
+ ### Codebase Indexing
87
+
88
+ Kodit connects to a variety of local and remote codebases to build an index of your
89
+ code. This index is used to build a snippet library, ready for ingestion into an LLM.
90
+
91
+ - Index local directories and public Git repositories
92
+ - Build comprehensive snippet libraries for LLM ingestion
93
+ - Support for multiple codebase types and languages
94
+ - Efficient indexing and search capabilities
95
+
96
+ ### MCP Server
97
+
98
+ Relevant snippets are exposed to an AI coding assistant via an MCP server. This allows
99
+ the assistant to request relevant snippets by providing keywords, code, and semantic
100
+ intent. Kodit has been tested to work well with:
101
+
102
+ - Seamless integration with popular AI coding assistants
103
+ - Tested and verified with:
104
+ - [Cursor](https://docs.helix.ml/kodit/#integration-with-cursor)
105
+ - [Cline](https://docs.helix.ml/kodit/#integration-with-cline)
106
+ - Please contribute more instructions! ... any other assistant is likely to work ...
107
+
108
+ ### Enterprise Ready
109
+
110
+ Out of the box, Kodit works with a local SQLite database and very small, local models.
111
+ But enterprises can scale out with performant databases and dedicated models. Everything
112
+ can even run securely, privately, with on-premise LLM platforms like
113
+ [Helix](https://helix.ml).
114
+
115
+ Supported databases:
116
+
117
+ - SQLite
118
+ - [Vectorchord](https://github.com/tensorchord/VectorChord)
119
+
120
+ Supported providers:
121
+
122
+ - Local (which uses tiny CPU-only open-source models)
123
+ - OpenAI
124
+ - Secure, private LLM enclave with [Helix](https://helix.ml).
125
+ - Any other OpenAI compatible API
126
+
127
+ ## 🚀 Quick Start
128
+
129
+ 1. [Install Kodit](https://docs.helix.ml/kodit/#installation)
130
+ 2. [Index codebases](https://docs.helix.ml/kodit/#quick-start)
131
+ 3. [Integrate with your coding assistant](https://docs.helix.ml/kodit/#integrating-kodit-with-coding-assistants)
132
+
133
+ ### Documentation
134
+
135
+ - [Installation Guide](https://docs.helix.ml/kodit/#installation)
136
+ - [Usage Guide](https://docs.helix.ml/kodit/#quick-start)
137
+ - [Connecting to Kodit](https://docs.helix.ml/kodit/#integrating-kodit-with-coding-assistants)
138
+ - [Configuration Options](https://docs.helix.ml/kodit/#configuring-kodit)
139
+ - [Contribution Guidelines](.github/CONTRIBUTING.md)
140
+
141
+ ## Roadmap
142
+
143
+ The roadmap is currently maintained as a [GitHub Project](https://github.com/orgs/helixml/projects/4).
144
+
145
+ ## 💬 Support
146
+
147
+ For commercial support, please contact [Helix.ML](mailto:founders@helix.ml). To ask a question,
148
+ please [open a discussion](https://github.com/helixml/kodit/discussions).
149
+
150
+ ## License
151
+
152
+ [Apache 2.0 © 2025 HelixML, Inc.](./LICENSE)
kodit-0.1.18/README.md ADDED
@@ -0,0 +1,101 @@
1
+ <p align="center">
2
+ <a href="https://docs.helix.ml/kodit/"><img src="https://docs.helix.ml/images/helix-kodit-logo.png" alt="Helix Kodit Logo" width="300"></a>
3
+ </p>
4
+
5
+ <h1 align="center">
6
+ Kodit: A Code Indexing MCP Server
7
+ </h1>
8
+
9
+ <p align="center">
10
+ Kodit connects your AI coding assistant to external codebases to provide accurate and up-to-date snippets of code.
11
+ </p>
12
+
13
+ <div align="center">
14
+
15
+ [![Documentation](https://img.shields.io/badge/Documentation-6B46C1?style=for-the-badge&logo=readthedocs&logoColor=white)](https://docs.helix.ml/kodit/)
16
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=for-the-badge)](./LICENSE)
17
+ [![Discussions](https://img.shields.io/badge/Discussions-181717?style=for-the-badge&logo=github&logoColor=white)](https://github.com/helixml/kodit/discussions)
18
+
19
+ </div>
20
+
21
+ **Helix Kodit** is an **MCP server** that connects your AI coding assistant to external codebases. It can:
22
+
23
+ - Improve your AI-assisted code by providing canonical examples direct from the source
24
+ - Index local and public codebases
25
+ - Integrate with any AI coding assistant via MCP
26
+ - Search using keyword and semantic search
27
+ - Integrate with any OpenAI-compatible or custom API/model
28
+
29
+ If you're an engineer working with AI-powered coding assistants, Kodit helps by
30
+ providing relevant and up-to-date examples of your task so that LLMs make fewer mistakes
31
+ and produce fewer hallucinations.
32
+
33
+ ## ✨ Features
34
+
35
+ ### Codebase Indexing
36
+
37
+ Kodit connects to a variety of local and remote codebases to build an index of your
38
+ code. This index is used to build a snippet library, ready for ingestion into an LLM.
39
+
40
+ - Index local directories and public Git repositories
41
+ - Build comprehensive snippet libraries for LLM ingestion
42
+ - Support for multiple codebase types and languages
43
+ - Efficient indexing and search capabilities
44
+
45
+ ### MCP Server
46
+
47
+ Relevant snippets are exposed to an AI coding assistant via an MCP server. This allows
48
+ the assistant to request relevant snippets by providing keywords, code, and semantic
49
+ intent. Kodit has been tested to work well with:
50
+
51
+ - Seamless integration with popular AI coding assistants
52
+ - Tested and verified with:
53
+ - [Cursor](https://docs.helix.ml/kodit/#integration-with-cursor)
54
+ - [Cline](https://docs.helix.ml/kodit/#integration-with-cline)
55
+ - Please contribute more instructions! ... any other assistant is likely to work ...
56
+
57
+ ### Enterprise Ready
58
+
59
+ Out of the box, Kodit works with a local SQLite database and very small, local models.
60
+ But enterprises can scale out with performant databases and dedicated models. Everything
61
+ can even run securely, privately, with on-premise LLM platforms like
62
+ [Helix](https://helix.ml).
63
+
64
+ Supported databases:
65
+
66
+ - SQLite
67
+ - [Vectorchord](https://github.com/tensorchord/VectorChord)
68
+
69
+ Supported providers:
70
+
71
+ - Local (which uses tiny CPU-only open-source models)
72
+ - OpenAI
73
+ - Secure, private LLM enclave with [Helix](https://helix.ml).
74
+ - Any other OpenAI compatible API
75
+
76
+ ## 🚀 Quick Start
77
+
78
+ 1. [Install Kodit](https://docs.helix.ml/kodit/#installation)
79
+ 2. [Index codebases](https://docs.helix.ml/kodit/#quick-start)
80
+ 3. [Integrate with your coding assistant](https://docs.helix.ml/kodit/#integrating-kodit-with-coding-assistants)
81
+
82
+ ### Documentation
83
+
84
+ - [Installation Guide](https://docs.helix.ml/kodit/#installation)
85
+ - [Usage Guide](https://docs.helix.ml/kodit/#quick-start)
86
+ - [Connecting to Kodit](https://docs.helix.ml/kodit/#integrating-kodit-with-coding-assistants)
87
+ - [Configuration Options](https://docs.helix.ml/kodit/#configuring-kodit)
88
+ - [Contribution Guidelines](.github/CONTRIBUTING.md)
89
+
90
+ ## Roadmap
91
+
92
+ The roadmap is currently maintained as a [GitHub Project](https://github.com/orgs/helixml/projects/4).
93
+
94
+ ## 💬 Support
95
+
96
+ For commercial support, please contact [Helix.ML](mailto:founders@helix.ml). To ask a question,
97
+ please [open a discussion](https://github.com/helixml/kodit/discussions).
98
+
99
+ ## License
100
+
101
+ [Apache 2.0 © 2025 HelixML, Inc.](./LICENSE)
@@ -0,0 +1,13 @@
1
+ ---
2
+ title: Kodit Demos
3
+ linkTitle: Demos
4
+ weight: 4
5
+ tags:
6
+ - demo
7
+ ---
8
+
9
+ The following sections provide lots of examples and demos of using Kodit.
10
+
11
+ <!--more-->
12
+
13
+ {{< default-section-cards-list >}}
@@ -0,0 +1,333 @@
1
+ ---
2
+ title: "Kodit Demo: Proprietary Auth Patterns"
3
+ weight: 1
4
+ tags:
5
+ - demo
6
+ - auth
7
+ ---
8
+
9
+ Another common feature of private codebases is proprietary code to achieve a certain
10
+ task. For example, you might have a specific way to request data from your data lake, or a
11
+ specific format to use when using a common API.
12
+
13
+ This example uses the analogy of a proprietary authentication mechanism. I have a super
14
+ secret server with sensitive data. It is protected with a [custom auth implementation](https://gist.github.com/cbf0bd1f3338ddf9f98879148d2d752d)
15
+ that requires the user to "knock three times" before they get access.
16
+
17
+ ## With Cursor
18
+
19
+ Without Kodit, an AI assistant makes an average assumption about what the auth might
20
+ look like, based upon the context provided by the user. The transcript is below. You can
21
+ see that Cursor doesn't have a chance.
22
+
23
+ ### Original Cursor Response
24
+
25
+ > **User**
26
+ >
27
+ > Write a python script that is able to access my super secret knock knock api on localhost:8000/secret.
28
+ >
29
+ > **Cursor**
30
+ >
31
+ > I'll help you create a Python script to interact with your local knock knock API. I'll create a script that makes a request to the specified endpoint.
32
+ >
33
+ > Let me write a script that uses the `requests` library to make HTTP requests to your local API:
34
+ >
35
+ > ```python
36
+ > import requests
37
+ >
38
+ > def access_secret_api():
39
+ > try:
40
+ > # Make a GET request to the secret endpoint
41
+ > response = requests.get('http://localhost:8000/secret')
42
+ >
43
+ > # Check if the request was successful
44
+ > response.raise_for_status()
45
+ >
46
+ > # Print the response
47
+ > print("Response from secret API:", response.text)
48
+ >
49
+ > except requests.exceptions.RequestException as e:
50
+ > print(f"Error accessing the API: {e}")
51
+ >
52
+ > if __name__ == "__main__":
53
+ > access_secret_api()
54
+ > ```
55
+ >
56
+ > I've created a Python script that will:
57
+ >
58
+ > 1. Make a GET request to `http://localhost:8000/secret`
59
+ > 2. Handle any potential errors that might occur during the request
60
+ > 3. Print the response from the API if successful
61
+ >
62
+ > To run this script, you'll need to have the `requests` library installed. You can install it using pip:
63
+
64
+ ## With Kodit
65
+
66
+ Now let's index the code for my secret server and then leverage Kodit in Cursor.
67
+
68
+ 1. Start afresh. Delete your data directory or your database.
69
+ 2. Index the [server code](https://gist.github.com/cbf0bd1f3338ddf9f98879148d2d752d): `kodit index https://gist.github.com/cbf0bd1f3338ddf9f98879148d2d752d.git`
70
+ 3. Launch the MCP server: `kodit serve`
71
+ 4. Connect Cursor.
72
+
73
+ The results are shown below. Cursor has correctly queried Kodit, which has responded with examples of the super secret auth implementation. This led to Cursor producing valid, working code that adhered to our esoteric auth implementation!
74
+
75
+ Here is the result of running the generated code:
76
+
77
+ ```sh
78
+ uv run temp.py
79
+ Knock 1: Knock 1 received (token: sAQmj2uFrRxc-e9blSCfFg)
80
+ Knock 2: Knock 2 received (token: sAQmj2uFrRxc-e9blSCfFg)
81
+ Knock 3: Knock 3 received (token: sAQmj2uFrRxc-e9blSCfFg)
82
+ Secret area: {'message': 'Welcome to the secret area!', 'secret': 'The answer is 42'}
83
+ ```
84
+
85
+ ### Kodit Cursor Response
86
+
87
+ > **User**
88
+ >
89
+ > Write a python script that is able to access my super secret knock knock api on localhost:8000/secret.
90
+ >
91
+ > **Cursor**
92
+ >
93
+ > Based on the information found in the codebase, your "super secret knock knock API" on `localhost:8000/secret` requires a special authentication flow:
94
+ >
95
+ > 1. You must make 3 POST requests to `/knock`. Each time, you use the same token (provided in the response) in the Authorization header as a Bearer token.
96
+ > 2. After 3 knocks, you can access `/secret` with a GET request, using the same token in the Authorization header.
97
+ >
98
+ > Would you like a Python script that automates this entire process (performs the 3 knocks and then accesses the secret), or do you want just an example of how to access `/secret` assuming you already have a valid token?
99
+ >
100
+ > **User**
101
+ >
102
+ > the whole process.
103
+ >
104
+ > **Cursor**
105
+ >
106
+ > ```python
107
+ > import requests
108
+ >
109
+ > BASE_URL = "http://localhost:8000"
110
+ > KNOCKS_REQUIRED = 3
111
+ >
112
+ > def knock_and_get_secret():
113
+ > token = None
114
+ > headers = {}
115
+ > for i in range(KNOCKS_REQUIRED):
116
+ > if token:
117
+ > headers = {"Authorization": f"Bearer {token}"}
118
+ > else:
119
+ > headers = {}
120
+ > resp = requests.post(f"{BASE_URL}/knock", headers=headers)
121
+ > resp.raise_for_status()
122
+ > data = resp.json()
123
+ > token = data["token"]
124
+ > print(f"Knock {i+1}: {data['message']} (token: {token})")
125
+ >
126
+ > # Now access the secret
127
+ > headers = {"Authorization": f"Bearer {token}"}
128
+ > resp = requests.get(f"{BASE_URL}/secret", headers=headers)
129
+ > if resp.status_code == 200:
130
+ > print("Secret area:", resp.json())
131
+ > else:
132
+ > print("Failed to access secret:", resp.status_code, resp.text)
133
+ >
134
+ > if __name__ == "__main__":
135
+ > knock_and_get_secret()
136
+ >
137
+ > ```
138
+ >
139
+ > I've written a Python script in `temp.py` that automates the entire process:
140
+ >
141
+ > - It performs 3 POST requests to `/knock`, passing the token as required.
142
+ > - After the third knock, it makes a GET request to `/secret` with the token.
143
+ > - The script prints out the secret or any error message.
144
+ >
145
+ > You just need to have the `requests` library installed (`pip install requests`) to run this script. Let me know if you want any adjustments or further explanation!
146
+
147
+ ---
148
+
149
+ ## Appendix
150
+
151
+ ### Kodit Request/Response from Cursor
152
+
153
+ #### Request
154
+
155
+ ```json
156
+ {
157
+ "user_intent": "Write a Python script to access a secret API at http://localhost:8000/secret.",
158
+ "related_file_paths": [
159
+ "temp.py"
160
+ ],
161
+ "related_file_contents": [
162
+ ""
163
+ ],
164
+ "keywords": [
165
+ "Python",
166
+ "API request",
167
+ "localhost",
168
+ "secret",
169
+ "http://localhost:8000/secret"
170
+ ]
171
+ }
172
+ ```
173
+
174
+ #### Response
175
+
176
+ > file:///Users/phil/.kodit/clones/https___gist.github.com_cbf0bd1f3338ddf9f98879148d2d752d.git/knock-knock-server.py
177
+ > This Python code snippet sets up a basic HTTP server that listens on a specified port (default is 8000) using the `HTTPServer` class from the `http.server` module. The server utilizes a custom request handler called `KnockAuthHandler`, which likely handles authentication through a "knock" method. The `run_server` function outputs instructions to the console (using the `rich` library for formatted text) on how to access a secret area of the server. Users are required to make three POST requests to the `/knock` endpoint, receive a token, and then use that token in an Authorization header to make a GET request to the `/secret` endpoint. The server will continue to run indefinitely until it is stopped.
178
+ >
179
+ > ```
180
+ > import json
181
+ > import secrets
182
+ > from http.server import BaseHTTPRequestHandler, HTTPServer
183
+ > from rich.console import Console
184
+ > def run_server(port=8000):
185
+ > server_address = ("", port)
186
+ > httpd = HTTPServer(server_address, KnockAuthHandler)
187
+ > console.print(f"[bold blue]Starting server on port {port}[/bold blue]")
188
+ > console.print("[yellow]To access the secret area:[/yellow]")
189
+ > console.print("1. Make 3 POST requests to /knock")
190
+ > console.print("2. Use the returned token in the Authorization header")
191
+ > console.print("3. Make a GET request to /secret with the token")
192
+ > httpd.serve_forever()
193
+ > ```
194
+ >
195
+ > file:///Users/phil/.kodit/clones/https___gist.github.com_cbf0bd1f3338ddf9f98879148d2d752d.git/knock-knock-server.py
196
+ > This code defines a basic HTTP server in Python that responds to POST requests at the "/knock" endpoint. It includes a class `KnockAuthHandler` that extends `BaseHTTPRequestHandler` to handle incoming requests.
197
+ >
198
+ > Here's a breakdown of the main components:
199
+ >
200
+ > 1. **Imports**: The code imports necessary modules, including `json` for encoding and decoding JSON data, `secrets` for generating secure random tokens, `BaseHTTPRequestHandler` and `HTTPServer` for creating the HTTP server, and `rich.console.Console` for formatted console output.
201
+ >
202
+ > 2. **do_POST Method**: This method handles POST requests:
203
+ > - It checks if the request path is "/knock". If so:
204
+ > - It attempts to retrieve a token from the request header using the `_get_token_from_header` method (which is not defined in the snippet).
205
+ > - If no token is found, a new secure token is created using `secrets.token_urlsafe(16)` and initialized in the `knock_counts` dictionary with a count of 0.
206
+ > - The count for the token is incremented, and the number of knocks received is printed to the console in green.
207
+ > - A JSON response is constructed that includes a message about the number of knocks received, the token, and how many knocks remain before a limit (set to 3).
208
+ > - If the request path is anything other than "/knock", it returns a 404 error with a JSON response indicating that the endpoint was not found.
209
+ >
210
+ > 3. **Response Handling**: The `_set_headers` method is called to set appropriate HTTP headers before sending back the JSON response.
211
+ >
212
+ > Overall, this snippet sets up a simple knocking authentication system, allowing clients to "knock" by sending POST requests to a specified endpoint while maintaining a count of knocks per token.
213
+ >
214
+ > ```
215
+ > import json
216
+ > import secrets
217
+ > from http.server import BaseHTTPRequestHandler, HTTPServer
218
+ > from rich.console import Console
219
+ > class KnockAuthHandler(BaseHTTPRequestHandler):
220
+ > def do_POST(self):
221
+ > if self.path == "/knock":
222
+ > token = self._get_token_from_header()
223
+ > if not token:
224
+ > token = secrets.token_urlsafe(16)
225
+ > knock_counts[token] = 0
226
+ >
227
+ > knock_counts[token] += 1
228
+ > console.print(
229
+ > f"[green]Knock {knock_counts[token]} received for token {token}[/green]"
230
+ > )
231
+ >
232
+ > response = {
233
+ > "message": f"Knock {knock_counts[token]} received",
234
+ > "token": token,
235
+ > "knocks_remaining": max(0, 3 - knock_counts[token]),
236
+ > }
237
+ >
238
+ > self._set_headers()
239
+ > self.wfile.write(json.dumps(response).encode())
240
+ > else:
241
+ > self._set_headers(404)
242
+ > self.wfile.write(json.dumps({"error": "Not found"}).encode())
243
+ > ```
244
+ >
245
+ > file:///Users/phil/.kodit/clones/https___gist.github.com_cbf0bd1f3338ddf9f98879148d2d752d.git/knock-knock-server.py
246
+ > This code snippet defines a simple HTTP server using Python's `http.server` module and a custom request handler named `KnockAuthHandler`.
247
+ >
248
+ > Here's a breakdown of the functionality:
249
+ >
250
+ > - The server listens for GET requests.
251
+ > - When a request is made to the path `/secret`, it checks for a token in the request headers.
252
+ > - The token is validated against a `knock_counts` dictionary, which tracks the number of times the token has been "knocked" (i.e., how many requests have been made).
253
+ > - If the provided token is not present or if the knock count is less than 3, it responds with a 401 Unauthorized error along with a message indicating that three knocks are required.
254
+ > - If the token is valid and the count is at least 3, it responds with a 200 status and welcomes the user to the secret area, revealing a hidden message.
255
+ > - For any other paths, it returns a 404 Not Found error.
256
+ >
257
+ > Additional features include using JSON for responses and a potential use of the `rich` library for improved console output (though it is not utilized in this snippet). The `secrets` module is imported but not used in the provided code.
258
+ >
259
+ > ```
260
+ > import json
261
+ > import secrets
262
+ > from http.server import BaseHTTPRequestHandler, HTTPServer
263
+ > from rich.console import Console
264
+ > class KnockAuthHandler(BaseHTTPRequestHandler):
265
+ > def do_GET(self):
266
+ > if self.path == "/secret":
267
+ > token = self._get_token_from_header()
268
+ > if not token or knock_counts.get(token, 0) < 3:
269
+ > self._set_headers(401)
270
+ > self.wfile.write(
271
+ > json.dumps(
272
+ > {
273
+ > "error": "Unauthorized",
274
+ > "message": "You need to knock three times first!",
275
+ > }
276
+ > ).encode()
277
+ > )
278
+ > return
279
+ >
280
+ > self._set_headers()
281
+ > self.wfile.write(
282
+ > json.dumps(
283
+ > {
284
+ > "message": "Welcome to the secret area!",
285
+ > "secret": "The answer is 42",
286
+ > }
287
+ > ).encode()
288
+ > )
289
+ > else:
290
+ > self._set_headers(404)
291
+ > self.wfile.write(json.dumps({"error": "Not found"}).encode())
292
+ > ```
293
+ >
294
+ > file:///Users/phil/.kodit/clones/https___gist.github.com_cbf0bd1f3338ddf9f98879148d2d752d.git/knock-knock-server.py
295
+ > This code snippet imports necessary libraries and defines a class `KnockAuthHandler`, which extends `BaseHTTPRequestHandler` from the `http.server` module. The class includes a method `_set_headers` that sets the HTTP response status code (defaulting to 200) and specifies the content type of the response as "application/json". This method prepares the server to send a JSON response for HTTP requests.
296
+ >
297
+ > The other imported modules, `json` and `secrets`, suggest that the class may be working with JSON data and generating secure tokens or identifiers, although the snippet does not currently use them. The `rich.console` import indicates that the script may eventually use the Rich library for enhanced console output, though it is not utilized in the provided code.
298
+ >
299
+ > ```
300
+ > import json
301
+ > import secrets
302
+ > from http.server import BaseHTTPRequestHandler, HTTPServer
303
+ > from rich.console import Console
304
+ > class KnockAuthHandler(BaseHTTPRequestHandler):
305
+ > def _set_headers(self, status_code=200):
306
+ > self.send_response(status_code)
307
+ > self.send_header("Content-type", "application/json")
308
+ > self.end_headers()
309
+ > ```
310
+ >
311
+ > file:///Users/phil/.kodit/clones/https___gist.github.com_cbf0bd1f3338ddf9f98879148d2d752d.git/knock-knock-server.py
312
+ > The provided code snippet is a Python definition of a class `KnockAuthHandler` that extends `BaseHTTPRequestHandler` from the `http.server` module. This class is designed to handle HTTP requests, specifically for authentication purposes.
313
+ >
314
+ > In this class:
315
+ >
316
+ > - The method `_get_token_from_header` retrieves the "Authorization" header from the HTTP request.
317
+ > - It checks if the header starts with the prefix "Bearer " and, if so, splits the string to extract the token part (the portion after "Bearer ").
318
+ > - If the "Authorization" header does not contain a valid Bearer token, the method returns an empty string.
319
+ >
320
+ > The use of `json`, `secrets`, and `rich.console.Console` indicates that there may be additional functionality related to JSON handling, secure random number generation, and enhanced console output, respectively, though these elements are not utilized in the provided snippet.
321
+ >
322
+ > ```
323
+ > import json
324
+ > import secrets
325
+ > from http.server import BaseHTTPRequestHandler, HTTPServer
326
+ > from rich.console import Console
327
+ > class KnockAuthHandler(BaseHTTPRequestHandler):
328
+ > def _get_token_from_header(self) -> str:
329
+ > auth_header = self.headers.get("Authorization", "")
330
+ > if auth_header.startswith("Bearer "):
331
+ > return auth_header.split(" ")[1]
332
+ > return ""
333
+ > ```
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.1.16'
21
- __version_tuple__ = version_tuple = (0, 1, 16)
20
+ __version__ = version = '0.1.18'
21
+ __version_tuple__ = version_tuple = (0, 1, 18)
@@ -142,6 +142,8 @@ class VectorChordVectorSearchService(VectorSearchService):
142
142
  async def retrieve(self, query: str, top_k: int = 10) -> list[VectorSearchResponse]:
143
143
  """Query the embedding model."""
144
144
  embedding = await self.embedding_provider.embed([query])
145
+ if len(embedding) == 0 or len(embedding[0]) == 0:
146
+ return []
145
147
  result = await self._execute(
146
148
  text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
147
149
  {"query": str(embedding[0]), "top_k": top_k},