kodit 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (158) hide show
  1. {kodit-0.2.2 → kodit-0.2.4}/Dockerfile +3 -2
  2. {kodit-0.2.2 → kodit-0.2.4}/PKG-INFO +2 -2
  3. {kodit-0.2.2 → kodit-0.2.4}/docs/_index.md +1 -1
  4. {kodit-0.2.2 → kodit-0.2.4}/docs/developer/index.md +5 -4
  5. kodit-0.2.4/docs/reference/deployment/docker-compose.yaml +40 -0
  6. kodit-0.2.4/docs/reference/deployment/index.md +35 -0
  7. kodit-0.2.4/docs/reference/deployment/kubernetes.yaml +99 -0
  8. kodit-0.2.4/docs/reference/telemetry/index.md +31 -0
  9. {kodit-0.2.2 → kodit-0.2.4}/pyproject.toml +1 -1
  10. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/_version.py +2 -2
  11. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/app.py +6 -0
  12. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/cli.py +8 -2
  13. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/embedding_factory.py +11 -0
  14. kodit-0.2.4/src/kodit/embedding/embedding_provider/embedding_provider.py +92 -0
  15. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/embedding_provider/hash_embedding_provider.py +16 -7
  16. kodit-0.2.4/src/kodit/embedding/embedding_provider/local_embedding_provider.py +96 -0
  17. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/embedding_provider/openai_embedding_provider.py +18 -22
  18. kodit-0.2.4/src/kodit/embedding/local_vector_search_service.py +87 -0
  19. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/vector_search_service.py +18 -1
  20. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/vectorchord_vector_search_service.py +63 -16
  21. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/enrichment/enrichment_factory.py +3 -0
  22. kodit-0.2.4/src/kodit/enrichment/enrichment_provider/enrichment_provider.py +36 -0
  23. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/enrichment/enrichment_provider/local_enrichment_provider.py +39 -28
  24. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +25 -27
  25. kodit-0.2.4/src/kodit/enrichment/enrichment_service.py +45 -0
  26. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/indexing/indexing_service.py +50 -23
  27. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/log.py +126 -24
  28. kodit-0.2.4/src/kodit/migrations/versions/9e53ea8bb3b0_add_authors.py +103 -0
  29. kodit-0.2.4/src/kodit/source/source_factories.py +356 -0
  30. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/source/source_models.py +17 -5
  31. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/source/source_repository.py +49 -20
  32. kodit-0.2.4/src/kodit/source/source_service.py +150 -0
  33. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/embedding/embedding_provider/local_embedding_provider_test.py +59 -10
  34. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/embedding/embedding_provider/openai_embedding_provider_test.py +38 -10
  35. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/embedding/local_vector_search_service_test.py +32 -3
  36. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/embedding/vectorchord_vector_search_service_test.py +31 -5
  37. kodit-0.2.4/tests/kodit/enrichment/enrichment_provider/local_enrichment_provider_test.py +218 -0
  38. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/enrichment/enrichment_provider/openai_enrichment_provider_test.py +78 -47
  39. kodit-0.2.4/tests/kodit/log_test.py +18 -0
  40. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/source/source_service_test.py +5 -7
  41. {kodit-0.2.2 → kodit-0.2.4}/uv.lock +40 -18
  42. kodit-0.2.2/docs/reference/telemetry/index.md +0 -34
  43. kodit-0.2.2/src/kodit/embedding/embedding_provider/embedding_provider.py +0 -64
  44. kodit-0.2.2/src/kodit/embedding/embedding_provider/local_embedding_provider.py +0 -64
  45. kodit-0.2.2/src/kodit/embedding/local_vector_search_service.py +0 -54
  46. kodit-0.2.2/src/kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -16
  47. kodit-0.2.2/src/kodit/enrichment/enrichment_service.py +0 -33
  48. kodit-0.2.2/src/kodit/migrations/versions/42e836b21102_add_authors.py +0 -64
  49. kodit-0.2.2/src/kodit/source/source_service.py +0 -327
  50. {kodit-0.2.2 → kodit-0.2.4}/.cursor/rules/kodit.mdc +0 -0
  51. {kodit-0.2.2 → kodit-0.2.4}/.dockerignore +0 -0
  52. {kodit-0.2.2 → kodit-0.2.4}/.github/CODE_OF_CONDUCT.md +0 -0
  53. {kodit-0.2.2 → kodit-0.2.4}/.github/CONTRIBUTING.md +0 -0
  54. {kodit-0.2.2 → kodit-0.2.4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  55. {kodit-0.2.2 → kodit-0.2.4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  56. {kodit-0.2.2 → kodit-0.2.4}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  57. {kodit-0.2.2 → kodit-0.2.4}/.github/dependabot.yml +0 -0
  58. {kodit-0.2.2 → kodit-0.2.4}/.github/workflows/docker.yaml +0 -0
  59. {kodit-0.2.2 → kodit-0.2.4}/.github/workflows/docs.yaml +0 -0
  60. {kodit-0.2.2 → kodit-0.2.4}/.github/workflows/pull_request.yaml +0 -0
  61. {kodit-0.2.2 → kodit-0.2.4}/.github/workflows/pypi-test.yaml +0 -0
  62. {kodit-0.2.2 → kodit-0.2.4}/.github/workflows/pypi.yaml +0 -0
  63. {kodit-0.2.2 → kodit-0.2.4}/.github/workflows/test.yaml +0 -0
  64. {kodit-0.2.2 → kodit-0.2.4}/.gitignore +0 -0
  65. {kodit-0.2.2 → kodit-0.2.4}/.python-version +0 -0
  66. {kodit-0.2.2 → kodit-0.2.4}/.vscode/launch.json +0 -0
  67. {kodit-0.2.2 → kodit-0.2.4}/.vscode/settings.json +0 -0
  68. {kodit-0.2.2 → kodit-0.2.4}/LICENSE +0 -0
  69. {kodit-0.2.2 → kodit-0.2.4}/README.md +0 -0
  70. {kodit-0.2.2 → kodit-0.2.4}/alembic.ini +0 -0
  71. {kodit-0.2.2 → kodit-0.2.4}/docs/demos/_index.md +0 -0
  72. {kodit-0.2.2 → kodit-0.2.4}/docs/demos/go-simple-microservice/index.md +0 -0
  73. {kodit-0.2.2 → kodit-0.2.4}/docs/demos/knock-knock-auth/index.md +0 -0
  74. {kodit-0.2.2 → kodit-0.2.4}/docs/getting-started/_index.md +0 -0
  75. {kodit-0.2.2 → kodit-0.2.4}/docs/getting-started/installation/index.md +0 -0
  76. {kodit-0.2.2 → kodit-0.2.4}/docs/getting-started/integration/index.md +0 -0
  77. {kodit-0.2.2 → kodit-0.2.4}/docs/getting-started/quick-start/index.md +0 -0
  78. {kodit-0.2.2 → kodit-0.2.4}/docs/reference/_index.md +0 -0
  79. {kodit-0.2.2 → kodit-0.2.4}/docs/reference/configuration/index.md +0 -0
  80. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/.gitignore +0 -0
  81. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/__init__.py +0 -0
  82. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/bm25/__init__.py +0 -0
  83. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/bm25/keyword_search_factory.py +0 -0
  84. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/bm25/keyword_search_service.py +0 -0
  85. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/bm25/local_bm25.py +0 -0
  86. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/bm25/vectorchord_bm25.py +0 -0
  87. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/config.py +0 -0
  88. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/database.py +0 -0
  89. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/__init__.py +0 -0
  90. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/embedding_models.py +0 -0
  91. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/embedding_provider/__init__.py +0 -0
  92. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/embedding/embedding_repository.py +0 -0
  93. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/enrichment/__init__.py +0 -0
  94. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/enrichment/enrichment_provider/__init__.py +0 -0
  95. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/indexing/__init__.py +0 -0
  96. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/indexing/fusion.py +0 -0
  97. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/indexing/indexing_models.py +0 -0
  98. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/indexing/indexing_repository.py +0 -0
  99. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/mcp.py +0 -0
  100. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/middleware.py +0 -0
  101. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/README +0 -0
  102. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/__init__.py +0 -0
  103. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/env.py +0 -0
  104. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/script.py.mako +0 -0
  105. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +0 -0
  106. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/versions/85155663351e_initial.py +0 -0
  107. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/versions/__init__.py +0 -0
  108. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +0 -0
  109. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/__init__.py +0 -0
  110. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/languages/__init__.py +0 -0
  111. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/languages/csharp.scm +0 -0
  112. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/languages/go.scm +0 -0
  113. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/languages/javascript.scm +0 -0
  114. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/languages/python.scm +0 -0
  115. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/languages/typescript.scm +0 -0
  116. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/method_snippets.py +0 -0
  117. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/snippets/snippets.py +0 -0
  118. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/source/__init__.py +0 -0
  119. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/source/git.py +0 -0
  120. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/source/ignore.py +0 -0
  121. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/util/__init__.py +0 -0
  122. {kodit-0.2.2 → kodit-0.2.4}/src/kodit/util/spinner.py +0 -0
  123. {kodit-0.2.2 → kodit-0.2.4}/tests/__init__.py +0 -0
  124. {kodit-0.2.2 → kodit-0.2.4}/tests/conftest.py +0 -0
  125. {kodit-0.2.2 → kodit-0.2.4}/tests/docker-smoke.sh +0 -0
  126. {kodit-0.2.2 → kodit-0.2.4}/tests/experiments/cline-prompt-regression-tests/cline_prompt.txt +0 -0
  127. {kodit-0.2.2 → kodit-0.2.4}/tests/experiments/cline-prompt-regression-tests/cline_prompt_test.py +0 -0
  128. {kodit-0.2.2 → kodit-0.2.4}/tests/experiments/embedding.py +0 -0
  129. {kodit-0.2.2 → kodit-0.2.4}/tests/experiments/similarity_test.py +0 -0
  130. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/__init__.py +0 -0
  131. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/bm25/local_bm25_test.py +0 -0
  132. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/bm25/vectorchord_repository_test.py +0 -0
  133. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/cli_test.py +0 -0
  134. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/e2e.py +0 -0
  135. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/embedding/__init__.py +0 -0
  136. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/embedding/embedding_factory_test.py +0 -0
  137. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/enrichment/__init__.py +0 -0
  138. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/enrichment/enrichment_factory_test.py +0 -0
  139. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/enrichment/enrichment_provider/__init__.py +0 -0
  140. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/indexing/__init__.py +0 -0
  141. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/indexing/indexing_repository_test.py +0 -0
  142. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/indexing/indexing_service_test.py +0 -0
  143. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/mcp_test.py +0 -0
  144. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/__init__.py +0 -0
  145. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/csharp.cs +0 -0
  146. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/detect_language_test.py +0 -0
  147. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/golang.go +0 -0
  148. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/javascript.js +0 -0
  149. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/knock-knock-server.py +0 -0
  150. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/method_extraction_test.py +0 -0
  151. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/python.py +0 -0
  152. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/snippets/typescript.tsx +0 -0
  153. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/source/__init__.py +0 -0
  154. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/source/git_test.py +0 -0
  155. {kodit-0.2.2 → kodit-0.2.4}/tests/kodit/source/ignore_test.py +0 -0
  156. {kodit-0.2.2 → kodit-0.2.4}/tests/performance/similarity.py +0 -0
  157. {kodit-0.2.2 → kodit-0.2.4}/tests/smoke.sh +0 -0
  158. {kodit-0.2.2 → kodit-0.2.4}/tests/vectorchord-smoke.sh +0 -0
@@ -1,5 +1,6 @@
1
1
  # syntax=docker/dockerfile:1.9
2
- FROM python:3.13.4-slim-bookworm AS build
2
+ ARG PYTHON_VERSION=3.13.5
3
+ FROM python:${PYTHON_VERSION}-slim-bookworm AS build
3
4
 
4
5
  # The following does not work in Podman unless you build in Docker
5
6
  # compatibility mode: <https://github.com/containers/podman/issues/8477>
@@ -60,7 +61,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
60
61
 
61
62
  ##########################################################################
62
63
 
63
- FROM python:3.13.4-slim-bookworm
64
+ FROM python:${PYTHON_VERSION}-slim-bookworm
64
65
  SHELL ["sh", "-exc"]
65
66
 
66
67
  RUN <<EOT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -37,9 +37,9 @@ Requires-Dist: httpx-retries>=0.3.2
37
37
  Requires-Dist: httpx>=0.28.1
38
38
  Requires-Dist: openai>=1.82.0
39
39
  Requires-Dist: pathspec>=0.12.1
40
- Requires-Dist: posthog>=4.0.1
41
40
  Requires-Dist: pydantic-settings>=2.9.1
42
41
  Requires-Dist: pytable-formatter>=0.1.1
42
+ Requires-Dist: rudder-sdk-python>=2.1.4
43
43
  Requires-Dist: sentence-transformers>=4.1.0
44
44
  Requires-Dist: sqlalchemy[asyncio]>=2.0.40
45
45
  Requires-Dist: structlog>=25.3.0
@@ -90,7 +90,7 @@ The roadmap is currently maintained as a [Github Project](https://github.com/org
90
90
 
91
91
  ## 💬 Support
92
92
 
93
- For commercial support, please contact [Helix.ML](founders@helix.ml). To ask a question,
93
+ For commercial support, please contact [Helix.ML](https://docs.helixml.tech/helix/help/). To ask a question,
94
94
  please [open a discussion](https://github.com/helixml/kodit/discussions).
95
95
 
96
96
  ## License
@@ -11,10 +11,11 @@ All database operations are handled by SQLAlchemy and Alembic.
11
11
  ### Creating a Database Migration
12
12
 
13
13
  1. Make changes to your models
14
- 2. Ensure the model is referenced in [alembic's env.py](src/kodit/alembic/env.py)
15
- 3. Run `alembic upgrade head` to create a temporary DB to compute the upgrade
16
- 4. Run `alembic revision --autogenerate -m "your message"`
17
- 5. The new migration will be applied when you next run a kodit command
14
+ 2. Ensure the model is referenced in [alembic's env.py](https://github.com/helixml/kodit/blob/main/src/kodit/migrations/env.py)
15
+ 3. Remove the temporary DB if it exists from a previous migration: `rm -f .kodit.db`
16
+ 4. Run `alembic upgrade head` to create a temporary DB to compute the upgrade
17
+ 5. Run `alembic revision --autogenerate -m "your message"`
18
+ 6. The new migration will be applied when you next run a kodit command
18
19
 
19
20
  ## Releasing
20
21
 
@@ -0,0 +1,40 @@
1
+ version: "3.9"
2
+
3
+ services:
4
+ kodit:
5
+ image: registry.helix.ml/helix/kodit:latest # Replace with a version
6
+ ports:
7
+ - "8080:8080" # Expose the MCP server
8
+ # Start the Kodit MCP server and bind to all interfaces
9
+ command: ["serve", "--host", "0.0.0.0", "--port", "8080"]
10
+ restart: unless-stopped
11
+ depends_on:
12
+ - vectorchord # Wait for VectorChord to start before Kodit
13
+
14
+ # Configure Kodit
15
+ environment:
16
+ # Configure the database
17
+ DB_URL: postgresql+asyncpg://postgres:mysecretpassword@vectorchord:5432/kodit
18
+ DEFAULT_SEARCH_PROVIDER: vectorchord
19
+
20
+ # External embedding provider
21
+ EMBEDDING_ENDPOINT_TYPE: openai
22
+ EMBEDDING_ENDPOINT_BASE_URL: https://api.openai.com/v1
23
+ EMBEDDING_ENDPOINT_API_KEY: REPLACE_WITH_YOUR_API_KEY
24
+ EMBEDDING_ENDPOINT_MODEL: text-embedding-3-large
25
+
26
+ # External enrichment provider
27
+ ENRICHMENT_ENDPOINT_TYPE: openai
28
+ ENRICHMENT_ENDPOINT_BASE_URL: https://api.openai.com/v1
29
+ ENRICHMENT_ENDPOINT_API_KEY: REPLACE_WITH_YOUR_API_KEY
30
+ ENRICHMENT_ENDPOINT_MODEL: o3-mini
31
+
32
+
33
+ vectorchord:
34
+ image: tensorchord/vchord-suite:pg17-20250601
35
+ environment:
36
+ - POSTGRES_DB=kodit
37
+ - POSTGRES_PASSWORD=mysecretpassword
38
+ ports:
39
+ - "5432:5432"
40
+ restart: unless-stopped
@@ -0,0 +1,35 @@
1
+ ---
2
+ title: Deployment
3
+ description: Deploying Kodit with Docker Compose and Kubernetes.
4
+ weight: 10
5
+ ---
6
+
7
+ Kodit is packaged as a Docker container so you can run it on any popular orchestration platform. This page describes how to deploy Kodit as a service.
8
+
9
+ ## Deploying With Docker Compose
10
+
11
+ Create a [docker-compose file](https://github.com/helixml/kodit/tree/main/docs/reference/deployment/docker-compose.yaml) that specifies the Kodit and VectorChord containers. Replace the `latest` tag with a specific version. Replace any API keys with your own or configure internal endpoints.
12
+
13
+ Then run Kodit with `docker compose -f docker-compose.yaml up -d`. For more instructions see the [Docker Compose documentation](https://docs.docker.com/compose/).
14
+
15
+ Here is an example:
16
+
17
+ {{< code file="docker-compose.yaml" >}}
18
+
19
+ ## Deploying With Kubernetes
20
+
21
+ To deploy with Kubernetes we recommend using a templating solution like Helm or Kustomize.
22
+
23
+ Here is a simple [raw Kubernetes manifest](https://github.com/helixml/kodit/tree/main/docs/reference/deployment/kubernetes.yaml) to help get you started. Remember to pin the Kodit container at a specific version and update the required API keys.
24
+
25
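+ Deploy with `kubectl -n kodit apply -f kubernetes.yaml` (if the `kodit` namespace does not exist yet, create it first with `kubectl create namespace kodit`).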
26
+
27
+ {{< code file="kubernetes.yaml" >}}
28
+
29
+ ### Deploying With a Kind Kubernetes Cluster
30
+
31
+ [Kind](https://kind.sigs.k8s.io/) runs a local Kubernetes cluster inside Docker containers, which makes it great for k8s development.
32
+
33
+ 1. `kind create cluster`
34
+ 2. `kubectl -n kodit apply -f kubernetes.yaml`
35
+
@@ -0,0 +1,99 @@
1
+ apiVersion: apps/v1
2
+ kind: Deployment
3
+ metadata:
4
+ name: vectorchord
5
+ labels:
6
+ app: vectorchord
7
+ spec:
8
+ replicas: 1
9
+ selector:
10
+ matchLabels:
11
+ app: vectorchord
12
+ template:
13
+ metadata:
14
+ labels:
15
+ app: vectorchord
16
+ spec:
17
+ containers:
18
+ - name: vectorchord
19
+ image: tensorchord/vchord-suite:pg17-20250601
20
+ env:
21
+ - name: POSTGRES_DB
22
+ value: "kodit"
23
+ - name: POSTGRES_PASSWORD
24
+ value: "mysecretpassword"
25
+ ports:
26
+ - containerPort: 5432
27
+ ---
28
+ apiVersion: v1
29
+ kind: Service
30
+ metadata:
31
+ name: vectorchord
32
+ spec:
33
+ selector:
34
+ app: vectorchord
35
+ ports:
36
+ - port: 5432
37
+ targetPort: 5432
38
+ ---
39
+ apiVersion: apps/v1
40
+ kind: Deployment
41
+ metadata:
42
+ name: kodit
43
+ labels:
44
+ app: kodit
45
+ spec:
46
+ replicas: 1
47
+ selector:
48
+ matchLabels:
49
+ app: kodit
50
+ template:
51
+ metadata:
52
+ labels:
53
+ app: kodit
54
+ spec:
55
+ containers:
56
+ - name: kodit
57
+ image: registry.helix.ml/helix/kodit:latest # Replace with a version
58
+ args: ["serve", "--host", "0.0.0.0", "--port", "8080"]
59
+ env:
60
+ - name: DB_URL
61
+ value: "postgresql+asyncpg://postgres:mysecretpassword@vectorchord:5432/kodit"
62
+ - name: DEFAULT_SEARCH_PROVIDER
63
+ value: "vectorchord"
64
+ - name: EMBEDDING_ENDPOINT_TYPE
65
+ value: "openai"
66
+ - name: EMBEDDING_ENDPOINT_BASE_URL
67
+ value: "https://api.openai.com/v1"
68
+ - name: EMBEDDING_ENDPOINT_API_KEY
69
+ value: "REPLACE_WITH_YOUR_API_KEY"
70
+ - name: EMBEDDING_ENDPOINT_MODEL
71
+ value: "text-embedding-3-large"
72
+ - name: ENRICHMENT_ENDPOINT_TYPE
73
+ value: "openai"
74
+ - name: ENRICHMENT_ENDPOINT_BASE_URL
75
+ value: "https://api.openai.com/v1"
76
+ - name: ENRICHMENT_ENDPOINT_API_KEY
77
+ value: "REPLACE_WITH_YOUR_API_KEY"
78
+ - name: ENRICHMENT_ENDPOINT_MODEL
79
+ value: "o3-mini"
80
+ ports:
81
+ - containerPort: 8080
82
+ readinessProbe:
83
+ httpGet:
84
+ path: /
85
+ port: 8080
86
+ initialDelaySeconds: 10
87
+ periodSeconds: 5
88
+ ---
89
+ apiVersion: v1
90
+ kind: Service
91
+ metadata:
92
+ name: kodit
93
+ spec:
94
+ type: LoadBalancer
95
+ selector:
96
+ app: kodit
97
+ ports:
98
+ - port: 8080
99
+ targetPort: 8080
@@ -0,0 +1,31 @@
1
+ ---
2
+ title: Telemetry
3
+ description: Learn about what data is collected and how to disable it.
4
+ weight: 99
5
+ ---
6
+
7
+ Kodit includes a very limited amount of anonymous telemetry to help guide product
8
+ development. At the moment Kodit uses [Rudderstack](https://rudderstack.com) to capture
9
+ anonymous usage metrics.
10
+
11
+ ## What Kodit Captures
12
+
13
+ You can see what metrics are sent by searching for [use of the helper
14
+ functions](https://github.com/helixml/kodit/blob/main/src/kodit/log.py#L169) in the Kodit
15
+ codebase.
16
+
17
+ Kodit currently captures use of the following:
18
+
19
+ - When a user uses the CLI methods
20
+ - When the indexing service is used or queried
21
+
22
+ No user data is collected, only metadata about Kodit usage.
23
+
24
+ ## Disabling Telemetry
25
+
26
+ We hope that you will help us improve Kodit by leaving telemetry turned on, but if you'd
27
+ like to turn it off, set the following environment variable (or add it to your .env file):
28
+
29
+ ```sh
30
+ DISABLE_TELEMETRY=true
31
+ ```
@@ -31,7 +31,6 @@ dependencies = [
31
31
  "httpx-retries>=0.3.2",
32
32
  "httpx>=0.28.1",
33
33
  "structlog>=25.3.0",
34
- "posthog>=4.0.1",
35
34
  "sqlalchemy[asyncio]>=2.0.40",
36
35
  "alembic>=1.15.2",
37
36
  "aiosqlite>=0.20.0",
@@ -53,6 +52,7 @@ dependencies = [
53
52
  "asyncpg>=0.30.0",
54
53
  "transformers>=4.51.3",
55
54
  "accelerate>=1.7.0",
55
+ "rudder-sdk-python>=2.1.4",
56
56
  ]
57
57
 
58
58
  [dependency-groups]
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.2.2'
21
- __version_tuple__ = version_tuple = (0, 2, 2)
20
+ __version__ = version = '0.2.4'
21
+ __version_tuple__ = version_tuple = (0, 2, 4)
@@ -21,6 +21,12 @@ async def root() -> dict[str, str]:
21
21
  return {"message": "Hello, World!"}
22
22
 
23
23
 
24
+ @app.get("/healthz")
25
+ async def healthz() -> dict[str, str]:
26
+ """Return a health check for the kodit API."""
27
+ return {"status": "ok"}
28
+
29
+
24
30
  # Add mcp routes last, otherwise previous routes aren't added
25
31
  app.mount("", mcp_app)
26
32
 
@@ -81,6 +81,7 @@ async def index(
81
81
  )
82
82
 
83
83
  if not sources:
84
+ log_event("kodit.cli.index.list")
84
85
  # No source specified, list all indexes
85
86
  indexes = await service.list_indexes()
86
87
  headers: list[str | Cell] = [
@@ -108,7 +109,8 @@ async def index(
108
109
  msg = "File indexing is not implemented yet"
109
110
  raise click.UsageError(msg)
110
111
 
111
- # Index directory
112
+ # Index source
113
+ log_event("kodit.cli.index.create")
112
114
  s = await source_service.create(source)
113
115
  index = await service.create(s.id)
114
116
  await service.run(index.id)
@@ -134,6 +136,7 @@ async def code(
134
136
 
135
137
  This works best if your query is code.
136
138
  """
139
+ log_event("kodit.cli.search.code")
137
140
  source_repository = SourceRepository(session)
138
141
  source_service = SourceService(app_context.get_clone_dir(), source_repository)
139
142
  repository = IndexRepository(session)
@@ -177,6 +180,7 @@ async def keyword(
177
180
  top_k: int,
178
181
  ) -> None:
179
182
  """Search for snippets using keyword search."""
183
+ log_event("kodit.cli.search.keyword")
180
184
  source_repository = SourceRepository(session)
181
185
  source_service = SourceService(app_context.get_clone_dir(), source_repository)
182
186
  repository = IndexRepository(session)
@@ -223,6 +227,7 @@ async def text(
223
227
 
224
228
  This works best if your query is text.
225
229
  """
230
+ log_event("kodit.cli.search.text")
226
231
  source_repository = SourceRepository(session)
227
232
  source_service = SourceService(app_context.get_clone_dir(), source_repository)
228
233
  repository = IndexRepository(session)
@@ -270,6 +275,7 @@ async def hybrid( # noqa: PLR0913
270
275
  text: str,
271
276
  ) -> None:
272
277
  """Search for snippets using hybrid search."""
278
+ log_event("kodit.cli.search.hybrid")
273
279
  source_repository = SourceRepository(session)
274
280
  source_service = SourceService(app_context.get_clone_dir(), source_repository)
275
281
  repository = IndexRepository(session)
@@ -321,7 +327,7 @@ def serve(
321
327
  """Start the kodit server, which hosts the MCP server and the kodit API."""
322
328
  log = structlog.get_logger(__name__)
323
329
  log.info("Starting kodit server", host=host, port=port)
324
- log_event("kodit_server_started")
330
+ log_event("kodit.cli.serve")
325
331
 
326
332
  # Configure uvicorn with graceful shutdown
327
333
  config = uvicorn.Config(
@@ -3,6 +3,7 @@
3
3
  from sqlalchemy.ext.asyncio import AsyncSession
4
4
 
5
5
  from kodit.config import AppContext, Endpoint
6
+ from kodit.embedding.embedding_models import EmbeddingType
6
7
  from kodit.embedding.embedding_provider.local_embedding_provider import (
7
8
  CODE,
8
9
  LocalEmbeddingProvider,
@@ -19,6 +20,7 @@ from kodit.embedding.vectorchord_vector_search_service import (
19
20
  TaskName,
20
21
  VectorChordVectorSearchService,
21
22
  )
23
+ from kodit.log import log_event
22
24
 
23
25
 
24
26
  def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
@@ -34,6 +36,7 @@ def embedding_factory(
34
36
  endpoint = _get_endpoint_configuration(app_context)
35
37
 
36
38
  if endpoint and endpoint.type == "openai":
39
+ log_event("kodit.embedding", {"provider": "openai"})
37
40
  from openai import AsyncOpenAI
38
41
 
39
42
  embedding_provider = OpenAIEmbeddingProvider(
@@ -44,14 +47,22 @@ def embedding_factory(
44
47
  model_name=endpoint.model or "text-embedding-3-small",
45
48
  )
46
49
  else:
50
+ log_event("kodit.embedding", {"provider": "local"})
47
51
  embedding_provider = LocalEmbeddingProvider(CODE)
48
52
 
49
53
  if app_context.default_search.provider == "vectorchord":
54
+ log_event("kodit.database", {"provider": "vectorchord"})
50
55
  return VectorChordVectorSearchService(task_name, session, embedding_provider)
51
56
  if app_context.default_search.provider == "sqlite":
57
+ log_event("kodit.database", {"provider": "sqlite"})
58
+ if task_name == "code":
59
+ embedding_type = EmbeddingType.CODE
60
+ elif task_name == "text":
61
+ embedding_type = EmbeddingType.TEXT
52
62
  return LocalVectorSearchService(
53
63
  embedding_repository=embedding_repository,
54
64
  embedding_provider=embedding_provider,
65
+ embedding_type=embedding_type,
55
66
  )
56
67
 
57
68
  msg = f"Invalid semantic search provider: {app_context.default_search.provider}"
@@ -0,0 +1,92 @@
1
+ """Embedding provider."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import AsyncGenerator
5
+ from dataclasses import dataclass
6
+
7
+ import structlog
8
+ import tiktoken
9
+
10
+ OPENAI_MAX_EMBEDDING_SIZE = 8192
11
+
12
+ Vector = list[float]
13
+
14
+
15
+ @dataclass
16
+ class EmbeddingRequest:
17
+ """Embedding request."""
18
+
19
+ id: int
20
+ text: str
21
+
22
+
23
+ @dataclass
24
+ class EmbeddingResponse:
25
+ """Embedding response."""
26
+
27
+ id: int
28
+ embedding: Vector
29
+
30
+
31
+ class EmbeddingProvider(ABC):
32
+ """Embedding provider."""
33
+
34
+ @abstractmethod
35
+ def embed(
36
+ self, data: list[EmbeddingRequest]
37
+ ) -> AsyncGenerator[list[EmbeddingResponse], None]:
38
+ """Embed a list of strings.
39
+
40
+ The embedding provider is responsible for embedding a list of strings into a
41
+ list of vectors. The embedding provider is responsible for splitting the list of
42
+ strings into smaller sub-batches and embedding them in parallel.
43
+ """
44
+
45
+
46
+ def split_sub_batches(
47
+ encoding: tiktoken.Encoding,
48
+ data: list[EmbeddingRequest],
49
+ max_context_window: int = OPENAI_MAX_EMBEDDING_SIZE,
50
+ ) -> list[list[EmbeddingRequest]]:
51
+ """Split a list of strings into smaller sub-batches."""
52
+ log = structlog.get_logger(__name__)
53
+ result = []
54
+ data_to_process = [s for s in data if s.text.strip()] # Filter out empty strings
55
+
56
+ while data_to_process:
57
+ next_batch = []
58
+ current_tokens = 0
59
+
60
+ while data_to_process:
61
+ next_item = data_to_process[0]
62
+ item_tokens = len(encoding.encode(next_item.text, disallowed_special=()))
63
+
64
+ if item_tokens > max_context_window:
65
+ # Optimise truncation by operating on tokens directly instead of
66
+ # removing one character at a time and repeatedly re-encoding.
67
+ tokens = encoding.encode(next_item.text, disallowed_special=())
68
+ if len(tokens) > max_context_window:
69
+ # Keep only the first *max_context_window* tokens.
70
+ tokens = tokens[:max_context_window]
71
+ # Convert back to text. This requires only one decode call and
72
+ # guarantees that the resulting string fits the token budget.
73
+ next_item.text = encoding.decode(tokens)
74
+ item_tokens = max_context_window # We know the exact size now
75
+
76
+ data_to_process[0] = next_item
77
+
78
+ log.warning(
79
+ "Truncated snippet because it was too long to embed",
80
+ snippet=next_item.text[:100] + "...",
81
+ )
82
+
83
+ if current_tokens + item_tokens > max_context_window:
84
+ break
85
+
86
+ next_batch.append(data_to_process.pop(0))
87
+ current_tokens += item_tokens
88
+
89
+ if next_batch:
90
+ result.append(next_batch)
91
+
92
+ return result
@@ -3,10 +3,12 @@
3
3
  import asyncio
4
4
  import hashlib
5
5
  import math
6
- from collections.abc import Generator, Sequence
6
+ from collections.abc import AsyncGenerator, Generator, Sequence
7
7
 
8
8
  from kodit.embedding.embedding_provider.embedding_provider import (
9
9
  EmbeddingProvider,
10
+ EmbeddingRequest,
11
+ EmbeddingResponse,
10
12
  Vector,
11
13
  )
12
14
 
@@ -31,27 +33,34 @@ class HashEmbeddingProvider(EmbeddingProvider):
31
33
  self.dim = dim
32
34
  self.batch_size = batch_size
33
35
 
34
- async def embed(self, data: list[str]) -> list[Vector]:
36
+ async def embed(
37
+ self, data: list[EmbeddingRequest]
38
+ ) -> AsyncGenerator[list[EmbeddingResponse], None]:
35
39
  """Embed every string in *data*, preserving order.
36
40
 
37
41
  Work is sliced into *batch_size* chunks and scheduled concurrently
38
42
  (still CPU-bound, but enough to cooperate with an asyncio loop).
39
43
  """
40
44
  if not data:
41
- return []
45
+ yield []
42
46
 
43
47
  async def _embed_chunk(chunk: Sequence[str]) -> list[Vector]:
44
48
  return [self._string_to_vector(text) for text in chunk]
45
49
 
46
50
  tasks = [
47
51
  asyncio.create_task(_embed_chunk(chunk))
48
- for chunk in self._chunked(data, self.batch_size)
52
+ for chunk in self._chunked([i.text for i in data], self.batch_size)
49
53
  ]
50
54
 
51
- vectors: list[Vector] = []
52
55
  for task in tasks:
53
- vectors.extend(await task)
54
- return vectors
56
+ result = await task
57
+ yield [
58
+ EmbeddingResponse(
59
+ id=item.id,
60
+ embedding=embedding,
61
+ )
62
+ for item, embedding in zip(data, result, strict=True)
63
+ ]
55
64
 
56
65
  @staticmethod
57
66
  def _chunked(seq: Sequence[str], size: int) -> Generator[Sequence[str], None, None]:
@@ -0,0 +1,96 @@
1
+ """Local embedding service."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from time import time
7
+ from typing import TYPE_CHECKING
8
+
9
+ import structlog
10
+
11
+ from kodit.embedding.embedding_provider.embedding_provider import (
12
+ EmbeddingProvider,
13
+ EmbeddingRequest,
14
+ EmbeddingResponse,
15
+ split_sub_batches,
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import AsyncGenerator
20
+
21
+ from sentence_transformers import SentenceTransformer
22
+ from tiktoken import Encoding
23
+
24
+
25
+ TINY = "tiny"
26
+ CODE = "code"
27
+ TEST = "test"
28
+
29
+ COMMON_EMBEDDING_MODELS = {
30
+ TINY: "ibm-granite/granite-embedding-30m-english",
31
+ CODE: "flax-sentence-embeddings/st-codesearch-distilroberta-base",
32
+ TEST: "minishlab/potion-base-4M",
33
+ }
34
+
35
+
36
+ class LocalEmbeddingProvider(EmbeddingProvider):
37
+ """Local embedder."""
38
+
39
+ def __init__(self, model_name: str) -> None:
40
+ """Initialize the local embedder."""
41
+ self.log = structlog.get_logger(__name__)
42
+ self.model_name = COMMON_EMBEDDING_MODELS.get(model_name, model_name)
43
+ self.encoding_name = "text-embedding-3-small"
44
+ self.embedding_model = None
45
+ self.encoding = None
46
+
47
+ def _encoding(self) -> Encoding:
48
+ if self.encoding is None:
49
+ from tiktoken import encoding_for_model
50
+
51
+ start_time = time()
52
+ self.encoding = encoding_for_model(self.encoding_name)
53
+ self.log.debug(
54
+ "Encoding loaded",
55
+ model_name=self.encoding_name,
56
+ duration=time() - start_time,
57
+ )
58
+ return self.encoding
59
+
60
+ def _model(self) -> SentenceTransformer:
61
+ """Get the embedding model."""
62
+ if self.embedding_model is None:
63
+ os.environ["TOKENIZERS_PARALLELISM"] = "false" # Avoid warnings
64
+ from sentence_transformers import SentenceTransformer
65
+
66
+ start_time = time()
67
+ self.embedding_model = SentenceTransformer(
68
+ self.model_name,
69
+ trust_remote_code=True,
70
+ )
71
+ self.log.debug(
72
+ "Model loaded",
73
+ model_name=self.model_name,
74
+ duration=time() - start_time,
75
+ )
76
+ return self.embedding_model
77
+
78
+ async def embed(
79
+ self, data: list[EmbeddingRequest]
80
+ ) -> AsyncGenerator[list[EmbeddingResponse], None]:
81
+ """Embed a list of strings."""
82
+ model = self._model()
83
+
84
+ batched_data = split_sub_batches(self._encoding(), data)
85
+
86
+ for batch in batched_data:
87
+ embeddings = model.encode(
88
+ [i.text for i in batch], show_progress_bar=False, batch_size=4
89
+ )
90
+ yield [
91
+ EmbeddingResponse(
92
+ id=item.id,
93
+ embedding=[float(x) for x in embedding],
94
+ )
95
+ for item, embedding in zip(batch, embeddings, strict=True)
96
+ ]