kodit 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (253) hide show
  1. kodit-0.3.4/.claude/commands/update-docs.md +79 -0
  2. {kodit-0.3.3 → kodit-0.3.4}/CLAUDE.md +12 -7
  3. {kodit-0.3.3 → kodit-0.3.4}/PKG-INFO +9 -4
  4. {kodit-0.3.3 → kodit-0.3.4}/README.md +8 -3
  5. {kodit-0.3.3 → kodit-0.3.4}/docs/_index.md +8 -3
  6. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/indexing/index.md +64 -13
  7. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/mcp/index.md +17 -6
  8. {kodit-0.3.3 → kodit-0.3.4}/pyproject.toml +5 -0
  9. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/_version.py +2 -2
  10. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/application/factories/code_indexing_factory.py +2 -24
  11. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/application/services/code_indexing_application_service.py +10 -2
  12. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/services/index_service.py +25 -66
  13. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/value_objects.py +10 -22
  14. kodit-0.3.4/src/kodit/infrastructure/slicing/__init__.py +1 -0
  15. kodit-0.3.4/src/kodit/infrastructure/slicing/language_detection_service.py +18 -0
  16. kodit-0.3.4/src/kodit/infrastructure/slicing/slicer.py +894 -0
  17. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
  18. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +6 -4
  19. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  20. kodit-0.3.4/src/kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +55 -0
  21. kodit-0.3.4/src/kodit/migrations/versions/85155663351e_initial.py +98 -0
  22. kodit-0.3.4/src/kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +50 -0
  23. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/application/test_code_indexing_application_service.py +24 -58
  24. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/services/index_service_test.py +3 -27
  25. kodit-0.3.4/tests/kodit/infrastructure/slicing/__init__.py +1 -0
  26. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/__init__.py +1 -0
  27. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/c/main.c +72 -0
  28. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/c/models.c +75 -0
  29. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/c/models.h +99 -0
  30. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/c/utils.c +17 -0
  31. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/c/utils.h +33 -0
  32. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/cpp/main.cpp +85 -0
  33. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/cpp/models.cpp +39 -0
  34. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/cpp/models.hpp +98 -0
  35. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/cpp/utils.cpp +40 -0
  36. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/cpp/utils.hpp +56 -0
  37. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/csharp/Main.cs +52 -0
  38. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/csharp/Models.cs +89 -0
  39. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/csharp/Utils.cs +85 -0
  40. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/css/components.css +428 -0
  41. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/css/main.css +259 -0
  42. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/css/utilities.css +456 -0
  43. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/go/main.go +79 -0
  44. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/go/models.go +75 -0
  45. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/go/utils.go +45 -0
  46. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/html/components.html +165 -0
  47. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/html/forms.html +344 -0
  48. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/html/main.html +72 -0
  49. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/java/Main.java +75 -0
  50. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/java/Models.java +108 -0
  51. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/java/Utils.java +74 -0
  52. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/javascript/main.js +66 -0
  53. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/javascript/models.js +87 -0
  54. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/javascript/utils.js +61 -0
  55. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/python/__init__.py +1 -0
  56. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/python/main.py +55 -0
  57. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/python/models.py +54 -0
  58. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/python/utils.py +27 -0
  59. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/rust/main.rs +58 -0
  60. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/rust/models.rs +84 -0
  61. kodit-0.3.4/tests/kodit/infrastructure/slicing/data/rust/utils.rs +50 -0
  62. kodit-0.3.4/tests/kodit/infrastructure/slicing/slicer_test.py +830 -0
  63. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  64. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/factories.py +0 -13
  65. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  66. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  67. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  68. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  69. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  70. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  71. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  72. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  73. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -44
  74. kodit-0.3.3/src/kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  75. kodit-0.3.3/src/kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  76. kodit-0.3.3/src/kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +0 -47
  77. kodit-0.3.3/src/kodit/migrations/versions/85155663351e_initial.py +0 -82
  78. kodit-0.3.3/src/kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +0 -44
  79. {kodit-0.3.3 → kodit-0.3.4}/.claude/commands/debug.md +0 -0
  80. {kodit-0.3.3 → kodit-0.3.4}/.claude/commands/new-requirement.md +0 -0
  81. {kodit-0.3.3 → kodit-0.3.4}/.claude/commands/refactor.md +0 -0
  82. {kodit-0.3.3 → kodit-0.3.4}/.claude/settings.json +0 -0
  83. {kodit-0.3.3 → kodit-0.3.4}/.cursor/rules/kodit.mdc +0 -0
  84. {kodit-0.3.3 → kodit-0.3.4}/.cursor/rules/style.mdc +0 -0
  85. {kodit-0.3.3 → kodit-0.3.4}/.dockerignore +0 -0
  86. {kodit-0.3.3 → kodit-0.3.4}/.github/CODE_OF_CONDUCT.md +0 -0
  87. {kodit-0.3.3 → kodit-0.3.4}/.github/CONTRIBUTING.md +0 -0
  88. {kodit-0.3.3 → kodit-0.3.4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  89. {kodit-0.3.3 → kodit-0.3.4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  90. {kodit-0.3.3 → kodit-0.3.4}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  91. {kodit-0.3.3 → kodit-0.3.4}/.github/dependabot.yml +0 -0
  92. {kodit-0.3.3 → kodit-0.3.4}/.github/workflows/docker.yaml +0 -0
  93. {kodit-0.3.3 → kodit-0.3.4}/.github/workflows/docs.yaml +0 -0
  94. {kodit-0.3.3 → kodit-0.3.4}/.github/workflows/pull_request.yaml +0 -0
  95. {kodit-0.3.3 → kodit-0.3.4}/.github/workflows/pypi-test.yaml +0 -0
  96. {kodit-0.3.3 → kodit-0.3.4}/.github/workflows/pypi.yaml +0 -0
  97. {kodit-0.3.3 → kodit-0.3.4}/.github/workflows/test.yaml +0 -0
  98. {kodit-0.3.3 → kodit-0.3.4}/.gitignore +0 -0
  99. {kodit-0.3.3 → kodit-0.3.4}/.python-version +0 -0
  100. {kodit-0.3.3 → kodit-0.3.4}/.vscode/launch.json +0 -0
  101. {kodit-0.3.3 → kodit-0.3.4}/.vscode/settings.json +0 -0
  102. {kodit-0.3.3 → kodit-0.3.4}/Dockerfile +0 -0
  103. {kodit-0.3.3 → kodit-0.3.4}/LICENSE +0 -0
  104. {kodit-0.3.3 → kodit-0.3.4}/alembic.ini +0 -0
  105. {kodit-0.3.3 → kodit-0.3.4}/docs/MIGRATION_TO_INDEX_AGGREGATE.md +0 -0
  106. {kodit-0.3.3 → kodit-0.3.4}/docs/demos/_index.md +0 -0
  107. {kodit-0.3.3 → kodit-0.3.4}/docs/demos/go-simple-microservice/index.md +0 -0
  108. {kodit-0.3.3 → kodit-0.3.4}/docs/demos/knock-knock-auth/index.md +0 -0
  109. {kodit-0.3.3 → kodit-0.3.4}/docs/developer/index.md +0 -0
  110. {kodit-0.3.3 → kodit-0.3.4}/docs/getting-started/_index.md +0 -0
  111. {kodit-0.3.3 → kodit-0.3.4}/docs/getting-started/installation/index.md +0 -0
  112. {kodit-0.3.3 → kodit-0.3.4}/docs/getting-started/integration/index.md +0 -0
  113. {kodit-0.3.3 → kodit-0.3.4}/docs/getting-started/quick-start/index.md +0 -0
  114. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/_index.md +0 -0
  115. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/configuration/index.md +0 -0
  116. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/deployment/docker-compose.yaml +0 -0
  117. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/deployment/index.md +0 -0
  118. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/deployment/kubernetes.yaml +0 -0
  119. {kodit-0.3.3 → kodit-0.3.4}/docs/reference/telemetry/index.md +0 -0
  120. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/.gitignore +0 -0
  121. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/__init__.py +0 -0
  122. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/app.py +0 -0
  123. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/application/__init__.py +0 -0
  124. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/application/factories/__init__.py +0 -0
  125. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/application/services/__init__.py +0 -0
  126. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/cli.py +0 -0
  127. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/config.py +0 -0
  128. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/database.py +0 -0
  129. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/__init__.py +0 -0
  130. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/entities.py +0 -0
  131. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/errors.py +0 -0
  132. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/interfaces.py +0 -0
  133. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/protocols.py +0 -0
  134. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/services/__init__.py +0 -0
  135. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/services/bm25_service.py +0 -0
  136. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/services/embedding_service.py +0 -0
  137. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/services/enrichment_service.py +0 -0
  138. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/domain/services/index_query_service.py +0 -0
  139. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/__init__.py +0 -0
  140. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/bm25/__init__.py +0 -0
  141. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/bm25/bm25_factory.py +0 -0
  142. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/bm25/local_bm25_repository.py +0 -0
  143. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/bm25/vectorchord_bm25_repository.py +0 -0
  144. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/cloning/__init__.py +0 -0
  145. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/cloning/git/__init__.py +0 -0
  146. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/cloning/git/working_copy.py +0 -0
  147. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/cloning/metadata.py +0 -0
  148. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/__init__.py +0 -0
  149. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/embedding_factory.py +0 -0
  150. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/embedding_providers/__init__.py +0 -0
  151. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/embedding_providers/batching.py +0 -0
  152. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +0 -0
  153. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +0 -0
  154. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +0 -0
  155. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/local_vector_search_repository.py +0 -0
  156. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +0 -0
  157. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/enrichment/__init__.py +0 -0
  158. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/enrichment/enrichment_factory.py +0 -0
  159. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/enrichment/local_enrichment_provider.py +0 -0
  160. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -0
  161. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/enrichment/openai_enrichment_provider.py +0 -0
  162. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/git/__init__.py +0 -0
  163. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/git/git_utils.py +0 -0
  164. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/ignore/__init__.py +0 -0
  165. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/ignore/ignore_pattern_provider.py +0 -0
  166. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/indexing/__init__.py +0 -0
  167. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/indexing/auto_indexing_service.py +0 -0
  168. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/indexing/fusion_service.py +0 -0
  169. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/indexing/indexing_factory.py +0 -0
  170. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/mappers/__init__.py +0 -0
  171. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/mappers/index_mapper.py +0 -0
  172. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/sqlalchemy/__init__.py +0 -0
  173. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/sqlalchemy/embedding_repository.py +0 -0
  174. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/sqlalchemy/entities.py +0 -0
  175. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/ui/__init__.py +0 -0
  176. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/ui/progress.py +0 -0
  177. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/infrastructure/ui/spinner.py +0 -0
  178. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/log.py +0 -0
  179. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/mcp.py +0 -0
  180. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/middleware.py +0 -0
  181. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/README +0 -0
  182. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/__init__.py +0 -0
  183. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/env.py +0 -0
  184. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/script.py.mako +0 -0
  185. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/versions/9e53ea8bb3b0_add_authors.py +0 -0
  186. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/migrations/versions/__init__.py +0 -0
  187. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/reporting.py +0 -0
  188. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/utils/__init__.py +0 -0
  189. {kodit-0.3.3 → kodit-0.3.4}/src/kodit/utils/path_utils.py +0 -0
  190. {kodit-0.3.3 → kodit-0.3.4}/tests/__init__.py +0 -0
  191. {kodit-0.3.3 → kodit-0.3.4}/tests/conftest.py +0 -0
  192. {kodit-0.3.3 → kodit-0.3.4}/tests/docker-smoke.sh +0 -0
  193. {kodit-0.3.3 → kodit-0.3.4}/tests/experiments/__init__.py +0 -0
  194. {kodit-0.3.3 → kodit-0.3.4}/tests/experiments/cline_prompt_tests/__init__.py +0 -0
  195. {kodit-0.3.3 → kodit-0.3.4}/tests/experiments/cline_prompt_tests/cline_prompt.txt +0 -0
  196. {kodit-0.3.3 → kodit-0.3.4}/tests/experiments/cline_prompt_tests/cline_prompt_test.py +0 -0
  197. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/__init__.py +0 -0
  198. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/application/__init__.py +0 -0
  199. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/cli_test.py +0 -0
  200. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/config_test.py +0 -0
  201. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/__init__.py +0 -0
  202. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/bm25_domain_service_test.py +0 -0
  203. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/enrichment_domain_service_test.py +0 -0
  204. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/entities_test.py +0 -0
  205. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/services/__init__.py +0 -0
  206. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/test_embedding_service.py +0 -0
  207. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/test_language_mapping.py +0 -0
  208. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/domain/test_multi_search_result.py +0 -0
  209. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/e2e.py +0 -0
  210. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/__init__.py +0 -0
  211. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/bm25/__init__.py +0 -0
  212. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/bm25/local_bm25_repository_test.py +0 -0
  213. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/bm25/vectorchord_bm25_repository_test.py +0 -0
  214. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/cloning/git_cloning/__init__.py +0 -0
  215. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/cloning/git_cloning/working_copy_test.py +0 -0
  216. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/__init__.py +0 -0
  217. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/embedding_factory_test.py +0 -0
  218. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/embedding_provider/__init__.py +0 -0
  219. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/embedding_provider/test_hash_embedding_provider.py +0 -0
  220. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/embedding_provider/test_local_embedding_provider.py +0 -0
  221. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/embedding_provider/test_openai_embedding_provider.py +0 -0
  222. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/test_batching.py +0 -0
  223. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/test_embedding_integration.py +0 -0
  224. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/test_local_vector_search_repository.py +0 -0
  225. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/embedding/test_vectorchord_vector_search_repository.py +0 -0
  226. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/enrichment/__init__.py +0 -0
  227. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/enrichment/enrichment_provider/__init__.py +0 -0
  228. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/enrichment/enrichment_provider/test_local_enrichment_provider.py +0 -0
  229. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/enrichment/enrichment_provider/test_null_enrichment_provider.py +0 -0
  230. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/enrichment/enrichment_provider/test_openai_enrichment_provider.py +0 -0
  231. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/enrichment/test_enrichment_factory.py +0 -0
  232. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/indexing/__init__.py +0 -0
  233. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/indexing/test_auto_indexing_service.py +0 -0
  234. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/mappers/__init__.py +0 -0
  235. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/mappers/test_index_mapper.py +0 -0
  236. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/__init__.py +0 -0
  237. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/csharp.cs +0 -0
  238. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/golang.go +0 -0
  239. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/javascript.js +0 -0
  240. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/knock_knock_server.py +0 -0
  241. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/python.py +0 -0
  242. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/snippets/typescript.tsx +0 -0
  243. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/sqlalchemy/__init__.py +0 -0
  244. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/infrastructure/sqlalchemy/test_embedding_repository.py +0 -0
  245. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/log_test.py +0 -0
  246. {kodit-0.3.3 → kodit-0.3.4}/tests/kodit/mcp_test.py +0 -0
  247. {kodit-0.3.3 → kodit-0.3.4}/tests/performance/__init__.py +0 -0
  248. {kodit-0.3.3 → kodit-0.3.4}/tests/performance/similarity.py +0 -0
  249. {kodit-0.3.3 → kodit-0.3.4}/tests/smoke.sh +0 -0
  250. {kodit-0.3.3 → kodit-0.3.4}/tests/utils/__init__.py +0 -0
  251. {kodit-0.3.3 → kodit-0.3.4}/tests/utils/test_path_utils.py +0 -0
  252. {kodit-0.3.3 → kodit-0.3.4}/tests/vectorchord-smoke.sh +0 -0
  253. {kodit-0.3.3 → kodit-0.3.4}/uv.lock +0 -0
@@ -0,0 +1,79 @@
1
+ # Update Documentation Based on Recent Code Changes
2
+
3
+ ## Objective
4
+
5
+ Analyze recent commits in the current Git branch and update relevant documentation to reflect code changes.
6
+
7
+ ## Steps to Complete
8
+
9
+ ### 1. Analyze Recent Commits
10
+
11
+ - Run `git log --oneline -n 20` to view the last 20 commits on the current branch; if the
12
+ current branch is not the main branch, view all of its commits instead.
13
+ - For each commit, run `git show --name-status <commit-hash>` to see which files were modified
14
+ - Focus on commits that modified source code files (`.js`, `.py`, `.ts`, `.java`, `.go`, `.rs`, etc.)
15
+
16
+ ### 2. Identify Code Changes
17
+
18
+ For each modified source file in recent commits:
19
+
20
+ - Examine the diff using `git diff <commit-hash>^ <commit-hash> -- <file-path>`
21
+ - Identify:
22
+ - New functions or methods added
23
+ - Functions or methods removed or renamed
24
+ - Changes to function signatures (parameters, return types)
25
+ - New classes or modules
26
+ - Changes to public APIs
27
+ - New configuration options or environment variables
28
+ - Breaking changes
29
+
30
+ ### 3. Update README.md
31
+
32
+ Check if the README.md needs updates for:
33
+
34
+ - **Installation instructions**: If dependencies or setup steps changed
35
+ - **Usage examples**: If APIs or interfaces changed
36
+ - **Configuration**: If new environment variables or config options were added
37
+ - **Features list**: If new features were implemented
38
+ - **Quick start guide**: If the basic usage pattern changed
39
+
40
+ ### 4. Update Documentation in /docs
41
+
42
+ For each markdown file in the `docs/` folder:
43
+
44
+ - Check if it references any of the changed code
45
+ - Update:
46
+ - API documentation with new/changed function signatures
47
+ - Code examples that may no longer work
48
+ - Configuration guides if settings changed
49
+ - Architecture diagrams if structural changes occurred
50
+ - Migration guides if there are breaking changes
51
+
52
+ ### 5. Create or Update Specific Docs
53
+
54
+ Based on the changes found:
55
+
56
+ - If new features were added without documentation, create new doc files
57
+ - If breaking changes exist, create or update a migration guide
58
+ - If new APIs were added, ensure they have proper documentation
59
+
60
+ ### 6. Verify Documentation Accuracy
61
+
62
+ - Ensure all code examples in documentation are up-to-date
63
+ - Check that any referenced file paths still exist
64
+ - Verify that installation and setup instructions still work
65
+
66
+ ## Output Required
67
+
68
+ 1. Summary of commits analyzed and significant changes found
69
+ 2. List of documentation files updated with brief description of changes
70
+ 3. Any new documentation files created
71
+ 4. Warnings about potentially outdated documentation that needs manual review
72
+
73
+ ## Important Notes
74
+
75
+ - Focus on user-facing changes that affect how people use Kodit
76
+ - Don't document internal implementation details unless they affect the public API
77
+ - Keep documentation concise and example-driven
78
+ - If unsure about a change's impact, flag it for manual review
79
+ - Ensure all documentation follows the existing style and format in the repository
@@ -50,18 +50,23 @@ The codebase follows Domain-Driven Design (DDD) with clean architecture:
50
50
 
51
51
  ### Key Components
52
52
 
53
- **Indexing Pipeline:**
53
+ **Advanced Indexing Pipeline:**
54
54
 
55
- 1. Clone/read source code
56
- 2. Extract snippets using Tree-sitter
57
- 3. Generate embeddings and BM25 indices
58
- 4. Store in database
55
+ 1. Clone/read source code with Git metadata extraction
56
+ 2. Language detection for 20+ programming languages
57
+ 3. Advanced snippet extraction using Tree-sitter with dependency analysis
58
+ 4. Build call graphs and import maps for context-aware extraction
59
+ 5. Generate embeddings and BM25 indices
60
+ 6. Store in database with selective reindexing for performance
59
61
 
60
- **Search System:**
62
+ **Advanced Search System:**
61
63
 
62
- - Hybrid search combining semantic (embeddings) and keyword (BM25)
64
+ - Hybrid search combining semantic (embeddings) and keyword (BM25) with Reciprocal Rank Fusion
65
+ - Multi-dimensional filtering: language, author, date range, source, file path
66
+ - Context-aware results with dependency tracking and usage examples
63
67
  - Multiple providers: local models, OpenAI, custom APIs
64
68
  - Configurable via environment variables
69
+ - Support for 20+ programming languages including HTML/CSS
65
70
 
66
71
  **MCP Server:**
67
72
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
@@ -92,13 +92,16 @@ code. This index is used to build a snippet library, ready for ingestion into an
92
92
 
93
93
  - Index local directories and public Git repositories
94
94
  - Build comprehensive snippet libraries for LLM ingestion
95
- - Support for multiple codebase types and languages
96
- - Efficient indexing and search capabilities
95
+ - Support for 20+ programming languages including Python, JavaScript/TypeScript, Java, Go, Rust, C/C++, C#, HTML/CSS, and more
96
+ - Advanced code analysis with dependency tracking and call graph generation
97
+ - Intelligent snippet extraction with context-aware dependencies
98
+ - Efficient indexing with selective reindexing (only processes modified files)
97
99
  - Privacy first: respects .gitignore and .noindex files
98
100
  - **NEW in 0.3**: Auto-indexing configuration for shared server deployments
99
101
  - **NEW in 0.3**: Enhanced Git provider support including Azure DevOps
100
102
  - **NEW in 0.3**: Index private repositories via a PAT
101
103
  - **NEW in 0.3**: Improved progress monitoring and reporting during indexing
104
+ - **NEW in 0.3**: Advanced code slicing infrastructure with Tree-sitter parsing
102
105
 
103
106
  ### MCP Server
104
107
 
@@ -111,7 +114,9 @@ intent. Kodit has been tested to work well with:
111
114
  - [Cursor](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cursor)
112
115
  - [Cline](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cline)
113
116
  - Please contribute more instructions! ... any other assistant is likely to work ...
114
- - **New in 0.3**: Filter snippets by source, language, author or timestamp.
117
+ - **New in 0.3**: Advanced search filters by source, language, author, date range, and file path
118
+ - **New in 0.3**: Hybrid search combining BM25 keyword search with semantic search
119
+ - **New in 0.3**: Enhanced MCP tools with rich context parameters and metadata
115
120
 
116
121
  ### Enterprise Ready
117
122
 
@@ -39,13 +39,16 @@ code. This index is used to build a snippet library, ready for ingestion into an
39
39
 
40
40
  - Index local directories and public Git repositories
41
41
  - Build comprehensive snippet libraries for LLM ingestion
42
- - Support for multiple codebase types and languages
43
- - Efficient indexing and search capabilities
42
+ - Support for 20+ programming languages including Python, JavaScript/TypeScript, Java, Go, Rust, C/C++, C#, HTML/CSS, and more
43
+ - Advanced code analysis with dependency tracking and call graph generation
44
+ - Intelligent snippet extraction with context-aware dependencies
45
+ - Efficient indexing with selective reindexing (only processes modified files)
44
46
  - Privacy first: respects .gitignore and .noindex files
45
47
  - **NEW in 0.3**: Auto-indexing configuration for shared server deployments
46
48
  - **NEW in 0.3**: Enhanced Git provider support including Azure DevOps
47
49
  - **NEW in 0.3**: Index private repositories via a PAT
48
50
  - **NEW in 0.3**: Improved progress monitoring and reporting during indexing
51
+ - **NEW in 0.3**: Advanced code slicing infrastructure with Tree-sitter parsing
49
52
 
50
53
  ### MCP Server
51
54
 
@@ -58,7 +61,9 @@ intent. Kodit has been tested to work well with:
58
61
  - [Cursor](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cursor)
59
62
  - [Cline](https://docs.helix.ml/kodit/getting-started/integration/#integration-with-cline)
60
63
  - Please contribute more instructions! ... any other assistant is likely to work ...
61
- - **New in 0.3**: Filter snippets by source, language, author or timestamp.
64
+ - **New in 0.3**: Advanced search filters by source, language, author, date range, and file path
65
+ - **New in 0.3**: Hybrid search combining BM25 keyword search with semantic search
66
+ - **New in 0.3**: Enhanced MCP tools with rich context parameters and metadata
62
67
 
63
68
  ### Enterprise Ready
64
69
 
@@ -49,13 +49,16 @@ code. This index is used to build a snippet library, ready for ingestion into an
49
49
 
50
50
  - Index local directories and public Git repositories
51
51
  - Build comprehensive snippet libraries for LLM ingestion
52
- - Support for multiple codebase types and languages
53
- - Efficient indexing and search capabilities
52
+ - Support for 20+ programming languages including Python, JavaScript/TypeScript, Java, Go, Rust, C/C++, C#, HTML/CSS, and more
53
+ - Advanced code analysis with dependency tracking and call graph generation
54
+ - Intelligent snippet extraction with context-aware dependencies
55
+ - Efficient indexing with selective reindexing (only processes modified files)
54
56
  - Privacy first: respects .gitignore and .noindex files
55
57
  - **NEW in 0.3**: Auto-indexing configuration for shared server deployments
56
58
  - **NEW in 0.3**: Enhanced Git provider support including Azure DevOps
57
59
  - **NEW in 0.3**: Index private repositories via a PAT
58
60
  - **NEW in 0.3**: Improved progress monitoring and reporting during indexing
61
+ - **NEW in 0.3**: Advanced code slicing infrastructure with Tree-sitter parsing
59
62
 
60
63
  ### MCP Server
61
64
 
@@ -68,7 +71,9 @@ intent. Kodit has been tested to work well with:
68
71
  - [Cursor](./getting-started/integration/index.md#integration-with-cursor)
69
72
  - [Cline](./getting-started/integration/index.md#integration-with-cline)
70
73
  - Please contribute more instructions! ... any other assistant is likely to work ...
71
- - **New in 0.3**: Filter snippets by source, language, author or timestamp.
74
+ - **New in 0.3**: Advanced search filters by source, language, author, date range, and file path
75
+ - **New in 0.3**: Hybrid search combining BM25 keyword search with semantic search
76
+ - **New in 0.3**: Enhanced MCP tools with rich context parameters and metadata
72
77
 
73
78
  ### Enterprise Ready
74
79
 
@@ -209,26 +209,51 @@ Kodit respects [standard ignore patterns](#ignore-patterns):
209
209
  - **`.gitignore`**: Standard Git ignore patterns
210
210
  - **`.noindex`**: Custom ignore patterns for Kodit (uses gitignore syntax)
211
211
 
212
- ### Supported File Types
212
+ ### Supported Programming Languages
213
213
 
214
- Kodit automatically detects and processes files based on their extensions:
214
+ Kodit automatically detects and processes files based on their extensions. The following languages are supported with advanced Tree-sitter parsing:
215
215
 
216
- | Language | Extensions |
217
- |----------|------------|
218
- | Python | `.py` |
219
- | JavaScript | `.js`, `.jsx` |
220
- | TypeScript | `.ts`, `.tsx` |
221
- | Go | `.go` |
222
- | C# | `.cs` |
216
+ | Language | Extensions | Features |
217
+ |----------|------------|----------|
218
+ | Python | `.py`, `.pyw`, `.pyx`, `.pxd` | Function/method extraction, import analysis, call graph |
219
+ | JavaScript | `.js`, `.jsx`, `.mjs` | Function extraction, ES6 modules, JSX support |
220
+ | TypeScript | `.ts`, `.tsx` | Type definitions, interfaces, decorators |
221
+ | Java | `.java` | Method declarations, constructors, class hierarchies |
222
+ | Go | `.go` | Function/method extraction, package imports |
223
+ | Rust | `.rs` | Function definitions, trait implementations |
224
+ | C/C++ | `.c`, `.h`, `.cpp`, `.cc`, `.cxx`, `.hpp`, `.hxx` | Function definitions, header includes |
225
+ | C# | `.cs` | Method declarations, using directives, constructors |
226
+ | HTML | `.html`, `.htm` | Element extraction with ID/class identification |
227
+ | CSS | `.css`, `.scss`, `.sass`, `.less` | Rule extraction, selector analysis, keyframes |
223
228
 
224
- ### Snippet Extraction
229
+ ### Advanced Snippet Extraction
225
230
 
226
- Kodit uses tree-sitter to intelligently extract code snippets:
231
+ Kodit uses a sophisticated Tree-sitter-based slicing system to intelligently extract code snippets with context:
232
+
233
+ #### Core Features
227
234
 
228
235
  - **Functions and Methods**: Complete function definitions with their bodies
229
236
  - **Classes**: Class definitions and their methods
230
237
  - **Imports**: Import statements for context
231
238
  - **Dependencies**: Ancestor classes and functions that the snippet depends on
239
+ - **Call Graph Analysis**: Builds relationships between functions to understand dependencies
240
+ - **Context-Aware Extraction**: Includes related functions and usage examples
241
+ - **Topological Sorting**: Orders dependencies for optimal LLM consumption
242
+
243
+ #### Smart Dependency Tracking
244
+
245
+ - **Import Maps**: Tracks import statements and their usage
246
+ - **Function Calls**: Identifies which functions call which others
247
+ - **Reverse Dependencies**: Finds all callers of a given function
248
+ - **Usage Examples**: Includes examples of how functions are used in the codebase
249
+
250
+ #### Language-Specific Extraction
251
+
252
+ - **Python**: Decorators, async functions, class inheritance
253
+ - **JavaScript/TypeScript**: Arrow functions, async/await, ES6 modules
254
+ - **Java**: Annotations, generics, inheritance hierarchies
255
+ - **Go**: Interfaces, struct methods, package organization
256
+ - **HTML/CSS**: Elements with semantic context, CSS rules and selectors
232
257
 
233
258
  ## Configuration
234
259
 
@@ -269,9 +294,35 @@ DEFAULT_ENDPOINT_API_KEY=sk-your-api-key
269
294
 
270
295
  ## Advanced Features
271
296
 
272
- ### Re-indexing Sources
297
+ ### Selective Re-indexing
298
+
299
+ Kodit includes intelligent re-indexing that only processes files that have been modified:
300
+
301
+ #### How It Works
302
+
303
+ - **SHA-256 Change Detection**: Compares SHA-256 hashes of file contents to detect changes
304
+ - **File Status Tracking**: Tracks files as CLEAN, MODIFIED, or DELETED
305
+ - **Incremental Updates**: Only re-processes changed files, improving performance for large codebases
306
+ - **Metadata Preservation**: Maintains file metadata and Git information
273
307
 
274
- Future feature!
308
+ #### Benefits
309
+
310
+ - **Performance**: Dramatically faster re-indexing for large repositories
311
+ - **Resource Efficiency**: Reduces CPU and memory usage during updates
312
+ - **Consistency**: Ensures only actual changes trigger re-processing
313
+ - **Scalability**: Enables efficient handling of large, frequently-updated codebases
314
+
315
+ #### Usage
316
+
317
+ Selective processing is applied automatically whenever you index a source that has already been indexed:
318
+
319
+ ```sh
320
+ # Re-index with selective processing
321
+ kodit index /path/to/existing/source
322
+
323
+ # Or for Git repositories
324
+ kodit index https://github.com/username/repo.git
325
+ ```
275
326
 
276
327
  ### Progress Monitoring
277
328
 
@@ -53,21 +53,30 @@ The search tool accepts the following parameters:
53
53
  | `related_file_paths` | list[Path] | Absolute paths to relevant files | `["/path/to/auth.py"]` |
54
54
  | `related_file_contents` | list[string] | Contents of relevant files | `["def authenticate(): ..."]` |
55
55
  | `keywords` | list[string] | Relevant keywords for the search | `["authentication", "jwt", "login"]` |
56
- | `language` | string \| None | Filter by programming language | `"python"`, `"go"`, `"javascript"` |
56
+ | `language` | string \| None | Filter by programming language (20+ supported) | `"python"`, `"go"`, `"javascript"`, `"html"`, `"css"` |
57
57
  | `author` | string \| None | Filter by author name | `"john.doe"` |
58
58
  | `created_after` | string \| None | Only include results created after this date (YYYY-MM-DD) | `"2023-01-01"` |
59
59
  | `created_before` | string \| None | Only include results created before this date (YYYY-MM-DD) | `"2023-12-31"` |
60
60
  | `source_repo` | string \| None | Filter by source repository | `"github.com/example/repo"` |
61
+ | `file_path` | string \| None | Filter by file path pattern | `"src/"`, `"*.test.py"` |
61
62
 
62
- ### Search Functionality
63
+ ### Advanced Search Functionality
63
64
 
64
- The search tool combines multiple search strategies:
65
+ The search tool runs several search strategies, then fuses, filters, and enriches their results:
65
66
 
66
- 1. **Keyword Search** - Uses BM25 algorithm for exact keyword matching
67
- 2. **Semantic Code Search** - Uses embeddings to find semantically similar code
67
+ 1. **BM25 Keyword Search** - Advanced keyword matching with relevance scoring
68
+ 2. **Semantic Code Search** - Uses embeddings to find semantically similar code patterns
68
69
  3. **Semantic Text Search** - Uses embeddings to find code matching natural language descriptions
70
+ 4. **Reciprocal Rank Fusion (RRF)** - Intelligently combines results from multiple search strategies
71
+ 5. **Context-Aware Filtering** - Advanced filtering by language, author, date, source, and file path
72
+ 6. **Dependency-Aware Results** - Returns code snippets with their dependencies and usage examples
69
73
 
70
- Results are fused together to provide the most relevant snippets for the user's intent.
74
+ #### Enhanced Result Quality
75
+
76
+ - **Smart Snippet Selection**: Returns functions with their dependencies and context
77
+ - **Rich Metadata**: Each result includes file path, language, author, and creation date
78
+ - **Usage Examples**: Includes examples of how functions are used in the codebase
79
+ - **Topological Ordering**: Dependencies are ordered for optimal LLM consumption
71
80
 
72
81
  ## Filtering Capabilities
73
82
 
@@ -81,6 +90,8 @@ Filter results by programming language:
81
90
  > "I need to create a web server in Python. Please search for Flask or FastAPI examples and show me the best practices."
82
91
  > "I'm working on a Go microservice. Can you search for Go-specific patterns for handling HTTP requests and database connections?"
83
92
  > "I need JavaScript examples for form validation. Please search for modern JavaScript/TypeScript validation patterns."
93
+ > "I'm building a responsive layout. Please search for CSS Grid and Flexbox examples in our stylesheets."
94
+ > "I need HTML form examples. Please search for form elements with proper accessibility attributes."
84
95
 
85
96
  ### Author Filtering
86
97
 
@@ -104,6 +104,11 @@ source = ["src"]
104
104
  branch = true
105
105
  omit = []
106
106
 
107
+ [tool.ruff]
108
+ exclude = [
109
+ "tests/kodit/infrastructure/slicing/data/**/*.py",
110
+ ]
111
+
107
112
  [tool.ruff.lint]
108
113
  ignore = [
109
114
  "ANN401", # Opinionated warning on disallowing dynamically typed expressions
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.3.3'
21
- __version_tuple__ = version_tuple = (0, 3, 3)
20
+ __version__ = version = '0.3.4'
21
+ __version_tuple__ = version_tuple = (0, 3, 4)
@@ -13,7 +13,7 @@ from kodit.domain.services.index_query_service import IndexQueryService
13
13
  from kodit.domain.services.index_service import (
14
14
  IndexDomainService,
15
15
  )
16
- from kodit.domain.value_objects import LanguageMapping, SnippetExtractionStrategy
16
+ from kodit.domain.value_objects import LanguageMapping
17
17
  from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
18
18
  from kodit.infrastructure.embedding.embedding_factory import (
19
19
  embedding_domain_service_factory,
@@ -31,15 +31,9 @@ from kodit.infrastructure.enrichment.null_enrichment_provider import (
31
31
  NullEnrichmentProvider,
32
32
  )
33
33
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
34
- from kodit.infrastructure.snippet_extraction.factories import (
35
- create_snippet_query_provider,
36
- )
37
- from kodit.infrastructure.snippet_extraction.language_detection_service import (
34
+ from kodit.infrastructure.slicing.language_detection_service import (
38
35
  FileSystemLanguageDetectionService,
39
36
  )
40
- from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
41
- TreeSitterSnippetExtractor,
42
- )
43
37
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
44
38
  SqlAlchemyEmbeddingRepository,
45
39
  )
@@ -63,17 +57,9 @@ def create_code_indexing_application_service(
63
57
 
64
58
  # Create infrastructure services
65
59
  language_detector = FileSystemLanguageDetectionService(language_map)
66
- query_provider = create_snippet_query_provider()
67
60
 
68
- # Create snippet extractors
69
- method_extractor = TreeSitterSnippetExtractor(query_provider)
70
-
71
- snippet_extractors = {
72
- SnippetExtractionStrategy.METHOD_BASED: method_extractor,
73
- }
74
61
  index_domain_service = IndexDomainService(
75
62
  language_detector=language_detector,
76
- snippet_extractors=snippet_extractors,
77
63
  enrichment_service=enrichment_service,
78
64
  clone_dir=app_context.get_clone_dir(),
79
65
  )
@@ -136,17 +122,9 @@ def create_fast_test_code_indexing_application_service(
136
122
 
137
123
  # Create infrastructure services
138
124
  language_detector = FileSystemLanguageDetectionService(language_map)
139
- query_provider = create_snippet_query_provider()
140
-
141
- # Create snippet extractors
142
- method_extractor = TreeSitterSnippetExtractor(query_provider)
143
125
 
144
- snippet_extractors = {
145
- SnippetExtractionStrategy.METHOD_BASED: method_extractor,
146
- }
147
126
  index_domain_service = IndexDomainService(
148
127
  language_detector=language_detector,
149
- snippet_extractors=snippet_extractors,
150
128
  enrichment_service=enrichment_service,
151
129
  clone_dir=app_context.get_clone_dir(),
152
130
  )
@@ -100,6 +100,11 @@ class CodeIndexingApplicationService:
100
100
  self.log.info("No new changes to index", index_id=index.id)
101
101
  return
102
102
 
103
+ # Delete the old snippets from the files that have changed
104
+ await self.index_repository.delete_snippets_by_file_ids(
105
+ [file.id for file in index.source.working_copy.changed_files() if file.id]
106
+ )
107
+
103
108
  # Extract and create snippets (domain service handles progress)
104
109
  self.log.info("Creating snippets for files", index_id=index.id)
105
110
  index = await self.index_domain_service.extract_snippets_from_index(
@@ -115,6 +120,9 @@ class CodeIndexingApplicationService:
115
120
  msg = f"Index {index.id} not found after snippet extraction"
116
121
  raise ValueError(msg)
117
122
  index = flushed_index
123
+ if len(index.snippets) == 0:
124
+ self.log.info("No snippets to index after extraction", index_id=index.id)
125
+ return
118
126
 
119
127
  # Create BM25 index
120
128
  self.log.info("Creating keyword index")
@@ -154,8 +162,8 @@ class CodeIndexingApplicationService:
154
162
  # Apply filters if provided
155
163
  filtered_snippet_ids: list[int] | None = None
156
164
  if request.filters:
157
- # Use domain service for filtering
158
- prefilter_request = replace(request, top_k=None)
165
+ # Use domain service for filtering (use large top_k for pre-filtering)
166
+ prefilter_request = replace(request, top_k=10000)
159
167
  snippet_results = await self.index_query_service.search_snippets(
160
168
  prefilter_request
161
169
  )
@@ -1,7 +1,6 @@
1
1
  """Pure domain service for Index aggregate operations."""
2
2
 
3
3
  from abc import ABC, abstractmethod
4
- from collections.abc import Mapping
5
4
  from pathlib import Path
6
5
 
7
6
  import structlog
@@ -13,14 +12,13 @@ from kodit.domain.services.enrichment_service import EnrichmentDomainService
13
12
  from kodit.domain.value_objects import (
14
13
  EnrichmentIndexRequest,
15
14
  EnrichmentRequest,
16
- SnippetExtractionRequest,
17
- SnippetExtractionResult,
18
- SnippetExtractionStrategy,
15
+ LanguageMapping,
19
16
  )
20
17
  from kodit.infrastructure.cloning.git.working_copy import GitWorkingCopyProvider
21
18
  from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
22
19
  from kodit.infrastructure.git.git_utils import is_valid_clone_target
23
20
  from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
21
+ from kodit.infrastructure.slicing.slicer import Slicer
24
22
  from kodit.reporting import Reporter
25
23
  from kodit.utils.path_utils import path_from_uri
26
24
 
@@ -33,14 +31,6 @@ class LanguageDetectionService(ABC):
33
31
  """Detect the programming language of a file."""
34
32
 
35
33
 
36
- class SnippetExtractor(ABC):
37
- """Abstract interface for snippet extraction."""
38
-
39
- @abstractmethod
40
- async def extract(self, file_path: Path, language: str) -> list[str]:
41
- """Extract snippets from a file."""
42
-
43
-
44
34
  class IndexDomainService:
45
35
  """Pure domain service for Index aggregate operations.
46
36
 
@@ -54,14 +44,12 @@ class IndexDomainService:
54
44
  def __init__(
55
45
  self,
56
46
  language_detector: LanguageDetectionService,
57
- snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
58
47
  enrichment_service: EnrichmentDomainService,
59
48
  clone_dir: Path,
60
49
  ) -> None:
61
50
  """Initialize the index domain service."""
62
51
  self._clone_dir = clone_dir
63
52
  self._language_detector = language_detector
64
- self._snippet_extractors = snippet_extractors
65
53
  self._enrichment_service = enrichment_service
66
54
  self.log = structlog.get_logger(__name__)
67
55
 
@@ -99,7 +87,6 @@ class IndexDomainService:
99
87
  async def extract_snippets_from_index(
100
88
  self,
101
89
  index: domain_entities.Index,
102
- strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED,
103
90
  progress_callback: ProgressCallback | None = None,
104
91
  ) -> domain_entities.Index:
105
92
  """Extract code snippets from files in the index."""
@@ -109,46 +96,40 @@ class IndexDomainService:
109
96
  "Extracting snippets",
110
97
  index_id=index.id,
111
98
  file_count=file_count,
112
- strategy=strategy.value,
113
99
  )
114
100
 
115
101
  # Only create snippets for files that have been added or modified
116
102
  files = index.source.working_copy.changed_files()
117
103
  index.delete_snippets_for_files(files)
118
104
 
119
- reporter = Reporter(self.log, progress_callback)
120
- await reporter.start(
121
- "extract_snippets", len(files), "Extracting code snippets..."
122
- )
123
-
124
- new_snippets = []
125
- for i, domain_file in enumerate(files, 1):
105
+ # Create a set of languages to extract snippets for
106
+ extensions = {file.extension() for file in files}
107
+ languages = []
108
+ for ext in extensions:
126
109
  try:
127
- # Extract snippets from file
128
- request = SnippetExtractionRequest(
129
- file_path=domain_file.as_path(), strategy=strategy
130
- )
131
- result = await self._extract_snippets(request)
132
- for snippet_text in result.snippets:
133
- snippet = domain_entities.Snippet(
134
- derives_from=[domain_file],
135
- )
136
- snippet.add_original_content(snippet_text, result.language)
137
- new_snippets.append(snippet)
138
-
139
- except (OSError, ValueError) as e:
140
- self.log.debug(
141
- "Skipping file for snippet extraction",
142
- file_uri=str(domain_file.uri),
143
- error=str(e),
144
- )
110
+ languages.append(LanguageMapping.get_language_for_extension(ext))
111
+ except ValueError as e:
112
+ self.log.info("Skipping", error=str(e))
145
113
  continue
146
114
 
115
+ reporter = Reporter(self.log, progress_callback)
116
+ await reporter.start(
117
+ "extract_snippets",
118
+ len(files) * len(languages),
119
+ "Extracting code snippets...",
120
+ )
121
+ # Calculate snippets for each language
122
+ slicer = Slicer()
123
+ for i, language in enumerate(languages):
147
124
  await reporter.step(
148
- "extract_snippets", i, len(files), f"Processed {domain_file.uri.path}"
125
+ "extract_snippets",
126
+ len(files) * (i + 1),
127
+ len(files) * len(languages),
128
+ "Extracting code snippets...",
149
129
  )
130
+ s = slicer.extract_snippets(files, language=language)
131
+ index.snippets.extend(s)
150
132
 
151
- index.snippets.extend(new_snippets)
152
133
  await reporter.done("extract_snippets")
153
134
  return index
154
135
 
@@ -187,28 +168,6 @@ class IndexDomainService:
187
168
  await reporter.done("enrichment")
188
169
  return list(snippet_map.values())
189
170
 
190
- async def _extract_snippets(
191
- self, request: SnippetExtractionRequest
192
- ) -> SnippetExtractionResult:
193
- # Domain logic: validate file exists
194
- if not request.file_path.exists():
195
- raise ValueError(f"File does not exist: {request.file_path}")
196
-
197
- # Domain logic: detect language
198
- language = await self._language_detector.detect_language(request.file_path)
199
-
200
- # Domain logic: choose strategy and extractor
201
- if request.strategy not in self._snippet_extractors:
202
- raise ValueError(f"Unsupported extraction strategy: {request.strategy}")
203
-
204
- extractor = self._snippet_extractors[request.strategy]
205
- snippets = await extractor.extract(request.file_path, language)
206
-
207
- # Domain logic: filter out empty snippets
208
- filtered_snippets = [snippet for snippet in snippets if snippet.strip()]
209
-
210
- return SnippetExtractionResult(snippets=filtered_snippets, language=language)
211
-
212
171
  def sanitize_uri(
213
172
  self, uri_or_path_like: str
214
173
  ) -> tuple[AnyUrl, domain_entities.SourceType]:
@@ -297,7 +256,7 @@ class IndexDomainService:
297
256
  await metadata_extractor.extract(file_path=file_path)
298
257
  )
299
258
  except (OSError, ValueError) as e:
300
- self.log.info("Skipping file", file=str(file_path), error=str(e))
259
+ self.log.debug("Skipping file", file=str(file_path), error=str(e))
301
260
  continue
302
261
 
303
262
  # Finally check if there are any modified files