kiln-ai 0.22.0__tar.gz → 0.22.1__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.

Files changed (253)
  1. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/PKG-INFO +77 -1
  2. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/README.md +76 -0
  3. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_adapter.py +6 -2
  4. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
  5. kiln_ai-0.22.1/kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
  6. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
  7. kiln_ai-0.22.1/kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
  8. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
  9. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
  10. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
  11. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
  12. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/tool_id.py +13 -0
  13. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/base_tool.py +18 -3
  14. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/kiln_task_tool.py +6 -2
  15. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_server_tool.py +6 -4
  16. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/rag_tools.py +7 -3
  17. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/pyproject.toml +1 -1
  18. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/.gitignore +0 -0
  19. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/.python-version +0 -0
  20. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/LICENSE.txt +0 -0
  21. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/index.html +0 -0
  22. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
  23. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
  24. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
  25. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
  26. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
  27. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
  28. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
  29. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
  30. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
  31. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
  32. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
  33. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
  34. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
  35. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
  36. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
  37. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
  38. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
  39. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
  40. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
  41. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
  42. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
  43. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
  44. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
  45. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
  46. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
  47. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
  48. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
  49. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai.html +0 -0
  50. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/search.js +0 -0
  51. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/__init__.py +0 -0
  52. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/__init__.py +0 -0
  53. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/adapter_registry.py +0 -0
  54. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/__init__.py +0 -0
  55. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/chat_formatter.py +0 -0
  56. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/test_chat_formatter.py +0 -0
  57. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/__init__.py +0 -0
  58. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/base_chunker.py +0 -0
  59. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/chunker_registry.py +0 -0
  60. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/fixed_window_chunker.py +0 -0
  61. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/helpers.py +0 -0
  62. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_base_chunker.py +0 -0
  63. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_chunker_registry.py +0 -0
  64. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +0 -0
  65. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_helpers.py +0 -0
  66. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/__init__.py +0 -0
  67. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
  68. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
  69. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
  70. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/docker_model_runner_tools.py +0 -0
  71. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/__init__.py +0 -0
  72. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/base_embedding_adapter.py +0 -0
  73. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/embedding_registry.py +0 -0
  74. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/litellm_embedding_adapter.py +0 -0
  75. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_base_embedding_adapter.py +0 -0
  76. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_embedding_registry.py +0 -0
  77. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +0 -0
  78. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/__init__.py +0 -0
  79. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/base_eval.py +0 -0
  80. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/eval_runner.py +0 -0
  81. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/g_eval.py +0 -0
  82. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/registry.py +0 -0
  83. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
  84. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
  85. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval.py +0 -0
  86. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
  87. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/__init__.py +0 -0
  88. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/base_extractor.py +0 -0
  89. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/encoding.py +0 -0
  90. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_registry.py +0 -0
  91. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_runner.py +0 -0
  92. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/litellm_extractor.py +0 -0
  93. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_base_extractor.py +0 -0
  94. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_encoding.py +0 -0
  95. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_registry.py +0 -0
  96. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_runner.py +0 -0
  97. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_litellm_extractor.py +0 -0
  98. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
  99. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
  100. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
  101. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
  102. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +0 -0
  103. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
  104. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
  105. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
  106. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +0 -0
  107. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
  108. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_together_finetune.py +0 -0
  109. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_vertex_finetune.py +0 -0
  110. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/together_finetune.py +0 -0
  111. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/vertex_finetune.py +0 -0
  112. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ml_embedding_model_list.py +0 -0
  113. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ml_model_list.py +0 -0
  114. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
  115. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
  116. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
  117. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
  118. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +0 -0
  119. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +0 -0
  120. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +0 -0
  121. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_structured_output.py +0 -0
  122. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ollama_tools.py +0 -0
  123. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/__init__.py +0 -0
  124. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/base_parser.py +0 -0
  125. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/json_parser.py +0 -0
  126. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
  127. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
  128. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/request_formatters.py +0 -0
  129. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
  130. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
  131. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
  132. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_request_formatters.py +0 -0
  133. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/prompt_builders.py +0 -0
  134. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/provider_tools.py +0 -0
  135. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/deduplication.py +0 -0
  136. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/progress.py +0 -0
  137. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/rag_runners.py +0 -0
  138. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_deduplication.py +0 -0
  139. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_progress.py +0 -0
  140. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_rag_runners.py +0 -0
  141. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/remote_config.py +0 -0
  142. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/__init__.py +0 -0
  143. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/repair_task.py +0 -0
  144. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
  145. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/run_output.py +0 -0
  146. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_adapter_registry.py +0 -0
  147. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_docker_model_runner_tools.py +0 -0
  148. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ml_embedding_model_list.py +0 -0
  149. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ml_model_list.py +0 -0
  150. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ollama_tools.py +0 -0
  151. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
  152. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_builders.py +0 -0
  153. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_provider_tools.py +0 -0
  154. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_remote_config.py +0 -0
  155. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/__init__.py +0 -0
  156. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/base_vector_store_adapter.py +0 -0
  157. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_base_vector_store.py +0 -0
  158. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_vector_store_registry.py +0 -0
  159. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/vector_store_registry.py +0 -0
  160. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/__init__.py +0 -0
  161. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/basemodel.py +0 -0
  162. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/chunk.py +0 -0
  163. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/datamodel_enums.py +0 -0
  164. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_filters.py +0 -0
  165. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_split.py +0 -0
  166. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/embedding.py +0 -0
  167. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/eval.py +0 -0
  168. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/external_tool_server.py +0 -0
  169. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/extraction.py +0 -0
  170. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/finetune.py +0 -0
  171. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/json_schema.py +0 -0
  172. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/model_cache.py +0 -0
  173. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/project.py +0 -0
  174. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt.py +0 -0
  175. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt_id.py +0 -0
  176. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/rag.py +0 -0
  177. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/registry.py +0 -0
  178. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/run_config.py +0 -0
  179. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/strict_mode.py +0 -0
  180. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task.py +0 -0
  181. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_output.py +0 -0
  182. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_run.py +0 -0
  183. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_attachment.py +0 -0
  184. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_basemodel.py +0 -0
  185. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_chunk_models.py +0 -0
  186. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
  187. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_split.py +0 -0
  188. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_datasource.py +0 -0
  189. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_embedding_models.py +0 -0
  190. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_eval_model.py +0 -0
  191. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_example_models.py +0 -0
  192. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_external_tool_server.py +0 -0
  193. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_chunk.py +0 -0
  194. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_model.py +0 -0
  195. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_json_schema.py +0 -0
  196. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_cache.py +0 -0
  197. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_perf.py +0 -0
  198. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_models.py +0 -0
  199. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_nested_save.py +0 -0
  200. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_output_rating.py +0 -0
  201. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_prompt_id.py +0 -0
  202. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_rag.py +0 -0
  203. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_registry.py +0 -0
  204. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_task.py +0 -0
  205. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_tool_id.py +0 -0
  206. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_vector_store.py +0 -0
  207. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/vector_store.py +0 -0
  208. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/__init__.py +0 -0
  209. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/__init__.py +0 -0
  210. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/math_tools.py +0 -0
  211. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/test_math_tools.py +0 -0
  212. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_session_manager.py +0 -0
  213. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_base_tools.py +0 -0
  214. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_kiln_task_tool.py +0 -0
  215. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_server_tool.py +0 -0
  216. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_session_manager.py +0 -0
  217. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_rag_tools.py +0 -0
  218. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_tool_registry.py +0 -0
  219. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/tool_registry.py +0 -0
  220. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/__init__.py +0 -0
  221. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/async_job_runner.py +0 -0
  222. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/config.py +0 -0
  223. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/dataset_import.py +0 -0
  224. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/env.py +0 -0
  225. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/exhaustive_error.py +0 -0
  226. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/filesystem.py +0 -0
  227. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/filesystem_cache.py +0 -0
  228. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/formatting.py +0 -0
  229. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/litellm.py +0 -0
  230. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/lock.py +0 -0
  231. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/logging.py +0 -0
  232. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/mime_type.py +0 -0
  233. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/name_generator.py +0 -0
  234. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/open_ai_types.py +0 -0
  235. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/pdf_utils.py +0 -0
  236. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/project_utils.py +0 -0
  237. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_async_job_runner.py +0 -0
  238. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_config.py +0 -0
  239. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_dataset_import.py +0 -0
  240. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_env.py +0 -0
  241. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_filesystem_cache.py +0 -0
  242. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_litellm.py +0 -0
  243. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_lock.py +0 -0
  244. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_mime_type.py +0 -0
  245. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_name_geneator.py +0 -0
  246. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_open_ai_types.py +0 -0
  247. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_pdf_utils.py +0 -0
  248. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_uuid.py +0 -0
  249. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_validation.py +0 -0
  250. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/uuid.py +0 -0
  251. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/validation.py +0 -0
  252. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/setup.cfg +0 -0
  253. {kiln_ai-0.22.0 → kiln_ai-0.22.1}/uv.lock +0 -0

--- kiln_ai-0.22.0/PKG-INFO
+++ kiln_ai-0.22.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.22.0
+Version: 0.22.1
 Summary: Kiln AI
 Project-URL: Homepage, https://kiln.tech
 Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -85,6 +85,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
 - [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
 - [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
 - [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
+- [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
+- [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
+- [Example: LanceDB Cloud](#example-lancedb-cloud)
+- [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
 - [Full API Reference](#full-api-reference)
 
 ## Installation
@@ -352,6 +356,78 @@ custom_model_ids.append(new_model)
 Config.shared().custom_models = custom_model_ids
 ```
 
+## Taking Kiln RAG to production
+
+When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
+
+### Load a LlamaIndex Vector Store
+
+Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking, and embedding data as your Kiln Search Tool configuration.
+
+```py
+from kiln_ai.datamodel import Project
+from kiln_ai.datamodel.rag import RagConfig
+from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
+
+# Load your project and RAG configuration
+project = Project.load_from_file("path/to/your/project.kiln")
+rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
+
+# Create the loader
+loader = VectorStoreLoader(project=project, rag_config=rag_config)
+
+# Export chunks to any LlamaIndex vector store
+async for batch in loader.iter_llama_index_nodes(batch_size=10):
+    # Insert into your chosen vector store
+    # Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
+    pass
+```
+
+**Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
+
+### Example: LanceDB Cloud
+
+Internally, Kiln uses LanceDB. By using LanceDB Cloud you'll get the same indexing behaviour as in the app.
+
+Here's a complete example using LanceDB Cloud:
+
+```py
+from kiln_ai.datamodel import Project
+from kiln_ai.datamodel.rag import RagConfig
+from kiln_ai.datamodel.vector_store import VectorStoreConfig
+from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
+from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
+
+# Load configurations
+project = Project.load_from_file("path/to/your/project.kiln")
+rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
+vector_store_config = VectorStoreConfig.from_id_and_parent_path(
+    rag_config.vector_store_config_id, project.path,
+)
+
+# Create LanceDB vector store
+lancedb_store = lancedb_construct_from_config(
+    vector_store_config=vector_store_config,
+    uri="db://my-project",
+    api_key="sk_...",
+    region="us-east-1",
+    table_name="my-documents",  # Created automatically
+)
+
+# Export and insert your documents
+loader = VectorStoreLoader(project=project, rag_config=rag_config)
+async for batch in loader.iter_llama_index_nodes(batch_size=100):
+    await lancedb_store.async_add(batch)
+
+print("Documents successfully exported to LanceDB!")
+```
+
+After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
+
+### Deploy RAG without LlamaIndex
+
+While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method returns `TextNode` objects which include all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
+
 ## Full API Reference
 
 The library can do a lot more than the examples we've shown here.

--- kiln_ai-0.22.0/README.md
+++ kiln_ai-0.22.1/README.md
@@ -43,6 +43,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
 - [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
 - [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
 - [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
+- [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
+- [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
+- [Example: LanceDB Cloud](#example-lancedb-cloud)
+- [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
 - [Full API Reference](#full-api-reference)
 
 ## Installation
@@ -310,6 +314,78 @@ custom_model_ids.append(new_model)
 Config.shared().custom_models = custom_model_ids
 ```
 
+## Taking Kiln RAG to production
+
+When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
+
+### Load a LlamaIndex Vector Store
+
+Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking, and embedding data as your Kiln Search Tool configuration.
+
+```py
+from kiln_ai.datamodel import Project
+from kiln_ai.datamodel.rag import RagConfig
+from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
+
+# Load your project and RAG configuration
+project = Project.load_from_file("path/to/your/project.kiln")
+rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
+
+# Create the loader
+loader = VectorStoreLoader(project=project, rag_config=rag_config)
+
+# Export chunks to any LlamaIndex vector store
+async for batch in loader.iter_llama_index_nodes(batch_size=10):
+    # Insert into your chosen vector store
+    # Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
+    pass
+```
+
+**Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
+
+### Example: LanceDB Cloud
+
+Internally, Kiln uses LanceDB. By using LanceDB Cloud you'll get the same indexing behaviour as in the app.
+
+Here's a complete example using LanceDB Cloud:
+
+```py
+from kiln_ai.datamodel import Project
+from kiln_ai.datamodel.rag import RagConfig
+from kiln_ai.datamodel.vector_store import VectorStoreConfig
+from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
+from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
+
+# Load configurations
+project = Project.load_from_file("path/to/your/project.kiln")
+rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
+vector_store_config = VectorStoreConfig.from_id_and_parent_path(
+    rag_config.vector_store_config_id, project.path,
+)
+
+# Create LanceDB vector store
+lancedb_store = lancedb_construct_from_config(
+    vector_store_config=vector_store_config,
+    uri="db://my-project",
+    api_key="sk_...",
+    region="us-east-1",
+    table_name="my-documents",  # Created automatically
+)
+
+# Export and insert your documents
+loader = VectorStoreLoader(project=project, rag_config=rag_config)
+async for batch in loader.iter_llama_index_nodes(batch_size=100):
+    await lancedb_store.async_add(batch)
+
+print("Documents successfully exported to LanceDB!")
+```
+
+After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
+
+### Deploy RAG without LlamaIndex
+
+While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method returns `TextNode` objects which include all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
+
 ## Full API Reference
 
 The library can do a lot more than the examples we've shown here.
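
The "Deploy RAG without LlamaIndex" section above lists the fields each exported node carries. As a minimal, illustrative sketch (the flat record dict and the `export_plain_records` helper below are hypothetical, not part of the package), the same loader output can be flattened into plain records for any store; the `TextNode` attributes used (`id_`, `text`, `embedding`) and the `kiln_doc_id` / `kiln_chunk_idx` metadata keys match what the loader populates in the README above:

```py
from kiln_ai.datamodel import Project
from kiln_ai.datamodel.rag import RagConfig
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader


async def export_plain_records(project_path: str, rag_config_id: str) -> list[dict]:
    # Hypothetical helper: flatten Kiln's LlamaIndex nodes into plain dicts
    # so they can be written to any vector database or search index.
    project = Project.load_from_file(project_path)
    rag_config = RagConfig.from_id_and_parent_path(rag_config_id, project.path)
    loader = VectorStoreLoader(project=project, rag_config=rag_config)

    records: list[dict] = []
    async for batch in loader.iter_llama_index_nodes(batch_size=100):
        for node in batch:
            records.append(
                {
                    "id": node.id_,  # deterministic UUID per (document, chunk)
                    "text": node.text,
                    "embedding": node.embedding,
                    "document_id": node.metadata["kiln_doc_id"],
                    "chunk_index": node.metadata["kiln_chunk_idx"],
                }
            )
    return records
```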

--- kiln_ai-0.22.0/kiln_ai/adapters/model_adapters/litellm_adapter.py
+++ kiln_ai-0.22.1/kiln_ai/adapters/model_adapters/litellm_adapter.py
@@ -31,7 +31,11 @@ from kiln_ai.adapters.model_adapters.base_adapter import (
 )
 from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
 from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
-from kiln_ai.tools.base_tool import KilnToolInterface, ToolCallContext
+from kiln_ai.tools.base_tool import (
+    KilnToolInterface,
+    ToolCallContext,
+    ToolCallDefinition,
+)
 from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult
 from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
 from kiln_ai.utils.litellm import get_litellm_provider_info
@@ -560,7 +564,7 @@ class LiteLlmAdapter(BaseAdapter):
         self._cached_available_tools = await self.available_tools()
         return self._cached_available_tools
 
-    async def litellm_tools(self) -> list[Dict]:
+    async def litellm_tools(self) -> list[ToolCallDefinition]:
        available_tools = await self.cached_available_tools()

        # LiteLLM takes the standard OpenAI-compatible tool call format
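
For context on the `list[Dict]` to `list[ToolCallDefinition]` change above: the comment in the hunk notes that LiteLLM takes the standard OpenAI-compatible tool call format. The diff does not show `ToolCallDefinition` itself (it lives in `kiln_ai/tools/base_tool.py`), so the literal below is only a hedged illustration of that standard shape, with a hypothetical `add_numbers` tool:

```py
# Illustrative only: a tool definition in the standard OpenAI-compatible
# format that LiteLLM accepts. The concrete ToolCallDefinition type is
# defined in kiln_ai/tools/base_tool.py and is not shown in this diff.
example_tool_definition = {
    "type": "function",
    "function": {
        "name": "add_numbers",
        "description": "Add two numbers and return the sum.",
        "parameters": {
            "type": "object",
            "properties": {
                "a": {"type": "number"},
                "b": {"type": "number"},
            },
            "required": ["a", "b"],
        },
    },
}
```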

--- kiln_ai-0.22.0/kiln_ai/adapters/vector_store/lancedb_adapter.py
+++ kiln_ai-0.22.1/kiln_ai/adapters/vector_store/lancedb_adapter.py
@@ -5,12 +5,7 @@ from pathlib import Path
 from typing import Any, Dict, List, Literal, Optional, Set, TypedDict
 
 from llama_index.core import StorageContext, VectorStoreIndex
-from llama_index.core.schema import (
-    BaseNode,
-    NodeRelationship,
-    RelatedNodeInfo,
-    TextNode,
-)
+from llama_index.core.schema import BaseNode, TextNode
 from llama_index.core.vector_stores.types import (
     VectorStoreQuery as LlamaIndexVectorStoreQuery,
 )
@@ -24,15 +19,19 @@ from kiln_ai.adapters.vector_store.base_vector_store_adapter import (
     SearchResult,
     VectorStoreQuery,
 )
+from kiln_ai.adapters.vector_store.lancedb_helpers import (
+    convert_to_llama_index_node,
+    deterministic_chunk_id,
+    lancedb_construct_from_config,
+    store_type_to_lancedb_query_type,
+)
 from kiln_ai.datamodel.rag import RagConfig
 from kiln_ai.datamodel.vector_store import (
     VectorStoreConfig,
-    VectorStoreType,
     raise_exhaustive_enum_error,
 )
 from kiln_ai.utils.config import Config
 from kiln_ai.utils.env import temporary_env
-from kiln_ai.utils.uuid import string_to_uuid
 
 logger = logging.getLogger(__name__)
 
@@ -48,6 +47,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
         self,
         rag_config: RagConfig,
         vector_store_config: VectorStoreConfig,
+        lancedb_vector_store: LanceDBVectorStore | None = None,
     ):
         super().__init__(rag_config, vector_store_config)
         self.config_properties = self.vector_store_config.lancedb_properties
@@ -56,17 +56,15 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
         if vector_store_config.lancedb_properties.nprobes is not None:
             kwargs["nprobes"] = vector_store_config.lancedb_properties.nprobes
 
-        self.lancedb_vector_store = LanceDBVectorStore(
-            mode="create",
-            uri=LanceDBAdapter.lancedb_path_for_config(rag_config),
-            query_type=self.query_type,
-            overfetch_factor=vector_store_config.lancedb_properties.overfetch_factor,
-            vector_column_name=vector_store_config.lancedb_properties.vector_column_name,
-            text_key=vector_store_config.lancedb_properties.text_key,
-            doc_id_key=vector_store_config.lancedb_properties.doc_id_key,
-            **kwargs,
+        # allow overriding the vector store with a custom one, useful for user loading into an arbitrary
+        # deployment
+        self.lancedb_vector_store = (
+            lancedb_vector_store
+            or lancedb_construct_from_config(
+                vector_store_config,
+                uri=LanceDBAdapter.lancedb_path_for_config(rag_config),
+            )
         )
-
         self._index = None
 
     @property
@@ -149,7 +147,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
 
         chunk_count_for_document = len(chunks)
         deterministic_chunk_ids = [
-            self.compute_deterministic_chunk_id(document_id, chunk_idx)
+            deterministic_chunk_id(document_id, chunk_idx)
             for chunk_idx in range(chunk_count_for_document)
         ]
 
@@ -176,42 +174,12 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
             zip(chunks_text, embeddings)
         ):
             node_batch.append(
-                TextNode(
-                    id_=deterministic_chunk_ids[chunk_idx],
+                convert_to_llama_index_node(
+                    document_id=document_id,
+                    chunk_idx=chunk_idx,
+                    node_id=deterministic_chunk_id(document_id, chunk_idx),
                     text=chunk_text,
-                    embedding=embedding.vector,
-                    metadata={
-                        # metadata is populated by some internal llama_index logic
-                        # that uses for example the source_node relationship
-                        "kiln_doc_id": document_id,
-                        "kiln_chunk_idx": chunk_idx,
-                        #
-                        # llama_index lancedb vector store automatically sets these metadata:
-                        # "doc_id": "UUID node_id of the Source Node relationship",
-                        # "document_id": "UUID node_id of the Source Node relationship",
-                        # "ref_doc_id": "UUID node_id of the Source Node relationship"
-                        #
-                        # llama_index file loaders set these metadata, which would be useful to also support:
-                        # "creation_date": "2025-09-03",
-                        # "file_name": "file.pdf",
-                        # "file_path": "/absolute/path/to/the/file.pdf",
-                        # "file_size": 395154,
-                        # "file_type": "application\/pdf",
-                        # "last_modified_date": "2025-09-03",
-                        # "page_label": "1",
-                    },
-                    relationships={
-                        # when using the llama_index loaders, llama_index groups Nodes under Documents
-                        # and relationships point to the Document (which is also a Node), which confusingly
-                        # enough does not map to an actual file (for a PDF, a Document is a page of the PDF)
-                        # the Document structure is not something that is persisted, so it is fine here
-                        # if we have a relationship to a node_id that does not exist in the db
-                        NodeRelationship.SOURCE: RelatedNodeInfo(
-                            node_id=document_id,
-                            node_type="1",
-                            metadata={},
-                        ),
-                    },
+                    vector=embedding.vector,
                 )
             )
 
@@ -330,10 +298,6 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
                 return []
             raise
 
-    def compute_deterministic_chunk_id(self, document_id: str, chunk_idx: int) -> str:
-        # the id_ of the Node must be a UUID string, otherwise llama_index / LanceDB fails downstream
-        return str(string_to_uuid(f"{document_id}::{chunk_idx}"))
-
     async def count_records(self) -> int:
         try:
             table = self.lancedb_vector_store.table
@@ -346,15 +310,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
 
     @property
     def query_type(self) -> Literal["fts", "hybrid", "vector"]:
-        match self.vector_store_config.store_type:
-            case VectorStoreType.LANCE_DB_FTS:
-                return "fts"
-            case VectorStoreType.LANCE_DB_HYBRID:
-                return "hybrid"
-            case VectorStoreType.LANCE_DB_VECTOR:
-                return "vector"
-            case _:
-                raise_exhaustive_enum_error(self.vector_store_config.store_type)
+        return store_type_to_lancedb_query_type(self.vector_store_config.store_type)
 
     @staticmethod
     def lancedb_path_for_config(rag_config: RagConfig) -> str:
@@ -380,9 +336,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
             kiln_doc_id = row["metadata"]["kiln_doc_id"]
             if kiln_doc_id not in document_ids:
                 kiln_chunk_idx = row["metadata"]["kiln_chunk_idx"]
-                record_id = self.compute_deterministic_chunk_id(
-                    kiln_doc_id, kiln_chunk_idx
-                )
+                record_id = deterministic_chunk_id(kiln_doc_id, kiln_chunk_idx)
                 rows_to_delete.append(record_id)
 
         if rows_to_delete:
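
The new `lancedb_vector_store` constructor argument above lets callers inject a pre-built `LanceDBVectorStore` instead of the default local store derived from the RAG config. A minimal sketch, reusing the config-loading calls from the README section above with placeholder LanceDB Cloud credentials:

```py
from kiln_ai.datamodel import Project
from kiln_ai.datamodel.rag import RagConfig
from kiln_ai.datamodel.vector_store import VectorStoreConfig
from kiln_ai.adapters.vector_store.lancedb_adapter import LanceDBAdapter
from kiln_ai.adapters.vector_store.lancedb_helpers import lancedb_construct_from_config

# Load configs as in the README example (IDs and paths are placeholders).
project = Project.load_from_file("path/to/your/project.kiln")
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
vector_store_config = VectorStoreConfig.from_id_and_parent_path(
    rag_config.vector_store_config_id, project.path
)

# Build a remote store (LanceDB Cloud) and hand it to the adapter instead of
# letting it create the default local store for this rag_config.
remote_store = lancedb_construct_from_config(
    vector_store_config,
    uri="db://my-project",  # placeholder LanceDB Cloud URI
    api_key="sk_...",       # placeholder
    region="us-east-1",
)
adapter = LanceDBAdapter(
    rag_config,
    vector_store_config,
    lancedb_vector_store=remote_store,
)
```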

--- /dev/null
+++ kiln_ai-0.22.1/kiln_ai/adapters/vector_store/lancedb_helpers.py
@@ -0,0 +1,101 @@
+from typing import Any, Dict, List, Literal
+
+from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode
+from llama_index.vector_stores.lancedb import LanceDBVectorStore
+
+from kiln_ai.datamodel.vector_store import (
+    VectorStoreConfig,
+    VectorStoreType,
+    raise_exhaustive_enum_error,
+)
+from kiln_ai.utils.uuid import string_to_uuid
+
+
+def store_type_to_lancedb_query_type(
+    store_type: VectorStoreType,
+) -> Literal["fts", "hybrid", "vector"]:
+    match store_type:
+        case VectorStoreType.LANCE_DB_FTS:
+            return "fts"
+        case VectorStoreType.LANCE_DB_HYBRID:
+            return "hybrid"
+        case VectorStoreType.LANCE_DB_VECTOR:
+            return "vector"
+        case _:
+            raise_exhaustive_enum_error(store_type)
+
+
+def lancedb_construct_from_config(
+    vector_store_config: VectorStoreConfig,
+    uri: str,
+    **extra_params: Any,
+) -> LanceDBVectorStore:
+    """Construct a LanceDBVectorStore from a VectorStoreConfig."""
+    kwargs: Dict[str, Any] = {**extra_params}
+    if (
+        vector_store_config.lancedb_properties.nprobes is not None
+        and "nprobes" not in kwargs
+    ):
+        kwargs["nprobes"] = vector_store_config.lancedb_properties.nprobes
+
+    return LanceDBVectorStore(
+        mode="create",
+        query_type=store_type_to_lancedb_query_type(vector_store_config.store_type),
+        overfetch_factor=vector_store_config.lancedb_properties.overfetch_factor,
+        vector_column_name=vector_store_config.lancedb_properties.vector_column_name,
+        text_key=vector_store_config.lancedb_properties.text_key,
+        doc_id_key=vector_store_config.lancedb_properties.doc_id_key,
+        uri=uri,
+        **kwargs,
+    )
+
+
+def convert_to_llama_index_node(
+    document_id: str,
+    chunk_idx: int,
+    node_id: str,
+    text: str,
+    vector: List[float],
+) -> TextNode:
+    return TextNode(
+        id_=node_id,
+        text=text,
+        embedding=vector,
+        metadata={
+            # metadata is populated by some internal llama_index logic
+            # that uses for example the source_node relationship
+            "kiln_doc_id": document_id,
+            "kiln_chunk_idx": chunk_idx,
+            #
+            # llama_index lancedb vector store automatically sets these metadata:
+            # "doc_id": "UUID node_id of the Source Node relationship",
+            # "document_id": "UUID node_id of the Source Node relationship",
+            # "ref_doc_id": "UUID node_id of the Source Node relationship"
+            #
+            # llama_index file loaders set these metadata, which would be useful to also support:
+            # "creation_date": "2025-09-03",
+            # "file_name": "file.pdf",
+            # "file_path": "/absolute/path/to/the/file.pdf",
+            # "file_size": 395154,
+            # "file_type": "application\/pdf",
+            # "last_modified_date": "2025-09-03",
+            # "page_label": "1",
+        },
+        relationships={
+            # when using the llama_index loaders, llama_index groups Nodes under Documents
+            # and relationships point to the Document (which is also a Node), which confusingly
+            # enough does not map to an actual file (for a PDF, a Document is a page of the PDF)
+            # the Document structure is not something that is persisted, so it is fine here
+            # if we have a relationship to a node_id that does not exist in the db
+            NodeRelationship.SOURCE: RelatedNodeInfo(
+                node_id=document_id,
+                node_type="1",
+                metadata={},
+            ),
+        },
+    )
+
+
+def deterministic_chunk_id(document_id: str, chunk_idx: int) -> str:
+    # the id_ of the Node must be a UUID string, otherwise llama_index / LanceDB fails downstream
+    return str(string_to_uuid(f"{document_id}::{chunk_idx}"))
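
A small usage sketch for the helpers introduced above (the literal text and vector values are placeholders): `deterministic_chunk_id` derives a stable UUID from a document ID and chunk index, which is why re-adding the same chunk targets the same row, and `convert_to_llama_index_node` tags each node with the `kiln_doc_id` / `kiln_chunk_idx` metadata the adapter later uses for deletion.

```py
from kiln_ai.adapters.vector_store.lancedb_helpers import (
    convert_to_llama_index_node,
    deterministic_chunk_id,
)

doc_id = "doc_001"
chunk_idx = 0

# Same inputs always yield the same UUID string.
node_id = deterministic_chunk_id(doc_id, chunk_idx)
assert node_id == deterministic_chunk_id(doc_id, chunk_idx)

# Build a LlamaIndex TextNode carrying the Kiln metadata keys.
node = convert_to_llama_index_node(
    document_id=doc_id,
    chunk_idx=chunk_idx,
    node_id=node_id,
    text="First chunk of the document.",  # placeholder text
    vector=[0.1, 0.2, 0.3],               # placeholder embedding
)
assert node.metadata["kiln_doc_id"] == doc_id
assert node.metadata["kiln_chunk_idx"] == chunk_idx
```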

--- kiln_ai-0.22.0/kiln_ai/adapters/vector_store/test_lancedb_adapter.py
+++ kiln_ai-0.22.1/kiln_ai/adapters/vector_store/test_lancedb_adapter.py
@@ -17,6 +17,7 @@ from kiln_ai.adapters.vector_store.base_vector_store_adapter import (
     VectorStoreQuery,
 )
 from kiln_ai.adapters.vector_store.lancedb_adapter import LanceDBAdapter
+from kiln_ai.adapters.vector_store.lancedb_helpers import deterministic_chunk_id
 from kiln_ai.adapters.vector_store.vector_store_registry import (
     vector_store_adapter_for_config,
 )
@@ -925,9 +926,7 @@ async def test_get_nodes_by_ids_functionality(
     await adapter.add_chunks_with_embeddings([mock_chunked_documents[0]])  # doc_001
 
     # Test getting nodes by IDs - compute expected IDs
-    expected_ids = [
-        adapter.compute_deterministic_chunk_id("doc_001", i) for i in range(4)
-    ]
+    expected_ids = [deterministic_chunk_id("doc_001", i) for i in range(4)]
 
     # Get nodes by IDs
     retrieved_nodes = await adapter.get_nodes_by_ids(expected_ids)
@@ -943,7 +942,7 @@ async def test_get_nodes_by_ids_functionality(
         assert len(node.get_content()) > 0
 
     # Test with non-existent IDs
-    fake_ids = [adapter.compute_deterministic_chunk_id("fake_doc", i) for i in range(2)]
+    fake_ids = [deterministic_chunk_id("fake_doc", i) for i in range(2)]
     retrieved_fake = await adapter.get_nodes_by_ids(fake_ids)
     assert len(retrieved_fake) == 0
 
@@ -1019,7 +1018,7 @@ async def test_uuid_scheme_retrieval_and_node_properties(
     # Test the UUID scheme: document_id::chunk_idx
     for chunk_idx in range(4):
         # Compute expected ID using the same scheme as the adapter
-        expected_id = adapter.compute_deterministic_chunk_id("doc_001", chunk_idx)
+        expected_id = deterministic_chunk_id("doc_001", chunk_idx)
 
         # Retrieve the specific node by ID
         retrieved_nodes = await adapter.get_nodes_by_ids([expected_id])
@@ -1053,7 +1052,7 @@ async def test_uuid_scheme_retrieval_and_node_properties(
 
     # Test retrieval of doc_002 chunks
     for chunk_idx in range(4):
-        expected_id = adapter.compute_deterministic_chunk_id("doc_002", chunk_idx)
+        expected_id = deterministic_chunk_id("doc_002", chunk_idx)
         retrieved_nodes = await adapter.get_nodes_by_ids([expected_id])
         assert len(retrieved_nodes) == 1
 
@@ -1080,25 +1079,19 @@ async def test_deterministic_chunk_id_consistency(
     create_rag_config_factory,
 ):
     """Test that the deterministic chunk ID generation is consistent."""
-    rag_config = create_rag_config_factory(fts_vector_store_config, embedding_config)
-
-    adapter = LanceDBAdapter(
-        rag_config,
-        fts_vector_store_config,
-    )
 
     # Test that the same document_id and chunk_idx always produce the same UUID
     doc_id = "test_doc_123"
     chunk_idx = 5
 
-    id1 = adapter.compute_deterministic_chunk_id(doc_id, chunk_idx)
-    id2 = adapter.compute_deterministic_chunk_id(doc_id, chunk_idx)
+    id1 = deterministic_chunk_id(doc_id, chunk_idx)
+    id2 = deterministic_chunk_id(doc_id, chunk_idx)
 
     assert id1 == id2
 
     # Test that different inputs produce different UUIDs
-    id3 = adapter.compute_deterministic_chunk_id(doc_id, chunk_idx + 1)
-    id4 = adapter.compute_deterministic_chunk_id(doc_id + "_different", chunk_idx)
+    id3 = deterministic_chunk_id(doc_id, chunk_idx + 1)
+    id4 = deterministic_chunk_id(doc_id + "_different", chunk_idx)
 
     assert id1 != id3
     assert id1 != id4