kiln-ai 0.22.0__tar.gz → 0.22.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/PKG-INFO +77 -1
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/README.md +76 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_adapter.py +6 -2
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/tool_id.py +13 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/base_tool.py +18 -3
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/kiln_task_tool.py +6 -2
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_server_tool.py +6 -4
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/rag_tools.py +7 -3
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/pyproject.toml +1 -1
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/.gitignore +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/.python-version +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/LICENSE.txt +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/index.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai.html +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/search.js +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/adapter_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/chat_formatter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/test_chat_formatter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/base_chunker.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/chunker_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/fixed_window_chunker.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/helpers.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_base_chunker.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_chunker_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_helpers.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/docker_model_runner_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/base_embedding_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/embedding_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/litellm_embedding_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_base_embedding_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_embedding_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/base_eval.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/eval_runner.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/g_eval.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/base_extractor.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/encoding.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_runner.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/litellm_extractor.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_base_extractor.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_encoding.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_runner.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_litellm_extractor.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_together_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_vertex_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/together_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/vertex_finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ml_embedding_model_list.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ml_model_list.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_structured_output.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ollama_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/base_parser.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/json_parser.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/request_formatters.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_request_formatters.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/prompt_builders.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/provider_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/deduplication.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/progress.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/rag_runners.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_deduplication.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_progress.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_rag_runners.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/remote_config.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/repair_task.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/run_output.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_adapter_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_docker_model_runner_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ml_embedding_model_list.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ml_model_list.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ollama_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_builders.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_provider_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_remote_config.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/base_vector_store_adapter.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_base_vector_store.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_vector_store_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/vector_store_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/basemodel.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/chunk.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/datamodel_enums.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_filters.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_split.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/embedding.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/eval.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/external_tool_server.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/extraction.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/finetune.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/json_schema.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/model_cache.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/project.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt_id.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/rag.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/run_config.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/strict_mode.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_output.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_run.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_attachment.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_basemodel.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_chunk_models.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_split.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_datasource.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_embedding_models.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_eval_model.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_example_models.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_external_tool_server.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_chunk.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_model.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_json_schema.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_cache.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_perf.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_models.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_nested_save.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_output_rating.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_prompt_id.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_rag.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_task.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_tool_id.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_vector_store.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/vector_store.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/math_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/test_math_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_session_manager.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_base_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_kiln_task_tool.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_server_tool.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_session_manager.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_rag_tools.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_tool_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/tools/tool_registry.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/__init__.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/async_job_runner.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/config.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/dataset_import.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/env.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/exhaustive_error.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/filesystem.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/filesystem_cache.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/formatting.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/litellm.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/lock.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/logging.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/mime_type.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/name_generator.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/open_ai_types.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/pdf_utils.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/project_utils.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_async_job_runner.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_config.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_dataset_import.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_env.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_filesystem_cache.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_litellm.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_lock.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_mime_type.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_name_geneator.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_open_ai_types.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_pdf_utils.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_uuid.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_validation.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/uuid.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/kiln_ai/utils/validation.py +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/setup.cfg +0 -0
- {kiln_ai-0.22.0 → kiln_ai-0.22.1}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kiln-ai
|
|
3
|
-
Version: 0.22.
|
|
3
|
+
Version: 0.22.1
|
|
4
4
|
Summary: Kiln AI
|
|
5
5
|
Project-URL: Homepage, https://kiln.tech
|
|
6
6
|
Project-URL: Repository, https://github.com/Kiln-AI/kiln
|
|
@@ -85,6 +85,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
|
|
|
85
85
|
- [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
|
|
86
86
|
- [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
|
|
87
87
|
- [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
|
|
88
|
+
- [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
|
|
89
|
+
- [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
|
|
90
|
+
- [Example: LanceDB Cloud](#example-lancedb-cloud)
|
|
91
|
+
- [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
|
|
88
92
|
- [Full API Reference](#full-api-reference)
|
|
89
93
|
|
|
90
94
|
## Installation
|
|
@@ -352,6 +356,78 @@ custom_model_ids.append(new_model)
|
|
|
352
356
|
Config.shared().custom_models = custom_model_ids
|
|
353
357
|
```
|
|
354
358
|
|
|
359
|
+
## Taking Kiln RAG to production
|
|
360
|
+
|
|
361
|
+
When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
|
|
362
|
+
|
|
363
|
+
### Load a LlamaIndex Vector Store
|
|
364
|
+
|
|
365
|
+
Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking and embedding data as your Kiln Search Tool configuration.
|
|
366
|
+
|
|
367
|
+
```py
|
|
368
|
+
from kiln_ai.datamodel import Project
|
|
369
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
370
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
371
|
+
|
|
372
|
+
# Load your project and RAG configuration
|
|
373
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
374
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
375
|
+
|
|
376
|
+
# Create the loader
|
|
377
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
378
|
+
|
|
379
|
+
# Export chunks to any LlamaIndex vector store
|
|
380
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=10):
|
|
381
|
+
# Insert into your chosen vector store
|
|
382
|
+
# Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
|
|
383
|
+
pass
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
**Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
|
|
387
|
+
|
|
388
|
+
### Example: LanceDB Cloud
|
|
389
|
+
|
|
390
|
+
Internally Kiln uses LanceDB. By using LanceDB Cloud you'll get the same indexing behaviour as in the app.
|
|
391
|
+
|
|
392
|
+
Here's a complete example using LanceDB Cloud:
|
|
393
|
+
|
|
394
|
+
```py
|
|
395
|
+
from kiln_ai.datamodel import Project
|
|
396
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
397
|
+
from kiln_ai.datamodel.vector_store import VectorStoreConfig
|
|
398
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
399
|
+
from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
|
|
400
|
+
|
|
401
|
+
# Load configurations
|
|
402
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
403
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
404
|
+
vector_store_config = VectorStoreConfig.from_id_and_parent_path(
|
|
405
|
+
rag_config.vector_store_config_id, project.path,
|
|
406
|
+
)
|
|
407
|
+
|
|
408
|
+
# Create LanceDB vector store
|
|
409
|
+
lancedb_store = lancedb_construct_from_config(
|
|
410
|
+
vector_store_config=vector_store_config,
|
|
411
|
+
uri="db://my-project",
|
|
412
|
+
api_key="sk_...",
|
|
413
|
+
region="us-east-1",
|
|
414
|
+
table_name="my-documents", # Created automatically
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
# Export and insert your documents
|
|
418
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
419
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=100):
|
|
420
|
+
await lancedb_store.async_add(batch)
|
|
421
|
+
|
|
422
|
+
print("Documents successfully exported to LanceDB!")
|
|
423
|
+
```
|
|
424
|
+
|
|
425
|
+
After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
|
|
426
|
+
|
|
427
|
+
### Deploy RAG without LlamaIndex
|
|
428
|
+
|
|
429
|
+
While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method yields `TextNode` objects which include all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
|
|
430
|
+
|
|
355
431
|
## Full API Reference
|
|
356
432
|
|
|
357
433
|
The library can do a lot more than the examples we've shown here.
|
|
@@ -43,6 +43,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
|
|
|
43
43
|
- [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
|
|
44
44
|
- [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
|
|
45
45
|
- [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
|
|
46
|
+
- [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
|
|
47
|
+
- [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
|
|
48
|
+
- [Example: LanceDB Cloud](#example-lancedb-cloud)
|
|
49
|
+
- [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
|
|
46
50
|
- [Full API Reference](#full-api-reference)
|
|
47
51
|
|
|
48
52
|
## Installation
|
|
@@ -310,6 +314,78 @@ custom_model_ids.append(new_model)
|
|
|
310
314
|
Config.shared().custom_models = custom_model_ids
|
|
311
315
|
```
|
|
312
316
|
|
|
317
|
+
## Taking Kiln RAG to production
|
|
318
|
+
|
|
319
|
+
When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
|
|
320
|
+
|
|
321
|
+
### Load a LlamaIndex Vector Store
|
|
322
|
+
|
|
323
|
+
Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking and embedding data as your Kiln Search Tool configuration.
|
|
324
|
+
|
|
325
|
+
```py
|
|
326
|
+
from kiln_ai.datamodel import Project
|
|
327
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
328
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
329
|
+
|
|
330
|
+
# Load your project and RAG configuration
|
|
331
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
332
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
333
|
+
|
|
334
|
+
# Create the loader
|
|
335
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
336
|
+
|
|
337
|
+
# Export chunks to any LlamaIndex vector store
|
|
338
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=10):
|
|
339
|
+
# Insert into your chosen vector store
|
|
340
|
+
# Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
|
|
341
|
+
pass
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
**Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
|
|
345
|
+
|
|
346
|
+
### Example: LanceDB Cloud
|
|
347
|
+
|
|
348
|
+
Internally Kiln uses LanceDB. By using LanceDB Cloud you'll get the same indexing behaviour as in the app.
|
|
349
|
+
|
|
350
|
+
Here's a complete example using LanceDB Cloud:
|
|
351
|
+
|
|
352
|
+
```py
|
|
353
|
+
from kiln_ai.datamodel import Project
|
|
354
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
355
|
+
from kiln_ai.datamodel.vector_store import VectorStoreConfig
|
|
356
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
357
|
+
from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
|
|
358
|
+
|
|
359
|
+
# Load configurations
|
|
360
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
361
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
362
|
+
vector_store_config = VectorStoreConfig.from_id_and_parent_path(
|
|
363
|
+
rag_config.vector_store_config_id, project.path,
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
# Create LanceDB vector store
|
|
367
|
+
lancedb_store = lancedb_construct_from_config(
|
|
368
|
+
vector_store_config=vector_store_config,
|
|
369
|
+
uri="db://my-project",
|
|
370
|
+
api_key="sk_...",
|
|
371
|
+
region="us-east-1",
|
|
372
|
+
table_name="my-documents", # Created automatically
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
# Export and insert your documents
|
|
376
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
377
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=100):
|
|
378
|
+
await lancedb_store.async_add(batch)
|
|
379
|
+
|
|
380
|
+
print("Documents successfully exported to LanceDB!")
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
|
|
384
|
+
|
|
385
|
+
### Deploy RAG without LlamaIndex
|
|
386
|
+
|
|
387
|
+
While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method yields `TextNode` objects which include all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
|
|
388
|
+
|
|
313
389
|
## Full API Reference
|
|
314
390
|
|
|
315
391
|
The library can do a lot more than the examples we've shown here.
|
|
@@ -31,7 +31,11 @@ from kiln_ai.adapters.model_adapters.base_adapter import (
|
|
|
31
31
|
)
|
|
32
32
|
from kiln_ai.adapters.model_adapters.litellm_config import LiteLlmConfig
|
|
33
33
|
from kiln_ai.datamodel.json_schema import validate_schema_with_value_error
|
|
34
|
-
from kiln_ai.tools.base_tool import
|
|
34
|
+
from kiln_ai.tools.base_tool import (
|
|
35
|
+
KilnToolInterface,
|
|
36
|
+
ToolCallContext,
|
|
37
|
+
ToolCallDefinition,
|
|
38
|
+
)
|
|
35
39
|
from kiln_ai.tools.kiln_task_tool import KilnTaskToolResult
|
|
36
40
|
from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
|
|
37
41
|
from kiln_ai.utils.litellm import get_litellm_provider_info
|
|
@@ -560,7 +564,7 @@ class LiteLlmAdapter(BaseAdapter):
|
|
|
560
564
|
self._cached_available_tools = await self.available_tools()
|
|
561
565
|
return self._cached_available_tools
|
|
562
566
|
|
|
563
|
-
async def litellm_tools(self) -> list[
|
|
567
|
+
async def litellm_tools(self) -> list[ToolCallDefinition]:
|
|
564
568
|
available_tools = await self.cached_available_tools()
|
|
565
569
|
|
|
566
570
|
# LiteLLM takes the standard OpenAI-compatible tool call format
|
|
@@ -5,12 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
from typing import Any, Dict, List, Literal, Optional, Set, TypedDict
|
|
6
6
|
|
|
7
7
|
from llama_index.core import StorageContext, VectorStoreIndex
|
|
8
|
-
from llama_index.core.schema import
|
|
9
|
-
BaseNode,
|
|
10
|
-
NodeRelationship,
|
|
11
|
-
RelatedNodeInfo,
|
|
12
|
-
TextNode,
|
|
13
|
-
)
|
|
8
|
+
from llama_index.core.schema import BaseNode, TextNode
|
|
14
9
|
from llama_index.core.vector_stores.types import (
|
|
15
10
|
VectorStoreQuery as LlamaIndexVectorStoreQuery,
|
|
16
11
|
)
|
|
@@ -24,15 +19,19 @@ from kiln_ai.adapters.vector_store.base_vector_store_adapter import (
|
|
|
24
19
|
SearchResult,
|
|
25
20
|
VectorStoreQuery,
|
|
26
21
|
)
|
|
22
|
+
from kiln_ai.adapters.vector_store.lancedb_helpers import (
|
|
23
|
+
convert_to_llama_index_node,
|
|
24
|
+
deterministic_chunk_id,
|
|
25
|
+
lancedb_construct_from_config,
|
|
26
|
+
store_type_to_lancedb_query_type,
|
|
27
|
+
)
|
|
27
28
|
from kiln_ai.datamodel.rag import RagConfig
|
|
28
29
|
from kiln_ai.datamodel.vector_store import (
|
|
29
30
|
VectorStoreConfig,
|
|
30
|
-
VectorStoreType,
|
|
31
31
|
raise_exhaustive_enum_error,
|
|
32
32
|
)
|
|
33
33
|
from kiln_ai.utils.config import Config
|
|
34
34
|
from kiln_ai.utils.env import temporary_env
|
|
35
|
-
from kiln_ai.utils.uuid import string_to_uuid
|
|
36
35
|
|
|
37
36
|
logger = logging.getLogger(__name__)
|
|
38
37
|
|
|
@@ -48,6 +47,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
48
47
|
self,
|
|
49
48
|
rag_config: RagConfig,
|
|
50
49
|
vector_store_config: VectorStoreConfig,
|
|
50
|
+
lancedb_vector_store: LanceDBVectorStore | None = None,
|
|
51
51
|
):
|
|
52
52
|
super().__init__(rag_config, vector_store_config)
|
|
53
53
|
self.config_properties = self.vector_store_config.lancedb_properties
|
|
@@ -56,17 +56,15 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
56
56
|
if vector_store_config.lancedb_properties.nprobes is not None:
|
|
57
57
|
kwargs["nprobes"] = vector_store_config.lancedb_properties.nprobes
|
|
58
58
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
**kwargs,
|
|
59
|
+
# allow overriding the vector store with a custom one, useful for user loading into an arbitrary
|
|
60
|
+
# deployment
|
|
61
|
+
self.lancedb_vector_store = (
|
|
62
|
+
lancedb_vector_store
|
|
63
|
+
or lancedb_construct_from_config(
|
|
64
|
+
vector_store_config,
|
|
65
|
+
uri=LanceDBAdapter.lancedb_path_for_config(rag_config),
|
|
66
|
+
)
|
|
68
67
|
)
|
|
69
|
-
|
|
70
68
|
self._index = None
|
|
71
69
|
|
|
72
70
|
@property
|
|
@@ -149,7 +147,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
149
147
|
|
|
150
148
|
chunk_count_for_document = len(chunks)
|
|
151
149
|
deterministic_chunk_ids = [
|
|
152
|
-
|
|
150
|
+
deterministic_chunk_id(document_id, chunk_idx)
|
|
153
151
|
for chunk_idx in range(chunk_count_for_document)
|
|
154
152
|
]
|
|
155
153
|
|
|
@@ -176,42 +174,12 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
176
174
|
zip(chunks_text, embeddings)
|
|
177
175
|
):
|
|
178
176
|
node_batch.append(
|
|
179
|
-
|
|
180
|
-
|
|
177
|
+
convert_to_llama_index_node(
|
|
178
|
+
document_id=document_id,
|
|
179
|
+
chunk_idx=chunk_idx,
|
|
180
|
+
node_id=deterministic_chunk_id(document_id, chunk_idx),
|
|
181
181
|
text=chunk_text,
|
|
182
|
-
|
|
183
|
-
metadata={
|
|
184
|
-
# metadata is populated by some internal llama_index logic
|
|
185
|
-
# that uses for example the source_node relationship
|
|
186
|
-
"kiln_doc_id": document_id,
|
|
187
|
-
"kiln_chunk_idx": chunk_idx,
|
|
188
|
-
#
|
|
189
|
-
# llama_index lancedb vector store automatically sets these metadata:
|
|
190
|
-
# "doc_id": "UUID node_id of the Source Node relationship",
|
|
191
|
-
# "document_id": "UUID node_id of the Source Node relationship",
|
|
192
|
-
# "ref_doc_id": "UUID node_id of the Source Node relationship"
|
|
193
|
-
#
|
|
194
|
-
# llama_index file loaders set these metadata, which would be useful to also support:
|
|
195
|
-
# "creation_date": "2025-09-03",
|
|
196
|
-
# "file_name": "file.pdf",
|
|
197
|
-
# "file_path": "/absolute/path/to/the/file.pdf",
|
|
198
|
-
# "file_size": 395154,
|
|
199
|
-
# "file_type": "application\/pdf",
|
|
200
|
-
# "last_modified_date": "2025-09-03",
|
|
201
|
-
# "page_label": "1",
|
|
202
|
-
},
|
|
203
|
-
relationships={
|
|
204
|
-
# when using the llama_index loaders, llama_index groups Nodes under Documents
|
|
205
|
-
# and relationships point to the Document (which is also a Node), which confusingly
|
|
206
|
-
# enough does not map to an actual file (for a PDF, a Document is a page of the PDF)
|
|
207
|
-
# the Document structure is not something that is persisted, so it is fine here
|
|
208
|
-
# if we have a relationship to a node_id that does not exist in the db
|
|
209
|
-
NodeRelationship.SOURCE: RelatedNodeInfo(
|
|
210
|
-
node_id=document_id,
|
|
211
|
-
node_type="1",
|
|
212
|
-
metadata={},
|
|
213
|
-
),
|
|
214
|
-
},
|
|
182
|
+
vector=embedding.vector,
|
|
215
183
|
)
|
|
216
184
|
)
|
|
217
185
|
|
|
@@ -330,10 +298,6 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
330
298
|
return []
|
|
331
299
|
raise
|
|
332
300
|
|
|
333
|
-
def compute_deterministic_chunk_id(self, document_id: str, chunk_idx: int) -> str:
|
|
334
|
-
# the id_ of the Node must be a UUID string, otherwise llama_index / LanceDB fails downstream
|
|
335
|
-
return str(string_to_uuid(f"{document_id}::{chunk_idx}"))
|
|
336
|
-
|
|
337
301
|
async def count_records(self) -> int:
|
|
338
302
|
try:
|
|
339
303
|
table = self.lancedb_vector_store.table
|
|
@@ -346,15 +310,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
346
310
|
|
|
347
311
|
@property
|
|
348
312
|
def query_type(self) -> Literal["fts", "hybrid", "vector"]:
|
|
349
|
-
|
|
350
|
-
case VectorStoreType.LANCE_DB_FTS:
|
|
351
|
-
return "fts"
|
|
352
|
-
case VectorStoreType.LANCE_DB_HYBRID:
|
|
353
|
-
return "hybrid"
|
|
354
|
-
case VectorStoreType.LANCE_DB_VECTOR:
|
|
355
|
-
return "vector"
|
|
356
|
-
case _:
|
|
357
|
-
raise_exhaustive_enum_error(self.vector_store_config.store_type)
|
|
313
|
+
return store_type_to_lancedb_query_type(self.vector_store_config.store_type)
|
|
358
314
|
|
|
359
315
|
@staticmethod
|
|
360
316
|
def lancedb_path_for_config(rag_config: RagConfig) -> str:
|
|
@@ -380,9 +336,7 @@ class LanceDBAdapter(BaseVectorStoreAdapter):
|
|
|
380
336
|
kiln_doc_id = row["metadata"]["kiln_doc_id"]
|
|
381
337
|
if kiln_doc_id not in document_ids:
|
|
382
338
|
kiln_chunk_idx = row["metadata"]["kiln_chunk_idx"]
|
|
383
|
-
record_id =
|
|
384
|
-
kiln_doc_id, kiln_chunk_idx
|
|
385
|
-
)
|
|
339
|
+
record_id = deterministic_chunk_id(kiln_doc_id, kiln_chunk_idx)
|
|
386
340
|
rows_to_delete.append(record_id)
|
|
387
341
|
|
|
388
342
|
if rows_to_delete:
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Literal
|
|
2
|
+
|
|
3
|
+
from llama_index.core.schema import NodeRelationship, RelatedNodeInfo, TextNode
|
|
4
|
+
from llama_index.vector_stores.lancedb import LanceDBVectorStore
|
|
5
|
+
|
|
6
|
+
from kiln_ai.datamodel.vector_store import (
|
|
7
|
+
VectorStoreConfig,
|
|
8
|
+
VectorStoreType,
|
|
9
|
+
raise_exhaustive_enum_error,
|
|
10
|
+
)
|
|
11
|
+
from kiln_ai.utils.uuid import string_to_uuid
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def store_type_to_lancedb_query_type(
    store_type: VectorStoreType,
) -> Literal["fts", "hybrid", "vector"]:
    """Translate a Kiln vector store type into the query-type string LanceDB expects."""
    if store_type == VectorStoreType.LANCE_DB_FTS:
        return "fts"
    if store_type == VectorStoreType.LANCE_DB_HYBRID:
        return "hybrid"
    if store_type == VectorStoreType.LANCE_DB_VECTOR:
        return "vector"
    # Unreachable for known enum members; raises to surface new, unhandled types.
    raise_exhaustive_enum_error(store_type)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def lancedb_construct_from_config(
    vector_store_config: VectorStoreConfig,
    uri: str,
    **extra_params: Any,
) -> LanceDBVectorStore:
    """Construct a LanceDBVectorStore from a VectorStoreConfig.

    Args:
        vector_store_config: Kiln vector store configuration to translate into
            LanceDB settings.
        uri: LanceDB connection URI (local path or remote URI).
        **extra_params: Additional keyword arguments forwarded to
            ``LanceDBVectorStore``; an explicit ``nprobes`` here takes
            precedence over the config-derived value.

    Returns:
        A ``LanceDBVectorStore`` opened in ``"create"`` mode.
    """
    properties = vector_store_config.lancedb_properties
    forwarded_kwargs: Dict[str, Any] = dict(extra_params)
    # Only apply the configured nprobes when the caller did not override it.
    if "nprobes" not in forwarded_kwargs and properties.nprobes is not None:
        forwarded_kwargs["nprobes"] = properties.nprobes

    return LanceDBVectorStore(
        mode="create",
        query_type=store_type_to_lancedb_query_type(vector_store_config.store_type),
        overfetch_factor=properties.overfetch_factor,
        vector_column_name=properties.vector_column_name,
        text_key=properties.text_key,
        doc_id_key=properties.doc_id_key,
        uri=uri,
        **forwarded_kwargs,
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def convert_to_llama_index_node(
    document_id: str,
    chunk_idx: int,
    node_id: str,
    text: str,
    vector: List[float],
) -> TextNode:
    """Wrap one Kiln chunk (text plus its embedding) in a llama_index TextNode."""
    # These keys are read back later to map rows to Kiln documents/chunks.
    # metadata is also populated by internal llama_index logic that uses,
    # for example, the source_node relationship.
    #
    # The llama_index LanceDB vector store automatically sets these metadata:
    #   "doc_id":      "UUID node_id of the Source Node relationship",
    #   "document_id": "UUID node_id of the Source Node relationship",
    #   "ref_doc_id":  "UUID node_id of the Source Node relationship"
    #
    # llama_index file loaders set these metadata, which would be useful to
    # also support:
    #   "creation_date": "2025-09-03",
    #   "file_name": "file.pdf",
    #   "file_path": "/absolute/path/to/the/file.pdf",
    #   "file_size": 395154,
    #   "file_type": "application\/pdf",
    #   "last_modified_date": "2025-09-03",
    #   "page_label": "1",
    node_metadata = {
        "kiln_doc_id": document_id,
        "kiln_chunk_idx": chunk_idx,
    }

    # When using the llama_index loaders, llama_index groups Nodes under
    # Documents, and relationships point to the Document (itself a Node) —
    # which, confusingly, does not map to an actual file (for a PDF, a
    # Document is one page of the PDF). The Document structure is never
    # persisted, so it is fine for this relationship to reference a node_id
    # that does not exist in the db.
    node_relationships = {
        NodeRelationship.SOURCE: RelatedNodeInfo(
            node_id=document_id,
            node_type="1",
            metadata={},
        ),
    }

    return TextNode(
        id_=node_id,
        text=text,
        embedding=vector,
        metadata=node_metadata,
        relationships=node_relationships,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def deterministic_chunk_id(document_id: str, chunk_idx: int) -> str:
    """Derive a stable UUID string for a (document_id, chunk_idx) pair.

    The id_ of the Node must be a UUID string, otherwise llama_index /
    LanceDB fails downstream — so the composite key is hashed into a UUID
    rather than used directly.
    """
    composite_key = f"{document_id}::{chunk_idx}"
    return str(string_to_uuid(composite_key))
|
|
@@ -17,6 +17,7 @@ from kiln_ai.adapters.vector_store.base_vector_store_adapter import (
|
|
|
17
17
|
VectorStoreQuery,
|
|
18
18
|
)
|
|
19
19
|
from kiln_ai.adapters.vector_store.lancedb_adapter import LanceDBAdapter
|
|
20
|
+
from kiln_ai.adapters.vector_store.lancedb_helpers import deterministic_chunk_id
|
|
20
21
|
from kiln_ai.adapters.vector_store.vector_store_registry import (
|
|
21
22
|
vector_store_adapter_for_config,
|
|
22
23
|
)
|
|
@@ -925,9 +926,7 @@ async def test_get_nodes_by_ids_functionality(
|
|
|
925
926
|
await adapter.add_chunks_with_embeddings([mock_chunked_documents[0]]) # doc_001
|
|
926
927
|
|
|
927
928
|
# Test getting nodes by IDs - compute expected IDs
|
|
928
|
-
expected_ids = [
|
|
929
|
-
adapter.compute_deterministic_chunk_id("doc_001", i) for i in range(4)
|
|
930
|
-
]
|
|
929
|
+
expected_ids = [deterministic_chunk_id("doc_001", i) for i in range(4)]
|
|
931
930
|
|
|
932
931
|
# Get nodes by IDs
|
|
933
932
|
retrieved_nodes = await adapter.get_nodes_by_ids(expected_ids)
|
|
@@ -943,7 +942,7 @@ async def test_get_nodes_by_ids_functionality(
|
|
|
943
942
|
assert len(node.get_content()) > 0
|
|
944
943
|
|
|
945
944
|
# Test with non-existent IDs
|
|
946
|
-
fake_ids = [
|
|
945
|
+
fake_ids = [deterministic_chunk_id("fake_doc", i) for i in range(2)]
|
|
947
946
|
retrieved_fake = await adapter.get_nodes_by_ids(fake_ids)
|
|
948
947
|
assert len(retrieved_fake) == 0
|
|
949
948
|
|
|
@@ -1019,7 +1018,7 @@ async def test_uuid_scheme_retrieval_and_node_properties(
|
|
|
1019
1018
|
# Test the UUID scheme: document_id::chunk_idx
|
|
1020
1019
|
for chunk_idx in range(4):
|
|
1021
1020
|
# Compute expected ID using the same scheme as the adapter
|
|
1022
|
-
expected_id =
|
|
1021
|
+
expected_id = deterministic_chunk_id("doc_001", chunk_idx)
|
|
1023
1022
|
|
|
1024
1023
|
# Retrieve the specific node by ID
|
|
1025
1024
|
retrieved_nodes = await adapter.get_nodes_by_ids([expected_id])
|
|
@@ -1053,7 +1052,7 @@ async def test_uuid_scheme_retrieval_and_node_properties(
|
|
|
1053
1052
|
|
|
1054
1053
|
# Test retrieval of doc_002 chunks
|
|
1055
1054
|
for chunk_idx in range(4):
|
|
1056
|
-
expected_id =
|
|
1055
|
+
expected_id = deterministic_chunk_id("doc_002", chunk_idx)
|
|
1057
1056
|
retrieved_nodes = await adapter.get_nodes_by_ids([expected_id])
|
|
1058
1057
|
assert len(retrieved_nodes) == 1
|
|
1059
1058
|
|
|
@@ -1080,25 +1079,19 @@ async def test_deterministic_chunk_id_consistency(
|
|
|
1080
1079
|
create_rag_config_factory,
|
|
1081
1080
|
):
|
|
1082
1081
|
"""Test that the deterministic chunk ID generation is consistent."""
|
|
1083
|
-
rag_config = create_rag_config_factory(fts_vector_store_config, embedding_config)
|
|
1084
|
-
|
|
1085
|
-
adapter = LanceDBAdapter(
|
|
1086
|
-
rag_config,
|
|
1087
|
-
fts_vector_store_config,
|
|
1088
|
-
)
|
|
1089
1082
|
|
|
1090
1083
|
# Test that the same document_id and chunk_idx always produce the same UUID
|
|
1091
1084
|
doc_id = "test_doc_123"
|
|
1092
1085
|
chunk_idx = 5
|
|
1093
1086
|
|
|
1094
|
-
id1 =
|
|
1095
|
-
id2 =
|
|
1087
|
+
id1 = deterministic_chunk_id(doc_id, chunk_idx)
|
|
1088
|
+
id2 = deterministic_chunk_id(doc_id, chunk_idx)
|
|
1096
1089
|
|
|
1097
1090
|
assert id1 == id2
|
|
1098
1091
|
|
|
1099
1092
|
# Test that different inputs produce different UUIDs
|
|
1100
|
-
id3 =
|
|
1101
|
-
id4 =
|
|
1093
|
+
id3 = deterministic_chunk_id(doc_id, chunk_idx + 1)
|
|
1094
|
+
id4 = deterministic_chunk_id(doc_id + "_different", chunk_idx)
|
|
1102
1095
|
|
|
1103
1096
|
assert id1 != id3
|
|
1104
1097
|
assert id1 != id4
|