kiln-ai 0.21.0__tar.gz → 0.22.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/PKG-INFO +79 -1
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/README.md +76 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/litellm_extractor.py +52 -32
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_litellm_extractor.py +169 -71
- kiln_ai-0.22.1/kiln_ai/adapters/ml_embedding_model_list.py +494 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ml_model_list.py +503 -23
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_adapter.py +39 -8
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +78 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_structured_output.py +6 -9
- kiln_ai-0.22.1/kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ml_model_list.py +0 -10
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
- kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/basemodel.py +31 -3
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/external_tool_server.py +206 -54
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/extraction.py +14 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task.py +5 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_output.py +41 -11
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_attachment.py +3 -3
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_basemodel.py +269 -13
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_datasource.py +50 -0
- kiln_ai-0.22.1/kiln_ai/datamodel/test_external_tool_server.py +1073 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_model.py +31 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_task.py +35 -1
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_tool_id.py +106 -1
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/tool_id.py +49 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/base_tool.py +30 -6
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/math_tools.py +12 -4
- kiln_ai-0.22.1/kiln_ai/tools/kiln_task_tool.py +162 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_server_tool.py +7 -5
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_session_manager.py +50 -24
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/rag_tools.py +17 -6
- kiln_ai-0.22.1/kiln_ai/tools/test_kiln_task_tool.py +527 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_server_tool.py +4 -15
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_session_manager.py +186 -226
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_rag_tools.py +86 -5
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_tool_registry.py +199 -5
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/tool_registry.py +49 -17
- kiln_ai-0.22.1/kiln_ai/utils/filesystem.py +14 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/open_ai_types.py +19 -2
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/pdf_utils.py +21 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_open_ai_types.py +88 -12
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_pdf_utils.py +14 -1
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/pyproject.toml +3 -1
- kiln_ai-0.21.0/kiln_ai/adapters/ml_embedding_model_list.py +0 -192
- kiln_ai-0.21.0/kiln_ai/adapters/test_ml_embedding_model_list.py +0 -429
- kiln_ai-0.21.0/kiln_ai/datamodel/test_external_tool_server.py +0 -691
- kiln_ai-0.21.0/kiln_ai/utils/filesystem.py +0 -14
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/.gitignore +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/.python-version +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/LICENSE.txt +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/index.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai.html +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/search.js +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/adapter_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/chat_formatter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/test_chat_formatter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/base_chunker.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/chunker_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/fixed_window_chunker.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/helpers.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_base_chunker.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_chunker_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_helpers.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/docker_model_runner_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/base_embedding_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/embedding_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/litellm_embedding_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_base_embedding_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_embedding_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/base_eval.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/eval_runner.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/g_eval.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/base_extractor.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/encoding.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_runner.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_base_extractor.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_encoding.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_runner.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_together_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_vertex_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/together_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/vertex_finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ollama_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/base_parser.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/json_parser.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/request_formatters.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_request_formatters.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/prompt_builders.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/provider_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/deduplication.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/progress.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/rag_runners.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_deduplication.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_progress.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_rag_runners.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/remote_config.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/repair_task.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/run_output.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_adapter_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_docker_model_runner_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ollama_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_builders.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_provider_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_remote_config.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/base_vector_store_adapter.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_base_vector_store.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_vector_store_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/vector_store_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/chunk.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/datamodel_enums.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_filters.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_split.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/embedding.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/eval.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/finetune.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/json_schema.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/model_cache.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/project.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt_id.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/rag.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/run_config.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/strict_mode.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_run.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_chunk_models.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_split.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_embedding_models.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_eval_model.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_example_models.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_chunk.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_json_schema.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_cache.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_perf.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_models.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_nested_save.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_output_rating.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_prompt_id.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_rag.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_registry.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_vector_store.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/vector_store.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/test_math_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_base_tools.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/__init__.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/async_job_runner.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/config.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/dataset_import.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/env.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/exhaustive_error.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/filesystem_cache.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/formatting.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/litellm.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/lock.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/logging.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/mime_type.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/name_generator.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/project_utils.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_async_job_runner.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_config.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_dataset_import.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_env.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_filesystem_cache.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_litellm.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_lock.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_mime_type.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_name_geneator.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_uuid.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_validation.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/uuid.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/validation.py +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/setup.cfg +0 -0
- {kiln_ai-0.21.0 → kiln_ai-0.22.1}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kiln-ai
|
|
3
|
-
Version: 0.21.0
|
|
3
|
+
Version: 0.22.1
|
|
4
4
|
Summary: Kiln AI
|
|
5
5
|
Project-URL: Homepage, https://kiln.tech
|
|
6
6
|
Project-URL: Repository, https://github.com/Kiln-AI/kiln
|
|
@@ -28,8 +28,10 @@ Requires-Dist: llama-index-vector-stores-lancedb>=0.3.3
|
|
|
28
28
|
Requires-Dist: llama-index>=0.13.3
|
|
29
29
|
Requires-Dist: openai>=1.53.0
|
|
30
30
|
Requires-Dist: pdoc>=15.0.0
|
|
31
|
+
Requires-Dist: pillow>=11.1.0
|
|
31
32
|
Requires-Dist: pydantic>=2.9.2
|
|
32
33
|
Requires-Dist: pypdf>=6.0.0
|
|
34
|
+
Requires-Dist: pypdfium2>=4.30.0
|
|
33
35
|
Requires-Dist: pytest-benchmark>=5.1.0
|
|
34
36
|
Requires-Dist: pytest-cov>=6.0.0
|
|
35
37
|
Requires-Dist: pyyaml>=6.0.2
|
|
@@ -83,6 +85,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
|
|
|
83
85
|
- [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
|
|
84
86
|
- [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
|
|
85
87
|
- [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
|
|
88
|
+
- [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
|
|
89
|
+
- [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
|
|
90
|
+
- [Example: LanceDB Cloud](#example-lancedb-cloud)
|
|
91
|
+
- [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
|
|
86
92
|
- [Full API Reference](#full-api-reference)
|
|
87
93
|
|
|
88
94
|
## Installation
|
|
@@ -350,6 +356,78 @@ custom_model_ids.append(new_model)
|
|
|
350
356
|
Config.shared().custom_models = custom_model_ids
|
|
351
357
|
```
|
|
352
358
|
|
|
359
|
+
## Taking Kiln RAG to production
|
|
360
|
+
|
|
361
|
+
When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
|
|
362
|
+
|
|
363
|
+
### Load a LlamaIndex Vector Store
|
|
364
|
+
|
|
365
|
+
Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking and embedding data as your Kiln Search Tool configuration.
|
|
366
|
+
|
|
367
|
+
```py
|
|
368
|
+
from kiln_ai.datamodel import Project
|
|
369
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
370
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
371
|
+
|
|
372
|
+
# Load your project and RAG configuration
|
|
373
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
374
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
375
|
+
|
|
376
|
+
# Create the loader
|
|
377
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
378
|
+
|
|
379
|
+
# Export chunks to any LlamaIndex vector store
|
|
380
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=10):
|
|
381
|
+
# Insert into your chosen vector store
|
|
382
|
+
# Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
|
|
383
|
+
pass
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
**Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
|
|
387
|
+
|
|
388
|
+
### Example: LanceDB Cloud
|
|
389
|
+
|
|
390
|
+
Internally, Kiln uses LanceDB. By using LanceDB Cloud, you'll get the same indexing behaviour as in the app.
|
|
391
|
+
|
|
392
|
+
Here's a complete example using LanceDB Cloud:
|
|
393
|
+
|
|
394
|
+
```py
|
|
395
|
+
from kiln_ai.datamodel import Project
|
|
396
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
397
|
+
from kiln_ai.datamodel.vector_store import VectorStoreConfig
|
|
398
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
399
|
+
from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
|
|
400
|
+
|
|
401
|
+
# Load configurations
|
|
402
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
403
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
404
|
+
vector_store_config = VectorStoreConfig.from_id_and_parent_path(
|
|
405
|
+
rag_config.vector_store_config_id, project.path,
|
|
406
|
+
)
|
|
407
|
+
|
|
408
|
+
# Create LanceDB vector store
|
|
409
|
+
lancedb_store = lancedb_construct_from_config(
|
|
410
|
+
vector_store_config=vector_store_config,
|
|
411
|
+
uri="db://my-project",
|
|
412
|
+
api_key="sk_...",
|
|
413
|
+
region="us-east-1",
|
|
414
|
+
table_name="my-documents", # Created automatically
|
|
415
|
+
)
|
|
416
|
+
|
|
417
|
+
# Export and insert your documents
|
|
418
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
419
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=100):
|
|
420
|
+
await lancedb_store.async_add(batch)
|
|
421
|
+
|
|
422
|
+
print("Documents successfully exported to LanceDB!")
|
|
423
|
+
```
|
|
424
|
+
|
|
425
|
+
After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
|
|
426
|
+
|
|
427
|
+
### Deploy RAG without LlamaIndex
|
|
428
|
+
|
|
429
|
+
While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method yields `TextNode` objects, which include all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
|
|
430
|
+
|
|
353
431
|
## Full API Reference
|
|
354
432
|
|
|
355
433
|
The library can do a lot more than the examples we've shown here.
|
|
@@ -43,6 +43,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
|
|
|
43
43
|
- [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
|
|
44
44
|
- [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
|
|
45
45
|
- [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
|
|
46
|
+
- [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
|
|
47
|
+
- [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
|
|
48
|
+
- [Example: LanceDB Cloud](#example-lancedb-cloud)
|
|
49
|
+
- [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
|
|
46
50
|
- [Full API Reference](#full-api-reference)
|
|
47
51
|
|
|
48
52
|
## Installation
|
|
@@ -310,6 +314,78 @@ custom_model_ids.append(new_model)
|
|
|
310
314
|
Config.shared().custom_models = custom_model_ids
|
|
311
315
|
```
|
|
312
316
|
|
|
317
|
+
## Taking Kiln RAG to production
|
|
318
|
+
|
|
319
|
+
When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
|
|
320
|
+
|
|
321
|
+
### Load a LlamaIndex Vector Store
|
|
322
|
+
|
|
323
|
+
Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking and embedding data as your Kiln Search Tool configuration.
|
|
324
|
+
|
|
325
|
+
```py
|
|
326
|
+
from kiln_ai.datamodel import Project
|
|
327
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
328
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
329
|
+
|
|
330
|
+
# Load your project and RAG configuration
|
|
331
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
332
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
333
|
+
|
|
334
|
+
# Create the loader
|
|
335
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
336
|
+
|
|
337
|
+
# Export chunks to any LlamaIndex vector store
|
|
338
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=10):
|
|
339
|
+
# Insert into your chosen vector store
|
|
340
|
+
# Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
|
|
341
|
+
pass
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
**Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
|
|
345
|
+
|
|
346
|
+
### Example: LanceDB Cloud
|
|
347
|
+
|
|
348
|
+
Internally, Kiln uses LanceDB. By using LanceDB Cloud, you'll get the same indexing behaviour as in the app.
|
|
349
|
+
|
|
350
|
+
Here's a complete example using LanceDB Cloud:
|
|
351
|
+
|
|
352
|
+
```py
|
|
353
|
+
from kiln_ai.datamodel import Project
|
|
354
|
+
from kiln_ai.datamodel.rag import RagConfig
|
|
355
|
+
from kiln_ai.datamodel.vector_store import VectorStoreConfig
|
|
356
|
+
from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
|
|
357
|
+
from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
|
|
358
|
+
|
|
359
|
+
# Load configurations
|
|
360
|
+
project = Project.load_from_file("path/to/your/project.kiln")
|
|
361
|
+
rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
|
|
362
|
+
vector_store_config = VectorStoreConfig.from_id_and_parent_path(
|
|
363
|
+
rag_config.vector_store_config_id, project.path,
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
# Create LanceDB vector store
|
|
367
|
+
lancedb_store = lancedb_construct_from_config(
|
|
368
|
+
vector_store_config=vector_store_config,
|
|
369
|
+
uri="db://my-project",
|
|
370
|
+
api_key="sk_...",
|
|
371
|
+
region="us-east-1",
|
|
372
|
+
table_name="my-documents", # Created automatically
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
# Export and insert your documents
|
|
376
|
+
loader = VectorStoreLoader(project=project, rag_config=rag_config)
|
|
377
|
+
async for batch in loader.iter_llama_index_nodes(batch_size=100):
|
|
378
|
+
await lancedb_store.async_add(batch)
|
|
379
|
+
|
|
380
|
+
print("Documents successfully exported to LanceDB!")
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
|
|
384
|
+
|
|
385
|
+
### Deploy RAG without LlamaIndex
|
|
386
|
+
|
|
387
|
+
While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method yields `TextNode` objects, which include all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
|
|
388
|
+
|
|
313
389
|
## Full API Reference
|
|
314
390
|
|
|
315
391
|
The library can do a lot more than the examples we've shown here.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import hashlib
|
|
3
3
|
import logging
|
|
4
|
+
from functools import cached_property
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any, List
|
|
6
7
|
|
|
@@ -13,23 +14,16 @@ from kiln_ai.adapters.extractors.base_extractor import (
|
|
|
13
14
|
ExtractionOutput,
|
|
14
15
|
)
|
|
15
16
|
from kiln_ai.adapters.extractors.encoding import to_base64_url
|
|
16
|
-
from kiln_ai.adapters.ml_model_list import
|
|
17
|
+
from kiln_ai.adapters.ml_model_list import (
|
|
18
|
+
KilnModelProvider,
|
|
19
|
+
built_in_models_from_provider,
|
|
20
|
+
)
|
|
17
21
|
from kiln_ai.adapters.provider_tools import LiteLlmCoreConfig
|
|
18
22
|
from kiln_ai.datamodel.datamodel_enums import ModelProviderName
|
|
19
23
|
from kiln_ai.datamodel.extraction import ExtractorConfig, ExtractorType, Kind
|
|
20
24
|
from kiln_ai.utils.filesystem_cache import FilesystemCache
|
|
21
25
|
from kiln_ai.utils.litellm import get_litellm_provider_info
|
|
22
|
-
from kiln_ai.utils.pdf_utils import split_pdf_into_pages
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def max_pdf_page_concurrency_for_model(model_name: str) -> int:
|
|
26
|
-
# we assume each batch takes ~5s to complete (likely more in practice)
|
|
27
|
-
# lowest rate limit is 150 RPM for Tier 1 accounts for gemini-2.5-pro
|
|
28
|
-
if model_name == "gemini/gemini-2.5-pro":
|
|
29
|
-
return 2
|
|
30
|
-
# other models support at least 500 RPM for lowest tier accounts
|
|
31
|
-
return 5
|
|
32
|
-
|
|
26
|
+
from kiln_ai.utils.pdf_utils import convert_pdf_to_images, split_pdf_into_pages
|
|
33
27
|
|
|
34
28
|
logger = logging.getLogger(__name__)
|
|
35
29
|
|
|
@@ -74,11 +68,11 @@ def encode_file_litellm_format(path: Path, mime_type: str) -> dict[str, Any]:
|
|
|
74
68
|
"text/markdown",
|
|
75
69
|
"text/plain",
|
|
76
70
|
] or any(mime_type.startswith(m) for m in ["video/", "audio/"]):
|
|
77
|
-
|
|
71
|
+
file_bytes = path.read_bytes()
|
|
78
72
|
return {
|
|
79
73
|
"type": "file",
|
|
80
74
|
"file": {
|
|
81
|
-
"file_data": to_base64_url(mime_type,
|
|
75
|
+
"file_data": to_base64_url(mime_type, file_bytes),
|
|
82
76
|
},
|
|
83
77
|
}
|
|
84
78
|
|
|
@@ -101,6 +95,7 @@ class LitellmExtractor(BaseExtractor):
|
|
|
101
95
|
extractor_config: ExtractorConfig,
|
|
102
96
|
litellm_core_config: LiteLlmCoreConfig,
|
|
103
97
|
filesystem_cache: FilesystemCache | None = None,
|
|
98
|
+
default_max_parallel_requests: int = 5,
|
|
104
99
|
):
|
|
105
100
|
if extractor_config.extractor_type != ExtractorType.LITELLM:
|
|
106
101
|
raise ValueError(
|
|
@@ -133,6 +128,7 @@ class LitellmExtractor(BaseExtractor):
|
|
|
133
128
|
}
|
|
134
129
|
|
|
135
130
|
self.litellm_core_config = litellm_core_config
|
|
131
|
+
self.default_max_parallel_requests = default_max_parallel_requests
|
|
136
132
|
|
|
137
133
|
def pdf_page_cache_key(self, pdf_path: Path, page_number: int) -> str:
|
|
138
134
|
"""
|
|
@@ -171,13 +167,35 @@ class LitellmExtractor(BaseExtractor):
|
|
|
171
167
|
logger.debug(f"Cache miss for page {page_number} of {pdf_path}")
|
|
172
168
|
return None
|
|
173
169
|
|
|
170
|
+
async def convert_pdf_page_to_image_input(
|
|
171
|
+
self, page_path: Path, page_number: int
|
|
172
|
+
) -> ExtractionInput:
|
|
173
|
+
image_paths = await convert_pdf_to_images(page_path, page_path.parent)
|
|
174
|
+
if len(image_paths) != 1:
|
|
175
|
+
raise ValueError(
|
|
176
|
+
f"Expected 1 image, got {len(image_paths)} for page {page_number} in {page_path}"
|
|
177
|
+
)
|
|
178
|
+
image_path = image_paths[0]
|
|
179
|
+
page_input = ExtractionInput(path=str(image_path), mime_type="image/png")
|
|
180
|
+
return page_input
|
|
181
|
+
|
|
174
182
|
async def _extract_single_pdf_page(
|
|
175
|
-
self,
|
|
183
|
+
self,
|
|
184
|
+
pdf_path: Path,
|
|
185
|
+
page_path: Path,
|
|
186
|
+
prompt: str,
|
|
187
|
+
page_number: int,
|
|
176
188
|
) -> str:
|
|
177
189
|
try:
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
190
|
+
if self.model_provider.multimodal_requires_pdf_as_image:
|
|
191
|
+
page_input = await self.convert_pdf_page_to_image_input(
|
|
192
|
+
page_path, page_number
|
|
193
|
+
)
|
|
194
|
+
else:
|
|
195
|
+
page_input = ExtractionInput(
|
|
196
|
+
path=str(page_path), mime_type="application/pdf"
|
|
197
|
+
)
|
|
198
|
+
|
|
181
199
|
completion_kwargs = self._build_completion_kwargs(prompt, page_input)
|
|
182
200
|
response = await litellm.acompletion(**completion_kwargs)
|
|
183
201
|
except Exception as e:
|
|
@@ -201,11 +219,6 @@ class LitellmExtractor(BaseExtractor):
|
|
|
201
219
|
)
|
|
202
220
|
|
|
203
221
|
content = response.choices[0].message.content
|
|
204
|
-
if not content:
|
|
205
|
-
raise ValueError(
|
|
206
|
-
f"No text returned from extraction model when extracting page {page_number} for {page_path}"
|
|
207
|
-
)
|
|
208
|
-
|
|
209
222
|
if self.filesystem_cache is not None:
|
|
210
223
|
# we don't want to fail the whole extraction just because cache write fails
|
|
211
224
|
# as that would block the whole flow
|
|
@@ -242,13 +255,14 @@ class LitellmExtractor(BaseExtractor):
|
|
|
242
255
|
continue
|
|
243
256
|
|
|
244
257
|
extract_page_jobs.append(
|
|
245
|
-
self._extract_single_pdf_page(
|
|
258
|
+
self._extract_single_pdf_page(
|
|
259
|
+
pdf_path, page_path, prompt, page_number=i
|
|
260
|
+
)
|
|
246
261
|
)
|
|
247
262
|
page_indices_for_jobs.append(i)
|
|
248
263
|
|
|
249
264
|
if (
|
|
250
|
-
len(extract_page_jobs)
|
|
251
|
-
>= max_pdf_page_concurrency_for_model(self.litellm_model_slug())
|
|
265
|
+
len(extract_page_jobs) >= self.max_parallel_requests_for_model
|
|
252
266
|
or i == len(page_paths) - 1
|
|
253
267
|
):
|
|
254
268
|
extraction_results = await asyncio.gather(
|
|
@@ -295,7 +309,7 @@ class LitellmExtractor(BaseExtractor):
|
|
|
295
309
|
self, prompt: str, extraction_input: ExtractionInput
|
|
296
310
|
) -> dict[str, Any]:
|
|
297
311
|
completion_kwargs = {
|
|
298
|
-
"model": self.litellm_model_slug
|
|
312
|
+
"model": self.litellm_model_slug,
|
|
299
313
|
"messages": [
|
|
300
314
|
{
|
|
301
315
|
"role": "user",
|
|
@@ -367,20 +381,26 @@ class LitellmExtractor(BaseExtractor):
|
|
|
367
381
|
content_format=self.extractor_config.output_format,
|
|
368
382
|
)
|
|
369
383
|
|
|
370
|
-
|
|
384
|
+
@cached_property
|
|
385
|
+
def model_provider(self) -> KilnModelProvider:
|
|
371
386
|
kiln_model_provider = built_in_models_from_provider(
|
|
372
387
|
ModelProviderName(self.extractor_config.model_provider_name),
|
|
373
388
|
self.extractor_config.model_name,
|
|
374
389
|
)
|
|
375
|
-
|
|
376
390
|
if kiln_model_provider is None:
|
|
377
391
|
raise ValueError(
|
|
378
392
|
f"Model provider {self.extractor_config.model_provider_name} not found in the list of built-in models"
|
|
379
393
|
)
|
|
394
|
+
return kiln_model_provider
|
|
395
|
+
|
|
396
|
+
@cached_property
|
|
397
|
+
def max_parallel_requests_for_model(self) -> int:
|
|
398
|
+
value = self.model_provider.max_parallel_requests
|
|
399
|
+
return value if value is not None else self.default_max_parallel_requests
|
|
380
400
|
|
|
381
|
-
|
|
401
|
+
@cached_property
|
|
402
|
+
def litellm_model_slug(self) -> str:
|
|
382
403
|
litellm_provider_name = get_litellm_provider_info(
|
|
383
|
-
|
|
404
|
+
self.model_provider,
|
|
384
405
|
)
|
|
385
|
-
|
|
386
406
|
return litellm_provider_name.litellm_model_id
|