kiln-ai 0.21.0__tar.gz → 0.22.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (257) hide show
  1. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/PKG-INFO +79 -1
  2. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/README.md +76 -0
  3. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/litellm_extractor.py +52 -32
  4. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_litellm_extractor.py +169 -71
  5. kiln_ai-0.22.1/kiln_ai/adapters/ml_embedding_model_list.py +494 -0
  6. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ml_model_list.py +503 -23
  7. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_adapter.py +39 -8
  8. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +78 -0
  9. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
  10. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
  11. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_structured_output.py +6 -9
  12. kiln_ai-0.22.1/kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
  13. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ml_model_list.py +0 -10
  14. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
  15. kiln_ai-0.22.1/kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
  16. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
  17. kiln_ai-0.22.1/kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
  18. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
  19. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
  20. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
  21. kiln_ai-0.22.1/kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
  22. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/basemodel.py +31 -3
  23. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/external_tool_server.py +206 -54
  24. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/extraction.py +14 -0
  25. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task.py +5 -0
  26. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_output.py +41 -11
  27. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_attachment.py +3 -3
  28. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_basemodel.py +269 -13
  29. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_datasource.py +50 -0
  30. kiln_ai-0.22.1/kiln_ai/datamodel/test_external_tool_server.py +1073 -0
  31. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_model.py +31 -0
  32. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_task.py +35 -1
  33. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_tool_id.py +106 -1
  34. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/tool_id.py +49 -0
  35. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/base_tool.py +30 -6
  36. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/math_tools.py +12 -4
  37. kiln_ai-0.22.1/kiln_ai/tools/kiln_task_tool.py +162 -0
  38. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_server_tool.py +7 -5
  39. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/mcp_session_manager.py +50 -24
  40. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/rag_tools.py +17 -6
  41. kiln_ai-0.22.1/kiln_ai/tools/test_kiln_task_tool.py +527 -0
  42. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_server_tool.py +4 -15
  43. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_mcp_session_manager.py +186 -226
  44. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_rag_tools.py +86 -5
  45. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_tool_registry.py +199 -5
  46. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/tool_registry.py +49 -17
  47. kiln_ai-0.22.1/kiln_ai/utils/filesystem.py +14 -0
  48. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/open_ai_types.py +19 -2
  49. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/pdf_utils.py +21 -0
  50. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_open_ai_types.py +88 -12
  51. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_pdf_utils.py +14 -1
  52. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/pyproject.toml +3 -1
  53. kiln_ai-0.21.0/kiln_ai/adapters/ml_embedding_model_list.py +0 -192
  54. kiln_ai-0.21.0/kiln_ai/adapters/test_ml_embedding_model_list.py +0 -429
  55. kiln_ai-0.21.0/kiln_ai/datamodel/test_external_tool_server.py +0 -691
  56. kiln_ai-0.21.0/kiln_ai/utils/filesystem.py +0 -14
  57. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/.gitignore +0 -0
  58. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/.python-version +0 -0
  59. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/LICENSE.txt +0 -0
  60. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/index.html +0 -0
  61. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
  62. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
  63. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
  64. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
  65. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
  66. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
  67. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
  68. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
  69. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
  70. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
  71. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
  72. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
  73. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
  74. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
  75. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
  76. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
  77. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
  78. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
  79. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
  80. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
  81. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
  82. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
  83. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
  84. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
  85. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
  86. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
  87. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
  88. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/kiln_ai.html +0 -0
  89. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/docs/kiln_core_docs/search.js +0 -0
  90. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/__init__.py +0 -0
  91. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/__init__.py +0 -0
  92. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/adapter_registry.py +0 -0
  93. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/__init__.py +0 -0
  94. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/chat_formatter.py +0 -0
  95. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chat/test_chat_formatter.py +0 -0
  96. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/__init__.py +0 -0
  97. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/base_chunker.py +0 -0
  98. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/chunker_registry.py +0 -0
  99. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/fixed_window_chunker.py +0 -0
  100. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/helpers.py +0 -0
  101. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_base_chunker.py +0 -0
  102. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_chunker_registry.py +0 -0
  103. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +0 -0
  104. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/chunkers/test_helpers.py +0 -0
  105. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/__init__.py +0 -0
  106. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
  107. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
  108. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/data_gen/test_data_gen_task.py +0 -0
  109. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/docker_model_runner_tools.py +0 -0
  110. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/__init__.py +0 -0
  111. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/base_embedding_adapter.py +0 -0
  112. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/embedding_registry.py +0 -0
  113. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/litellm_embedding_adapter.py +0 -0
  114. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_base_embedding_adapter.py +0 -0
  115. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_embedding_registry.py +0 -0
  116. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +0 -0
  117. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/__init__.py +0 -0
  118. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/base_eval.py +0 -0
  119. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/eval_runner.py +0 -0
  120. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/g_eval.py +0 -0
  121. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/registry.py +0 -0
  122. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_base_eval.py +0 -0
  123. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
  124. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval.py +0 -0
  125. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
  126. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/__init__.py +0 -0
  127. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/base_extractor.py +0 -0
  128. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/encoding.py +0 -0
  129. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_registry.py +0 -0
  130. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/extractor_runner.py +0 -0
  131. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_base_extractor.py +0 -0
  132. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_encoding.py +0 -0
  133. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_registry.py +0 -0
  134. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/extractors/test_extractor_runner.py +0 -0
  135. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
  136. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
  137. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
  138. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
  139. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +0 -0
  140. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
  141. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
  142. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +0 -0
  143. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +0 -0
  144. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
  145. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_together_finetune.py +0 -0
  146. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/test_vertex_finetune.py +0 -0
  147. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/together_finetune.py +0 -0
  148. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/fine_tune/vertex_finetune.py +0 -0
  149. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
  150. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
  151. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
  152. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
  153. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/ollama_tools.py +0 -0
  154. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/__init__.py +0 -0
  155. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/base_parser.py +0 -0
  156. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/json_parser.py +0 -0
  157. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
  158. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
  159. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/request_formatters.py +0 -0
  160. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
  161. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
  162. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
  163. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/parsers/test_request_formatters.py +0 -0
  164. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/prompt_builders.py +0 -0
  165. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/provider_tools.py +0 -0
  166. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/deduplication.py +0 -0
  167. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/progress.py +0 -0
  168. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/rag_runners.py +0 -0
  169. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_deduplication.py +0 -0
  170. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_progress.py +0 -0
  171. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/rag/test_rag_runners.py +0 -0
  172. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/remote_config.py +0 -0
  173. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/__init__.py +0 -0
  174. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/repair_task.py +0 -0
  175. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
  176. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/run_output.py +0 -0
  177. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_adapter_registry.py +0 -0
  178. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_docker_model_runner_tools.py +0 -0
  179. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_ollama_tools.py +0 -0
  180. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
  181. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_prompt_builders.py +0 -0
  182. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_provider_tools.py +0 -0
  183. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/test_remote_config.py +0 -0
  184. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/__init__.py +0 -0
  185. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/base_vector_store_adapter.py +0 -0
  186. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_base_vector_store.py +0 -0
  187. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/test_vector_store_registry.py +0 -0
  188. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/adapters/vector_store/vector_store_registry.py +0 -0
  189. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/__init__.py +0 -0
  190. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/chunk.py +0 -0
  191. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/datamodel_enums.py +0 -0
  192. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_filters.py +0 -0
  193. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/dataset_split.py +0 -0
  194. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/embedding.py +0 -0
  195. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/eval.py +0 -0
  196. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/finetune.py +0 -0
  197. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/json_schema.py +0 -0
  198. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/model_cache.py +0 -0
  199. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/project.py +0 -0
  200. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt.py +0 -0
  201. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/prompt_id.py +0 -0
  202. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/rag.py +0 -0
  203. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/registry.py +0 -0
  204. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/run_config.py +0 -0
  205. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/strict_mode.py +0 -0
  206. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/task_run.py +0 -0
  207. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_chunk_models.py +0 -0
  208. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
  209. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_dataset_split.py +0 -0
  210. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_embedding_models.py +0 -0
  211. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_eval_model.py +0 -0
  212. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_example_models.py +0 -0
  213. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_extraction_chunk.py +0 -0
  214. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_json_schema.py +0 -0
  215. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_cache.py +0 -0
  216. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_model_perf.py +0 -0
  217. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_models.py +0 -0
  218. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_nested_save.py +0 -0
  219. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_output_rating.py +0 -0
  220. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_prompt_id.py +0 -0
  221. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_rag.py +0 -0
  222. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_registry.py +0 -0
  223. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/test_vector_store.py +0 -0
  224. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/datamodel/vector_store.py +0 -0
  225. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/__init__.py +0 -0
  226. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/__init__.py +0 -0
  227. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/built_in_tools/test_math_tools.py +0 -0
  228. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/tools/test_base_tools.py +0 -0
  229. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/__init__.py +0 -0
  230. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/async_job_runner.py +0 -0
  231. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/config.py +0 -0
  232. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/dataset_import.py +0 -0
  233. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/env.py +0 -0
  234. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/exhaustive_error.py +0 -0
  235. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/filesystem_cache.py +0 -0
  236. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/formatting.py +0 -0
  237. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/litellm.py +0 -0
  238. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/lock.py +0 -0
  239. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/logging.py +0 -0
  240. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/mime_type.py +0 -0
  241. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/name_generator.py +0 -0
  242. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/project_utils.py +0 -0
  243. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_async_job_runner.py +0 -0
  244. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_config.py +0 -0
  245. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_dataset_import.py +0 -0
  246. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_env.py +0 -0
  247. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_filesystem_cache.py +0 -0
  248. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_litellm.py +0 -0
  249. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_lock.py +0 -0
  250. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_mime_type.py +0 -0
  251. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_name_geneator.py +0 -0
  252. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_uuid.py +0 -0
  253. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/test_validation.py +0 -0
  254. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/uuid.py +0 -0
  255. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/kiln_ai/utils/validation.py +0 -0
  256. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/setup.cfg +0 -0
  257. {kiln_ai-0.21.0 → kiln_ai-0.22.1}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kiln-ai
3
- Version: 0.21.0
3
+ Version: 0.22.1
4
4
  Summary: Kiln AI
5
5
  Project-URL: Homepage, https://kiln.tech
6
6
  Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -28,8 +28,10 @@ Requires-Dist: llama-index-vector-stores-lancedb>=0.3.3
28
28
  Requires-Dist: llama-index>=0.13.3
29
29
  Requires-Dist: openai>=1.53.0
30
30
  Requires-Dist: pdoc>=15.0.0
31
+ Requires-Dist: pillow>=11.1.0
31
32
  Requires-Dist: pydantic>=2.9.2
32
33
  Requires-Dist: pypdf>=6.0.0
34
+ Requires-Dist: pypdfium2>=4.30.0
33
35
  Requires-Dist: pytest-benchmark>=5.1.0
34
36
  Requires-Dist: pytest-cov>=6.0.0
35
37
  Requires-Dist: pyyaml>=6.0.2
@@ -83,6 +85,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
83
85
  - [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
84
86
  - [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
85
87
  - [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
88
+ - [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
89
+ - [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
90
+ - [Example: LanceDB Cloud](#example-lancedb-cloud)
91
+ - [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
86
92
  - [Full API Reference](#full-api-reference)
87
93
 
88
94
  ## Installation
@@ -350,6 +356,78 @@ custom_model_ids.append(new_model)
350
356
  Config.shared().custom_models = custom_model_ids
351
357
  ```
352
358
 
359
+ ## Taking Kiln RAG to production
360
+
361
+ When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
362
+
363
+ ### Load a LlamaIndex Vector Store
364
+
365
+ Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking and embedding data as your Kiln Search Tool configuration.
366
+
367
+ ```py
368
+ from kiln_ai.datamodel import Project
369
+ from kiln_ai.datamodel.rag import RagConfig
370
+ from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
371
+
372
+ # Load your project and RAG configuration
373
+ project = Project.load_from_file("path/to/your/project.kiln")
374
+ rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
375
+
376
+ # Create the loader
377
+ loader = VectorStoreLoader(project=project, rag_config=rag_config)
378
+
379
+ # Export chunks to any LlamaIndex vector store
380
+ async for batch in loader.iter_llama_index_nodes(batch_size=10):
381
+ # Insert into your chosen vector store
382
+ # Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
383
+ pass
384
+ ```
385
+
386
+ **Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
387
+
388
+ ### Example: LanceDB Cloud
389
+
390
+ Internally, Kiln uses LanceDB. By using LanceDB Cloud, you'll get the same indexing behaviour as in the Kiln app.
391
+
392
+ Here's a complete example using LanceDB Cloud:
393
+
394
+ ```py
395
+ from kiln_ai.datamodel import Project
396
+ from kiln_ai.datamodel.rag import RagConfig
397
+ from kiln_ai.datamodel.vector_store import VectorStoreConfig
398
+ from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
399
+ from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
400
+
401
+ # Load configurations
402
+ project = Project.load_from_file("path/to/your/project.kiln")
403
+ rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
404
+ vector_store_config = VectorStoreConfig.from_id_and_parent_path(
405
+ rag_config.vector_store_config_id, project.path,
406
+ )
407
+
408
+ # Create LanceDB vector store
409
+ lancedb_store = lancedb_construct_from_config(
410
+ vector_store_config=vector_store_config,
411
+ uri="db://my-project",
412
+ api_key="sk_...",
413
+ region="us-east-1",
414
+ table_name="my-documents", # Created automatically
415
+ )
416
+
417
+ # Export and insert your documents
418
+ loader = VectorStoreLoader(project=project, rag_config=rag_config)
419
+ async for batch in loader.iter_llama_index_nodes(batch_size=100):
420
+ await lancedb_store.async_add(batch)
421
+
422
+ print("Documents successfully exported to LanceDB!")
423
+ ```
424
+
425
+ After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
426
+
427
+ ### Deploy RAG without LlamaIndex
428
+
429
+ While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method yields batches of `TextNode` objects, each of which includes all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
430
+
353
431
  ## Full API Reference
354
432
 
355
433
  The library can do a lot more than the examples we've shown here.
@@ -43,6 +43,10 @@ The library has a [comprehensive set of docs](https://kiln-ai.github.io/Kiln/kil
43
43
  - [Building and Running a Kiln Task from Code](#building-and-running-a-kiln-task-from-code)
44
44
  - [Tagging Task Runs Programmatically](#tagging-task-runs-programmatically)
45
45
  - [Adding Custom Model or AI Provider from Code](#adding-custom-model-or-ai-provider-from-code)
46
+ - [Taking Kiln RAG to production](#taking-kiln-rag-to-production)
47
+ - [Load a LlamaIndex Vector Store](#load-a-llamaindex-vector-store)
48
+ - [Example: LanceDB Cloud](#example-lancedb-cloud)
49
+ - [Deploy RAG without LlamaIndex](#deploy-rag-without-llamaindex)
46
50
  - [Full API Reference](#full-api-reference)
47
51
 
48
52
  ## Installation
@@ -310,6 +314,78 @@ custom_model_ids.append(new_model)
310
314
  Config.shared().custom_models = custom_model_ids
311
315
  ```
312
316
 
317
+ ## Taking Kiln RAG to production
318
+
319
+ When you're ready to deploy your RAG system, you can export your processed documents to any vector store supported by LlamaIndex. This allows you to use your Kiln-configured chunking and embedding settings in production.
320
+
321
+ ### Load a LlamaIndex Vector Store
322
+
323
+ Kiln provides a `VectorStoreLoader` that yields your processed document chunks as LlamaIndex `TextNode` objects. These nodes contain the same metadata, chunking and embedding data as your Kiln Search Tool configuration.
324
+
325
+ ```py
326
+ from kiln_ai.datamodel import Project
327
+ from kiln_ai.datamodel.rag import RagConfig
328
+ from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
329
+
330
+ # Load your project and RAG configuration
331
+ project = Project.load_from_file("path/to/your/project.kiln")
332
+ rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
333
+
334
+ # Create the loader
335
+ loader = VectorStoreLoader(project=project, rag_config=rag_config)
336
+
337
+ # Export chunks to any LlamaIndex vector store
338
+ async for batch in loader.iter_llama_index_nodes(batch_size=10):
339
+ # Insert into your chosen vector store
340
+ # Examples: LanceDB, Pinecone, Chroma, Qdrant, etc.
341
+ pass
342
+ ```
343
+
344
+ **Supported Vector Stores:** LlamaIndex supports 20+ vector stores including LanceDB, Pinecone, Weaviate, Chroma, Qdrant, and more. See the [full list](https://developers.llamaindex.ai/python/framework/module_guides/storing/vector_stores/).
345
+
346
+ ### Example: LanceDB Cloud
347
+
348
+ Internally, Kiln uses LanceDB. By using LanceDB Cloud, you'll get the same indexing behaviour as in the Kiln app.
349
+
350
+ Here's a complete example using LanceDB Cloud:
351
+
352
+ ```py
353
+ from kiln_ai.datamodel import Project
354
+ from kiln_ai.datamodel.rag import RagConfig
355
+ from kiln_ai.datamodel.vector_store import VectorStoreConfig
356
+ from kiln_ai.adapters.vector_store_loaders import VectorStoreLoader
357
+ from kiln_ai.adapters.vector_store.lancedb_adapter import lancedb_construct_from_config
358
+
359
+ # Load configurations
360
+ project = Project.load_from_file("path/to/your/project.kiln")
361
+ rag_config = RagConfig.from_id_and_parent_path("rag-config-id", project.path)
362
+ vector_store_config = VectorStoreConfig.from_id_and_parent_path(
363
+ rag_config.vector_store_config_id, project.path,
364
+ )
365
+
366
+ # Create LanceDB vector store
367
+ lancedb_store = lancedb_construct_from_config(
368
+ vector_store_config=vector_store_config,
369
+ uri="db://my-project",
370
+ api_key="sk_...",
371
+ region="us-east-1",
372
+ table_name="my-documents", # Created automatically
373
+ )
374
+
375
+ # Export and insert your documents
376
+ loader = VectorStoreLoader(project=project, rag_config=rag_config)
377
+ async for batch in loader.iter_llama_index_nodes(batch_size=100):
378
+ await lancedb_store.async_add(batch)
379
+
380
+ print("Documents successfully exported to LanceDB!")
381
+ ```
382
+
383
+ After export, query your data using [LlamaIndex](https://developers.llamaindex.ai/python/framework-api-reference/storage/vector_store/lancedb/) or the [LanceDB client](https://lancedb.github.io/lancedb/).
384
+
385
+ ### Deploy RAG without LlamaIndex
386
+
387
+ While Kiln is designed for deploying to LlamaIndex, you don't need to use it. The `iter_llama_index_nodes` method yields batches of `TextNode` objects, each of which includes all the data you need to build a RAG index in any stack: embedding, text, document name, chunk ID, etc.
388
+
313
389
  ## Full API Reference
314
390
 
315
391
  The library can do a lot more than the examples we've shown here.
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  import hashlib
3
3
  import logging
4
+ from functools import cached_property
4
5
  from pathlib import Path
5
6
  from typing import Any, List
6
7
 
@@ -13,23 +14,16 @@ from kiln_ai.adapters.extractors.base_extractor import (
13
14
  ExtractionOutput,
14
15
  )
15
16
  from kiln_ai.adapters.extractors.encoding import to_base64_url
16
- from kiln_ai.adapters.ml_model_list import built_in_models_from_provider
17
+ from kiln_ai.adapters.ml_model_list import (
18
+ KilnModelProvider,
19
+ built_in_models_from_provider,
20
+ )
17
21
  from kiln_ai.adapters.provider_tools import LiteLlmCoreConfig
18
22
  from kiln_ai.datamodel.datamodel_enums import ModelProviderName
19
23
  from kiln_ai.datamodel.extraction import ExtractorConfig, ExtractorType, Kind
20
24
  from kiln_ai.utils.filesystem_cache import FilesystemCache
21
25
  from kiln_ai.utils.litellm import get_litellm_provider_info
22
- from kiln_ai.utils.pdf_utils import split_pdf_into_pages
23
-
24
-
25
- def max_pdf_page_concurrency_for_model(model_name: str) -> int:
26
- # we assume each batch takes ~5s to complete (likely more in practice)
27
- # lowest rate limit is 150 RPM for Tier 1 accounts for gemini-2.5-pro
28
- if model_name == "gemini/gemini-2.5-pro":
29
- return 2
30
- # other models support at least 500 RPM for lowest tier accounts
31
- return 5
32
-
26
+ from kiln_ai.utils.pdf_utils import convert_pdf_to_images, split_pdf_into_pages
33
27
 
34
28
  logger = logging.getLogger(__name__)
35
29
 
@@ -74,11 +68,11 @@ def encode_file_litellm_format(path: Path, mime_type: str) -> dict[str, Any]:
74
68
  "text/markdown",
75
69
  "text/plain",
76
70
  ] or any(mime_type.startswith(m) for m in ["video/", "audio/"]):
77
- pdf_bytes = path.read_bytes()
71
+ file_bytes = path.read_bytes()
78
72
  return {
79
73
  "type": "file",
80
74
  "file": {
81
- "file_data": to_base64_url(mime_type, pdf_bytes),
75
+ "file_data": to_base64_url(mime_type, file_bytes),
82
76
  },
83
77
  }
84
78
 
@@ -101,6 +95,7 @@ class LitellmExtractor(BaseExtractor):
101
95
  extractor_config: ExtractorConfig,
102
96
  litellm_core_config: LiteLlmCoreConfig,
103
97
  filesystem_cache: FilesystemCache | None = None,
98
+ default_max_parallel_requests: int = 5,
104
99
  ):
105
100
  if extractor_config.extractor_type != ExtractorType.LITELLM:
106
101
  raise ValueError(
@@ -133,6 +128,7 @@ class LitellmExtractor(BaseExtractor):
133
128
  }
134
129
 
135
130
  self.litellm_core_config = litellm_core_config
131
+ self.default_max_parallel_requests = default_max_parallel_requests
136
132
 
137
133
  def pdf_page_cache_key(self, pdf_path: Path, page_number: int) -> str:
138
134
  """
@@ -171,13 +167,35 @@ class LitellmExtractor(BaseExtractor):
171
167
  logger.debug(f"Cache miss for page {page_number} of {pdf_path}")
172
168
  return None
173
169
 
170
+ async def convert_pdf_page_to_image_input(
171
+ self, page_path: Path, page_number: int
172
+ ) -> ExtractionInput:
173
+ image_paths = await convert_pdf_to_images(page_path, page_path.parent)
174
+ if len(image_paths) != 1:
175
+ raise ValueError(
176
+ f"Expected 1 image, got {len(image_paths)} for page {page_number} in {page_path}"
177
+ )
178
+ image_path = image_paths[0]
179
+ page_input = ExtractionInput(path=str(image_path), mime_type="image/png")
180
+ return page_input
181
+
174
182
  async def _extract_single_pdf_page(
175
- self, pdf_path: Path, page_path: Path, prompt: str, page_number: int
183
+ self,
184
+ pdf_path: Path,
185
+ page_path: Path,
186
+ prompt: str,
187
+ page_number: int,
176
188
  ) -> str:
177
189
  try:
178
- page_input = ExtractionInput(
179
- path=str(page_path), mime_type="application/pdf"
180
- )
190
+ if self.model_provider.multimodal_requires_pdf_as_image:
191
+ page_input = await self.convert_pdf_page_to_image_input(
192
+ page_path, page_number
193
+ )
194
+ else:
195
+ page_input = ExtractionInput(
196
+ path=str(page_path), mime_type="application/pdf"
197
+ )
198
+
181
199
  completion_kwargs = self._build_completion_kwargs(prompt, page_input)
182
200
  response = await litellm.acompletion(**completion_kwargs)
183
201
  except Exception as e:
@@ -201,11 +219,6 @@ class LitellmExtractor(BaseExtractor):
201
219
  )
202
220
 
203
221
  content = response.choices[0].message.content
204
- if not content:
205
- raise ValueError(
206
- f"No text returned from extraction model when extracting page {page_number} for {page_path}"
207
- )
208
-
209
222
  if self.filesystem_cache is not None:
210
223
  # we don't want to fail the whole extraction just because cache write fails
211
224
  # as that would block the whole flow
@@ -242,13 +255,14 @@ class LitellmExtractor(BaseExtractor):
242
255
  continue
243
256
 
244
257
  extract_page_jobs.append(
245
- self._extract_single_pdf_page(pdf_path, page_path, prompt, i)
258
+ self._extract_single_pdf_page(
259
+ pdf_path, page_path, prompt, page_number=i
260
+ )
246
261
  )
247
262
  page_indices_for_jobs.append(i)
248
263
 
249
264
  if (
250
- len(extract_page_jobs)
251
- >= max_pdf_page_concurrency_for_model(self.litellm_model_slug())
265
+ len(extract_page_jobs) >= self.max_parallel_requests_for_model
252
266
  or i == len(page_paths) - 1
253
267
  ):
254
268
  extraction_results = await asyncio.gather(
@@ -295,7 +309,7 @@ class LitellmExtractor(BaseExtractor):
295
309
  self, prompt: str, extraction_input: ExtractionInput
296
310
  ) -> dict[str, Any]:
297
311
  completion_kwargs = {
298
- "model": self.litellm_model_slug(),
312
+ "model": self.litellm_model_slug,
299
313
  "messages": [
300
314
  {
301
315
  "role": "user",
@@ -367,20 +381,26 @@ class LitellmExtractor(BaseExtractor):
367
381
  content_format=self.extractor_config.output_format,
368
382
  )
369
383
 
370
- def litellm_model_slug(self) -> str:
384
+ @cached_property
385
+ def model_provider(self) -> KilnModelProvider:
371
386
  kiln_model_provider = built_in_models_from_provider(
372
387
  ModelProviderName(self.extractor_config.model_provider_name),
373
388
  self.extractor_config.model_name,
374
389
  )
375
-
376
390
  if kiln_model_provider is None:
377
391
  raise ValueError(
378
392
  f"Model provider {self.extractor_config.model_provider_name} not found in the list of built-in models"
379
393
  )
394
+ return kiln_model_provider
395
+
396
+ @cached_property
397
+ def max_parallel_requests_for_model(self) -> int:
398
+ value = self.model_provider.max_parallel_requests
399
+ return value if value is not None else self.default_max_parallel_requests
380
400
 
381
- # need to translate into LiteLLM model slug
401
+ @cached_property
402
+ def litellm_model_slug(self) -> str:
382
403
  litellm_provider_name = get_litellm_provider_info(
383
- kiln_model_provider,
404
+ self.model_provider,
384
405
  )
385
-
386
406
  return litellm_provider_name.litellm_model_id