kiln-ai 0.20.1__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (251) hide show
  1. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/.gitignore +1 -0
  2. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/PKG-INFO +7 -1
  3. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/__init__.py +6 -0
  4. kiln_ai-0.21.0/kiln_ai/adapters/adapter_registry.py +62 -0
  5. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai-0.21.0/kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai-0.21.0/kiln_ai/adapters/embedding/__init__.py +0 -0
  16. kiln_ai-0.21.0/kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  17. kiln_ai-0.21.0/kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  18. kiln_ai-0.21.0/kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  19. kiln_ai-0.21.0/kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  20. kiln_ai-0.21.0/kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  21. kiln_ai-0.21.0/kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  22. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/eval_runner.py +6 -2
  23. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/test_base_eval.py +1 -3
  24. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/test_g_eval.py +1 -1
  25. kiln_ai-0.21.0/kiln_ai/adapters/extractors/__init__.py +18 -0
  26. kiln_ai-0.21.0/kiln_ai/adapters/extractors/base_extractor.py +72 -0
  27. kiln_ai-0.21.0/kiln_ai/adapters/extractors/encoding.py +20 -0
  28. kiln_ai-0.21.0/kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  29. kiln_ai-0.21.0/kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  30. kiln_ai-0.21.0/kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  31. kiln_ai-0.21.0/kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  32. kiln_ai-0.21.0/kiln_ai/adapters/extractors/test_encoding.py +54 -0
  33. kiln_ai-0.21.0/kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  34. kiln_ai-0.21.0/kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  35. kiln_ai-0.21.0/kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  36. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  37. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  38. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  39. kiln_ai-0.21.0/kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  40. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/ml_model_list.py +382 -4
  41. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/litellm_adapter.py +7 -69
  42. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/test_litellm_adapter.py +1 -1
  43. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/test_structured_output.py +3 -1
  44. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/ollama_tools.py +69 -12
  45. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/provider_tools.py +190 -46
  46. kiln_ai-0.21.0/kiln_ai/adapters/rag/deduplication.py +49 -0
  47. kiln_ai-0.21.0/kiln_ai/adapters/rag/progress.py +252 -0
  48. kiln_ai-0.21.0/kiln_ai/adapters/rag/rag_runners.py +844 -0
  49. kiln_ai-0.21.0/kiln_ai/adapters/rag/test_deduplication.py +195 -0
  50. kiln_ai-0.21.0/kiln_ai/adapters/rag/test_progress.py +785 -0
  51. kiln_ai-0.21.0/kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  52. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/remote_config.py +80 -8
  53. kiln_ai-0.21.0/kiln_ai/adapters/test_adapter_registry.py +834 -0
  54. kiln_ai-0.21.0/kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  55. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/test_ml_model_list.py +212 -0
  56. kiln_ai-0.21.0/kiln_ai/adapters/test_ollama_tools.py +380 -0
  57. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/test_prompt_builders.py +1 -1
  58. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/test_provider_tools.py +199 -8
  59. kiln_ai-0.21.0/kiln_ai/adapters/test_remote_config.py +1049 -0
  60. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/__init__.py +1 -0
  61. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  62. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  63. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  64. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  65. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  66. kiln_ai-0.21.0/kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  67. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/__init__.py +16 -13
  68. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/basemodel.py +170 -1
  69. kiln_ai-0.21.0/kiln_ai/datamodel/chunk.py +158 -0
  70. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/datamodel_enums.py +27 -0
  71. kiln_ai-0.21.0/kiln_ai/datamodel/embedding.py +64 -0
  72. kiln_ai-0.21.0/kiln_ai/datamodel/extraction.py +303 -0
  73. kiln_ai-0.21.0/kiln_ai/datamodel/project.py +62 -0
  74. kiln_ai-0.21.0/kiln_ai/datamodel/rag.py +79 -0
  75. kiln_ai-0.21.0/kiln_ai/datamodel/test_attachment.py +649 -0
  76. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_basemodel.py +1 -1
  77. kiln_ai-0.21.0/kiln_ai/datamodel/test_chunk_models.py +317 -0
  78. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_dataset_split.py +1 -1
  79. kiln_ai-0.21.0/kiln_ai/datamodel/test_embedding_models.py +448 -0
  80. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_eval_model.py +6 -6
  81. kiln_ai-0.21.0/kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  82. kiln_ai-0.21.0/kiln_ai/datamodel/test_extraction_model.py +470 -0
  83. kiln_ai-0.21.0/kiln_ai/datamodel/test_rag.py +641 -0
  84. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_tool_id.py +81 -0
  85. kiln_ai-0.21.0/kiln_ai/datamodel/test_vector_store.py +320 -0
  86. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/tool_id.py +22 -0
  87. kiln_ai-0.21.0/kiln_ai/datamodel/vector_store.py +141 -0
  88. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/mcp_session_manager.py +4 -1
  89. kiln_ai-0.21.0/kiln_ai/tools/rag_tools.py +157 -0
  90. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/test_mcp_session_manager.py +1 -1
  91. kiln_ai-0.21.0/kiln_ai/tools/test_rag_tools.py +848 -0
  92. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/test_tool_registry.py +91 -2
  93. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/tool_registry.py +21 -0
  94. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/__init__.py +3 -0
  95. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/async_job_runner.py +62 -17
  96. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/config.py +2 -2
  97. kiln_ai-0.21.0/kiln_ai/utils/env.py +15 -0
  98. kiln_ai-0.21.0/kiln_ai/utils/filesystem.py +14 -0
  99. kiln_ai-0.21.0/kiln_ai/utils/filesystem_cache.py +60 -0
  100. kiln_ai-0.21.0/kiln_ai/utils/litellm.py +94 -0
  101. kiln_ai-0.21.0/kiln_ai/utils/lock.py +100 -0
  102. kiln_ai-0.21.0/kiln_ai/utils/mime_type.py +38 -0
  103. kiln_ai-0.21.0/kiln_ai/utils/pdf_utils.py +38 -0
  104. kiln_ai-0.21.0/kiln_ai/utils/test_async_job_runner.py +315 -0
  105. kiln_ai-0.21.0/kiln_ai/utils/test_env.py +142 -0
  106. kiln_ai-0.21.0/kiln_ai/utils/test_filesystem_cache.py +316 -0
  107. kiln_ai-0.21.0/kiln_ai/utils/test_litellm.py +206 -0
  108. kiln_ai-0.21.0/kiln_ai/utils/test_lock.py +185 -0
  109. kiln_ai-0.21.0/kiln_ai/utils/test_mime_type.py +66 -0
  110. kiln_ai-0.21.0/kiln_ai/utils/test_pdf_utils.py +73 -0
  111. kiln_ai-0.21.0/kiln_ai/utils/test_uuid.py +111 -0
  112. kiln_ai-0.21.0/kiln_ai/utils/test_validation.py +524 -0
  113. kiln_ai-0.21.0/kiln_ai/utils/uuid.py +9 -0
  114. kiln_ai-0.21.0/kiln_ai/utils/validation.py +90 -0
  115. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/pyproject.toml +7 -1
  116. kiln_ai-0.20.1/kiln_ai/adapters/adapter_registry.py +0 -245
  117. kiln_ai-0.20.1/kiln_ai/adapters/test_adapter_registry.py +0 -341
  118. kiln_ai-0.20.1/kiln_ai/adapters/test_ollama_tools.py +0 -41
  119. kiln_ai-0.20.1/kiln_ai/adapters/test_remote_config.py +0 -554
  120. kiln_ai-0.20.1/kiln_ai/datamodel/project.py +0 -30
  121. kiln_ai-0.20.1/kiln_ai/utils/test_async_job_runner.py +0 -199
  122. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/.python-version +0 -0
  123. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/LICENSE.txt +0 -0
  124. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/README.md +0 -0
  125. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/index.html +0 -0
  126. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/data_gen/data_gen_task.html +0 -0
  127. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/data_gen.html +0 -0
  128. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/base_eval.html +0 -0
  129. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/eval_runner.html +0 -0
  130. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/g_eval.html +0 -0
  131. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/eval/registry.html +0 -0
  132. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/eval.html +0 -0
  133. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/base_finetune.html +0 -0
  134. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/dataset_formatter.html +0 -0
  135. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/finetune_registry.html +0 -0
  136. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune/openai_finetune.html +0 -0
  137. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/fine_tune.html +0 -0
  138. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/ml_model_list.html +0 -0
  139. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/base_adapter.html +0 -0
  140. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters/litellm_adapter.html +0 -0
  141. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/model_adapters.html +0 -0
  142. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/prompt_builders.html +0 -0
  143. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/repair/repair_task.html +0 -0
  144. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters/repair.html +0 -0
  145. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/adapters.html +0 -0
  146. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/datamodel/dataset_split.html +0 -0
  147. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/datamodel/eval.html +0 -0
  148. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/datamodel/strict_mode.html +0 -0
  149. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/datamodel.html +0 -0
  150. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/utils/config.html +0 -0
  151. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/utils/formatting.html +0 -0
  152. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai/utils.html +0 -0
  153. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/kiln_ai.html +0 -0
  154. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/docs/kiln_core_docs/search.js +0 -0
  155. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/__init__.py +0 -0
  156. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/chat/__init__.py +0 -0
  157. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/chat/chat_formatter.py +0 -0
  158. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/chat/test_chat_formatter.py +0 -0
  159. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/data_gen/__init__.py +0 -0
  160. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/data_gen/data_gen_prompts.py +0 -0
  161. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/data_gen/data_gen_task.py +0 -0
  162. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/docker_model_runner_tools.py +0 -0
  163. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/__init__.py +0 -0
  164. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/base_eval.py +0 -0
  165. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/g_eval.py +0 -0
  166. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/registry.py +0 -0
  167. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/test_eval_runner.py +0 -0
  168. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/eval/test_g_eval_data.py +0 -0
  169. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/__init__.py +0 -0
  170. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/base_finetune.py +0 -0
  171. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/dataset_formatter.py +0 -0
  172. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/finetune_registry.py +0 -0
  173. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/fireworks_finetune.py +0 -0
  174. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/openai_finetune.py +0 -0
  175. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/test_base_finetune.py +0 -0
  176. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/test_openai_finetune.py +0 -0
  177. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/test_vertex_finetune.py +0 -0
  178. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/together_finetune.py +0 -0
  179. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/fine_tune/vertex_finetune.py +0 -0
  180. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/__init__.py +0 -0
  181. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/base_adapter.py +0 -0
  182. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/litellm_config.py +0 -0
  183. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/test_base_adapter.py +0 -0
  184. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +0 -0
  185. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +0 -0
  186. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/__init__.py +0 -0
  187. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/base_parser.py +0 -0
  188. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/json_parser.py +0 -0
  189. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/parser_registry.py +0 -0
  190. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/r1_parser.py +0 -0
  191. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/request_formatters.py +0 -0
  192. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/test_json_parser.py +0 -0
  193. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/test_parser_registry.py +0 -0
  194. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/test_r1_parser.py +0 -0
  195. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/parsers/test_request_formatters.py +0 -0
  196. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/prompt_builders.py +0 -0
  197. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/repair/__init__.py +0 -0
  198. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/repair/repair_task.py +0 -0
  199. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/repair/test_repair_task.py +0 -0
  200. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/run_output.py +0 -0
  201. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/test_docker_model_runner_tools.py +0 -0
  202. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/adapters/test_prompt_adaptors.py +0 -0
  203. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/dataset_filters.py +0 -0
  204. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/dataset_split.py +0 -0
  205. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/eval.py +0 -0
  206. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/external_tool_server.py +0 -0
  207. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/finetune.py +0 -0
  208. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/json_schema.py +0 -0
  209. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/model_cache.py +0 -0
  210. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/prompt.py +0 -0
  211. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/prompt_id.py +0 -0
  212. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/registry.py +0 -0
  213. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/run_config.py +0 -0
  214. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/strict_mode.py +0 -0
  215. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/task.py +0 -0
  216. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/task_output.py +0 -0
  217. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/task_run.py +0 -0
  218. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_dataset_filters.py +0 -0
  219. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_datasource.py +0 -0
  220. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_example_models.py +0 -0
  221. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_external_tool_server.py +0 -0
  222. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_json_schema.py +0 -0
  223. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_model_cache.py +0 -0
  224. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_model_perf.py +0 -0
  225. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_models.py +0 -0
  226. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_nested_save.py +0 -0
  227. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_output_rating.py +0 -0
  228. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_prompt_id.py +0 -0
  229. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_registry.py +0 -0
  230. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/datamodel/test_task.py +0 -0
  231. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/__init__.py +0 -0
  232. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/base_tool.py +0 -0
  233. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/built_in_tools/__init__.py +0 -0
  234. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/built_in_tools/math_tools.py +0 -0
  235. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/built_in_tools/test_math_tools.py +0 -0
  236. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/mcp_server_tool.py +0 -0
  237. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/test_base_tools.py +0 -0
  238. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/tools/test_mcp_server_tool.py +0 -0
  239. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/dataset_import.py +0 -0
  240. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/exhaustive_error.py +0 -0
  241. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/formatting.py +0 -0
  242. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/logging.py +0 -0
  243. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/name_generator.py +0 -0
  244. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/open_ai_types.py +0 -0
  245. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/project_utils.py +0 -0
  246. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/test_config.py +0 -0
  247. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/test_dataset_import.py +0 -0
  248. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/test_name_geneator.py +0 -0
  249. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/kiln_ai/utils/test_open_ai_types.py +0 -0
  250. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/setup.cfg +0 -0
  251. {kiln_ai-0.20.1 → kiln_ai-0.21.0}/uv.lock +0 -0
@@ -8,6 +8,7 @@ __pycache__/
8
8
  .coverage
9
9
  **/.venv
10
10
  **/*.egg-info
11
+ node_modules/
11
12
 
12
13
  libs/core/docs
13
14
  libs/core/build
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kiln-ai
3
- Version: 0.20.1
3
+ Version: 0.21.0
4
4
  Summary: Kiln AI
5
5
  Project-URL: Homepage, https://kiln.tech
6
6
  Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -15,15 +15,21 @@ Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
16
  Classifier: Programming Language :: Python :: 3.13
17
17
  Requires-Python: >=3.10
18
+ Requires-Dist: anyio>=4.10.0
18
19
  Requires-Dist: boto3>=1.37.10
19
20
  Requires-Dist: coverage>=7.6.4
20
21
  Requires-Dist: exceptiongroup>=1.0.0; python_version < '3.11'
21
22
  Requires-Dist: google-cloud-aiplatform>=1.84.0
23
+ Requires-Dist: google-genai>=1.21.1
22
24
  Requires-Dist: jsonschema>=4.23.0
25
+ Requires-Dist: lancedb>=0.24.2
23
26
  Requires-Dist: litellm>=1.72.6
27
+ Requires-Dist: llama-index-vector-stores-lancedb>=0.3.3
28
+ Requires-Dist: llama-index>=0.13.3
24
29
  Requires-Dist: openai>=1.53.0
25
30
  Requires-Dist: pdoc>=15.0.0
26
31
  Requires-Dist: pydantic>=2.9.2
32
+ Requires-Dist: pypdf>=6.0.0
27
33
  Requires-Dist: pytest-benchmark>=5.1.0
28
34
  Requires-Dist: pytest-cov>=6.0.0
29
35
  Requires-Dist: pyyaml>=6.0.2
@@ -18,9 +18,12 @@ The eval submodule contains the code for evaluating the performance of a model.
18
18
 
19
19
  from . import (
20
20
  chat,
21
+ chunkers,
21
22
  data_gen,
22
23
  eval,
24
+ extractors,
23
25
  fine_tune,
26
+ ml_embedding_model_list,
24
27
  ml_model_list,
25
28
  model_adapters,
26
29
  prompt_builders,
@@ -29,9 +32,12 @@ from . import (
29
32
 
30
33
  __all__ = [
31
34
  "chat",
35
+ "chunkers",
32
36
  "data_gen",
33
37
  "eval",
38
+ "extractors",
34
39
  "fine_tune",
40
+ "ml_embedding_model_list",
35
41
  "ml_model_list",
36
42
  "model_adapters",
37
43
  "prompt_builders",
@@ -0,0 +1,62 @@
1
+ from kiln_ai import datamodel
2
+ from kiln_ai.adapters.ml_model_list import ModelProviderName
3
+ from kiln_ai.adapters.model_adapters.base_adapter import AdapterConfig, BaseAdapter
4
+ from kiln_ai.adapters.model_adapters.litellm_adapter import (
5
+ LiteLlmAdapter,
6
+ LiteLlmConfig,
7
+ )
8
+ from kiln_ai.adapters.provider_tools import (
9
+ core_provider,
10
+ lite_llm_core_config_for_provider,
11
+ )
12
+ from kiln_ai.datamodel.task import RunConfigProperties
13
+
14
+
15
+ def litellm_core_provider_config(
16
+ run_config_properties: RunConfigProperties,
17
+ ) -> LiteLlmConfig:
18
+ # For things like the fine-tune provider, we want to run the underlying provider (e.g. openai)
19
+ core_provider_name = core_provider(
20
+ run_config_properties.model_name, run_config_properties.model_provider_name
21
+ )
22
+
23
+ # For OpenAI compatible providers, we want to retrieve the underlying provider and update the run config properties to match
24
+ openai_compatible_provider_name = None
25
+ if run_config_properties.model_provider_name == ModelProviderName.openai_compatible:
26
+ model_id = run_config_properties.model_name
27
+ try:
28
+ openai_compatible_provider_name, model_id = model_id.split("::")
29
+ except Exception:
30
+ raise ValueError(f"Invalid openai compatible model ID: {model_id}")
31
+
32
+ # Update a copy of the run config properties to use the openai compatible provider
33
+ updated_run_config_properties = run_config_properties.model_copy(deep=True)
34
+ updated_run_config_properties.model_name = model_id
35
+ run_config_properties = updated_run_config_properties
36
+
37
+ config = lite_llm_core_config_for_provider(
38
+ core_provider_name, openai_compatible_provider_name
39
+ )
40
+ if config is None:
41
+ raise ValueError(
42
+ "Fine tune or custom openai compatible provider is not a core provider. The underlying provider should be used when requesting the adapter litellm config instead."
43
+ )
44
+
45
+ return LiteLlmConfig(
46
+ run_config_properties=run_config_properties,
47
+ base_url=config.base_url,
48
+ default_headers=config.default_headers,
49
+ additional_body_options=config.additional_body_options or {},
50
+ )
51
+
52
+
53
+ def adapter_for_task(
54
+ kiln_task: datamodel.Task,
55
+ run_config_properties: RunConfigProperties,
56
+ base_adapter_config: AdapterConfig | None = None,
57
+ ) -> BaseAdapter:
58
+ return LiteLlmAdapter(
59
+ kiln_task=kiln_task,
60
+ config=litellm_core_provider_config(run_config_properties),
61
+ base_adapter_config=base_adapter_config,
62
+ )
@@ -0,0 +1,13 @@
1
+ """
2
+ Chunkers for processing different document types.
3
+
4
+ This package provides a framework for chunking text into smaller chunks.
5
+ """
6
+
7
+ from . import base_chunker, chunker_registry, fixed_window_chunker
8
+
9
+ __all__ = [
10
+ "base_chunker",
11
+ "chunker_registry",
12
+ "fixed_window_chunker",
13
+ ]
@@ -0,0 +1,42 @@
1
+ import logging
2
+ from abc import ABC, abstractmethod
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from kiln_ai.adapters.chunkers.helpers import clean_up_text
7
+ from kiln_ai.datamodel.chunk import ChunkerConfig
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class TextChunk(BaseModel):
13
+ text: str = Field(description="The text of the chunk.")
14
+
15
+
16
+ class ChunkingResult(BaseModel):
17
+ chunks: list[TextChunk] = Field(description="The chunks of the text.")
18
+
19
+
20
+ class BaseChunker(ABC):
21
+ """
22
+ Base class for all chunkers.
23
+
24
+ Should be subclassed by each chunker.
25
+ """
26
+
27
+ def __init__(self, chunker_config: ChunkerConfig):
28
+ self.chunker_config = chunker_config
29
+
30
+ async def chunk(self, text: str) -> ChunkingResult:
31
+ if not text:
32
+ return ChunkingResult(chunks=[])
33
+
34
+ sanitized_text = clean_up_text(text)
35
+ if not sanitized_text:
36
+ return ChunkingResult(chunks=[])
37
+
38
+ return await self._chunk(sanitized_text)
39
+
40
+ @abstractmethod
41
+ async def _chunk(self, text: str) -> ChunkingResult:
42
+ pass
@@ -0,0 +1,16 @@
1
+ from kiln_ai.adapters.chunkers.base_chunker import BaseChunker
2
+ from kiln_ai.adapters.chunkers.fixed_window_chunker import FixedWindowChunker
3
+ from kiln_ai.datamodel.chunk import ChunkerConfig, ChunkerType
4
+ from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
5
+
6
+
7
+ def chunker_adapter_from_type(
8
+ chunker_type: ChunkerType,
9
+ chunker_config: ChunkerConfig,
10
+ ) -> BaseChunker:
11
+ match chunker_type:
12
+ case ChunkerType.FIXED_WINDOW:
13
+ return FixedWindowChunker(chunker_config)
14
+ case _:
15
+ # type checking will catch missing cases
16
+ raise_exhaustive_enum_error(chunker_type)
@@ -0,0 +1,39 @@
1
+ from typing import List
2
+
3
+ from llama_index.core.text_splitter import SentenceSplitter
4
+
5
+ from kiln_ai.adapters.chunkers.base_chunker import (
6
+ BaseChunker,
7
+ ChunkingResult,
8
+ TextChunk,
9
+ )
10
+ from kiln_ai.datamodel.chunk import ChunkerConfig, ChunkerType
11
+
12
+
13
+ class FixedWindowChunker(BaseChunker):
14
+ def __init__(self, chunker_config: ChunkerConfig):
15
+ if chunker_config.chunker_type != ChunkerType.FIXED_WINDOW:
16
+ raise ValueError("Chunker type must be FIXED_WINDOW")
17
+
18
+ chunk_size = chunker_config.chunk_size()
19
+ if chunk_size is None:
20
+ raise ValueError("Chunk size must be set")
21
+
22
+ chunk_overlap = chunker_config.chunk_overlap()
23
+ if chunk_overlap is None:
24
+ raise ValueError("Chunk overlap must be set")
25
+
26
+ super().__init__(chunker_config)
27
+ self.splitter = SentenceSplitter(
28
+ chunk_size=chunk_size,
29
+ chunk_overlap=chunk_overlap,
30
+ )
31
+
32
+ async def _chunk(self, text: str) -> ChunkingResult:
33
+ sentences = self.splitter.split_text(text)
34
+
35
+ chunks: List[TextChunk] = []
36
+ for sentence in sentences:
37
+ chunks.append(TextChunk(text=sentence))
38
+
39
+ return ChunkingResult(chunks=chunks)
@@ -0,0 +1,23 @@
1
+ import re
2
+
3
+
4
+ def clean_up_text(text: str) -> str:
5
+ """
6
+ Clean up text by limiting consecutive newlines and consecutive whitespace. Models sometimes send a lot of those.
7
+ It seems to happen more when the transcription is done at low temperature.
8
+
9
+ - Replaces 6+ consecutive newlines with exactly 6 newlines
10
+ - Replaces 50+ consecutive spaces with exactly 50 spaces
11
+ - Leaves 1-5 consecutive newlines unchanged
12
+ - Leaves 1-49 consecutive spaces unchanged
13
+ """
14
+ max_consecutive_newlines = 6
15
+ max_consecutive_whitespace = 50
16
+
17
+ # Replace 6+ consecutive newlines with exactly 6 newlines
18
+ text = re.sub(r"\n{6,}", "\n" * max_consecutive_newlines, text)
19
+
20
+ # Replace 50+ consecutive spaces with exactly 50 spaces
21
+ text = re.sub(r" {50,}", " " * max_consecutive_whitespace, text)
22
+
23
+ return text.strip()
@@ -0,0 +1,63 @@
1
+ from unittest.mock import patch
2
+
3
+ import pytest
4
+
5
+ from kiln_ai.adapters.chunkers.base_chunker import (
6
+ BaseChunker,
7
+ ChunkingResult,
8
+ TextChunk,
9
+ )
10
+ from kiln_ai.adapters.chunkers.helpers import clean_up_text
11
+ from kiln_ai.datamodel.chunk import ChunkerConfig, ChunkerType
12
+
13
+
14
+ @pytest.fixture
15
+ def config() -> ChunkerConfig:
16
+ return ChunkerConfig(
17
+ name="test-chunker",
18
+ chunker_type=ChunkerType.FIXED_WINDOW,
19
+ properties={"chunk_size": 100, "chunk_overlap": 10},
20
+ )
21
+
22
+
23
+ class WhitespaceChunker(BaseChunker):
24
+ async def _chunk(self, text: str) -> ChunkingResult:
25
+ return ChunkingResult(chunks=[TextChunk(text=chunk) for chunk in text.split()])
26
+
27
+
28
+ @pytest.fixture
29
+ def chunker(config: ChunkerConfig) -> WhitespaceChunker:
30
+ return WhitespaceChunker(config)
31
+
32
+
33
+ async def test_base_chunker_chunk_empty_text(chunker: WhitespaceChunker):
34
+ assert await chunker.chunk("") == ChunkingResult(chunks=[])
35
+
36
+
37
+ async def test_base_chunker_concrete_chunker(chunker: WhitespaceChunker):
38
+ output = await chunker.chunk("Hello, world!")
39
+ assert len(output.chunks) == 2
40
+
41
+
42
+ async def test_base_chunker_calls_clean_up_text(chunker: WhitespaceChunker):
43
+ with patch(
44
+ "kiln_ai.adapters.chunkers.base_chunker.clean_up_text"
45
+ ) as mock_clean_up_text:
46
+ mock_clean_up_text.side_effect = clean_up_text
47
+ await chunker.chunk("Hello, world!")
48
+ mock_clean_up_text.assert_called_once_with("Hello, world!")
49
+
50
+
51
+ async def test_base_chunker_empty_text(chunker: WhitespaceChunker):
52
+ chunks = await chunker.chunk("")
53
+ assert chunks == ChunkingResult(chunks=[])
54
+
55
+
56
+ async def test_base_chunker_empty_text_after_clean_up(chunker: WhitespaceChunker):
57
+ with patch(
58
+ "kiln_ai.adapters.chunkers.base_chunker.clean_up_text"
59
+ ) as mock_clean_up_text:
60
+ mock_clean_up_text.side_effect = clean_up_text
61
+ chunks = await chunker.chunk("\n\n ")
62
+ mock_clean_up_text.assert_called_once_with("\n\n ")
63
+ assert chunks == ChunkingResult(chunks=[])
@@ -0,0 +1,28 @@
1
+ import pytest
2
+
3
+ from kiln_ai.adapters.chunkers.chunker_registry import chunker_adapter_from_type
4
+ from kiln_ai.adapters.chunkers.fixed_window_chunker import FixedWindowChunker
5
+ from kiln_ai.datamodel.chunk import ChunkerConfig, ChunkerType
6
+
7
+
8
+ def test_chunker_adapter_from_type():
9
+ chunker = chunker_adapter_from_type(
10
+ ChunkerType.FIXED_WINDOW,
11
+ ChunkerConfig(
12
+ name="test-chunker",
13
+ chunker_type=ChunkerType.FIXED_WINDOW,
14
+ properties={
15
+ # do not use these values in production!
16
+ "chunk_size": 5555,
17
+ "chunk_overlap": 1111,
18
+ },
19
+ ),
20
+ )
21
+ assert isinstance(chunker, FixedWindowChunker)
22
+ assert chunker.chunker_config.chunk_size() == 5555
23
+ assert chunker.chunker_config.chunk_overlap() == 1111
24
+
25
+
26
+ def test_chunker_adapter_from_type_invalid():
27
+ with pytest.raises(ValueError):
28
+ chunker_adapter_from_type("invalid-type", {})