kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158)
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,192 @@
1
+ from enum import Enum
2
+ from typing import List
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from kiln_ai.datamodel.datamodel_enums import ModelProviderName
7
+
8
+
9
class KilnEmbeddingModelFamily(str, Enum):
    """
    Enumeration of supported embedding model families.
    """

    # A family groups related embedding models. For proprietary models the
    # family name usually coincides with the provider name, but nothing
    # requires the two to match.
    openai = "openai"
    gemini = "gemini"
    gemma = "gemma"
    nomic = "nomic"
20
+
21
+
22
class EmbeddingModelName(str, Enum):
    """
    Enumeration of specific model versions supported by the system.
    """

    # Upstream embedding model names tend to be generic (for example
    # "text-embedding"). Members are therefore prefixed with the provider
    # name (for example "openai_") so they stay unique across providers,
    # both today and as more providers are added.
    # NOTE(review): embedding_gemma_300m carries no provider prefix; the
    # value is kept as-is so existing references keep working.
    openai_text_embedding_3_small = "openai_text_embedding_3_small"
    openai_text_embedding_3_large = "openai_text_embedding_3_large"
    gemini_text_embedding_004 = "gemini_text_embedding_004"
    gemini_embedding_001 = "gemini_embedding_001"
    embedding_gemma_300m = "embedding_gemma_300m"
    nomic_text_embedding_v1_5 = "nomic_text_embedding_v1_5"
36
+
37
+
38
class KilnEmbeddingModelProvider(BaseModel):
    # Describes how one provider exposes an embedding model: the provider,
    # the provider-side model ID, and the model's input/output limits.

    # Provider that serves the model.
    name: ModelProviderName

    # Required: no default is supplied.
    model_id: str = Field(
        description="The model ID for the embedding model. This is the ID used to identify the model in the provider's API.",
    )

    # Optional: None when no limit is recorded.
    max_input_tokens: int | None = Field(
        description="The maximum number of tokens that can be input to the model.",
        default=None,
    )

    # Required: no default is supplied.
    n_dimensions: int = Field(
        description="The number of dimensions in the output embedding.",
    )

    supports_custom_dimensions: bool = Field(
        description="Whether the model supports setting a custom output dimension. If true, the user can set the output dimension in the UI.",
        default=False,
    )

    suggested_for_chunk_embedding: bool = Field(
        description="Whether the model is particularly good for chunk embedding.",
        default=False,
    )

    # Alternative model names for Ollama-served models; None when unset.
    ollama_model_aliases: List[str] | None = None
65
+
66
+
67
class KilnEmbeddingModel(BaseModel):
    """
    Configuration for a specific embedding model.
    """

    # Model family; the built-in list fills this with
    # KilnEmbeddingModelFamily members.
    family: str
    # Model identifier; the built-in list fills this with
    # EmbeddingModelName members.
    name: str
    # Human-readable display name.
    friendly_name: str
    # One entry per provider that serves this model.
    providers: List[KilnEmbeddingModelProvider]
76
+
77
+
78
# Catalogue of the embedding models that ship with the package, grouped by
# family. Each entry lists the providers that serve the model together with
# the provider-specific model ID and limits.
built_in_embedding_models: List[KilnEmbeddingModel] = [
    # --- openai ---
    KilnEmbeddingModel(
        name=EmbeddingModelName.openai_text_embedding_3_small,
        family=KilnEmbeddingModelFamily.openai,
        friendly_name="Text Embedding 3 Small",
        providers=[
            KilnEmbeddingModelProvider(
                name=ModelProviderName.openai,
                model_id="text-embedding-3-small",
                max_input_tokens=8192,
                n_dimensions=1536,
                supports_custom_dimensions=True,
            ),
        ],
    ),
    KilnEmbeddingModel(
        name=EmbeddingModelName.openai_text_embedding_3_large,
        family=KilnEmbeddingModelFamily.openai,
        friendly_name="Text Embedding 3 Large",
        providers=[
            KilnEmbeddingModelProvider(
                name=ModelProviderName.openai,
                model_id="text-embedding-3-large",
                max_input_tokens=8192,
                n_dimensions=3072,
                supports_custom_dimensions=True,
                suggested_for_chunk_embedding=True,
            ),
        ],
    ),
    # --- gemini ---
    KilnEmbeddingModel(
        name=EmbeddingModelName.gemini_text_embedding_004,
        family=KilnEmbeddingModelFamily.gemini,
        friendly_name="Text Embedding 004",
        providers=[
            KilnEmbeddingModelProvider(
                name=ModelProviderName.gemini_api,
                model_id="text-embedding-004",
                max_input_tokens=2048,
                n_dimensions=768,
            ),
        ],
    ),
    KilnEmbeddingModel(
        name=EmbeddingModelName.gemini_embedding_001,
        family=KilnEmbeddingModelFamily.gemini,
        friendly_name="Gemini Embedding 001",
        providers=[
            KilnEmbeddingModelProvider(
                name=ModelProviderName.gemini_api,
                model_id="gemini-embedding-001",
                max_input_tokens=2048,
                n_dimensions=3072,
                supports_custom_dimensions=True,
                suggested_for_chunk_embedding=True,
            ),
        ],
    ),
    # --- gemma ---
    KilnEmbeddingModel(
        name=EmbeddingModelName.embedding_gemma_300m,
        family=KilnEmbeddingModelFamily.gemma,
        friendly_name="Embedding Gemma 300m",
        providers=[
            KilnEmbeddingModelProvider(
                name=ModelProviderName.ollama,
                model_id="embeddinggemma:300m",
                max_input_tokens=2048,
                n_dimensions=768,
                # The model itself supports custom dimensions, but it is
                # unconfirmed whether Ollama exposes that, so it stays off.
                supports_custom_dimensions=False,
                ollama_model_aliases=["embeddinggemma"],
            ),
        ],
    ),
    # --- nomic ---
    KilnEmbeddingModel(
        name=EmbeddingModelName.nomic_text_embedding_v1_5,
        family=KilnEmbeddingModelFamily.nomic,
        friendly_name="Nomic Embed Text v1.5",
        providers=[
            KilnEmbeddingModelProvider(
                name=ModelProviderName.ollama,
                model_id="nomic-embed-text:v1.5",
                max_input_tokens=2048,
                n_dimensions=768,
                # The model itself supports custom dimensions, but it is
                # unconfirmed whether Ollama exposes that, so it stays off.
                supports_custom_dimensions=False,
                ollama_model_aliases=["nomic-embed-text"],
            ),
        ],
    ),
]
175
+
176
+
177
def get_model_by_name(name: EmbeddingModelName) -> KilnEmbeddingModel:
    """Return the built-in embedding model whose ``name`` equals *name*.

    The built-in list is scanned in order and the first match is returned.

    Raises:
        ValueError: if no built-in model has the requested name.
    """
    found = next(
        (entry for entry in built_in_embedding_models if entry.name == name),
        None,
    )
    if found is None:
        raise ValueError(f"Embedding model {name} not found in the list of built-in models")
    return found
182
+
183
+
184
def built_in_embedding_models_from_provider(
    provider_name: ModelProviderName, model_name: str
) -> KilnEmbeddingModelProvider | None:
    """Look up the provider entry for a built-in embedding model.

    Scans the built-in models in order. For each model whose ``name`` equals
    *model_name*, its providers are checked in order and the first one whose
    ``name`` equals *provider_name* is returned.

    Returns:
        The matching provider configuration, or ``None`` when no built-in
        model/provider pair matches.
    """
    matching_providers = (
        provider
        for entry in built_in_embedding_models
        if entry.name == model_name
        for provider in entry.providers
        if provider.name == provider_name
    )
    return next(matching_providers, None)