kiln-ai 0.21.0__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of kiln-ai has been flagged as a potentially problematic release.
Files changed (53)
  1. kiln_ai/adapters/extractors/litellm_extractor.py +52 -32
  2. kiln_ai/adapters/extractors/test_litellm_extractor.py +169 -71
  3. kiln_ai/adapters/ml_embedding_model_list.py +330 -28
  4. kiln_ai/adapters/ml_model_list.py +503 -23
  5. kiln_ai/adapters/model_adapters/litellm_adapter.py +39 -8
  6. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +78 -0
  7. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
  8. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
  9. kiln_ai/adapters/model_adapters/test_structured_output.py +6 -9
  10. kiln_ai/adapters/test_ml_embedding_model_list.py +89 -279
  11. kiln_ai/adapters/test_ml_model_list.py +0 -10
  12. kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
  13. kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
  14. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
  15. kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
  16. kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
  17. kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
  18. kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
  19. kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
  20. kiln_ai/datamodel/basemodel.py +31 -3
  21. kiln_ai/datamodel/external_tool_server.py +206 -54
  22. kiln_ai/datamodel/extraction.py +14 -0
  23. kiln_ai/datamodel/task.py +5 -0
  24. kiln_ai/datamodel/task_output.py +41 -11
  25. kiln_ai/datamodel/test_attachment.py +3 -3
  26. kiln_ai/datamodel/test_basemodel.py +269 -13
  27. kiln_ai/datamodel/test_datasource.py +50 -0
  28. kiln_ai/datamodel/test_external_tool_server.py +534 -152
  29. kiln_ai/datamodel/test_extraction_model.py +31 -0
  30. kiln_ai/datamodel/test_task.py +35 -1
  31. kiln_ai/datamodel/test_tool_id.py +106 -1
  32. kiln_ai/datamodel/tool_id.py +49 -0
  33. kiln_ai/tools/base_tool.py +30 -6
  34. kiln_ai/tools/built_in_tools/math_tools.py +12 -4
  35. kiln_ai/tools/kiln_task_tool.py +162 -0
  36. kiln_ai/tools/mcp_server_tool.py +7 -5
  37. kiln_ai/tools/mcp_session_manager.py +50 -24
  38. kiln_ai/tools/rag_tools.py +17 -6
  39. kiln_ai/tools/test_kiln_task_tool.py +527 -0
  40. kiln_ai/tools/test_mcp_server_tool.py +4 -15
  41. kiln_ai/tools/test_mcp_session_manager.py +186 -226
  42. kiln_ai/tools/test_rag_tools.py +86 -5
  43. kiln_ai/tools/test_tool_registry.py +199 -5
  44. kiln_ai/tools/tool_registry.py +49 -17
  45. kiln_ai/utils/filesystem.py +4 -4
  46. kiln_ai/utils/open_ai_types.py +19 -2
  47. kiln_ai/utils/pdf_utils.py +21 -0
  48. kiln_ai/utils/test_open_ai_types.py +88 -12
  49. kiln_ai/utils/test_pdf_utils.py +14 -1
  50. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/METADATA +79 -1
  51. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/RECORD +53 -45
  52. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/WHEEL +0 -0
  53. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -17,6 +17,15 @@ class KilnEmbeddingModelFamily(str, Enum):
     gemini = "gemini"
     gemma = "gemma"
     nomic = "nomic"
+    qwen = "qwen"
+    baai = "baai"
+    modernbert = "modernbert"
+    intfloat = "intfloat"
+    together = "together"
+    thenlper = "thenlper"
+    where_is_ai = "where_is_ai"
+    mixedbread = "mixedbread"
+    netease = "netease"


 class EmbeddingModelName(str, Enum):
@@ -33,6 +42,20 @@ class EmbeddingModelName(str, Enum):
     gemini_embedding_001 = "gemini_embedding_001"
     embedding_gemma_300m = "embedding_gemma_300m"
     nomic_text_embedding_v1_5 = "nomic_text_embedding_v1_5"
+    qwen_3_embedding_0p6b = "qwen_3_embedding_0p6b"
+    qwen_3_embedding_4b = "qwen_3_embedding_4b"
+    qwen_3_embedding_8b = "qwen_3_embedding_8b"
+    baai_bge_small_1_5 = "baai_bge_small_1_5"
+    baai_bge_base_1_5 = "baai_bge_base_1_5"
+    baai_bge_large_1_5 = "baai_bge_large_1_5"
+    m2_bert_retrieval_32k = "m2_bert_retrieval_32k"
+    gte_modernbert_base = "gte_modernbert_base"
+    multilingual_e5_large_instruct = "multilingual_e5_large_instruct"
+    thenlper_gte_large = "thenlper_gte_large"
+    thenlper_gte_base = "thenlper_gte_base"
+    where_is_ai_uae_large_v1 = "where_is_ai_uae_large_v1"
+    mixedbread_ai_mxbai_embed_large_v1 = "mixedbread_ai_mxbai_embed_large_v1"
+    netease_youdao_bce_embedding_base_v1 = "netease_youdao_bce_embedding_base_v1"


 class KilnEmbeddingModelProvider(BaseModel):
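
For context, the two enums above are only identifiers; each new EmbeddingModelName value is wired to a concrete entry in built_in_embedding_models later in this file. A minimal lookup sketch, relying only on names visible in this diff (the helper itself is not part of kiln-ai):

# Hypothetical lookup helper (not shipped with kiln-ai); uses only EmbeddingModelName,
# KilnEmbeddingModel and built_in_embedding_models as shown in this diff.
from kiln_ai.adapters.ml_embedding_model_list import (
    EmbeddingModelName,
    KilnEmbeddingModel,
    built_in_embedding_models,
)

def embedding_model_by_name(name: EmbeddingModelName) -> KilnEmbeddingModel | None:
    # Linear scan over the registry; returns None if no entry exists for the enum value.
    return next((m for m in built_in_embedding_models if m.name == name), None)

model = embedding_model_by_name(EmbeddingModelName.qwen_3_embedding_8b)
if model is not None:
    print(model.friendly_name, [p.model_id for p in model.providers])
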
@@ -76,66 +99,68 @@ class KilnEmbeddingModel(BaseModel):


 built_in_embedding_models: List[KilnEmbeddingModel] = [
-    # openai
+    # OpenAI Text Embedding 3 Large
     KilnEmbeddingModel(
         family=KilnEmbeddingModelFamily.openai,
-        name=EmbeddingModelName.openai_text_embedding_3_small,
-        friendly_name="Text Embedding 3 Small",
+        name=EmbeddingModelName.openai_text_embedding_3_large,
+        friendly_name="Text Embedding 3 Large",
         providers=[
             KilnEmbeddingModelProvider(
                 name=ModelProviderName.openai,
-                model_id="text-embedding-3-small",
-                n_dimensions=1536,
+                model_id="text-embedding-3-large",
+                n_dimensions=3072,
                 max_input_tokens=8192,
                 supports_custom_dimensions=True,
+                suggested_for_chunk_embedding=True,
             ),
         ],
     ),
+    # OpenAI Text Embedding 3 Small
     KilnEmbeddingModel(
         family=KilnEmbeddingModelFamily.openai,
-        name=EmbeddingModelName.openai_text_embedding_3_large,
-        friendly_name="Text Embedding 3 Large",
+        name=EmbeddingModelName.openai_text_embedding_3_small,
+        friendly_name="Text Embedding 3 Small",
         providers=[
             KilnEmbeddingModelProvider(
                 name=ModelProviderName.openai,
-                model_id="text-embedding-3-large",
-                n_dimensions=3072,
+                model_id="text-embedding-3-small",
+                n_dimensions=1536,
                 max_input_tokens=8192,
                 supports_custom_dimensions=True,
-                suggested_for_chunk_embedding=True,
             ),
         ],
     ),
-    # gemini
+    # Gemini Embedding 001
     KilnEmbeddingModel(
         family=KilnEmbeddingModelFamily.gemini,
-        name=EmbeddingModelName.gemini_text_embedding_004,
-        friendly_name="Text Embedding 004",
+        name=EmbeddingModelName.gemini_embedding_001,
+        friendly_name="Gemini Embedding 001",
         providers=[
             KilnEmbeddingModelProvider(
                 name=ModelProviderName.gemini_api,
-                model_id="text-embedding-004",
-                n_dimensions=768,
+                model_id="gemini-embedding-001",
+                n_dimensions=3072,
                 max_input_tokens=2048,
+                supports_custom_dimensions=True,
+                suggested_for_chunk_embedding=True,
             ),
         ],
     ),
+    # Gemini Text Embedding 004
     KilnEmbeddingModel(
         family=KilnEmbeddingModelFamily.gemini,
-        name=EmbeddingModelName.gemini_embedding_001,
-        friendly_name="Gemini Embedding 001",
+        name=EmbeddingModelName.gemini_text_embedding_004,
+        friendly_name="Text Embedding 004",
         providers=[
             KilnEmbeddingModelProvider(
                 name=ModelProviderName.gemini_api,
-                model_id="gemini-embedding-001",
-                n_dimensions=3072,
+                model_id="text-embedding-004",
+                n_dimensions=768,
                 max_input_tokens=2048,
-                supports_custom_dimensions=True,
-                suggested_for_chunk_embedding=True,
             ),
         ],
     ),
-    # gemma
+    # Embedding Gemma 300m
     KilnEmbeddingModel(
         family=KilnEmbeddingModelFamily.gemma,
         name=EmbeddingModelName.embedding_gemma_300m,
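
The hunk above also moves the suggested_for_chunk_embedding flag from Text Embedding 3 Small to Text Embedding 3 Large, and adds it to Gemini Embedding 001, so the default chunk-embedding picks change in 0.22.x. A sketch of how a caller might read that flag, assuming providers that omit the field default it to False; this helper is illustrative, not kiln-ai API:

# Illustrative helper (not part of kiln-ai) built on fields shown in the hunk above.
from kiln_ai.adapters.ml_embedding_model_list import built_in_embedding_models

def suggested_chunk_embedding_options(provider_name):
    """Yield (friendly_name, model_id, n_dimensions) for providers of the given
    ModelProviderName that are flagged as suggested for chunk embedding."""
    for model in built_in_embedding_models:
        for provider in model.providers:
            if provider.name == provider_name and provider.suggested_for_chunk_embedding:
                yield model.friendly_name, provider.model_id, provider.n_dimensions

# e.g. list(suggested_chunk_embedding_options(ModelProviderName.openai))
# -> [("Text Embedding 3 Large", "text-embedding-3-large", 3072)]
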
@@ -146,14 +171,15 @@ built_in_embedding_models: List[KilnEmbeddingModel] = [
                 model_id="embeddinggemma:300m",
                 n_dimensions=768,
                 max_input_tokens=2048,
-                # the model itself does support custom dimensions, but
-                # not sure if ollama supports it
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
                 supports_custom_dimensions=False,
                 ollama_model_aliases=["embeddinggemma"],
             ),
         ],
     ),
-    # nomic
+    # Nomic Embed Text v1.5
     KilnEmbeddingModel(
         family=KilnEmbeddingModelFamily.nomic,
         name=EmbeddingModelName.nomic_text_embedding_v1_5,
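
The replaced comment explains why supports_custom_dimensions stays False for the Ollama-hosted models: litellm currently rejects the dimensions parameter for them (see the linked issue). An illustrative sketch of the resulting call pattern, assuming litellm's OpenAI-style embedding API; the embed_ollama_chunks helper and the "ollama/<model_id>" model string are assumptions for this sketch, not Kiln's adapter code:

# Sketch only: forward a custom `dimensions` value to litellm only when the registry
# marks the provider as supporting it, matching the behaviour described above.
import litellm

def embed_ollama_chunks(provider, texts: list[str], dimensions: int | None = None):
    kwargs = {}
    if dimensions is not None and provider.supports_custom_dimensions:
        # Skipped for e.g. embeddinggemma: litellm rejects the param for Ollama models.
        kwargs["dimensions"] = dimensions
    # Hypothetical model string; Kiln builds its own from provider.model_id internally.
    return litellm.embedding(model=f"ollama/{provider.model_id}", input=texts, **kwargs)
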
@@ -163,12 +189,288 @@ built_in_embedding_models: List[KilnEmbeddingModel] = [
                 name=ModelProviderName.ollama,
                 model_id="nomic-embed-text:v1.5",
                 n_dimensions=768,
-                max_input_tokens=2048,
-                # the model itself does support custom dimensions, but
-                # not sure if ollama supports it
+                max_input_tokens=8192,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
                 supports_custom_dimensions=False,
                 ollama_model_aliases=["nomic-embed-text"],
             ),
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="nomic-ai/nomic-embed-text-v1.5",
+                n_dimensions=768,
+                max_input_tokens=8192,
+                supports_custom_dimensions=True,
+            ),
+        ],
+    ),
+    # Qwen3 Embedding 8B
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.qwen,
+        name=EmbeddingModelName.qwen_3_embedding_8b,
+        friendly_name="Qwen 3 Embedding 8B",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.ollama,
+                model_id="qwen3-embedding:8b",
+                n_dimensions=4096,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
+                supports_custom_dimensions=False,
+                ollama_model_aliases=[
+                    # 8b is default
+                    "qwen3-embedding",
+                ],
+            ),
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="accounts/fireworks/models/qwen3-embedding-8b",
+                n_dimensions=4096,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                supports_custom_dimensions=True,
+            ),
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.siliconflow_cn,
+                model_id="Qwen/Qwen3-Embedding-8B",
+                n_dimensions=4096,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Qwen3 Embedding 4B
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.qwen,
+        name=EmbeddingModelName.qwen_3_embedding_4b,
+        friendly_name="Qwen 3 Embedding 4B",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.ollama,
+                model_id="qwen3-embedding:4b",
+                n_dimensions=2560,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
+                supports_custom_dimensions=False,
+            ),
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.siliconflow_cn,
+                model_id="Qwen/Qwen3-Embedding-4B",
+                n_dimensions=2560,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Qwen3 Embedding 0.6B
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.qwen,
+        name=EmbeddingModelName.qwen_3_embedding_0p6b,
+        friendly_name="Qwen 3 Embedding 0.6B",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.ollama,
+                model_id="qwen3-embedding:0.6b",
+                n_dimensions=1024,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
+                supports_custom_dimensions=False,
+            ),
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.siliconflow_cn,
+                model_id="Qwen/Qwen3-Embedding-0.6B",
+                n_dimensions=1024,
+                max_input_tokens=32_000,
+                # the model itself does support custom dimensions, but not working
+                # because litellm rejects the param:
+                # https://github.com/BerriAI/litellm/issues/11940
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # BAAI-Bge-Large-1.5
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.baai,
+        name=EmbeddingModelName.baai_bge_large_1_5,
+        friendly_name="BAAI Bge Large 1.5",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="BAAI/bge-large-en-v1.5",
+                n_dimensions=1024,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # BAAI-Bge-Base-1.5
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.baai,
+        name=EmbeddingModelName.baai_bge_base_1_5,
+        friendly_name="BAAI Bge Base 1.5",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="BAAI/bge-base-en-v1.5",
+                n_dimensions=768,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="BAAI/bge-base-en-v1.5",
+                n_dimensions=768,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # BAAI-Bge-Small-1.5
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.baai,
+        name=EmbeddingModelName.baai_bge_small_1_5,
+        friendly_name="BAAI Bge Small 1.5",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="BAAI/bge-small-en-v1.5",
+                n_dimensions=384,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # M2-BERT-Retrieval-32k
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.together,
+        name=EmbeddingModelName.m2_bert_retrieval_32k,
+        friendly_name="M2 BERT Retrieval 32k",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="togethercomputer/m2-bert-80M-32k-retrieval",
+                n_dimensions=768,
+                max_input_tokens=32_768,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Gte Modernbert Base
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.modernbert,
+        name=EmbeddingModelName.gte_modernbert_base,
+        friendly_name="Gte Modernbert Base",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="Alibaba-NLP/gte-modernbert-base",
+                n_dimensions=768,
+                max_input_tokens=8192,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Multilingual E5 Large Instruct
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.intfloat,
+        name=EmbeddingModelName.multilingual_e5_large_instruct,
+        friendly_name="Multilingual E5 Large Instruct",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.together_ai,
+                model_id="intfloat/multilingual-e5-large-instruct",
+                n_dimensions=1024,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Thenlper Gte Large
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.thenlper,
+        name=EmbeddingModelName.thenlper_gte_large,
+        friendly_name="Thenlper Gte Large",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="thenlper/gte-large",
+                n_dimensions=1024,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Thenlper Gte Base
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.thenlper,
+        name=EmbeddingModelName.thenlper_gte_base,
+        friendly_name="Thenlper Gte Base",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="thenlper/gte-base",
+                n_dimensions=768,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Where Is AI UAE Large V1
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.where_is_ai,
+        name=EmbeddingModelName.where_is_ai_uae_large_v1,
+        friendly_name="Where Is AI UAE Large V1",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="WhereIsAI/UAE-Large-V1",
+                n_dimensions=1024,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Mixedbread AI Mxbai Embed Large V1
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.mixedbread,
+        name=EmbeddingModelName.mixedbread_ai_mxbai_embed_large_v1,
+        friendly_name="Mixedbread AI Mxbai Embed Large V1",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.fireworks_ai,
+                model_id="mixedbread-ai/mxbai-embed-large-v1",
+                n_dimensions=1024,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
+        ],
+    ),
+    # Netease Youdao Bce Embedding Base V1
+    KilnEmbeddingModel(
+        family=KilnEmbeddingModelFamily.netease,
+        name=EmbeddingModelName.netease_youdao_bce_embedding_base_v1,
+        friendly_name="Netease Youdao Bce Embedding Base V1",
+        providers=[
+            KilnEmbeddingModelProvider(
+                name=ModelProviderName.siliconflow_cn,
+                model_id="netease-youdao/bce-embedding-base_v1",
+                n_dimensions=768,
+                max_input_tokens=512,
+                supports_custom_dimensions=False,
+            ),
         ],
     ),
 ]
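
Taken together, this file's changes add fourteen embedding models to the registry and wire in Fireworks AI, Together AI, and SiliconFlow (CN) as embedding providers alongside OpenAI, Gemini, and Ollama. A small, hypothetical summary script (not shipped with kiln-ai) that only reads fields shown in this diff can make the expanded registry easier to scan:

# Hypothetical summary script; groups registry entries by provider using only
# fields that appear in the diff above.
from collections import defaultdict

from kiln_ai.adapters.ml_embedding_model_list import built_in_embedding_models

by_provider: dict[str, list[str]] = defaultdict(list)
for model in built_in_embedding_models:
    for provider in model.providers:
        by_provider[str(provider.name)].append(
            f"{model.friendly_name}: {provider.n_dimensions} dims, "
            f"max {provider.max_input_tokens} input tokens"
        )

for provider_name in sorted(by_provider):
    print(provider_name)
    for entry in by_provider[provider_name]:
        print("  -", entry)
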