kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of kiln-ai might be problematic.

Files changed (158)
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/test_embedding_models.py
@@ -0,0 +1,448 @@
+import uuid
+from pathlib import Path
+
+import pytest
+
+from kiln_ai.datamodel.basemodel import KilnAttachmentModel
+from kiln_ai.datamodel.chunk import Chunk, ChunkedDocument
+from kiln_ai.datamodel.embedding import ChunkEmbeddings, Embedding, EmbeddingConfig
+from kiln_ai.datamodel.project import Project
+
+
+@pytest.fixture
+def mock_project(tmp_path):
+    project_root = tmp_path / str(uuid.uuid4())
+    project_root.mkdir()
+    project = Project(
+        name="Test Project",
+        description="Test description",
+        path=project_root / "project.kiln",
+    )
+    project.save_to_file()
+    return project
+
+
+@pytest.fixture
+def mock_chunked_document(tmp_path):
+    # Create a temporary file for the attachment
+    tmp_dir = tmp_path / str(uuid.uuid4())
+    tmp_dir.mkdir()
+
+    tmp_path_file = Path(tmp_dir) / f"{uuid.uuid4()}.txt"
+    tmp_path_file.write_text("test content")
+
+    attachment = KilnAttachmentModel.from_file(tmp_path_file)
+    chunks = [Chunk(content=attachment) for _ in range(3)]
+
+    doc = ChunkedDocument(
+        chunks=chunks,
+        chunker_config_id="fake-chunker-id",
+        path=Path(tmp_dir) / "chunked_document.kiln",
+    )
+    doc.save_to_file()
+
+    return doc
+
+
+class TestEmbeddingConfig:
+    """Test the EmbeddingConfig class."""
+
+    def test_required_fields(self):
+        """Test that required fields are set correctly."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.name == "test-embedding"
+        assert config.model_provider_name == "openai"
+        assert config.model_name == "openai_text_embedding_3_small"
+        assert config.properties == {"dimensions": 1536}
+
+    def test_optional_description(self):
+        """Test that description is optional."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.description is None
+
+        config_with_desc = EmbeddingConfig(
+            name="test-embedding",
+            description="A test embedding config",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config_with_desc.description == "A test embedding config"
+
+    def test_name_validation(self):
+        """Test name field validation."""
+        # Test valid name
+        config = EmbeddingConfig(
+            name="valid-name_123",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.name == "valid-name_123"
+
+        # Test empty name
+        with pytest.raises(ValueError):
+            EmbeddingConfig(
+                name="",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": 1536},
+            )
+
+    def test_properties_validation(self):
+        """Test properties field validation."""
+        # Test with valid properties
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={
+                "dimensions": 1536,
+            },
+        )
+        assert config.properties == {
+            "dimensions": 1536,
+        }
+
+        # Test with empty properties
+        config_empty = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={},
+        )
+        assert config_empty.properties == {}
+
+    def test_parent_project_method_no_parent(self):
+        """Test parent_project method when no parent is set."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.parent_project() is None
+
+    def test_parent_project_method_with_project_parent(self, mock_project):
+        """Test parent_project method when parent is a Project."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+            parent=mock_project,
+        )
+        assert config.parent_project() == mock_project
+
+    def test_model_provider_name_validation(self, mock_project):
+        """Test model_provider_name field validation."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={},
+            parent=mock_project,
+        )
+        assert config.model_provider_name == "openai"
+
+        with pytest.raises(ValueError):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="invalid-provider",
+                model_name="openai_text_embedding_3_small",
+                parent=mock_project,
+                properties={},
+            )
+
+    def test_custom_dimensions_validation(self):
+        """Test that custom dimensions are properly validated."""
+
+        # this model supports custom dimensions
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.properties == {"dimensions": 1536}
+
+        # dimensions is negative
+        with pytest.raises(ValueError, match="Dimensions must be a positive integer"):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": -1},
+            )
+
+        # dimensions is not an integer
+        with pytest.raises(ValueError, match="Dimensions must be a positive integer"):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": 1.5},
+            )
+
+        # dimensions is not a positive integer
+        with pytest.raises(ValueError, match="Dimensions must be a positive integer"):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": "512"},
+            )
+
+    def test_dimensions_optional(self):
+        """Test that dimensions is optional and should be ignored if not provided."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={},
+        )
+        assert config.properties == {}
+
+
+class TestEmbedding:
+    """Test the Embedding class."""
+
+    def test_required_fields(self):
+        """Test that required fields are properly validated."""
+        vector = [0.1, 0.2, 0.3, 0.4, 0.5]
+        embedding = Embedding(vector=vector)
+        assert embedding.vector == vector
+
+    def test_vector_validation(self):
+        """Test that vector field is properly validated."""
+        # Test with valid vector
+        vector = [0.1, 0.2, 0.3]
+        embedding = Embedding(vector=vector)
+        assert embedding.vector == vector
+
+        # Test with empty vector
+        empty_vector = []
+        embedding_empty = Embedding(vector=empty_vector)
+        assert embedding_empty.vector == empty_vector
+
+        # Test with large vector
+        large_vector = [0.1] * 1536
+        embedding_large = Embedding(vector=large_vector)
+        assert len(embedding_large.vector) == 1536
+
+    def test_vector_types(self):
+        """Test that vector accepts different numeric types."""
+        # Test with integers
+        int_vector = [1, 2, 3, 4, 5]
+        embedding_int = Embedding(vector=int_vector)
+        assert embedding_int.vector == int_vector
+
+        # Test with floats
+        float_vector = [1.1, 2.2, 3.3, 4.4, 5.5]
+        embedding_float = Embedding(vector=float_vector)
+        assert embedding_float.vector == float_vector
+
+        # Test with mixed types
+        mixed_vector = [1, 2.5, 3, 4.7, 5]
+        embedding_mixed = Embedding(vector=mixed_vector)
+        assert embedding_mixed.vector == mixed_vector
+
+
+class TestChunkEmbeddings:
+    """Test the ChunkEmbeddings class."""
+
+    def test_required_fields(self):
+        """Test that required fields are properly validated."""
+        embedding_config_id = "test-config-id"
+        embeddings = [
+            Embedding(vector=[0.1, 0.2, 0.3]),
+            Embedding(vector=[0.4, 0.5, 0.6]),
+        ]
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings.embedding_config_id == embedding_config_id
+        assert chunk_embeddings.embeddings == embeddings
+
+    def test_embeddings_validation(self):
+        """Test that embeddings field validation works correctly."""
+        embedding_config_id = "test-config-id"
+
+        # Test with valid list of embeddings
+        embeddings = [Embedding(vector=[0.1, 0.2, 0.3])]
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings.embeddings == embeddings
+
+        # Test with empty embeddings list
+        empty_embeddings = []
+        chunk_embeddings_empty = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=empty_embeddings,
+        )
+        assert chunk_embeddings_empty.embeddings == empty_embeddings
+
+        # Test with multiple embeddings
+        multiple_embeddings = [
+            Embedding(vector=[0.1, 0.2, 0.3]),
+            Embedding(vector=[0.4, 0.5, 0.6]),
+            Embedding(vector=[0.7, 0.8, 0.9]),
+        ]
+        chunk_embeddings_multiple = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=multiple_embeddings,
+        )
+        assert chunk_embeddings_multiple.embeddings == multiple_embeddings
+        assert len(chunk_embeddings_multiple.embeddings) == 3
+
+    def test_embedding_config_id_validation(self):
+        """Test embedding_config_id field validation."""
+        embeddings = [Embedding(vector=[0.1, 0.2, 0.3])]
+
+        # Test with valid ID
+        valid_id = "test-config-id-123"
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=valid_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings.embedding_config_id == valid_id
+
+        # Test with numeric string ID
+        numeric_id = "12345"
+        chunk_embeddings_numeric = ChunkEmbeddings(
+            embedding_config_id=numeric_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings_numeric.embedding_config_id == numeric_id
+
+    def test_parent_chunked_document_method_no_parent(self):
+        """Test parent_chunked_document method when no parent is set."""
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=[Embedding(vector=[0.1, 0.2, 0.3])],
+        )
+        assert chunk_embeddings.parent_chunked_document() is None
+
+    def test_parent_chunked_document_method_with_chunked_document_parent(
+        self, mock_chunked_document
+    ):
+        """Test parent_chunked_document method when parent is a ChunkedDocument."""
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=[Embedding(vector=[0.1, 0.2, 0.3])],
+            parent=mock_chunked_document,
+        )
+        assert chunk_embeddings.parent_chunked_document() == mock_chunked_document
+
+    def test_embeddings_correspond_to_chunks(self, mock_chunked_document):
+        """Test that embeddings correspond to chunks in the parent chunked document."""
+        # Create embeddings that match the number of chunks in the parent
+        num_chunks = len(mock_chunked_document.chunks)
+        embeddings = [Embedding(vector=[0.1, 0.2, 0.3]) for _ in range(num_chunks)]
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=embeddings,
+            parent=mock_chunked_document,
+        )
+        assert len(chunk_embeddings.embeddings) == num_chunks
+
+    def test_embeddings_with_different_vector_sizes(self):
+        """Test embeddings with different vector sizes."""
+        embedding_config_id = "test-config-id"
+        embeddings = [
+            Embedding(vector=[0.1, 0.2, 0.3]),  # 3 dimensions
+            Embedding(vector=[0.4, 0.5, 0.6, 0.7]),  # 4 dimensions
+            Embedding(vector=[0.8, 0.9]),  # 2 dimensions
+        ]
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=embeddings,
+        )
+        assert len(chunk_embeddings.embeddings) == 3
+        assert len(chunk_embeddings.embeddings[0].vector) == 3
+        assert len(chunk_embeddings.embeddings[1].vector) == 4
+        assert len(chunk_embeddings.embeddings[2].vector) == 2
+
+
+class TestEmbeddingIntegration:
+    """Integration tests for embedding models."""
+
+    def test_embedding_config_with_project_parent(self, mock_project):
+        """Test EmbeddingConfig with Project parent."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            description="Test embedding configuration",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+            parent=mock_project,
+        )
+        assert config.parent_project() == mock_project
+        assert config.name == "test-embedding"
+        assert config.model_provider_name == "openai"
+        assert config.model_name == "openai_text_embedding_3_small"
+
+    def test_chunk_embeddings_with_chunked_document_parent(self, mock_chunked_document):
+        """Test ChunkEmbeddings with ChunkedDocument parent."""
+        # Create embeddings for each chunk
+        embeddings = []
+        for chunk in mock_chunked_document.chunks:
+            # Create a mock embedding (in real usage, this would be generated by the embedding model)
+            embedding = Embedding(vector=[0.1, 0.2, 0.3, 0.4, 0.5])
+            embeddings.append(embedding)
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=embeddings,
+            parent=mock_chunked_document,
+        )
+        assert chunk_embeddings.parent_chunked_document() == mock_chunked_document
+        assert len(chunk_embeddings.embeddings) == len(mock_chunked_document.chunks)
+
+    def test_embedding_workflow(self, mock_project, mock_chunked_document):
+        """Test a complete embedding workflow."""
+        # 1. Create an embedding config
+        embedding_config = EmbeddingConfig(
+            name="test-embedding-config",
+            description="Test embedding configuration for workflow",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+            parent=mock_project,
+        )
+
+        # 2. Create embeddings for the chunked document
+        embeddings = []
+        for chunk in mock_chunked_document.chunks:
+            # Simulate embedding generation
+            embedding = Embedding(vector=[0.1] * 1536)
+            embeddings.append(embedding)
+
+        # 3. Create chunk embeddings
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config.id,
+            embeddings=embeddings,
+            parent=mock_chunked_document,
+        )
+
+        # 4. Verify the relationships
+        assert embedding_config.parent_project() == mock_project
+        assert chunk_embeddings.parent_chunked_document() == mock_chunked_document
+        assert len(chunk_embeddings.embeddings) == len(mock_chunked_document.chunks)
+        assert chunk_embeddings.embedding_config_id == embedding_config.id
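Reviewer's note: the tests above pin down the validation contract for the new EmbeddingConfig model: a non-empty name, a recognized provider, and an optional "dimensions" property that must be a positive integer when present. What follows is a minimal Pydantic sketch consistent with those assertions, for orientation only; the shipped class lives in kiln_ai/datamodel/embedding.py, and the field types, defaults, and checks below are assumptions inferred from the tests, not the actual implementation.

# Sketch only: field set and error messages inferred from the tests above.
from typing import Any

from pydantic import BaseModel, field_validator


class EmbeddingConfigSketch(BaseModel):
    name: str
    description: str | None = None  # optional, defaults to None
    model_provider_name: str  # the real model validates this against a provider enum
    model_name: str
    properties: dict[str, Any]  # e.g. {"dimensions": 1536} or {}

    @field_validator("name")
    @classmethod
    def validate_name(cls, value: str) -> str:
        if not value:
            raise ValueError("name must not be empty")
        return value

    @field_validator("properties")
    @classmethod
    def validate_dimensions(cls, value: dict[str, Any]) -> dict[str, Any]:
        # "dimensions" is optional; when present it must be a positive int
        # (bool is an int subclass in Python, so exclude it explicitly).
        if "dimensions" in value:
            dims = value["dimensions"]
            if not isinstance(dims, int) or isinstance(dims, bool) or dims <= 0:
                raise ValueError("Dimensions must be a positive integer")
        return value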
kiln_ai/datamodel/test_eval_model.py
@@ -402,13 +402,13 @@ def test_eval_run_five_star_score_validation(valid_eval_config, valid_eval_run_d
     assert run.scores["accuracy"] == 4.5

     # Invalid scores
-    with pytest.raises(ValueError, match="must be a float between 1.0 and 5.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 1.0 and 5.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"accuracy": 0.5}},
         )

-    with pytest.raises(ValueError, match="must be a float between 1.0 and 5.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 1.0 and 5.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"accuracy": 5.5}},
@@ -442,13 +442,13 @@ def test_eval_run_pass_fail_score_validation(valid_eval_config, valid_eval_run_d
     assert run.scores["check"] == 0.0

     # Invalid scores
-    with pytest.raises(ValueError, match="must be a float between 0.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 0.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"check": -0.1}},
         )

-    with pytest.raises(ValueError, match="must be a float between 0.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 0.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"check": 1.1}},
@@ -485,13 +485,13 @@ def test_eval_run_pass_fail_critical_score_validation(
     assert run.scores["critical"] == -1.0

     # Invalid scores
-    with pytest.raises(ValueError, match="must be a float between -1.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between -1.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"critical": -1.1}},
         )

-    with pytest.raises(ValueError, match="must be a float between -1.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between -1.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"critical": 1.1}},
kiln_ai/datamodel/test_example_models.py
@@ -797,3 +797,178 @@ def test_usage_model_in_task_run(valid_task_run)
     assert task_run.usage.output_tokens == 50
     assert task_run.usage.total_tokens == 150
     assert task_run.usage.cost == 0.002
+
+
+@pytest.mark.parametrize(
+    "usage1_data,usage2_data,expected_data",
+    [
+        # None + None = None
+        (
+            {
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "cost": None,
+            },
+            {
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "cost": None,
+            },
+            {
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "cost": None,
+            },
+        ),
+        # None + value = value
+        (
+            {
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "cost": None,
+            },
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+        ),
+        # value + None = value
+        (
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+            {
+                "input_tokens": None,
+                "output_tokens": None,
+                "total_tokens": None,
+                "cost": None,
+            },
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+        ),
+        # value1 + value2 = value1 + value2
+        (
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+            {
+                "input_tokens": 200,
+                "output_tokens": 75,
+                "total_tokens": 275,
+                "cost": 0.010,
+            },
+            {
+                "input_tokens": 300,
+                "output_tokens": 125,
+                "total_tokens": 425,
+                "cost": 0.015,
+            },
+        ),
+        # Mixed scenarios
+        (
+            {
+                "input_tokens": 100,
+                "output_tokens": None,
+                "total_tokens": 150,
+                "cost": None,
+            },
+            {
+                "input_tokens": None,
+                "output_tokens": 75,
+                "total_tokens": None,
+                "cost": 0.010,
+            },
+            {
+                "input_tokens": 100,
+                "output_tokens": 75,
+                "total_tokens": 150,
+                "cost": 0.010,
+            },
+        ),
+        # Edge case: zeros
+        (
+            {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0, "cost": 0.0},
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+            {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "total_tokens": 150,
+                "cost": 0.005,
+            },
+        ),
+    ],
+)
+def test_usage_addition(usage1_data, usage2_data, expected_data):
+    """Test Usage addition with various combinations of None and numeric values."""
+    usage1 = Usage(**usage1_data)
+    usage2 = Usage(**usage2_data)
+    result = usage1 + usage2
+
+    assert result.input_tokens == expected_data["input_tokens"]
+    assert result.output_tokens == expected_data["output_tokens"]
+    assert result.total_tokens == expected_data["total_tokens"]
+    assert result.cost == expected_data["cost"]
+
+
+def test_usage_addition_type_error():
+    """Test that adding Usage to non-Usage raises TypeError."""
+    usage = Usage(input_tokens=100, output_tokens=50, total_tokens=150, cost=0.005)
+
+    with pytest.raises(TypeError, match="Cannot add Usage with"):
+        usage + "not_a_usage"  # type: ignore
+
+    with pytest.raises(TypeError, match="Cannot add Usage with"):
+        usage + 42  # type: ignore
+
+    with pytest.raises(TypeError, match="Cannot add Usage with"):
+        usage + {"input_tokens": 100}  # type: ignore
+
+
+def test_usage_addition_immutability():
+    """Test that addition creates new Usage objects and doesn't mutate originals."""
+    usage1 = Usage(input_tokens=100, output_tokens=50, total_tokens=150, cost=0.005)
+    usage2 = Usage(input_tokens=200, output_tokens=75, total_tokens=275, cost=0.010)
+
+    original_usage1_data = usage1.model_dump()
+    original_usage2_data = usage2.model_dump()
+
+    result = usage1 + usage2
+
+    # Original objects should be unchanged
+    assert usage1.model_dump() == original_usage1_data
+    assert usage2.model_dump() == original_usage2_data
+
+    # Result should be a new object
+    assert result is not usage1
+    assert result is not usage2
+    assert result.input_tokens == 300
+    assert result.output_tokens == 125
+    assert result.total_tokens == 425
+    assert result.cost == 0.015
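
Reviewer's note: taken together, these tests specify the addition semantics for Usage: None fields act as the identity, present values sum, adding anything other than a Usage raises TypeError, and neither operand is mutated. Below is a minimal sketch consistent with those assertions; the real model ships in kiln_ai/datamodel, and everything beyond the field names exercised by the tests is an assumption, not the shipped implementation.

# Sketch only: addition semantics reconstructed from the tests above.
from typing import Optional

from pydantic import BaseModel


def _add_optional(a: Optional[float], b: Optional[float]) -> Optional[float]:
    # None acts as the identity; two Nones stay None.
    if a is None:
        return b
    if b is None:
        return a
    return a + b


class UsageSketch(BaseModel):
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    cost: Optional[float] = None

    def __add__(self, other: "UsageSketch") -> "UsageSketch":
        if not isinstance(other, UsageSketch):
            raise TypeError(f"Cannot add Usage with {type(other)}")
        # Build a new object; neither operand is modified.
        return UsageSketch(
            input_tokens=_add_optional(self.input_tokens, other.input_tokens),
            output_tokens=_add_optional(self.output_tokens, other.output_tokens),
            total_tokens=_add_optional(self.total_tokens, other.total_tokens),
            cost=_add_optional(self.cost, other.cost),
        )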