kiln-ai 0.20.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to the supported registries. It is provided for informational purposes only.


Files changed (133)
  1. kiln_ai/adapters/__init__.py +6 -0
  2. kiln_ai/adapters/adapter_registry.py +43 -226
  3. kiln_ai/adapters/chunkers/__init__.py +13 -0
  4. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  5. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  6. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  7. kiln_ai/adapters/chunkers/helpers.py +23 -0
  8. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  9. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  10. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  11. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  12. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  13. kiln_ai/adapters/embedding/__init__.py +0 -0
  14. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  15. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  16. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  17. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  18. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  19. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  20. kiln_ai/adapters/eval/eval_runner.py +6 -2
  21. kiln_ai/adapters/eval/test_base_eval.py +1 -3
  22. kiln_ai/adapters/eval/test_g_eval.py +1 -1
  23. kiln_ai/adapters/extractors/__init__.py +18 -0
  24. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  25. kiln_ai/adapters/extractors/encoding.py +20 -0
  26. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  27. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  28. kiln_ai/adapters/extractors/litellm_extractor.py +406 -0
  29. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  30. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  31. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  32. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  33. kiln_ai/adapters/extractors/test_litellm_extractor.py +1290 -0
  34. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  35. kiln_ai/adapters/fine_tune/test_fireworks_finetune.py +2 -6
  36. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  37. kiln_ai/adapters/ml_embedding_model_list.py +494 -0
  38. kiln_ai/adapters/ml_model_list.py +876 -18
  39. kiln_ai/adapters/model_adapters/litellm_adapter.py +40 -75
  40. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +79 -1
  41. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
  42. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
  43. kiln_ai/adapters/model_adapters/test_structured_output.py +9 -10
  44. kiln_ai/adapters/ollama_tools.py +69 -12
  45. kiln_ai/adapters/provider_tools.py +190 -46
  46. kiln_ai/adapters/rag/deduplication.py +49 -0
  47. kiln_ai/adapters/rag/progress.py +252 -0
  48. kiln_ai/adapters/rag/rag_runners.py +844 -0
  49. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  50. kiln_ai/adapters/rag/test_progress.py +785 -0
  51. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  52. kiln_ai/adapters/remote_config.py +80 -8
  53. kiln_ai/adapters/test_adapter_registry.py +579 -86
  54. kiln_ai/adapters/test_ml_embedding_model_list.py +239 -0
  55. kiln_ai/adapters/test_ml_model_list.py +202 -0
  56. kiln_ai/adapters/test_ollama_tools.py +340 -1
  57. kiln_ai/adapters/test_prompt_builders.py +1 -1
  58. kiln_ai/adapters/test_provider_tools.py +199 -8
  59. kiln_ai/adapters/test_remote_config.py +551 -56
  60. kiln_ai/adapters/vector_store/__init__.py +1 -0
  61. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  62. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  63. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  64. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  65. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  66. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  67. kiln_ai/datamodel/__init__.py +16 -13
  68. kiln_ai/datamodel/basemodel.py +201 -4
  69. kiln_ai/datamodel/chunk.py +158 -0
  70. kiln_ai/datamodel/datamodel_enums.py +27 -0
  71. kiln_ai/datamodel/embedding.py +64 -0
  72. kiln_ai/datamodel/external_tool_server.py +206 -54
  73. kiln_ai/datamodel/extraction.py +317 -0
  74. kiln_ai/datamodel/project.py +33 -1
  75. kiln_ai/datamodel/rag.py +79 -0
  76. kiln_ai/datamodel/task.py +5 -0
  77. kiln_ai/datamodel/task_output.py +41 -11
  78. kiln_ai/datamodel/test_attachment.py +649 -0
  79. kiln_ai/datamodel/test_basemodel.py +270 -14
  80. kiln_ai/datamodel/test_chunk_models.py +317 -0
  81. kiln_ai/datamodel/test_dataset_split.py +1 -1
  82. kiln_ai/datamodel/test_datasource.py +50 -0
  83. kiln_ai/datamodel/test_embedding_models.py +448 -0
  84. kiln_ai/datamodel/test_eval_model.py +6 -6
  85. kiln_ai/datamodel/test_external_tool_server.py +534 -152
  86. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  87. kiln_ai/datamodel/test_extraction_model.py +501 -0
  88. kiln_ai/datamodel/test_rag.py +641 -0
  89. kiln_ai/datamodel/test_task.py +35 -1
  90. kiln_ai/datamodel/test_tool_id.py +187 -1
  91. kiln_ai/datamodel/test_vector_store.py +320 -0
  92. kiln_ai/datamodel/tool_id.py +58 -0
  93. kiln_ai/datamodel/vector_store.py +141 -0
  94. kiln_ai/tools/base_tool.py +12 -3
  95. kiln_ai/tools/built_in_tools/math_tools.py +12 -4
  96. kiln_ai/tools/kiln_task_tool.py +158 -0
  97. kiln_ai/tools/mcp_server_tool.py +2 -2
  98. kiln_ai/tools/mcp_session_manager.py +51 -22
  99. kiln_ai/tools/rag_tools.py +164 -0
  100. kiln_ai/tools/test_kiln_task_tool.py +527 -0
  101. kiln_ai/tools/test_mcp_server_tool.py +4 -15
  102. kiln_ai/tools/test_mcp_session_manager.py +187 -227
  103. kiln_ai/tools/test_rag_tools.py +929 -0
  104. kiln_ai/tools/test_tool_registry.py +290 -7
  105. kiln_ai/tools/tool_registry.py +69 -16
  106. kiln_ai/utils/__init__.py +3 -0
  107. kiln_ai/utils/async_job_runner.py +62 -17
  108. kiln_ai/utils/config.py +2 -2
  109. kiln_ai/utils/env.py +15 -0
  110. kiln_ai/utils/filesystem.py +14 -0
  111. kiln_ai/utils/filesystem_cache.py +60 -0
  112. kiln_ai/utils/litellm.py +94 -0
  113. kiln_ai/utils/lock.py +100 -0
  114. kiln_ai/utils/mime_type.py +38 -0
  115. kiln_ai/utils/open_ai_types.py +19 -2
  116. kiln_ai/utils/pdf_utils.py +59 -0
  117. kiln_ai/utils/test_async_job_runner.py +151 -35
  118. kiln_ai/utils/test_env.py +142 -0
  119. kiln_ai/utils/test_filesystem_cache.py +316 -0
  120. kiln_ai/utils/test_litellm.py +206 -0
  121. kiln_ai/utils/test_lock.py +185 -0
  122. kiln_ai/utils/test_mime_type.py +66 -0
  123. kiln_ai/utils/test_open_ai_types.py +88 -12
  124. kiln_ai/utils/test_pdf_utils.py +86 -0
  125. kiln_ai/utils/test_uuid.py +111 -0
  126. kiln_ai/utils/test_validation.py +524 -0
  127. kiln_ai/utils/uuid.py +9 -0
  128. kiln_ai/utils/validation.py +90 -0
  129. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/METADATA +9 -1
  130. kiln_ai-0.22.0.dist-info/RECORD +213 -0
  131. kiln_ai-0.20.1.dist-info/RECORD +0 -138
  132. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/WHEEL +0 -0
  133. {kiln_ai-0.20.1.dist-info → kiln_ai-0.22.0.dist-info}/licenses/LICENSE.txt +0 -0
kiln_ai/datamodel/test_datasource.py

@@ -38,6 +38,15 @@ def test_valid_file_import_data_source():
     assert data_source.properties["file_name"] == "test.txt"
 
 
+def test_empty_valid_tool_call_data_source():
+    data_source = DataSource(
+        type=DataSourceType.tool_call,
+        properties={},
+    )
+    assert data_source.type == DataSourceType.tool_call
+    assert data_source.properties == {}
+
+
 def test_missing_required_property():
     with pytest.raises(ValidationError, match="'created_by' is required for"):
         DataSource(type=DataSourceType.human)

@@ -79,6 +88,35 @@ def test_not_allowed_property():
         )
 
 
+def test_not_allowed_property_tool_call():
+    with pytest.raises(
+        ValidationError,
+        match="'created_by' is not allowed for",
+    ):
+        DataSource(
+            type=DataSourceType.tool_call,
+            properties={
+                "model_name": "GPT-4",
+                "model_provider": "OpenAI",
+                "adapter_name": "langchain",
+                "created_by": "John Doe",
+            },
+        )
+
+
+def test_not_allowed_file_name_tool_call():
+    with pytest.raises(
+        ValidationError,
+        match="'file_name' is not allowed for",
+    ):
+        DataSource(
+            type=DataSourceType.tool_call,
+            properties={
+                "file_name": "test.txt",
+            },
+        )
+
+
 def test_extra_properties():
     data_source = DataSource(
         type=DataSourceType.synthetic,

@@ -94,6 +132,18 @@ def test_extra_properties():
     assert data_source.properties["max_tokens"] == 100
 
 
+def test_extra_properties_tool_call():
+    data_source = DataSource(
+        type=DataSourceType.tool_call,
+        properties={
+            "temperature": 0.7,
+            "max_tokens": 100,
+        },
+    )
+    assert data_source.properties["temperature"] == 0.7
+    assert data_source.properties["max_tokens"] == 100
+
+
 def test_prompt_type_optional_for_synthetic():
     data_source = DataSource(
         type=DataSourceType.synthetic,
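
The tool_call tests above pin down a per-type property schema: tool_call sources require no properties, reject created_by and file_name, and accept arbitrary extras such as temperature. As a rough illustration of that validation pattern (the rule table and names here are hypothetical; the real implementation lives in kiln_ai.datamodel and covers more types and keys), a Pydantic model validator could enforce it like this:

from enum import Enum
from typing import Any

from pydantic import BaseModel, model_validator


class DataSourceType(str, Enum):
    human = "human"
    synthetic = "synthetic"
    file_import = "file_import"
    tool_call = "tool_call"


# (required keys, disallowed keys) per type -- an illustrative subset only
PROPERTY_RULES: dict[DataSourceType, tuple[set, set]] = {
    DataSourceType.human: ({"created_by"}, set()),
    DataSourceType.tool_call: (set(), {"created_by", "file_name"}),
}


class DataSource(BaseModel):
    type: DataSourceType
    properties: dict[str, Any] = {}

    @model_validator(mode="after")
    def validate_properties(self) -> "DataSource":
        required, disallowed = PROPERTY_RULES.get(self.type, (set(), set()))
        for key in required:
            if key not in self.properties:
                raise ValueError(
                    f"'{key}' is required for {self.type.value} data sources"
                )
        for key in disallowed:
            if key in self.properties:
                raise ValueError(
                    f"'{key}' is not allowed for {self.type.value} data sources"
                )
        return self

Under these rules, DataSource(type=DataSourceType.tool_call, properties={}) validates, while including created_by raises a ValueError that pydantic surfaces as a ValidationError, matching the match= strings used in the tests.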
kiln_ai/datamodel/test_embedding_models.py

@@ -0,0 +1,448 @@
+import uuid
+from pathlib import Path
+
+import pytest
+
+from kiln_ai.datamodel.basemodel import KilnAttachmentModel
+from kiln_ai.datamodel.chunk import Chunk, ChunkedDocument
+from kiln_ai.datamodel.embedding import ChunkEmbeddings, Embedding, EmbeddingConfig
+from kiln_ai.datamodel.project import Project
+
+
+@pytest.fixture
+def mock_project(tmp_path):
+    project_root = tmp_path / str(uuid.uuid4())
+    project_root.mkdir()
+    project = Project(
+        name="Test Project",
+        description="Test description",
+        path=project_root / "project.kiln",
+    )
+    project.save_to_file()
+    return project
+
+
+@pytest.fixture
+def mock_chunked_document(tmp_path):
+    # Create a temporary file for the attachment
+    tmp_dir = tmp_path / str(uuid.uuid4())
+    tmp_dir.mkdir()
+
+    tmp_path_file = Path(tmp_dir) / f"{uuid.uuid4()}.txt"
+    tmp_path_file.write_text("test content")
+
+    attachment = KilnAttachmentModel.from_file(tmp_path_file)
+    chunks = [Chunk(content=attachment) for _ in range(3)]
+
+    doc = ChunkedDocument(
+        chunks=chunks,
+        chunker_config_id="fake-chunker-id",
+        path=Path(tmp_dir) / "chunked_document.kiln",
+    )
+    doc.save_to_file()
+
+    return doc
+
+
+class TestEmbeddingConfig:
+    """Test the EmbeddingConfig class."""
+
+    def test_required_fields(self):
+        """Test that required fields are set correctly."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.name == "test-embedding"
+        assert config.model_provider_name == "openai"
+        assert config.model_name == "openai_text_embedding_3_small"
+        assert config.properties == {"dimensions": 1536}
+
+    def test_optional_description(self):
+        """Test that description is optional."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.description is None
+
+        config_with_desc = EmbeddingConfig(
+            name="test-embedding",
+            description="A test embedding config",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config_with_desc.description == "A test embedding config"
+
+    def test_name_validation(self):
+        """Test name field validation."""
+        # Test valid name
+        config = EmbeddingConfig(
+            name="valid-name_123",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.name == "valid-name_123"
+
+        # Test empty name
+        with pytest.raises(ValueError):
+            EmbeddingConfig(
+                name="",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": 1536},
+            )
+
+    def test_properties_validation(self):
+        """Test properties field validation."""
+        # Test with valid properties
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={
+                "dimensions": 1536,
+            },
+        )
+        assert config.properties == {
+            "dimensions": 1536,
+        }
+
+        # Test with empty properties
+        config_empty = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={},
+        )
+        assert config_empty.properties == {}
+
+    def test_parent_project_method_no_parent(self):
+        """Test parent_project method when no parent is set."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.parent_project() is None
+
+    def test_parent_project_method_with_project_parent(self, mock_project):
+        """Test parent_project method when parent is a Project."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+            parent=mock_project,
+        )
+        assert config.parent_project() == mock_project
+
+    def test_model_provider_name_validation(self, mock_project):
+        """Test model_provider_name field validation."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={},
+            parent=mock_project,
+        )
+        assert config.model_provider_name == "openai"
+
+        with pytest.raises(ValueError):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="invalid-provider",
+                model_name="openai_text_embedding_3_small",
+                parent=mock_project,
+                properties={},
+            )
+
+    def test_custom_dimensions_validation(self):
+        """Test that custom dimensions are properly validated."""
+
+        # this model supports custom dimensions
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+        )
+        assert config.properties == {"dimensions": 1536}
+
+        # dimensions is negative
+        with pytest.raises(ValueError, match="Dimensions must be a positive integer"):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": -1},
+            )
+
+        # dimensions is not an integer
+        with pytest.raises(ValueError, match="Dimensions must be a positive integer"):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": 1.5},
+            )
+
+        # dimensions is not a positive integer
+        with pytest.raises(ValueError, match="Dimensions must be a positive integer"):
+            EmbeddingConfig(
+                name="test-embedding",
+                model_provider_name="openai",
+                model_name="openai_text_embedding_3_small",
+                properties={"dimensions": "512"},
+            )
+
+    def test_dimensions_optional(self):
+        """Test that dimensions is optional and should be ignored if not provided."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={},
+        )
+        assert config.properties == {}
+
+
+class TestEmbedding:
+    """Test the Embedding class."""
+
+    def test_required_fields(self):
+        """Test that required fields are properly validated."""
+        vector = [0.1, 0.2, 0.3, 0.4, 0.5]
+        embedding = Embedding(vector=vector)
+        assert embedding.vector == vector
+
+    def test_vector_validation(self):
+        """Test that vector field is properly validated."""
+        # Test with valid vector
+        vector = [0.1, 0.2, 0.3]
+        embedding = Embedding(vector=vector)
+        assert embedding.vector == vector
+
+        # Test with empty vector
+        empty_vector = []
+        embedding_empty = Embedding(vector=empty_vector)
+        assert embedding_empty.vector == empty_vector
+
+        # Test with large vector
+        large_vector = [0.1] * 1536
+        embedding_large = Embedding(vector=large_vector)
+        assert len(embedding_large.vector) == 1536
+
+    def test_vector_types(self):
+        """Test that vector accepts different numeric types."""
+        # Test with integers
+        int_vector = [1, 2, 3, 4, 5]
+        embedding_int = Embedding(vector=int_vector)
+        assert embedding_int.vector == int_vector
+
+        # Test with floats
+        float_vector = [1.1, 2.2, 3.3, 4.4, 5.5]
+        embedding_float = Embedding(vector=float_vector)
+        assert embedding_float.vector == float_vector
+
+        # Test with mixed types
+        mixed_vector = [1, 2.5, 3, 4.7, 5]
+        embedding_mixed = Embedding(vector=mixed_vector)
+        assert embedding_mixed.vector == mixed_vector
+
+
+class TestChunkEmbeddings:
+    """Test the ChunkEmbeddings class."""
+
+    def test_required_fields(self):
+        """Test that required fields are properly validated."""
+        embedding_config_id = "test-config-id"
+        embeddings = [
+            Embedding(vector=[0.1, 0.2, 0.3]),
+            Embedding(vector=[0.4, 0.5, 0.6]),
+        ]
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings.embedding_config_id == embedding_config_id
+        assert chunk_embeddings.embeddings == embeddings
+
+    def test_embeddings_validation(self):
+        """Test that embeddings field validation works correctly."""
+        embedding_config_id = "test-config-id"
+
+        # Test with valid list of embeddings
+        embeddings = [Embedding(vector=[0.1, 0.2, 0.3])]
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings.embeddings == embeddings
+
+        # Test with empty embeddings list
+        empty_embeddings = []
+        chunk_embeddings_empty = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=empty_embeddings,
+        )
+        assert chunk_embeddings_empty.embeddings == empty_embeddings
+
+        # Test with multiple embeddings
+        multiple_embeddings = [
+            Embedding(vector=[0.1, 0.2, 0.3]),
+            Embedding(vector=[0.4, 0.5, 0.6]),
+            Embedding(vector=[0.7, 0.8, 0.9]),
+        ]
+        chunk_embeddings_multiple = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=multiple_embeddings,
+        )
+        assert chunk_embeddings_multiple.embeddings == multiple_embeddings
+        assert len(chunk_embeddings_multiple.embeddings) == 3
+
+    def test_embedding_config_id_validation(self):
+        """Test embedding_config_id field validation."""
+        embeddings = [Embedding(vector=[0.1, 0.2, 0.3])]
+
+        # Test with valid ID
+        valid_id = "test-config-id-123"
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=valid_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings.embedding_config_id == valid_id
+
+        # Test with numeric string ID
+        numeric_id = "12345"
+        chunk_embeddings_numeric = ChunkEmbeddings(
+            embedding_config_id=numeric_id,
+            embeddings=embeddings,
+        )
+        assert chunk_embeddings_numeric.embedding_config_id == numeric_id
+
+    def test_parent_chunked_document_method_no_parent(self):
+        """Test parent_chunked_document method when no parent is set."""
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=[Embedding(vector=[0.1, 0.2, 0.3])],
+        )
+        assert chunk_embeddings.parent_chunked_document() is None
+
+    def test_parent_chunked_document_method_with_chunked_document_parent(
+        self, mock_chunked_document
+    ):
+        """Test parent_chunked_document method when parent is a ChunkedDocument."""
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=[Embedding(vector=[0.1, 0.2, 0.3])],
+            parent=mock_chunked_document,
+        )
+        assert chunk_embeddings.parent_chunked_document() == mock_chunked_document
+
+    def test_embeddings_correspond_to_chunks(self, mock_chunked_document):
+        """Test that embeddings correspond to chunks in the parent chunked document."""
+        # Create embeddings that match the number of chunks in the parent
+        num_chunks = len(mock_chunked_document.chunks)
+        embeddings = [Embedding(vector=[0.1, 0.2, 0.3]) for _ in range(num_chunks)]
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=embeddings,
+            parent=mock_chunked_document,
+        )
+        assert len(chunk_embeddings.embeddings) == num_chunks
+
+    def test_embeddings_with_different_vector_sizes(self):
+        """Test embeddings with different vector sizes."""
+        embedding_config_id = "test-config-id"
+        embeddings = [
+            Embedding(vector=[0.1, 0.2, 0.3]),  # 3 dimensions
+            Embedding(vector=[0.4, 0.5, 0.6, 0.7]),  # 4 dimensions
+            Embedding(vector=[0.8, 0.9]),  # 2 dimensions
+        ]
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config_id,
+            embeddings=embeddings,
+        )
+        assert len(chunk_embeddings.embeddings) == 3
+        assert len(chunk_embeddings.embeddings[0].vector) == 3
+        assert len(chunk_embeddings.embeddings[1].vector) == 4
+        assert len(chunk_embeddings.embeddings[2].vector) == 2
+
+
+class TestEmbeddingIntegration:
+    """Integration tests for embedding models."""
+
+    def test_embedding_config_with_project_parent(self, mock_project):
+        """Test EmbeddingConfig with Project parent."""
+        config = EmbeddingConfig(
+            name="test-embedding",
+            description="Test embedding configuration",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+            parent=mock_project,
+        )
+        assert config.parent_project() == mock_project
+        assert config.name == "test-embedding"
+        assert config.model_provider_name == "openai"
+        assert config.model_name == "openai_text_embedding_3_small"
+
+    def test_chunk_embeddings_with_chunked_document_parent(self, mock_chunked_document):
+        """Test ChunkEmbeddings with ChunkedDocument parent."""
+        # Create embeddings for each chunk
+        embeddings = []
+        for chunk in mock_chunked_document.chunks:
+            # Create a mock embedding (in real usage, this would be generated by the embedding model)
+            embedding = Embedding(vector=[0.1, 0.2, 0.3, 0.4, 0.5])
+            embeddings.append(embedding)
+
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id="test-config-id",
+            embeddings=embeddings,
+            parent=mock_chunked_document,
+        )
+        assert chunk_embeddings.parent_chunked_document() == mock_chunked_document
+        assert len(chunk_embeddings.embeddings) == len(mock_chunked_document.chunks)
+
+    def test_embedding_workflow(self, mock_project, mock_chunked_document):
+        """Test a complete embedding workflow."""
+        # 1. Create an embedding config
+        embedding_config = EmbeddingConfig(
+            name="test-embedding-config",
+            description="Test embedding configuration for workflow",
+            model_provider_name="openai",
+            model_name="openai_text_embedding_3_small",
+            properties={"dimensions": 1536},
+            parent=mock_project,
+        )
+
+        # 2. Create embeddings for the chunked document
+        embeddings = []
+        for chunk in mock_chunked_document.chunks:
+            # Simulate embedding generation
+            embedding = Embedding(vector=[0.1] * 1536)
+            embeddings.append(embedding)
+
+        # 3. Create chunk embeddings
+        chunk_embeddings = ChunkEmbeddings(
+            embedding_config_id=embedding_config.id,
+            embeddings=embeddings,
+            parent=mock_chunked_document,
+        )
+
+        # 4. Verify the relationships
+        assert embedding_config.parent_project() == mock_project
+        assert chunk_embeddings.parent_chunked_document() == mock_chunked_document
+        assert len(chunk_embeddings.embeddings) == len(mock_chunked_document.chunks)
+        assert chunk_embeddings.embedding_config_id == embedding_config.id
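
For orientation, the new test file above implies the rough shape of kiln_ai/datamodel/embedding.py: EmbeddingConfig holds a named provider/model pair plus a properties dict whose optional dimensions entry must be a positive integer, Embedding is a bare float vector, and ChunkEmbeddings ties a list of vectors back to an embedding config and a parent ChunkedDocument. Below is a minimal sketch of that surface, assuming plain Pydantic v2 and omitting Kiln's base-model machinery (parent_project, parent_chunked_document, id, save_to_file); the provider allow-list is an assumption standing in for Kiln's real provider enum.

from typing import Any, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator


class EmbeddingConfig(BaseModel):
    # allow field names starting with "model_" without pydantic warnings
    model_config = ConfigDict(protected_namespaces=())

    name: str = Field(min_length=1)  # empty names are rejected
    description: Optional[str] = None
    model_provider_name: str
    model_name: str
    properties: dict[str, Any] = Field(default_factory=dict)

    @field_validator("model_provider_name")
    @classmethod
    def validate_provider(cls, v: str) -> str:
        # ASSUMPTION: a small allow-list stands in for Kiln's provider enum
        if v not in {"openai", "gemini_api"}:
            raise ValueError(f"Invalid provider: {v}")
        return v

    @field_validator("properties")
    @classmethod
    def validate_dimensions(cls, v: dict[str, Any]) -> dict[str, Any]:
        # "dimensions" is optional, but must be a positive int when present
        if "dimensions" in v:
            d = v["dimensions"]
            if isinstance(d, bool) or not isinstance(d, int) or d <= 0:
                raise ValueError("Dimensions must be a positive integer")
        return v


class Embedding(BaseModel):
    vector: list[float]  # any length; ints coerce to floats


class ChunkEmbeddings(BaseModel):
    embedding_config_id: str
    embeddings: list[Embedding]

Note that nothing in the tests forces embeddings to share a vector size or to match the parent document's chunk count; the tests only check those properties when the caller constructs them that way.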
kiln_ai/datamodel/test_eval_model.py

@@ -402,13 +402,13 @@ def test_eval_run_five_star_score_validation(valid_eval_config, valid_eval_run_data):
     assert run.scores["accuracy"] == 4.5
 
     # Invalid scores
-    with pytest.raises(ValueError, match="must be a float between 1.0 and 5.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 1.0 and 5.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"accuracy": 0.5}},
         )
 
-    with pytest.raises(ValueError, match="must be a float between 1.0 and 5.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 1.0 and 5.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"accuracy": 5.5}},

@@ -442,13 +442,13 @@ def test_eval_run_pass_fail_score_validation(valid_eval_config, valid_eval_run_data):
     assert run.scores["check"] == 0.0
 
     # Invalid scores
-    with pytest.raises(ValueError, match="must be a float between 0.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 0.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"check": -0.1}},
         )
 
-    with pytest.raises(ValueError, match="must be a float between 0.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between 0.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"check": 1.1}},

@@ -485,13 +485,13 @@ def test_eval_run_pass_fail_critical_score_validation(
     assert run.scores["critical"] == -1.0
 
     # Invalid scores
-    with pytest.raises(ValueError, match="must be a float between -1.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between -1.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"critical": -1.1}},
         )
 
-    with pytest.raises(ValueError, match="must be a float between -1.0 and 1.0"):
+    with pytest.raises(ValueError, match=r"must be a float between -1.0 and 1.0"):
         run = EvalRun(
             parent=valid_eval_config,
             **{**valid_eval_run_data, "scores": {"critical": 1.1}},
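
One note on the test_eval_model.py change: pytest.raises interprets match as a regular expression applied with re.search, so these patterns were always regexes and the unescaped dots match any character. The r prefix does not change matching for these particular strings; it marks regex intent and keeps any future escapes (such as \.) literal. A quick demonstration:

import re

pattern = r"must be a float between 1.0 and 5.0"
# Matches the real error message:
assert re.search(pattern, "score must be a float between 1.0 and 5.0")
# But "." is a regex wildcard, so the pattern also matches look-alikes:
assert re.search(pattern, "must be a float between 1x0 and 5x0")
# Escaping the dots would tighten the match; a raw string keeps the
# backslash literal for re: r"must be a float between 1\.0 and 5\.0"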