kiln-ai 0.19.0__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (158):
  1. kiln_ai/adapters/__init__.py +8 -2
  2. kiln_ai/adapters/adapter_registry.py +43 -208
  3. kiln_ai/adapters/chat/chat_formatter.py +8 -12
  4. kiln_ai/adapters/chat/test_chat_formatter.py +6 -2
  5. kiln_ai/adapters/chunkers/__init__.py +13 -0
  6. kiln_ai/adapters/chunkers/base_chunker.py +42 -0
  7. kiln_ai/adapters/chunkers/chunker_registry.py +16 -0
  8. kiln_ai/adapters/chunkers/fixed_window_chunker.py +39 -0
  9. kiln_ai/adapters/chunkers/helpers.py +23 -0
  10. kiln_ai/adapters/chunkers/test_base_chunker.py +63 -0
  11. kiln_ai/adapters/chunkers/test_chunker_registry.py +28 -0
  12. kiln_ai/adapters/chunkers/test_fixed_window_chunker.py +346 -0
  13. kiln_ai/adapters/chunkers/test_helpers.py +75 -0
  14. kiln_ai/adapters/data_gen/test_data_gen_task.py +9 -3
  15. kiln_ai/adapters/docker_model_runner_tools.py +119 -0
  16. kiln_ai/adapters/embedding/__init__.py +0 -0
  17. kiln_ai/adapters/embedding/base_embedding_adapter.py +44 -0
  18. kiln_ai/adapters/embedding/embedding_registry.py +32 -0
  19. kiln_ai/adapters/embedding/litellm_embedding_adapter.py +199 -0
  20. kiln_ai/adapters/embedding/test_base_embedding_adapter.py +283 -0
  21. kiln_ai/adapters/embedding/test_embedding_registry.py +166 -0
  22. kiln_ai/adapters/embedding/test_litellm_embedding_adapter.py +1149 -0
  23. kiln_ai/adapters/eval/base_eval.py +2 -2
  24. kiln_ai/adapters/eval/eval_runner.py +9 -3
  25. kiln_ai/adapters/eval/g_eval.py +2 -2
  26. kiln_ai/adapters/eval/test_base_eval.py +2 -4
  27. kiln_ai/adapters/eval/test_g_eval.py +4 -5
  28. kiln_ai/adapters/extractors/__init__.py +18 -0
  29. kiln_ai/adapters/extractors/base_extractor.py +72 -0
  30. kiln_ai/adapters/extractors/encoding.py +20 -0
  31. kiln_ai/adapters/extractors/extractor_registry.py +44 -0
  32. kiln_ai/adapters/extractors/extractor_runner.py +112 -0
  33. kiln_ai/adapters/extractors/litellm_extractor.py +386 -0
  34. kiln_ai/adapters/extractors/test_base_extractor.py +244 -0
  35. kiln_ai/adapters/extractors/test_encoding.py +54 -0
  36. kiln_ai/adapters/extractors/test_extractor_registry.py +181 -0
  37. kiln_ai/adapters/extractors/test_extractor_runner.py +181 -0
  38. kiln_ai/adapters/extractors/test_litellm_extractor.py +1192 -0
  39. kiln_ai/adapters/fine_tune/__init__.py +1 -1
  40. kiln_ai/adapters/fine_tune/openai_finetune.py +14 -4
  41. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +2 -2
  42. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +2 -6
  43. kiln_ai/adapters/fine_tune/test_openai_finetune.py +108 -111
  44. kiln_ai/adapters/fine_tune/test_together_finetune.py +2 -6
  45. kiln_ai/adapters/ml_embedding_model_list.py +192 -0
  46. kiln_ai/adapters/ml_model_list.py +761 -37
  47. kiln_ai/adapters/model_adapters/base_adapter.py +51 -21
  48. kiln_ai/adapters/model_adapters/litellm_adapter.py +380 -138
  49. kiln_ai/adapters/model_adapters/test_base_adapter.py +193 -17
  50. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -2
  51. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +1103 -0
  52. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +5 -5
  53. kiln_ai/adapters/model_adapters/test_structured_output.py +113 -5
  54. kiln_ai/adapters/ollama_tools.py +69 -12
  55. kiln_ai/adapters/parsers/__init__.py +1 -1
  56. kiln_ai/adapters/provider_tools.py +205 -47
  57. kiln_ai/adapters/rag/deduplication.py +49 -0
  58. kiln_ai/adapters/rag/progress.py +252 -0
  59. kiln_ai/adapters/rag/rag_runners.py +844 -0
  60. kiln_ai/adapters/rag/test_deduplication.py +195 -0
  61. kiln_ai/adapters/rag/test_progress.py +785 -0
  62. kiln_ai/adapters/rag/test_rag_runners.py +2376 -0
  63. kiln_ai/adapters/remote_config.py +80 -8
  64. kiln_ai/adapters/repair/test_repair_task.py +12 -9
  65. kiln_ai/adapters/run_output.py +3 -0
  66. kiln_ai/adapters/test_adapter_registry.py +657 -85
  67. kiln_ai/adapters/test_docker_model_runner_tools.py +305 -0
  68. kiln_ai/adapters/test_ml_embedding_model_list.py +429 -0
  69. kiln_ai/adapters/test_ml_model_list.py +251 -1
  70. kiln_ai/adapters/test_ollama_tools.py +340 -1
  71. kiln_ai/adapters/test_prompt_adaptors.py +13 -6
  72. kiln_ai/adapters/test_prompt_builders.py +1 -1
  73. kiln_ai/adapters/test_provider_tools.py +254 -8
  74. kiln_ai/adapters/test_remote_config.py +651 -58
  75. kiln_ai/adapters/vector_store/__init__.py +1 -0
  76. kiln_ai/adapters/vector_store/base_vector_store_adapter.py +83 -0
  77. kiln_ai/adapters/vector_store/lancedb_adapter.py +389 -0
  78. kiln_ai/adapters/vector_store/test_base_vector_store.py +160 -0
  79. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +1841 -0
  80. kiln_ai/adapters/vector_store/test_vector_store_registry.py +199 -0
  81. kiln_ai/adapters/vector_store/vector_store_registry.py +33 -0
  82. kiln_ai/datamodel/__init__.py +39 -34
  83. kiln_ai/datamodel/basemodel.py +170 -1
  84. kiln_ai/datamodel/chunk.py +158 -0
  85. kiln_ai/datamodel/datamodel_enums.py +28 -0
  86. kiln_ai/datamodel/embedding.py +64 -0
  87. kiln_ai/datamodel/eval.py +1 -1
  88. kiln_ai/datamodel/external_tool_server.py +298 -0
  89. kiln_ai/datamodel/extraction.py +303 -0
  90. kiln_ai/datamodel/json_schema.py +25 -10
  91. kiln_ai/datamodel/project.py +40 -1
  92. kiln_ai/datamodel/rag.py +79 -0
  93. kiln_ai/datamodel/registry.py +0 -15
  94. kiln_ai/datamodel/run_config.py +62 -0
  95. kiln_ai/datamodel/task.py +2 -77
  96. kiln_ai/datamodel/task_output.py +6 -1
  97. kiln_ai/datamodel/task_run.py +41 -0
  98. kiln_ai/datamodel/test_attachment.py +649 -0
  99. kiln_ai/datamodel/test_basemodel.py +4 -4
  100. kiln_ai/datamodel/test_chunk_models.py +317 -0
  101. kiln_ai/datamodel/test_dataset_split.py +1 -1
  102. kiln_ai/datamodel/test_embedding_models.py +448 -0
  103. kiln_ai/datamodel/test_eval_model.py +6 -6
  104. kiln_ai/datamodel/test_example_models.py +175 -0
  105. kiln_ai/datamodel/test_external_tool_server.py +691 -0
  106. kiln_ai/datamodel/test_extraction_chunk.py +206 -0
  107. kiln_ai/datamodel/test_extraction_model.py +470 -0
  108. kiln_ai/datamodel/test_rag.py +641 -0
  109. kiln_ai/datamodel/test_registry.py +8 -3
  110. kiln_ai/datamodel/test_task.py +15 -47
  111. kiln_ai/datamodel/test_tool_id.py +320 -0
  112. kiln_ai/datamodel/test_vector_store.py +320 -0
  113. kiln_ai/datamodel/tool_id.py +105 -0
  114. kiln_ai/datamodel/vector_store.py +141 -0
  115. kiln_ai/tools/__init__.py +8 -0
  116. kiln_ai/tools/base_tool.py +82 -0
  117. kiln_ai/tools/built_in_tools/__init__.py +13 -0
  118. kiln_ai/tools/built_in_tools/math_tools.py +124 -0
  119. kiln_ai/tools/built_in_tools/test_math_tools.py +204 -0
  120. kiln_ai/tools/mcp_server_tool.py +95 -0
  121. kiln_ai/tools/mcp_session_manager.py +246 -0
  122. kiln_ai/tools/rag_tools.py +157 -0
  123. kiln_ai/tools/test_base_tools.py +199 -0
  124. kiln_ai/tools/test_mcp_server_tool.py +457 -0
  125. kiln_ai/tools/test_mcp_session_manager.py +1585 -0
  126. kiln_ai/tools/test_rag_tools.py +848 -0
  127. kiln_ai/tools/test_tool_registry.py +562 -0
  128. kiln_ai/tools/tool_registry.py +85 -0
  129. kiln_ai/utils/__init__.py +3 -0
  130. kiln_ai/utils/async_job_runner.py +62 -17
  131. kiln_ai/utils/config.py +24 -2
  132. kiln_ai/utils/env.py +15 -0
  133. kiln_ai/utils/filesystem.py +14 -0
  134. kiln_ai/utils/filesystem_cache.py +60 -0
  135. kiln_ai/utils/litellm.py +94 -0
  136. kiln_ai/utils/lock.py +100 -0
  137. kiln_ai/utils/mime_type.py +38 -0
  138. kiln_ai/utils/open_ai_types.py +94 -0
  139. kiln_ai/utils/pdf_utils.py +38 -0
  140. kiln_ai/utils/project_utils.py +17 -0
  141. kiln_ai/utils/test_async_job_runner.py +151 -35
  142. kiln_ai/utils/test_config.py +138 -1
  143. kiln_ai/utils/test_env.py +142 -0
  144. kiln_ai/utils/test_filesystem_cache.py +316 -0
  145. kiln_ai/utils/test_litellm.py +206 -0
  146. kiln_ai/utils/test_lock.py +185 -0
  147. kiln_ai/utils/test_mime_type.py +66 -0
  148. kiln_ai/utils/test_open_ai_types.py +131 -0
  149. kiln_ai/utils/test_pdf_utils.py +73 -0
  150. kiln_ai/utils/test_uuid.py +111 -0
  151. kiln_ai/utils/test_validation.py +524 -0
  152. kiln_ai/utils/uuid.py +9 -0
  153. kiln_ai/utils/validation.py +90 -0
  154. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/METADATA +12 -5
  155. kiln_ai-0.21.0.dist-info/RECORD +211 -0
  156. kiln_ai-0.19.0.dist-info/RECORD +0 -115
  157. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/WHEEL +0 -0
  158. {kiln_ai-0.19.0.dist-info → kiln_ai-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,185 @@
1
+ import asyncio
2
+
3
+ from .lock import AsyncLockManager, shared_async_lock_manager
4
+
5
+
6
async def test_same_key_returns_same_lock():
    """A key that is currently held shows up in the manager's snapshot."""
    manager = AsyncLockManager()

    async with manager.acquire("test_key"):
        # While the lock is held, the manager must still be tracking the key.
        tracked = await manager.snapshot()
        assert "test_key" in tracked
15
+
16
+
17
async def test_different_keys_return_different_locks():
    """Two distinct keys can be held at once and both appear in the snapshot."""
    manager = AsyncLockManager()

    # Entered left to right: "key1" is acquired before "key2".
    async with manager.acquire("key1"), manager.acquire("key2"):
        tracked = await manager.snapshot()
        assert "key1" in tracked
        assert "key2" in tracked
27
+
28
+
29
async def test_lock_functionality():
    """Check that holders of the same key never overlap.

    Three workers contend for one key. Each records a start event, sleeps
    briefly, then records an end event, all while holding the lock. If access
    is exclusive, every start event is immediately followed by the same
    worker's end event.
    """
    results = []
    locks = AsyncLockManager()
    worker_count = 3

    async def worker(worker_id: int):
        async with locks.acquire("shared_resource"):
            results.append(f"worker_{worker_id}_start")
            await asyncio.sleep(0.1)  # Simulate work
            results.append(f"worker_{worker_id}_end")

    # Run all workers concurrently so they actually contend for the key.
    await asyncio.gather(*(worker(i) for i in range(worker_count)))

    # Every worker must have logged exactly one start and one end. Without
    # this check an empty log would pass vacuously and an odd-length log
    # would surface as an IndexError instead of a readable failure.
    expected_events = 2 * worker_count
    assert len(results) == expected_events, (
        f"Expected {expected_events} events, got {len(results)}. Full results: {results}"
    )

    # Walk the log in (start, end) pairs.
    for start_event, end_event in zip(results[::2], results[1::2]):
        # Events look like "worker_<id>_start"; the id is the middle field.
        worker_id = start_event.split("_")[1]
        expected_end = f"worker_{worker_id}_end"

        assert end_event == expected_end, (
            f"Non-exclusive access detected. Expected {expected_end}, got {end_event}. Full results: {results}"
        )
60
+
61
+
62
async def test_lock_cleanup():
    """Once the only holder releases a key, the snapshot no longer lists it."""
    manager = AsyncLockManager()

    async with manager.acquire("cleanup_test"):
        pass  # acquire and release immediately

    remaining = await manager.snapshot()
    assert "cleanup_test" not in remaining
73
+
74
+
75
async def test_multiple_holders_cleanup():
    """A key used by several holders in turn is dropped after the last one."""
    manager = AsyncLockManager()

    async def holder(holder_id: int):
        async with manager.acquire("multi_holder"):
            await asyncio.sleep(0.05)
            return f"holder_{holder_id}_done"

    # Each holder is awaited to completion before the next one starts, so the
    # key is held by one holder after another rather than contended.
    outcomes = [await holder(n) for n in range(3)]

    assert len(outcomes) == 3
    assert all(outcome.startswith("holder_") for outcome in outcomes)

    remaining = await manager.snapshot()
    assert "multi_holder" not in remaining
98
+
99
+
100
async def test_global_instance():
    """The module-level shared_async_lock_manager can be acquired repeatedly."""
    events = []

    async def worker(worker_id: int):
        async with shared_async_lock_manager.acquire("global_test"):
            events.append(f"worker_{worker_id}_start")
            await asyncio.sleep(0.05)
            events.append(f"worker_{worker_id}_end")

    # Workers run back to back, never concurrently.
    await worker(0)
    await worker(1)

    assert events == [
        "worker_0_start",
        "worker_0_end",
        "worker_1_start",
        "worker_1_end",
    ]
120
+
121
+
122
async def test_timeout():
    """Acquiring an already-held key with a short timeout raises TimeoutError.

    A background task holds the key for 0.3s. A second acquire with
    ``timeout=0.1`` is expected to raise ``asyncio.TimeoutError``.
    """
    locks = AsyncLockManager()

    async def holder():
        # Keeps the key busy for longer than the waiter's timeout.
        async with locks.acquire("timeout_test"):
            await asyncio.sleep(0.3)

    async def waiter():
        try:
            async with locks.acquire("timeout_test", timeout=0.1):
                # Explicit raise rather than `assert False`, which is removed
                # when Python runs with -O.
                raise AssertionError("Should have timed out")
        except asyncio.TimeoutError:
            return "timed_out"

    holder_task = asyncio.create_task(holder())
    try:
        await asyncio.sleep(0.05)  # Let holder acquire the lock

        result = await waiter()
        assert result == "timed_out"
    finally:
        # Always drain the holder so a failing assertion above does not leave
        # a pending task behind.
        await holder_task
149
+
150
+
151
async def test_cancellation():
    """Cancelling a task that is waiting on a held key does not break the test flow.

    A background task holds the key for 0.3s. A second task starts waiting
    for the same key and is cancelled shortly after. The holder must still
    finish normally.
    """
    locks = AsyncLockManager()

    async def holder():
        async with locks.acquire("cancel_test"):
            await asyncio.sleep(0.3)

    async def waiter():
        try:
            async with locks.acquire("cancel_test"):
                # Explicit raise rather than `assert False`, which is removed
                # when Python runs with -O.
                raise AssertionError("Should not acquire lock")
        except asyncio.CancelledError:
            return "cancelled"

    holder_task = asyncio.create_task(holder())
    try:
        await asyncio.sleep(0.05)  # Let holder acquire the lock

        waiter_task = asyncio.create_task(waiter())
        await asyncio.sleep(0.05)  # Give the waiter time to start waiting
        waiter_task.cancel()

        # Two outcomes are accepted: the waiter caught the cancellation and
        # returned "cancelled", or the CancelledError reached this await.
        try:
            result = await waiter_task
            assert result == "cancelled"
        except asyncio.CancelledError:
            pass  # Expected
    finally:
        # Always drain the holder so a failure above does not leave a pending
        # task behind.
        await holder_task
@@ -0,0 +1,66 @@
1
+ from kiln_ai.utils.mime_type import guess_mime_type
2
+
3
+
4
def test_mov_files():
    """.mov names map to video/quicktime, with or without a directory prefix."""
    for filename in ("video.mov", "my_video.mov", "path/to/video.mov"):
        assert guess_mime_type(filename) == "video/quicktime"
8
+
9
+
10
def test_mp3_files():
    """.mp3 names map to audio/mpeg, with or without a directory prefix."""
    for filename in ("song.mp3", "music_file.mp3", "audio/track.mp3"):
        assert guess_mime_type(filename) == "audio/mpeg"
14
+
15
+
16
def test_wav_files():
    """.wav names map to audio/wav, with or without a directory prefix."""
    for filename in ("sound.wav", "audio_file.wav", "sounds/effect.wav"):
        assert guess_mime_type(filename) == "audio/wav"
20
+
21
+
22
def test_mp4_files():
    """.mp4 names map to video/mp4, with or without a directory prefix."""
    for filename in ("movie.mp4", "video_file.mp4", "videos/clip.mp4"):
        assert guess_mime_type(filename) == "video/mp4"
26
+
27
+
28
def test_case_insensitive_extensions():
    """Upper-case extensions resolve to the same MIME types as lower-case ones."""
    expected_by_filename = {
        "video.MOV": "video/quicktime",
        "song.MP3": "audio/mpeg",
        "sound.WAV": "audio/wav",
        "movie.MP4": "video/mp4",
    }
    for filename, mime_type in expected_by_filename.items():
        assert guess_mime_type(filename) == mime_type
33
+
34
+
35
def test_standard_mimetypes_fallback():
    """Common document, image, text and JSON extensions resolve to their usual types."""
    expected_by_filename = {
        "document.pdf": "application/pdf",
        "image.jpg": "image/jpeg",
        "image.png": "image/png",
        "text.txt": "text/plain",
        "data.json": "application/json",
    }
    for filename, mime_type in expected_by_filename.items():
        assert guess_mime_type(filename) == mime_type
41
+
42
+
43
def test_unknown_extensions():
    """An unrecognised extension, or no extension at all, yields None."""
    for filename in ("file.invalidmime", "no_extension"):
        assert guess_mime_type(filename) is None
46
+
47
+
48
def test_edge_cases():
    """Exercise multi-dot names, the empty string, and extension-only names."""
    # Names containing more than one dot: the trailing extension is what counts.
    multi_dot_cases = [
        ("video.backup.mov", "video/quicktime"),
        ("song.remix.mp3", "audio/mpeg"),
        ("my.video.mov", "video/quicktime"),
        ("track.1.mp3", "audio/mpeg"),
    ]
    for filename, mime_type in multi_dot_cases:
        assert guess_mime_type(filename) == mime_type

    # Empty filename
    assert guess_mime_type("") is None

    # Names that consist of nothing but the extension
    extension_only_cases = [
        (".mov", "video/quicktime"),
        (".mp3", "audio/mpeg"),
    ]
    for filename, mime_type in extension_only_cases:
        assert guess_mime_type(filename) == mime_type
63
+
64
+
65
def test_priority_order():
    """With two known extensions in a name, the last one decides the type."""
    guessed = guess_mime_type("file.mov.mp3")
    assert guessed == "audio/mpeg"
@@ -0,0 +1,131 @@
1
+ """Tests for OpenAI types wrapper to ensure compatibility."""
2
+
3
+ from typing import get_args, get_origin
4
+
5
+ from openai.types.chat import (
6
+ ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessageParam,
7
+ )
8
+ from openai.types.chat import (
9
+ ChatCompletionMessageParam as OpenAIChatCompletionMessageParam,
10
+ )
11
+
12
+ from kiln_ai.utils.open_ai_types import (
13
+ ChatCompletionAssistantMessageParamWrapper,
14
+ )
15
+ from kiln_ai.utils.open_ai_types import (
16
+ ChatCompletionMessageParam as KilnChatCompletionMessageParam,
17
+ )
18
+
19
+
20
def test_assistant_message_param_properties_match():
    """
    Compare the field names of Kiln's assistant-message wrapper with OpenAI's type.

    The wrapper must declare every field that OpenAI's
    ChatCompletionAssistantMessageParam declares, plus one extra field,
    "reasoning_content". Only field names are compared, not annotation types.

    A failure most likely means the OpenAI type changed and the wrapper has
    not been updated to match.
    """
    openai_fields = set(OpenAIChatCompletionAssistantMessageParam.__annotations__)
    kiln_fields = set(ChatCompletionAssistantMessageParamWrapper.__annotations__)

    # "reasoning_content" is Kiln's own addition: require it, then drop it so
    # the remaining names can be compared one-to-one.
    assert "reasoning_content" in kiln_fields, "Kiln should have reasoning_content"
    kiln_fields.remove("reasoning_content")

    assert openai_fields == kiln_fields, (
        f"Property names don't match. "
        f"OpenAI has: {openai_fields}, "
        f"Kiln has: {kiln_fields}, "
        f"Missing from Kiln: {openai_fields - kiln_fields}, "
        f"Extra in Kiln: {kiln_fields - openai_fields}"
    )
46
+
47
+
48
def test_chat_completion_message_param_union_compatibility():
    """
    Check that Kiln's ChatCompletionMessageParam union mirrors OpenAI's.

    The two aliases must share the same typing origin and member count. Their
    member names must be identical except that Kiln substitutes
    ChatCompletionAssistantMessageParamWrapper for
    ChatCompletionAssistantMessageParam.
    """
    openai_origin = get_origin(OpenAIChatCompletionMessageParam)
    kiln_origin = get_origin(KilnChatCompletionMessageParam)
    assert openai_origin == kiln_origin, (
        f"Both should be Union types. OpenAI: {openai_origin}, "
        f"Kiln: {kiln_origin}"
    )

    openai_members = get_args(OpenAIChatCompletionMessageParam)
    kiln_members = get_args(KilnChatCompletionMessageParam)
    assert len(openai_members) == len(kiln_members), (
        f"Union member count mismatch. OpenAI has {len(openai_members)} members, "
        f"Kiln has {len(kiln_members)} members"
    )

    # Compare by class name so the two sides can be diffed as plain sets.
    openai_names = {member.__name__ for member in openai_members}
    kiln_names = {member.__name__ for member in kiln_members}

    # The only allowed difference is the assistant message type.
    expected_openai_only = {"ChatCompletionAssistantMessageParam"}
    expected_kiln_only = {"ChatCompletionAssistantMessageParamWrapper"}

    openai_only = openai_names - kiln_names
    kiln_only = kiln_names - openai_names

    assert openai_only == expected_openai_only, (
        f"Unexpected types only in OpenAI union: {openai_only - expected_openai_only}"
    )
    assert kiln_only == expected_kiln_only, (
        f"Unexpected types only in Kiln union: {kiln_only - expected_kiln_only}"
    )

    # Everything else must appear on both sides, and be exactly this set.
    shared_names = openai_names & kiln_names
    expected_common_types = {
        "ChatCompletionDeveloperMessageParam",
        "ChatCompletionSystemMessageParam",
        "ChatCompletionUserMessageParam",
        "ChatCompletionToolMessageParam",
        "ChatCompletionFunctionMessageParam",
    }

    assert shared_names == expected_common_types, (
        f"Common types mismatch. Expected: {expected_common_types}, Got: {shared_names}"
    )
101
+
102
+
103
def test_wrapper_can_be_instantiated():
    """Build wrapper-typed dicts, with and without tool calls, and read them back."""
    # Minimal assistant message: role and content only.
    plain_message: ChatCompletionAssistantMessageParamWrapper = {
        "role": "assistant",
        "content": "Hello, world!",
    }

    assert plain_message["role"] == "assistant"
    assert plain_message.get("content") == "Hello, world!"

    # Same shape plus a tool_calls list holding a single function call.
    message_with_tools: ChatCompletionAssistantMessageParamWrapper = {
        "role": "assistant",
        "content": "I'll help you with that.",
        "tool_calls": [
            {
                "id": "call_123",
                "type": "function",
                "function": {"name": "test_function", "arguments": '{"arg": "value"}'},
            }
        ],
    }

    tool_calls = message_with_tools.get("tool_calls", [])
    assert len(tool_calls) == 1
    if tool_calls:
        assert tool_calls[0]["id"] == "call_123"
@@ -0,0 +1,73 @@
1
+ import pytest
2
+ from pypdf import PdfReader
3
+
4
+ from conftest import MockFileFactoryMimeType
5
+ from kiln_ai.utils.pdf_utils import split_pdf_into_pages
6
+
7
+
8
async def test_split_pdf_into_pages_success(mock_file_factory):
    """Split the fixture PDF and check the per-page files and their cleanup."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)

    async with split_pdf_into_pages(source_pdf) as page_paths:
        # The fixture PDF has 2 pages.
        assert len(page_paths) == 2

        # Every page file is on disk and carries a .pdf suffix.
        for path in page_paths:
            assert path.exists()
            assert path.suffix == ".pdf"

        # Files are numbered from 1 in page order.
        assert [path.name for path in page_paths] == ["page_1.pdf", "page_2.pdf"]

        # Each file parses as a PDF containing exactly one page.
        for path in page_paths:
            with open(path, "rb") as handle:
                assert len(PdfReader(handle).pages) == 1

    # Leaving the context removes all page files.
    assert not any(path.exists() for path in page_paths)
34
+
35
+
36
async def test_split_pdf_into_pages_cleanup_on_exception(mock_file_factory):
    """Temporary page files are removed even if the ``async with`` body raises.

    The page paths are captured inside the context, then a RuntimeError is
    raised. After the error propagates, neither the page files nor their
    parent directory may exist.
    """
    test_file = mock_file_factory(MockFileFactoryMimeType.PDF)
    captured_page_paths = []

    with pytest.raises(RuntimeError, match="Simulated error during usage"):
        async with split_pdf_into_pages(test_file) as page_paths:
            # Capture the paths before raising so they can be checked afterwards.
            captured_page_paths.extend(page_paths)
            raise RuntimeError("Simulated error during usage")

    # Without this, an empty capture would make every check below pass
    # vacuously and the test would prove nothing about cleanup.
    assert captured_page_paths, "No page paths were captured before the error"

    # The page files themselves must be gone.
    for page_path in captured_page_paths:
        assert not page_path.exists()

    # The directory that held them must be gone as well.
    temp_dir = captured_page_paths[0].parent
    assert not temp_dir.exists()
57
+
58
+
59
async def test_split_pdf_into_pages_temporary_directory_creation(mock_file_factory):
    """Page files live in a "kiln_pdf_pages_" directory that is removed on exit."""
    source_pdf = mock_file_factory(MockFileFactoryMimeType.PDF)

    async with split_pdf_into_pages(source_pdf) as page_paths:
        # The parent of the first page file is the temporary directory.
        pages_dir = page_paths[0].parent
        assert "kiln_pdf_pages_" in pages_dir.name
        assert pages_dir.exists()

    # Leaving the context removes the temporary directory.
    assert not pages_dir.exists()
@@ -0,0 +1,111 @@
1
+ import uuid
2
+
3
+ import pytest
4
+
5
+ from kiln_ai.utils.uuid import string_to_uuid
6
+
7
+
8
class TestStringToUuid:
    """Tests for string_to_uuid: repeatability, distinctness, and UUID5 format."""

    def test_same_string_produces_same_uuid(self):
        """Repeated calls with one string return equal uuid.UUID objects."""
        test_string = "hello world"

        uuid1 = string_to_uuid(test_string)
        uuid2 = string_to_uuid(test_string)
        uuid3 = string_to_uuid(test_string)

        assert uuid1 == uuid2 == uuid3
        assert isinstance(uuid1, uuid.UUID)

    def test_different_strings_produce_different_uuids(self):
        """Three different strings produce three pairwise-different UUIDs."""
        uuid1 = string_to_uuid("hello")
        uuid2 = string_to_uuid("world")
        uuid3 = string_to_uuid("hello world")

        assert uuid1 != uuid2
        assert uuid1 != uuid3
        assert uuid2 != uuid3

    def test_case_sensitivity(self):
        """Strings differing only in letter case produce different UUIDs."""
        uuid_lower = string_to_uuid("hello")
        uuid_upper = string_to_uuid("HELLO")
        uuid_mixed = string_to_uuid("Hello")

        assert uuid_lower != uuid_upper
        assert uuid_lower != uuid_mixed
        assert uuid_upper != uuid_mixed

    @pytest.mark.parametrize(
        "test_string",
        [
            "",
            "a",
            "test string with spaces",
            "string_with_underscores",
            "string-with-dashes",
            "string.with.dots",
            "string/with/slashes",
            "string@with#special$characters!",
            "1234567890",
            "string with 数字 and unicode 🚀",
            "\n\t\r",  # whitespace characters
            "a" * 1000,  # very long string
        ],
    )
    def test_various_string_inputs(self, test_string):
        """Each input, however unusual, maps to one repeatable uuid.UUID."""
        uuid1 = string_to_uuid(test_string)
        uuid2 = string_to_uuid(test_string)

        assert uuid1 == uuid2
        assert isinstance(uuid1, uuid.UUID)

    def test_uuid_format_is_valid(self):
        """The result reports version 5 and round-trips through its string form."""
        test_string = "test"
        result_uuid = string_to_uuid(test_string)

        # UUID5 should have version 5
        assert result_uuid.version == 5

        # Canonical text form: 36 characters including 4 dashes.
        uuid_str = str(result_uuid)
        assert len(uuid_str) == 36
        assert uuid_str.count("-") == 4

        # Parsing the text form must give back an equal UUID.
        recreated_uuid = uuid.UUID(uuid_str)
        assert recreated_uuid == result_uuid

    def test_deterministic_across_runs(self):
        """Repeated calls agree for a few representative inputs, including "".

        Only call-to-call agreement is checked here. The exact output value
        is pinned separately in test_known_uuid5_behavior.
        """
        for test_string in ("hello", "test", ""):
            result_uuid = string_to_uuid(test_string)
            second_result = string_to_uuid(test_string)
            assert result_uuid == second_result

    def test_known_uuid5_behavior(self):
        """Pin the exact UUID for a known input so mapping changes are caught."""
        test_string = "example.com"
        expected_uuid = "cea6b86d-3f0b-5b2f-b6f2-1174f00da196"
        result_uuid = string_to_uuid(test_string)

        # The expected value is hard-coded, not recomputed, so any change to
        # the string -> UUID mapping fails here.
        assert str(result_uuid) == expected_uuid, (
            f"Expected {test_string} to produce {expected_uuid}, got {result_uuid}. You may have changed the mapping from string to UUID5 - that will break backwards compatibility with code relying on the mapping being deterministic."
        )

        # NOTE(review): only the UUID version is checked; which namespace
        # string_to_uuid hashes under is not verified by this test.
        assert result_uuid.version == 5