firecrawl 4.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +87 -0
- firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
- firecrawl/__tests__/e2e/v2/test_map.py +61 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
- firecrawl/__tests__/e2e/v2/test_search.py +270 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
- firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
- firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
- firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +281 -0
- firecrawl/firecrawl.backup.py +4635 -0
- firecrawl/types.py +167 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +5164 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +967 -0
- firecrawl/v2/client_async.py +408 -0
- firecrawl/v2/methods/agent.py +144 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/agent.py +137 -0
- firecrawl/v2/methods/aio/batch.py +188 -0
- firecrawl/v2/methods/aio/crawl.py +351 -0
- firecrawl/v2/methods/aio/extract.py +133 -0
- firecrawl/v2/methods/aio/map.py +65 -0
- firecrawl/v2/methods/aio/scrape.py +33 -0
- firecrawl/v2/methods/aio/search.py +176 -0
- firecrawl/v2/methods/aio/usage.py +89 -0
- firecrawl/v2/methods/batch.py +499 -0
- firecrawl/v2/methods/crawl.py +592 -0
- firecrawl/v2/methods/extract.py +161 -0
- firecrawl/v2/methods/map.py +83 -0
- firecrawl/v2/methods/scrape.py +64 -0
- firecrawl/v2/methods/search.py +215 -0
- firecrawl/v2/methods/usage.py +84 -0
- firecrawl/v2/types.py +1143 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +178 -0
- firecrawl/v2/utils/http_client_async.py +69 -0
- firecrawl/v2/utils/normalize.py +125 -0
- firecrawl/v2/utils/validation.py +692 -0
- firecrawl/v2/watcher.py +301 -0
- firecrawl/v2/watcher_async.py +243 -0
- firecrawl-4.12.0.dist-info/METADATA +234 -0
- firecrawl-4.12.0.dist-info/RECORD +92 -0
- firecrawl-4.12.0.dist-info/WHEEL +5 -0
- firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
- firecrawl-4.12.0.dist-info/top_level.txt +2 -0
- tests/test_agent_integration.py +277 -0
- tests/test_api_key_handling.py +44 -0
- tests/test_change_tracking.py +98 -0
- tests/test_timeout_conversion.py +117 -0
|
@@ -0,0 +1,1209 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unit tests for recursive schema handling in v1 client.
|
|
3
|
+
"""
|
|
4
|
+
import unittest
|
|
5
|
+
import os
|
|
6
|
+
from firecrawl.v1.client import V1FirecrawlApp
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestV1RecursiveRefDetection(unittest.TestCase):
    """Checks V1FirecrawlApp._contains_recursive_ref against hand-built schemas."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_no_recursive_ref(self):
        """A plain object schema without any $ref is expected to yield a falsy result."""
        flat_schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        no_defs = {}
        found = self.app._contains_recursive_ref(flat_schema, "Person", no_defs)
        self.assertFalse(found)

    def test_simple_recursive_ref(self):
        """A definition whose property points back at itself is expected to be flagged."""
        person = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "parent": {"$ref": "#/$defs/Person"},
            },
        }
        defs = {"Person": person}
        self.assertTrue(
            self.app._contains_recursive_ref(defs["Person"], "Person", defs)
        )

    def test_indirect_recursive_ref(self):
        """Person -> Address -> Person is expected to be flagged as recursive."""
        person = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "address": {"$ref": "#/$defs/Address"},
            },
        }
        address = {
            "type": "object",
            "properties": {
                "street": {"type": "string"},
                "owner": {"$ref": "#/$defs/Person"},
            },
        }
        defs = {"Person": person, "Address": address}
        self.assertTrue(
            self.app._contains_recursive_ref(defs["Person"], "Person", defs)
        )

    def test_recursive_ref_in_array(self):
        """A self-reference placed under array ``items`` is expected to be flagged."""
        tree_node = {
            "type": "object",
            "properties": {
                "value": {"type": "string"},
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/TreeNode"},
                },
            },
        }
        defs = {"TreeNode": tree_node}
        self.assertTrue(
            self.app._contains_recursive_ref(defs["TreeNode"], "TreeNode", defs)
        )

    def test_no_ref_in_empty_schema(self):
        """An empty schema with empty defs is expected to yield a falsy result."""
        self.assertFalse(self.app._contains_recursive_ref({}, "Person", {}))

    def test_no_ref_with_none_input(self):
        """Passing None as the schema is expected to yield a falsy result."""
        self.assertFalse(self.app._contains_recursive_ref(None, "Person", {}))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class TestV1CircularDefsDetection(unittest.TestCase):
    """Checks V1FirecrawlApp._check_for_circular_defs on several $defs tables."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_no_circular_refs(self):
        """Two independent definitions are expected to produce a falsy result."""
        defs = {
            "Person": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
            "Address": {
                "type": "object",
                "properties": {"street": {"type": "string"}},
            },
        }
        self.assertFalse(self.app._check_for_circular_defs(defs))

    def test_self_referencing_def(self):
        """A definition that references itself twice is expected to be detected."""
        defs = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "left": {"$ref": "#/$defs/TreeNode"},
                    "right": {"$ref": "#/$defs/TreeNode"},
                },
            },
        }
        self.assertTrue(self.app._check_for_circular_defs(defs))

    def test_mutually_recursive_defs(self):
        """Two definitions that point at each other are expected to be detected."""
        defs = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "address": {"$ref": "#/$defs/Address"},
                },
            },
            "Address": {
                "type": "object",
                "properties": {
                    "street": {"type": "string"},
                    "resident": {"$ref": "#/$defs/Person"},
                },
            },
        }
        self.assertTrue(self.app._check_for_circular_defs(defs))

    def test_empty_defs(self):
        """An empty definitions dict is expected to produce a falsy result."""
        self.assertFalse(self.app._check_for_circular_defs({}))

    def test_none_defs(self):
        """None in place of the definitions is expected to produce a falsy result."""
        self.assertFalse(self.app._check_for_circular_defs(None))
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class TestV1ResolveRefs(unittest.TestCase):
    """Checks V1FirecrawlApp._resolve_refs on simple, nested and circular inputs."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_resolve_simple_ref(self):
        """A single $ref property is expected to be replaced by its definition."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
        }
        defs = {
            "Person": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        resolved = self.app._resolve_refs(schema, defs)
        self.assertEqual(resolved["properties"]["person"]["type"], "object")
        self.assertIn("name", resolved["properties"]["person"]["properties"])

    def test_resolve_nested_refs(self):
        """A $ref whose target itself holds a $ref: the outer target must resolve."""
        schema = {
            "type": "object",
            "properties": {"data": {"$ref": "#/$defs/Data"}},
        }
        defs = {
            "Data": {
                "type": "object",
                "properties": {"person": {"$ref": "#/$defs/Person"}},
            },
            "Person": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        resolved = self.app._resolve_refs(schema, defs)
        self.assertEqual(resolved["properties"]["data"]["type"], "object")

    def test_resolve_refs_in_array(self):
        """An array schema with a $ref in ``items`` must stay an array with items."""
        schema = {"type": "array", "items": {"$ref": "#/$defs/Person"}}
        defs = {
            "Person": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        resolved = self.app._resolve_refs(schema, defs)
        # The outcome must remain a dict describing an array that still has items.
        self.assertIsInstance(resolved, dict)
        self.assertEqual(resolved["type"], "array")
        self.assertIn("items", resolved)

    def test_resolve_refs_max_depth(self):
        """A chain A -> B -> C -> D started at depth=0 must return something non-None."""
        schema = {"$ref": "#/$defs/A"}
        defs = {
            "A": {"$ref": "#/$defs/B"},
            "B": {"$ref": "#/$defs/C"},
            "C": {"$ref": "#/$defs/D"},
            "D": {"type": "string"},
        }
        resolved = self.app._resolve_refs(schema, defs, depth=0)
        # Only checks that resolution terminates with a value.
        self.assertIsNotNone(resolved)

    def test_resolve_refs_with_circular_ref(self):
        """A self-referencing TreeNode must resolve to a non-None value."""
        schema = {"$ref": "#/$defs/TreeNode"}
        defs = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
            },
        }
        # No exception is expected; the $ref may be left in place.
        resolved = self.app._resolve_refs(schema, defs)
        self.assertIsNotNone(resolved)

    def test_resolve_refs_no_defs(self):
        """With empty defs, a $ref-free schema is expected to come back equal."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        self.assertEqual(self.app._resolve_refs(schema, {}), schema)

    def test_resolve_refs_with_none(self):
        """None as the schema is expected to come back as None."""
        self.assertIsNone(self.app._resolve_refs(None, {}))

    def test_resolve_refs_skips_defs_key(self):
        """The ``$defs`` key of the input is expected to be absent from the output."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
            "$defs": {"Person": {"type": "object"}},
        }
        defs = schema["$defs"]
        resolved = self.app._resolve_refs(schema, defs)
        # $defs must not survive into the resolved schema.
        self.assertNotIn("$defs", resolved)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class TestV1NormalizeSchemaForOpenAI(unittest.TestCase):
    """Checks V1FirecrawlApp._normalize_schema_for_openai clean-up rules."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_normalize_removes_additional_properties(self):
        """``additionalProperties: True`` next to properties is expected to be dropped."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "additionalProperties": True,
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized)

    def test_normalize_preserves_additional_properties_false(self):
        """``additionalProperties: False`` is expected to be kept and stay falsy."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "additionalProperties": False,
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIn("additionalProperties", normalized)
        self.assertFalse(normalized["additionalProperties"])

    def test_normalize_removes_invalid_required(self):
        """``required`` entries with no matching property are expected to be filtered."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name", "age", "email"],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertEqual(normalized["required"], ["name"])

    def test_normalize_removes_empty_required(self):
        """When no ``required`` entry matches a property, the key is expected to vanish."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["age"],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("required", normalized)

    def test_normalize_preserves_ref(self):
        """A bare $ref schema is expected to come back equal to the input."""
        schema = {"$ref": "#/$defs/Person"}
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertEqual(normalized, schema)

    def test_normalize_handles_defs(self):
        """``$defs`` is expected to be kept, with its entries normalized as well."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
            },
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIn("$defs", normalized)
        self.assertNotIn("additionalProperties", normalized["$defs"]["Person"])

    def test_normalize_nested_objects(self):
        """Normalization is expected to reach an object nested under properties."""
        schema = {
            "type": "object",
            "properties": {
                "person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
            },
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized["properties"]["person"])

    def test_normalize_arrays_with_objects(self):
        """Normalization is expected to reach object schemas listed under ``anyOf``."""
        schema = {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
                {"type": "string"},
            ],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized["anyOf"][0])

    def test_normalize_with_none(self):
        """None as input is expected to come back as None."""
        self.assertIsNone(self.app._normalize_schema_for_openai(None))

    def test_normalize_with_non_dict(self):
        """A plain string as input is expected to come back unchanged."""
        self.assertEqual(self.app._normalize_schema_for_openai("string"), "string")
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
class TestV1ValidateSchemaForOpenAI(unittest.TestCase):
    """Checks the verdicts returned by V1FirecrawlApp._validate_schema_for_openai."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_valid_schema_with_properties(self):
        """An object schema that declares properties is expected to be accepted."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_valid_schema_with_ref(self):
        """An object schema whose property is a $ref is expected to be accepted."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
        }
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_invalid_schema_no_properties_with_additional_properties(self):
        """An object with only ``additionalProperties: True`` is expected to be rejected."""
        schema = {"type": "object", "additionalProperties": True}
        self.assertFalse(self.app._validate_schema_for_openai(schema))

    def test_valid_schema_with_pattern_properties(self):
        """``patternProperties`` plus ``additionalProperties: True`` is expected to pass."""
        schema = {
            "type": "object",
            "patternProperties": {"^[a-z]+$": {"type": "string"}},
            "additionalProperties": True,
        }
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_invalid_nested_schema(self):
        """An invalid object nested under properties is expected to fail the whole schema."""
        schema = {
            "type": "object",
            "properties": {
                "data": {"type": "object", "additionalProperties": True},
            },
        }
        self.assertFalse(self.app._validate_schema_for_openai(schema))

    def test_invalid_schema_in_array(self):
        """An invalid object listed under ``anyOf`` is expected to fail the whole schema."""
        schema = {
            "anyOf": [
                {"type": "object", "additionalProperties": True},
                {"type": "string"},
            ],
        }
        self.assertFalse(self.app._validate_schema_for_openai(schema))

    def test_valid_with_none(self):
        """None as input is expected to be treated as valid."""
        self.assertTrue(self.app._validate_schema_for_openai(None))

    def test_valid_with_non_dict(self):
        """A plain string as input is expected to be treated as valid."""
        self.assertTrue(self.app._validate_schema_for_openai("string"))
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
class TestV1DetectRecursiveSchema(unittest.TestCase):
    """Checks which schema features make _detect_recursive_schema return truthy."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_detect_ref(self):
        """A schema containing a $ref property is expected to be detected."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
        }
        self.assertTrue(self.app._detect_recursive_schema(schema))

    def test_detect_defs(self):
        """A schema carrying a ``$defs`` section is expected to be detected."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        self.assertTrue(self.app._detect_recursive_schema(schema))

    def test_detect_definitions(self):
        """A schema carrying a ``definitions`` section is expected to be detected."""
        schema = {
            "type": "object",
            "definitions": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        self.assertTrue(self.app._detect_recursive_schema(schema))

    def test_no_recursion(self):
        """A flat schema without refs or definitions is expected not to be detected."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        self.assertFalse(self.app._detect_recursive_schema(schema))

    def test_with_none(self):
        """None as input is expected to yield a falsy result."""
        self.assertFalse(self.app._detect_recursive_schema(None))

    def test_with_non_dict(self):
        """A plain string as input is expected to yield a falsy result."""
        self.assertFalse(self.app._detect_recursive_schema("string"))
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
class TestV1SelectModelForSchema(unittest.TestCase):
    """Checks the model name and reason chosen by _select_model_for_schema."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_no_schema(self):
        """Without a schema, "gpt-4o-mini" with reason "no_schema" is expected."""
        choice = self.app._select_model_for_schema(None)
        self.assertEqual(choice["modelName"], "gpt-4o-mini")
        self.assertEqual(choice["reason"], "no_schema")

    def test_simple_schema(self):
        """A flat schema is expected to select "gpt-4o-mini" as "simple_schema"."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
        }
        choice = self.app._select_model_for_schema(schema)
        self.assertEqual(choice["modelName"], "gpt-4o-mini")
        self.assertEqual(choice["reason"], "simple_schema")

    def test_recursive_schema(self):
        """A schema using $ref and $defs is expected to select "gpt-4o"."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        choice = self.app._select_model_for_schema(schema)
        self.assertEqual(choice["modelName"], "gpt-4o")
        self.assertEqual(choice["reason"], "recursive_schema_detected")
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
class TestV1ProcessSchemaWithValidation(unittest.TestCase):
    """Checks V1FirecrawlApp._process_schema_with_validation on schema containers."""

    def setUp(self):
        """Create the client under test (key from TEST_API_KEY, else a placeholder)."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_process_valid_schema(self):
        """A container holding a valid schema is expected to keep its "schema" key."""
        container = {
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        processed = self.app._process_schema_with_validation(container)
        self.assertIn("schema", processed)

    def test_process_invalid_schema(self):
        """An invalid schema is expected to raise ValueError with a specific message."""
        container = {
            "schema": {"type": "object", "additionalProperties": True},
        }
        with self.assertRaises(ValueError) as raised:
            self.app._process_schema_with_validation(container)
        self.assertIn("invalid structure for OpenAI", str(raised.exception))

    def test_process_recursive_schema(self):
        """A self-referencing TreeNode schema is expected to be processed without error."""
        container = {
            "schema": {
                "type": "object",
                "properties": {
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
                "$defs": {
                    "TreeNode": {
                        "type": "object",
                        "properties": {
                            "value": {"type": "string"},
                            "children": {
                                "type": "array",
                                "items": {"$ref": "#/$defs/TreeNode"},
                            },
                        },
                    },
                },
            },
        }
        processed = self.app._process_schema_with_validation(container)
        self.assertIn("schema", processed)

    def test_process_resolves_non_recursive_refs(self):
        """A schema with a non-recursive $ref is expected to keep its "schema" key."""
        container = {
            "schema": {
                "type": "object",
                "properties": {"person": {"$ref": "#/$defs/Person"}},
                "$defs": {
                    "Person": {
                        "type": "object",
                        "properties": {"name": {"type": "string"}},
                    },
                },
            },
        }
        processed = self.app._process_schema_with_validation(container)
        self.assertIn("schema", processed)

    def test_process_no_schema_key(self):
        """A container lacking the schema key is expected to come back equal."""
        container = {"other_key": "value"}
        processed = self.app._process_schema_with_validation(container)
        self.assertEqual(processed, container)

    def test_process_non_dict_container(self):
        """A non-dict container is expected to come back equal."""
        container = "not a dict"
        processed = self.app._process_schema_with_validation(container)
        self.assertEqual(processed, container)

    def test_process_with_custom_schema_key(self):
        """With ``schema_key="custom_schema"`` that key is expected in the result."""
        container = {
            "custom_schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        processed = self.app._process_schema_with_validation(
            container, schema_key="custom_schema"
        )
        self.assertIn("custom_schema", processed)
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
class TestV1EdgeCases(unittest.TestCase):
    """Edge-case and error-handling checks for V1FirecrawlApp's schema helpers."""

    def setUp(self):
        """Build the client under test; the key comes from TEST_API_KEY when set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_deeply_nested_schema(self):
        """Normalizing objects nested three levels deep yields a non-None result."""
        # Built inside-out; each level is used exactly once, so nothing is shared.
        level3 = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
            "additionalProperties": True,
        }
        level2 = {"type": "object", "properties": {"level3": level3}}
        level1 = {"type": "object", "properties": {"level2": level2}}
        schema = {"type": "object", "properties": {"level1": level1}}

        normalized = self.app._normalize_schema_for_openai(schema)

        self.assertIsNotNone(normalized)

    def test_circular_reference_doesnt_hang(self):
        """Normalizing and validating a self-referential TreeNode both return."""
        tree_node = {
            "type": "object",
            "properties": {
                "left": {"$ref": "#/$defs/TreeNode"},
                "right": {"$ref": "#/$defs/TreeNode"},
            },
        }
        schema = {
            "$ref": "#/$defs/TreeNode",
            "$defs": {"TreeNode": tree_node},
        }

        # Reaching each assertion at all is the real check: the call returned.
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

        validation = self.app._validate_schema_for_openai(schema)
        self.assertIsNotNone(validation)

    def test_empty_schema(self):
        """An empty schema normalizes to ``{}`` and validates as truthy."""
        schema = {}

        self.assertEqual(self.app._normalize_schema_for_openai(schema), {})
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_schema_with_complex_anyOf(self):
        """``additionalProperties: True`` is dropped from an ``anyOf`` branch."""
        named_variant = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "additionalProperties": True,
        }
        id_variant = {
            "type": "object",
            "properties": {"id": {"type": "number"}},
        }
        schema = {"anyOf": [named_variant, id_variant]}

        normalized = self.app._normalize_schema_for_openai(schema)

        first_branch = normalized["anyOf"][0]
        self.assertNotIn("additionalProperties", first_branch)

    def test_openai_schema_error_message_constant(self):
        """The class exposes OPENAI_SCHEMA_ERROR_MESSAGE with the expected wording."""
        message = V1FirecrawlApp.OPENAI_SCHEMA_ERROR_MESSAGE

        self.assertIsNotNone(message)
        self.assertIn("invalid structure for OpenAI", message)
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
class TestV1RecursionStressTests(unittest.TestCase):
    """Stress checks that recursive/circular schemas are processed without failing."""

    def setUp(self):
        """Build the client under test; the key comes from TEST_API_KEY when set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_very_deep_reference_chain(self):
        """A 20-link ``$ref`` chain is resolved and normalized without crashing."""
        chain_length = 20
        last = chain_length - 1

        # Level0 -> Level1 -> ... -> Level19, where only the last is a real type.
        defs = {
            f"Level{i}": {"$ref": f"#/$defs/Level{i+1}"}
            for i in range(last)
        }
        defs[f"Level{last}"] = {"type": "string"}

        schema = {
            "$ref": "#/$defs/Level0",
            "$defs": defs,
        }

        # Only completion is asserted; the result need not be fully resolved.
        resolved = self.app._resolve_refs(schema, defs)
        self.assertIsNotNone(resolved)

        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

    def test_multiple_circular_paths(self):
        """Several distinct cycles in ``$defs`` are detected and normalization returns."""
        node = {
            "type": "object",
            "properties": {
                "parent": {"$ref": "#/$defs/Node"},
                "child": {"$ref": "#/$defs/Node"},
                "sibling": {"$ref": "#/$defs/Node"},
                "related": {"$ref": "#/$defs/RelatedNode"},
            },
        }
        related_node = {
            "type": "object",
            "properties": {
                "backref": {"$ref": "#/$defs/Node"},
                "self": {"$ref": "#/$defs/RelatedNode"},
            },
        }
        defs = {"Node": node, "RelatedNode": related_node}

        self.assertTrue(self.app._check_for_circular_defs(defs))

        schema = {"$ref": "#/$defs/Node", "$defs": defs}
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

    def test_recursive_in_oneOf_allOf(self):
        """Recursive refs nested under ``oneOf``/``allOf`` normalize and validate."""
        type_a = {
            "type": "object",
            "properties": {
                "nested": {"$ref": "#/$defs/TypeA"},
            },
        }
        type_b = {
            "type": "object",
            "allOf": [
                {"$ref": "#/$defs/TypeA"},
                {"properties": {"extra": {"type": "string"}}},
            ],
        }
        schema = {
            "oneOf": [
                {"$ref": "#/$defs/TypeA"},
                {"$ref": "#/$defs/TypeB"},
            ],
            "$defs": {"TypeA": type_a, "TypeB": type_b},
        }

        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)
        self.assertIn("oneOf", normalized)

        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_invalid_reference_doesnt_crash(self):
        """A ``$ref`` to a definition that does not exist is tolerated."""
        schema = {
            "type": "object",
            "properties": {
                "broken": {"$ref": "#/$defs/NonExistent"},
            },
            "$defs": {
                "Existing": {"type": "string"},
            },
        }

        resolved = self.app._resolve_refs(schema, schema.get("$defs", {}))
        self.assertIsNotNone(resolved)

        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

    def test_malformed_reference_format(self):
        """Malformed ``$ref`` values (bad path, non-string) do not break normalization."""
        bad_refs = {
            "bad1": {"$ref": "not-a-valid-ref"},
            "bad2": {"$ref": "#/wrong/path"},
            "bad3": {"$ref": 12345},  # not even a string
        }
        schema = {"type": "object", "properties": bad_refs}

        normalized = self.app._normalize_schema_for_openai(schema)

        self.assertIsNotNone(normalized)

    def test_linked_list_pattern(self):
        """A linked-list schema is flagged recursive, mapped to gpt-4o, and normalized."""

        def node_properties():
            # Returns fresh dicts on every call so the root and the Node
            # definition never share objects, matching separate literals.
            return {
                "value": {"type": "string"},
                "next": {
                    "oneOf": [
                        {"$ref": "#/$defs/Node"},
                        {"type": "null"},
                    ],
                },
            }

        schema = {
            "type": "object",
            "properties": node_properties(),
            "$defs": {
                "Node": {
                    "type": "object",
                    "properties": node_properties(),
                },
            },
        }

        self.assertTrue(self.app._detect_recursive_schema(schema))

        model_info = self.app._select_model_for_schema(schema)
        self.assertEqual(model_info["modelName"], "gpt-4o")

        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

    def test_graph_pattern_with_multiple_node_types(self):
        """Interconnected GraphNode/Edge definitions are circular yet still validate."""
        graph_node = {
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "neighbors": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/GraphNode"},
                },
                "edges": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/Edge"},
                },
            },
        }
        edge = {
            "type": "object",
            "properties": {
                "from": {"$ref": "#/$defs/GraphNode"},
                "to": {"$ref": "#/$defs/GraphNode"},
            },
        }
        schema = {
            "type": "object",
            "properties": {
                "nodes": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/GraphNode"},
                },
                "edges": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/Edge"},
                },
            },
            "$defs": {"GraphNode": graph_node, "Edge": edge},
        }

        self.assertTrue(
            self.app._check_for_circular_defs(schema.get("$defs", {}))
        )

        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_mutual_recursion_three_way(self):
        """A three-definition cycle (A -> B -> C -> A) is detected and normalized."""
        type_a = {
            "type": "object",
            "properties": {"toB": {"$ref": "#/$defs/TypeB"}},
        }
        type_b = {
            "type": "object",
            "properties": {"toC": {"$ref": "#/$defs/TypeC"}},
        }
        type_c = {
            "type": "object",
            "properties": {"toA": {"$ref": "#/$defs/TypeA"}},
        }
        defs = {"TypeA": type_a, "TypeB": type_b, "TypeC": type_c}

        self.assertTrue(self.app._check_for_circular_defs(defs))

        schema = {"$ref": "#/$defs/TypeA", "$defs": defs}
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

    def test_required_fields_cleanup_without_defs(self):
        """Without ``$defs``, ``required`` is trimmed to the declared properties."""
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "number"},
            },
            "required": ["name", "age", "nonexistent_field"],
            "additionalProperties": True,
        }

        normalized = self.app._normalize_schema_for_openai(schema)

        # The undeclared entry is expected to be dropped from ``required``.
        self.assertIn("required", normalized)
        self.assertEqual(normalized["required"], ["name", "age"])
        # ``additionalProperties: True`` is expected to be stripped.
        self.assertNotIn("additionalProperties", normalized)

    def test_required_fields_in_nested_defs(self):
        """``required`` cleanup is also applied to definitions inside ``$defs``."""
        node = {
            "type": "object",
            "properties": {
                "value": {"type": "string"},
                "nested": {"$ref": "#/$defs/Node"},
            },
            "required": ["value", "another_nonexistent"],
            "additionalProperties": True,
        }
        schema = {
            "type": "object",
            "properties": {
                "data": {"$ref": "#/$defs/Node"},
            },
            "$defs": {"Node": node},
        }

        normalized = self.app._normalize_schema_for_openai(schema)

        self.assertIn("$defs", normalized)
        self.assertIn("Node", normalized["$defs"])
        cleaned_node = normalized["$defs"]["Node"]

        # Within the definition, only the declared property stays required.
        self.assertIn("required", cleaned_node)
        self.assertEqual(cleaned_node["required"], ["value"])

        self.assertNotIn("additionalProperties", cleaned_node)

    def test_required_fields_cleanup_limitation_with_defs(self):
        """
        Pin a known limitation: root-level ``required`` is left untouched when
        ``$defs`` is present.

        The test records current behaviour, not desired behaviour. If this is
        judged to be a bug, fix the implementation so root-level ``required``
        is cleaned even with ``$defs`` present, and update this test with it.
        """
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "child": {"$ref": "#/$defs/Node"},
            },
            # The last entry names a property that is not declared above.
            "required": ["name", "child", "nonexistent_field"],
            "$defs": {
                "Node": {
                    "type": "object",
                    "properties": {
                        "value": {"type": "string"},
                    },
                },
            },
        }

        normalized = self.app._normalize_schema_for_openai(schema)

        # Current behaviour: the undeclared entry survives at the root.
        self.assertIn("required", normalized)
        self.assertIn("nonexistent_field", normalized["required"])

        # Keep the implementation and this test in sync if that ever changes.

    def test_same_object_referenced_multiple_times(self):
        """One definition referenced from three properties resolves without error."""
        person_def = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        schema = {
            "type": "object",
            "properties": {
                "person1": {"$ref": "#/$defs/Person"},
                "person2": {"$ref": "#/$defs/Person"},
                "person3": {"$ref": "#/$defs/Person"},
            },
            "$defs": {
                "Person": person_def,
            },
        }

        resolved = self.app._resolve_refs(schema, schema.get("$defs", {}))
        self.assertIsNotNone(resolved)

        # NOTE(review): these checks are skipped entirely when "properties" is
        # absent from the result, so the test can pass vacuously. Consider
        # asserting its presence once the shape of _resolve_refs is confirmed.
        if "properties" in resolved:
            for key in ("person1", "person2", "person3"):
                self.assertIn(key, resolved["properties"])
|
|
1205
|
+
|
|
1206
|
+
|
|
1207
|
+
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
|
1209
|
+
|