firecrawl 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92):
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
@@ -0,0 +1,1133 @@
1
+ """
2
+ Unit tests for recursive schema handling in v2 validation utils.
3
+ """
4
+ import unittest
5
+ from firecrawl.v2.utils.validation import (
6
+ normalize_schema_for_openai,
7
+ validate_schema_for_openai,
8
+ resolve_refs,
9
+ detect_recursive_schema,
10
+ select_model_for_schema,
11
+ _contains_recursive_ref,
12
+ _check_for_circular_defs,
13
+ _validate_json_format,
14
+ OPENAI_SCHEMA_ERROR_MESSAGE
15
+ )
16
+
17
+
18
class TestRecursiveRefDetection(unittest.TestCase):
    """Checks on ``_contains_recursive_ref`` for direct, indirect and absent cycles."""

    def test_no_recursive_ref(self):
        """A schema without any ``$ref`` is reported as non-recursive."""
        plain_schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        no_defs = {}
        self.assertFalse(_contains_recursive_ref(plain_schema, "Person", no_defs))

    def test_simple_recursive_ref(self):
        """A definition pointing straight back at itself is reported as recursive."""
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "parent": {"$ref": "#/$defs/Person"},
                },
            },
        }
        person = definitions["Person"]
        self.assertTrue(_contains_recursive_ref(person, "Person", definitions))

    def test_indirect_recursive_ref(self):
        """Person -> Address -> Person is reported as recursive for Person."""
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "address": {"$ref": "#/$defs/Address"},
                },
            },
            "Address": {
                "type": "object",
                "properties": {
                    "street": {"type": "string"},
                    "owner": {"$ref": "#/$defs/Person"},
                },
            },
        }
        person = definitions["Person"]
        self.assertTrue(_contains_recursive_ref(person, "Person", definitions))

    def test_recursive_ref_in_array(self):
        """A self-reference placed under array ``items`` is still detected."""
        definitions = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
            },
        }
        tree_node = definitions["TreeNode"]
        self.assertTrue(_contains_recursive_ref(tree_node, "TreeNode", definitions))

    def test_no_ref_in_empty_schema(self):
        """An empty schema dict yields a falsy result."""
        self.assertFalse(_contains_recursive_ref({}, "Person", {}))

    def test_no_ref_with_none_input(self):
        """``None`` in place of a schema yields a falsy result."""
        self.assertFalse(_contains_recursive_ref(None, "Person", {}))
94
+
95
+
96
class TestCircularDefsDetection(unittest.TestCase):
    """Checks on ``_check_for_circular_defs`` over whole definition tables."""

    def test_no_circular_refs(self):
        """Two unrelated definitions are not flagged as circular."""
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                },
            },
            "Address": {
                "type": "object",
                "properties": {
                    "street": {"type": "string"},
                },
            },
        }
        self.assertFalse(_check_for_circular_defs(definitions))

    def test_self_referencing_def(self):
        """A definition whose properties point back at itself is flagged."""
        definitions = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "left": {"$ref": "#/$defs/TreeNode"},
                    "right": {"$ref": "#/$defs/TreeNode"},
                },
            },
        }
        self.assertTrue(_check_for_circular_defs(definitions))

    def test_mutually_recursive_defs(self):
        """Two definitions that reference each other are flagged."""
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "address": {"$ref": "#/$defs/Address"},
                },
            },
            "Address": {
                "type": "object",
                "properties": {
                    "street": {"type": "string"},
                    "resident": {"$ref": "#/$defs/Person"},
                },
            },
        }
        self.assertTrue(_check_for_circular_defs(definitions))

    def test_empty_defs(self):
        """An empty definitions dict yields a falsy result."""
        self.assertFalse(_check_for_circular_defs({}))

    def test_none_defs(self):
        """``None`` in place of definitions yields a falsy result."""
        self.assertFalse(_check_for_circular_defs(None))
163
+
164
+
165
class TestResolveRefs(unittest.TestCase):
    """Checks on ``resolve_refs`` for plain, nested, array and cyclic references."""

    def test_resolve_simple_ref(self):
        """A property-level ``$ref`` is replaced by the referenced object schema."""
        root = {
            "type": "object",
            "properties": {
                "person": {"$ref": "#/$defs/Person"},
            },
        }
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                },
            },
        }
        resolved = resolve_refs(root, definitions)
        person = resolved["properties"]["person"]
        self.assertEqual(person["type"], "object")
        self.assertIn("name", person["properties"])

    def test_resolve_nested_refs(self):
        """A ``$ref`` whose target itself holds a ``$ref`` resolves to an object."""
        root = {
            "type": "object",
            "properties": {
                "data": {"$ref": "#/$defs/Data"},
            },
        }
        definitions = {
            "Data": {
                "type": "object",
                "properties": {
                    "person": {"$ref": "#/$defs/Person"},
                },
            },
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                },
            },
        }
        resolved = resolve_refs(root, definitions)
        self.assertEqual(resolved["properties"]["data"]["type"], "object")

    def test_resolve_refs_in_array(self):
        """An array schema keeps its ``type`` and ``items`` keys after resolution."""
        root = {
            "type": "array",
            "items": {"$ref": "#/$defs/Person"},
        }
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                },
            },
        }
        resolved = resolve_refs(root, definitions)
        # The outcome must still be a dict describing an array with items.
        self.assertIsInstance(resolved, dict)
        self.assertEqual(resolved["type"], "array")
        self.assertIn("items", resolved)

    def test_resolve_refs_max_depth(self):
        """A four-link ``$ref`` chain started at ``depth=0`` returns a value."""
        root = {"$ref": "#/$defs/A"}
        definitions = {
            "A": {"$ref": "#/$defs/B"},
            "B": {"$ref": "#/$defs/C"},
            "C": {"$ref": "#/$defs/D"},
            "D": {"type": "string"},
        }
        resolved = resolve_refs(root, definitions, depth=0)
        # Only non-None is required; how far the chain is followed is not pinned.
        self.assertIsNotNone(resolved)

    def test_resolve_refs_with_circular_ref(self):
        """A self-referencing definition is handled and yields a value."""
        root = {"$ref": "#/$defs/TreeNode"}
        definitions = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
            },
        }
        # The call must return; the result is allowed to still contain a $ref.
        resolved = resolve_refs(root, definitions)
        self.assertIsNotNone(resolved)

    def test_resolve_refs_no_defs(self):
        """With no definitions supplied, the result equals the input schema."""
        root = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        resolved = resolve_refs(root, {})
        self.assertEqual(resolved, root)

    def test_resolve_refs_with_none(self):
        """``None`` in place of a schema is returned as ``None``."""
        self.assertIsNone(resolve_refs(None, {}))
280
+
281
+
282
class TestNormalizeSchemaForOpenAI(unittest.TestCase):
    """Checks on ``normalize_schema_for_openai`` clean-up rules."""

    def test_normalize_removes_additional_properties(self):
        """``additionalProperties: True`` is dropped when ``properties`` exist."""
        raw = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
            "additionalProperties": True,
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertNotIn("additionalProperties", normalized)

    def test_normalize_preserves_additional_properties_false(self):
        """``additionalProperties: False`` survives normalization unchanged."""
        raw = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
            "additionalProperties": False,
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertIn("additionalProperties", normalized)
        self.assertFalse(normalized["additionalProperties"])

    def test_normalize_removes_invalid_required(self):
        """``required`` entries that name no property are filtered out."""
        raw = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
            "required": ["name", "age", "email"],
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertEqual(normalized["required"], ["name"])

    def test_normalize_removes_empty_required(self):
        """``required`` is removed when none of its entries name a property."""
        raw = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
            "required": ["age"],
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertNotIn("required", normalized)

    def test_normalize_preserves_ref(self):
        """A schema consisting only of a ``$ref`` comes back equal to the input."""
        raw = {
            "$ref": "#/$defs/Person",
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertEqual(normalized, raw)

    def test_normalize_handles_defs(self):
        """Entries under ``$defs`` are kept and normalized as well."""
        raw = {
            "type": "object",
            "properties": {
                "person": {"$ref": "#/$defs/Person"},
            },
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                    },
                    "additionalProperties": True,
                },
            },
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertIn("$defs", normalized)
        self.assertNotIn("additionalProperties", normalized["$defs"]["Person"])

    def test_normalize_nested_objects(self):
        """The clean-up is applied to object schemas nested under properties."""
        raw = {
            "type": "object",
            "properties": {
                "person": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                    },
                    "additionalProperties": True,
                },
            },
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertNotIn("additionalProperties", normalized["properties"]["person"])

    def test_normalize_arrays_with_objects(self):
        """The clean-up is applied to object schemas listed inside ``anyOf``."""
        raw = {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
                {"type": "string"},
            ],
        }
        normalized = normalize_schema_for_openai(raw)
        self.assertNotIn("additionalProperties", normalized["anyOf"][0])

    def test_normalize_with_none(self):
        """``None`` input is returned as ``None``."""
        self.assertIsNone(normalize_schema_for_openai(None))

    def test_normalize_with_non_dict(self):
        """A non-dict input (here a string) is returned unchanged."""
        self.assertEqual(normalize_schema_for_openai("string"), "string")
404
+
405
+
406
class TestValidateSchemaForOpenAI(unittest.TestCase):
    """Checks on the truthy/falsy verdict of ``validate_schema_for_openai``."""

    def test_valid_schema_with_properties(self):
        """An object schema that declares properties is accepted."""
        candidate = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        self.assertTrue(validate_schema_for_openai(candidate))

    def test_valid_schema_with_ref(self):
        """An object schema whose property is a ``$ref`` is accepted."""
        candidate = {
            "type": "object",
            "properties": {
                "person": {"$ref": "#/$defs/Person"},
            },
        }
        self.assertTrue(validate_schema_for_openai(candidate))

    def test_invalid_schema_no_properties_with_additional_properties(self):
        """An object with no properties but ``additionalProperties: True`` is rejected."""
        candidate = {
            "type": "object",
            "additionalProperties": True,
        }
        self.assertFalse(validate_schema_for_openai(candidate))

    def test_valid_schema_with_pattern_properties(self):
        """``patternProperties`` alongside ``additionalProperties: True`` is accepted."""
        candidate = {
            "type": "object",
            "patternProperties": {
                "^[a-z]+$": {"type": "string"},
            },
            "additionalProperties": True,
        }
        self.assertTrue(validate_schema_for_openai(candidate))

    def test_invalid_nested_schema(self):
        """An invalid object nested under a property makes the whole schema invalid."""
        candidate = {
            "type": "object",
            "properties": {
                "data": {
                    "type": "object",
                    "additionalProperties": True,
                },
            },
        }
        self.assertFalse(validate_schema_for_openai(candidate))

    def test_invalid_schema_in_array(self):
        """An invalid object listed inside ``anyOf`` makes the whole schema invalid."""
        candidate = {
            "anyOf": [
                {
                    "type": "object",
                    "additionalProperties": True,
                },
                {"type": "string"},
            ],
        }
        self.assertFalse(validate_schema_for_openai(candidate))

    def test_valid_with_none(self):
        """``None`` input is treated as valid."""
        self.assertTrue(validate_schema_for_openai(None))

    def test_valid_with_non_dict(self):
        """A non-dict input (here a string) is treated as valid."""
        self.assertTrue(validate_schema_for_openai("string"))
489
+
490
+
491
class TestDetectRecursiveSchema(unittest.TestCase):
    """Checks on which schema features make ``detect_recursive_schema`` truthy."""

    def test_detect_ref(self):
        """The presence of a ``$ref`` in a property triggers detection."""
        candidate = {
            "type": "object",
            "properties": {
                "person": {"$ref": "#/$defs/Person"},
            },
        }
        self.assertTrue(detect_recursive_schema(candidate))

    def test_detect_defs(self):
        """A ``$defs`` section triggers detection even when nothing refers to it."""
        candidate = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        self.assertTrue(detect_recursive_schema(candidate))

    def test_detect_definitions(self):
        """The ``definitions`` spelling triggers detection just like ``$defs``."""
        candidate = {
            "type": "object",
            "definitions": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        self.assertTrue(detect_recursive_schema(candidate))

    def test_no_recursion(self):
        """A flat schema with no refs or definitions is not detected."""
        candidate = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        self.assertFalse(detect_recursive_schema(candidate))

    def test_with_none(self):
        """``None`` input yields a falsy result."""
        self.assertFalse(detect_recursive_schema(None))

    def test_with_non_dict(self):
        """A non-dict input (here a string) yields a falsy result."""
        self.assertFalse(detect_recursive_schema("string"))
556
+
557
+
558
class TestSelectModelForSchema(unittest.TestCase):
    """Checks on the ``modelName`` / ``reason`` pair from ``select_model_for_schema``."""

    def test_no_schema(self):
        """Without a schema the mini model is chosen with reason ``no_schema``."""
        choice = select_model_for_schema(None)
        self.assertEqual(choice["modelName"], "gpt-4o-mini")
        self.assertEqual(choice["reason"], "no_schema")

    def test_simple_schema(self):
        """A flat schema selects the mini model with reason ``simple_schema``."""
        flat = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        choice = select_model_for_schema(flat)
        self.assertEqual(choice["modelName"], "gpt-4o-mini")
        self.assertEqual(choice["reason"], "simple_schema")

    def test_recursive_schema(self):
        """A schema using ``$ref`` and ``$defs`` selects the larger model."""
        with_refs = {
            "type": "object",
            "properties": {
                "person": {"$ref": "#/$defs/Person"},
            },
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        choice = select_model_for_schema(with_refs)
        self.assertEqual(choice["modelName"], "gpt-4o")
        self.assertEqual(choice["reason"], "recursive_schema_detected")
596
+
597
+
598
class TestValidateJsonFormat(unittest.TestCase):
    """Integration-style checks on ``_validate_json_format``."""

    def test_validate_json_format_with_valid_schema(self):
        """A well-formed JSON format object comes back with a ``schema`` key."""
        json_format = {
            "type": "json",
            "schema": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                },
            },
        }
        validated = _validate_json_format(json_format)
        self.assertIn("schema", validated)

    def test_validate_json_format_with_invalid_schema(self):
        """A property-less object with ``additionalProperties: True`` raises ``ValueError``."""
        json_format = {
            "type": "json",
            "schema": {
                "type": "object",
                "additionalProperties": True,
            },
        }
        with self.assertRaises(ValueError) as raised:
            _validate_json_format(json_format)
        self.assertIn("invalid structure for OpenAI", str(raised.exception))

    def test_validate_json_format_with_recursive_schema(self):
        """A self-referencing tree schema is accepted and keeps a ``schema`` key."""
        json_format = {
            "type": "json",
            "schema": {
                "type": "object",
                "properties": {
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
                "$defs": {
                    "TreeNode": {
                        "type": "object",
                        "properties": {
                            "value": {"type": "string"},
                            "children": {
                                "type": "array",
                                "items": {"$ref": "#/$defs/TreeNode"},
                            },
                        },
                    },
                },
            },
        }
        validated = _validate_json_format(json_format)
        self.assertIn("schema", validated)

    def test_validate_json_format_resolves_non_recursive_refs(self):
        """A schema with a non-recursive ``$ref`` is accepted and keeps a ``schema`` key."""
        json_format = {
            "type": "json",
            "schema": {
                "type": "object",
                "properties": {
                    "person": {"$ref": "#/$defs/Person"},
                },
                "$defs": {
                    "Person": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string"},
                        },
                    },
                },
            },
        }
        validated = _validate_json_format(json_format)
        # Whether $defs is stripped once refs are resolved is implementation
        # dependent, so only the presence of "schema" is pinned here.
        self.assertIn("schema", validated)
680
+
681
+
682
class TestEdgeCases(unittest.TestCase):
    """Edge-case checks: deep nesting, cycles, empty input and ``anyOf`` branches."""

    def test_deeply_nested_schema(self):
        """A four-level nested object schema normalizes to a non-None value."""
        nested = {
            "type": "object",
            "properties": {
                "level1": {
                    "type": "object",
                    "properties": {
                        "level2": {
                            "type": "object",
                            "properties": {
                                "level3": {
                                    "type": "object",
                                    "properties": {
                                        "name": {"type": "string"},
                                    },
                                    "additionalProperties": True,
                                },
                            },
                        },
                    },
                },
            },
        }
        normalized = normalize_schema_for_openai(nested)
        # Only completion with a value is required at this depth.
        self.assertIsNotNone(normalized)

    def test_circular_reference_doesnt_hang(self):
        """Normalizing and validating a self-referencing schema both return."""
        cyclic = {
            "$ref": "#/$defs/TreeNode",
            "$defs": {
                "TreeNode": {
                    "type": "object",
                    "properties": {
                        "left": {"$ref": "#/$defs/TreeNode"},
                        "right": {"$ref": "#/$defs/TreeNode"},
                    },
                },
            },
        }
        # Reaching each assertion at all is the point: neither call may loop forever.
        normalized = normalize_schema_for_openai(cyclic)
        self.assertIsNotNone(normalized)

        verdict = validate_schema_for_openai(cyclic)
        self.assertIsNotNone(verdict)

    def test_empty_schema(self):
        """An empty schema normalizes to ``{}`` and is treated as valid."""
        empty = {}
        normalized = normalize_schema_for_openai(empty)
        self.assertEqual(normalized, {})

        verdict = validate_schema_for_openai(empty)
        self.assertTrue(verdict)

    def test_schema_with_complex_anyOf(self):
        """The first ``anyOf`` branch loses ``additionalProperties: True``."""
        union = {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
                {
                    "type": "object",
                    "properties": {"id": {"type": "number"}},
                },
            ],
        }
        normalized = normalize_schema_for_openai(union)
        self.assertNotIn("additionalProperties", normalized["anyOf"][0])
760
+
761
+
762
+ class TestRecursionStressTests(unittest.TestCase):
763
+ """Stress tests to ensure recursion handling doesn't break under extreme conditions."""
764
+
765
def test_very_deep_reference_chain(self):
    """A 20-link ``$ref`` chain can be resolved and normalized without crashing."""
    # Level0 -> Level1 -> ... -> Level18 each forward to the next level;
    # Level19 terminates the chain with a concrete string type.
    chain = {
        f"Level{i}": {"$ref": f"#/$defs/Level{i+1}"}
        for i in range(19)
    }
    chain["Level19"] = {"type": "string"}

    root = {
        "$ref": "#/$defs/Level0",
        "$defs": chain,
    }

    # Full resolution is not required (depth limits may apply), only a result.
    resolved = resolve_refs(root, chain)
    self.assertIsNotNone(resolved)

    # Normalization of the same schema must finish as well.
    normalized = normalize_schema_for_openai(root)
    self.assertIsNotNone(normalized)
787
+
788
def test_multiple_circular_paths(self):
    """Several overlapping cycles are flagged and can still be normalized."""
    definitions = {
        "Node": {
            "type": "object",
            "properties": {
                "parent": {"$ref": "#/$defs/Node"},
                "child": {"$ref": "#/$defs/Node"},
                "sibling": {"$ref": "#/$defs/Node"},
                "related": {"$ref": "#/$defs/RelatedNode"},
            },
        },
        "RelatedNode": {
            "type": "object",
            "properties": {
                "backref": {"$ref": "#/$defs/Node"},
                "self": {"$ref": "#/$defs/RelatedNode"},
            },
        },
    }

    # The definition table on its own must be recognised as circular.
    self.assertTrue(_check_for_circular_defs(definitions))

    # Normalizing a schema rooted at Node must return rather than loop.
    root = {"$ref": "#/$defs/Node", "$defs": definitions}
    normalized = normalize_schema_for_openai(root)
    self.assertIsNotNone(normalized)
817
+
818
def test_recursive_in_oneOf_allOf(self):
    """Recursive refs under ``oneOf`` / ``allOf`` normalize and validate."""
    combined = {
        "oneOf": [
            {"$ref": "#/$defs/TypeA"},
            {"$ref": "#/$defs/TypeB"},
        ],
        "$defs": {
            "TypeA": {
                "type": "object",
                "properties": {
                    "nested": {"$ref": "#/$defs/TypeA"},
                },
            },
            "TypeB": {
                "type": "object",
                "allOf": [
                    {"$ref": "#/$defs/TypeA"},
                    {"properties": {"extra": {"type": "string"}}},
                ],
            },
        },
    }

    # Normalization must return a value that still carries the oneOf branch list.
    normalized = normalize_schema_for_openai(combined)
    self.assertIsNotNone(normalized)
    self.assertIn("oneOf", normalized)

    # The same schema must also be judged valid.
    verdict = validate_schema_for_openai(combined)
    self.assertTrue(verdict)
850
+
851
def test_invalid_reference_doesnt_crash(self):
    """A ``$ref`` naming a missing definition is tolerated by both helpers."""
    dangling = {
        "type": "object",
        "properties": {
            "broken": {"$ref": "#/$defs/NonExistent"},
        },
        "$defs": {
            "Existing": {"type": "string"},
        },
    }

    # Resolution against the schema's own $defs must return, not raise.
    resolved = resolve_refs(dangling, dangling.get("$defs", {}))
    self.assertIsNotNone(resolved)

    normalized = normalize_schema_for_openai(dangling)
    self.assertIsNotNone(normalized)
869
+
870
def test_malformed_reference_format(self):
    """Normalization returns a value for ``$ref`` entries with malformed targets."""
    malformed = {
        "type": "object",
        "properties": {
            "bad1": {"$ref": "not-a-valid-ref"},
            "bad2": {"$ref": "#/wrong/path"},
            "bad3": {"$ref": 12345},  # Not even a string
        },
    }

    # None of the three malformed refs may cause an exception.
    normalized = normalize_schema_for_openai(malformed)
    self.assertIsNotNone(normalized)
884
+
885
def test_linked_list_pattern(self):
    """A linked-list schema with a nullable recursive ``next`` is handled end to end."""
    linked_list = {
        "type": "object",
        "properties": {
            "value": {"type": "string"},
            "next": {
                "oneOf": [
                    {"$ref": "#/$defs/Node"},
                    {"type": "null"},
                ],
            },
        },
        "$defs": {
            "Node": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "next": {
                        "oneOf": [
                            {"$ref": "#/$defs/Node"},
                            {"type": "null"},
                        ],
                    },
                },
            },
        },
    }

    # The schema must be recognised as recursive.
    self.assertTrue(detect_recursive_schema(linked_list))

    # Model selection must pick the larger model for it.
    chosen = select_model_for_schema(linked_list)
    self.assertEqual(chosen["modelName"], "gpt-4o")

    # Normalization must return a value.
    normalized = normalize_schema_for_openai(linked_list)
    self.assertIsNotNone(normalized)
925
+
926
def test_graph_pattern_with_multiple_node_types(self):
    """A graph schema with interlinked node and edge types is handled end to end."""
    graph = {
        "type": "object",
        "properties": {
            "nodes": {
                "type": "array",
                "items": {"$ref": "#/$defs/GraphNode"},
            },
            "edges": {
                "type": "array",
                "items": {"$ref": "#/$defs/Edge"},
            },
        },
        "$defs": {
            "GraphNode": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "neighbors": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/GraphNode"},
                    },
                    "edges": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/Edge"},
                    },
                },
            },
            "Edge": {
                "type": "object",
                "properties": {
                    "from": {"$ref": "#/$defs/GraphNode"},
                    "to": {"$ref": "#/$defs/GraphNode"},
                },
            },
        },
    }

    # The definition table must be recognised as circular.
    self.assertTrue(_check_for_circular_defs(graph.get("$defs", {})))

    # Normalization must return rather than loop.
    normalized = normalize_schema_for_openai(graph)
    self.assertIsNotNone(normalized)

    # The schema must also be judged valid.
    verdict = validate_schema_for_openai(graph)
    self.assertTrue(verdict)
976
+
977
def test_mutual_recursion_three_way(self):
    """Check a reference cycle spanning three definitions (A -> B -> C -> A)."""
    # (definition name, property name, referenced definition), in cycle order.
    links = (
        ("TypeA", "toB", "TypeB"),
        ("TypeB", "toC", "TypeC"),
        ("TypeC", "toA", "TypeA"),
    )
    defs = {
        owner: {
            "type": "object",
            "properties": {
                field: {"$ref": f"#/$defs/{target}"},
            },
        }
        for owner, field, target in links
    }

    # The three definitions form a loop, so a cycle must be reported.
    self.assertTrue(_check_for_circular_defs(defs))

    # Normalizing a schema rooted in the cycle has to terminate with a result.
    rooted_schema = {"$ref": "#/$defs/TypeA", "$defs": defs}
    self.assertIsNotNone(normalize_schema_for_openai(rooted_schema))
1008
+
1009
def test_required_fields_cleanup_without_defs(self):
    """Check `required` pruning on a schema that has no `$defs` section."""
    declared_properties = {
        "name": {"type": "string"},
        "age": {"type": "number"},
    }
    schema = {
        "type": "object",
        "properties": declared_properties,
        # The last entry names a property that is not declared above.
        "required": ["name", "age", "nonexistent_field"],
        "additionalProperties": True,
    }

    normalized = normalize_schema_for_openai(schema)

    # Only the declared properties may remain in `required`, in order.
    self.assertIn("required", normalized)
    self.assertEqual(normalized["required"], ["name", "age"])
    # `additionalProperties: true` is expected to be dropped.
    self.assertNotIn("additionalProperties", normalized)
1028
+
1029
def test_required_fields_in_nested_defs(self):
    """Check `required` pruning inside a definition stored under `$defs`."""
    node_schema = {
        "type": "object",
        "properties": {
            "value": {"type": "string"},
            "nested": {"$ref": "#/$defs/Node"},
        },
        # The second entry names a property that Node does not declare.
        "required": ["value", "another_nonexistent"],
        "additionalProperties": True,
    }
    schema = {
        "type": "object",
        "properties": {
            "data": {"$ref": "#/$defs/Node"},
        },
        "$defs": {"Node": node_schema},
    }

    normalized = normalize_schema_for_openai(schema)

    # The definition has to survive normalization under the same key.
    self.assertIn("$defs", normalized)
    self.assertIn("Node", normalized["$defs"])
    cleaned_node = normalized["$defs"]["Node"]

    # Within the definition, `required` keeps only the declared property.
    self.assertIn("required", cleaned_node)
    self.assertEqual(cleaned_node["required"], ["value"])

    # `additionalProperties: true` is expected to be dropped there as well.
    self.assertNotIn("additionalProperties", cleaned_node)
1062
+
1063
def test_required_fields_cleanup_limitation_with_defs(self):
    """
    Pin a known limitation: root-level `required` is left untouched when the
    schema also carries `$defs`.

    The assertions below describe what the normalizer does today, which is
    attributed to an early return while `$defs` is being processed. Should
    that be treated as a bug, fix the implementation so root-level cleanup
    also happens with `$defs` present, and update this test with it.
    """
    node_schema = {
        "type": "object",
        "properties": {
            "value": {"type": "string"},
        },
    }
    schema = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
            "child": {"$ref": "#/$defs/Node"},
        },
        # The last entry is deliberately not a declared property.
        "required": ["name", "child", "nonexistent_field"],
        "$defs": {"Node": node_schema},
    }

    normalized = normalize_schema_for_openai(schema)

    # Present behaviour: `required` survives at the root ...
    self.assertIn("required", normalized)
    # ... and still lists the undeclared field.
    self.assertIn("nonexistent_field", normalized["required"])

    # Note: changing this behaviour means changing the code and this test.
1098
+
1099
def test_same_object_referenced_multiple_times(self):
    """Check that one definition referenced from several properties is handled."""
    person_def = {
        "type": "object",
        "properties": {
            "name": {"type": "string"},
        },
    }
    person_keys = ("person1", "person2", "person3")

    schema = {
        "type": "object",
        # Every property gets its own reference dict pointing at Person.
        "properties": {key: {"$ref": "#/$defs/Person"} for key in person_keys},
        "$defs": {"Person": person_def},
    }

    resolved = resolve_refs(schema, schema.get("$defs", {}))
    self.assertIsNotNone(resolved)

    # NOTE(review): when the result has no "properties" key the checks below
    # are skipped and the test still passes - confirm this is intended.
    if "properties" not in resolved:
        return
    for key in person_keys:
        self.assertIn(key, resolved["properties"])
1129
+
1130
+
1131
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
1133
+