firecrawl 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
@@ -0,0 +1,1209 @@
1
+ """
2
+ Unit tests for recursive schema handling in v1 client.
3
+ """
4
+ import unittest
5
+ import os
6
+ from firecrawl.v1.client import V1FirecrawlApp
7
+
8
+
9
class TestV1RecursiveRefDetection(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._contains_recursive_ref``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_no_recursive_ref(self):
        """A schema without any $ref is expected not to be flagged for "Person"."""
        flat_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        self.assertFalse(self.app._contains_recursive_ref(flat_schema, "Person", {}))

    def test_simple_recursive_ref(self):
        """A definition whose property points back at itself is expected to be flagged."""
        person = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "parent": {"$ref": "#/$defs/Person"},
            },
        }
        definitions = {"Person": person}
        self.assertTrue(self.app._contains_recursive_ref(person, "Person", definitions))

    def test_indirect_recursive_ref(self):
        """Person -> Address -> Person is expected to be flagged starting from Person."""
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "address": {"$ref": "#/$defs/Address"},
                },
            },
            "Address": {
                "type": "object",
                "properties": {
                    "street": {"type": "string"},
                    "owner": {"$ref": "#/$defs/Person"},
                },
            },
        }
        flagged = self.app._contains_recursive_ref(
            definitions["Person"], "Person", definitions
        )
        self.assertTrue(flagged)

    def test_recursive_ref_in_array(self):
        """A self-reference hidden inside array ``items`` is expected to be flagged."""
        tree_node = {
            "type": "object",
            "properties": {
                "value": {"type": "string"},
                "children": {
                    "type": "array",
                    "items": {"$ref": "#/$defs/TreeNode"},
                },
            },
        }
        definitions = {"TreeNode": tree_node}
        flagged = self.app._contains_recursive_ref(tree_node, "TreeNode", definitions)
        self.assertTrue(flagged)

    def test_no_ref_in_empty_schema(self):
        """An empty schema with empty definitions is expected to yield a falsy result."""
        self.assertFalse(self.app._contains_recursive_ref({}, "Person", {}))

    def test_no_ref_with_none_input(self):
        """Passing None as the schema is expected to yield a falsy result."""
        self.assertFalse(self.app._contains_recursive_ref(None, "Person", {}))
89
+
90
+
91
class TestV1CircularDefsDetection(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._check_for_circular_defs``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_no_circular_refs(self):
        """Two independent definitions are expected to give a falsy result."""
        definitions = {
            "Person": {"type": "object", "properties": {"name": {"type": "string"}}},
            "Address": {"type": "object", "properties": {"street": {"type": "string"}}},
        }
        self.assertFalse(self.app._check_for_circular_defs(definitions))

    def test_self_referencing_def(self):
        """A definition referencing itself twice is expected to be reported as circular."""
        definitions = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "left": {"$ref": "#/$defs/TreeNode"},
                    "right": {"$ref": "#/$defs/TreeNode"},
                },
            },
        }
        self.assertTrue(self.app._check_for_circular_defs(definitions))

    def test_mutually_recursive_defs(self):
        """Two definitions referencing each other are expected to be reported as circular."""
        definitions = {
            "Person": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "address": {"$ref": "#/$defs/Address"},
                },
            },
            "Address": {
                "type": "object",
                "properties": {
                    "street": {"type": "string"},
                    "resident": {"$ref": "#/$defs/Person"},
                },
            },
        }
        self.assertTrue(self.app._check_for_circular_defs(definitions))

    def test_empty_defs(self):
        """An empty definitions dict is expected to give a falsy result."""
        self.assertFalse(self.app._check_for_circular_defs({}))

    def test_none_defs(self):
        """None in place of the definitions dict is expected to give a falsy result."""
        self.assertFalse(self.app._check_for_circular_defs(None))
162
+
163
+
164
class TestV1ResolveRefs(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._resolve_refs``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_resolve_simple_ref(self):
        """A $ref property is expected to be replaced by the referenced definition."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
        }
        definitions = {
            "Person": {"type": "object", "properties": {"name": {"type": "string"}}},
        }
        resolved = self.app._resolve_refs(schema, definitions)
        self.assertEqual(resolved["properties"]["person"]["type"], "object")
        self.assertIn("name", resolved["properties"]["person"]["properties"])

    def test_resolve_nested_refs(self):
        """A $ref whose target contains another $ref: the outer one resolves to an object."""
        schema = {
            "type": "object",
            "properties": {"data": {"$ref": "#/$defs/Data"}},
        }
        definitions = {
            "Data": {
                "type": "object",
                "properties": {"person": {"$ref": "#/$defs/Person"}},
            },
            "Person": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        resolved = self.app._resolve_refs(schema, definitions)
        self.assertEqual(resolved["properties"]["data"]["type"], "object")

    def test_resolve_refs_in_array(self):
        """An array schema with a $ref in ``items`` keeps its array shape."""
        schema = {"type": "array", "items": {"$ref": "#/$defs/Person"}}
        definitions = {
            "Person": {"type": "object", "properties": {"name": {"type": "string"}}},
        }
        resolved = self.app._resolve_refs(schema, definitions)
        # Only the overall shape is pinned: a dict, still an array, still has items.
        self.assertIsInstance(resolved, dict)
        self.assertEqual(resolved["type"], "array")
        self.assertIn("items", resolved)

    def test_resolve_refs_max_depth(self):
        """A chain of refs (A -> B -> C -> D) resolved from depth 0 returns something."""
        definitions = {
            "A": {"$ref": "#/$defs/B"},
            "B": {"$ref": "#/$defs/C"},
            "C": {"$ref": "#/$defs/D"},
            "D": {"type": "string"},
        }
        entry = {"$ref": "#/$defs/A"}
        resolved = self.app._resolve_refs(entry, definitions, depth=0)
        # How far the chain is followed is not pinned, only that a value comes back.
        self.assertIsNotNone(resolved)

    def test_resolve_refs_with_circular_ref(self):
        """A self-referencing definition is expected to resolve without raising."""
        definitions = {
            "TreeNode": {
                "type": "object",
                "properties": {
                    "value": {"type": "string"},
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
            },
        }
        entry = {"$ref": "#/$defs/TreeNode"}
        # The result may still contain a $ref; only a non-None return is required.
        resolved = self.app._resolve_refs(entry, definitions)
        self.assertIsNotNone(resolved)

    def test_resolve_refs_no_defs(self):
        """With no definitions the result is expected to equal the input schema."""
        schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        self.assertEqual(self.app._resolve_refs(schema, {}), schema)

    def test_resolve_refs_with_none(self):
        """None in place of a schema is expected to come back as None."""
        self.assertIsNone(self.app._resolve_refs(None, {}))

    def test_resolve_refs_skips_defs_key(self):
        """The top-level ``$defs`` key is expected to be absent from the resolved result."""
        embedded_defs = {"Person": {"type": "object"}}
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
            # Same object is passed as the definitions argument below.
            "$defs": embedded_defs,
        }
        resolved = self.app._resolve_refs(schema, embedded_defs)
        self.assertNotIn("$defs", resolved)
299
+
300
+
301
class TestV1NormalizeSchemaForOpenAI(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._normalize_schema_for_openai``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_normalize_removes_additional_properties(self):
        """``additionalProperties: True`` next to ``properties`` is expected to be dropped."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "additionalProperties": True,
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized)

    def test_normalize_preserves_additional_properties_false(self):
        """``additionalProperties: False`` is expected to survive normalization."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "additionalProperties": False,
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIn("additionalProperties", normalized)
        self.assertFalse(normalized["additionalProperties"])

    def test_normalize_removes_invalid_required(self):
        """``required`` entries with no matching property are expected to be filtered out."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name", "age", "email"],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertEqual(normalized["required"], ["name"])

    def test_normalize_removes_empty_required(self):
        """``required`` naming only an undeclared property is expected to be dropped."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["age"],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("required", normalized)

    def test_normalize_preserves_ref(self):
        """A bare ``$ref`` schema is expected to come back equal to the input."""
        schema = {"$ref": "#/$defs/Person"}
        self.assertEqual(self.app._normalize_schema_for_openai(schema), schema)

    def test_normalize_handles_defs(self):
        """``$defs`` is expected to be kept, with its entries normalized as well."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
            },
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIn("$defs", normalized)
        self.assertNotIn("additionalProperties", normalized["$defs"]["Person"])

    def test_normalize_nested_objects(self):
        """Normalization is expected to reach an object nested under ``properties``."""
        schema = {
            "type": "object",
            "properties": {
                "person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
            },
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized["properties"]["person"])

    def test_normalize_arrays_with_objects(self):
        """Normalization is expected to reach object schemas listed under ``anyOf``."""
        schema = {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
                {"type": "string"},
            ],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized["anyOf"][0])

    def test_normalize_with_none(self):
        """None is expected to pass through as None."""
        self.assertIsNone(self.app._normalize_schema_for_openai(None))

    def test_normalize_with_non_dict(self):
        """A non-dict value (here a string) is expected to be returned as-is."""
        self.assertEqual(self.app._normalize_schema_for_openai("string"), "string")
427
+
428
+
429
class TestV1ValidateSchemaForOpenAI(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._validate_schema_for_openai``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_valid_schema_with_properties(self):
        """An object schema that declares properties is expected to validate."""
        schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_valid_schema_with_ref(self):
        """An object schema whose property is a $ref is expected to validate."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
        }
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_invalid_schema_no_properties_with_additional_properties(self):
        """An object with only ``additionalProperties: True`` is expected to be rejected."""
        schema = {"type": "object", "additionalProperties": True}
        self.assertFalse(self.app._validate_schema_for_openai(schema))

    def test_valid_schema_with_pattern_properties(self):
        """``patternProperties`` plus ``additionalProperties: True`` is expected to validate."""
        schema = {
            "type": "object",
            "patternProperties": {"^[a-z]+$": {"type": "string"}},
            "additionalProperties": True,
        }
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_invalid_nested_schema(self):
        """An invalid object nested under ``properties`` is expected to fail the whole schema."""
        schema = {
            "type": "object",
            "properties": {
                "data": {"type": "object", "additionalProperties": True},
            },
        }
        self.assertFalse(self.app._validate_schema_for_openai(schema))

    def test_invalid_schema_in_array(self):
        """An invalid object listed under ``anyOf`` is expected to fail the whole schema."""
        schema = {
            "anyOf": [
                {"type": "object", "additionalProperties": True},
                {"type": "string"},
            ],
        }
        self.assertFalse(self.app._validate_schema_for_openai(schema))

    def test_valid_with_none(self):
        """None is expected to be treated as valid."""
        self.assertTrue(self.app._validate_schema_for_openai(None))

    def test_valid_with_non_dict(self):
        """A non-dict value (here a string) is expected to be treated as valid."""
        self.assertTrue(self.app._validate_schema_for_openai("string"))
516
+
517
+
518
class TestV1DetectRecursiveSchema(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._detect_recursive_schema``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_detect_ref(self):
        """A schema containing a ``$ref`` is expected to be detected."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
        }
        self.assertTrue(self.app._detect_recursive_schema(schema))

    def test_detect_defs(self):
        """A schema carrying ``$defs`` is expected to be detected even without a $ref."""
        schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        self.assertTrue(self.app._detect_recursive_schema(schema))

    def test_detect_definitions(self):
        """A schema carrying ``definitions`` instead of ``$defs`` is expected to be detected."""
        schema = {
            "type": "object",
            "definitions": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        self.assertTrue(self.app._detect_recursive_schema(schema))

    def test_no_recursion(self):
        """A flat schema with none of those keys is expected not to be detected."""
        schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        self.assertFalse(self.app._detect_recursive_schema(schema))

    def test_with_none(self):
        """None is expected to give a falsy result."""
        self.assertFalse(self.app._detect_recursive_schema(None))

    def test_with_non_dict(self):
        """A non-dict value (here a string) is expected to give a falsy result."""
        self.assertFalse(self.app._detect_recursive_schema("string"))
587
+
588
+
589
class TestV1SelectModelForSchema(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._select_model_for_schema``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_no_schema(self):
        """Without a schema the selection is gpt-4o-mini with reason ``no_schema``."""
        selection = self.app._select_model_for_schema(None)
        self.assertEqual(selection["modelName"], "gpt-4o-mini")
        self.assertEqual(selection["reason"], "no_schema")

    def test_simple_schema(self):
        """A flat schema selects gpt-4o-mini with reason ``simple_schema``."""
        schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        selection = self.app._select_model_for_schema(schema)
        self.assertEqual(selection["modelName"], "gpt-4o-mini")
        self.assertEqual(selection["reason"], "simple_schema")

    def test_recursive_schema(self):
        """A schema using $ref/$defs selects gpt-4o with reason ``recursive_schema_detected``."""
        schema = {
            "type": "object",
            "properties": {"person": {"$ref": "#/$defs/Person"}},
            "$defs": {
                "Person": {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                },
            },
        }
        selection = self.app._select_model_for_schema(schema)
        self.assertEqual(selection["modelName"], "gpt-4o")
        self.assertEqual(selection["reason"], "recursive_schema_detected")
631
+
632
+
633
class TestV1ProcessSchemaWithValidation(unittest.TestCase):
    """Checks for ``V1FirecrawlApp._process_schema_with_validation``."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_process_valid_schema(self):
        """A container holding a valid schema keeps its ``schema`` key."""
        container = {
            "schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        processed = self.app._process_schema_with_validation(container)
        self.assertIn("schema", processed)

    def test_process_invalid_schema(self):
        """An invalid schema is expected to raise ValueError with the OpenAI message."""
        container = {
            "schema": {"type": "object", "additionalProperties": True},
        }
        with self.assertRaises(ValueError) as raised:
            self.app._process_schema_with_validation(container)
        self.assertIn("invalid structure for OpenAI", str(raised.exception))

    def test_process_recursive_schema(self):
        """A container with a self-referencing ``$defs`` entry still yields ``schema``."""
        container = {
            "schema": {
                "type": "object",
                "properties": {
                    "children": {
                        "type": "array",
                        "items": {"$ref": "#/$defs/TreeNode"},
                    },
                },
                "$defs": {
                    "TreeNode": {
                        "type": "object",
                        "properties": {
                            "value": {"type": "string"},
                            "children": {
                                "type": "array",
                                "items": {"$ref": "#/$defs/TreeNode"},
                            },
                        },
                    },
                },
            },
        }
        processed = self.app._process_schema_with_validation(container)
        self.assertIn("schema", processed)

    def test_process_resolves_non_recursive_refs(self):
        """A container with a non-recursive $ref still yields ``schema``."""
        container = {
            "schema": {
                "type": "object",
                "properties": {"person": {"$ref": "#/$defs/Person"}},
                "$defs": {
                    "Person": {
                        "type": "object",
                        "properties": {"name": {"type": "string"}},
                    },
                },
            },
        }
        processed = self.app._process_schema_with_validation(container)
        self.assertIn("schema", processed)

    def test_process_no_schema_key(self):
        """A container without the schema key is expected to come back equal to the input."""
        container = {"other_key": "value"}
        self.assertEqual(self.app._process_schema_with_validation(container), container)

    def test_process_non_dict_container(self):
        """A non-dict container is expected to come back equal to the input."""
        container = "not a dict"
        self.assertEqual(self.app._process_schema_with_validation(container), container)

    def test_process_with_custom_schema_key(self):
        """The ``schema_key`` argument selects which container key holds the schema."""
        container = {
            "custom_schema": {
                "type": "object",
                "properties": {"name": {"type": "string"}},
            },
        }
        processed = self.app._process_schema_with_validation(
            container, schema_key="custom_schema"
        )
        self.assertIn("custom_schema", processed)
743
+
744
+
745
class TestV1EdgeCases(unittest.TestCase):
    """Edge-case and error-handling checks for ``V1FirecrawlApp`` schema helpers."""

    def setUp(self):
        """Create the client under test; the key is read from TEST_API_KEY if set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_deeply_nested_schema(self):
        """Normalizing a schema nested three objects deep returns a value."""
        # Built inside-out; the innermost object carries additionalProperties.
        level3 = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "additionalProperties": True,
        }
        level2 = {"type": "object", "properties": {"level3": level3}}
        level1 = {"type": "object", "properties": {"level2": level2}}
        schema = {"type": "object", "properties": {"level1": level1}}

        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

    def test_circular_reference_doesnt_hang(self):
        """Normalize and validate both return on a self-referencing schema."""
        schema = {
            "$ref": "#/$defs/TreeNode",
            "$defs": {
                "TreeNode": {
                    "type": "object",
                    "properties": {
                        "left": {"$ref": "#/$defs/TreeNode"},
                        "right": {"$ref": "#/$defs/TreeNode"},
                    },
                },
            },
        }
        # Reaching each assertion at all is the point: neither call may loop forever.
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)

        validity = self.app._validate_schema_for_openai(schema)
        self.assertIsNotNone(validity)

    def test_empty_schema(self):
        """An empty schema normalizes to ``{}`` and is treated as valid."""
        empty = {}
        self.assertEqual(self.app._normalize_schema_for_openai(empty), {})
        self.assertTrue(self.app._validate_schema_for_openai(empty))

    def test_schema_with_complex_anyOf(self):
        """The first of two object alternatives under ``anyOf`` loses additionalProperties."""
        schema = {
            "anyOf": [
                {
                    "type": "object",
                    "properties": {"name": {"type": "string"}},
                    "additionalProperties": True,
                },
                {
                    "type": "object",
                    "properties": {"id": {"type": "number"}},
                },
            ],
        }
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertNotIn("additionalProperties", normalized["anyOf"][0])

    def test_openai_schema_error_message_constant(self):
        """The class-level error message exists and mentions the OpenAI structure problem."""
        message = V1FirecrawlApp.OPENAI_SCHEMA_ERROR_MESSAGE
        self.assertIsNotNone(message)
        self.assertIn("invalid structure for OpenAI", message)
832
+
833
+
834
class TestV1RecursionStressTests(unittest.TestCase):
    """Stress scenarios for the V1 client's recursive-schema helpers.

    Each test feeds an awkward JSON schema (deep ``$ref`` chains, cycles,
    broken references, ...) to the private schema helpers on
    ``V1FirecrawlApp`` and checks that they return a usable result.
    """

    def setUp(self):
        """Build the client under test, preferring TEST_API_KEY when it is set."""
        api_key = os.environ.get('TEST_API_KEY', 'test-key')
        self.app = V1FirecrawlApp(api_key=api_key)

    def test_very_deep_reference_chain(self):
        """A 20-link ``$ref`` chain must resolve and normalize to a non-None value."""
        chain_length = 20
        last = chain_length - 1

        # Level0 -> Level1 -> ... -> Level19; only the final level is concrete.
        defs = {
            f"Level{i}": {"$ref": f"#/$defs/Level{i + 1}"}
            for i in range(last)
        }
        defs[f"Level{last}"] = {"type": "string"}

        schema = {"$ref": "#/$defs/Level0", "$defs": defs}

        # The test only demands a non-None result; it does not require the
        # chain to be fully resolved.
        self.assertIsNotNone(self.app._resolve_refs(schema, defs))
        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

    def test_multiple_circular_paths(self):
        """Several distinct cycles through two definitions are reported and survivable."""
        node = {
            "type": "object",
            "properties": {
                "parent": {"$ref": "#/$defs/Node"},
                "child": {"$ref": "#/$defs/Node"},
                "sibling": {"$ref": "#/$defs/Node"},
                "related": {"$ref": "#/$defs/RelatedNode"},
            },
        }
        related_node = {
            "type": "object",
            "properties": {
                "backref": {"$ref": "#/$defs/Node"},
                "self": {"$ref": "#/$defs/RelatedNode"},
            },
        }
        defs = {"Node": node, "RelatedNode": related_node}

        # The cycle checker is expected to flag these definitions.
        self.assertTrue(self.app._check_for_circular_defs(defs))

        # Normalization has to terminate and hand back something.
        schema = {"$ref": "#/$defs/Node", "$defs": defs}
        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

    def test_recursive_in_oneOf_allOf(self):
        """Self-references reached through ``oneOf`` / ``allOf`` normalize and validate."""
        type_a = {
            "type": "object",
            "properties": {
                "nested": {"$ref": "#/$defs/TypeA"},
            },
        }
        type_b = {
            "type": "object",
            "allOf": [
                {"$ref": "#/$defs/TypeA"},
                {"properties": {"extra": {"type": "string"}}},
            ],
        }
        schema = {
            "oneOf": [
                {"$ref": "#/$defs/TypeA"},
                {"$ref": "#/$defs/TypeB"},
            ],
            "$defs": {"TypeA": type_a, "TypeB": type_b},
        }

        # The top-level oneOf must still be present after normalization.
        normalized = self.app._normalize_schema_for_openai(schema)
        self.assertIsNotNone(normalized)
        self.assertIn("oneOf", normalized)

        # The original (un-normalized) schema is expected to validate.
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_invalid_reference_doesnt_crash(self):
        """A ``$ref`` pointing at a missing definition must not raise."""
        defs = {"Existing": {"type": "string"}}
        schema = {
            "type": "object",
            "properties": {
                "broken": {"$ref": "#/$defs/NonExistent"},
            },
            "$defs": defs,
        }

        # Both helpers only need to return something rather than blow up.
        self.assertIsNotNone(self.app._resolve_refs(schema, schema.get("$defs", {})))
        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

    def test_malformed_reference_format(self):
        """Syntactically bogus ``$ref`` values must not break normalization."""
        # Property name -> bad reference target; the last one is not even a string.
        malformed_targets = {
            "bad1": "not-a-valid-ref",
            "bad2": "#/wrong/path",
            "bad3": 12345,
        }
        schema = {
            "type": "object",
            "properties": {
                prop_name: {"$ref": bad_target}
                for prop_name, bad_target in malformed_targets.items()
            },
        }

        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

    def test_linked_list_pattern(self):
        """Linked-list schema with a recursive, nullable ``next`` pointer."""

        def optional_node():
            # A fresh dict on every call so no sub-schema object is shared.
            return {
                "oneOf": [
                    {"$ref": "#/$defs/Node"},
                    {"type": "null"},
                ]
            }

        schema = {
            "type": "object",
            "properties": {
                "value": {"type": "string"},
                "next": optional_node(),
            },
            "$defs": {
                "Node": {
                    "type": "object",
                    "properties": {
                        "value": {"type": "string"},
                        "next": optional_node(),
                    },
                },
            },
        }

        # Expected to be recognised as recursive ...
        self.assertTrue(self.app._detect_recursive_schema(schema))

        # ... to be routed to the gpt-4o model ...
        model_info = self.app._select_model_for_schema(schema)
        self.assertEqual(model_info["modelName"], "gpt-4o")

        # ... and to normalize to a non-None result.
        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

    def test_graph_pattern_with_multiple_node_types(self):
        """Graph schema whose node and edge definitions reference each other."""

        def array_of(target):
            # Array schema whose items point at ``target``; new dicts per call.
            return {"type": "array", "items": {"$ref": target}}

        graph_node = {
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "neighbors": array_of("#/$defs/GraphNode"),
                "edges": array_of("#/$defs/Edge"),
            },
        }
        edge = {
            "type": "object",
            "properties": {
                "from": {"$ref": "#/$defs/GraphNode"},
                "to": {"$ref": "#/$defs/GraphNode"},
            },
        }
        schema = {
            "type": "object",
            "properties": {
                "nodes": array_of("#/$defs/GraphNode"),
                "edges": array_of("#/$defs/Edge"),
            },
            "$defs": {"GraphNode": graph_node, "Edge": edge},
        }

        # The definitions are expected to be flagged as circular.
        self.assertTrue(self.app._check_for_circular_defs(schema.get("$defs", {})))

        # Normalization must terminate with a result.
        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

        # The schema is expected to pass validation.
        self.assertTrue(self.app._validate_schema_for_openai(schema))

    def test_mutual_recursion_three_way(self):
        """Three definitions forming the cycle A -> B -> C -> A."""
        # (definition name, property name, reference target)
        links = (
            ("TypeA", "toB", "#/$defs/TypeB"),
            ("TypeB", "toC", "#/$defs/TypeC"),
            ("TypeC", "toA", "#/$defs/TypeA"),
        )
        defs = {
            def_name: {
                "type": "object",
                "properties": {prop_name: {"$ref": target}},
            }
            for def_name, prop_name, target in links
        }

        # The three-step cycle is expected to be detected.
        self.assertTrue(self.app._check_for_circular_defs(defs))

        # Normalization must terminate with a result.
        schema = {"$ref": "#/$defs/TypeA", "$defs": defs}
        self.assertIsNotNone(self.app._normalize_schema_for_openai(schema))

    def test_required_fields_cleanup_without_defs(self):
        """Without ``$defs``, unknown ``required`` entries are expected to be dropped."""
        properties = {
            "name": {"type": "string"},
            "age": {"type": "number"},
        }
        schema = {
            "type": "object",
            "properties": properties,
            "required": ["name", "age", "nonexistent_field"],
            "additionalProperties": True,
        }

        normalized = self.app._normalize_schema_for_openai(schema)

        # Only the names that exist under "properties" should remain.
        self.assertIn("required", normalized)
        self.assertEqual(normalized["required"], ["name", "age"])
        # additionalProperties: true is expected to be stripped.
        self.assertNotIn("additionalProperties", normalized)

    def test_required_fields_in_nested_defs(self):
        """``required`` cleanup is expected inside individual ``$defs`` entries."""
        node = {
            "type": "object",
            "properties": {
                "value": {"type": "string"},
                "nested": {"$ref": "#/$defs/Node"},
            },
            "required": ["value", "another_nonexistent"],
            "additionalProperties": True,
        }
        schema = {
            "type": "object",
            "properties": {
                "data": {"$ref": "#/$defs/Node"},
            },
            "$defs": {"Node": node},
        }

        normalized = self.app._normalize_schema_for_openai(schema)

        # The definition must survive normalization ...
        self.assertIn("$defs", normalized)
        self.assertIn("Node", normalized["$defs"])
        cleaned_node = normalized["$defs"]["Node"]

        # ... with its required list reduced to the declared property ...
        self.assertIn("required", cleaned_node)
        self.assertEqual(cleaned_node["required"], ["value"])

        # ... and additionalProperties: true removed.
        self.assertNotIn("additionalProperties", cleaned_node)

    def test_required_fields_cleanup_limitation_with_defs(self):
        """
        Pin a known limitation: root-level ``required`` is left untouched
        when the schema also carries ``$defs``.

        The original author attributes this to an early return taken while
        ``$defs`` is processed during normalization. The test records the
        behavior as it stands; if it is judged to be a bug, fix the
        implementation so root-level ``required`` is cleaned even with
        ``$defs`` present, and update this test accordingly.
        """
        schema = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "child": {"$ref": "#/$defs/Node"},
            },
            # "nonexistent_field" deliberately names no declared property.
            "required": ["name", "child", "nonexistent_field"],
            "$defs": {
                "Node": {
                    "type": "object",
                    "properties": {
                        "value": {"type": "string"},
                    },
                },
            },
        }

        normalized = self.app._normalize_schema_for_openai(schema)

        # As things stand, the invalid entry is still listed at the root.
        self.assertIn("required", normalized)
        self.assertIn("nonexistent_field", normalized["required"])

        # If this behavior is changed, change the code and this test together.

    def test_same_object_referenced_multiple_times(self):
        """One definition referenced from three properties resolves without trouble."""
        person_def = {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
            },
        }
        people = ("person1", "person2", "person3")

        schema = {
            "type": "object",
            # Each property gets its own {"$ref": ...} dict.
            "properties": {name: {"$ref": "#/$defs/Person"} for name in people},
            "$defs": {"Person": person_def},
        }

        resolved = self.app._resolve_refs(schema, schema.get("$defs", {}))
        self.assertIsNotNone(resolved)

        # NOTE(review): the per-property checks are skipped entirely when the
        # result has no "properties" key, so this part can pass vacuously.
        if "properties" not in resolved:
            return
        for name in people:
            self.assertIn(name, resolved["properties"])
1205
+
1206
+
1207
# Run the suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
1209
+