firecrawl 2.4.3__py3-none-any.whl → 2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of firecrawl might be problematic.

Files changed (29)
  1. build/lib/build/lib/build/lib/firecrawl/__init__.py +79 -0
  2. build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  3. build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +170 -0
  4. build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  5. build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +440 -0
  6. build/lib/build/lib/build/lib/firecrawl/firecrawl.py +4439 -0
  7. build/lib/build/lib/build/lib/tests/test_change_tracking.py +98 -0
  8. build/lib/build/lib/firecrawl/__init__.py +79 -0
  9. build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  10. build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py +170 -0
  11. build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  12. build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +440 -0
  13. build/lib/build/lib/firecrawl/firecrawl.py +4439 -0
  14. build/lib/build/lib/tests/test_change_tracking.py +98 -0
  15. build/lib/firecrawl/__init__.py +79 -0
  16. build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
  17. build/lib/firecrawl/__tests__/e2e_withAuth/test.py +170 -0
  18. build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
  19. build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py +440 -0
  20. build/lib/firecrawl/firecrawl.py +4439 -0
  21. build/lib/tests/test_change_tracking.py +98 -0
  22. firecrawl/__init__.py +2 -2
  23. firecrawl/firecrawl.py +132 -65
  24. {firecrawl-2.4.3.dist-info → firecrawl-2.5.1.dist-info}/METADATA +1 -1
  25. firecrawl-2.5.1.dist-info/RECORD +33 -0
  26. {firecrawl-2.4.3.dist-info → firecrawl-2.5.1.dist-info}/top_level.txt +2 -0
  27. firecrawl-2.4.3.dist-info/RECORD +0 -12
  28. {firecrawl-2.4.3.dist-info → firecrawl-2.5.1.dist-info}/LICENSE +0 -0
  29. {firecrawl-2.4.3.dist-info → firecrawl-2.5.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,98 @@
+ import unittest
+ from unittest.mock import patch, MagicMock
+ import json
+ import os
+ from firecrawl import FirecrawlApp
+
+ class TestChangeTracking(unittest.TestCase):
+     @patch('requests.post')
+     def test_change_tracking_format(self, mock_post):
+         mock_response = MagicMock()
+         mock_response.status_code = 200
+         mock_response.json.return_value = {
+             'success': True,
+             'data': {
+                 'markdown': 'Test markdown content',
+                 'changeTracking': {
+                     'previousScrapeAt': '2023-01-01T00:00:00Z',
+                     'changeStatus': 'changed',
+                     'visibility': 'visible'
+                 }
+             }
+         }
+         mock_post.return_value = mock_response
+
+         app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
+         result = app.scrape_url('https://example.com', {
+             'formats': ['markdown', 'changeTracking']
+         })
+
+         args, kwargs = mock_post.call_args
+         self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
+
+         self.assertEqual(result['changeTracking']['previousScrapeAt'], '2023-01-01T00:00:00Z')
+         self.assertEqual(result['changeTracking']['changeStatus'], 'changed')
+         self.assertEqual(result['changeTracking']['visibility'], 'visible')
+
+     @patch('requests.post')
+     def test_change_tracking_options(self, mock_post):
+         mock_response = MagicMock()
+         mock_response.status_code = 200
+         mock_response.json.return_value = {
+             'success': True,
+             'data': {
+                 'markdown': 'Test markdown content',
+                 'changeTracking': {
+                     'previousScrapeAt': '2023-01-01T00:00:00Z',
+                     'changeStatus': 'changed',
+                     'visibility': 'visible',
+                     'diff': {
+                         'text': '@@ -1,1 +1,1 @@\n-old content\n+new content',
+                         'json': {
+                             'files': [{
+                                 'from': None,
+                                 'to': None,
+                                 'chunks': [{
+                                     'content': '@@ -1,1 +1,1 @@',
+                                     'changes': [{
+                                         'type': 'del',
+                                         'content': '-old content',
+                                         'del': True,
+                                         'ln': 1
+                                     }, {
+                                         'type': 'add',
+                                         'content': '+new content',
+                                         'add': True,
+                                         'ln': 1
+                                     }]
+                                 }]
+                             }]
+                         }
+                     },
+                     'json': {
+                         'title': {
+                             'previous': 'Old Title',
+                             'current': 'New Title'
+                         }
+                     }
+                 }
+             }
+         }
+         mock_post.return_value = mock_response
+
+         app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
+         result = app.scrape_url('https://example.com', {
+             'formats': ['markdown', 'changeTracking'],
+             'changeTrackingOptions': {
+                 'modes': ['git-diff', 'json'],
+                 'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}}
+             }
+         })
+
+         args, kwargs = mock_post.call_args
+         self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
+         self.assertEqual(kwargs['json']['changeTrackingOptions']['modes'], ['git-diff', 'json'])
+
+         self.assertEqual(result['changeTracking']['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
+         self.assertEqual(result['changeTracking']['json']['title']['previous'], 'Old Title')
+         self.assertEqual(result['changeTracking']['json']['title']['current'], 'New Title')
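The tests above stub requests.post; against the live API, the equivalent call is sketched below. This is a minimal sketch, assuming a real key in the FIRECRAWL_API_KEY environment variable and a URL that has been scraped before, so there is a prior snapshot to diff against; the call shape and field access mirror the mocked tests.

    import os
    from firecrawl import FirecrawlApp

    app = FirecrawlApp(api_key=os.environ['FIRECRAWL_API_KEY'])

    # Same call shape as the mocked tests above.
    result = app.scrape_url('https://example.com', {
        'formats': ['markdown', 'changeTracking']
    })

    tracking = result['changeTracking']
    print(tracking['changeStatus'])      # e.g. 'changed', as in the mocked payload
    print(tracking['previousScrapeAt'])  # timestamp of the prior scrape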
firecrawl/__init__.py CHANGED
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
  import logging
  import os
 
- from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa
+ from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
- __version__ = "2.4.3"
+ __version__ = "2.5.1"
 
  # Define the logger for the Firecrawl project
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py CHANGED
@@ -29,7 +29,7 @@ warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDoc
  warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
  warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
  warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
-
+ warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")
 
  def get_version():
      try:
@@ -135,6 +135,12 @@ class WebhookConfig(pydantic.BaseModel):
      metadata: Optional[Dict[str, str]] = None
      events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None
 
+ class ChangeTrackingOptions(pydantic.BaseModel):
+     """Configuration for change tracking."""
+     modes: Optional[List[Literal["git-diff", "json"]]] = None
+     schema: Optional[Any] = None
+     prompt: Optional[str] = None
+
  class ScrapeOptions(pydantic.BaseModel):
      """Parameters for scraping operations."""
      formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None
@@ -150,6 +156,7 @@ class ScrapeOptions(pydantic.BaseModel):
      removeBase64Images: Optional[bool] = None
      blockAds: Optional[bool] = None
      proxy: Optional[Literal["basic", "stealth"]] = None
+     changeTrackingOptions: Optional[ChangeTrackingOptions] = None
 
  class WaitAction(pydantic.BaseModel):
      """Wait action to perform during scraping."""
@@ -454,6 +461,7 @@ class FirecrawlApp:
          extract: Optional[JsonConfig] = None,
          json_options: Optional[JsonConfig] = None,
          actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
+         change_tracking_options: Optional[ChangeTrackingOptions] = None,
          **kwargs) -> ScrapeResponse[Any]:
          """
          Scrape and extract content from a URL.
@@ -475,6 +483,7 @@ class FirecrawlApp:
              extract (Optional[JsonConfig]): Content extraction settings
              json_options (Optional[JsonConfig]): JSON extraction settings
              actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
+             change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
 
 
          Returns:
@@ -520,18 +529,28 @@ class FirecrawlApp:
              scrape_params['blockAds'] = block_ads
          if proxy:
              scrape_params['proxy'] = proxy
-         if extract:
-             if hasattr(extract.schema, 'schema'):
-                 extract.schema = extract.schema.schema()
-             scrape_params['extract'] = extract.dict(exclude_none=True)
-         if json_options:
-             if hasattr(json_options.schema, 'schema'):
-                 json_options.schema = json_options.schema.schema()
-             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+         if extract is not None:
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+         if json_options is not None:
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
+         if change_tracking_options:
+             scrape_params['changeTrackingOptions'] = change_tracking_options.dict(exclude_none=True)
+
          scrape_params.update(kwargs)
 
+         if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+             scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+         if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+             scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
          # Make request
          response = requests.post(
              f'{self.api_url}/v1/scrape',
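End to end, the new keyword threads through to the request body as changeTrackingOptions. A hedged usage sketch, with a placeholder API key, assuming the formats keyword that the method_params table later in this diff lists as accepted by scrape_url:

    from firecrawl import FirecrawlApp, ChangeTrackingOptions

    app = FirecrawlApp(api_key='fc-YOUR-KEY')  # placeholder key

    # 'changeTracking' must also be requested as a format; the options object
    # is serialized with dict(exclude_none=True), as shown above.
    result = app.scrape_url(
        'https://example.com',
        formats=['markdown', 'changeTracking'],
        change_tracking_options=ChangeTrackingOptions(modes=['git-diff']),
    )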
@@ -1240,13 +1259,15 @@ class FirecrawlApp:
          if proxy is not None:
              scrape_params['proxy'] = proxy
          if extract is not None:
-             if hasattr(extract.schema, 'schema'):
-                 extract.schema = extract.schema.schema()
-             scrape_params['extract'] = extract.dict(exclude_none=True)
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
          if json_options is not None:
-             if hasattr(json_options.schema, 'schema'):
-                 json_options.schema = json_options.schema.schema()
-             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions is not None:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
          if agent is not None:
@@ -1261,6 +1282,11 @@ class FirecrawlApp:
          params_dict['urls'] = urls
          params_dict['origin'] = f"python-sdk@{version}"
 
+         if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+             params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+         if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+             params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
          # Make request
          headers = self._prepare_headers(idempotency_key)
          response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1366,13 +1392,15 @@ class FirecrawlApp:
          if proxy is not None:
              scrape_params['proxy'] = proxy
          if extract is not None:
-             if hasattr(extract.schema, 'schema'):
-                 extract.schema = extract.schema.schema()
-             scrape_params['extract'] = extract.dict(exclude_none=True)
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
          if json_options is not None:
-             if hasattr(json_options.schema, 'schema'):
-                 json_options.schema = json_options.schema.schema()
-             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions is not None:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
          if agent is not None:
@@ -1387,6 +1415,11 @@ class FirecrawlApp:
          params_dict['urls'] = urls
          params_dict['origin'] = f"python-sdk@{version}"
 
+         if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+             params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+         if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+             params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
          # Make request
          headers = self._prepare_headers(idempotency_key)
          response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1487,13 +1520,15 @@ class FirecrawlApp:
          if proxy is not None:
              scrape_params['proxy'] = proxy
          if extract is not None:
-             if hasattr(extract.schema, 'schema'):
-                 extract.schema = extract.schema.schema()
-             scrape_params['extract'] = extract.dict(exclude_none=True)
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
          if json_options is not None:
-             if hasattr(json_options.schema, 'schema'):
-                 json_options.schema = json_options.schema.schema()
-             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions is not None:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
          if agent is not None:
@@ -1508,6 +1543,11 @@ class FirecrawlApp:
          params_dict['urls'] = urls
          params_dict['origin'] = f"python-sdk@{version}"
 
+         if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+             params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+         if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+             params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
          # Make request
          headers = self._prepare_headers(idempotency_key)
          response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1594,7 +1634,7 @@ class FirecrawlApp:
              id (str): The ID of the crawl job.
 
          Returns:
-             CrawlErrorsResponse: A response containing:
+             CrawlErrorsResponse containing:
              * errors (List[Dict[str, str]]): List of errors with fields:
                  * id (str): Error ID
                  * timestamp (str): When the error occurred
@@ -1657,10 +1697,7 @@ class FirecrawlApp:
              raise ValueError("Either urls or prompt is required")
 
          if schema:
-             if hasattr(schema, 'model_json_schema'):
-                 # Convert Pydantic model to JSON schema
-                 schema = schema.model_json_schema()
-             # Otherwise assume it's already a JSON schema dict
+             schema = self._ensure_schema_dict(schema)
 
          request_data = {
              'urls': urls or [],
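With the conversion centralized, a Pydantic model class can be handed to the extract flow directly. A hedged sketch: the urls, prompt, and schema names appear in the surrounding code, but the exact keyword signature of extract is not shown in this diff, and the PageSummary model is illustrative.

    import pydantic
    from firecrawl import FirecrawlApp

    class PageSummary(pydantic.BaseModel):
        """Illustrative extraction schema; a plain JSON-schema dict works too."""
        title: str
        summary: str

    app = FirecrawlApp(api_key='fc-YOUR-KEY')  # placeholder key

    # The model class is converted to a JSON schema via _ensure_schema_dict
    # before the request body is built.
    result = app.extract(
        urls=['https://example.com'],
        prompt='Summarize the page',
        schema=PageSummary,
    )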
@@ -1789,10 +1826,7 @@ class FirecrawlApp:
 
          schema = schema
          if schema:
-             if hasattr(schema, 'model_json_schema'):
-                 # Convert Pydantic model to JSON schema
-                 schema = schema.model_json_schema()
-             # Otherwise assume it's already a JSON schema dict
+             schema = self._ensure_schema_dict(schema)
 
          request_data = {
              'urls': urls,
@@ -2424,7 +2458,7 @@ class FirecrawlApp:
          method_params = {
              "scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
                             "timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
-                            "block_ads", "proxy", "extract", "json_options", "actions"},
+                            "block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options"},
              "search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
              "crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
                            "allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
@@ -2455,6 +2489,24 @@ class FirecrawlApp:
          # Additional type validation can be added here if needed
          # For now, we rely on Pydantic models for detailed type validation
 
+     def _ensure_schema_dict(self, schema):
+         """
+         Utility to ensure a schema is a dict, not a Pydantic model class. Recursively checks dicts and lists.
+         """
+         if schema is None:
+             return schema
+         if isinstance(schema, type):
+             # Pydantic v1/v2 model class
+             if hasattr(schema, 'model_json_schema'):
+                 return schema.model_json_schema()
+             elif hasattr(schema, 'schema'):
+                 return schema.schema()
+         if isinstance(schema, dict):
+             return {k: self._ensure_schema_dict(v) for k, v in schema.items()}
+         if isinstance(schema, (list, tuple)):
+             return [self._ensure_schema_dict(v) for v in schema]
+         return schema
+
  class CrawlWatcher:
      """
      A class to watch and handle crawl job events via WebSocket connection.
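A standalone sketch of what the new helper does. The Article model is illustrative, and the helper itself makes no network calls, so a placeholder key is fine; behavior follows directly from the branches above.

    import pydantic
    from firecrawl import FirecrawlApp

    class Article(pydantic.BaseModel):
        """Illustrative model standing in for a user-defined extraction schema."""
        title: str

    app = FirecrawlApp(api_key='fc-YOUR-KEY')  # placeholder; no request is made

    # A Pydantic model class nested anywhere in a dict is replaced by its JSON schema...
    converted = app._ensure_schema_dict({'schema': Article, 'modes': ['json']})
    assert isinstance(converted['schema'], dict)  # e.g. {'properties': {'title': ...}, ...}

    # ...while plain dicts and scalars pass through unchanged.
    assert app._ensure_schema_dict({'type': 'object'}) == {'type': 'object'}
    assert app._ensure_schema_dict(None) is None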
@@ -2861,19 +2913,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
              scrape_params['blockAds'] = block_ads
          if proxy:
              scrape_params['proxy'] = proxy
-         if extract:
-             extract_dict = extract.dict(exclude_none=True)
-             if 'schema' in extract_dict and hasattr(extract.schema, 'schema'):
-                 extract_dict['schema'] = extract.schema.schema() # Ensure pydantic model schema is converted
-             scrape_params['extract'] = extract_dict
-         if json_options:
-             json_options_dict = json_options.dict(exclude_none=True)
-             if 'schema' in json_options_dict and hasattr(json_options.schema, 'schema'):
-                 json_options_dict['schema'] = json_options.schema.schema() # Ensure pydantic model schema is converted
-             scrape_params['jsonOptions'] = json_options_dict
+         if extract is not None:
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+         if json_options is not None:
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
 
+         if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+             scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+         if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+             scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
          # Make async request
          endpoint = f'/v1/scrape'
          response = await self._async_post_request(
@@ -2984,13 +3041,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
          if proxy is not None:
              scrape_params['proxy'] = proxy
          if extract is not None:
-             if hasattr(extract.schema, 'schema'):
-                 extract.schema = extract.schema.schema()
-             scrape_params['extract'] = extract.dict(exclude_none=True)
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
          if json_options is not None:
-             if hasattr(json_options.schema, 'schema'):
-                 json_options.schema = json_options.schema.schema()
-             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions is not None:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
          if agent is not None:
@@ -3005,6 +3064,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
          params_dict['urls'] = urls
          params_dict['origin'] = f"python-sdk@{version}"
 
+         if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+             params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+         if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+             params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
          # Make request
          headers = self._prepare_headers(idempotency_key)
          response = await self._async_post_request(
@@ -3115,13 +3179,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
          if proxy is not None:
              scrape_params['proxy'] = proxy
          if extract is not None:
-             if hasattr(extract.schema, 'schema'):
-                 extract.schema = extract.schema.schema()
-             scrape_params['extract'] = extract.dict(exclude_none=True)
+             extract = self._ensure_schema_dict(extract)
+             if isinstance(extract, dict) and "schema" in extract:
+                 extract["schema"] = self._ensure_schema_dict(extract["schema"])
+             scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
          if json_options is not None:
-             if hasattr(json_options.schema, 'schema'):
-                 json_options.schema = json_options.schema.schema()
-             scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+             json_options = self._ensure_schema_dict(json_options)
+             if isinstance(json_options, dict) and "schema" in json_options:
+                 json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+             scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
          if actions is not None:
              scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
          if agent is not None:
@@ -3136,6 +3202,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
          params_dict['urls'] = urls
          params_dict['origin'] = f"python-sdk@{version}"
 
+         if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+             params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+         if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+             params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
          # Make request
          headers = self._prepare_headers(idempotency_key)
          response = await self._async_post_request(
@@ -3593,10 +3664,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              raise ValueError("Either urls or prompt is required")
 
          if schema:
-             if hasattr(schema, 'model_json_schema'):
-                 # Convert Pydantic model to JSON schema
-                 schema = schema.model_json_schema()
-             # Otherwise assume it's already a JSON schema dict
+             schema = self._ensure_schema_dict(schema)
 
          request_data = {
              'urls': urls or [],
@@ -3850,8 +3918,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
              raise ValueError("Either urls or prompt is required")
 
          if schema:
-             if hasattr(schema, 'model_json_schema'):
-                 schema = schema.model_json_schema()
+             schema = self._ensure_schema_dict(schema)
 
          request_data = ExtractResponse(
              urls=urls or [],
{firecrawl-2.4.3.dist-info → firecrawl-2.5.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: firecrawl
- Version: 2.4.3
+ Version: 2.5.1
  Summary: Python SDK for Firecrawl API
  Home-page: https://github.com/mendableai/firecrawl
  Author: Mendable.ai
firecrawl-2.5.1.dist-info/RECORD ADDED
@@ -0,0 +1,33 @@
+ build/lib/build/lib/build/lib/firecrawl/__init__.py,sha256=J1HgnaGyIrbk_5clFRTAb3XWfy9m3at1i8RFYzn5O0Q,2593
+ build/lib/build/lib/build/lib/firecrawl/firecrawl.py,sha256=RyUiKke08spOP6iSUgJ9_dz6l-D_dkGB4aA6UDPWiXI,188709
+ build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ build/lib/build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
+ build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ build/lib/build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
+ build/lib/build/lib/build/lib/tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
+ build/lib/build/lib/firecrawl/__init__.py,sha256=J1HgnaGyIrbk_5clFRTAb3XWfy9m3at1i8RFYzn5O0Q,2593
+ build/lib/build/lib/firecrawl/firecrawl.py,sha256=RyUiKke08spOP6iSUgJ9_dz6l-D_dkGB4aA6UDPWiXI,188709
+ build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
+ build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
+ build/lib/build/lib/tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
+ build/lib/firecrawl/__init__.py,sha256=J1HgnaGyIrbk_5clFRTAb3XWfy9m3at1i8RFYzn5O0Q,2593
+ build/lib/firecrawl/firecrawl.py,sha256=RyUiKke08spOP6iSUgJ9_dz6l-D_dkGB4aA6UDPWiXI,188709
+ build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ build/lib/firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
+ build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
+ build/lib/tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
+ firecrawl/__init__.py,sha256=J1HgnaGyIrbk_5clFRTAb3XWfy9m3at1i8RFYzn5O0Q,2593
+ firecrawl/firecrawl.py,sha256=RyUiKke08spOP6iSUgJ9_dz6l-D_dkGB4aA6UDPWiXI,188709
+ firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
+ firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
+ firecrawl-2.5.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+ firecrawl-2.5.1.dist-info/METADATA,sha256=jBHAE4mNK7Yq2NA2pPSN5_Rg_aVFztHCO1DLN5PaaQ0,7165
+ firecrawl-2.5.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ firecrawl-2.5.1.dist-info/top_level.txt,sha256=ytN_R30g2U2qZYFyIm710Z8QeK9FO1Uwa-WPGHXyqjE,27
+ firecrawl-2.5.1.dist-info/RECORD,,
{firecrawl-2.4.3.dist-info → firecrawl-2.5.1.dist-info}/top_level.txt CHANGED
@@ -1,2 +1,4 @@
+ build
+ dist
  firecrawl
  tests
firecrawl-2.4.3.dist-info/RECORD DELETED
@@ -1,12 +0,0 @@
- firecrawl/__init__.py,sha256=VW7ON6xBoqPVlchUkJMjD92NZ_tNC-XsqG-M2sIvbc8,2570
- firecrawl/firecrawl.py,sha256=Q1opxN1JxjbWLEDsSS3P5aEm4f9LEJrZyhd8UdsMVFw,182769
- firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
- tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
- firecrawl-2.4.3.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
- firecrawl-2.4.3.dist-info/METADATA,sha256=XFy99h7X3oruTzlbEWMx3uvxfk-JiQ5FZxZolp5bzSw,7165
- firecrawl-2.4.3.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- firecrawl-2.4.3.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
- firecrawl-2.4.3.dist-info/RECORD,,