firecrawl 2.5.0__py3-none-any.whl → 2.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

firecrawl/__init__.py CHANGED
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
 import logging
 import os
 
-from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
+from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
 
-__version__ = "2.5.0"
+__version__ = "2.5.2"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
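
The __init__.py change re-exports AsyncFirecrawlApp from the package root alongside FirecrawlApp. A minimal sketch of the import path this enables is below; the scrape_url coroutine name and the FIRECRAWL_API_KEY environment variable are illustrative assumptions, not something this diff confirms.

import asyncio
import os

# New in 2.5.2: AsyncFirecrawlApp is importable directly from the package root.
from firecrawl import AsyncFirecrawlApp


async def main() -> None:
    # api_key is read from the environment here; the scrape_url method name is
    # assumed for illustration and does not appear in this diff.
    app = AsyncFirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])
    document = await app.scrape_url("https://example.com")
    print(document)


if __name__ == "__main__":
    asyncio.run(main())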
firecrawl/firecrawl.py CHANGED
@@ -29,7 +29,7 @@ warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDoc
 warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
 warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
-
+warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ChangeTrackingOptions\" shadows an attribute in parent \"BaseModel\"")
 
 def get_version():
     try:
@@ -529,14 +529,16 @@ class FirecrawlApp:
             scrape_params['blockAds'] = block_ads
         if proxy:
             scrape_params['proxy'] = proxy
-        if extract:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
-        if json_options:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+        if extract is not None:
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+        if json_options is not None:
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if change_tracking_options:
@@ -544,6 +546,11 @@ class FirecrawlApp:
 
         scrape_params.update(kwargs)
 
+        if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+            scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+        if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+            scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
         # Make request
         response = requests.post(
             f'{self.api_url}/v1/scrape',
@@ -1252,13 +1259,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1273,6 +1282,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1378,13 +1392,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1399,6 +1415,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1499,13 +1520,15 @@ class FirecrawlApp:
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -1520,6 +1543,11 @@ class FirecrawlApp:
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = self._post_request(f'{self.api_url}/v1/batch/scrape', params_dict, headers)
@@ -1606,7 +1634,7 @@ class FirecrawlApp:
             id (str): The ID of the crawl job.
 
         Returns:
-            CrawlErrorsResponse: A response containing:
+            CrawlErrorsResponse containing:
             * errors (List[Dict[str, str]]): List of errors with fields:
                 * id (str): Error ID
                 * timestamp (str): When the error occurred
@@ -1669,10 +1697,7 @@ class FirecrawlApp:
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls or [],
@@ -1801,10 +1826,7 @@ class FirecrawlApp:
 
         schema = schema
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls,
@@ -2467,6 +2489,24 @@ class FirecrawlApp:
         # Additional type validation can be added here if needed
         # For now, we rely on Pydantic models for detailed type validation
 
+    def _ensure_schema_dict(self, schema):
+        """
+        Utility to ensure a schema is a dict, not a Pydantic model class. Recursively checks dicts and lists.
+        """
+        if schema is None:
+            return schema
+        if isinstance(schema, type):
+            # Pydantic v1/v2 model class
+            if hasattr(schema, 'model_json_schema'):
+                return schema.model_json_schema()
+            elif hasattr(schema, 'schema'):
+                return schema.schema()
+        if isinstance(schema, dict):
+            return {k: self._ensure_schema_dict(v) for k, v in schema.items()}
+        if isinstance(schema, (list, tuple)):
+            return [self._ensure_schema_dict(v) for v in schema]
+        return schema
+
 class CrawlWatcher:
     """
     A class to watch and handle crawl job events via WebSocket connection.
@@ -2873,19 +2913,24 @@ class AsyncFirecrawlApp(FirecrawlApp):
             scrape_params['blockAds'] = block_ads
         if proxy:
             scrape_params['proxy'] = proxy
-        if extract:
-            extract_dict = extract.dict(exclude_none=True)
-            if 'schema' in extract_dict and hasattr(extract.schema, 'schema'):
-                extract_dict['schema'] = extract.schema.schema() # Ensure pydantic model schema is converted
-            scrape_params['extract'] = extract_dict
-        if json_options:
-            json_options_dict = json_options.dict(exclude_none=True)
-            if 'schema' in json_options_dict and hasattr(json_options.schema, 'schema'):
-                json_options_dict['schema'] = json_options.schema.schema() # Ensure pydantic model schema is converted
-            scrape_params['jsonOptions'] = json_options_dict
+        if extract is not None:
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
+        if json_options is not None:
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
 
+        if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
+            scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
+        if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
+            scrape_params['jsonOptions']['schema'] = self._ensure_schema_dict(scrape_params['jsonOptions']['schema'])
+
         # Make async request
         endpoint = f'/v1/scrape'
         response = await self._async_post_request(
@@ -2996,13 +3041,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -3017,6 +3064,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
         params_dict['urls'] = urls
        params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = await self._async_post_request(
@@ -3127,13 +3179,15 @@ class AsyncFirecrawlApp(FirecrawlApp):
         if proxy is not None:
             scrape_params['proxy'] = proxy
         if extract is not None:
-            if hasattr(extract.schema, 'schema'):
-                extract.schema = extract.schema.schema()
-            scrape_params['extract'] = extract.dict(exclude_none=True)
+            extract = self._ensure_schema_dict(extract)
+            if isinstance(extract, dict) and "schema" in extract:
+                extract["schema"] = self._ensure_schema_dict(extract["schema"])
+            scrape_params['extract'] = extract if isinstance(extract, dict) else extract.dict(exclude_none=True)
         if json_options is not None:
-            if hasattr(json_options.schema, 'schema'):
-                json_options.schema = json_options.schema.schema()
-            scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
+            json_options = self._ensure_schema_dict(json_options)
+            if isinstance(json_options, dict) and "schema" in json_options:
+                json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
+            scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
         if actions is not None:
             scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
         if agent is not None:
@@ -3148,6 +3202,11 @@ class AsyncFirecrawlApp(FirecrawlApp):
         params_dict['urls'] = urls
         params_dict['origin'] = f"python-sdk@{version}"
 
+        if 'extract' in params_dict and params_dict['extract'] and 'schema' in params_dict['extract']:
+            params_dict['extract']['schema'] = self._ensure_schema_dict(params_dict['extract']['schema'])
+        if 'jsonOptions' in params_dict and params_dict['jsonOptions'] and 'schema' in params_dict['jsonOptions']:
+            params_dict['jsonOptions']['schema'] = self._ensure_schema_dict(params_dict['jsonOptions']['schema'])
+
         # Make request
         headers = self._prepare_headers(idempotency_key)
         response = await self._async_post_request(
@@ -3605,10 +3664,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                # Convert Pydantic model to JSON schema
-                schema = schema.model_json_schema()
-            # Otherwise assume it's already a JSON schema dict
+            schema = self._ensure_schema_dict(schema)
 
         request_data = {
             'urls': urls or [],
@@ -3862,8 +3918,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
             raise ValueError("Either urls or prompt is required")
 
         if schema:
-            if hasattr(schema, 'model_json_schema'):
-                schema = schema.model_json_schema()
+            schema = self._ensure_schema_dict(schema)
 
         request_data = ExtractResponse(
             urls=urls or [],
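
The substantive change in firecrawl.py is the new _ensure_schema_dict helper: every code path that previously special-cased Pydantic schemas for extract and jsonOptions now routes the value through it, so a schema may be a Pydantic model class (v1 or v2), a plain JSON-schema dict, or a nested structure containing either. The standalone sketch below reproduces the helper's logic outside the class to show the effect; the Product model and the demo call are illustrative only and not part of the SDK.

from pydantic import BaseModel


def ensure_schema_dict(schema):
    # Standalone copy of the _ensure_schema_dict logic added in 2.5.2:
    # Pydantic model classes become JSON-schema dicts, dicts/lists/tuples are
    # walked recursively, and anything else passes through unchanged.
    if schema is None:
        return schema
    if isinstance(schema, type):
        if hasattr(schema, 'model_json_schema'):  # Pydantic v2 model class
            return schema.model_json_schema()
        elif hasattr(schema, 'schema'):  # Pydantic v1 model class
            return schema.schema()
    if isinstance(schema, dict):
        return {k: ensure_schema_dict(v) for k, v in schema.items()}
    if isinstance(schema, (list, tuple)):
        return [ensure_schema_dict(v) for v in schema]
    return schema


class Product(BaseModel):
    # Illustrative example model, not part of the SDK.
    name: str
    price: float


# A model class nested inside a dict is converted recursively, which is what
# lets 2.5.2 accept {"schema": Product, ...} wherever a schema dict is expected.
print(ensure_schema_dict({"schema": Product, "prompt": "Extract the product"}))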
firecrawl-2.5.0.dist-info/METADATA → firecrawl-2.5.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl
-Version: 2.5.0
+Version: 2.5.2
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/mendableai/firecrawl
 Author: Mendable.ai
firecrawl-2.5.0.dist-info/RECORD → firecrawl-2.5.2.dist-info/RECORD
@@ -1,19 +1,19 @@
-build/lib/firecrawl/__init__.py,sha256=yfkIcSa7UX4IJpIIRslJk7PKQ0eiFb-SVsRRJ7YhHWY,2593
-build/lib/firecrawl/firecrawl.py,sha256=kBwRXJ78Nzi65g85EjTy_Altgx4_yc1hvXG5peRhh58,183402
+build/lib/firecrawl/__init__.py,sha256=8kp7rd0nA73D_B-PoJdqir3oSYeFukNmAB99F3KuoFQ,2612
+build/lib/firecrawl/firecrawl.py,sha256=RyUiKke08spOP6iSUgJ9_dz6l-D_dkGB4aA6UDPWiXI,188709
 build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 build/lib/firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
 build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
 build/lib/tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
-firecrawl/__init__.py,sha256=yfkIcSa7UX4IJpIIRslJk7PKQ0eiFb-SVsRRJ7YhHWY,2593
-firecrawl/firecrawl.py,sha256=kBwRXJ78Nzi65g85EjTy_Altgx4_yc1hvXG5peRhh58,183402
+firecrawl/__init__.py,sha256=8kp7rd0nA73D_B-PoJdqir3oSYeFukNmAB99F3KuoFQ,2612
+firecrawl/firecrawl.py,sha256=RyUiKke08spOP6iSUgJ9_dz6l-D_dkGB4aA6UDPWiXI,188709
 firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
 firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
-firecrawl-2.5.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
-firecrawl-2.5.0.dist-info/METADATA,sha256=LTudFMB3GWdqvS_l-1XpJDfZC4QjDrlnACVL4zl9iLk,7165
-firecrawl-2.5.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-firecrawl-2.5.0.dist-info/top_level.txt,sha256=ytN_R30g2U2qZYFyIm710Z8QeK9FO1Uwa-WPGHXyqjE,27
-firecrawl-2.5.0.dist-info/RECORD,,
+firecrawl-2.5.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+firecrawl-2.5.2.dist-info/METADATA,sha256=_H0gXBuiO69jiid-jJo1gmAwpzZ5jE3eyPVtueARwrg,7165
+firecrawl-2.5.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+firecrawl-2.5.2.dist-info/top_level.txt,sha256=ytN_R30g2U2qZYFyIm710Z8QeK9FO1Uwa-WPGHXyqjE,27
+firecrawl-2.5.2.dist-info/RECORD,,