firecrawl 2.16.1__py3-none-any.whl → 2.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +13 -12
- {firecrawl-2.16.1.dist-info → firecrawl-2.16.3.dist-info}/METADATA +1 -1
- {firecrawl-2.16.1.dist-info → firecrawl-2.16.3.dist-info}/RECORD +7 -7
- {firecrawl-2.16.1.dist-info → firecrawl-2.16.3.dist-info}/LICENSE +0 -0
- {firecrawl-2.16.1.dist-info → firecrawl-2.16.3.dist-info}/WHEEL +0 -0
- {firecrawl-2.16.1.dist-info → firecrawl-2.16.3.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
|
@@ -13,7 +13,7 @@ import os
|
|
|
13
13
|
|
|
14
14
|
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.16.1"
|
|
16
|
+
__version__ = "2.16.3"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
firecrawl/firecrawl.py
CHANGED
|
@@ -520,6 +520,9 @@ class FirecrawlApp:
|
|
|
520
520
|
Raises:
|
|
521
521
|
Exception: If scraping fails
|
|
522
522
|
"""
|
|
523
|
+
# Validate any additional kwargs
|
|
524
|
+
self._validate_kwargs(kwargs, "scrape_url")
|
|
525
|
+
|
|
523
526
|
_headers = self._prepare_headers()
|
|
524
527
|
|
|
525
528
|
# Build scrape parameters
|
|
@@ -1372,8 +1375,8 @@ class FirecrawlApp:
|
|
|
1372
1375
|
if isinstance(json_options, dict) and "schema" in json_options:
|
|
1373
1376
|
json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
|
|
1374
1377
|
scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
|
|
1375
|
-
if actions
|
|
1376
|
-
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
|
1378
|
+
if actions:
|
|
1379
|
+
scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
|
|
1377
1380
|
if agent is not None:
|
|
1378
1381
|
scrape_params['agent'] = agent.dict(exclude_none=True)
|
|
1379
1382
|
if max_concurrency is not None:
|
|
@@ -1513,8 +1516,8 @@ class FirecrawlApp:
|
|
|
1513
1516
|
if isinstance(json_options, dict) and "schema" in json_options:
|
|
1514
1517
|
json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
|
|
1515
1518
|
scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
|
|
1516
|
-
if actions
|
|
1517
|
-
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
|
1519
|
+
if actions:
|
|
1520
|
+
scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
|
|
1518
1521
|
if agent is not None:
|
|
1519
1522
|
scrape_params['agent'] = agent.dict(exclude_none=True)
|
|
1520
1523
|
if max_concurrency is not None:
|
|
@@ -1649,8 +1652,8 @@ class FirecrawlApp:
|
|
|
1649
1652
|
if isinstance(json_options, dict) and "schema" in json_options:
|
|
1650
1653
|
json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
|
|
1651
1654
|
scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
|
|
1652
|
-
if actions
|
|
1653
|
-
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
|
1655
|
+
if actions:
|
|
1656
|
+
scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
|
|
1654
1657
|
if agent is not None:
|
|
1655
1658
|
scrape_params['agent'] = agent.dict(exclude_none=True)
|
|
1656
1659
|
if max_concurrency is not None:
|
|
@@ -2597,7 +2600,7 @@ class FirecrawlApp:
|
|
|
2597
2600
|
method_params = {
|
|
2598
2601
|
"scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
|
|
2599
2602
|
"timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
|
|
2600
|
-
"block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "integration"},
|
|
2603
|
+
"block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options", "max_age", "integration"},
|
|
2601
2604
|
"search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options", "integration"},
|
|
2602
2605
|
"crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
|
|
2603
2606
|
"allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
|
|
@@ -3074,7 +3077,6 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3074
3077
|
scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
|
|
3075
3078
|
if actions:
|
|
3076
3079
|
scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
|
|
3077
|
-
|
|
3078
3080
|
if 'extract' in scrape_params and scrape_params['extract'] and 'schema' in scrape_params['extract']:
|
|
3079
3081
|
scrape_params['extract']['schema'] = self._ensure_schema_dict(scrape_params['extract']['schema'])
|
|
3080
3082
|
if 'jsonOptions' in scrape_params and scrape_params['jsonOptions'] and 'schema' in scrape_params['jsonOptions']:
|
|
@@ -3199,8 +3201,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3199
3201
|
if isinstance(json_options, dict) and "schema" in json_options:
|
|
3200
3202
|
json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
|
|
3201
3203
|
scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
|
|
3202
|
-
|
|
3203
|
-
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
|
3204
|
+
|
|
3204
3205
|
if agent is not None:
|
|
3205
3206
|
scrape_params['agent'] = agent.dict(exclude_none=True)
|
|
3206
3207
|
|
|
@@ -3339,8 +3340,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3339
3340
|
if isinstance(json_options, dict) and "schema" in json_options:
|
|
3340
3341
|
json_options["schema"] = self._ensure_schema_dict(json_options["schema"])
|
|
3341
3342
|
scrape_params['jsonOptions'] = json_options if isinstance(json_options, dict) else json_options.dict(exclude_none=True)
|
|
3342
|
-
if actions
|
|
3343
|
-
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
|
3343
|
+
if actions:
|
|
3344
|
+
scrape_params['actions'] = [action if isinstance(action, dict) else action.dict(exclude_none=True) for action in actions]
|
|
3344
3345
|
if agent is not None:
|
|
3345
3346
|
scrape_params['agent'] = agent.dict(exclude_none=True)
|
|
3346
3347
|
if zero_data_retention is not None:
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=
|
|
2
|
-
firecrawl/firecrawl.py,sha256=
|
|
1
|
+
firecrawl/__init__.py,sha256=KDMOLo0FD65aV1-iYoeKWFMAaMzo9w3cbtlO0X_nnYY,2613
|
|
2
|
+
firecrawl/firecrawl.py,sha256=Ku6ygxlcU1huKwjfVJPPmhs7Fgauw8gFwWqU5pF4Gjs,198640
|
|
3
3
|
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
|
|
5
5
|
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=k9IsEbdTHL9Cu49M4FpnQDEo2rnG6RqwmZAsK_EVJr4,21069
|
|
7
7
|
tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
|
|
8
|
-
firecrawl-2.16.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
9
|
-
firecrawl-2.16.
|
|
10
|
-
firecrawl-2.16.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
11
|
-
firecrawl-2.16.1.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
12
|
-
firecrawl-2.16.1.dist-info/RECORD,,
|
|
8
|
+
firecrawl-2.16.3.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
9
|
+
firecrawl-2.16.3.dist-info/METADATA,sha256=iwT8adS1Q1jB_Y0DY2Snmt7F-qXb2s5siHrXTPsHXRE,7166
|
|
10
|
+
firecrawl-2.16.3.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
11
|
+
firecrawl-2.16.3.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
12
|
+
firecrawl-2.16.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|