firecrawl 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +2 -2
- firecrawl/firecrawl.py +32 -32
- {firecrawl-2.0.0.dist-info → firecrawl-2.0.2.dist-info}/METADATA +1 -1
- {firecrawl-2.0.0.dist-info → firecrawl-2.0.2.dist-info}/RECORD +7 -7
- {firecrawl-2.0.0.dist-info → firecrawl-2.0.2.dist-info}/LICENSE +0 -0
- {firecrawl-2.0.0.dist-info → firecrawl-2.0.2.dist-info}/WHEEL +0 -0
- {firecrawl-2.0.0.dist-info → firecrawl-2.0.2.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
|
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
|
|
|
11
11
|
import logging
|
|
12
12
|
import os
|
|
13
13
|
|
|
14
|
-
from .firecrawl import FirecrawlApp # noqa
|
|
14
|
+
from .firecrawl import FirecrawlApp, JsonConfig # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.0.0"
|
|
16
|
+
__version__ = "2.0.2"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
firecrawl/firecrawl.py
CHANGED
|
@@ -27,7 +27,7 @@ from pydantic import Field
|
|
|
27
27
|
# Suppress Pydantic warnings about attribute shadowing
|
|
28
28
|
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
|
|
29
29
|
warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
|
|
30
|
-
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractConfig\" shadows an attribute in parent \"BaseModel\"")
|
|
30
|
+
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
|
|
31
31
|
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
|
|
32
32
|
|
|
33
33
|
|
|
@@ -186,7 +186,7 @@ class ExtractAgent(pydantic.BaseModel):
|
|
|
186
186
|
"""Configuration for the agent in extract operations."""
|
|
187
187
|
model: Literal["FIRE-1"] = "FIRE-1"
|
|
188
188
|
|
|
189
|
-
class ExtractConfig(pydantic.BaseModel):
|
|
189
|
+
class JsonConfig(pydantic.BaseModel):
|
|
190
190
|
"""Configuration for extraction."""
|
|
191
191
|
prompt: Optional[str] = None
|
|
192
192
|
schema: Optional[Any] = None
|
|
@@ -195,8 +195,8 @@ class ExtractConfig(pydantic.BaseModel):
|
|
|
195
195
|
|
|
196
196
|
class ScrapeParams(CommonOptions):
|
|
197
197
|
"""Parameters for scraping operations."""
|
|
198
|
-
extract: Optional[ExtractConfig] = None
|
|
199
|
-
jsonOptions: Optional[ExtractConfig] = None
|
|
198
|
+
extract: Optional[JsonConfig] = None
|
|
199
|
+
jsonOptions: Optional[JsonConfig] = None
|
|
200
200
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None
|
|
201
201
|
agent: Optional[AgentOptions] = None
|
|
202
202
|
|
|
@@ -454,8 +454,8 @@ class FirecrawlApp:
|
|
|
454
454
|
remove_base64_images: Optional[bool] = None,
|
|
455
455
|
block_ads: Optional[bool] = None,
|
|
456
456
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
457
|
-
extract: Optional[ExtractConfig] = None,
|
|
458
|
-
json_options: Optional[ExtractConfig] = None,
|
|
457
|
+
extract: Optional[JsonConfig] = None,
|
|
458
|
+
json_options: Optional[JsonConfig] = None,
|
|
459
459
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
460
460
|
**kwargs) -> ScrapeResponse[Any]:
|
|
461
461
|
"""
|
|
@@ -475,8 +475,8 @@ class FirecrawlApp:
|
|
|
475
475
|
remove_base64_images (Optional[bool]): Remove base64 images
|
|
476
476
|
block_ads (Optional[bool]): Block ads
|
|
477
477
|
proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
|
|
478
|
-
extract (Optional[ExtractConfig]): Content extraction settings
|
|
479
|
-
json_options (Optional[ExtractConfig]): JSON extraction settings
|
|
478
|
+
extract (Optional[JsonConfig]): Content extraction settings
|
|
479
|
+
json_options (Optional[JsonConfig]): JSON extraction settings
|
|
480
480
|
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
|
481
481
|
|
|
482
482
|
|
|
@@ -1161,8 +1161,8 @@ class FirecrawlApp:
|
|
|
1161
1161
|
remove_base64_images: Optional[bool] = None,
|
|
1162
1162
|
block_ads: Optional[bool] = None,
|
|
1163
1163
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
1164
|
-
extract: Optional[ExtractConfig] = None,
|
|
1165
|
-
json_options: Optional[ExtractConfig] = None,
|
|
1164
|
+
extract: Optional[JsonConfig] = None,
|
|
1165
|
+
json_options: Optional[JsonConfig] = None,
|
|
1166
1166
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
1167
1167
|
agent: Optional[AgentOptions] = None,
|
|
1168
1168
|
poll_interval: Optional[int] = 2,
|
|
@@ -1187,8 +1187,8 @@ class FirecrawlApp:
|
|
|
1187
1187
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
1188
1188
|
block_ads (Optional[bool]): Block advertisements
|
|
1189
1189
|
proxy (Optional[Literal]): Proxy type to use
|
|
1190
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
1191
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
1190
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
1191
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
1192
1192
|
actions (Optional[List[Union]]): Actions to perform
|
|
1193
1193
|
agent (Optional[AgentOptions]): Agent configuration
|
|
1194
1194
|
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
|
@@ -1285,8 +1285,8 @@ class FirecrawlApp:
|
|
|
1285
1285
|
remove_base64_images: Optional[bool] = None,
|
|
1286
1286
|
block_ads: Optional[bool] = None,
|
|
1287
1287
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
1288
|
-
extract: Optional[ExtractConfig] = None,
|
|
1289
|
-
json_options: Optional[ExtractConfig] = None,
|
|
1288
|
+
extract: Optional[JsonConfig] = None,
|
|
1289
|
+
json_options: Optional[JsonConfig] = None,
|
|
1290
1290
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
1291
1291
|
agent: Optional[AgentOptions] = None,
|
|
1292
1292
|
idempotency_key: Optional[str] = None,
|
|
@@ -1310,8 +1310,8 @@ class FirecrawlApp:
|
|
|
1310
1310
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
1311
1311
|
block_ads (Optional[bool]): Block advertisements
|
|
1312
1312
|
proxy (Optional[Literal]): Proxy type to use
|
|
1313
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
1314
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
1313
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
1314
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
1315
1315
|
actions (Optional[List[Union]]): Actions to perform
|
|
1316
1316
|
agent (Optional[AgentOptions]): Agent configuration
|
|
1317
1317
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -1407,8 +1407,8 @@ class FirecrawlApp:
|
|
|
1407
1407
|
remove_base64_images: Optional[bool] = None,
|
|
1408
1408
|
block_ads: Optional[bool] = None,
|
|
1409
1409
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
1410
|
-
extract: Optional[ExtractConfig] = None,
|
|
1411
|
-
json_options: Optional[ExtractConfig] = None,
|
|
1410
|
+
extract: Optional[JsonConfig] = None,
|
|
1411
|
+
json_options: Optional[JsonConfig] = None,
|
|
1412
1412
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
1413
1413
|
agent: Optional[AgentOptions] = None,
|
|
1414
1414
|
idempotency_key: Optional[str] = None,
|
|
@@ -1432,8 +1432,8 @@ class FirecrawlApp:
|
|
|
1432
1432
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
1433
1433
|
block_ads (Optional[bool]): Block advertisements
|
|
1434
1434
|
proxy (Optional[Literal]): Proxy type to use
|
|
1435
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
1436
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
1435
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
1436
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
1437
1437
|
actions (Optional[List[Union]]): Actions to perform
|
|
1438
1438
|
agent (Optional[AgentOptions]): Agent configuration
|
|
1439
1439
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -2706,8 +2706,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2706
2706
|
remove_base64_images: Optional[bool] = None,
|
|
2707
2707
|
block_ads: Optional[bool] = None,
|
|
2708
2708
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
2709
|
-
extract: Optional[ExtractConfig] = None,
|
|
2710
|
-
json_options: Optional[ExtractConfig] = None,
|
|
2709
|
+
extract: Optional[JsonConfig] = None,
|
|
2710
|
+
json_options: Optional[JsonConfig] = None,
|
|
2711
2711
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]:
|
|
2712
2712
|
"""
|
|
2713
2713
|
Scrape and extract content from a URL asynchronously.
|
|
@@ -2726,8 +2726,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2726
2726
|
remove_base64_images (Optional[bool]): Remove base64 images
|
|
2727
2727
|
block_ads (Optional[bool]): Block ads
|
|
2728
2728
|
proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
|
|
2729
|
-
extract (Optional[ExtractConfig]): Content extraction settings
|
|
2730
|
-
json_options (Optional[ExtractConfig]): JSON extraction settings
|
|
2729
|
+
extract (Optional[JsonConfig]): Content extraction settings
|
|
2730
|
+
json_options (Optional[JsonConfig]): JSON extraction settings
|
|
2731
2731
|
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
|
2732
2732
|
|
|
2733
2733
|
Returns:
|
|
@@ -2820,8 +2820,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2820
2820
|
remove_base64_images: Optional[bool] = None,
|
|
2821
2821
|
block_ads: Optional[bool] = None,
|
|
2822
2822
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
2823
|
-
extract: Optional[ExtractConfig] = None,
|
|
2824
|
-
json_options: Optional[ExtractConfig] = None,
|
|
2823
|
+
extract: Optional[JsonConfig] = None,
|
|
2824
|
+
json_options: Optional[JsonConfig] = None,
|
|
2825
2825
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
2826
2826
|
agent: Optional[AgentOptions] = None,
|
|
2827
2827
|
poll_interval: Optional[int] = 2,
|
|
@@ -2846,8 +2846,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2846
2846
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
2847
2847
|
block_ads (Optional[bool]): Block advertisements
|
|
2848
2848
|
proxy (Optional[Literal]): Proxy type to use
|
|
2849
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
2850
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
2849
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
2850
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
2851
2851
|
actions (Optional[List[Union]]): Actions to perform
|
|
2852
2852
|
agent (Optional[AgentOptions]): Agent configuration
|
|
2853
2853
|
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
|
@@ -2949,8 +2949,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2949
2949
|
remove_base64_images: Optional[bool] = None,
|
|
2950
2950
|
block_ads: Optional[bool] = None,
|
|
2951
2951
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
2952
|
-
extract: Optional[ExtractConfig] = None,
|
|
2953
|
-
json_options: Optional[ExtractConfig] = None,
|
|
2952
|
+
extract: Optional[JsonConfig] = None,
|
|
2953
|
+
json_options: Optional[JsonConfig] = None,
|
|
2954
2954
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
2955
2955
|
agent: Optional[AgentOptions] = None,
|
|
2956
2956
|
idempotency_key: Optional[str] = None,
|
|
@@ -2974,8 +2974,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2974
2974
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
2975
2975
|
block_ads (Optional[bool]): Block advertisements
|
|
2976
2976
|
proxy (Optional[Literal]): Proxy type to use
|
|
2977
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
2978
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
2977
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
2978
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
2979
2979
|
actions (Optional[List[Union]]): Actions to perform
|
|
2980
2980
|
agent (Optional[AgentOptions]): Agent configuration
|
|
2981
2981
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=
|
|
2
|
-
firecrawl/firecrawl.py,sha256=
|
|
1
|
+
firecrawl/__init__.py,sha256=xqJCUbvvJi2ruXSmRS3Q9V7uZ0CpjK8gUFheUx0qXow,2555
|
|
2
|
+
firecrawl/firecrawl.py,sha256=k_yRUlYssszubZuZZ0rMOyOJsRb7kDTxo5JZPEc8yJY,176255
|
|
3
3
|
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
|
|
5
5
|
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
|
|
7
7
|
tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
|
|
8
|
-
firecrawl-2.0.
|
|
9
|
-
firecrawl-2.0.
|
|
10
|
-
firecrawl-2.0.
|
|
11
|
-
firecrawl-2.0.
|
|
12
|
-
firecrawl-2.0.
|
|
8
|
+
firecrawl-2.0.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
9
|
+
firecrawl-2.0.2.dist-info/METADATA,sha256=IVvPFtJZ9dMCNAKbI0dT9VpaC2RAZgE0iFUd_Gr_K6c,10583
|
|
10
|
+
firecrawl-2.0.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
11
|
+
firecrawl-2.0.2.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
12
|
+
firecrawl-2.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|