firecrawl 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +2 -2
- firecrawl/firecrawl.py +32 -33
- {firecrawl-2.0.1.dist-info → firecrawl-2.0.2.dist-info}/METADATA +1 -1
- {firecrawl-2.0.1.dist-info → firecrawl-2.0.2.dist-info}/RECORD +7 -7
- {firecrawl-2.0.1.dist-info → firecrawl-2.0.2.dist-info}/LICENSE +0 -0
- {firecrawl-2.0.1.dist-info → firecrawl-2.0.2.dist-info}/WHEEL +0 -0
- {firecrawl-2.0.1.dist-info → firecrawl-2.0.2.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
|
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
|
|
|
11
11
|
import logging
|
|
12
12
|
import os
|
|
13
13
|
|
|
14
|
-
from .firecrawl import FirecrawlApp, ExtractConfig # noqa
|
|
14
|
+
from .firecrawl import FirecrawlApp, JsonConfig # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.0.1"
|
|
16
|
+
__version__ = "2.0.2"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
firecrawl/firecrawl.py
CHANGED
|
@@ -27,7 +27,7 @@ from pydantic import Field
|
|
|
27
27
|
# Suppress Pydantic warnings about attribute shadowing
|
|
28
28
|
warnings.filterwarnings("ignore", message="Field name \"json\" in \"FirecrawlDocument\" shadows an attribute in parent \"BaseModel\"")
|
|
29
29
|
warnings.filterwarnings("ignore", message="Field name \"json\" in \"ChangeTrackingData\" shadows an attribute in parent \"BaseModel\"")
|
|
30
|
-
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractConfig\" shadows an attribute in parent \"BaseModel\"")
|
|
30
|
+
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"JsonConfig\" shadows an attribute in parent \"BaseModel\"")
|
|
31
31
|
warnings.filterwarnings("ignore", message="Field name \"schema\" in \"ExtractParams\" shadows an attribute in parent \"BaseModel\"")
|
|
32
32
|
|
|
33
33
|
|
|
@@ -84,7 +84,6 @@ T = TypeVar('T')
|
|
|
84
84
|
# statusCode: Optional[int] = None
|
|
85
85
|
# error: Optional[str] = None
|
|
86
86
|
|
|
87
|
-
|
|
88
87
|
class AgentOptions(pydantic.BaseModel):
|
|
89
88
|
"""Configuration for the agent."""
|
|
90
89
|
model: Literal["FIRE-1"] = "FIRE-1"
|
|
@@ -187,7 +186,7 @@ class ExtractAgent(pydantic.BaseModel):
|
|
|
187
186
|
"""Configuration for the agent in extract operations."""
|
|
188
187
|
model: Literal["FIRE-1"] = "FIRE-1"
|
|
189
188
|
|
|
190
|
-
class ExtractConfig(pydantic.BaseModel):
|
|
189
|
+
class JsonConfig(pydantic.BaseModel):
|
|
191
190
|
"""Configuration for extraction."""
|
|
192
191
|
prompt: Optional[str] = None
|
|
193
192
|
schema: Optional[Any] = None
|
|
@@ -196,8 +195,8 @@ class ExtractConfig(pydantic.BaseModel):
|
|
|
196
195
|
|
|
197
196
|
class ScrapeParams(CommonOptions):
|
|
198
197
|
"""Parameters for scraping operations."""
|
|
199
|
-
extract: Optional[ExtractConfig] = None
|
|
200
|
-
jsonOptions: Optional[ExtractConfig] = None
|
|
198
|
+
extract: Optional[JsonConfig] = None
|
|
199
|
+
jsonOptions: Optional[JsonConfig] = None
|
|
201
200
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None
|
|
202
201
|
agent: Optional[AgentOptions] = None
|
|
203
202
|
|
|
@@ -455,8 +454,8 @@ class FirecrawlApp:
|
|
|
455
454
|
remove_base64_images: Optional[bool] = None,
|
|
456
455
|
block_ads: Optional[bool] = None,
|
|
457
456
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
458
|
-
extract: Optional[ExtractConfig] = None,
|
|
459
|
-
json_options: Optional[ExtractConfig] = None,
|
|
457
|
+
extract: Optional[JsonConfig] = None,
|
|
458
|
+
json_options: Optional[JsonConfig] = None,
|
|
460
459
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
461
460
|
**kwargs) -> ScrapeResponse[Any]:
|
|
462
461
|
"""
|
|
@@ -476,8 +475,8 @@ class FirecrawlApp:
|
|
|
476
475
|
remove_base64_images (Optional[bool]): Remove base64 images
|
|
477
476
|
block_ads (Optional[bool]): Block ads
|
|
478
477
|
proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
|
|
479
|
-
extract (Optional[ExtractConfig]): Content extraction settings
|
|
480
|
-
json_options (Optional[ExtractConfig]): JSON extraction settings
|
|
478
|
+
extract (Optional[JsonConfig]): Content extraction settings
|
|
479
|
+
json_options (Optional[JsonConfig]): JSON extraction settings
|
|
481
480
|
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
|
482
481
|
|
|
483
482
|
|
|
@@ -1162,8 +1161,8 @@ class FirecrawlApp:
|
|
|
1162
1161
|
remove_base64_images: Optional[bool] = None,
|
|
1163
1162
|
block_ads: Optional[bool] = None,
|
|
1164
1163
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
1165
|
-
extract: Optional[ExtractConfig] = None,
|
|
1166
|
-
json_options: Optional[ExtractConfig] = None,
|
|
1164
|
+
extract: Optional[JsonConfig] = None,
|
|
1165
|
+
json_options: Optional[JsonConfig] = None,
|
|
1167
1166
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
1168
1167
|
agent: Optional[AgentOptions] = None,
|
|
1169
1168
|
poll_interval: Optional[int] = 2,
|
|
@@ -1188,8 +1187,8 @@ class FirecrawlApp:
|
|
|
1188
1187
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
1189
1188
|
block_ads (Optional[bool]): Block advertisements
|
|
1190
1189
|
proxy (Optional[Literal]): Proxy type to use
|
|
1191
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
1192
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
1190
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
1191
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
1193
1192
|
actions (Optional[List[Union]]): Actions to perform
|
|
1194
1193
|
agent (Optional[AgentOptions]): Agent configuration
|
|
1195
1194
|
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
|
@@ -1286,8 +1285,8 @@ class FirecrawlApp:
|
|
|
1286
1285
|
remove_base64_images: Optional[bool] = None,
|
|
1287
1286
|
block_ads: Optional[bool] = None,
|
|
1288
1287
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
1289
|
-
extract: Optional[ExtractConfig] = None,
|
|
1290
|
-
json_options: Optional[ExtractConfig] = None,
|
|
1288
|
+
extract: Optional[JsonConfig] = None,
|
|
1289
|
+
json_options: Optional[JsonConfig] = None,
|
|
1291
1290
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
1292
1291
|
agent: Optional[AgentOptions] = None,
|
|
1293
1292
|
idempotency_key: Optional[str] = None,
|
|
@@ -1311,8 +1310,8 @@ class FirecrawlApp:
|
|
|
1311
1310
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
1312
1311
|
block_ads (Optional[bool]): Block advertisements
|
|
1313
1312
|
proxy (Optional[Literal]): Proxy type to use
|
|
1314
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
1315
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
1313
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
1314
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
1316
1315
|
actions (Optional[List[Union]]): Actions to perform
|
|
1317
1316
|
agent (Optional[AgentOptions]): Agent configuration
|
|
1318
1317
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -1408,8 +1407,8 @@ class FirecrawlApp:
|
|
|
1408
1407
|
remove_base64_images: Optional[bool] = None,
|
|
1409
1408
|
block_ads: Optional[bool] = None,
|
|
1410
1409
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
1411
|
-
extract: Optional[ExtractConfig] = None,
|
|
1412
|
-
json_options: Optional[ExtractConfig] = None,
|
|
1410
|
+
extract: Optional[JsonConfig] = None,
|
|
1411
|
+
json_options: Optional[JsonConfig] = None,
|
|
1413
1412
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
1414
1413
|
agent: Optional[AgentOptions] = None,
|
|
1415
1414
|
idempotency_key: Optional[str] = None,
|
|
@@ -1433,8 +1432,8 @@ class FirecrawlApp:
|
|
|
1433
1432
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
1434
1433
|
block_ads (Optional[bool]): Block advertisements
|
|
1435
1434
|
proxy (Optional[Literal]): Proxy type to use
|
|
1436
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
1437
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
1435
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
1436
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
1438
1437
|
actions (Optional[List[Union]]): Actions to perform
|
|
1439
1438
|
agent (Optional[AgentOptions]): Agent configuration
|
|
1440
1439
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -2707,8 +2706,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2707
2706
|
remove_base64_images: Optional[bool] = None,
|
|
2708
2707
|
block_ads: Optional[bool] = None,
|
|
2709
2708
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
2710
|
-
extract: Optional[ExtractConfig] = None,
|
|
2711
|
-
json_options: Optional[ExtractConfig] = None,
|
|
2709
|
+
extract: Optional[JsonConfig] = None,
|
|
2710
|
+
json_options: Optional[JsonConfig] = None,
|
|
2712
2711
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None) -> ScrapeResponse[Any]:
|
|
2713
2712
|
"""
|
|
2714
2713
|
Scrape and extract content from a URL asynchronously.
|
|
@@ -2727,8 +2726,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2727
2726
|
remove_base64_images (Optional[bool]): Remove base64 images
|
|
2728
2727
|
block_ads (Optional[bool]): Block ads
|
|
2729
2728
|
proxy (Optional[Literal["basic", "stealth"]]): Proxy type (basic/stealth)
|
|
2730
|
-
extract (Optional[ExtractConfig]): Content extraction settings
|
|
2731
|
-
json_options (Optional[ExtractConfig]): JSON extraction settings
|
|
2729
|
+
extract (Optional[JsonConfig]): Content extraction settings
|
|
2730
|
+
json_options (Optional[JsonConfig]): JSON extraction settings
|
|
2732
2731
|
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
|
2733
2732
|
|
|
2734
2733
|
Returns:
|
|
@@ -2821,8 +2820,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2821
2820
|
remove_base64_images: Optional[bool] = None,
|
|
2822
2821
|
block_ads: Optional[bool] = None,
|
|
2823
2822
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
2824
|
-
extract: Optional[ExtractConfig] = None,
|
|
2825
|
-
json_options: Optional[ExtractConfig] = None,
|
|
2823
|
+
extract: Optional[JsonConfig] = None,
|
|
2824
|
+
json_options: Optional[JsonConfig] = None,
|
|
2826
2825
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
2827
2826
|
agent: Optional[AgentOptions] = None,
|
|
2828
2827
|
poll_interval: Optional[int] = 2,
|
|
@@ -2847,8 +2846,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2847
2846
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
2848
2847
|
block_ads (Optional[bool]): Block advertisements
|
|
2849
2848
|
proxy (Optional[Literal]): Proxy type to use
|
|
2850
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
2851
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
2849
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
2850
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
2852
2851
|
actions (Optional[List[Union]]): Actions to perform
|
|
2853
2852
|
agent (Optional[AgentOptions]): Agent configuration
|
|
2854
2853
|
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
|
@@ -2950,8 +2949,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2950
2949
|
remove_base64_images: Optional[bool] = None,
|
|
2951
2950
|
block_ads: Optional[bool] = None,
|
|
2952
2951
|
proxy: Optional[Literal["basic", "stealth"]] = None,
|
|
2953
|
-
extract: Optional[ExtractConfig] = None,
|
|
2954
|
-
json_options: Optional[ExtractConfig] = None,
|
|
2952
|
+
extract: Optional[JsonConfig] = None,
|
|
2953
|
+
json_options: Optional[JsonConfig] = None,
|
|
2955
2954
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
2956
2955
|
agent: Optional[AgentOptions] = None,
|
|
2957
2956
|
idempotency_key: Optional[str] = None,
|
|
@@ -2975,8 +2974,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
2975
2974
|
remove_base64_images (Optional[bool]): Remove base64 encoded images
|
|
2976
2975
|
block_ads (Optional[bool]): Block advertisements
|
|
2977
2976
|
proxy (Optional[Literal]): Proxy type to use
|
|
2978
|
-
extract (Optional[ExtractConfig]): Content extraction config
|
|
2979
|
-
json_options (Optional[ExtractConfig]): JSON extraction config
|
|
2977
|
+
extract (Optional[JsonConfig]): Content extraction config
|
|
2978
|
+
json_options (Optional[JsonConfig]): JSON extraction config
|
|
2980
2979
|
actions (Optional[List[Union]]): Actions to perform
|
|
2981
2980
|
agent (Optional[AgentOptions]): Agent configuration
|
|
2982
2981
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=
|
|
2
|
-
firecrawl/firecrawl.py,sha256=
|
|
1
|
+
firecrawl/__init__.py,sha256=xqJCUbvvJi2ruXSmRS3Q9V7uZ0CpjK8gUFheUx0qXow,2555
|
|
2
|
+
firecrawl/firecrawl.py,sha256=k_yRUlYssszubZuZZ0rMOyOJsRb7kDTxo5JZPEc8yJY,176255
|
|
3
3
|
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
|
|
5
5
|
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
|
|
7
7
|
tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
|
|
8
|
-
firecrawl-2.0.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
9
|
-
firecrawl-2.0.
|
|
10
|
-
firecrawl-2.0.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
11
|
-
firecrawl-2.0.1.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
12
|
-
firecrawl-2.0.1.dist-info/RECORD,,
|
|
8
|
+
firecrawl-2.0.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
9
|
+
firecrawl-2.0.2.dist-info/METADATA,sha256=IVvPFtJZ9dMCNAKbI0dT9VpaC2RAZgE0iFUd_Gr_K6c,10583
|
|
10
|
+
firecrawl-2.0.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
11
|
+
firecrawl-2.0.2.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
12
|
+
firecrawl-2.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|