firecrawl 2.11.0__py3-none-any.whl → 2.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +20 -0
- {firecrawl-2.11.0.dist-info → firecrawl-2.12.0.dist-info}/METADATA +1 -1
- {firecrawl-2.11.0.dist-info → firecrawl-2.12.0.dist-info}/RECORD +7 -7
- {firecrawl-2.11.0.dist-info → firecrawl-2.12.0.dist-info}/LICENSE +0 -0
- {firecrawl-2.11.0.dist-info → firecrawl-2.12.0.dist-info}/WHEEL +0 -0
- {firecrawl-2.11.0.dist-info → firecrawl-2.12.0.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
|
@@ -13,7 +13,7 @@ import os
|
|
|
13
13
|
|
|
14
14
|
from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.11.0"
|
|
16
|
+
__version__ = "2.12.0"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
firecrawl/firecrawl.py
CHANGED
|
@@ -273,6 +273,7 @@ class CrawlParams(pydantic.BaseModel):
|
|
|
273
273
|
regexOnFullURL: Optional[bool] = None
|
|
274
274
|
delay: Optional[int] = None # Delay in seconds between scrapes
|
|
275
275
|
maxConcurrency: Optional[int] = None
|
|
276
|
+
allowSubdomains: Optional[bool] = None
|
|
276
277
|
|
|
277
278
|
class CrawlResponse(pydantic.BaseModel):
|
|
278
279
|
"""Response from crawling operations."""
|
|
@@ -708,6 +709,7 @@ class FirecrawlApp:
|
|
|
708
709
|
ignore_query_parameters: Optional[bool] = None,
|
|
709
710
|
regex_on_full_url: Optional[bool] = None,
|
|
710
711
|
delay: Optional[int] = None,
|
|
712
|
+
allow_subdomains: Optional[bool] = None,
|
|
711
713
|
max_concurrency: Optional[int] = None,
|
|
712
714
|
poll_interval: Optional[int] = 2,
|
|
713
715
|
idempotency_key: Optional[str] = None,
|
|
@@ -733,6 +735,7 @@ class FirecrawlApp:
|
|
|
733
735
|
ignore_query_parameters (Optional[bool]): Ignore URL parameters
|
|
734
736
|
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
|
735
737
|
delay (Optional[int]): Delay in seconds between scrapes
|
|
738
|
+
allow_subdomains (Optional[bool]): Follow subdomains
|
|
736
739
|
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
|
737
740
|
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
|
738
741
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
@@ -783,6 +786,8 @@ class FirecrawlApp:
|
|
|
783
786
|
crawl_params['regexOnFullURL'] = regex_on_full_url
|
|
784
787
|
if delay is not None:
|
|
785
788
|
crawl_params['delay'] = delay
|
|
789
|
+
if allow_subdomains is not None:
|
|
790
|
+
crawl_params['allowSubdomains'] = allow_subdomains
|
|
786
791
|
if max_concurrency is not None:
|
|
787
792
|
crawl_params['maxConcurrency'] = max_concurrency
|
|
788
793
|
|
|
@@ -827,6 +832,8 @@ class FirecrawlApp:
|
|
|
827
832
|
ignore_query_parameters: Optional[bool] = None,
|
|
828
833
|
regex_on_full_url: Optional[bool] = None,
|
|
829
834
|
delay: Optional[int] = None,
|
|
835
|
+
allow_subdomains: Optional[bool] = None,
|
|
836
|
+
max_concurrency: Optional[int] = None,
|
|
830
837
|
idempotency_key: Optional[str] = None,
|
|
831
838
|
**kwargs
|
|
832
839
|
) -> CrawlResponse:
|
|
@@ -850,6 +857,7 @@ class FirecrawlApp:
|
|
|
850
857
|
ignore_query_parameters (Optional[bool]): Ignore URL parameters
|
|
851
858
|
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
|
852
859
|
delay (Optional[int]): Delay in seconds between scrapes
|
|
860
|
+
allow_subdomains (Optional[bool]): Follow subdomains
|
|
853
861
|
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
|
854
862
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
855
863
|
**kwargs: Additional parameters to pass to the API
|
|
@@ -900,6 +908,8 @@ class FirecrawlApp:
|
|
|
900
908
|
crawl_params['regexOnFullURL'] = regex_on_full_url
|
|
901
909
|
if delay is not None:
|
|
902
910
|
crawl_params['delay'] = delay
|
|
911
|
+
if allow_subdomains is not None:
|
|
912
|
+
crawl_params['allowSubdomains'] = allow_subdomains
|
|
903
913
|
if max_concurrency is not None:
|
|
904
914
|
crawl_params['maxConcurrency'] = max_concurrency
|
|
905
915
|
|
|
@@ -1080,6 +1090,7 @@ class FirecrawlApp:
|
|
|
1080
1090
|
ignore_query_parameters: Optional[bool] = None,
|
|
1081
1091
|
regex_on_full_url: Optional[bool] = None,
|
|
1082
1092
|
delay: Optional[int] = None,
|
|
1093
|
+
allow_subdomains: Optional[bool] = None,
|
|
1083
1094
|
max_concurrency: Optional[int] = None,
|
|
1084
1095
|
idempotency_key: Optional[str] = None,
|
|
1085
1096
|
**kwargs
|
|
@@ -1104,6 +1115,7 @@ class FirecrawlApp:
|
|
|
1104
1115
|
ignore_query_parameters (Optional[bool]): Ignore URL parameters
|
|
1105
1116
|
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
|
1106
1117
|
delay (Optional[int]): Delay in seconds between scrapes
|
|
1118
|
+
allow_subdomains (Optional[bool]): Follow subdomains
|
|
1107
1119
|
max_concurrency (Optional[int]): Maximum number of concurrent scrapes
|
|
1108
1120
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
1109
1121
|
**kwargs: Additional parameters to pass to the API
|
|
@@ -1130,6 +1142,7 @@ class FirecrawlApp:
|
|
|
1130
1142
|
ignore_query_parameters=ignore_query_parameters,
|
|
1131
1143
|
regex_on_full_url=regex_on_full_url,
|
|
1132
1144
|
delay=delay,
|
|
1145
|
+
allow_subdomains=allow_subdomains,
|
|
1133
1146
|
max_concurrency=max_concurrency,
|
|
1134
1147
|
idempotency_key=idempotency_key,
|
|
1135
1148
|
**kwargs
|
|
@@ -3325,6 +3338,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3325
3338
|
ignore_query_parameters: Optional[bool] = None,
|
|
3326
3339
|
regex_on_full_url: Optional[bool] = None,
|
|
3327
3340
|
delay: Optional[int] = None,
|
|
3341
|
+
allow_subdomains: Optional[bool] = None,
|
|
3328
3342
|
poll_interval: Optional[int] = 2,
|
|
3329
3343
|
idempotency_key: Optional[str] = None,
|
|
3330
3344
|
**kwargs
|
|
@@ -3349,6 +3363,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3349
3363
|
ignore_query_parameters (Optional[bool]): Ignore URL parameters
|
|
3350
3364
|
regex_on_full_url (Optional[bool]): Apply regex to full URLs
|
|
3351
3365
|
delay (Optional[int]): Delay in seconds between scrapes
|
|
3366
|
+
allow_subdomains (Optional[bool]): Follow subdomains
|
|
3352
3367
|
poll_interval (Optional[int]): Seconds between status checks (default: 2)
|
|
3353
3368
|
idempotency_key (Optional[str]): Unique key to prevent duplicate requests
|
|
3354
3369
|
**kwargs: Additional parameters to pass to the API
|
|
@@ -3398,6 +3413,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3398
3413
|
crawl_params['regexOnFullURL'] = regex_on_full_url
|
|
3399
3414
|
if delay is not None:
|
|
3400
3415
|
crawl_params['delay'] = delay
|
|
3416
|
+
if allow_subdomains is not None:
|
|
3417
|
+
crawl_params['allowSubdomains'] = allow_subdomains
|
|
3401
3418
|
|
|
3402
3419
|
# Add any additional kwargs
|
|
3403
3420
|
crawl_params.update(kwargs)
|
|
@@ -3441,6 +3458,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3441
3458
|
ignore_query_parameters: Optional[bool] = None,
|
|
3442
3459
|
regex_on_full_url: Optional[bool] = None,
|
|
3443
3460
|
delay: Optional[int] = None,
|
|
3461
|
+
allow_subdomains: Optional[bool] = None,
|
|
3444
3462
|
poll_interval: Optional[int] = 2,
|
|
3445
3463
|
idempotency_key: Optional[str] = None,
|
|
3446
3464
|
**kwargs
|
|
@@ -3510,6 +3528,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
|
|
|
3510
3528
|
crawl_params['regexOnFullURL'] = regex_on_full_url
|
|
3511
3529
|
if delay is not None:
|
|
3512
3530
|
crawl_params['delay'] = delay
|
|
3531
|
+
if allow_subdomains is not None:
|
|
3532
|
+
crawl_params['allowSubdomains'] = allow_subdomains
|
|
3513
3533
|
|
|
3514
3534
|
# Add any additional kwargs
|
|
3515
3535
|
crawl_params.update(kwargs)
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=
|
|
2
|
-
firecrawl/firecrawl.py,sha256=
|
|
1
|
+
firecrawl/__init__.py,sha256=5Sj24ou9gppD7_rOTf_L3RUWHP-Woz_YUD9NA2wEhKc,2613
|
|
2
|
+
firecrawl/firecrawl.py,sha256=IT2CI7V-GeD64K4lH2f7f6vxy5znzQ8cdq8mfAJYn34,195396
|
|
3
3
|
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
|
|
5
5
|
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=k9IsEbdTHL9Cu49M4FpnQDEo2rnG6RqwmZAsK_EVJr4,21069
|
|
7
7
|
tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
|
|
8
|
-
firecrawl-2.
|
|
9
|
-
firecrawl-2.
|
|
10
|
-
firecrawl-2.
|
|
11
|
-
firecrawl-2.
|
|
12
|
-
firecrawl-2.11.0.dist-info/RECORD,,
|
|
8
|
+
firecrawl-2.12.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
9
|
+
firecrawl-2.12.0.dist-info/METADATA,sha256=EFAoKU-yJrZJcxvmibVF9pK-PeAY0qoFBNyzAS8pOpQ,7166
|
|
10
|
+
firecrawl-2.12.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
11
|
+
firecrawl-2.12.0.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
|
|
12
|
+
firecrawl-2.12.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|