firecrawl 4.3.2__py3-none-any.whl → 4.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic.
- firecrawl/__init__.py +1 -1
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +1 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +1 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +1 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +1 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +1 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +1 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +1 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +4 -2
- firecrawl/__tests__/e2e/v2/test_extract.py +1 -0
- firecrawl/__tests__/e2e/v2/test_map.py +1 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +1 -0
- firecrawl/__tests__/e2e/v2/test_search.py +1 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +18 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +2 -1
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +3 -2
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +2 -2
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +3 -2
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +18 -1
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +4 -2
- firecrawl/types.py +1 -3
- firecrawl/v2/client.py +20 -6
- firecrawl/v2/client_async.py +7 -1
- firecrawl/v2/methods/aio/batch.py +3 -1
- firecrawl/v2/methods/aio/crawl.py +2 -0
- firecrawl/v2/methods/aio/extract.py +7 -0
- firecrawl/v2/methods/aio/map.py +2 -0
- firecrawl/v2/methods/aio/search.py +5 -1
- firecrawl/v2/methods/batch.py +1 -1
- firecrawl/v2/methods/crawl.py +3 -0
- firecrawl/v2/methods/extract.py +7 -0
- firecrawl/v2/methods/map.py +2 -0
- firecrawl/v2/methods/search.py +4 -1
- firecrawl/v2/types.py +50 -0
- firecrawl/v2/utils/validation.py +3 -0
- {firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/METADATA +1 -1
- {firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/RECORD +40 -40
- {firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/WHEEL +0 -0
- {firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/licenses/LICENSE +0 -0
- {firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
@@ -32,6 +32,7 @@ async def test_async_extract_with_schema_and_options():
         allow_external_links=False,
         enable_web_search=False,
         show_sources=False,
+        integration="_e2e-test",
         # agent={"model": "FIRE-1", "prompt": "Extract title"}, # Skipping agent test in CI
     )
     assert res is not None
firecrawl/__tests__/e2e/v2/test_crawl.py
CHANGED

@@ -168,7 +168,8 @@ class TestCrawlE2E:
             limit=3,
             max_discovery_depth=2,
             poll_interval=1,
-            timeout=120
+            timeout=120,
+            integration="_e2e-test",
         )
 
         assert crawl_job.status in ["completed", "failed"]
@@ -257,7 +258,8 @@ class TestCrawlE2E:
             max_concurrency=2,
             webhook="https://example.com/hook",
             scrape_options=scrape_opts,
-            zero_data_retention=False
+            zero_data_retention=False,
+            integration="_e2e-test",
         )
 
         assert crawl_job.id is not None
firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py
CHANGED

@@ -59,3 +59,21 @@ class TestAsyncCrawlRequestPreparation:
         assert "metadata" not in webhook
         assert "events" not in webhook
 
+    def test_all_fields_including_integration(self):
+        req = CrawlRequest(
+            url="https://example.com",
+            include_paths=["/docs/*"],
+            exclude_paths=["/admin/*"],
+            max_discovery_depth=2,
+            sitemap="include",
+            ignore_query_parameters=True,
+            crawl_entire_domain=False,
+            allow_external_links=True,
+            allow_subdomains=True,
+            max_concurrency=3,
+            zero_data_retention=False,
+            integration=" _unit-test ",
+        )
+        payload = _prepare_crawl_request(req)
+        assert payload["integration"] == "_unit-test"
+
firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py
CHANGED

@@ -9,11 +9,12 @@ class TestAsyncMapRequestPreparation:
         assert payload["url"] == "https://example.com"
 
     def test_fields(self):
-        opts = MapOptions(search="docs", include_subdomains=True, limit=10, sitemap="only", timeout=15000)
+        opts = MapOptions(search="docs", include_subdomains=True, limit=10, sitemap="only", timeout=15000, integration=" _unit-test ")
         payload = _prepare_map_request("https://example.com", opts)
         assert payload["search"] == "docs"
         assert payload["includeSubdomains"] is True
         assert payload["limit"] == 10
         assert payload["sitemap"] == "only"
         assert payload["timeout"] == 15000
+        assert payload["integration"] == "_unit-test"
 
firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py
CHANGED

@@ -33,10 +33,12 @@ class TestAsyncSearchRequestPreparation:
             ignore_invalid_urls=False,
             timeout=30000,
             scrape_options=scrape_opts,
+            integration=" _unit-test ",
         )
         data = _prepare_search_request(request)
         assert data["ignoreInvalidURLs"] is False
         assert "scrapeOptions" in data
+        assert data["integration"] == "_unit-test"
 
     def test_exclude_none_behavior(self):
         request = SearchRequest(
@@ -59,5 +61,4 @@ class TestAsyncSearchRequestPreparation:
         assert "scrapeOptions" in data
         scrape_data = data["scrapeOptions"]
         assert "onlyMainContent" in scrape_data
-        assert "mobile" in scrape_data
-
+        assert "mobile" in scrape_data
firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py
CHANGED

@@ -76,14 +76,14 @@ class TestBatchScrapeRequestPreparation:
             ignore_invalid_urls=True,
             max_concurrency=5,
             zero_data_retention=True,
-            integration="test",
+            integration="_unit-test",
         )
         assert isinstance(data["webhook"], dict) and data["webhook"]["url"] == "https://hook.test"
         assert data["appendToId"] == "00000000-0000-0000-0000-000000000000"
         assert data["ignoreInvalidURLs"] is True
         assert data["maxConcurrency"] == 5
         assert data["zeroDataRetention"] is True
-        assert data["integration"] == "test"
+        assert data["integration"] == "_unit-test"
 
     def test_string_webhook_is_passed_verbatim(self):
         data = prepare_batch_scrape_request(["https://example.com"], webhook="https://hook.simple")
firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py
CHANGED

@@ -35,6 +35,7 @@ class TestMapRequestPreparation:
             limit=25,
             sitemap="only",
             timeout=15000,
+            integration=" _unit-test ",
         )
         data = _prepare_map_request("https://example.com", opts)
 
@@ -44,10 +45,10 @@ class TestMapRequestPreparation:
         assert data["limit"] == 25
         assert data["sitemap"] == "only"
         assert data["timeout"] == 15000
+        assert data["integration"] == "_unit-test"
 
     def test_invalid_url(self):
         with pytest.raises(ValueError):
             _prepare_map_request("")
         with pytest.raises(ValueError):
-            _prepare_map_request(" ")
-
+            _prepare_map_request(" ")
firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py
CHANGED

@@ -89,4 +89,21 @@ class TestScrapeRequestPreparation:
     def test_whitespace_url_validation(self):
         """Test validation with whitespace-only URL."""
         with pytest.raises(ValueError, match="URL cannot be empty"):
-            _prepare_scrape_request(" ")
+            _prepare_scrape_request(" ")
+
+    def test_all_params_including_integration(self):
+        opts = ScrapeOptions(
+            formats=["markdown"],
+            headers={"User-Agent": "Test"},
+            include_tags=["h1"],
+            exclude_tags=["nav"],
+            only_main_content=False,
+            timeout=15000,
+            wait_for=2000,
+            mobile=True,
+            skip_tls_verification=True,
+            remove_base64_images=False,
+            integration=" _unit-test ",
+        )
+        data = _prepare_scrape_request("https://example.com", opts)
+        assert data["integration"] == "_unit-test"
firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py
CHANGED

@@ -43,7 +43,8 @@ class TestSearchRequestPreparation:
             location="US",
             ignore_invalid_urls=False,
             timeout=30000,
-            scrape_options=scrape_opts
+            scrape_options=scrape_opts,
+            integration=" _e2e-test ",
         )
 
         data = _prepare_search_request(request)
@@ -83,6 +84,7 @@ class TestSearchRequestPreparation:
         assert scrape_data["skipTlsVerification"] is True
         assert "removeBase64Images" in scrape_data
         assert scrape_data["removeBase64Images"] is False
+        assert data["integration"] == "_e2e-test"
 
     def test_exclude_none_behavior(self):
         """Test that exclude_none=True behavior is working."""
@@ -164,4 +166,4 @@ class TestSearchRequestPreparation:
         assert "only_main_content" not in scrape_data
         assert "wait_for" not in scrape_data
         assert "skip_tls_verification" not in scrape_data
-        assert "remove_base64_images" not in scrape_data
+        assert "remove_base64_images" not in scrape_data
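The new unit tests above all pin down the same contract: a padded integration value is trimmed before it reaches the prepared request payload. A condensed, hypothetical sketch of that assertion pattern (import paths assumed from the package layout; the real tests live in the files listed at the top of this diff):

import pytest

from firecrawl.v2.types import ScrapeOptions
from firecrawl.v2.methods.scrape import _prepare_scrape_request  # assumed module path


@pytest.mark.parametrize("raw", ["_unit-test", " _unit-test "])
def test_integration_is_trimmed_in_payload(raw):
    # Mirrors the new test_all_params_including_integration assertion:
    # the prepared payload always carries the stripped value.
    opts = ScrapeOptions(formats=["markdown"], integration=raw)
    data = _prepare_scrape_request("https://example.com", opts)
    assert data["integration"] == "_unit-test"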
firecrawl/types.py
CHANGED
@@ -52,7 +52,6 @@ from .v2.types import (
     SearchResultNews,
     SearchResultImages,
     SearchData,
-    SearchResponse,
 
     # Action types
     WaitAction,
@@ -133,7 +132,6 @@ __all__ = [
     'SearchResultNews',
     'SearchResultImages',
     'SearchData',
-    'SearchResponse',
 
     # Action types
     'WaitAction',
@@ -164,4 +162,4 @@ __all__ = [
 
     # Configuration types
     'ClientConfig',
-]
+]
firecrawl/v2/client.py
CHANGED
@@ -117,6 +117,7 @@ class FirecrawlClient:
         proxy: Optional[str] = None,
         max_age: Optional[int] = None,
         store_in_cache: Optional[bool] = None,
+        integration: Optional[str] = None,
     ) -> Document:
         """
         Scrape a single URL and return the document.
@@ -165,8 +166,9 @@ class FirecrawlClient:
                 proxy=proxy,
                 max_age=max_age,
                 store_in_cache=store_in_cache,
+                integration=integration,
             ).items() if v is not None}
-        ) if any(v is not None for v in [formats, headers, include_tags, exclude_tags, only_main_content, timeout, wait_for, mobile, parsers, actions, location, skip_tls_verification, remove_base64_images, fast_mode, use_mock, block_ads, proxy, max_age, store_in_cache]) else None
+        ) if any(v is not None for v in [formats, headers, include_tags, exclude_tags, only_main_content, timeout, wait_for, mobile, parsers, actions, location, skip_tls_verification, remove_base64_images, fast_mode, use_mock, block_ads, proxy, max_age, store_in_cache, integration]) else None
         return scrape_module.scrape(self.http_client, url, options)
 
     def search(
@@ -181,6 +183,7 @@ class FirecrawlClient:
         ignore_invalid_urls: Optional[bool] = None,
         timeout: Optional[int] = None,
         scrape_options: Optional[ScrapeOptions] = None,
+        integration: Optional[str] = None,
     ) -> SearchData:
         """
         Search for documents.
@@ -206,6 +209,7 @@ class FirecrawlClient:
             ignore_invalid_urls=ignore_invalid_urls,
             timeout=timeout,
             scrape_options=scrape_options,
+            integration=integration,
         )
 
         return search_module.search(self.http_client, request)
@@ -230,7 +234,8 @@ class FirecrawlClient:
         scrape_options: Optional[ScrapeOptions] = None,
         zero_data_retention: bool = False,
         poll_interval: int = 2,
-        timeout: Optional[int] = None
+        timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ) -> CrawlJob:
         """
         Start a crawl job and wait for it to complete.
@@ -279,7 +284,8 @@ class FirecrawlClient:
             max_concurrency=max_concurrency,
             webhook=webhook,
             scrape_options=scrape_options,
-            zero_data_retention=zero_data_retention
+            zero_data_retention=zero_data_retention,
+            integration=integration,
         )
 
         return crawl_module.crawl(
@@ -307,7 +313,8 @@ class FirecrawlClient:
         max_concurrency: Optional[int] = None,
         webhook: Optional[Union[str, WebhookConfig]] = None,
         scrape_options: Optional[ScrapeOptions] = None,
-        zero_data_retention: bool = False
+        zero_data_retention: bool = False,
+        integration: Optional[str] = None,
     ) -> CrawlResponse:
         """
         Start an asynchronous crawl job.
@@ -353,7 +360,8 @@ class FirecrawlClient:
             max_concurrency=max_concurrency,
             webhook=webhook,
             scrape_options=scrape_options,
-            zero_data_retention=zero_data_retention
+            zero_data_retention=zero_data_retention,
+            integration=integration,
         )
 
         return crawl_module.start_crawl(self.http_client, request)
@@ -421,6 +429,7 @@ class FirecrawlClient:
         limit: Optional[int] = None,
         sitemap: Optional[Literal["only", "include", "skip"]] = None,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
         location: Optional[Location] = None,
     ) -> MapData:
         """Map a URL and return discovered links.
@@ -442,8 +451,9 @@ class FirecrawlClient:
             limit=limit,
             sitemap=sitemap if sitemap is not None else "include",
             timeout=timeout,
+            integration=integration,
             location=location
-        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, timeout, location]) else None
+        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, timeout, integration, location]) else None
 
         return map_module.map(self.http_client, url, options)
 
@@ -484,6 +494,7 @@ class FirecrawlClient:
         show_sources: Optional[bool] = None,
         scrape_options: Optional['ScrapeOptions'] = None,
         ignore_invalid_urls: Optional[bool] = None,
+        integration: Optional[str] = None,
     ):
         """Start an extract job (non-blocking).
 
@@ -512,6 +523,7 @@ class FirecrawlClient:
             show_sources=show_sources,
             scrape_options=scrape_options,
             ignore_invalid_urls=ignore_invalid_urls,
+            integration=integration,
         )
 
     def extract(
@@ -528,6 +540,7 @@ class FirecrawlClient:
         ignore_invalid_urls: Optional[bool] = None,
         poll_interval: int = 2,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ):
         """Extract structured data and wait until completion.
 
@@ -560,6 +573,7 @@ class FirecrawlClient:
             ignore_invalid_urls=ignore_invalid_urls,
             poll_interval=poll_interval,
             timeout=timeout,
+            integration=integration,
         )
 
     def start_batch_scrape(
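For SDK users, the net effect of the client.py changes is one new optional keyword on the public methods. A minimal usage sketch, assuming `client` is an already configured firecrawl.v2 FirecrawlClient (client construction is outside this diff) and using only parameters that appear in the hunks above:

def tag_requests(client):
    # `integration` is forwarded into ScrapeOptions alongside the other kwargs.
    doc = client.scrape("https://example.com", formats=["markdown"], integration="my-app")

    # Blocking crawl: the keyword is threaded into the CrawlRequest.
    job = client.crawl("https://example.com", limit=3, integration="my-app")

    # Map: integration now counts toward the "any option set" check that builds MapOptions.
    links = client.map("https://example.com", limit=10, integration="my-app")
    return doc, job, links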
firecrawl/v2/client_async.py
CHANGED
@@ -132,6 +132,7 @@ class AsyncFirecrawlClient:
         limit: Optional[int] = None,
         sitemap: Optional[Literal["only", "include", "skip"]] = None,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ) -> MapData:
         options = MapOptions(
             search=search,
@@ -139,7 +140,8 @@ class AsyncFirecrawlClient:
             limit=limit,
             sitemap=sitemap if sitemap is not None else "include",
             timeout=timeout,
-
+            integration=integration,
+        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, integration, timeout]) else None
         return await async_map.map(self.async_http_client, url, options)
 
     async def start_batch_scrape(self, urls: List[str], **kwargs) -> Any:
@@ -196,6 +198,7 @@ class AsyncFirecrawlClient:
         ignore_invalid_urls: Optional[bool] = None,
         poll_interval: int = 2,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ):
         return await async_extract.extract(
             self.async_http_client,
@@ -210,6 +213,7 @@ class AsyncFirecrawlClient:
             ignore_invalid_urls=ignore_invalid_urls,
             poll_interval=poll_interval,
             timeout=timeout,
+            integration=integration,
         )
 
     async def get_extract_status(self, job_id: str):
@@ -227,6 +231,7 @@ class AsyncFirecrawlClient:
         show_sources: Optional[bool] = None,
         scrape_options: Optional['ScrapeOptions'] = None,
         ignore_invalid_urls: Optional[bool] = None,
+        integration: Optional[str] = None,
     ):
         return await async_extract.start_extract(
             self.async_http_client,
@@ -239,6 +244,7 @@ class AsyncFirecrawlClient:
             show_sources=show_sources,
             scrape_options=scrape_options,
             ignore_invalid_urls=ignore_invalid_urls,
+            integration=integration,
         )
 
     # Usage endpoints
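The async client mirrors the sync changes. A small, hypothetical sketch assuming `client` is a configured firecrawl.v2 AsyncFirecrawlClient:

async def tag_async_map(client):
    # AsyncFirecrawlClient.map() gained the same optional keyword; MapOptions is
    # only constructed when at least one option (integration included) is not None.
    return await client.map("https://example.com", limit=5, integration="my-app")

# asyncio.run(tag_async_map(client)) would drive this with a real client instance.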
firecrawl/v2/methods/aio/batch.py
CHANGED

@@ -26,7 +26,9 @@ def _prepare(urls: List[str], *, options: Optional[ScrapeOptions] = None, **kwar
     if (v := kwargs.get("zero_data_retention")) is not None:
         payload["zeroDataRetention"] = v
     if (v := kwargs.get("integration")) is not None:
-
+        trimmed_integration = str(v).strip()
+        if trimmed_integration:
+            payload["integration"] = trimmed_integration
     return payload
 
 
firecrawl/v2/methods/aio/crawl.py
CHANGED

@@ -56,6 +56,8 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict:
         if snake in request_data:
             data[camel] = request_data.pop(snake)
     data.update(request_data)
+    if getattr(request, "integration", None) is not None:
+        data["integration"] = str(getattr(request, "integration")).strip()
     return data
 
 
firecrawl/v2/methods/aio/extract.py
CHANGED

@@ -17,6 +17,7 @@ def _prepare_extract_request(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> Dict[str, Any]:
     body: Dict[str, Any] = {}
     if urls is not None:
@@ -39,6 +40,8 @@ def _prepare_extract_request(
         prepared = prepare_scrape_options(scrape_options)
         if prepared:
             body["scrapeOptions"] = prepared
+    if integration is not None and str(integration).strip():
+        body["integration"] = str(integration).strip()
     return body
 
 
@@ -54,6 +57,7 @@ async def start_extract(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     body = _prepare_extract_request(
         urls,
@@ -65,6 +69,7 @@ async def start_extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     resp = await client.post("/v2/extract", body)
     return ExtractResponse(**resp.json())
@@ -106,6 +111,7 @@ async def extract(
     ignore_invalid_urls: Optional[bool] = None,
     poll_interval: int = 2,
     timeout: Optional[int] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     started = await start_extract(
         client,
@@ -118,6 +124,7 @@ async def extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     job_id = getattr(started, "id", None)
     if not job_id:
firecrawl/v2/methods/aio/map.py
CHANGED
@@ -20,6 +20,8 @@ def _prepare_map_request(url: str, options: Optional[MapOptions] = None) -> Dict
         data["limit"] = options.limit
     if options.timeout is not None:
         data["timeout"] = options.timeout
+    if options.integration is not None:
+        data["integration"] = options.integration.strip()
     if options.location is not None:
         data["location"] = options.location.model_dump(exclude_none=True)
     payload.update(data)
firecrawl/v2/methods/aio/search.py
CHANGED

@@ -10,6 +10,7 @@ from ...types import (
 )
 from ...utils.http_client_async import AsyncHttpClient
 from ...utils.error_handler import handle_response_error
+from ...utils.normalize import normalize_document_input
 from ...utils.validation import validate_scrape_options, prepare_scrape_options
 
 T = TypeVar("T")
@@ -73,7 +74,7 @@ def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, Docu
             "summary" in item or
             "json" in item
         ):
-            results.append(Document(**item))
+            results.append(Document(**normalize_document_input(item)))
         else:
             results.append(result_type(**item))
     else:
@@ -168,5 +169,8 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
     if scrape_data:
         data["scrapeOptions"] = scrape_data
     data.pop("scrape_options", None)
+
+    if (v := getattr(validated_request, "integration", None)) is not None and str(v).strip():
+        data["integration"] = str(validated_request.integration).strip()
 
     return data
firecrawl/v2/methods/batch.py
CHANGED
@@ -407,7 +407,7 @@ def prepare_batch_scrape_request(
     if zero_data_retention is not None:
         request_data["zeroDataRetention"] = zero_data_retention
     if integration is not None:
-        request_data["integration"] = integration
+        request_data["integration"] = str(integration).strip()
 
     return request_data
 
firecrawl/v2/methods/crawl.py
CHANGED
@@ -99,6 +99,9 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict:
 
     # Add any remaining fields that don't need conversion (like limit)
     data.update(request_data)
+    # Trim integration if present
+    if "integration" in data and isinstance(data["integration"], str):
+        data["integration"] = data["integration"].strip()
 
     return data
 
firecrawl/v2/methods/extract.py
CHANGED
@@ -18,6 +18,7 @@ def _prepare_extract_request(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> Dict[str, Any]:
     body: Dict[str, Any] = {}
     if urls is not None:
@@ -40,6 +41,8 @@ def _prepare_extract_request(
         prepared = prepare_scrape_options(scrape_options)
         if prepared:
             body["scrapeOptions"] = prepared
+    if integration is not None and str(integration).strip():
+        body["integration"] = str(integration).strip()
     return body
 
 
@@ -55,6 +58,7 @@ def start_extract(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     body = _prepare_extract_request(
         urls,
@@ -66,6 +70,7 @@ def start_extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     resp = client.post("/v2/extract", body)
     if not resp.ok:
@@ -111,6 +116,7 @@ def extract(
     ignore_invalid_urls: Optional[bool] = None,
     poll_interval: int = 2,
     timeout: Optional[int] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     started = start_extract(
         client,
@@ -123,6 +129,7 @@ def extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     job_id = getattr(started, "id", None)
     if not job_id:
firecrawl/v2/methods/map.py
CHANGED
@@ -27,6 +27,8 @@ def _prepare_map_request(url: str, options: Optional[MapOptions] = None) -> Dict
         data["limit"] = options.limit
     if options.timeout is not None:
         data["timeout"] = options.timeout
+    if options.integration is not None and options.integration.strip():
+        data["integration"] = options.integration.strip()
     if options.location is not None:
         data["location"] = options.location.model_dump(exclude_none=True)
     payload.update(data)
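A short sketch of the behaviour this adds to the synchronous map helper (import paths assumed from the file layout): padded values are stripped, and a whitespace-only value is dropped by the truthiness check.

from firecrawl.v2.types import MapOptions
from firecrawl.v2.methods.map import _prepare_map_request  # private helper, assumed path

payload = _prepare_map_request("https://example.com", MapOptions(limit=10, integration="  my-app  "))
assert payload["integration"] == "my-app"

empty = _prepare_map_request("https://example.com", MapOptions(limit=10, integration="   "))
assert "integration" not in empty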
firecrawl/v2/methods/search.py
CHANGED
@@ -71,7 +71,7 @@ def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, 'Doc
             "summary" in item or
             "json" in item
         ):
-            results.append(Document(**item))
+            results.append(Document(**normalize_document_input(item)))
         else:
             results.append(result_type(**item))
     else:
@@ -194,4 +194,7 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
         data["scrapeOptions"] = scrape_data
     data.pop("scrape_options", None)
 
+    if (str(getattr(validated_request, "integration", "")).strip()):
+        data["integration"] = str(validated_request.integration).strip()
+
     return data
firecrawl/v2/types.py
CHANGED
@@ -289,6 +289,7 @@ class ScrapeOptions(BaseModel):
     proxy: Optional[Literal["basic", "stealth", "auto"]] = None
     max_age: Optional[int] = None
     store_in_cache: Optional[bool] = None
+    integration: Optional[str] = None
 
     @field_validator('formats')
     @classmethod
@@ -334,6 +335,7 @@ class CrawlRequest(BaseModel):
     webhook: Optional[Union[str, WebhookConfig]] = None
     scrape_options: Optional[ScrapeOptions] = None
     zero_data_retention: bool = False
+    integration: Optional[str] = None
 
 class CrawlResponse(BaseModel):
     """Information about a crawl job."""
@@ -350,6 +352,10 @@ class CrawlJob(BaseModel):
     next: Optional[str] = None
     data: List[Document] = []
 
+class CrawlStatusRequest(BaseModel):
+    """Request to get crawl job status."""
+    job_id: str
+
 class SearchResultWeb(BaseModel):
     """A web search result with URL, title, and description."""
     url: str
@@ -410,6 +416,7 @@ class CrawlParamsData(BaseModel):
     scrape_options: Optional[ScrapeOptions] = None
     zero_data_retention: bool = False
     warning: Optional[str] = None
+    integration: Optional[str] = None
 
 class CrawlParamsResponse(BaseResponse[CrawlParamsData]):
     """Response from crawl params endpoint."""
@@ -420,6 +427,12 @@ class BatchScrapeRequest(BaseModel):
     """Request for batch scraping multiple URLs (internal helper only)."""
     urls: List[str]
     options: Optional[ScrapeOptions] = None
+    webhook: Optional[Union[str, WebhookConfig]] = None
+    append_to_id: Optional[str] = None
+    ignore_invalid_urls: Optional[bool] = None
+    max_concurrency: Optional[int] = None
+    zero_data_retention: Optional[bool] = None
+    integration: Optional[str] = None
 
 class BatchScrapeResponse(BaseModel):
     """Response from starting a batch scrape job (mirrors CrawlResponse naming)."""
@@ -437,6 +450,14 @@ class BatchScrapeJob(BaseModel):
     next: Optional[str] = None
     data: List[Document] = []
 
+class BatchScrapeStatusRequest(BaseModel):
+    """Request to get batch scrape job status."""
+    job_id: str
+
+class BatchScrapeErrorsRequest(BaseModel):
+    """Request to get errors for a batch scrape job."""
+    job_id: str
+
 # Map types
 class MapOptions(BaseModel):
     """Options for mapping operations."""
@@ -445,6 +466,7 @@ class MapOptions(BaseModel):
     include_subdomains: Optional[bool] = None
     limit: Optional[int] = None
     timeout: Optional[int] = None
+    integration: Optional[str] = None
     location: Optional['Location'] = None
 
 class MapRequest(BaseModel):
@@ -452,6 +474,8 class MapRequest(BaseModel):
     url: str
     options: Optional[MapOptions] = None
 
+
+
 class MapData(BaseModel):
     """Map results data."""
     links: List['SearchResult']
@@ -461,6 +485,19 @@ class MapResponse(BaseResponse[MapData]):
     pass
 
 # Extract types
+class ExtractRequest(BaseModel):
+    """Request for extract operations."""
+    urls: Optional[List[str]] = None
+    prompt: Optional[str] = None
+    schema_: Optional[Dict[str, Any]] = Field(default=None, alias="schema")
+    system_prompt: Optional[str] = None
+    allow_external_links: Optional[bool] = None
+    enable_web_search: Optional[bool] = None
+    show_sources: Optional[bool] = None
+    scrape_options: Optional[ScrapeOptions] = None
+    ignore_invalid_urls: Optional[bool] = None
+    integration: Optional[str] = None
+
 class ExtractResponse(BaseModel):
     """Response for extract operations (start/status/final)."""
     success: Optional[bool] = None
@@ -492,6 +529,10 @@ class TokenUsage(BaseModel):
     billing_period_start: Optional[str] = None
     billing_period_end: Optional[str] = None
 
+class QueueStatusRequest(BaseModel):
+    """Request to retrieve queue status."""
+    pass
+
 class QueueStatusResponse(BaseModel):
     """Metrics about the team's scrape queue."""
     jobs_in_queue: int
@@ -593,6 +634,7 @@ class SearchRequest(BaseModel):
     ignore_invalid_urls: Optional[bool] = None
     timeout: Optional[int] = 60000
     scrape_options: Optional[ScrapeOptions] = None
+    integration: Optional[str] = None
 
     @field_validator('sources')
     @classmethod
@@ -692,6 +734,10 @@ class CrawlErrorsResponse(BaseModel):
     errors: List[CrawlError]
     robots_blocked: List[str]
 
+class CrawlErrorsRequest(BaseModel):
+    """Request for crawl error monitoring."""
+    crawl_id: str
+
 class ActiveCrawl(BaseModel):
     """Information about an active crawl job."""
     id: str
@@ -704,6 +750,10 @@ class ActiveCrawlsResponse(BaseModel):
     success: bool = True
     crawls: List[ActiveCrawl]
 
+class ActiveCrawlsRequest(BaseModel):
+    """Request for listing active crawl jobs."""
+    pass
+
 # Configuration types
 class ClientConfig(BaseModel):
     """Configuration for the Firecrawl client."""
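The expanded BatchScrapeRequest and the new ExtractRequest are ordinary pydantic models, so the added fields can be exercised directly. A hypothetical construction sketch (field names taken from the hunks above; "schema" populates schema_ through its alias):

from firecrawl.v2.types import BatchScrapeRequest, ExtractRequest

batch = BatchScrapeRequest(
    urls=["https://example.com"],
    max_concurrency=5,
    zero_data_retention=True,
    integration="my-app",
)

extract = ExtractRequest(
    urls=["https://example.com"],
    prompt="Extract the page title",
    schema={"type": "object"},  # stored on schema_ via the "schema" alias
    integration="my-app",
)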
firecrawl/v2/utils/validation.py
CHANGED
@@ -177,6 +177,9 @@ def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[st
     # Handle special cases
     for key, value in options_data.items():
         if value is not None:
+            if key == "integration":
+                scrape_data["integration"] = (str(value).strip() or None)
+                continue
             if key == "formats":
                 # Handle formats conversion
                 converted_formats: List[Any] = []
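prepare_scrape_options is the shared conversion path (the extract and search helpers above call it), so the integration handling only needs to live here once. A sketch of the resulting payload (module paths assumed):

from firecrawl.v2.types import ScrapeOptions
from firecrawl.v2.utils.validation import prepare_scrape_options

prepared = prepare_scrape_options(ScrapeOptions(only_main_content=True, integration="  my-app  "))
assert prepared["integration"] == "my-app"      # stripped, key name left as-is
assert prepared["onlyMainContent"] is True      # other fields still converted to camelCase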
{firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/RECORD
CHANGED

@@ -1,42 +1,42 @@
-firecrawl/__init__.py,sha256=
+firecrawl/__init__.py,sha256=D5LVY6ePtjLCYBrjkFAkDUG-AUWcuAzp_asyy3xG2Pc,2192
 firecrawl/client.py,sha256=Lmrg2jniCETU6_xVMn_fgLrgDXiBixK9hSkkdsCGiog,11840
 firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
-firecrawl/types.py,sha256=
+firecrawl/types.py,sha256=NOSdj61BqmwGoR8fwkneUGc-feBVIKVN1LKvNGF6SXo,2885
 firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
 firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
-firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=
-firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=
-firecrawl/__tests__/e2e/v2/test_extract.py,sha256=
-firecrawl/__tests__/e2e/v2/test_map.py,sha256=
-firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=
-firecrawl/__tests__/e2e/v2/test_search.py,sha256=
+firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=tbuJ9y10ec9TtOnq97zmaEpOgZr9VzplRtZ_b6jkhq4,3302
+firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=3X3QT6mhNlHeNp3Rjy1AM3BNwDgEPjUvJRsLOIkAWP8,10023
+firecrawl/__tests__/e2e/v2/test_extract.py,sha256=b3WL4xPtINrPAn7oKKyYWyPIMSl0fr_DGVUU5NjJe-Y,1707
+firecrawl/__tests__/e2e/v2/test_map.py,sha256=K7abzGcmQp4FLchZytQv4Kwkm9AAivPYyAC5kCb8ecE,1655
+firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=4ElTgZqPmoQCC5tfjDnbw7W75oWa7PJ9WPXWaHVMRMs,7235
+firecrawl/__tests__/e2e/v2/test_search.py,sha256=xlWuBqcwfWGkLpQidcsG3kdbyqHFjLQTMsJzCE_CFyY,9112
 firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
 firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
-firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=
-firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=
-firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=
-firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=
-firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=
-firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=
+firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=37glgmnFBX8JcW12O6-6XDkna7Tixi8_I7dNk5erBTw,2672
+firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=oytaAS7GCMlVv-EMLoSVkK3PiRt19FmBdCv8W4kstXc,7315
+firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=RGIBtnvAKebwpPy1ZWWT-AqFvdf4bn_Td6y55F5l5As,1232
+firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=uaSIk68BeeA9Z-2NL-HCYLcRMAEWyKX7oplpfbKUf20,1232
+firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=xjCXkmoyp4AVa2LiAJKlMYOzDnEAnenkWMK2jhHkD7U,4486
+firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=_IkHkIuvWY6vH99EsqrCZuKcfAX8qkG4NVG4KJNYu-0,8279
 firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=lVGfwR79eaZamUZXgKStUJcpclCnnlpwHGo2pMOUhCY,1255
 firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
-firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=xAx-aH4bD6uCWavg1cw_8-9FnLIFJNkvVPyOCVJ7r2E,4052
 firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
 firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=PEKbooNXfQwPpvcPHXABJnveztgAA-RFBhtlSs8uPro,8780
 firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
-firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=w4FZrDqk9XGOuCHw3SV5CkbRuyb_F4Kc8C5eJ7zVcFs,1959
 firecrawl/__tests__/unit/v2/methods/test_pagination.py,sha256=wNc9UtdauII_jzsjlJh645NBRq4IbQij1NeBwbyTjBU,22463
-firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=
-firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=mxx4B7v4cC42ivLUCosFB2cBIaBI7m9uOUsbE8pyyGU,4077
+firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=HVqXDKO3602gPq-Rl0bXpfbAG5o0QBH51dgy8IOmm5s,6163
 firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCpZbAPYjthfdPFWmAPcoApY-ED-khtuANs,9498
 firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
 firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
 firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
-firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=fOSPJcCVsjk2WSDViwTqTnAPsUvsb6yT9lVG_q7iQfk,3208
 firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
-firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=DFjgi4wdcesuD7SQzRSzqjbpN2YSSMkMY7oJ-q_wyrA,809
 firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
-firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py,sha256=UAubD9xPX7H5oI6gttTxR3opvc3D-5ZDVCBHdpNFtYU,2182
 firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py,sha256=E26UnUhpbjG-EG0ab4WRD94AxA5IBWmIHq8ZLBOWoAA,1202
 firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py,sha256=pUwuWhRbVUTbgsZn4hgZesMkTMesTv_NPmvFW--ls-Y,3815
 firecrawl/__tests__/unit/v2/utils/test_validation.py,sha256=E4n4jpBhH_W7E0ikI5r8KMAKiOhbfGD3i_B8-dv3PlI,10803
@@ -44,25 +44,25 @@ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4
 firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
 firecrawl/v1/client.py,sha256=2Rq38RxGnuf2dMCmr4cc3f-ythavcBkUyJmRrwLmMHg,208104
 firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
-firecrawl/v2/client.py,sha256=
-firecrawl/v2/client_async.py,sha256=
-firecrawl/v2/types.py,sha256=
+firecrawl/v2/client.py,sha256=bbHejoXHhWoDYsAcyDnIRI5RflHi0yFztGTPuSeVkYo,32500
+firecrawl/v2/client_async.py,sha256=lnVnnjwVDVYHT1a2IiBooZi4rPt75gdxpzD0WpRrvb8,11457
+firecrawl/v2/types.py,sha256=Vhhyhpe8h0a2Hn0PJzVqCrMIITVszbULUeoUEzILxJQ,25992
 firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
 firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
-firecrawl/v2/methods/batch.py,sha256
-firecrawl/v2/methods/crawl.py,sha256=
-firecrawl/v2/methods/extract.py,sha256
-firecrawl/v2/methods/map.py,sha256=
+firecrawl/v2/methods/batch.py,sha256=-eGnCGgB76pY-BFVKG1DC58XViETWukQXtDU0esU_UU,14865
+firecrawl/v2/methods/crawl.py,sha256=p-1UC3-8vT757zOnNL5NJEWiT63BiAN2H1dCzLymqiQ,18797
+firecrawl/v2/methods/extract.py,sha256=xWKkA5dNFzAkYMqmq11XCFkB2THl17Pu_DZWq7zuvI4,4573
+firecrawl/v2/methods/map.py,sha256=MH8jhLIFsp-4IC9womVtdCyarnGTeMqBXqwL21TRbFk,2849
 firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
-firecrawl/v2/methods/search.py,sha256=
+firecrawl/v2/methods/search.py,sha256=2wo7u-GPnr7AGXAdUoavE3MkToKZufMbcnGOhgWJWqI,7846
 firecrawl/v2/methods/usage.py,sha256=NqkmFd-ziw8ijbZxwaxjxZHl85u0LTe_TYqr_NGWFwE,3693
 firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
-firecrawl/v2/methods/aio/batch.py,sha256=
-firecrawl/v2/methods/aio/crawl.py,sha256=
-firecrawl/v2/methods/aio/extract.py,sha256=
-firecrawl/v2/methods/aio/map.py,sha256=
+firecrawl/v2/methods/aio/batch.py,sha256=0R01YcWqk4Tkilbec1EH2fqY614F5PPICQmILRJg38A,6840
+firecrawl/v2/methods/aio/crawl.py,sha256=zLYmiYgwuqnussrEGyDOsej78lqQBKacg8wFKhRN0Qc,11684
+firecrawl/v2/methods/aio/extract.py,sha256=oc7LcjJ3g3nGYJeedEn2YWOg8X0NqgQpd0DrlI0SyiU,4516
+firecrawl/v2/methods/aio/map.py,sha256=4dIRBz6GRj_Ip6gbfFKi4ojN9nKBKEp8CXW4sdxFZaA,2551
 firecrawl/v2/methods/aio/scrape.py,sha256=ilA9qco8YGwCFpE0PN1XBQUyuHPQwH2QioZ-xsfxhgU,1386
-firecrawl/v2/methods/aio/search.py,sha256=
+firecrawl/v2/methods/aio/search.py,sha256=d1SFbK1HtBKvR8qCvJQlJQCkN-3pbire80Fbbn7CnSw,6431
 firecrawl/v2/methods/aio/usage.py,sha256=iUzTkdAWRheq-V5rRXcW0bc3MSODaVS1AqroRF0fO9M,3964
 firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
 firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
@@ -70,11 +70,11 @@ firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcI
 firecrawl/v2/utils/http_client.py,sha256=gUrC1CvU5sj03w27Lbq-3-yH38Yi_OXiI01-piwA83w,6027
 firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
 firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
-firecrawl/v2/utils/validation.py,sha256=
-firecrawl-4.3.
+firecrawl/v2/utils/validation.py,sha256=zzpCK4McM4P8Cag0_8s-d7Ww0idyTWKB4-yk92MT-rY,15405
+firecrawl-4.3.4.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
 tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
-firecrawl-4.3.
-firecrawl-4.3.
-firecrawl-4.3.
-firecrawl-4.3.
+firecrawl-4.3.4.dist-info/METADATA,sha256=nIjIOu2mpVFiWH541AgJawzPtyvOeGLd9d4lAn4uw8w,7392
+firecrawl-4.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+firecrawl-4.3.4.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl-4.3.4.dist-info/RECORD,,

{firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/WHEEL
File without changes

{firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/licenses/LICENSE
File without changes

{firecrawl-4.3.2.dist-info → firecrawl-4.3.4.dist-info}/top_level.txt
File without changes