firecrawl 4.3.4__tar.gz → 4.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- {firecrawl-4.3.4 → firecrawl-4.3.5}/PKG-INFO +1 -1
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__init__.py +1 -1
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/types.py +2 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/client.py +9 -2
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/extract.py +12 -2
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/types.py +5 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl.egg-info/PKG-INFO +1 -1
- {firecrawl-4.3.4 → firecrawl-4.3.5}/LICENSE +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/README.md +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/conftest.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_async.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_extract.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_map.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_search.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_usage.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_pagination.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/client.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/firecrawl.backup.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v1/__init__.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v1/client.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/__init__.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/client_async.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/__init__.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/batch.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/crawl.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/extract.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/map.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/scrape.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/search.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/aio/usage.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/batch.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/crawl.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/map.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/scrape.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/search.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/methods/usage.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/__init__.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/error_handler.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/get_version.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/http_client.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/http_client_async.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/normalize.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/utils/validation.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/watcher.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/v2/watcher_async.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl.egg-info/top_level.txt +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/pyproject.toml +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/setup.cfg +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/setup.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/tests/test_change_tracking.py +0 -0
- {firecrawl-4.3.4 → firecrawl-4.3.5}/tests/test_timeout_conversion.py +0 -0
|
@@ -37,6 +37,7 @@ from .types import (
|
|
|
37
37
|
PDFAction,
|
|
38
38
|
Location,
|
|
39
39
|
PaginationConfig,
|
|
40
|
+
AgentOptions,
|
|
40
41
|
)
|
|
41
42
|
from .utils.http_client import HttpClient
|
|
42
43
|
from .utils.error_handler import FirecrawlError
|
|
@@ -495,6 +496,7 @@ class FirecrawlClient:
|
|
|
495
496
|
scrape_options: Optional['ScrapeOptions'] = None,
|
|
496
497
|
ignore_invalid_urls: Optional[bool] = None,
|
|
497
498
|
integration: Optional[str] = None,
|
|
499
|
+
agent: Optional[AgentOptions] = None,
|
|
498
500
|
):
|
|
499
501
|
"""Start an extract job (non-blocking).
|
|
500
502
|
|
|
@@ -508,7 +510,8 @@ class FirecrawlClient:
|
|
|
508
510
|
show_sources: Include per-field/source mapping when available
|
|
509
511
|
scrape_options: Scrape options applied prior to extraction
|
|
510
512
|
ignore_invalid_urls: Skip invalid URLs instead of failing
|
|
511
|
-
|
|
513
|
+
integration: Integration tag/name
|
|
514
|
+
agent: Agent configuration
|
|
512
515
|
Returns:
|
|
513
516
|
Response payload with job id/status (poll with get_extract_status)
|
|
514
517
|
"""
|
|
@@ -524,6 +527,7 @@ class FirecrawlClient:
|
|
|
524
527
|
scrape_options=scrape_options,
|
|
525
528
|
ignore_invalid_urls=ignore_invalid_urls,
|
|
526
529
|
integration=integration,
|
|
530
|
+
agent=agent,
|
|
527
531
|
)
|
|
528
532
|
|
|
529
533
|
def extract(
|
|
@@ -541,6 +545,7 @@ class FirecrawlClient:
|
|
|
541
545
|
poll_interval: int = 2,
|
|
542
546
|
timeout: Optional[int] = None,
|
|
543
547
|
integration: Optional[str] = None,
|
|
548
|
+
agent: Optional[AgentOptions] = None,
|
|
544
549
|
):
|
|
545
550
|
"""Extract structured data and wait until completion.
|
|
546
551
|
|
|
@@ -556,7 +561,8 @@ class FirecrawlClient:
|
|
|
556
561
|
ignore_invalid_urls: Skip invalid URLs instead of failing
|
|
557
562
|
poll_interval: Seconds between status checks
|
|
558
563
|
timeout: Maximum seconds to wait (None for no timeout)
|
|
559
|
-
|
|
564
|
+
integration: Integration tag/name
|
|
565
|
+
agent: Agent configuration
|
|
560
566
|
Returns:
|
|
561
567
|
Final extract response when completed
|
|
562
568
|
"""
|
|
@@ -574,6 +580,7 @@ class FirecrawlClient:
|
|
|
574
580
|
poll_interval=poll_interval,
|
|
575
581
|
timeout=timeout,
|
|
576
582
|
integration=integration,
|
|
583
|
+
agent=agent,
|
|
577
584
|
)
|
|
578
585
|
|
|
579
586
|
def start_batch_scrape(
|
|
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional
|
|
|
2
2
|
import time
|
|
3
3
|
|
|
4
4
|
from ..types import ExtractResponse, ScrapeOptions
|
|
5
|
+
from ..types import AgentOptions
|
|
5
6
|
from ..utils.http_client import HttpClient
|
|
6
7
|
from ..utils.validation import prepare_scrape_options
|
|
7
8
|
from ..utils.error_handler import handle_response_error
|
|
@@ -19,6 +20,7 @@ def _prepare_extract_request(
|
|
|
19
20
|
scrape_options: Optional[ScrapeOptions] = None,
|
|
20
21
|
ignore_invalid_urls: Optional[bool] = None,
|
|
21
22
|
integration: Optional[str] = None,
|
|
23
|
+
agent: Optional[AgentOptions] = None,
|
|
22
24
|
) -> Dict[str, Any]:
|
|
23
25
|
body: Dict[str, Any] = {}
|
|
24
26
|
if urls is not None:
|
|
@@ -43,6 +45,11 @@ def _prepare_extract_request(
|
|
|
43
45
|
body["scrapeOptions"] = prepared
|
|
44
46
|
if integration is not None and str(integration).strip():
|
|
45
47
|
body["integration"] = str(integration).strip()
|
|
48
|
+
if agent is not None:
|
|
49
|
+
try:
|
|
50
|
+
body["agent"] = agent.model_dump(exclude_none=True) # type: ignore[attr-defined]
|
|
51
|
+
except AttributeError:
|
|
52
|
+
body["agent"] = agent # fallback
|
|
46
53
|
return body
|
|
47
54
|
|
|
48
55
|
|
|
@@ -59,6 +66,7 @@ def start_extract(
|
|
|
59
66
|
scrape_options: Optional[ScrapeOptions] = None,
|
|
60
67
|
ignore_invalid_urls: Optional[bool] = None,
|
|
61
68
|
integration: Optional[str] = None,
|
|
69
|
+
agent: Optional[AgentOptions] = None,
|
|
62
70
|
) -> ExtractResponse:
|
|
63
71
|
body = _prepare_extract_request(
|
|
64
72
|
urls,
|
|
@@ -71,6 +79,7 @@ def start_extract(
|
|
|
71
79
|
scrape_options=scrape_options,
|
|
72
80
|
ignore_invalid_urls=ignore_invalid_urls,
|
|
73
81
|
integration=integration,
|
|
82
|
+
agent=agent,
|
|
74
83
|
)
|
|
75
84
|
resp = client.post("/v2/extract", body)
|
|
76
85
|
if not resp.ok:
|
|
@@ -117,6 +126,7 @@ def extract(
|
|
|
117
126
|
poll_interval: int = 2,
|
|
118
127
|
timeout: Optional[int] = None,
|
|
119
128
|
integration: Optional[str] = None,
|
|
129
|
+
agent: Optional[AgentOptions] = None,
|
|
120
130
|
) -> ExtractResponse:
|
|
121
131
|
started = start_extract(
|
|
122
132
|
client,
|
|
@@ -130,9 +140,9 @@ def extract(
|
|
|
130
140
|
scrape_options=scrape_options,
|
|
131
141
|
ignore_invalid_urls=ignore_invalid_urls,
|
|
132
142
|
integration=integration,
|
|
143
|
+
agent=agent,
|
|
133
144
|
)
|
|
134
145
|
job_id = getattr(started, "id", None)
|
|
135
146
|
if not job_id:
|
|
136
147
|
return started
|
|
137
|
-
return wait_extract(client, job_id, poll_interval=poll_interval, timeout=timeout)
|
|
138
|
-
|
|
148
|
+
return wait_extract(client, job_id, poll_interval=poll_interval, timeout=timeout)
|
|
@@ -114,6 +114,10 @@ class DocumentMetadata(BaseModel):
|
|
|
114
114
|
def coerce_status_code_to_int(cls, v):
|
|
115
115
|
return cls._coerce_string_to_int(v)
|
|
116
116
|
|
|
117
|
+
class AgentOptions(BaseModel):
|
|
118
|
+
"""Configuration for the agent in extract operations."""
|
|
119
|
+
model: Literal["FIRE-1"] = "FIRE-1"
|
|
120
|
+
|
|
117
121
|
class AttributeResult(BaseModel):
|
|
118
122
|
"""Result of attribute extraction."""
|
|
119
123
|
selector: str
|
|
@@ -497,6 +501,7 @@ class ExtractRequest(BaseModel):
|
|
|
497
501
|
scrape_options: Optional[ScrapeOptions] = None
|
|
498
502
|
ignore_invalid_urls: Optional[bool] = None
|
|
499
503
|
integration: Optional[str] = None
|
|
504
|
+
agent: Optional[AgentOptions] = None
|
|
500
505
|
|
|
501
506
|
class ExtractResponse(BaseModel):
|
|
502
507
|
"""Response for extract operations (start/status/final)."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py
RENAMED
|
File without changes
|
|
File without changes
|
{firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py
RENAMED
|
File without changes
|
|
File without changes
|
{firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{firecrawl-4.3.4 → firecrawl-4.3.5}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|