firecrawl 4.3.4__py3-none-any.whl → 4.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
17
17
  V1ChangeTrackingOptions,
18
18
  )
19
19
 
20
- __version__ = "4.3.4"
20
+ __version__ = "4.3.6"
21
21
 
22
22
  # Define the logger for the Firecrawl project
23
23
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/types.py CHANGED
@@ -82,6 +82,7 @@ from .v2.types import (
82
82
 
83
83
  # Configuration types
84
84
  ClientConfig,
85
+ AgentOptions,
85
86
  )
86
87
 
87
88
  __all__ = [
@@ -162,4 +163,5 @@ __all__ = [
162
163
 
163
164
  # Configuration types
164
165
  'ClientConfig',
166
+ 'AgentOptions',
165
167
  ]
firecrawl/v2/client.py CHANGED
@@ -37,6 +37,7 @@ from .types import (
37
37
  PDFAction,
38
38
  Location,
39
39
  PaginationConfig,
40
+ AgentOptions,
40
41
  )
41
42
  from .utils.http_client import HttpClient
42
43
  from .utils.error_handler import FirecrawlError
@@ -495,6 +496,7 @@ class FirecrawlClient:
495
496
  scrape_options: Optional['ScrapeOptions'] = None,
496
497
  ignore_invalid_urls: Optional[bool] = None,
497
498
  integration: Optional[str] = None,
499
+ agent: Optional[AgentOptions] = None,
498
500
  ):
499
501
  """Start an extract job (non-blocking).
500
502
 
@@ -508,7 +510,8 @@ class FirecrawlClient:
508
510
  show_sources: Include per-field/source mapping when available
509
511
  scrape_options: Scrape options applied prior to extraction
510
512
  ignore_invalid_urls: Skip invalid URLs instead of failing
511
-
513
+ integration: Integration tag/name
514
+ agent: Agent configuration
512
515
  Returns:
513
516
  Response payload with job id/status (poll with get_extract_status)
514
517
  """
@@ -524,6 +527,7 @@ class FirecrawlClient:
524
527
  scrape_options=scrape_options,
525
528
  ignore_invalid_urls=ignore_invalid_urls,
526
529
  integration=integration,
530
+ agent=agent,
527
531
  )
528
532
 
529
533
  def extract(
@@ -541,6 +545,7 @@ class FirecrawlClient:
541
545
  poll_interval: int = 2,
542
546
  timeout: Optional[int] = None,
543
547
  integration: Optional[str] = None,
548
+ agent: Optional[AgentOptions] = None,
544
549
  ):
545
550
  """Extract structured data and wait until completion.
546
551
 
@@ -556,7 +561,8 @@ class FirecrawlClient:
556
561
  ignore_invalid_urls: Skip invalid URLs instead of failing
557
562
  poll_interval: Seconds between status checks
558
563
  timeout: Maximum seconds to wait (None for no timeout)
559
-
564
+ integration: Integration tag/name
565
+ agent: Agent configuration
560
566
  Returns:
561
567
  Final extract response when completed
562
568
  """
@@ -574,6 +580,7 @@ class FirecrawlClient:
574
580
  poll_interval=poll_interval,
575
581
  timeout=timeout,
576
582
  integration=integration,
583
+ agent=agent,
577
584
  )
578
585
 
579
586
  def start_batch_scrape(
@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional
2
2
  import time
3
3
 
4
4
  from ..types import ExtractResponse, ScrapeOptions
5
+ from ..types import AgentOptions
5
6
  from ..utils.http_client import HttpClient
6
7
  from ..utils.validation import prepare_scrape_options
7
8
  from ..utils.error_handler import handle_response_error
@@ -19,6 +20,7 @@ def _prepare_extract_request(
19
20
  scrape_options: Optional[ScrapeOptions] = None,
20
21
  ignore_invalid_urls: Optional[bool] = None,
21
22
  integration: Optional[str] = None,
23
+ agent: Optional[AgentOptions] = None,
22
24
  ) -> Dict[str, Any]:
23
25
  body: Dict[str, Any] = {}
24
26
  if urls is not None:
@@ -43,6 +45,11 @@ def _prepare_extract_request(
43
45
  body["scrapeOptions"] = prepared
44
46
  if integration is not None and str(integration).strip():
45
47
  body["integration"] = str(integration).strip()
48
+ if agent is not None:
49
+ try:
50
+ body["agent"] = agent.model_dump(exclude_none=True) # type: ignore[attr-defined]
51
+ except AttributeError:
52
+ body["agent"] = agent # fallback
46
53
  return body
47
54
 
48
55
 
@@ -59,6 +66,7 @@ def start_extract(
59
66
  scrape_options: Optional[ScrapeOptions] = None,
60
67
  ignore_invalid_urls: Optional[bool] = None,
61
68
  integration: Optional[str] = None,
69
+ agent: Optional[AgentOptions] = None,
62
70
  ) -> ExtractResponse:
63
71
  body = _prepare_extract_request(
64
72
  urls,
@@ -71,6 +79,7 @@ def start_extract(
71
79
  scrape_options=scrape_options,
72
80
  ignore_invalid_urls=ignore_invalid_urls,
73
81
  integration=integration,
82
+ agent=agent,
74
83
  )
75
84
  resp = client.post("/v2/extract", body)
76
85
  if not resp.ok:
@@ -117,6 +126,7 @@ def extract(
117
126
  poll_interval: int = 2,
118
127
  timeout: Optional[int] = None,
119
128
  integration: Optional[str] = None,
129
+ agent: Optional[AgentOptions] = None,
120
130
  ) -> ExtractResponse:
121
131
  started = start_extract(
122
132
  client,
@@ -130,9 +140,9 @@ def extract(
130
140
  scrape_options=scrape_options,
131
141
  ignore_invalid_urls=ignore_invalid_urls,
132
142
  integration=integration,
143
+ agent=agent,
133
144
  )
134
145
  job_id = getattr(started, "id", None)
135
146
  if not job_id:
136
147
  return started
137
- return wait_extract(client, job_id, poll_interval=poll_interval, timeout=timeout)
138
-
148
+ return wait_extract(client, job_id, poll_interval=poll_interval, timeout=timeout)
@@ -194,7 +194,11 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
194
194
  data["scrapeOptions"] = scrape_data
195
195
  data.pop("scrape_options", None)
196
196
 
197
- if (str(getattr(validated_request, "integration", "")).strip()):
198
- data["integration"] = str(validated_request.integration).strip()
197
+ # Only include integration if it was explicitly provided and non-empty
198
+ integration_value = getattr(validated_request, "integration", None)
199
+ if integration_value is not None:
200
+ integration_str = str(integration_value).strip()
201
+ if integration_str:
202
+ data["integration"] = integration_str
199
203
 
200
204
  return data
firecrawl/v2/types.py CHANGED
@@ -114,6 +114,10 @@ class DocumentMetadata(BaseModel):
114
114
  def coerce_status_code_to_int(cls, v):
115
115
  return cls._coerce_string_to_int(v)
116
116
 
117
+ class AgentOptions(BaseModel):
118
+ """Configuration for the agent in extract operations."""
119
+ model: Literal["FIRE-1"] = "FIRE-1"
120
+
117
121
  class AttributeResult(BaseModel):
118
122
  """Result of attribute extraction."""
119
123
  selector: str
@@ -497,6 +501,7 @@ class ExtractRequest(BaseModel):
497
501
  scrape_options: Optional[ScrapeOptions] = None
498
502
  ignore_invalid_urls: Optional[bool] = None
499
503
  integration: Optional[str] = None
504
+ agent: Optional[AgentOptions] = None
500
505
 
501
506
  class ExtractResponse(BaseModel):
502
507
  """Response for extract operations (start/status/final)."""
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 4.3.4
3
+ Version: 4.3.6
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/firecrawl/firecrawl
6
6
  Author: Mendable.ai
@@ -38,12 +38,8 @@ Requires-Dist: httpx
38
38
  Requires-Dist: python-dotenv
39
39
  Requires-Dist: websockets
40
40
  Requires-Dist: nest-asyncio
41
- Requires-Dist: pydantic>=2.0
41
+ Requires-Dist: pydantic (>=2.0)
42
42
  Requires-Dist: aiohttp
43
- Dynamic: author
44
- Dynamic: home-page
45
- Dynamic: license-file
46
- Dynamic: requires-python
47
43
 
48
44
  # Firecrawl Python SDK
49
45
 
@@ -1,7 +1,7 @@
1
- firecrawl/__init__.py,sha256=D5LVY6ePtjLCYBrjkFAkDUG-AUWcuAzp_asyy3xG2Pc,2192
1
+ firecrawl/__init__.py,sha256=V6faUxnmrRvR4mfES3V_pZGHYQu1IAX3-KAMQWTDdSw,2192
2
2
  firecrawl/client.py,sha256=Lmrg2jniCETU6_xVMn_fgLrgDXiBixK9hSkkdsCGiog,11840
3
3
  firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
4
- firecrawl/types.py,sha256=NOSdj61BqmwGoR8fwkneUGc-feBVIKVN1LKvNGF6SXo,2885
4
+ firecrawl/types.py,sha256=RmLTq14Z-Nf883wgZxubrtn2HDu9mecsCEdcIdBCu14,2923
5
5
  firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
6
6
  firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
7
7
  firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=tbuJ9y10ec9TtOnq97zmaEpOgZr9VzplRtZ_b6jkhq4,3302
@@ -44,17 +44,17 @@ firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4
44
44
  firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
45
45
  firecrawl/v1/client.py,sha256=2Rq38RxGnuf2dMCmr4cc3f-ythavcBkUyJmRrwLmMHg,208104
46
46
  firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
47
- firecrawl/v2/client.py,sha256=bbHejoXHhWoDYsAcyDnIRI5RflHi0yFztGTPuSeVkYo,32500
47
+ firecrawl/v2/client.py,sha256=KBDN8p7epuCOG0XNb-jcftxfboufgYLYl6d3RiYyORA,32828
48
48
  firecrawl/v2/client_async.py,sha256=lnVnnjwVDVYHT1a2IiBooZi4rPt75gdxpzD0WpRrvb8,11457
49
- firecrawl/v2/types.py,sha256=Vhhyhpe8h0a2Hn0PJzVqCrMIITVszbULUeoUEzILxJQ,25992
49
+ firecrawl/v2/types.py,sha256=ReVJ9VWYlCKX-tX55xFaA_XAIIePUbUGTq7vYQgwCSA,26166
50
50
  firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
51
51
  firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
52
52
  firecrawl/v2/methods/batch.py,sha256=-eGnCGgB76pY-BFVKG1DC58XViETWukQXtDU0esU_UU,14865
53
53
  firecrawl/v2/methods/crawl.py,sha256=p-1UC3-8vT757zOnNL5NJEWiT63BiAN2H1dCzLymqiQ,18797
54
- firecrawl/v2/methods/extract.py,sha256=xWKkA5dNFzAkYMqmq11XCFkB2THl17Pu_DZWq7zuvI4,4573
54
+ firecrawl/v2/methods/extract.py,sha256=5EcgBzF8uNwA7auzco8xWdOycVV-Y44e04xJG4nlfZY,4982
55
55
  firecrawl/v2/methods/map.py,sha256=MH8jhLIFsp-4IC9womVtdCyarnGTeMqBXqwL21TRbFk,2849
56
56
  firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
57
- firecrawl/v2/methods/search.py,sha256=2wo7u-GPnr7AGXAdUoavE3MkToKZufMbcnGOhgWJWqI,7846
57
+ firecrawl/v2/methods/search.py,sha256=fWkMt0WL-VqmFEnkSf9Vi9E5gtBB2TEhtotaFuVb6rw,8024
58
58
  firecrawl/v2/methods/usage.py,sha256=NqkmFd-ziw8ijbZxwaxjxZHl85u0LTe_TYqr_NGWFwE,3693
59
59
  firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
60
60
  firecrawl/v2/methods/aio/batch.py,sha256=0R01YcWqk4Tkilbec1EH2fqY614F5PPICQmILRJg38A,6840
@@ -71,10 +71,10 @@ firecrawl/v2/utils/http_client.py,sha256=gUrC1CvU5sj03w27Lbq-3-yH38Yi_OXiI01-piw
71
71
  firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
72
72
  firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
73
73
  firecrawl/v2/utils/validation.py,sha256=zzpCK4McM4P8Cag0_8s-d7Ww0idyTWKB4-yk92MT-rY,15405
74
- firecrawl-4.3.4.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
75
74
  tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
76
75
  tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
77
- firecrawl-4.3.4.dist-info/METADATA,sha256=nIjIOu2mpVFiWH541AgJawzPtyvOeGLd9d4lAn4uw8w,7392
78
- firecrawl-4.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
79
- firecrawl-4.3.4.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
80
- firecrawl-4.3.4.dist-info/RECORD,,
76
+ firecrawl-4.3.6.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
77
+ firecrawl-4.3.6.dist-info/METADATA,sha256=FwTSDQ4Hk5O09wHnDzaIM5V8jlHz11TKd09OBbgq7E0,7313
78
+ firecrawl-4.3.6.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
79
+ firecrawl-4.3.6.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
80
+ firecrawl-4.3.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: bdist_wheel (0.38.4)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5