firecrawl 2.10.0__tar.gz → 2.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.10.0
3
+ Version: 2.12.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp, AsyncFirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
15
15
 
16
- __version__ = "2.10.0"
16
+ __version__ = "2.12.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -96,6 +96,7 @@ class AgentOptionsExtract(pydantic.BaseModel):
96
96
  class ActionsResult(pydantic.BaseModel):
97
97
  """Result of actions performed during scraping."""
98
98
  screenshots: List[str]
99
+ pdfs: List[str]
99
100
 
100
101
  class ChangeTrackingData(pydantic.BaseModel):
101
102
  """
@@ -172,6 +173,7 @@ class ScreenshotAction(pydantic.BaseModel):
172
173
  """Screenshot action to perform during scraping."""
173
174
  type: Literal["screenshot"]
174
175
  fullPage: Optional[bool] = None
176
+ quality: Optional[int] = None
175
177
 
176
178
  class ClickAction(pydantic.BaseModel):
177
179
  """Click action to perform during scraping."""
@@ -203,6 +205,12 @@ class ExecuteJavascriptAction(pydantic.BaseModel):
203
205
  type: Literal["executeJavascript"]
204
206
  script: str
205
207
 
208
+ class PDFAction(pydantic.BaseModel):
209
+ """PDF action to perform during scraping."""
210
+ type: Literal["pdf"]
211
+ format: Optional[Literal["A0", "A1", "A2", "A3", "A4", "A5", "A6", "Letter", "Legal", "Tabloid", "Ledger"]] = None
212
+ landscape: Optional[bool] = None
213
+ scale: Optional[float] = None
206
214
 
207
215
  class ExtractAgent(pydantic.BaseModel):
208
216
  """Configuration for the agent in extract operations."""
@@ -219,7 +227,7 @@ class ScrapeParams(ScrapeOptions):
219
227
  """Parameters for scraping operations."""
220
228
  extract: Optional[JsonConfig] = None
221
229
  jsonOptions: Optional[JsonConfig] = None
222
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None
230
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None
223
231
  agent: Optional[AgentOptions] = None
224
232
  webhook: Optional[WebhookConfig] = None
225
233
 
@@ -265,6 +273,7 @@ class CrawlParams(pydantic.BaseModel):
265
273
  regexOnFullURL: Optional[bool] = None
266
274
  delay: Optional[int] = None # Delay in seconds between scrapes
267
275
  maxConcurrency: Optional[int] = None
276
+ allowSubdomains: Optional[bool] = None
268
277
 
269
278
  class CrawlResponse(pydantic.BaseModel):
270
279
  """Response from crawling operations."""
@@ -469,7 +478,7 @@ class FirecrawlApp:
469
478
  parse_pdf: Optional[bool] = None,
470
479
  extract: Optional[JsonConfig] = None,
471
480
  json_options: Optional[JsonConfig] = None,
472
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
481
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
473
482
  change_tracking_options: Optional[ChangeTrackingOptions] = None,
474
483
  max_age: Optional[int] = None,
475
484
  store_in_cache: Optional[bool] = None,
@@ -493,7 +502,7 @@ class FirecrawlApp:
493
502
  proxy (Optional[Literal["basic", "stealth", "auto"]]): Proxy type (basic/stealth)
494
503
  extract (Optional[JsonConfig]): Content extraction settings
495
504
  json_options (Optional[JsonConfig]): JSON extraction settings
496
- actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
505
+ actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
497
506
  change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
498
507
 
499
508
 
@@ -700,6 +709,7 @@ class FirecrawlApp:
700
709
  ignore_query_parameters: Optional[bool] = None,
701
710
  regex_on_full_url: Optional[bool] = None,
702
711
  delay: Optional[int] = None,
712
+ allow_subdomains: Optional[bool] = None,
703
713
  max_concurrency: Optional[int] = None,
704
714
  poll_interval: Optional[int] = 2,
705
715
  idempotency_key: Optional[str] = None,
@@ -725,6 +735,7 @@ class FirecrawlApp:
725
735
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
726
736
  regex_on_full_url (Optional[bool]): Apply regex to full URLs
727
737
  delay (Optional[int]): Delay in seconds between scrapes
738
+ allow_subdomains (Optional[bool]): Follow subdomains
728
739
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
729
740
  poll_interval (Optional[int]): Seconds between status checks (default: 2)
730
741
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
@@ -775,6 +786,8 @@ class FirecrawlApp:
775
786
  crawl_params['regexOnFullURL'] = regex_on_full_url
776
787
  if delay is not None:
777
788
  crawl_params['delay'] = delay
789
+ if allow_subdomains is not None:
790
+ crawl_params['allowSubdomains'] = allow_subdomains
778
791
  if max_concurrency is not None:
779
792
  crawl_params['maxConcurrency'] = max_concurrency
780
793
 
@@ -819,6 +832,8 @@ class FirecrawlApp:
819
832
  ignore_query_parameters: Optional[bool] = None,
820
833
  regex_on_full_url: Optional[bool] = None,
821
834
  delay: Optional[int] = None,
835
+ allow_subdomains: Optional[bool] = None,
836
+ max_concurrency: Optional[int] = None,
822
837
  idempotency_key: Optional[str] = None,
823
838
  **kwargs
824
839
  ) -> CrawlResponse:
@@ -842,6 +857,7 @@ class FirecrawlApp:
842
857
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
843
858
  regex_on_full_url (Optional[bool]): Apply regex to full URLs
844
859
  delay (Optional[int]): Delay in seconds between scrapes
860
+ allow_subdomains (Optional[bool]): Follow subdomains
845
861
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
846
862
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
847
863
  **kwargs: Additional parameters to pass to the API
@@ -892,6 +908,8 @@ class FirecrawlApp:
892
908
  crawl_params['regexOnFullURL'] = regex_on_full_url
893
909
  if delay is not None:
894
910
  crawl_params['delay'] = delay
911
+ if allow_subdomains is not None:
912
+ crawl_params['allowSubdomains'] = allow_subdomains
895
913
  if max_concurrency is not None:
896
914
  crawl_params['maxConcurrency'] = max_concurrency
897
915
 
@@ -1072,6 +1090,7 @@ class FirecrawlApp:
1072
1090
  ignore_query_parameters: Optional[bool] = None,
1073
1091
  regex_on_full_url: Optional[bool] = None,
1074
1092
  delay: Optional[int] = None,
1093
+ allow_subdomains: Optional[bool] = None,
1075
1094
  max_concurrency: Optional[int] = None,
1076
1095
  idempotency_key: Optional[str] = None,
1077
1096
  **kwargs
@@ -1096,6 +1115,7 @@ class FirecrawlApp:
1096
1115
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
1097
1116
  regex_on_full_url (Optional[bool]): Apply regex to full URLs
1098
1117
  delay (Optional[int]): Delay in seconds between scrapes
1118
+ allow_subdomains (Optional[bool]): Follow subdomains
1099
1119
  max_concurrency (Optional[int]): Maximum number of concurrent scrapes
1100
1120
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
1101
1121
  **kwargs: Additional parameters to pass to the API
@@ -1122,6 +1142,7 @@ class FirecrawlApp:
1122
1142
  ignore_query_parameters=ignore_query_parameters,
1123
1143
  regex_on_full_url=regex_on_full_url,
1124
1144
  delay=delay,
1145
+ allow_subdomains=allow_subdomains,
1125
1146
  max_concurrency=max_concurrency,
1126
1147
  idempotency_key=idempotency_key,
1127
1148
  **kwargs
@@ -1236,7 +1257,7 @@ class FirecrawlApp:
1236
1257
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
1237
1258
  extract: Optional[JsonConfig] = None,
1238
1259
  json_options: Optional[JsonConfig] = None,
1239
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
1260
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
1240
1261
  agent: Optional[AgentOptions] = None,
1241
1262
  poll_interval: Optional[int] = 2,
1242
1263
  max_concurrency: Optional[int] = None,
@@ -1374,7 +1395,7 @@ class FirecrawlApp:
1374
1395
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
1375
1396
  extract: Optional[JsonConfig] = None,
1376
1397
  json_options: Optional[JsonConfig] = None,
1377
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
1398
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
1378
1399
  agent: Optional[AgentOptions] = None,
1379
1400
  max_concurrency: Optional[int] = None,
1380
1401
  idempotency_key: Optional[str] = None,
@@ -1510,7 +1531,7 @@ class FirecrawlApp:
1510
1531
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
1511
1532
  extract: Optional[JsonConfig] = None,
1512
1533
  json_options: Optional[JsonConfig] = None,
1513
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
1534
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
1514
1535
  agent: Optional[AgentOptions] = None,
1515
1536
  max_concurrency: Optional[int] = None,
1516
1537
  idempotency_key: Optional[str] = None,
@@ -2911,7 +2932,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
2911
2932
  parse_pdf: Optional[bool] = None,
2912
2933
  extract: Optional[JsonConfig] = None,
2913
2934
  json_options: Optional[JsonConfig] = None,
2914
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
2935
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
2915
2936
  **kwargs) -> ScrapeResponse[Any]:
2916
2937
  """
2917
2938
  Scrape a single URL asynchronously.
@@ -2932,7 +2953,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
2932
2953
  proxy (Optional[Literal["basic", "stealth", "auto"]]): Proxy type (basic/stealth)
2933
2954
  extract (Optional[JsonConfig]): Content extraction settings
2934
2955
  json_options (Optional[JsonConfig]): JSON extraction settings
2935
- actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
2956
+ actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]]): Actions to perform
2936
2957
  **kwargs: Additional parameters to pass to the API
2937
2958
 
2938
2959
  Returns:
@@ -3042,7 +3063,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3042
3063
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
3043
3064
  extract: Optional[JsonConfig] = None,
3044
3065
  json_options: Optional[JsonConfig] = None,
3045
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
3066
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
3046
3067
  agent: Optional[AgentOptions] = None,
3047
3068
  poll_interval: Optional[int] = 2,
3048
3069
  idempotency_key: Optional[str] = None,
@@ -3181,7 +3202,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3181
3202
  proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
3182
3203
  extract: Optional[JsonConfig] = None,
3183
3204
  json_options: Optional[JsonConfig] = None,
3184
- actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
3205
+ actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction, PDFAction]]] = None,
3185
3206
  agent: Optional[AgentOptions] = None,
3186
3207
  idempotency_key: Optional[str] = None,
3187
3208
  **kwargs
@@ -3317,6 +3338,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3317
3338
  ignore_query_parameters: Optional[bool] = None,
3318
3339
  regex_on_full_url: Optional[bool] = None,
3319
3340
  delay: Optional[int] = None,
3341
+ allow_subdomains: Optional[bool] = None,
3320
3342
  poll_interval: Optional[int] = 2,
3321
3343
  idempotency_key: Optional[str] = None,
3322
3344
  **kwargs
@@ -3341,6 +3363,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3341
3363
  ignore_query_parameters (Optional[bool]): Ignore URL parameters
3342
3364
  regex_on_full_url (Optional[bool]): Apply regex to full URLs
3343
3365
  delay (Optional[int]): Delay in seconds between scrapes
3366
+ allow_subdomains (Optional[bool]): Follow subdomains
3344
3367
  poll_interval (Optional[int]): Seconds between status checks (default: 2)
3345
3368
  idempotency_key (Optional[str]): Unique key to prevent duplicate requests
3346
3369
  **kwargs: Additional parameters to pass to the API
@@ -3390,6 +3413,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
3390
3413
  crawl_params['regexOnFullURL'] = regex_on_full_url
3391
3414
  if delay is not None:
3392
3415
  crawl_params['delay'] = delay
3416
+ if allow_subdomains is not None:
3417
+ crawl_params['allowSubdomains'] = allow_subdomains
3393
3418
 
3394
3419
  # Add any additional kwargs
3395
3420
  crawl_params.update(kwargs)
@@ -3433,6 +3458,7 @@ class AsyncFirecrawlApp(FirecrawlApp):
3433
3458
  ignore_query_parameters: Optional[bool] = None,
3434
3459
  regex_on_full_url: Optional[bool] = None,
3435
3460
  delay: Optional[int] = None,
3461
+ allow_subdomains: Optional[bool] = None,
3436
3462
  poll_interval: Optional[int] = 2,
3437
3463
  idempotency_key: Optional[str] = None,
3438
3464
  **kwargs
@@ -3502,6 +3528,8 @@ class AsyncFirecrawlApp(FirecrawlApp):
3502
3528
  crawl_params['regexOnFullURL'] = regex_on_full_url
3503
3529
  if delay is not None:
3504
3530
  crawl_params['delay'] = delay
3531
+ if allow_subdomains is not None:
3532
+ crawl_params['allowSubdomains'] = allow_subdomains
3505
3533
 
3506
3534
  # Add any additional kwargs
3507
3535
  crawl_params.update(kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.10.0
3
+ Version: 2.12.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
File without changes
File without changes
File without changes
File without changes
File without changes