firecrawl-py 4.13.2__tar.gz → 4.14.0__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (99)
  1. {firecrawl_py-4.13.2/firecrawl_py.egg-info → firecrawl_py-4.14.0}/PKG-INFO +27 -1
  2. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/README.md +27 -1
  3. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__init__.py +1 -1
  4. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +35 -2
  5. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_crawl.py +29 -2
  6. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_pagination.py +228 -4
  7. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/client.py +9 -1
  8. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v1/client.py +8 -8
  9. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/client.py +43 -0
  10. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/client_async.py +34 -0
  11. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/batch.py +78 -26
  12. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/crawl.py +92 -37
  13. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/batch.py +83 -28
  14. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/crawl.py +99 -51
  15. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/types.py +2 -1
  16. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0/firecrawl_py.egg-info}/PKG-INFO +27 -1
  17. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/LICENSE +0 -0
  18. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/conftest.py +0 -0
  19. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -0
  20. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -0
  21. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -0
  22. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -0
  23. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -0
  24. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -0
  25. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -0
  26. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -0
  27. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/conftest.py +0 -0
  28. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_async.py +0 -0
  29. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_extract.py +0 -0
  30. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_map.py +0 -0
  31. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -0
  32. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_search.py +0 -0
  33. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_usage.py +0 -0
  34. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -0
  35. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/test_recursive_schema_v1.py +0 -0
  36. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -0
  37. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -0
  38. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -0
  39. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -0
  40. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -0
  41. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -0
  42. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -0
  43. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -0
  44. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_agent.py +0 -0
  45. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +0 -0
  46. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_agent_webhook.py +0 -0
  47. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -0
  48. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_branding.py +0 -0
  49. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -0
  50. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -0
  51. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -0
  52. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -0
  53. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -0
  54. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -0
  55. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -0
  56. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -0
  57. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -0
  58. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +0 -0
  59. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +0 -0
  60. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +0 -0
  61. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -0
  62. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -0
  63. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/firecrawl.backup.py +0 -0
  64. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/types.py +0 -0
  65. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v1/__init__.py +0 -0
  66. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/__init__.py +0 -0
  67. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/agent.py +0 -0
  68. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/__init__.py +0 -0
  69. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/agent.py +0 -0
  70. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/extract.py +0 -0
  71. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/map.py +0 -0
  72. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/scrape.py +0 -0
  73. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/search.py +0 -0
  74. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/aio/usage.py +0 -0
  75. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/extract.py +0 -0
  76. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/map.py +0 -0
  77. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/scrape.py +0 -0
  78. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/search.py +0 -0
  79. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/methods/usage.py +0 -0
  80. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/__init__.py +0 -0
  81. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/error_handler.py +0 -0
  82. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/get_version.py +0 -0
  83. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/http_client.py +0 -0
  84. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/http_client_async.py +0 -0
  85. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/normalize.py +0 -0
  86. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/utils/validation.py +0 -0
  87. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/watcher.py +0 -0
  88. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/watcher_async.py +0 -0
  89. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl_py.egg-info/SOURCES.txt +0 -0
  90. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl_py.egg-info/dependency_links.txt +0 -0
  91. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl_py.egg-info/requires.txt +0 -0
  92. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl_py.egg-info/top_level.txt +0 -0
  93. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/pyproject.toml +0 -0
  94. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/setup.cfg +0 -0
  95. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/setup.py +0 -0
  96. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/tests/test_agent_integration.py +0 -0
  97. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/tests/test_api_key_handling.py +0 -0
  98. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/tests/test_change_tracking.py +0 -0
  99. {firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/tests/test_timeout_conversion.py +0 -0
{firecrawl_py-4.13.2/firecrawl_py.egg-info → firecrawl_py-4.14.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: firecrawl-py
-Version: 4.13.2
+Version: 4.14.0
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
@@ -134,6 +134,32 @@ crawl_status = firecrawl.get_crawl_status("<crawl_id>")
 print(crawl_status)
 ```
 
+### Manual Pagination (v2)
+
+Crawl and batch scrape status responses may include a `next` URL when more data is available. The SDK auto-paginates by default; to page manually, disable auto-pagination and pass the opaque `next` URL back to the SDK.
+
+```python
+from firecrawl.v2.types import PaginationConfig
+
+# Crawl: fetch one page at a time
+crawl_job = firecrawl.start_crawl("https://firecrawl.dev", limit=100)
+status = firecrawl.get_crawl_status(
+    crawl_job.id,
+    pagination_config=PaginationConfig(auto_paginate=False),
+)
+if status.next:
+    page2 = firecrawl.get_crawl_status_page(status.next)
+
+# Batch scrape: fetch one page at a time
+batch_job = firecrawl.start_batch_scrape(["https://firecrawl.dev"])
+status = firecrawl.get_batch_scrape_status(
+    batch_job.id,
+    pagination_config=PaginationConfig(auto_paginate=False),
+)
+if status.next:
+    page2 = firecrawl.get_batch_scrape_status_page(status.next)
+```
+
 ### Cancelling a Crawl
 
 To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status.

{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/README.md

@@ -87,6 +87,32 @@ crawl_status = firecrawl.get_crawl_status("<crawl_id>")
 print(crawl_status)
 ```
 
+### Manual Pagination (v2)
+
+Crawl and batch scrape status responses may include a `next` URL when more data is available. The SDK auto-paginates by default; to page manually, disable auto-pagination and pass the opaque `next` URL back to the SDK.
+
+```python
+from firecrawl.v2.types import PaginationConfig
+
+# Crawl: fetch one page at a time
+crawl_job = firecrawl.start_crawl("https://firecrawl.dev", limit=100)
+status = firecrawl.get_crawl_status(
+    crawl_job.id,
+    pagination_config=PaginationConfig(auto_paginate=False),
+)
+if status.next:
+    page2 = firecrawl.get_crawl_status_page(status.next)
+
+# Batch scrape: fetch one page at a time
+batch_job = firecrawl.start_batch_scrape(["https://firecrawl.dev"])
+status = firecrawl.get_batch_scrape_status(
+    batch_job.id,
+    pagination_config=PaginationConfig(auto_paginate=False),
+)
+if status.next:
+    page2 = firecrawl.get_batch_scrape_status_page(status.next)
+```
+
 ### Cancelling a Crawl
 
 To cancel an asynchronous crawl job, use the `cancel_crawl` method. It takes the job ID of the asynchronous crawl as a parameter and returns the cancellation status.

@@ -184,4 +210,4 @@ firecrawl = Firecrawl(api_key="YOUR_API_KEY")
 doc_v1 = firecrawl.v1.scrape_url('https://firecrawl.dev', formats=['markdown', 'html'])
 crawl_v1 = firecrawl.v1.crawl_url('https://firecrawl.dev', limit=100)
 map_v1 = firecrawl.v1.map_url('https://firecrawl.dev')
-```
+```
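The README example above fetches only the next page. When draining a job manually you would follow `next` until it is exhausted; here is a minimal sketch built from the same v2 methods documented above (the `collect_crawl_documents` helper is our illustration, not an SDK function):

```python
from firecrawl import Firecrawl
from firecrawl.v2.types import PaginationConfig

firecrawl = Firecrawl(api_key="YOUR_API_KEY")

def collect_crawl_documents(crawl_id: str) -> list:
    """Illustrative helper: drain a crawl page by page via `next` URLs."""
    status = firecrawl.get_crawl_status(
        crawl_id,
        pagination_config=PaginationConfig(auto_paginate=False),
    )
    documents = list(status.data)
    while status.next:
        # Each subsequent page is addressed by the opaque `next` URL
        # returned with the previous page.
        status = firecrawl.get_crawl_status_page(status.next)
        documents.extend(status.data)
    return documents
```

The same loop works for batch scrapes via `get_batch_scrape_status` and `get_batch_scrape_status_page`.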
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__init__.py

@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )
 
-__version__ = "4.13.2"
+__version__ = "4.14.0"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_batch_scrape.py

@@ -1,8 +1,9 @@
 import os
+import time
 import pytest
 from dotenv import load_dotenv
 from firecrawl import Firecrawl
-from firecrawl.v2.types import ScrapeOptions
+from firecrawl.v2.types import ScrapeOptions, PaginationConfig
 
 load_dotenv()
 
@@ -48,6 +49,39 @@ class TestBatchScrapeE2E:
         assert job.status in ["scraping", "completed", "failed"]
         assert job.total >= 0
 
+    def test_get_batch_scrape_status_page(self):
+        """Fetch a single batch scrape page using the next URL."""
+        urls = [f"https://docs.firecrawl.dev?batch={i}" for i in range(15)]
+
+        start_resp = self.client.start_batch_scrape(
+            urls,
+            formats=["markdown"],
+            ignore_invalid_urls=True,
+        )
+        assert start_resp.id is not None
+
+        pagination_config = PaginationConfig(auto_paginate=False)
+        deadline = time.time() + 120
+        status_job = None
+        while time.time() < deadline:
+            status_job = self.client.get_batch_scrape_status(
+                start_resp.id,
+                pagination_config=pagination_config,
+            )
+            if status_job.next:
+                break
+            if status_job.status in ["completed", "failed", "cancelled"]:
+                break
+            time.sleep(2)
+
+        assert status_job is not None
+        if not status_job.next:
+            pytest.skip("Batch scrape completed without pagination; skipping page fetch.")
+
+        next_page = self.client.get_batch_scrape_status_page(status_job.next)
+        assert isinstance(next_page.data, list)
+        assert next_page.status in ["scraping", "completed", "failed", "cancelled"]
+
     def test_wait_batch_with_all_params(self):
         """Blocking waiter with JSON and changeTracking formats plus many options."""
         urls = [

@@ -103,4 +137,3 @@ class TestBatchScrapeE2E:
 
         cancelled = self.client.cancel_batch_scrape(start_resp.id)
         assert cancelled is True
-
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/e2e/v2/test_crawl.py

@@ -3,7 +3,7 @@ import time
 import os
 from dotenv import load_dotenv
 from firecrawl import Firecrawl
-from firecrawl.v2.types import ScrapeOptions
+from firecrawl.v2.types import ScrapeOptions, PaginationConfig
 
 load_dotenv()
 
@@ -66,6 +66,33 @@ class TestCrawlE2E:
         assert status_job.next is None
         assert isinstance(status_job.data, list)
 
+    def test_get_crawl_status_page(self):
+        """Fetch a single crawl page using the next URL."""
+        start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=25)
+        assert start_job.id is not None
+
+        pagination_config = PaginationConfig(auto_paginate=False)
+        deadline = time.time() + 120
+        status_job = None
+        while time.time() < deadline:
+            status_job = self.client.get_crawl_status(
+                start_job.id,
+                pagination_config=pagination_config,
+            )
+            if status_job.next:
+                break
+            if status_job.status in ["completed", "failed", "cancelled"]:
+                break
+            time.sleep(2)
+
+        assert status_job is not None
+        if not status_job.next:
+            pytest.skip("Crawl completed without pagination; skipping page fetch.")
+
+        next_page = self.client.get_crawl_status_page(status_job.next)
+        assert isinstance(next_page.data, list)
+        assert next_page.status in ["scraping", "completed", "failed", "cancelled"]
+
     def test_cancel_crawl(self):
         """Test canceling a crawl."""
         start_job = self.client.start_crawl("https://docs.firecrawl.dev", limit=3)

@@ -275,4 +302,4 @@ class TestCrawlE2E:
         assert params_data is not None
         assert params_data.limit is not None or params_data.include_paths is not None or params_data.max_discovery_depth is not None
         assert 'blog/.*' in params_data.include_paths
-        assert 'docs/.*' in params_data.include_paths
+        assert 'docs/.*' in params_data.include_paths
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/__tests__/unit/v2/methods/test_pagination.py

@@ -14,10 +14,18 @@ from firecrawl.v2.types import (
     Document,
     DocumentMetadata
 )
-from firecrawl.v2.methods.crawl import get_crawl_status, _fetch_all_pages
-from firecrawl.v2.methods.batch import get_batch_scrape_status, _fetch_all_batch_pages
-from firecrawl.v2.methods.aio.crawl import get_crawl_status as get_crawl_status_async, _fetch_all_pages_async
-from firecrawl.v2.methods.aio.batch import get_batch_scrape_status as get_batch_scrape_status_async, _fetch_all_batch_pages_async
+from firecrawl.v2.methods.crawl import get_crawl_status, get_crawl_status_page, _fetch_all_pages
+from firecrawl.v2.methods.batch import get_batch_scrape_status, get_batch_scrape_status_page, _fetch_all_batch_pages
+from firecrawl.v2.methods.aio.crawl import (
+    get_crawl_status as get_crawl_status_async,
+    get_crawl_status_page as get_crawl_status_page_async,
+    _fetch_all_pages_async,
+)
+from firecrawl.v2.methods.aio.batch import (
+    get_batch_scrape_status as get_batch_scrape_status_async,
+    get_batch_scrape_status_page as get_batch_scrape_status_page_async,
+    _fetch_all_batch_pages_async,
+)
 
 
 class TestPaginationConfig:
@@ -123,6 +131,59 @@ class TestCrawlPagination:
         self.mock_client.get.assert_called_with(
             f"/v2/crawl/{self.job_id}", timeout=timeout_seconds
         )
+
+    def test_get_crawl_status_page(self):
+        """Test get_crawl_status_page returns a single page."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 10,
+            "total": 20,
+            "creditsUsed": 5,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=3",
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+        next_url = "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=2"
+
+        result = get_crawl_status_page(self.mock_client, next_url)
+
+        assert result.status == "completed"
+        assert result.next == "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=3"
+        assert len(result.data) == 1
+        self.mock_client.get.assert_called_with(next_url, timeout=None)
+
+    def test_get_crawl_status_page_propagates_request_timeout(self):
+        """Ensure request_timeout is forwarded to crawl status page requests."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 1,
+            "total": 1,
+            "creditsUsed": 1,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": None,
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+
+        next_url = "https://api.firecrawl.dev/v2/crawl/test-crawl-123?page=2"
+        timeout_seconds = 4.2
+        result = get_crawl_status_page(
+            self.mock_client,
+            next_url,
+            request_timeout=timeout_seconds,
+        )
+
+        assert result.status == "completed"
+        self.mock_client.get.assert_called_with(next_url, timeout=timeout_seconds)
 
     def test_get_crawl_status_with_pagination(self):
         """Test get_crawl_status with auto_paginate=True."""
@@ -326,6 +387,59 @@ class TestBatchScrapePagination:
         assert result.next == "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=2"
         assert len(result.data) == 1
         assert isinstance(result.data[0], Document)
+
+    def test_get_batch_scrape_status_page(self):
+        """Test get_batch_scrape_status_page returns a single page."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 10,
+            "total": 20,
+            "creditsUsed": 5,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=3",
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+        next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=2"
+
+        result = get_batch_scrape_status_page(self.mock_client, next_url)
+
+        assert result.status == "completed"
+        assert result.next == "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=3"
+        assert len(result.data) == 1
+        self.mock_client.get.assert_called_with(next_url, timeout=None)
+
+    def test_get_batch_scrape_status_page_propagates_request_timeout(self):
+        """Ensure request_timeout is forwarded to batch status page requests."""
+        mock_response = Mock()
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 1,
+            "total": 1,
+            "creditsUsed": 1,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": None,
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+
+        next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-batch-123?page=2"
+        timeout_seconds = 2.7
+        result = get_batch_scrape_status_page(
+            self.mock_client,
+            next_url,
+            request_timeout=timeout_seconds,
+        )
+
+        assert result.status == "completed"
+        self.mock_client.get.assert_called_with(next_url, timeout=timeout_seconds)
 
     def test_get_batch_scrape_status_with_pagination(self):
         """Test get_batch_scrape_status with auto_paginate=True."""
@@ -493,6 +607,61 @@ class TestAsyncPagination:
             f"/v2/crawl/{self.job_id}", timeout=timeout_seconds
         )
 
+    @pytest.mark.asyncio
+    async def test_get_crawl_status_page_async(self):
+        """Test async get_crawl_status_page returns a single page."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 10,
+            "total": 20,
+            "creditsUsed": 5,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": "https://api.firecrawl.dev/v2/crawl/test-async-123?page=3",
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+        next_url = "https://api.firecrawl.dev/v2/crawl/test-async-123?page=2"
+
+        result = await get_crawl_status_page_async(self.mock_client, next_url)
+
+        assert result.status == "completed"
+        assert result.next == "https://api.firecrawl.dev/v2/crawl/test-async-123?page=3"
+        assert len(result.data) == 1
+        self.mock_client.get.assert_awaited_with(next_url, timeout=None)
+
+    @pytest.mark.asyncio
+    async def test_get_crawl_status_page_async_propagates_request_timeout(self):
+        """Ensure async request_timeout is forwarded to crawl status page requests."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 1,
+            "total": 1,
+            "creditsUsed": 1,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": None,
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+
+        next_url = "https://api.firecrawl.dev/v2/crawl/test-async-123?page=2"
+        timeout_seconds = 6.1
+        result = await get_crawl_status_page_async(
+            self.mock_client,
+            next_url,
+            request_timeout=timeout_seconds,
+        )
+
+        assert result.status == "completed"
+        self.mock_client.get.assert_awaited_with(next_url, timeout=timeout_seconds)
+
     @pytest.mark.asyncio
     async def test_get_batch_scrape_status_async_with_pagination(self):
         """Test async get_batch_scrape_status with pagination."""
@@ -534,6 +703,61 @@ class TestAsyncPagination:
         assert result.next is None
         assert len(result.data) == 2
         assert self.mock_client.get.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_get_batch_scrape_status_page_async(self):
+        """Test async get_batch_scrape_status_page returns a single page."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 10,
+            "total": 20,
+            "creditsUsed": 5,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=3",
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+        next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=2"
+
+        result = await get_batch_scrape_status_page_async(self.mock_client, next_url)
+
+        assert result.status == "completed"
+        assert result.next == "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=3"
+        assert len(result.data) == 1
+        self.mock_client.get.assert_awaited_with(next_url, timeout=None)
+
+    @pytest.mark.asyncio
+    async def test_get_batch_scrape_status_page_async_propagates_request_timeout(self):
+        """Ensure async request_timeout is forwarded to batch status page requests."""
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "success": True,
+            "status": "completed",
+            "completed": 1,
+            "total": 1,
+            "creditsUsed": 1,
+            "expiresAt": "2024-01-01T00:00:00Z",
+            "next": None,
+            "data": [self.sample_doc],
+        }
+
+        self.mock_client.get.return_value = mock_response
+
+        next_url = "https://api.firecrawl.dev/v2/batch/scrape/test-async-123?page=2"
+        timeout_seconds = 4.4
+        result = await get_batch_scrape_status_page_async(
+            self.mock_client,
+            next_url,
+            request_timeout=timeout_seconds,
+        )
+
+        assert result.status == "completed"
+        self.mock_client.get.assert_awaited_with(next_url, timeout=timeout_seconds)
 
     @pytest.mark.asyncio
     async def test_fetch_all_pages_async_limits(self):
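The async tests above exercise the same single-page semantics as the sync ones, including `request_timeout` forwarding. Assuming `AsyncFirecrawl` is importable from the package root (the aliasing at the end of `client.py` below suggests it is), usage mirrors the sync client with `await`; a hedged sketch:

```python
import asyncio

from firecrawl import AsyncFirecrawl
from firecrawl.v2.types import PaginationConfig

async def main() -> None:
    firecrawl = AsyncFirecrawl(api_key="YOUR_API_KEY")
    job = await firecrawl.start_crawl("https://firecrawl.dev", limit=100)
    status = await firecrawl.get_crawl_status(
        job.id,
        pagination_config=PaginationConfig(auto_paginate=False),
    )
    if status.next:
        # Fetch exactly one more page; request_timeout bounds the single GET.
        page2 = await firecrawl.get_crawl_status_page(status.next, request_timeout=30.0)
        print(len(page2.data))

asyncio.run(main())
```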
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/client.py

@@ -61,6 +61,7 @@ class V2Proxy:
         self.crawl = client_instance.crawl
         self.start_crawl = client_instance.start_crawl
         self.get_crawl_status = client_instance.get_crawl_status
+        self.get_crawl_status_page = client_instance.get_crawl_status_page
         self.cancel_crawl = client_instance.cancel_crawl
         self.get_crawl_errors = client_instance.get_crawl_errors
         self.get_active_crawls = client_instance.get_active_crawls
@@ -78,6 +79,7 @@ class V2Proxy:
 
         self.start_batch_scrape = client_instance.start_batch_scrape
         self.get_batch_scrape_status = client_instance.get_batch_scrape_status
+        self.get_batch_scrape_status_page = client_instance.get_batch_scrape_status_page
         self.cancel_batch_scrape = client_instance.cancel_batch_scrape
         self.batch_scrape = client_instance.batch_scrape
         self.get_batch_scrape_errors = client_instance.get_batch_scrape_errors
@@ -127,6 +129,7 @@ class AsyncV2Proxy:
         self.start_crawl = client_instance.start_crawl
         self.wait_crawl = client_instance.wait_crawl
         self.get_crawl_status = client_instance.get_crawl_status
+        self.get_crawl_status_page = client_instance.get_crawl_status_page
         self.cancel_crawl = client_instance.cancel_crawl
         self.get_crawl_errors = client_instance.get_crawl_errors
         self.get_active_crawls = client_instance.get_active_crawls
@@ -144,6 +147,7 @@ class AsyncV2Proxy:
 
         self.start_batch_scrape = client_instance.start_batch_scrape
         self.get_batch_scrape_status = client_instance.get_batch_scrape_status
+        self.get_batch_scrape_status_page = client_instance.get_batch_scrape_status_page
         self.cancel_batch_scrape = client_instance.cancel_batch_scrape
         self.wait_batch_scrape = client_instance.wait_batch_scrape
         self.batch_scrape = client_instance.batch_scrape
@@ -198,6 +202,7 @@ class Firecrawl:
         self.start_crawl = self._v2_client.start_crawl
         self.crawl_params_preview = self._v2_client.crawl_params_preview
         self.get_crawl_status = self._v2_client.get_crawl_status
+        self.get_crawl_status_page = self._v2_client.get_crawl_status_page
         self.cancel_crawl = self._v2_client.cancel_crawl
         self.get_crawl_errors = self._v2_client.get_crawl_errors
         self.get_active_crawls = self._v2_client.get_active_crawls
@@ -205,6 +210,7 @@ class Firecrawl:
 
         self.start_batch_scrape = self._v2_client.start_batch_scrape
         self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
+        self.get_batch_scrape_status_page = self._v2_client.get_batch_scrape_status_page
         self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
         self.batch_scrape = self._v2_client.batch_scrape
         self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors
@@ -248,6 +254,7 @@ class AsyncFirecrawl:
 
         self.start_crawl = self._v2_client.start_crawl
         self.get_crawl_status = self._v2_client.get_crawl_status
+        self.get_crawl_status_page = self._v2_client.get_crawl_status_page
         self.cancel_crawl = self._v2_client.cancel_crawl
         self.crawl = self._v2_client.crawl
         self.get_crawl_errors = self._v2_client.get_crawl_errors
@@ -256,6 +263,7 @@ class AsyncFirecrawl:
 
         self.start_batch_scrape = self._v2_client.start_batch_scrape
         self.get_batch_scrape_status = self._v2_client.get_batch_scrape_status
+        self.get_batch_scrape_status_page = self._v2_client.get_batch_scrape_status_page
         self.cancel_batch_scrape = self._v2_client.cancel_batch_scrape
         self.batch_scrape = self._v2_client.batch_scrape
         self.get_batch_scrape_errors = self._v2_client.get_batch_scrape_errors
@@ -278,4 +286,4 @@ class AsyncFirecrawl:
 
 # Export Firecrawl as an alias for FirecrawlApp
 FirecrawlApp = Firecrawl
-AsyncFirecrawlApp = AsyncFirecrawl
+AsyncFirecrawlApp = AsyncFirecrawl
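One consequence of the wiring above: the page getters are now reachable through every public entry point, not just the versioned proxy. A quick sketch of the equivalent access paths (assuming a configured client; `"<next_url>"` is a placeholder for a real `next` URL):

```python
from firecrawl import Firecrawl

firecrawl = Firecrawl(api_key="YOUR_API_KEY")

# Top-level convenience method and the explicit v2 proxy hit the same code.
page = firecrawl.get_crawl_status_page("<next_url>")
same_page = firecrawl.v2.get_crawl_status_page("<next_url>")

# Batch scrape paging is exposed the same way.
batch_page = firecrawl.get_batch_scrape_status_page("<next_url>")
```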
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v1/client.py

@@ -150,7 +150,7 @@ class V1ScrapeOptions(pydantic.BaseModel):
     skipTlsVerification: Optional[bool] = None
     removeBase64Images: Optional[bool] = None
     blockAds: Optional[bool] = None
-    proxy: Optional[Literal["basic", "stealth", "auto"]] = None
+    proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None
     changeTrackingOptions: Optional[V1ChangeTrackingOptions] = None
     maxAge: Optional[int] = None
     storeInCache: Optional[bool] = None
@@ -542,7 +542,7 @@ class V1FirecrawlApp:
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         parse_pdf: Optional[bool] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
@@ -1441,7 +1441,7 @@ class V1FirecrawlApp:
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
         actions: Optional[List[Union[V1WaitAction, V1ScreenshotAction, V1ClickAction, V1WriteAction, V1PressAction, V1ScrollAction, V1ScrapeAction, V1ExecuteJavascriptAction, V1PDFAction]]] = None,
@@ -1582,7 +1582,7 @@ class V1FirecrawlApp:
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
         actions: Optional[List[Union[V1WaitAction, V1ScreenshotAction, V1ClickAction, V1WriteAction, V1PressAction, V1ScrollAction, V1ScrapeAction, V1ExecuteJavascriptAction, V1PDFAction]]] = None,
@@ -1722,7 +1722,7 @@ class V1FirecrawlApp:
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
         actions: Optional[List[Union[V1WaitAction, V1ScreenshotAction, V1ClickAction, V1WriteAction, V1PressAction, V1ScrollAction, V1ScrapeAction, V1ExecuteJavascriptAction, V1PDFAction]]] = None,
@@ -3523,7 +3523,7 @@ class AsyncV1FirecrawlApp(V1FirecrawlApp):
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         parse_pdf: Optional[bool] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
@@ -3657,7 +3657,7 @@ class AsyncV1FirecrawlApp(V1FirecrawlApp):
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
         actions: Optional[List[Union[V1WaitAction, V1ScreenshotAction, V1ClickAction, V1WriteAction, V1PressAction, V1ScrollAction, V1ScrapeAction, V1ExecuteJavascriptAction, V1PDFAction]]] = None,
@@ -3796,7 +3796,7 @@ class AsyncV1FirecrawlApp(V1FirecrawlApp):
         skip_tls_verification: Optional[bool] = None,
         remove_base64_images: Optional[bool] = None,
         block_ads: Optional[bool] = None,
-        proxy: Optional[Literal["basic", "stealth", "auto"]] = None,
+        proxy: Optional[Literal["basic", "stealth", "enhanced", "auto"]] = None,
         extract: Optional[V1JsonConfig] = None,
         json_options: Optional[V1JsonConfig] = None,
         actions: Optional[List[Union[V1WaitAction, V1ScreenshotAction, V1ClickAction, V1WriteAction, V1PressAction, V1ScrollAction, V1ScrapeAction, V1ExecuteJavascriptAction, V1PDFAction]]] = None,
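The v1 change is purely additive: each `proxy` literal gains an `"enhanced"` option alongside `"basic"`, `"stealth"`, and `"auto"`. A hedged sketch against the v1 compatibility surface shown in the README (parameter names as in the signatures above):

```python
from firecrawl import Firecrawl

firecrawl = Firecrawl(api_key="YOUR_API_KEY")

# "enhanced" now passes the Literal validation on scrape-style calls;
# any value outside the Literal is rejected by pydantic as before.
doc_v1 = firecrawl.v1.scrape_url(
    "https://firecrawl.dev",
    formats=["markdown"],
    proxy="enhanced",
)
```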
{firecrawl_py-4.13.2 → firecrawl_py-4.14.0}/firecrawl/v2/client.py

@@ -423,6 +423,28 @@ class FirecrawlClient:
             pagination_config=pagination_config,
             request_timeout=request_timeout,
         )
+
+    def get_crawl_status_page(
+        self,
+        next_url: str,
+        *,
+        request_timeout: Optional[float] = None,
+    ) -> CrawlJob:
+        """
+        Fetch a single page of crawl results using a next URL.
+
+        Args:
+            next_url: Opaque next URL from a prior crawl status response
+            request_timeout: Timeout (in seconds) for the HTTP request
+
+        Returns:
+            CrawlJob with the page data and next URL (if any)
+        """
+        return crawl_module.get_crawl_status_page(
+            self.http_client,
+            next_url,
+            request_timeout=request_timeout,
+        )
 
     def get_crawl_errors(self, crawl_id: str) -> CrawlErrorsResponse:
         """
@@ -741,6 +763,27 @@ class FirecrawlClient:
             pagination_config=pagination_config
         )
 
+    def get_batch_scrape_status_page(
+        self,
+        next_url: str,
+        *,
+        request_timeout: Optional[float] = None,
+    ):
+        """Fetch a single page of batch scrape results using a next URL.
+
+        Args:
+            next_url: Opaque next URL from a prior batch scrape status response
+            request_timeout: Timeout (in seconds) for the HTTP request
+
+        Returns:
+            BatchScrapeJob with the page data and next URL (if any)
+        """
+        return batch_module.get_batch_scrape_status_page(
+            self.http_client,
+            next_url,
+            request_timeout=request_timeout,
+        )
+
     def cancel_batch_scrape(self, job_id: str) -> bool:
         """Cancel a running batch scrape job.
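Per the docstrings above, both page getters take a keyword-only `request_timeout` that bounds the single HTTP GET (the unit tests confirm it is forwarded as `timeout`). A short batch-scrape sketch combining the pieces (the 30-second timeout is illustrative):

```python
from firecrawl import Firecrawl
from firecrawl.v2.types import PaginationConfig

firecrawl = Firecrawl(api_key="YOUR_API_KEY")

job = firecrawl.start_batch_scrape(["https://firecrawl.dev"])
status = firecrawl.get_batch_scrape_status(
    job.id,
    pagination_config=PaginationConfig(auto_paginate=False),
)
docs = list(status.data)
while status.next:
    # Each page fetch is bounded by its own request timeout.
    status = firecrawl.get_batch_scrape_status_page(status.next, request_timeout=30.0)
    docs.extend(status.data)
```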