firecrawl 4.3.2__tar.gz → 4.3.3__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.

Potentially problematic release. This version of firecrawl might be problematic.

Files changed (86)
  1. {firecrawl-4.3.2 → firecrawl-4.3.3}/PKG-INFO +1 -1
  2. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__init__.py +1 -1
  3. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +1 -0
  4. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +1 -0
  5. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +1 -0
  6. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +1 -0
  7. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +1 -0
  8. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +1 -0
  9. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +1 -0
  10. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_crawl.py +4 -2
  11. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_extract.py +1 -0
  12. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_map.py +1 -0
  13. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_scrape.py +1 -0
  14. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_search.py +1 -0
  15. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +18 -0
  16. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +2 -1
  17. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +3 -2
  18. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +2 -2
  19. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +3 -2
  20. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +18 -1
  21. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +4 -2
  22. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/client.py +20 -6
  23. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/client_async.py +7 -1
  24. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/batch.py +3 -1
  25. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/crawl.py +2 -0
  26. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/extract.py +7 -0
  27. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/map.py +2 -0
  28. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/search.py +5 -1
  29. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/batch.py +1 -1
  30. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/crawl.py +3 -0
  31. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/extract.py +7 -0
  32. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/map.py +2 -0
  33. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/search.py +4 -1
  34. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/types.py +50 -0
  35. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/validation.py +3 -0
  36. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl.egg-info/PKG-INFO +1 -1
  37. {firecrawl-4.3.2 → firecrawl-4.3.3}/LICENSE +0 -0
  38. {firecrawl-4.3.2 → firecrawl-4.3.3}/README.md +0 -0
  39. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -0
  40. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -0
  41. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/conftest.py +0 -0
  42. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_async.py +0 -0
  43. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_usage.py +0 -0
  44. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -0
  45. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -0
  46. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -0
  47. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -0
  48. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -0
  49. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -0
  50. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -0
  51. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -0
  52. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -0
  53. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_pagination.py +0 -0
  54. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -0
  55. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -0
  56. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -0
  57. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -0
  58. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -0
  59. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/client.py +0 -0
  60. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/firecrawl.backup.py +0 -0
  61. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/types.py +0 -0
  62. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v1/__init__.py +0 -0
  63. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v1/client.py +0 -0
  64. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/__init__.py +0 -0
  65. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/__init__.py +0 -0
  66. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/scrape.py +0 -0
  67. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/aio/usage.py +0 -0
  68. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/scrape.py +0 -0
  69. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/methods/usage.py +0 -0
  70. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/__init__.py +0 -0
  71. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/error_handler.py +0 -0
  72. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/get_version.py +0 -0
  73. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/http_client.py +0 -0
  74. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/http_client_async.py +0 -0
  75. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/utils/normalize.py +0 -0
  76. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/watcher.py +0 -0
  77. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl/v2/watcher_async.py +0 -0
  78. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl.egg-info/SOURCES.txt +0 -0
  79. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl.egg-info/dependency_links.txt +0 -0
  80. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl.egg-info/requires.txt +0 -0
  81. {firecrawl-4.3.2 → firecrawl-4.3.3}/firecrawl.egg-info/top_level.txt +0 -0
  82. {firecrawl-4.3.2 → firecrawl-4.3.3}/pyproject.toml +0 -0
  83. {firecrawl-4.3.2 → firecrawl-4.3.3}/setup.cfg +0 -0
  84. {firecrawl-4.3.2 → firecrawl-4.3.3}/setup.py +0 -0
  85. {firecrawl-4.3.2 → firecrawl-4.3.3}/tests/test_change_tracking.py +0 -0
  86. {firecrawl-4.3.2 → firecrawl-4.3.3}/tests/test_timeout_conversion.py +0 -0

--- firecrawl-4.3.2/PKG-INFO
+++ firecrawl-4.3.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: firecrawl
-Version: 4.3.2
+Version: 4.3.3
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
--- firecrawl-4.3.2/firecrawl/__init__.py
+++ firecrawl-4.3.3/firecrawl/__init__.py
@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )
 
-__version__ = "4.3.2"
+__version__ = "4.3.3"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py
@@ -63,6 +63,7 @@ async def test_async_batch_wait_with_all_params():
         zero_data_retention=False,
         poll_interval=1,
         timeout=180,
+        integration="_e2e-test",
     )
     assert job.status in ("completed", "failed")
 
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py
@@ -53,6 +53,7 @@ async def test_async_crawl_with_all_params():
         allow_subdomains=True,
         delay=1,
         max_concurrency=2,
+        integration="_e2e-test",
         webhook="https://example.com/hook",
         scrape_options=ScrapeOptions(
             formats=[
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py
@@ -32,6 +32,7 @@ async def test_async_extract_with_schema_and_options():
         allow_external_links=False,
         enable_web_search=False,
         show_sources=False,
+        integration="_e2e-test",
         # agent={"model": "FIRE-1", "prompt": "Extract title"}, # Skipping agent test in CI
     )
     assert res is not None
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py
@@ -34,6 +34,7 @@ async def test_async_map_with_all_params(sitemap):
         limit=10,
         sitemap=sitemap,
         timeout=15000,
+        integration="_e2e-test",
     )
     assert hasattr(resp, "links") and isinstance(resp.links, list)
     assert len(resp.links) <= 10
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py
@@ -64,6 +64,7 @@ async def test_async_scrape_with_all_params():
         proxy="basic",
         max_age=0,
         store_in_cache=False,
+        integration="_e2e-test",
     )
     assert isinstance(doc, Document)
 
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py
@@ -140,6 +140,7 @@ async def test_async_search_all_parameters():
         location="US",
         ignore_invalid_urls=True,
         timeout=60000,
+        integration="_e2e-test",
         scrape_options=ScrapeOptions(
             formats=[
                 "markdown",
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/test_batch_scrape.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/test_batch_scrape.py
@@ -83,6 +83,7 @@ class TestBatchScrapeE2E:
             zero_data_retention=False,
             poll_interval=1,
             wait_timeout=180,
+            integration="_e2e-test",
         )
 
         assert job.status in ["completed", "failed"]
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/test_crawl.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/test_crawl.py
@@ -168,7 +168,8 @@ class TestCrawlE2E:
             limit=3,
             max_discovery_depth=2,
             poll_interval=1,
-            timeout=120
+            timeout=120,
+            integration="_e2e-test",
         )
 
         assert crawl_job.status in ["completed", "failed"]
@@ -257,7 +258,8 @@ class TestCrawlE2E:
             max_concurrency=2,
             webhook="https://example.com/hook",
             scrape_options=scrape_opts,
-            zero_data_retention=False
+            zero_data_retention=False,
+            integration="_e2e-test",
         )
 
         assert crawl_job.id is not None
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/test_extract.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/test_extract.py
@@ -42,6 +42,7 @@ class TestExtractE2E:
             prompt="Extract the main page title",
             show_sources=True,
             enable_web_search=False,
+            integration="_e2e-test",
         )
 
         assert hasattr(resp, "success")
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/test_map.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/test_map.py
@@ -48,6 +48,7 @@ class TestMapE2E:
             include_subdomains=True, limit=10,
             sitemap=sitemap,
             timeout=15000,
+            integration="_e2e-test",
         )
 
         assert hasattr(resp, "links") and isinstance(resp.links, list)
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/test_scrape.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/test_scrape.py
@@ -150,6 +150,7 @@ class TestScrapeE2E:
             proxy="basic",
             max_age=0,
             store_in_cache=False,
+            integration="_e2e-test",
         )
         assert isinstance(doc, Document)
 
--- firecrawl-4.3.2/firecrawl/__tests__/e2e/v2/test_search.py
+++ firecrawl-4.3.3/firecrawl/__tests__/e2e/v2/test_search.py
@@ -144,6 +144,7 @@ def test_search_all_parameters():
         location="US",
         ignore_invalid_urls=True,
         timeout=60000,
+        integration="_e2e-test",
         scrape_options=ScrapeOptions(
             formats=[
                 "markdown",
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py
@@ -59,3 +59,21 @@ class TestAsyncCrawlRequestPreparation:
         assert "metadata" not in webhook
         assert "events" not in webhook
 
+    def test_all_fields_including_integration(self):
+        req = CrawlRequest(
+            url="https://example.com",
+            include_paths=["/docs/*"],
+            exclude_paths=["/admin/*"],
+            max_discovery_depth=2,
+            sitemap="include",
+            ignore_query_parameters=True,
+            crawl_entire_domain=False,
+            allow_external_links=True,
+            allow_subdomains=True,
+            max_concurrency=3,
+            zero_data_retention=False,
+            integration=" _unit-test ",
+        )
+        payload = _prepare_crawl_request(req)
+        assert payload["integration"] == "_unit-test"
+
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py
@@ -9,11 +9,12 @@ class TestAsyncMapRequestPreparation:
         assert payload["url"] == "https://example.com"
 
     def test_fields(self):
-        opts = MapOptions(search="docs", include_subdomains=True, limit=10, sitemap="only", timeout=15000)
+        opts = MapOptions(search="docs", include_subdomains=True, limit=10, sitemap="only", timeout=15000, integration=" _unit-test ")
         payload = _prepare_map_request("https://example.com", opts)
         assert payload["search"] == "docs"
         assert payload["includeSubdomains"] is True
         assert payload["limit"] == 10
         assert payload["sitemap"] == "only"
         assert payload["timeout"] == 15000
+        assert payload["integration"] == "_unit-test"
 
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py
@@ -33,10 +33,12 @@ class TestAsyncSearchRequestPreparation:
             ignore_invalid_urls=False,
             timeout=30000,
             scrape_options=scrape_opts,
+            integration=" _unit-test ",
         )
         data = _prepare_search_request(request)
         assert data["ignoreInvalidURLs"] is False
         assert "scrapeOptions" in data
+        assert data["integration"] == "_unit-test"
 
     def test_exclude_none_behavior(self):
         request = SearchRequest(
@@ -59,5 +61,4 @@ class TestAsyncSearchRequestPreparation:
         assert "scrapeOptions" in data
         scrape_data = data["scrapeOptions"]
         assert "onlyMainContent" in scrape_data
-        assert "mobile" in scrape_data
-
+        assert "mobile" in scrape_data
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py
@@ -76,14 +76,14 @@ class TestBatchScrapeRequestPreparation:
             ignore_invalid_urls=True,
             max_concurrency=5,
             zero_data_retention=True,
-            integration="test",
+            integration="_unit-test",
         )
         assert isinstance(data["webhook"], dict) and data["webhook"]["url"] == "https://hook.test"
         assert data["appendToId"] == "00000000-0000-0000-0000-000000000000"
         assert data["ignoreInvalidURLs"] is True
         assert data["maxConcurrency"] == 5
         assert data["zeroDataRetention"] is True
-        assert data["integration"] == "test"
+        assert data["integration"] == "_unit-test"
 
     def test_string_webhook_is_passed_verbatim(self):
         data = prepare_batch_scrape_request(["https://example.com"], webhook="https://hook.simple")
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py
@@ -35,6 +35,7 @@ class TestMapRequestPreparation:
             limit=25,
             sitemap="only",
             timeout=15000,
+            integration=" _unit-test ",
         )
         data = _prepare_map_request("https://example.com", opts)
 
@@ -44,10 +45,10 @@ class TestMapRequestPreparation:
         assert data["limit"] == 25
         assert data["sitemap"] == "only"
         assert data["timeout"] == 15000
+        assert data["integration"] == "_unit-test"
 
     def test_invalid_url(self):
         with pytest.raises(ValueError):
             _prepare_map_request("")
         with pytest.raises(ValueError):
-            _prepare_map_request(" ")
-
+            _prepare_map_request(" ")
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py
@@ -89,4 +89,21 @@ class TestScrapeRequestPreparation:
     def test_whitespace_url_validation(self):
         """Test validation with whitespace-only URL."""
         with pytest.raises(ValueError, match="URL cannot be empty"):
-            _prepare_scrape_request(" ")
+            _prepare_scrape_request(" ")
+
+    def test_all_params_including_integration(self):
+        opts = ScrapeOptions(
+            formats=["markdown"],
+            headers={"User-Agent": "Test"},
+            include_tags=["h1"],
+            exclude_tags=["nav"],
+            only_main_content=False,
+            timeout=15000,
+            wait_for=2000,
+            mobile=True,
+            skip_tls_verification=True,
+            remove_base64_images=False,
+            integration=" _unit-test ",
+        )
+        data = _prepare_scrape_request("https://example.com", opts)
+        assert data["integration"] == "_unit-test"
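
The unit tests above pin the trimming behavior down to the wire format: an integration value passed with surrounding whitespace, such as " _unit-test ", must reach the prepared request body as "_unit-test". A minimal sketch of the same check outside the test suite; the import path for _prepare_scrape_request is an assumption, only the assertion itself comes from the diff:

    # Sketch only: assumes _prepare_scrape_request lives in firecrawl.v2.methods.scrape.
    from firecrawl.v2.methods.scrape import _prepare_scrape_request
    from firecrawl.v2.types import ScrapeOptions

    data = _prepare_scrape_request(
        "https://example.com",
        ScrapeOptions(formats=["markdown"], integration=" _unit-test "),
    )
    assert data["integration"] == "_unit-test"  # whitespace stripped before the payload is sent
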
--- firecrawl-4.3.2/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py
+++ firecrawl-4.3.3/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py
@@ -43,7 +43,8 @@ class TestSearchRequestPreparation:
             location="US",
             ignore_invalid_urls=False,
             timeout=30000,
-            scrape_options=scrape_opts
+            scrape_options=scrape_opts,
+            integration=" _e2e-test ",
         )
 
         data = _prepare_search_request(request)
@@ -83,6 +84,7 @@ class TestSearchRequestPreparation:
         assert scrape_data["skipTlsVerification"] is True
         assert "removeBase64Images" in scrape_data
         assert scrape_data["removeBase64Images"] is False
+        assert data["integration"] == "_e2e-test"
 
     def test_exclude_none_behavior(self):
         """Test that exclude_none=True behavior is working."""
@@ -164,4 +166,4 @@ class TestSearchRequestPreparation:
         assert "only_main_content" not in scrape_data
         assert "wait_for" not in scrape_data
         assert "skip_tls_verification" not in scrape_data
-        assert "remove_base64_images" not in scrape_data
+        assert "remove_base64_images" not in scrape_data
--- firecrawl-4.3.2/firecrawl/v2/client.py
+++ firecrawl-4.3.3/firecrawl/v2/client.py
@@ -117,6 +117,7 @@ class FirecrawlClient:
         proxy: Optional[str] = None,
         max_age: Optional[int] = None,
         store_in_cache: Optional[bool] = None,
+        integration: Optional[str] = None,
     ) -> Document:
         """
         Scrape a single URL and return the document.
@@ -165,8 +166,9 @@ class FirecrawlClient:
                 proxy=proxy,
                 max_age=max_age,
                 store_in_cache=store_in_cache,
+                integration=integration,
             ).items() if v is not None}
-        ) if any(v is not None for v in [formats, headers, include_tags, exclude_tags, only_main_content, timeout, wait_for, mobile, parsers, actions, location, skip_tls_verification, remove_base64_images, fast_mode, use_mock, block_ads, proxy, max_age, store_in_cache]) else None
+        ) if any(v is not None for v in [formats, headers, include_tags, exclude_tags, only_main_content, timeout, wait_for, mobile, parsers, actions, location, skip_tls_verification, remove_base64_images, fast_mode, use_mock, block_ads, proxy, max_age, store_in_cache, integration]) else None
         return scrape_module.scrape(self.http_client, url, options)
 
     def search(
@@ -181,6 +183,7 @@ class FirecrawlClient:
         ignore_invalid_urls: Optional[bool] = None,
         timeout: Optional[int] = None,
         scrape_options: Optional[ScrapeOptions] = None,
+        integration: Optional[str] = None,
     ) -> SearchData:
         """
         Search for documents.
@@ -206,6 +209,7 @@ class FirecrawlClient:
             ignore_invalid_urls=ignore_invalid_urls,
             timeout=timeout,
             scrape_options=scrape_options,
+            integration=integration,
         )
 
         return search_module.search(self.http_client, request)
@@ -230,7 +234,8 @@ class FirecrawlClient:
         scrape_options: Optional[ScrapeOptions] = None,
         zero_data_retention: bool = False,
         poll_interval: int = 2,
-        timeout: Optional[int] = None
+        timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ) -> CrawlJob:
         """
         Start a crawl job and wait for it to complete.
@@ -279,7 +284,8 @@ class FirecrawlClient:
             max_concurrency=max_concurrency,
             webhook=webhook,
             scrape_options=scrape_options,
-            zero_data_retention=zero_data_retention
+            zero_data_retention=zero_data_retention,
+            integration=integration,
         )
 
         return crawl_module.crawl(
@@ -307,7 +313,8 @@ class FirecrawlClient:
         max_concurrency: Optional[int] = None,
         webhook: Optional[Union[str, WebhookConfig]] = None,
         scrape_options: Optional[ScrapeOptions] = None,
-        zero_data_retention: bool = False
+        zero_data_retention: bool = False,
+        integration: Optional[str] = None,
     ) -> CrawlResponse:
         """
         Start an asynchronous crawl job.
@@ -353,7 +360,8 @@ class FirecrawlClient:
             max_concurrency=max_concurrency,
             webhook=webhook,
             scrape_options=scrape_options,
-            zero_data_retention=zero_data_retention
+            zero_data_retention=zero_data_retention,
+            integration=integration,
         )
 
         return crawl_module.start_crawl(self.http_client, request)
@@ -421,6 +429,7 @@ class FirecrawlClient:
         limit: Optional[int] = None,
         sitemap: Optional[Literal["only", "include", "skip"]] = None,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
         location: Optional[Location] = None,
     ) -> MapData:
         """Map a URL and return discovered links.
@@ -442,8 +451,9 @@ class FirecrawlClient:
             limit=limit,
             sitemap=sitemap if sitemap is not None else "include",
             timeout=timeout,
+            integration=integration,
             location=location
-        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, timeout, location]) else None
+        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, timeout, integration, location]) else None
 
         return map_module.map(self.http_client, url, options)
 
@@ -484,6 +494,7 @@ class FirecrawlClient:
         show_sources: Optional[bool] = None,
         scrape_options: Optional['ScrapeOptions'] = None,
         ignore_invalid_urls: Optional[bool] = None,
+        integration: Optional[str] = None,
     ):
         """Start an extract job (non-blocking).
 
@@ -512,6 +523,7 @@ class FirecrawlClient:
             show_sources=show_sources,
             scrape_options=scrape_options,
             ignore_invalid_urls=ignore_invalid_urls,
+            integration=integration,
         )
 
     def extract(
@@ -528,6 +540,7 @@ class FirecrawlClient:
         ignore_invalid_urls: Optional[bool] = None,
         poll_interval: int = 2,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ):
         """Extract structured data and wait until completion.
 
@@ -560,6 +573,7 @@ class FirecrawlClient:
             ignore_invalid_urls=ignore_invalid_urls,
             poll_interval=poll_interval,
             timeout=timeout,
+            integration=integration,
         )
 
     def start_batch_scrape(
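
Taken together, the client.py hunks above thread one new optional keyword, integration, through scrape, search, crawl, start_crawl, map, and both extract entry points on the synchronous client; it is simply forwarded into the corresponding options/request object. A minimal usage sketch, assuming the client is constructed with an api_key argument and that the first positional argument of each method is the URL or query (neither detail is shown in this diff); parameter names other than integration are taken from the hunks and tests above:

    # Sketch under the stated assumptions, not a verbatim excerpt of the SDK docs.
    from firecrawl.v2.client import FirecrawlClient

    client = FirecrawlClient(api_key="fc-YOUR-KEY")  # hypothetical constructor arguments

    doc = client.scrape("https://example.com", formats=["markdown"], integration="_my-app")
    results = client.search("firecrawl sdk", integration="_my-app")
    crawl_job = client.crawl("https://example.com", limit=3, integration="_my-app")
    links = client.map("https://example.com", sitemap="include", integration="_my-app")
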
--- firecrawl-4.3.2/firecrawl/v2/client_async.py
+++ firecrawl-4.3.3/firecrawl/v2/client_async.py
@@ -132,6 +132,7 @@ class AsyncFirecrawlClient:
         limit: Optional[int] = None,
         sitemap: Optional[Literal["only", "include", "skip"]] = None,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ) -> MapData:
         options = MapOptions(
             search=search,
@@ -139,7 +140,8 @@ class AsyncFirecrawlClient:
             limit=limit,
             sitemap=sitemap if sitemap is not None else "include",
             timeout=timeout,
-        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, timeout]) else None
+            integration=integration,
+        ) if any(v is not None for v in [search, include_subdomains, limit, sitemap, integration, timeout]) else None
         return await async_map.map(self.async_http_client, url, options)
 
     async def start_batch_scrape(self, urls: List[str], **kwargs) -> Any:
@@ -196,6 +198,7 @@ class AsyncFirecrawlClient:
         ignore_invalid_urls: Optional[bool] = None,
         poll_interval: int = 2,
         timeout: Optional[int] = None,
+        integration: Optional[str] = None,
     ):
         return await async_extract.extract(
             self.async_http_client,
@@ -210,6 +213,7 @@ class AsyncFirecrawlClient:
             ignore_invalid_urls=ignore_invalid_urls,
             poll_interval=poll_interval,
             timeout=timeout,
+            integration=integration,
         )
 
     async def get_extract_status(self, job_id: str):
@@ -227,6 +231,7 @@ class AsyncFirecrawlClient:
         show_sources: Optional[bool] = None,
         scrape_options: Optional['ScrapeOptions'] = None,
         ignore_invalid_urls: Optional[bool] = None,
+        integration: Optional[str] = None,
     ):
         return await async_extract.start_extract(
             self.async_http_client,
@@ -239,6 +244,7 @@ class AsyncFirecrawlClient:
             show_sources=show_sources,
             scrape_options=scrape_options,
             ignore_invalid_urls=ignore_invalid_urls,
+            integration=integration,
         )
 
     # Usage endpoints
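
client_async.py mirrors the change for the async surface (map, extract, and start_extract). A short sketch under the same assumptions as above; the urls/prompt parameter names are taken from the extract helpers and e2e tests in this diff, not from the async client signature itself:

    import asyncio
    from firecrawl.v2.client_async import AsyncFirecrawlClient

    async def main():
        client = AsyncFirecrawlClient(api_key="fc-YOUR-KEY")  # hypothetical constructor arguments
        links = await client.map("https://example.com", limit=10, integration="_my-app")
        extracted = await client.extract(
            urls=["https://example.com"],
            prompt="Extract the page title",
            integration="_my-app",
        )
        print(links, extracted)

    asyncio.run(main())
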
--- firecrawl-4.3.2/firecrawl/v2/methods/aio/batch.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/aio/batch.py
@@ -26,7 +26,9 @@ def _prepare(urls: List[str], *, options: Optional[ScrapeOptions] = None, **kwar
     if (v := kwargs.get("zero_data_retention")) is not None:
         payload["zeroDataRetention"] = v
     if (v := kwargs.get("integration")) is not None:
-        payload["integration"] = v
+        trimmed_integration = str(v).strip()
+        if trimmed_integration:
+            payload["integration"] = trimmed_integration
     return payload
 
 
--- firecrawl-4.3.2/firecrawl/v2/methods/aio/crawl.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/aio/crawl.py
@@ -56,6 +56,8 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict:
         if snake in request_data:
             data[camel] = request_data.pop(snake)
     data.update(request_data)
+    if getattr(request, "integration", None) is not None:
+        data["integration"] = str(getattr(request, "integration")).strip()
     return data
 
 
--- firecrawl-4.3.2/firecrawl/v2/methods/aio/extract.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/aio/extract.py
@@ -17,6 +17,7 @@ def _prepare_extract_request(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> Dict[str, Any]:
     body: Dict[str, Any] = {}
     if urls is not None:
@@ -39,6 +40,8 @@ def _prepare_extract_request(
         prepared = prepare_scrape_options(scrape_options)
         if prepared:
             body["scrapeOptions"] = prepared
+    if integration is not None and str(integration).strip():
+        body["integration"] = str(integration).strip()
     return body
 
 
@@ -54,6 +57,7 @@ async def start_extract(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     body = _prepare_extract_request(
         urls,
@@ -65,6 +69,7 @@ async def start_extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     resp = await client.post("/v2/extract", body)
     return ExtractResponse(**resp.json())
@@ -106,6 +111,7 @@ async def extract(
     ignore_invalid_urls: Optional[bool] = None,
     poll_interval: int = 2,
     timeout: Optional[int] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     started = await start_extract(
         client,
@@ -118,6 +124,7 @@ async def extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     job_id = getattr(started, "id", None)
     if not job_id:
--- firecrawl-4.3.2/firecrawl/v2/methods/aio/map.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/aio/map.py
@@ -20,6 +20,8 @@ def _prepare_map_request(url: str, options: Optional[MapOptions] = None) -> Dict
             data["limit"] = options.limit
         if options.timeout is not None:
             data["timeout"] = options.timeout
+        if options.integration is not None:
+            data["integration"] = options.integration.strip()
         if options.location is not None:
             data["location"] = options.location.model_dump(exclude_none=True)
         payload.update(data)
--- firecrawl-4.3.2/firecrawl/v2/methods/aio/search.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/aio/search.py
@@ -10,6 +10,7 @@ from ...types import (
 )
 from ...utils.http_client_async import AsyncHttpClient
 from ...utils.error_handler import handle_response_error
+from ...utils.normalize import normalize_document_input
 from ...utils.validation import validate_scrape_options, prepare_scrape_options
 
 T = TypeVar("T")
@@ -73,7 +74,7 @@ def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, Docu
                 "summary" in item or
                 "json" in item
             ):
-                results.append(Document(**item))
+                results.append(Document(**normalize_document_input(item)))
             else:
                 results.append(result_type(**item))
         else:
@@ -168,5 +169,8 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
         if scrape_data:
             data["scrapeOptions"] = scrape_data
     data.pop("scrape_options", None)
+
+    if (v := getattr(validated_request, "integration", None)) is not None and str(v).strip():
+        data["integration"] = str(validated_request.integration).strip()
 
     return data
--- firecrawl-4.3.2/firecrawl/v2/methods/batch.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/batch.py
@@ -407,7 +407,7 @@ def prepare_batch_scrape_request(
     if zero_data_retention is not None:
         request_data["zeroDataRetention"] = zero_data_retention
     if integration is not None:
-        request_data["integration"] = integration
+        request_data["integration"] = str(integration).strip()
 
     return request_data
 
--- firecrawl-4.3.2/firecrawl/v2/methods/crawl.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/crawl.py
@@ -99,6 +99,9 @@ def _prepare_crawl_request(request: CrawlRequest) -> dict:
 
     # Add any remaining fields that don't need conversion (like limit)
     data.update(request_data)
+    # Trim integration if present
+    if "integration" in data and isinstance(data["integration"], str):
+        data["integration"] = data["integration"].strip()
 
     return data
 
--- firecrawl-4.3.2/firecrawl/v2/methods/extract.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/extract.py
@@ -18,6 +18,7 @@ def _prepare_extract_request(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> Dict[str, Any]:
     body: Dict[str, Any] = {}
     if urls is not None:
@@ -40,6 +41,8 @@ def _prepare_extract_request(
         prepared = prepare_scrape_options(scrape_options)
         if prepared:
             body["scrapeOptions"] = prepared
+    if integration is not None and str(integration).strip():
+        body["integration"] = str(integration).strip()
     return body
 
 
@@ -55,6 +58,7 @@ def start_extract(
     show_sources: Optional[bool] = None,
     scrape_options: Optional[ScrapeOptions] = None,
     ignore_invalid_urls: Optional[bool] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     body = _prepare_extract_request(
         urls,
@@ -66,6 +70,7 @@ def start_extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     resp = client.post("/v2/extract", body)
     if not resp.ok:
@@ -111,6 +116,7 @@ def extract(
     ignore_invalid_urls: Optional[bool] = None,
     poll_interval: int = 2,
     timeout: Optional[int] = None,
+    integration: Optional[str] = None,
 ) -> ExtractResponse:
     started = start_extract(
         client,
@@ -123,6 +129,7 @@ def extract(
         show_sources=show_sources,
         scrape_options=scrape_options,
         ignore_invalid_urls=ignore_invalid_urls,
+        integration=integration,
     )
     job_id = getattr(started, "id", None)
     if not job_id:
--- firecrawl-4.3.2/firecrawl/v2/methods/map.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/map.py
@@ -27,6 +27,8 @@ def _prepare_map_request(url: str, options: Optional[MapOptions] = None) -> Dict
             data["limit"] = options.limit
         if options.timeout is not None:
             data["timeout"] = options.timeout
+        if options.integration is not None and options.integration.strip():
+            data["integration"] = options.integration.strip()
         if options.location is not None:
             data["location"] = options.location.model_dump(exclude_none=True)
         payload.update(data)
--- firecrawl-4.3.2/firecrawl/v2/methods/search.py
+++ firecrawl-4.3.3/firecrawl/v2/methods/search.py
@@ -71,7 +71,7 @@ def _transform_array(arr: List[Any], result_type: Type[T]) -> List[Union[T, 'Doc
                 "summary" in item or
                 "json" in item
             ):
-                results.append(Document(**item))
+                results.append(Document(**normalize_document_input(item)))
             else:
                 results.append(result_type(**item))
         else:
@@ -194,4 +194,7 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
             data["scrapeOptions"] = scrape_data
     data.pop("scrape_options", None)
 
+    if (str(getattr(validated_request, "integration", "")).strip()):
+        data["integration"] = str(validated_request.integration).strip()
+
     return data
--- firecrawl-4.3.2/firecrawl/v2/types.py
+++ firecrawl-4.3.3/firecrawl/v2/types.py
@@ -289,6 +289,7 @@ class ScrapeOptions(BaseModel):
     proxy: Optional[Literal["basic", "stealth", "auto"]] = None
     max_age: Optional[int] = None
     store_in_cache: Optional[bool] = None
+    integration: Optional[str] = None
 
     @field_validator('formats')
     @classmethod
@@ -334,6 +335,7 @@ class CrawlRequest(BaseModel):
     webhook: Optional[Union[str, WebhookConfig]] = None
     scrape_options: Optional[ScrapeOptions] = None
     zero_data_retention: bool = False
+    integration: Optional[str] = None
 
 class CrawlResponse(BaseModel):
     """Information about a crawl job."""
@@ -350,6 +352,10 @@ class CrawlJob(BaseModel):
     next: Optional[str] = None
     data: List[Document] = []
 
+class CrawlStatusRequest(BaseModel):
+    """Request to get crawl job status."""
+    job_id: str
+
 class SearchResultWeb(BaseModel):
     """A web search result with URL, title, and description."""
     url: str
@@ -410,6 +416,7 @@ class CrawlParamsData(BaseModel):
     scrape_options: Optional[ScrapeOptions] = None
     zero_data_retention: bool = False
     warning: Optional[str] = None
+    integration: Optional[str] = None
 
 class CrawlParamsResponse(BaseResponse[CrawlParamsData]):
     """Response from crawl params endpoint."""
@@ -420,6 +427,12 @@ class BatchScrapeRequest(BaseModel):
     """Request for batch scraping multiple URLs (internal helper only)."""
     urls: List[str]
    options: Optional[ScrapeOptions] = None
+    webhook: Optional[Union[str, WebhookConfig]] = None
+    append_to_id: Optional[str] = None
+    ignore_invalid_urls: Optional[bool] = None
+    max_concurrency: Optional[int] = None
+    zero_data_retention: Optional[bool] = None
+    integration: Optional[str] = None
 
 class BatchScrapeResponse(BaseModel):
     """Response from starting a batch scrape job (mirrors CrawlResponse naming)."""
@@ -437,6 +450,14 @@ class BatchScrapeJob(BaseModel):
     next: Optional[str] = None
     data: List[Document] = []
 
+class BatchScrapeStatusRequest(BaseModel):
+    """Request to get batch scrape job status."""
+    job_id: str
+
+class BatchScrapeErrorsRequest(BaseModel):
+    """Request to get errors for a batch scrape job."""
+    job_id: str
+
 # Map types
 class MapOptions(BaseModel):
     """Options for mapping operations."""
@@ -445,6 +466,7 @@ class MapOptions(BaseModel):
     include_subdomains: Optional[bool] = None
     limit: Optional[int] = None
     timeout: Optional[int] = None
+    integration: Optional[str] = None
     location: Optional['Location'] = None
 
 class MapRequest(BaseModel):
@@ -452,6 +474,8 @@ class MapRequest(BaseModel):
     url: str
     options: Optional[MapOptions] = None
 
+
+
 class MapData(BaseModel):
     """Map results data."""
     links: List['SearchResult']
@@ -461,6 +485,19 @@ class MapResponse(BaseResponse[MapData]):
     pass
 
 # Extract types
+class ExtractRequest(BaseModel):
+    """Request for extract operations."""
+    urls: Optional[List[str]] = None
+    prompt: Optional[str] = None
+    schema_: Optional[Dict[str, Any]] = Field(default=None, alias="schema")
+    system_prompt: Optional[str] = None
+    allow_external_links: Optional[bool] = None
+    enable_web_search: Optional[bool] = None
+    show_sources: Optional[bool] = None
+    scrape_options: Optional[ScrapeOptions] = None
+    ignore_invalid_urls: Optional[bool] = None
+    integration: Optional[str] = None
+
 class ExtractResponse(BaseModel):
     """Response for extract operations (start/status/final)."""
     success: Optional[bool] = None
@@ -492,6 +529,10 @@ class TokenUsage(BaseModel):
     billing_period_start: Optional[str] = None
     billing_period_end: Optional[str] = None
 
+class QueueStatusRequest(BaseModel):
+    """Request to retrieve queue status."""
+    pass
+
 class QueueStatusResponse(BaseModel):
     """Metrics about the team's scrape queue."""
     jobs_in_queue: int
@@ -593,6 +634,7 @@ class SearchRequest(BaseModel):
     ignore_invalid_urls: Optional[bool] = None
     timeout: Optional[int] = 60000
     scrape_options: Optional[ScrapeOptions] = None
+    integration: Optional[str] = None
 
     @field_validator('sources')
     @classmethod
@@ -692,6 +734,10 @@ class CrawlErrorsResponse(BaseModel):
     errors: List[CrawlError]
     robots_blocked: List[str]
 
+class CrawlErrorsRequest(BaseModel):
+    """Request for crawl error monitoring."""
+    crawl_id: str
+
 class ActiveCrawl(BaseModel):
     """Information about an active crawl job."""
     id: str
@@ -704,6 +750,10 @@ class ActiveCrawlsResponse(BaseModel):
     success: bool = True
     crawls: List[ActiveCrawl]
 
+class ActiveCrawlsRequest(BaseModel):
+    """Request for listing active crawl jobs."""
+    pass
+
 # Configuration types
 class ClientConfig(BaseModel):
     """Configuration for the Firecrawl client."""
--- firecrawl-4.3.2/firecrawl/v2/utils/validation.py
+++ firecrawl-4.3.3/firecrawl/v2/utils/validation.py
@@ -177,6 +177,9 @@ def prepare_scrape_options(options: Optional[ScrapeOptions]) -> Optional[Dict[st
     # Handle special cases
     for key, value in options_data.items():
         if value is not None:
+            if key == "integration":
+                scrape_data["integration"] = (str(value).strip() or None)
+                continue
             if key == "formats":
                 # Handle formats conversion
                 converted_formats: List[Any] = []
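
Wherever the value is serialized, the helpers in this release normalize it the same way: the request-preparation functions strip surrounding whitespace, and prepare_scrape_options (above) as well as the search, extract, and batch helpers additionally treat an all-whitespace value as absent; the new Pydantic fields themselves store the string verbatim. A standalone illustration of that rule, not SDK code:

    from typing import Optional

    from firecrawl.v2.types import ScrapeOptions

    def normalized_integration(value: Optional[str]) -> Optional[str]:
        # Mirrors the trimming rule visible in the prepare_* helpers in this diff (illustrative only).
        if value is None:
            return None
        trimmed = str(value).strip()
        return trimmed or None

    opts = ScrapeOptions(integration=" _my-app ")
    assert opts.integration == " _my-app "                        # the model keeps the raw string
    assert normalized_integration(opts.integration) == "_my-app"  # trimming happens at request-preparation time
    assert normalized_integration("   ") is None                  # blank values end up dropped or set to None
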
--- firecrawl-4.3.2/firecrawl.egg-info/PKG-INFO
+++ firecrawl-4.3.3/firecrawl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: firecrawl
-Version: 4.3.2
+Version: 4.3.3
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai