firecrawl-py 3.0.3__py3-none-any.whl → 3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of firecrawl-py might be problematic.
- firecrawl/__init__.py +2 -2
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -1
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +47 -17
- firecrawl/client.py +1 -0
- firecrawl/v2/methods/aio/crawl.py +2 -5
- firecrawl/v2/methods/aio/scrape.py +2 -5
- firecrawl/v2/methods/aio/search.py +2 -5
- firecrawl/v2/methods/batch.py +2 -5
- firecrawl/v2/methods/crawl.py +2 -1
- firecrawl/v2/methods/scrape.py +2 -6
- firecrawl/v2/methods/search.py +14 -10
- firecrawl/v2/types.py +68 -2
- firecrawl/v2/utils/normalize.py +107 -0
- firecrawl/v2/watcher.py +4 -15
- firecrawl/v2/watcher_async.py +2 -5
- {firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/METADATA +6 -2
- {firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/RECORD +20 -19
- {firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/WHEEL +1 -1
- {firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info/licenses}/LICENSE +0 -0
- {firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )
 
-__version__ = "3.0.3"
+__version__ = "3.1.1"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
@@ -84,4 +84,4 @@ __all__ = [
     'V1JsonConfig',
     'V1ScrapeOptions',
     'V1ChangeTrackingOptions',
-]
+]

firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py
CHANGED
@@ -96,7 +96,6 @@ async def test_async_get_crawl_status_shape():
     assert status.status in ("scraping", "completed", "failed")
     assert status.completed >= 0
     assert status.expires_at is not None
-    assert status.next is not None
     assert isinstance(status.data, list)
 
 

firecrawl/__tests__/unit/v2/methods/test_search_validation.py
CHANGED
@@ -11,7 +11,7 @@ class TestSearchValidation:
         request = SearchRequest(query="")
         with pytest.raises(ValueError, match="Query cannot be empty"):
            _validate_search_request(request)
-
+
         request = SearchRequest(query=" ")
         with pytest.raises(ValueError, match="Query cannot be empty"):
             _validate_search_request(request)
@@ -22,12 +22,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", limit=0)
         with pytest.raises(ValueError, match="Limit must be positive"):
             _validate_search_request(request)
-
+
         # Negative limit
         request = SearchRequest(query="test", limit=-1)
         with pytest.raises(ValueError, match="Limit must be positive"):
             _validate_search_request(request)
-
+
         # Too high limit
         request = SearchRequest(query="test", limit=101)
         with pytest.raises(ValueError, match="Limit cannot exceed 100"):
@@ -39,12 +39,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", timeout=0)
         with pytest.raises(ValueError, match="Timeout must be positive"):
             _validate_search_request(request)
-
+
         # Negative timeout
         request = SearchRequest(query="test", timeout=-1000)
         with pytest.raises(ValueError, match="Timeout must be positive"):
             _validate_search_request(request)
-
+
         # Too high timeout
         request = SearchRequest(query="test", timeout=300001)
         with pytest.raises(ValueError, match="Timeout cannot exceed 300000ms"):
@@ -56,12 +56,12 @@ class TestSearchValidation:
         request = SearchRequest(query="test", sources=["invalid_source"])
         with pytest.raises(ValueError, match="Invalid source type"):
             _validate_search_request(request)
-
+
         # Invalid object source
         request = SearchRequest(query="test", sources=[Source(type="invalid_source")])
         with pytest.raises(ValueError, match="Invalid source type"):
             _validate_search_request(request)
-
+
         # Mixed valid/invalid sources
         request = SearchRequest(query="test", sources=["web", "invalid_source"])
         with pytest.raises(ValueError, match="Invalid source type"):
@@ -73,7 +73,7 @@ class TestSearchValidation:
         request = SearchRequest(query="test", location="")
         with pytest.raises(ValueError, match="Location must be a non-empty string"):
             _validate_search_request(request)
-
+
         # Whitespace location
         request = SearchRequest(query="test", location=" ")
         with pytest.raises(ValueError, match="Location must be a non-empty string"):
@@ -82,19 +82,49 @@ class TestSearchValidation:
     def test_validate_invalid_tbs(self):
         """Test validation of invalid tbs values."""
         invalid_tbs_values = ["invalid", "qdr:x", "yesterday", "last_week"]
-
+
         for invalid_tbs in invalid_tbs_values:
             request = SearchRequest(query="test", tbs=invalid_tbs)
             with pytest.raises(ValueError, match="Invalid tbs value"):
                 _validate_search_request(request)
 
+    def test_validate_custom_date_ranges(self):
+        """Test validation of custom date range formats."""
+        valid_custom_ranges = [
+            "cdr:1,cd_min:1/1/2024,cd_max:12/31/2024",
+            "cdr:1,cd_min:12/1/2024,cd_max:12/31/2024",
+            "cdr:1,cd_min:2/28/2023,cd_max:3/1/2023",
+            "cdr:1,cd_min:10/15/2023,cd_max:11/15/2023"
+        ]
+
+        for valid_range in valid_custom_ranges:
+            request = SearchRequest(query="test", tbs=valid_range)
+            validated = _validate_search_request(request)
+            assert validated == request
+
+    def test_validate_invalid_custom_date_ranges(self):
+        """Test validation of invalid custom date range formats."""
+        # Invalid custom date ranges
+        invalid_custom_ranges = [
+            "cdr:1,cd_min:2/28/2023",  # Missing cd_max
+            "cdr:1,cd_max:2/28/2023",  # Missing cd_min
+            "cdr:2,cd_min:1/1/2024,cd_max:12/31/2024",  # Wrong cdr value
+            "cdr:cd_min:1/1/2024,cd_max:12/31/2024",  # Missing :1
+            "custom:1,cd_min:1/1/2024,cd_max:12/31/2024"  # Wrong prefix
+        ]
+
+        for invalid_range in invalid_custom_ranges:
+            request = SearchRequest(query="test", tbs=invalid_range)
+            with pytest.raises(ValueError, match="Invalid"):
+                _validate_search_request(request)
+
     def test_validate_valid_requests(self):
         """Test that valid requests pass validation."""
         # Minimal valid request
         request = SearchRequest(query="test")
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Request with all optional parameters
         request = SearchRequest(
             query="test query",
@@ -107,7 +137,7 @@ class TestSearchValidation:
         )
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Request with object sources
         request = SearchRequest(
             query="test",
@@ -122,17 +152,17 @@ class TestSearchValidation:
         request = SearchRequest(query="test", limit=100)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Maximum valid timeout
         request = SearchRequest(query="test", timeout=300000)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Minimum valid limit
         request = SearchRequest(query="test", limit=1)
         validated = _validate_search_request(request)
         assert validated == request
-
+
         # Minimum valid timeout
         request = SearchRequest(query="test", timeout=1)
         validated = _validate_search_request(request)
@@ -191,16 +221,16 @@ class TestSearchRequestModel:
         data1 = request1.model_dump(by_alias=True)
         assert "ignore_invalid_urls" in data1  # No alias, uses snake_case
         assert data1["ignore_invalid_urls"] is None
-
+
         # Test with explicit False value
         request2 = SearchRequest(
             query="test",
             ignore_invalid_urls=False,
             scrape_options=ScrapeOptions(formats=["markdown"])
         )
-
+
         # Check that aliases are used in model_dump with by_alias=True
         data2 = request2.model_dump(by_alias=True)
         assert "ignore_invalid_urls" in data2  # No alias, uses snake_case
         assert "scrape_options" in data2  # No alias, uses snake_case
-        assert data2["ignore_invalid_urls"] is False
+        assert data2["ignore_invalid_urls"] is False

firecrawl/client.py
CHANGED

firecrawl/v2/methods/aio/crawl.py
CHANGED
@@ -14,6 +14,7 @@ from ...types import (
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options
 from ...utils.http_client_async import AsyncHttpClient
+from ...utils.normalize import normalize_document_input
 
 
 def _prepare_crawl_request(request: CrawlRequest) -> dict:
@@ -76,11 +77,7 @@ async def get_crawl_status(client: AsyncHttpClient, job_id: str) -> CrawlJob:
     documents = []
     for doc_data in body.get("data", []):
         if isinstance(doc_data, dict):
-            normalized = dict(doc_data)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc_data)
             documents.append(Document(**normalized))
     return CrawlJob(
         status=body.get("status"),

firecrawl/v2/methods/aio/scrape.py
CHANGED
@@ -1,5 +1,6 @@
 from typing import Optional, Dict, Any
 from ...types import ScrapeOptions, Document
+from ...utils.normalize import normalize_document_input
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options, validate_scrape_options
 from ...utils.http_client_async import AsyncHttpClient
@@ -27,10 +28,6 @@ async def scrape(client: AsyncHttpClient, url: str, options: Optional[ScrapeOptions]
     if not body.get("success"):
         raise Exception(body.get("error", "Unknown error occurred"))
     document_data = body.get("data", {})
-    normalized = dict(document_data)
-    if 'rawHtml' in normalized and 'raw_html' not in normalized:
-        normalized['raw_html'] = normalized.pop('rawHtml')
-    if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-        normalized['change_tracking'] = normalized.pop('changeTracking')
+    normalized = normalize_document_input(document_data)
     return Document(**normalized)
 

firecrawl/v2/methods/aio/search.py
CHANGED
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from ...types import SearchRequest, SearchData, SearchResult, Document
+from ...utils.normalize import normalize_document_input
 from ...utils.http_client_async import AsyncHttpClient
 from ...utils.error_handler import handle_response_error
 from ...utils.validation import prepare_scrape_options, validate_scrape_options
@@ -38,11 +39,7 @@ async def search(client: AsyncHttpClient, request: SearchRequest) -> SearchData:
     if request.scrape_options is not None and any(
         key in doc_data for key in ['markdown', 'html', 'rawHtml', 'links', 'summary', 'screenshot', 'changeTracking']
     ):
-        normalized = dict(doc_data)
-        if 'rawHtml' in normalized and 'raw_html' not in normalized:
-            normalized['raw_html'] = normalized.pop('rawHtml')
-        if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-            normalized['change_tracking'] = normalized.pop('changeTracking')
+        normalized = normalize_document_input(doc_data)
         results.append(Document(**normalized))
     else:
         results.append(SearchResult(

firecrawl/v2/methods/batch.py
CHANGED
@@ -13,6 +13,7 @@ from ..types import (
     WebhookConfig,
 )
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
+from ..utils.normalize import normalize_document_input
 from ..types import CrawlErrorsResponse
 
 
@@ -107,11 +108,7 @@ def get_batch_scrape_status(
     documents: List[Document] = []
     for doc in body.get("data", []) or []:
         if isinstance(doc, dict):
-            normalized = dict(doc)
-            if 'rawHtml' in normalized and 'raw_html' not in normalized:
-                normalized['raw_html'] = normalized.pop('rawHtml')
-            if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-                normalized['change_tracking'] = normalized.pop('changeTracking')
+            normalized = normalize_document_input(doc)
             documents.append(Document(**normalized))
 
     return BatchScrapeJob(

firecrawl/v2/methods/crawl.py
CHANGED
@@ -11,6 +11,7 @@ from ..types import (
     WebhookConfig, CrawlErrorsResponse, ActiveCrawlsResponse, ActiveCrawl
 )
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
+from ..utils.normalize import normalize_document_input
 
 
 def _validate_crawl_request(request: CrawlRequest) -> None:
@@ -173,7 +174,7 @@ def get_crawl_status(client: HttpClient, job_id: str) -> CrawlJob:
                 # but we'll handle it gracefully
                 continue
         else:
-            documents.append(Document(**doc_data))
+            documents.append(Document(**normalize_document_input(doc_data)))
 
     # Create CrawlJob with current status and data
     return CrawlJob(

firecrawl/v2/methods/scrape.py
CHANGED
@@ -4,6 +4,7 @@ Scraping functionality for Firecrawl v2 API.
 
 from typing import Optional, Dict, Any
 from ..types import ScrapeOptions, Document
+from ..utils.normalize import normalize_document_input
 from ..utils import HttpClient, handle_response_error, prepare_scrape_options, validate_scrape_options
 
 
@@ -59,10 +60,5 @@ def scrape(client: HttpClient, url: str, options: Optional[ScrapeOptions] = None
         raise Exception(body.get("error", "Unknown error occurred"))
 
     document_data = body.get("data", {})
-
-    normalized = dict(document_data)
-    if 'rawHtml' in normalized and 'raw_html' not in normalized:
-        normalized['raw_html'] = normalized.pop('rawHtml')
-    if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-        normalized['change_tracking'] = normalized.pop('changeTracking')
+    normalized = normalize_document_input(document_data)
     return Document(**normalized)

firecrawl/v2/methods/search.py
CHANGED
@@ -2,8 +2,10 @@
 Search functionality for Firecrawl v2 API.
 """
 
+import re
 from typing import Optional, Dict, Any, Union
 from ..types import SearchRequest, SearchData, SearchResult, Document
+from ..utils.normalize import normalize_document_input
 from ..utils import HttpClient, handle_response_error, validate_scrape_options, prepare_scrape_options
 
 
@@ -50,12 +52,7 @@ def search(
     if request.scrape_options is not None and any(
         key in doc_data for key in ['markdown', 'html', 'rawHtml', 'links', 'summary', 'screenshot', 'changeTracking']
     ):
-
-        normalized = dict(doc_data)
-        if 'rawHtml' in normalized and 'raw_html' not in normalized:
-            normalized['raw_html'] = normalized.pop('rawHtml')
-        if 'changeTracking' in normalized and 'change_tracking' not in normalized:
-            normalized['change_tracking'] = normalized.pop('changeTracking')
+        normalized = normalize_document_input(doc_data)
         results.append(Document(**normalized))
     else:
         # Minimal search result shape
@@ -123,11 +120,18 @@ def _validate_search_request(request: SearchRequest) -> SearchRequest:
     # Validate tbs (time-based search, if provided)
     if request.tbs is not None:
         valid_tbs_values = {
-            "qdr:d", "qdr:w", "qdr:m", "qdr:y",  # Google time filters
+            "qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y",  # Google time filters
             "d", "w", "m", "y"  # Short forms
         }
-
-
+
+        if request.tbs in valid_tbs_values:
+            pass  # Valid predefined value
+        elif request.tbs.startswith("cdr:"):
+            custom_date_pattern = r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"
+            if not re.match(custom_date_pattern, request.tbs):
+                raise ValueError(f"Invalid custom date range format: {request.tbs}. Expected format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
+        else:
+            raise ValueError(f"Invalid tbs value: {request.tbs}. Valid values: {valid_tbs_values} or custom date range format: cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY")
 
     # Validate scrape_options (if provided)
     if request.scrape_options is not None:
@@ -170,4 +174,4 @@ def _prepare_search_request(request: SearchRequest) -> Dict[str, Any]:
     data["scrapeOptions"] = scrape_data
     data.pop("scrape_options", None)
 
-    return data
+    return data

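For reference, the tbs values accepted after this change are the predefined Google time filters (now including qdr:h) and custom date ranges of the form cdr:1,cd_min:MM/DD/YYYY,cd_max:MM/DD/YYYY. The snippet below is a standalone sketch that mirrors the new check rather than importing the SDK's private _validate_search_request:

# Standalone sketch of the tbs rules added in firecrawl/v2/methods/search.py (not the SDK function itself).
import re

VALID_TBS = {"qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y", "d", "w", "m", "y"}
CUSTOM_DATE_PATTERN = r"^cdr:1,cd_min:\d{1,2}/\d{1,2}/\d{4},cd_max:\d{1,2}/\d{1,2}/\d{4}$"

def tbs_is_valid(tbs: str) -> bool:
    """True for a predefined filter or a well-formed cdr:1 custom date range."""
    if tbs in VALID_TBS:
        return True
    if tbs.startswith("cdr:"):
        return re.match(CUSTOM_DATE_PATTERN, tbs) is not None
    return False

assert tbs_is_valid("qdr:h")                                    # newly accepted in 3.1.1
assert tbs_is_valid("cdr:1,cd_min:1/1/2024,cd_max:12/31/2024")  # custom date range
assert not tbs_is_valid("cdr:1,cd_min:2/28/2023")               # missing cd_max
assert not tbs_is_valid("yesterday")                            # rejected as before
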
firecrawl/v2/types.py
CHANGED
@@ -7,7 +7,8 @@ This module contains clean, modern type definitions for the v2 API.
 import warnings
 from datetime import datetime
 from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
-
+import logging
+from pydantic import BaseModel, Field, field_validator, ValidationError
 
 # Suppress pydantic warnings about schema field shadowing
 # Tested using schema_field alias="schema" but it doesn't work.
@@ -19,6 +20,9 @@ warnings.filterwarnings("ignore", message="Field name \"json\" in \"Document\" s
 
 T = TypeVar('T')
 
+# Module logger
+logger = logging.getLogger("firecrawl")
+
 # Base response types
 class BaseResponse(BaseModel, Generic[T]):
     """Base response structure for all API responses."""
@@ -29,18 +33,57 @@ class BaseResponse(BaseModel, Generic[T]):
 
 # Document and content types
 class DocumentMetadata(BaseModel):
-    """Metadata for scraped documents."""
+    """Metadata for scraped documents (snake_case only; API camelCase normalized in code)."""
+    # Common metadata fields
     title: Optional[str] = None
     description: Optional[str] = None
+    url: Optional[str] = None
     language: Optional[str] = None
     keywords: Optional[Union[str, List[str]]] = None
     robots: Optional[str] = None
+
+    # OpenGraph and social metadata
     og_title: Optional[str] = None
     og_description: Optional[str] = None
     og_url: Optional[str] = None
     og_image: Optional[str] = None
+    og_audio: Optional[str] = None
+    og_determiner: Optional[str] = None
+    og_locale: Optional[str] = None
+    og_locale_alternate: Optional[List[str]] = None
+    og_site_name: Optional[str] = None
+    og_video: Optional[str] = None
+
+    # Dublin Core and other site metadata
+    favicon: Optional[str] = None
+    dc_terms_created: Optional[str] = None
+    dc_date_created: Optional[str] = None
+    dc_date: Optional[str] = None
+    dc_terms_type: Optional[str] = None
+    dc_type: Optional[str] = None
+    dc_terms_audience: Optional[str] = None
+    dc_terms_subject: Optional[str] = None
+    dc_subject: Optional[str] = None
+    dc_description: Optional[str] = None
+    dc_terms_keywords: Optional[str] = None
+
+    modified_time: Optional[str] = None
+    published_time: Optional[str] = None
+    article_tag: Optional[str] = None
+    article_section: Optional[str] = None
+
+    # Response-level metadata
     source_url: Optional[str] = None
     status_code: Optional[int] = None
+    scrape_id: Optional[str] = None
+    num_pages: Optional[int] = None
+    content_type: Optional[str] = None
+    proxy_used: Optional[Literal["basic", "stealth"]] = None
+    cache_state: Optional[Literal["hit", "miss"]] = None
+    cached_at: Optional[str] = None
+    credits_used: Optional[int] = None
+
+    # Error information
    error: Optional[str] = None
 
     @staticmethod
@@ -85,6 +128,29 @@ class Document(BaseModel):
     warning: Optional[str] = None
     change_tracking: Optional[Dict[str, Any]] = None
 
+    @property
+    def metadata_typed(self) -> DocumentMetadata:
+        """Always returns a DocumentMetadata instance for LSP-friendly access."""
+        md = self.metadata
+        if isinstance(md, DocumentMetadata):
+            return md
+        if isinstance(md, dict):
+            try:
+                return DocumentMetadata(**md)
+            except (ValidationError, TypeError) as exc:
+                logger.debug("Failed to construct DocumentMetadata from dict: %s", exc)
+        return DocumentMetadata()
+
+    @property
+    def metadata_dict(self) -> Dict[str, Any]:
+        """Returns metadata as a plain dict (exclude None)."""
+        md = self.metadata
+        if isinstance(md, DocumentMetadata):
+            return md.model_dump(exclude_none=True)
+        if isinstance(md, dict):
+            return {k: v for k, v in md.items() if v is not None}
+        return {}
+
 # Webhook types
 class WebhookConfig(BaseModel):
     """Configuration for webhooks."""

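A short usage sketch for the two new Document properties, assuming firecrawl-py 3.1.1 is installed; the metadata payload here is illustrative rather than a captured API response:

# Hypothetical document built by hand to show the new accessors.
from firecrawl.v2.types import Document

doc = Document(markdown="# Hi", metadata={"title": "Example", "status_code": 200})

meta = doc.metadata_typed            # always a DocumentMetadata, even if metadata was a dict or None
print(meta.title, meta.status_code)  # Example 200

print(doc.metadata_dict)             # plain dict with None-valued fields dropped
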
firecrawl/v2/utils/normalize.py
ADDED
@@ -0,0 +1,107 @@
+"""
+Normalization helpers for v2 API payloads to avoid relying on Pydantic aliases.
+"""
+
+from typing import Any, Dict, List
+from ..types import DocumentMetadata
+
+
+def _map_metadata_keys(md: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Convert API v2 camelCase metadata keys to snake_case expected by DocumentMetadata.
+    Leaves unknown keys as-is.
+    """
+    mapping = {
+        # OpenGraph
+        "ogTitle": "og_title",
+        "ogDescription": "og_description",
+        "ogUrl": "og_url",
+        "ogImage": "og_image",
+        "ogAudio": "og_audio",
+        "ogDeterminer": "og_determiner",
+        "ogLocale": "og_locale",
+        "ogLocaleAlternate": "og_locale_alternate",
+        "ogSiteName": "og_site_name",
+        "ogVideo": "og_video",
+        # Dublin Core and misc
+        "dcTermsCreated": "dc_terms_created",
+        "dcDateCreated": "dc_date_created",
+        "dcDate": "dc_date",
+        "dcTermsType": "dc_terms_type",
+        "dcType": "dc_type",
+        "dcTermsAudience": "dc_terms_audience",
+        "dcTermsSubject": "dc_terms_subject",
+        "dcSubject": "dc_subject",
+        "dcDescription": "dc_description",
+        "dcTermsKeywords": "dc_terms_keywords",
+        "modifiedTime": "modified_time",
+        "publishedTime": "published_time",
+        "articleTag": "article_tag",
+        "articleSection": "article_section",
+        # Response-level
+        "sourceURL": "source_url",
+        "statusCode": "status_code",
+        "scrapeId": "scrape_id",
+        "numPages": "num_pages",
+        "contentType": "content_type",
+        "proxyUsed": "proxy_used",
+        "cacheState": "cache_state",
+        "cachedAt": "cached_at",
+        "creditsUsed": "credits_used",
+    }
+
+    out: Dict[str, Any] = {}
+    for k, v in md.items():
+        snake = mapping.get(k, k)
+        out[snake] = v
+
+    # Light coercions where server may send strings/lists
+    if isinstance(out.get("status_code"), str):
+        try:
+            out["status_code"] = int(out["status_code"])  # type: ignore
+        except ValueError:
+            pass
+
+    # Generic rule: if a value is a list, join with ", " for string-like fields,
+    # except for explicit fields we preserve as lists.
+    preserve_list_fields: List[str] = [
+        "og_locale_alternate",
+    ]
+    for f, val in list(out.items()):
+        if isinstance(val, list) and f not in preserve_list_fields:
+            try:
+                out[f] = ", ".join(str(x) for x in val)
+            except Exception:
+                # Fallback: keep original list if join fails
+                pass
+
+    return out
+
+
+def normalize_document_input(doc: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Normalize a raw Document dict from the API into the Python SDK's expected shape:
+    - Convert top-level keys rawHtml->raw_html, changeTracking->change_tracking
+    - Convert metadata keys from camelCase to snake_case
+    """
+    normalized = dict(doc)
+
+    if "rawHtml" in normalized and "raw_html" not in normalized:
+        normalized["raw_html"] = normalized.pop("rawHtml")
+
+    if "changeTracking" in normalized and "change_tracking" not in normalized:
+        normalized["change_tracking"] = normalized.pop("changeTracking")
+
+    md = normalized.get("metadata")
+    if isinstance(md, dict):
+        mapped = _map_metadata_keys(md)
+        # Construct a concrete DocumentMetadata so downstream has a typed object
+        try:
+            normalized["metadata"] = DocumentMetadata(**mapped)
+        except Exception:
+            # Fallback to mapped dict if model construction fails for any reason
+            normalized["metadata"] = mapped
+
+    return normalized

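A usage sketch for the new helper, assuming firecrawl-py 3.1.1; the payload below is a made-up API-shaped dict, not captured output:

# normalize_document_input converts camelCase API keys to the snake_case the SDK models expect.
from firecrawl.v2.utils.normalize import normalize_document_input

raw = {
    "markdown": "# Title",
    "rawHtml": "<h1>Title</h1>",
    "metadata": {"ogTitle": "Example", "sourceURL": "https://example.com", "statusCode": "200"},
}

normalized = normalize_document_input(raw)
print(normalized["raw_html"])        # "<h1>Title</h1>"  (rawHtml -> raw_html)

meta = normalized["metadata"]        # a DocumentMetadata instance when construction succeeds
print(meta.og_title, meta.source_url, meta.status_code)  # Example https://example.com 200
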
firecrawl/v2/watcher.py
CHANGED
@@ -15,6 +15,7 @@ from typing import Callable, List, Optional, Literal, Union, Dict, Any
 import websockets
 
 from .types import CrawlJob, BatchScrapeJob, Document
+from .utils.normalize import normalize_document_input
 
 
 JobKind = Literal["crawl", "batch"]
@@ -172,11 +173,7 @@ class Watcher:
         docs: List[Document] = []
         for doc in self.data:
             if isinstance(doc, dict):
-                d = dict(doc)
-                if "rawHtml" in d and "raw_html" not in d:
-                    d["raw_html"] = d.pop("rawHtml")
-                if "changeTracking" in d and "change_tracking" not in d:
-                    d["change_tracking"] = d.pop("changeTracking")
+                d = normalize_document_input(doc)
                 docs.append(Document(**d))
         if self._kind == "crawl":
             job = CrawlJob(
@@ -212,11 +209,7 @@ class Watcher:
         docs = []
         for doc in payload.get("data", []):
             if isinstance(doc, dict):
-                d = dict(doc)
-                if "rawHtml" in d and "raw_html" not in d:
-                    d["raw_html"] = d.pop("rawHtml")
-                if "changeTracking" in d and "change_tracking" not in d:
-                    d["change_tracking"] = d.pop("changeTracking")
+                d = normalize_document_input(doc)
                 docs.append(Document(**d))
         job = CrawlJob(
             status=status_str,
@@ -241,11 +234,7 @@ class Watcher:
         docs = []
         for doc in payload.get("data", []):
             if isinstance(doc, dict):
-                d = dict(doc)
-                if "rawHtml" in d and "raw_html" not in d:
-                    d["raw_html"] = d.pop("rawHtml")
-                if "changeTracking" in d and "change_tracking" not in d:
-                    d["change_tracking"] = d.pop("changeTracking")
+                d = normalize_document_input(doc)
                 docs.append(Document(**d))
         job = BatchScrapeJob(
             status=status_str,

firecrawl/v2/watcher_async.py
CHANGED
@@ -16,6 +16,7 @@ import websockets
 from websockets.exceptions import ConnectionClosed, ConnectionClosedOK, ConnectionClosedError
 
 from .types import BatchScrapeJob, CrawlJob, Document
+from .utils.normalize import normalize_document_input
 
 JobKind = Literal["crawl", "batch"]
 
@@ -216,11 +217,7 @@ class AsyncWatcher:
         source_docs = docs_override if docs_override is not None else payload.get("data", []) or []
         for doc in source_docs:
             if isinstance(doc, dict):
-                d = dict(doc)
-                if "rawHtml" in d and "raw_html" not in d:
-                    d["raw_html"] = d.pop("rawHtml")
-                if "changeTracking" in d and "change_tracking" not in d:
-                    d["change_tracking"] = d.pop("changeTracking")
+                d = normalize_document_input(doc)
                 docs.append(Document(**d))
 
         if self._kind == "crawl":

{firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: firecrawl-py
-Version: 3.0.3
+Version: 3.1.1
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
@@ -40,6 +40,10 @@ Requires-Dist: websockets
 Requires-Dist: nest-asyncio
 Requires-Dist: pydantic
 Requires-Dist: aiohttp
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
 
 # Firecrawl Python SDK
 

{firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
-firecrawl/__init__.py,sha256=
-firecrawl/client.py,sha256=
+firecrawl/__init__.py,sha256=-BUn8vFl0b_PPw4VJ0lV9y0BKsHBlOSfxGTPqGMZK7U,2192
+firecrawl/client.py,sha256=2BGIRTiW2eR6q3wu_g2s3VTQtrHYauoDeNF1YklQpHo,11089
 firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
 firecrawl/types.py,sha256=yZ4iza0M1T2kxNbt-tLEOKH7o6mFKZZ11VAZGodHSq4,2734
 firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
@@ -13,7 +13,7 @@ firecrawl/__tests__/e2e/v2/test_search.py,sha256=MN-q82gHlm5DT2HsnAQgW1NwVbgowlF
 firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
 firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
 firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=gJv_mLzzoAYftETB2TLkrpSfB5c04kaYgkD4hQTYsIg,2639
-firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=
+firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=X-nk5tkYUYIkM6kTYl7GDjvxh2JT9GxJqk2KlO8xpWw,7282
 firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=3CNRIFzgBMcOYOLhnKcK1k5a3Gy--u08EGDkL31uieM,1199
 firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=nckl1kbiEaaTdu5lm__tOoTDG-txTYwwSH3KZEvyKzc,1199
 firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=b17A7advBEjxrjdait2w8GHztZeKy_P3zZ3ixm5H7xw,4453
@@ -27,7 +27,7 @@ firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eM
 firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
 firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
 firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=14lUgFpQsiosgMKjDustBRVE0zXnHujBI76F8BC5PZ4,6072
-firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=
+firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCpZbAPYjthfdPFWmAPcoApY-ED-khtuANs,9498
 firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
 firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
 firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
@@ -45,34 +45,35 @@ firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
 firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
 firecrawl/v2/client.py,sha256=P6WAzwYGLLIANTrqAM-K4EUdGWQoFsi-zCjBibbxKQw,30507
 firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
-firecrawl/v2/types.py,sha256=
-firecrawl/v2/watcher.py,sha256=
-firecrawl/v2/watcher_async.py,sha256=
-firecrawl/v2/methods/batch.py,sha256=
-firecrawl/v2/methods/crawl.py,sha256=
+firecrawl/v2/types.py,sha256=zV0XAX_pJaJj41uxfJewKPANxd45BCL48nvbN_ybLOc,20222
+firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
+firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
+firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
+firecrawl/v2/methods/crawl.py,sha256=4ZUmanHNuNtq9wbKMAZ3lenuPcNdOaV0kYXqMI5XJJ8,15485
 firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
 firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
-firecrawl/v2/methods/scrape.py,sha256=
-firecrawl/v2/methods/search.py,sha256=
+firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
+firecrawl/v2/methods/search.py,sha256=HB17OorEHfZXZh8tvfSqVKxS9uYtqBX3Me4YAFMF7w0,6640
 firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
 firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
 firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
-firecrawl/v2/methods/aio/crawl.py,sha256=
+firecrawl/v2/methods/aio/crawl.py,sha256=pC6bHVk30Hj1EJdAChxpMOg0Xx_GVqq4tIlvU2e5RQ4,6688
 firecrawl/v2/methods/aio/extract.py,sha256=IfNr2ETqt4dR73JFzrEYI4kk5vpKnJOG0BmPEjGEoO4,4217
 firecrawl/v2/methods/aio/map.py,sha256=EuT-5A0cQr_e5SBfEZ6pnl8u0JUwEEvSwhyT2N-QoKU,2326
-firecrawl/v2/methods/aio/scrape.py,sha256=
-firecrawl/v2/methods/aio/search.py,sha256=
+firecrawl/v2/methods/aio/scrape.py,sha256=ilA9qco8YGwCFpE0PN1XBQUyuHPQwH2QioZ-xsfxhgU,1386
+firecrawl/v2/methods/aio/search.py,sha256=nuRmFCA_ymBw2tXJZ88vjZY-BueIRNonkSsrxExwusM,2501
 firecrawl/v2/methods/aio/usage.py,sha256=OtBi6X-aT09MMR2dpm3vBCm9JrJZIJLCQ8jJ3L7vie4,1606
 firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
 firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
 firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcIgwAY,492
 firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg8mYA,4873
 firecrawl/v2/utils/http_client_async.py,sha256=P4XG6nTz6kKH3vCPTz6i7DRhbpK4IImRGaFvQFGBFRc,1874
+firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
 firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
+firecrawl_py-3.1.1.dist-info/licenses/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
 tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
 tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
-firecrawl_py-3.0.3.dist-info/
-firecrawl_py-3.0.3.dist-info/
-firecrawl_py-3.0.3.dist-info/
-firecrawl_py-3.0.3.dist-info/
-firecrawl_py-3.0.3.dist-info/RECORD,,
+firecrawl_py-3.1.1.dist-info/METADATA,sha256=JGp1wtMdsywSEAY520vzMJtol2Mq14v3CbC_ec6DSKA,7390
+firecrawl_py-3.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+firecrawl_py-3.1.1.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl_py-3.1.1.dist-info/RECORD,,

{firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/WHEEL
CHANGED

{firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info/licenses}/LICENSE
File without changes

{firecrawl_py-3.0.3.dist-info → firecrawl_py-3.1.1.dist-info}/top_level.txt
File without changes