firecrawl 4.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +87 -0
- firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
- firecrawl/__tests__/e2e/v2/test_map.py +61 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
- firecrawl/__tests__/e2e/v2/test_search.py +270 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
- firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
- firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
- firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +281 -0
- firecrawl/firecrawl.backup.py +4635 -0
- firecrawl/types.py +167 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +5164 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +967 -0
- firecrawl/v2/client_async.py +408 -0
- firecrawl/v2/methods/agent.py +144 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/agent.py +137 -0
- firecrawl/v2/methods/aio/batch.py +188 -0
- firecrawl/v2/methods/aio/crawl.py +351 -0
- firecrawl/v2/methods/aio/extract.py +133 -0
- firecrawl/v2/methods/aio/map.py +65 -0
- firecrawl/v2/methods/aio/scrape.py +33 -0
- firecrawl/v2/methods/aio/search.py +176 -0
- firecrawl/v2/methods/aio/usage.py +89 -0
- firecrawl/v2/methods/batch.py +499 -0
- firecrawl/v2/methods/crawl.py +592 -0
- firecrawl/v2/methods/extract.py +161 -0
- firecrawl/v2/methods/map.py +83 -0
- firecrawl/v2/methods/scrape.py +64 -0
- firecrawl/v2/methods/search.py +215 -0
- firecrawl/v2/methods/usage.py +84 -0
- firecrawl/v2/types.py +1143 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +178 -0
- firecrawl/v2/utils/http_client_async.py +69 -0
- firecrawl/v2/utils/normalize.py +125 -0
- firecrawl/v2/utils/validation.py +692 -0
- firecrawl/v2/watcher.py +301 -0
- firecrawl/v2/watcher_async.py +243 -0
- firecrawl-4.12.0.dist-info/METADATA +234 -0
- firecrawl-4.12.0.dist-info/RECORD +92 -0
- firecrawl-4.12.0.dist-info/WHEEL +5 -0
- firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
- firecrawl-4.12.0.dist-info/top_level.txt +2 -0
- tests/test_agent_integration.py +277 -0
- tests/test_api_key_handling.py +44 -0
- tests/test_change_tracking.py +98 -0
- tests/test_timeout_conversion.py +117 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from firecrawl.v2.types import Document, DocumentMetadata
|
|
4
|
+
from firecrawl.v2.utils.normalize import normalize_document_input
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TestDocumentMetadataExtras:
    """Tests for how ``Document`` metadata carries unknown ("extra") keys next to known fields."""

    def test_metadata_extras_preserved_in_metadata_dict(self):
        """Normalized input: known keys are mapped and coerced, unknown keys stay untouched."""
        source_metadata = {
            "title": "Example",
            # camelCase key that has a known snake_case counterpart
            "statusCode": "200",
            # keys without a known counterpart; expected to pass through unchanged
            "twitter:card": "summary",
            "twitterCard": "summary_large_image",
            "theme-color": "#fff",
        }
        payload = {"markdown": "# Hello", "metadata": source_metadata}

        document = Document(**normalize_document_input(payload))
        view = document.metadata_dict

        # The known key is expected under its snake_case name with an integer value.
        assert view["status_code"] == 200
        assert view["title"] == "Example"
        # Unknown keys are expected back exactly as supplied.
        assert view["twitter:card"] == "summary"
        assert view["twitterCard"] == "summary_large_image"
        assert view["theme-color"] == "#fff"

    def test_metadata_typed_from_plain_dict_preserves_extras(self):
        """Raw dict metadata given straight to ``Document`` (no normalization) is kept as extras."""
        unnormalized = {
            "ogTitle": "Hello",  # not normalized, so expected to be treated as an extra
            "x-custom": "ok",
        }
        document = Document(markdown="# Hi", metadata=unnormalized)

        typed = document.metadata_typed
        assert isinstance(typed, DocumentMetadata)
        # Without the normalization step the known field is expected to stay unset.
        assert typed.og_title is None
        # Extras are expected both in pydantic's extra storage and in metadata_dict.
        stored_extras = getattr(typed, "__pydantic_extra__", None)
        if not stored_extras:
            stored_extras = {}
        assert stored_extras == {"ogTitle": "Hello", "x-custom": "ok"}
        assert document.metadata_dict["ogTitle"] == "Hello"
        assert document.metadata_dict["x-custom"] == "ok"

    def test_document_model_dump_includes_metadata_extras(self):
        """``model_dump(exclude_none=True)`` is expected to include extra metadata keys."""
        payload = {
            "markdown": "# Body",
            "metadata": {"title": "Page", "twitter:site": "@site"},
        }
        document = Document(**normalize_document_input(payload))

        dumped = document.model_dump(exclude_none=True)
        assert "metadata" in dumped
        dumped_metadata = dumped["metadata"]
        assert dumped_metadata["title"] == "Page"
        assert dumped_metadata["twitter:site"] == "@site"

    def test_concurrency_fields_are_mapped(self):
        """camelCase concurrency keys are expected on the typed metadata as snake_case attributes."""
        concurrency_metadata = {
            "concurrencyLimited": True,
            "concurrencyQueueDurationMs": 1234,
        }
        payload = {"markdown": "# Queue info", "metadata": concurrency_metadata}

        document = Document(**normalize_document_input(payload))
        typed = document.metadata_typed

        assert typed.concurrency_limited is True
        assert typed.concurrency_queue_duration_ms == 1234

    def test_unknown_list_metadata_preserved(self):
        """A list stored under an unknown metadata key is expected back unchanged."""
        payload = {
            "markdown": "# Body",
            "metadata": {"title": "Page", "x-list": [1, "a", 3]},
        }
        document = Document(**normalize_document_input(payload))

        view = document.metadata_dict
        assert view["x-list"] == [1, "a", 3]

    def test_metadata_typed_extras_property(self):
        """``DocumentMetadata.extras`` is expected to expose keys that are not declared fields."""
        typed = DocumentMetadata(**{"title": "T", "x-foo": "bar"})
        # Only the unknown key should appear in the extras accessor.
        assert typed.extras == {"x-foo": "bar"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from firecrawl.v2.types import Document
|
|
4
|
+
from firecrawl.v2.utils.normalize import normalize_document_input
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TestMetadataMultiValue:
    """Tests for metadata keys that arrive with a list of values."""

    def test_article_tag_list_coerced_to_string(self):
        """A list under ``articleTag`` is expected to come back as one comma-joined string."""
        page_metadata = {
            "title": "Page",
            "articleTag": ["one", "two"],
        }
        payload = {"markdown": "# Body", "metadata": page_metadata}
        document = Document(**normalize_document_input(payload))

        # Attribute access on the metadata object is expected to give the joined string.
        assert document.metadata is not None
        assert document.metadata.article_tag == "one, two"
        # The dict view is expected to report the same string under the snake_case key.
        metadata_view = document.metadata_dict
        assert metadata_view["article_tag"] == "one, two"
|