firecrawl 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
@@ -0,0 +1,94 @@
1
+ import pytest
2
+
3
+ from firecrawl.v2.types import Document, DocumentMetadata
4
+ from firecrawl.v2.utils.normalize import normalize_document_input
5
+
6
+
7
class TestDocumentMetadataExtras:
    """Tests for how Document metadata exposes known and unknown keys."""

    def test_metadata_extras_preserved_in_metadata_dict(self):
        """Normalized input yields mapped known keys plus untouched extras."""
        metadata = {
            "title": "Example",
            # camelCase key expected to surface as snake_case `status_code`
            "statusCode": "200",
            # keys with no known field; expected to come back unchanged
            "twitter:card": "summary",
            "twitterCard": "summary_large_image",
            "theme-color": "#fff",
        }
        payload = {"markdown": "# Hello", "metadata": metadata}

        normalized = normalize_document_input(payload)
        flat = Document(**normalized).metadata_dict

        expected = {
            # known keys: renamed and, for the status code, coerced to int
            "status_code": 200,
            "title": "Example",
            # extras: same key, same value
            "twitter:card": "summary",
            "twitterCard": "summary_large_image",
            "theme-color": "#fff",
        }
        for key, value in expected.items():
            assert flat[key] == value

    def test_metadata_typed_from_plain_dict_preserves_extras(self):
        """A raw metadata dict passed straight to Document keeps its extras."""
        # No normalization step here, so the camelCase key is not mapped
        # onto the typed `og_title` field.
        unmapped = {
            "ogTitle": "Hello",
            "x-custom": "ok",
        }
        document = Document(markdown="# Hi", metadata=unmapped)

        typed = document.metadata_typed
        assert isinstance(typed, DocumentMetadata)
        # The typed field stays empty because the key was never renamed
        assert typed.og_title is None

        # Unknown keys are read from the pydantic extra storage ...
        extras = getattr(typed, "__pydantic_extra__", None)
        if not extras:
            extras = {}
        assert extras == {"ogTitle": "Hello", "x-custom": "ok"}
        # ... and are also visible through the dict view
        assert document.metadata_dict["ogTitle"] == "Hello"
        assert document.metadata_dict["x-custom"] == "ok"

    def test_document_model_dump_includes_metadata_extras(self):
        """model_dump output carries both known and extra metadata keys."""
        payload = {
            "markdown": "# Body",
            "metadata": {"title": "Page", "twitter:site": "@site"},
        }
        document = Document(**normalize_document_input(payload))

        serialized = document.model_dump(exclude_none=True)

        assert "metadata" in serialized
        dumped_metadata = serialized["metadata"]
        assert dumped_metadata["title"] == "Page"
        assert dumped_metadata["twitter:site"] == "@site"

    def test_concurrency_fields_are_mapped(self):
        """Concurrency keys in camelCase land on the typed snake_case fields."""
        payload = {
            "markdown": "# Queue info",
            "metadata": {
                "concurrencyLimited": True,
                "concurrencyQueueDurationMs": 1234,
            },
        }
        normalized = normalize_document_input(payload)

        typed = Document(**normalized).metadata_typed

        assert typed.concurrency_limited is True
        assert typed.concurrency_queue_duration_ms == 1234

    def test_unknown_list_metadata_preserved(self):
        """A list value under an unknown key is returned as the same list."""
        payload = {
            "markdown": "# Body",
            "metadata": {"title": "Page", "x-list": [1, "a", 3]},
        }
        document = Document(**normalize_document_input(payload))

        flat = document.metadata_dict

        assert flat["x-list"] == [1, "a", 3]

    def test_metadata_typed_extras_property(self):
        """DocumentMetadata.extras exposes keys that are not declared fields."""
        fields = {"title": "T", "x-foo": "bar"}
        typed = DocumentMetadata(**fields)

        assert typed.extras == {"x-foo": "bar"}
@@ -0,0 +1,22 @@
1
+ import pytest
2
+
3
+ from firecrawl.v2.types import Document
4
+ from firecrawl.v2.utils.normalize import normalize_document_input
5
+
6
+
7
class TestMetadataMultiValue:
    """Tests for metadata fields that arrive with more than one value."""

    def test_article_tag_list_coerced_to_string(self):
        """A list under `articleTag` is expected back as one joined string."""
        payload = {
            "markdown": "# Body",
            "metadata": {"title": "Page", "articleTag": ["one", "two"]},
        }
        normalized = normalize_document_input(payload)
        document = Document(**normalized)

        # Typed attribute access returns the comma-joined form
        assert document.metadata is not None
        assert document.metadata.article_tag == "one, two"

        # The dict view reports the same string under the snake_case key
        flat = document.metadata_dict
        assert flat["article_tag"] == "one, two"