firecrawl-py 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl-py has been flagged as potentially problematic; see the package registry's advisory page for more details.

Files changed (84)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/__tests__/e2e/v2/test_scrape.py +37 -1
  3. firecrawl/client.py +8 -4
  4. firecrawl/v2/types.py +19 -2
  5. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/METADATA +7 -3
  6. firecrawl_py-3.3.3.dist-info/RECORD +79 -0
  7. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/WHEEL +1 -1
  8. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info/licenses}/LICENSE +0 -0
  9. {firecrawl_py-3.3.1.dist-info → firecrawl_py-3.3.3.dist-info}/top_level.txt +0 -2
  10. build/lib/firecrawl/__init__.py +0 -87
  11. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -79
  12. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -188
  13. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -38
  14. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -40
  15. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -137
  16. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -248
  17. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -35
  18. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -43
  19. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +0 -73
  20. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +0 -73
  21. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -105
  22. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -276
  23. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +0 -54
  24. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +0 -60
  25. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -154
  26. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +0 -269
  27. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +0 -26
  28. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -65
  29. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -12
  30. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -61
  31. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -12
  32. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -19
  33. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -50
  34. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -63
  35. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -28
  36. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -117
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -90
  38. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -70
  39. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -240
  40. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -107
  41. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -53
  42. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -92
  43. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -167
  44. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -236
  45. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -18
  46. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -123
  47. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -290
  48. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -332
  49. build/lib/firecrawl/client.py +0 -242
  50. build/lib/firecrawl/firecrawl.backup.py +0 -4635
  51. build/lib/firecrawl/types.py +0 -161
  52. build/lib/firecrawl/v1/__init__.py +0 -14
  53. build/lib/firecrawl/v1/client.py +0 -4653
  54. build/lib/firecrawl/v2/__init__.py +0 -4
  55. build/lib/firecrawl/v2/client.py +0 -805
  56. build/lib/firecrawl/v2/client_async.py +0 -250
  57. build/lib/firecrawl/v2/methods/aio/__init__.py +0 -1
  58. build/lib/firecrawl/v2/methods/aio/batch.py +0 -85
  59. build/lib/firecrawl/v2/methods/aio/crawl.py +0 -171
  60. build/lib/firecrawl/v2/methods/aio/extract.py +0 -126
  61. build/lib/firecrawl/v2/methods/aio/map.py +0 -59
  62. build/lib/firecrawl/v2/methods/aio/scrape.py +0 -33
  63. build/lib/firecrawl/v2/methods/aio/search.py +0 -172
  64. build/lib/firecrawl/v2/methods/aio/usage.py +0 -42
  65. build/lib/firecrawl/v2/methods/batch.py +0 -417
  66. build/lib/firecrawl/v2/methods/crawl.py +0 -469
  67. build/lib/firecrawl/v2/methods/extract.py +0 -131
  68. build/lib/firecrawl/v2/methods/map.py +0 -77
  69. build/lib/firecrawl/v2/methods/scrape.py +0 -64
  70. build/lib/firecrawl/v2/methods/search.py +0 -197
  71. build/lib/firecrawl/v2/methods/usage.py +0 -41
  72. build/lib/firecrawl/v2/types.py +0 -665
  73. build/lib/firecrawl/v2/utils/__init__.py +0 -9
  74. build/lib/firecrawl/v2/utils/error_handler.py +0 -107
  75. build/lib/firecrawl/v2/utils/get_version.py +0 -15
  76. build/lib/firecrawl/v2/utils/http_client.py +0 -153
  77. build/lib/firecrawl/v2/utils/http_client_async.py +0 -65
  78. build/lib/firecrawl/v2/utils/normalize.py +0 -107
  79. build/lib/firecrawl/v2/utils/validation.py +0 -324
  80. build/lib/firecrawl/v2/watcher.py +0 -301
  81. build/lib/firecrawl/v2/watcher_async.py +0 -242
  82. build/lib/tests/test_change_tracking.py +0 -98
  83. build/lib/tests/test_timeout_conversion.py +0 -117
  84. firecrawl_py-3.3.1.dist-info/RECORD +0 -153
@@ -1,40 +0,0 @@
1
- import os
2
- import pytest
3
- from dotenv import load_dotenv
4
- from firecrawl import AsyncFirecrawl
5
-
6
-
7
- load_dotenv()
8
-
9
- if not os.getenv("API_KEY"):
10
- raise ValueError("API_KEY is not set")
11
-
12
- if not os.getenv("API_URL"):
13
- raise ValueError("API_URL is not set")
14
-
15
-
16
- @pytest.mark.asyncio
17
- async def test_async_map_minimal():
18
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
19
- resp = await client.map("https://docs.firecrawl.dev")
20
- assert hasattr(resp, "links") and isinstance(resp.links, list)
21
- if resp.links:
22
- first = resp.links[0]
23
- assert hasattr(first, "url") and isinstance(first.url, str) and first.url.startswith("http")
24
-
25
-
26
- @pytest.mark.asyncio
27
- @pytest.mark.parametrize("sitemap", ["only", "include", "skip"])
28
- async def test_async_map_with_all_params(sitemap):
29
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
30
- resp = await client.map(
31
- "https://docs.firecrawl.dev",
32
- search="docs",
33
- include_subdomains=True,
34
- limit=10,
35
- sitemap=sitemap,
36
- timeout=15000,
37
- )
38
- assert hasattr(resp, "links") and isinstance(resp.links, list)
39
- assert len(resp.links) <= 10
40
-
@@ -1,137 +0,0 @@
1
- import os
2
- import pytest
3
- from dotenv import load_dotenv
4
- from firecrawl import AsyncFirecrawl
5
- from firecrawl.v2.types import Document
6
-
7
-
8
- load_dotenv()
9
-
10
- if not os.getenv("API_KEY"):
11
- raise ValueError("API_KEY is not set")
12
-
13
- if not os.getenv("API_URL"):
14
- raise ValueError("API_URL is not set")
15
-
16
-
17
- @pytest.mark.asyncio
18
- async def test_async_scrape_minimal():
19
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
20
- doc = await client.scrape("https://docs.firecrawl.dev")
21
- assert isinstance(doc, Document)
22
- assert (
23
- (doc.markdown and len(doc.markdown) > 0)
24
- or (doc.html and len(doc.html) > 0)
25
- or (doc.raw_html and len(doc.raw_html) > 0)
26
- or (doc.links is not None)
27
- or (doc.screenshot is not None)
28
- or (doc.json is not None)
29
- or (doc.summary is not None)
30
- )
31
-
32
-
33
- @pytest.mark.asyncio
34
- async def test_async_scrape_with_all_params():
35
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
36
- # Include multiple formats with configuration
37
- json_schema = {
38
- "type": "object",
39
- "properties": {"title": {"type": "string"}},
40
- "required": ["title"],
41
- }
42
- doc = await client.scrape(
43
- "https://docs.firecrawl.dev",
44
- formats=[
45
- "markdown",
46
- "rawHtml",
47
- {"type": "screenshot", "full_page": False, "quality": 70},
48
- {"type": "json", "prompt": "Extract title", "schema": json_schema},
49
- ],
50
- headers={"User-Agent": "E2E-AIO"},
51
- include_tags=["main"],
52
- exclude_tags=["nav"],
53
- only_main_content=True,
54
- timeout=20000,
55
- wait_for=500,
56
- mobile=False,
57
- parsers=["pdf"],
58
- actions=[],
59
- skip_tls_verification=False,
60
- remove_base64_images=False,
61
- fast_mode=False,
62
- use_mock=None,
63
- block_ads=False,
64
- proxy="basic",
65
- max_age=0,
66
- store_in_cache=False,
67
- )
68
- assert isinstance(doc, Document)
69
-
70
-
71
- @pytest.mark.asyncio
72
- async def test_async_scrape_with_options_markdown():
73
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
74
- doc = await client.scrape(
75
- "https://docs.firecrawl.dev",
76
- formats=["markdown"],
77
- only_main_content=False,
78
- mobile=False,
79
- )
80
- assert isinstance(doc, Document)
81
-
82
-
83
- @pytest.mark.asyncio
84
- async def test_async_scrape_with_screenshot_action_viewport():
85
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
86
- doc = await client.scrape(
87
- "https://docs.firecrawl.dev",
88
- formats=[{"type": "screenshot", "full_page": False, "quality": 80, "viewport": {"width": 800, "height": 600}}],
89
- )
90
- assert isinstance(doc, Document)
91
-
92
-
93
- @pytest.mark.asyncio
94
- @pytest.mark.parametrize("fmt,expect_field", [
95
- ("markdown", "markdown"),
96
- ("html", "html"),
97
- ("raw_html", "raw_html"),
98
- ("links", "links"),
99
- ("screenshot", "screenshot"),
100
- ("summary", "summary"),
101
- ])
102
- async def test_async_scrape_basic_formats(fmt, expect_field):
103
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
104
- doc = await client.scrape("https://docs.firecrawl.dev", formats=[fmt])
105
- assert isinstance(doc, Document)
106
- if expect_field == "markdown":
107
- assert doc.markdown is not None
108
- elif expect_field == "html":
109
- assert doc.html is not None
110
- elif expect_field == "raw_html":
111
- assert doc.raw_html is not None
112
- elif expect_field == "links":
113
- assert isinstance(doc.links, list)
114
- elif expect_field == "screenshot":
115
- assert doc.screenshot is not None
116
- elif expect_field == "summary":
117
- assert doc.summary is not None
118
-
119
-
120
- @pytest.mark.asyncio
121
- async def test_async_scrape_with_json_format_object():
122
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
123
- json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
124
- doc = await client.scrape(
125
- "https://docs.firecrawl.dev",
126
- formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}],
127
- only_main_content=True,
128
- )
129
- assert isinstance(doc, Document)
130
-
131
-
132
- @pytest.mark.asyncio
133
- async def test_async_scrape_invalid_url():
134
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
135
- with pytest.raises(ValueError):
136
- await client.scrape("")
137
-
@@ -1,248 +0,0 @@
1
- import os
2
- import pytest
3
- from dotenv import load_dotenv
4
- from firecrawl import AsyncFirecrawl
5
- from firecrawl.types import (
6
- SearchData,
7
- Document,
8
- ScrapeOptions,
9
- ScrapeFormats,
10
- SearchResultWeb,
11
- SearchResultNews,
12
- SearchResultImages,
13
- )
14
-
15
- load_dotenv()
16
-
17
- firecrawl = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
18
-
19
- def _collect_texts(entries):
20
- texts = []
21
- for r in entries or []:
22
- title = getattr(r, 'title', None) if hasattr(r, 'title') else None
23
- desc = getattr(r, 'description', None) if hasattr(r, 'description') else None
24
- if title:
25
- texts.append(str(title).lower())
26
- if desc:
27
- texts.append(str(desc).lower())
28
- return texts
29
-
30
- def _is_document(entry) -> bool:
31
- try:
32
- from firecrawl.v2.types import Document
33
- return isinstance(entry, Document) or \
34
- hasattr(entry, 'markdown') or \
35
- hasattr(entry, 'html') or \
36
- hasattr(entry, 'raw_html') or \
37
- hasattr(entry, 'json') or \
38
- hasattr(entry, 'screenshot') or \
39
- hasattr(entry, 'change_tracking') or \
40
- hasattr(entry, 'summary')
41
- except Exception:
42
- return hasattr(entry, 'markdown') or \
43
- hasattr(entry, 'html') or \
44
- hasattr(entry, 'raw_html') or \
45
- hasattr(entry, 'json') or \
46
- hasattr(entry, 'screenshot') or \
47
- hasattr(entry, 'change_tracking') or \
48
- hasattr(entry, 'summary')
49
-
50
- @pytest.mark.asyncio
51
- async def test_async_search_minimal_request():
52
- results = await firecrawl.search(
53
- query="What is the capital of France?"
54
- )
55
- assert isinstance(results, SearchData)
56
- assert hasattr(results, 'web')
57
- assert results.web is not None
58
- assert len(results.web) > 0
59
- assert hasattr(results, 'news')
60
- assert results.news is None
61
- assert hasattr(results, 'images')
62
- assert results.images is None
63
-
64
- for result in results.web:
65
- assert isinstance(result, SearchResultWeb)
66
- assert hasattr(result, 'url')
67
- assert hasattr(result, 'title')
68
- assert hasattr(result, 'description')
69
- assert result.url.startswith('http')
70
- assert result.title is not None
71
- assert result.description is not None
72
-
73
- all_text = ' '.join(_collect_texts(results.web))
74
- assert 'paris' in all_text
75
-
76
- assert results.news is None
77
- assert results.images is None
78
-
79
- @pytest.mark.asyncio
80
- async def test_async_search_with_sources():
81
- results = await firecrawl.search(
82
- query="firecrawl",
83
- sources=["web", "news", "images"],
84
- limit=3
85
- )
86
- assert isinstance(results, SearchData)
87
- assert results.web is not None
88
- assert len(results.web) <= 3
89
- assert isinstance(results.web[0], SearchResultWeb)
90
-
91
- if results.news is not None:
92
- assert len(results.news) <= 3
93
- assert isinstance(results.news[0], SearchResultNews)
94
-
95
- if results.images is not None:
96
- assert len(results.images) <= 3
97
- assert isinstance(results.images[0], SearchResultImages)
98
-
99
- web_titles = [result.title.lower() for result in results.web]
100
- web_descriptions = [result.description.lower() for result in results.web]
101
- all_web_text = ' '.join(web_titles + web_descriptions)
102
- assert 'firecrawl' in all_web_text
103
-
104
- @pytest.mark.asyncio
105
- async def test_async_search_result_structure():
106
- results = await firecrawl.search(
107
- query="test query",
108
- limit=1
109
- )
110
- if results.web and len(results.web) > 0:
111
- result = results.web[0]
112
- assert hasattr(result, 'url')
113
- assert hasattr(result, 'title')
114
- assert hasattr(result, 'description')
115
- assert isinstance(result.url, str)
116
- assert isinstance(result.title, str) or result.title is None
117
- assert isinstance(result.description, str) or result.description is None
118
- assert result.url.startswith('http')
119
-
120
- @pytest.mark.asyncio
121
- async def test_async_search_all_parameters():
122
- from firecrawl.types import ScrapeOptions, Location, WaitAction
123
- schema = {
124
- "type": "object",
125
- "properties": {
126
- "title": {"type": "string"},
127
- "description": {"type": "string"},
128
- "url": {"type": "string"}
129
- },
130
- "required": ["title", "description"]
131
- }
132
- results = await firecrawl.search(
133
- query="artificial intelligence",
134
- sources=[
135
- {"type": "web"},
136
- {"type": "news"}
137
- ],
138
- limit=3,
139
- tbs="qdr:m",
140
- location="US",
141
- ignore_invalid_urls=True,
142
- timeout=60000,
143
- scrape_options=ScrapeOptions(
144
- formats=[
145
- "markdown",
146
- "html",
147
- {
148
- "type": "json",
149
- "prompt": "Extract the title and description from the page",
150
- "schema": schema
151
- },
152
- {"type": "summary"}
153
- ],
154
- headers={"User-Agent": "Firecrawl-Test/1.0"},
155
- include_tags=["h1", "h2", "p"],
156
- exclude_tags=["nav", "footer"],
157
- only_main_content=True,
158
- wait_for=2000,
159
- mobile=False,
160
- skip_tls_verification=False,
161
- remove_base64_images=True,
162
- block_ads=True,
163
- proxy="basic",
164
- max_age=3600000,
165
- store_in_cache=True,
166
- location=Location(
167
- country="US",
168
- languages=["en"]
169
- ),
170
- actions=[
171
- WaitAction(milliseconds=1000)
172
- ]
173
- )
174
- )
175
- assert isinstance(results, SearchData)
176
- assert hasattr(results, 'web')
177
- assert hasattr(results, 'news')
178
- assert hasattr(results, 'images')
179
- assert results.web is not None
180
- assert len(results.web) <= 3
181
-
182
- non_doc_entries = [r for r in (results.web or []) if not _is_document(r)]
183
- if non_doc_entries:
184
- all_web_text = ' '.join(_collect_texts(non_doc_entries))
185
- ai_terms = ['artificial', 'intelligence', 'ai', 'machine', 'learning']
186
- assert any(term in all_web_text for term in ai_terms)
187
-
188
- for result in results.web:
189
- assert isinstance(result, (SearchResultWeb, Document))
190
- if isinstance(result, Document):
191
- assert (result.markdown is not None) or (result.html is not None)
192
- else:
193
- assert hasattr(result, 'url')
194
- assert isinstance(result.url, str) and result.url.startswith('http')
195
-
196
- if results.news is not None:
197
- assert len(results.news) <= 3
198
- for result in results.news:
199
- assert isinstance(result, (SearchResultNews, Document))
200
- if isinstance(result, Document):
201
- assert (result.markdown is not None) or (result.html is not None)
202
- else:
203
- assert hasattr(result, 'url')
204
- assert isinstance(result.url, str) and result.url.startswith('http')
205
-
206
- assert results.images is None
207
-
208
- @pytest.mark.asyncio
209
- async def test_async_search_formats_flexibility():
210
- # Test with list format
211
- results1 = await firecrawl.search(
212
- query="python programming",
213
- limit=1,
214
- scrape_options=ScrapeOptions(
215
- formats=["markdown"]
216
- )
217
- )
218
- # Test with ScrapeFormats object
219
- results2 = await firecrawl.search(
220
- query="python programming",
221
- limit=1,
222
- scrape_options=ScrapeOptions(
223
- formats=ScrapeFormats(markdown=True)
224
- )
225
- )
226
- assert isinstance(results1, SearchData)
227
- assert isinstance(results2, SearchData)
228
- assert results1.web is not None
229
- assert results2.web is not None
230
-
231
- @pytest.mark.asyncio
232
- async def test_async_search_with_json_format_object():
233
- json_schema = {
234
- "type": "object",
235
- "properties": {
236
- "title": {"type": "string"}
237
- },
238
- "required": ["title"],
239
- }
240
- results = await firecrawl.search(
241
- query="site:docs.firecrawl.dev",
242
- limit=1,
243
- scrape_options=ScrapeOptions(
244
- formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}]
245
- ),
246
- )
247
- assert isinstance(results, SearchData)
248
- assert results.web is not None and len(results.web) >= 0
@@ -1,35 +0,0 @@
1
- import os
2
- import pytest
3
- from dotenv import load_dotenv
4
- from firecrawl import AsyncFirecrawl
5
-
6
-
7
- load_dotenv()
8
-
9
- if not os.getenv("API_KEY"):
10
- raise ValueError("API_KEY is not set")
11
-
12
- if not os.getenv("API_URL"):
13
- raise ValueError("API_URL is not set")
14
-
15
-
16
- @pytest.mark.asyncio
17
- async def test_async_get_concurrency():
18
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
19
- conc = await client.get_concurrency()
20
- assert hasattr(conc, "concurrency") and hasattr(conc, "max_concurrency")
21
-
22
-
23
- @pytest.mark.asyncio
24
- async def test_async_get_credit_usage():
25
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
26
- credits = await client.get_credit_usage()
27
- assert hasattr(credits, "remaining_credits")
28
-
29
-
30
- @pytest.mark.asyncio
31
- async def test_async_get_token_usage():
32
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
33
- tokens = await client.get_token_usage()
34
- assert hasattr(tokens, "remaining_tokens")
35
-
@@ -1,43 +0,0 @@
1
- import os
2
- import asyncio
3
- import pytest
4
- from dotenv import load_dotenv
5
- from firecrawl import AsyncFirecrawl
6
- from firecrawl.v2.watcher_async import AsyncWatcher
7
-
8
-
9
- load_dotenv()
10
-
11
- if not os.getenv("API_KEY"):
12
- raise ValueError("API_KEY is not set")
13
-
14
- if not os.getenv("API_URL"):
15
- raise ValueError("API_URL is not set")
16
-
17
-
18
- @pytest.mark.asyncio
19
- async def test_async_watcher_crawl_progresses():
20
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
21
- start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
22
- statuses = []
23
- async for snapshot in AsyncWatcher(client, start.id, kind="crawl", timeout=180):
24
- statuses.append(snapshot.status)
25
- if snapshot.status in ("completed", "failed"):
26
- break
27
- assert statuses and statuses[-1] in ("completed", "failed")
28
-
29
-
30
- @pytest.mark.asyncio
31
- async def test_async_watcher_batch_progresses():
32
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
33
- start = await client.start_batch_scrape([
34
- "https://docs.firecrawl.dev",
35
- "https://firecrawl.dev",
36
- ], formats=["markdown"], max_concurrency=1)
37
- statuses = []
38
- async for snapshot in AsyncWatcher(client, start.id, kind="batch", timeout=240):
39
- statuses.append(snapshot.status)
40
- if snapshot.status in ("completed", "failed", "cancelled"):
41
- break
42
- assert statuses and statuses[-1] in ("completed", "failed", "cancelled")
43
-
@@ -1,73 +0,0 @@
1
- import os
2
- import json
3
- import pytest
4
- import requests
5
- from dotenv import load_dotenv
6
-
7
- load_dotenv()
8
-
9
- def _idmux(identity_request: dict) -> dict:
10
- idmux_url = os.getenv("IDMUX_URL")
11
- if not idmux_url:
12
- raise EnvironmentError("IDMUX_URL is not set. E2E tests must use idmux for credentials.")
13
- run_number = int(os.getenv("GITHUB_RUN_NUMBER") or 0)
14
- payload = {
15
- "refName": os.getenv("GITHUB_REF_NAME") or "local",
16
- "runNumber": run_number,
17
- "concurrency": identity_request.get("concurrency", 100),
18
- **identity_request,
19
- }
20
- resp = requests.post(idmux_url + "/", json=payload)
21
- resp.raise_for_status()
22
- return resp.json()
23
-
24
- @pytest.fixture(scope="session")
25
- def api_url():
26
- # Prefer TEST_URL, then FIRECRAWL_API_URL (for parity with JS), then legacy API_URL
27
- return (
28
- os.getenv("TEST_URL")
29
- or os.getenv("FIRECRAWL_API_URL")
30
- or os.getenv("API_URL")
31
- or "https://api.firecrawl.dev"
32
- )
33
-
34
- # Resolve identity and export environment at import time so tests that read env at module import succeed
35
- _IDENTITY = None
36
- _API_URL = (
37
- os.getenv("TEST_URL")
38
- or os.getenv("FIRECRAWL_API_URL")
39
- or os.getenv("API_URL")
40
- or "https://api.firecrawl.dev"
41
- )
42
-
43
- _IDMUX_URL = os.getenv("IDMUX_URL")
44
- if _IDMUX_URL:
45
- run_name = os.getenv("PYTEST_RUN_NAME") or "py-e2e"
46
- # If IDMUX_URL is set, idmux MUST succeed; do not silently fall back
47
- _IDENTITY = _idmux({"name": run_name})
48
- os.environ["API_KEY"] = _IDENTITY.get("apiKey", "")
49
- os.environ["API_URL"] = _API_URL
50
-
51
- @pytest.fixture(scope="session")
52
- def api_identity():
53
- return _IDENTITY or {"apiKey": os.getenv("API_KEY") or "", "teamId": os.getenv("TEST_TEAM_ID") or os.getenv("TEAM_ID") or ""}
54
-
55
- @pytest.fixture(autouse=True)
56
- def _inject_client(request, api_identity, api_url):
57
- # For class-based tests that rely on self.client, inject a client if missing
58
- inst = getattr(request, "instance", None)
59
- if inst is not None and not hasattr(inst, "client"):
60
- try:
61
- from firecrawl import Firecrawl
62
- inst.client = Firecrawl(api_key=api_identity.get("apiKey", ""), api_url=api_url)
63
- except Exception:
64
- pass
65
- # For function-based modules that expect a module-level `firecrawl` symbol
66
- mod = getattr(request, "module", None)
67
- if mod is not None and not hasattr(mod, "firecrawl"):
68
- try:
69
- from firecrawl import Firecrawl
70
- setattr(mod, "firecrawl", Firecrawl(api_key=api_identity.get("apiKey", ""), api_url=api_url))
71
- except Exception:
72
- pass
73
-
@@ -1,73 +0,0 @@
1
- import os
2
- import asyncio
3
- import pytest
4
- from dotenv import load_dotenv
5
-
6
- from firecrawl import AsyncFirecrawl
7
- from firecrawl.v2.types import Document
8
-
9
-
10
- load_dotenv()
11
-
12
- if not os.getenv("API_KEY"):
13
- raise ValueError("API_KEY is not set")
14
-
15
- if not os.getenv("API_URL"):
16
- raise ValueError("API_URL is not set")
17
-
18
-
19
- @pytest.mark.asyncio
20
- async def test_async_scrape_minimal():
21
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
22
- doc = await client.scrape("https://docs.firecrawl.dev")
23
- assert isinstance(doc, Document)
24
- # Accept any primary content or alternate outputs
25
- assert doc.markdown is not None and doc.markdown and len(doc.markdown) > 0
26
-
27
-
28
- @pytest.mark.asyncio
29
- async def test_async_crawl_start_and_status():
30
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
31
- start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
32
- job_id = start.id
33
-
34
- # Poll status until terminal or timeout
35
- deadline = asyncio.get_event_loop().time() + 180
36
- status = await client.get_crawl_status(job_id)
37
- while status.status not in ("completed", "failed") and asyncio.get_event_loop().time() < deadline:
38
- await asyncio.sleep(2)
39
- status = await client.get_crawl_status(job_id)
40
-
41
- assert status.status in ("completed", "failed")
42
-
43
-
44
- @pytest.mark.asyncio
45
- async def test_async_batch_start_and_status():
46
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
47
- start = await client.start_batch_scrape([
48
- "https://docs.firecrawl.dev",
49
- "https://firecrawl.dev",
50
- ], formats=["markdown"], max_concurrency=1)
51
- job_id = start.id
52
-
53
- deadline = asyncio.get_event_loop().time() + 240
54
- status = await client.get_batch_scrape_status(job_id)
55
- while status.status not in ("completed", "failed", "cancelled") and asyncio.get_event_loop().time() < deadline:
56
- await asyncio.sleep(2)
57
- status = await client.get_batch_scrape_status(job_id)
58
-
59
- assert status.status in ("completed", "failed", "cancelled")
60
-
61
-
62
- @pytest.mark.asyncio
63
- async def test_async_usage_minimal():
64
- client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
65
- conc = await client.get_concurrency()
66
- assert hasattr(conc, "concurrency") and hasattr(conc, "max_concurrency")
67
-
68
- credits = await client.get_credit_usage()
69
- assert hasattr(credits, "remaining_credits")
70
-
71
- tokens = await client.get_token_usage()
72
- assert hasattr(tokens, "remaining_tokens")
73
-