firecrawl 4.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firecrawl/__init__.py +87 -0
- firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
- firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
- firecrawl/__tests__/e2e/v2/conftest.py +73 -0
- firecrawl/__tests__/e2e/v2/test_async.py +73 -0
- firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
- firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
- firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
- firecrawl/__tests__/e2e/v2/test_map.py +61 -0
- firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
- firecrawl/__tests__/e2e/v2/test_search.py +270 -0
- firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
- firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
- firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
- firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
- firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
- firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
- firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
- firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
- firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
- firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
- firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
- firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
- firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
- firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
- firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
- firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
- firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
- firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
- firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
- firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
- firecrawl/client.py +281 -0
- firecrawl/firecrawl.backup.py +4635 -0
- firecrawl/types.py +167 -0
- firecrawl/v1/__init__.py +14 -0
- firecrawl/v1/client.py +5164 -0
- firecrawl/v2/__init__.py +4 -0
- firecrawl/v2/client.py +967 -0
- firecrawl/v2/client_async.py +408 -0
- firecrawl/v2/methods/agent.py +144 -0
- firecrawl/v2/methods/aio/__init__.py +1 -0
- firecrawl/v2/methods/aio/agent.py +137 -0
- firecrawl/v2/methods/aio/batch.py +188 -0
- firecrawl/v2/methods/aio/crawl.py +351 -0
- firecrawl/v2/methods/aio/extract.py +133 -0
- firecrawl/v2/methods/aio/map.py +65 -0
- firecrawl/v2/methods/aio/scrape.py +33 -0
- firecrawl/v2/methods/aio/search.py +176 -0
- firecrawl/v2/methods/aio/usage.py +89 -0
- firecrawl/v2/methods/batch.py +499 -0
- firecrawl/v2/methods/crawl.py +592 -0
- firecrawl/v2/methods/extract.py +161 -0
- firecrawl/v2/methods/map.py +83 -0
- firecrawl/v2/methods/scrape.py +64 -0
- firecrawl/v2/methods/search.py +215 -0
- firecrawl/v2/methods/usage.py +84 -0
- firecrawl/v2/types.py +1143 -0
- firecrawl/v2/utils/__init__.py +9 -0
- firecrawl/v2/utils/error_handler.py +107 -0
- firecrawl/v2/utils/get_version.py +15 -0
- firecrawl/v2/utils/http_client.py +178 -0
- firecrawl/v2/utils/http_client_async.py +69 -0
- firecrawl/v2/utils/normalize.py +125 -0
- firecrawl/v2/utils/validation.py +692 -0
- firecrawl/v2/watcher.py +301 -0
- firecrawl/v2/watcher_async.py +243 -0
- firecrawl-4.12.0.dist-info/METADATA +234 -0
- firecrawl-4.12.0.dist-info/RECORD +92 -0
- firecrawl-4.12.0.dist-info/WHEEL +5 -0
- firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
- firecrawl-4.12.0.dist-info/top_level.txt +2 -0
- tests/test_agent_integration.py +277 -0
- tests/test_api_key_handling.py +44 -0
- tests/test_change_tracking.py +98 -0
- tests/test_timeout_conversion.py +117 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
# Put the directory one level above this file's folder at the front of
# sys.path (only if it is not already listed) -- presumably so that the
# `firecrawl` imports that follow resolve against this checkout; confirm.
ROOT = Path(__file__).resolve().parents[1]
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
|
|
9
|
+
|
|
10
|
+
from firecrawl.v2.client import FirecrawlClient
|
|
11
|
+
from firecrawl.v2.client_async import AsyncFirecrawlClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.fixture(autouse=True)
def clear_firecrawl_api_key_env(monkeypatch):
    """Remove FIRECRAWL_API_KEY from the environment for every test in this module.

    ``raising=False`` makes the removal a no-op when the variable is not set.
    """
    monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
    # Hand control to the test; monkeypatch undoes the change afterwards.
    yield
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_cloud_requires_api_key():
    """Building a sync client for the hosted API URL without a key raises ValueError."""
    cloud_url = "https://api.firecrawl.dev"
    with pytest.raises(ValueError):
        FirecrawlClient(api_url=cloud_url)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_self_host_allows_missing_api_key():
    """A sync client for a localhost URL can be built without a key; its api_key is None."""
    self_hosted = FirecrawlClient(api_url="http://localhost:3000")
    stored_key = self_hosted.http_client.api_key
    assert stored_key is None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_async_cloud_requires_api_key():
    """Building an async client for the hosted API URL without a key raises ValueError."""
    cloud_url = "https://api.firecrawl.dev"
    with pytest.raises(ValueError):
        AsyncFirecrawlClient(api_url=cloud_url)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@pytest.mark.asyncio
async def test_async_self_host_allows_missing_api_key():
    """An async client for a localhost URL can be built without an API key.

    The client's async HTTP client is closed in ``finally`` so it is released
    whether or not the assertion passes.
    """
    client = AsyncFirecrawlClient(api_url="http://localhost:3000")
    try:
        assert client.http_client.api_key is None
    finally:
        # Single cleanup path: close exactly once, on success or failure,
        # without inspecting the wrapper's private ``_client`` attribute.
        await client.async_http_client.close()
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
from unittest.mock import patch, MagicMock
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from firecrawl import FirecrawlApp
|
|
6
|
+
|
|
7
|
+
class TestChangeTracking(unittest.TestCase):
    """Exercise `FirecrawlApp.scrape_url` with change-tracking options against a mocked `requests.post`."""

    @staticmethod
    def _ok_response(data):
        """Return a mocked HTTP 200 response whose JSON body is a success wrapper around `data`."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {
            'success': True,
            'data': data,
        }
        return response

    @staticmethod
    def _make_app():
        """Create a FirecrawlApp using TEST_API_KEY from the environment, or a dummy key."""
        key = os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing')
        return FirecrawlApp(api_key=key)

    @patch('requests.post')
    def test_change_tracking_format(self, mock_post):
        """The requested formats reach the POST body and the mocked changeTracking fields are returned."""
        tracking = {
            'previousScrapeAt': '2023-01-01T00:00:00Z',
            'changeStatus': 'changed',
            'visibility': 'visible',
        }
        mock_post.return_value = self._ok_response({
            'markdown': 'Test markdown content',
            'changeTracking': tracking,
        })

        requested = {'formats': ['markdown', 'changeTracking']}
        result = self._make_app().scrape_url('https://example.com', requested)

        # Inspect the keyword arguments of the most recent requests.post call.
        _, sent = mock_post.call_args
        self.assertEqual(sent['json']['formats'], ['markdown', 'changeTracking'])

        returned = result['changeTracking']
        self.assertEqual(returned['previousScrapeAt'], '2023-01-01T00:00:00Z')
        self.assertEqual(returned['changeStatus'], 'changed')
        self.assertEqual(returned['visibility'], 'visible')

    @patch('requests.post')
    def test_change_tracking_options(self, mock_post):
        """changeTrackingOptions reach the POST body and the mocked diff/json results are returned."""
        diff_text = '@@ -1,1 +1,1 @@\n-old content\n+new content'
        removed_line = {
            'type': 'del',
            'content': '-old content',
            'del': True,
            'ln': 1,
        }
        added_line = {
            'type': 'add',
            'content': '+new content',
            'add': True,
            'ln': 1,
        }
        chunk = {
            'content': '@@ -1,1 +1,1 @@',
            'changes': [removed_line, added_line],
        }
        tracking = {
            'previousScrapeAt': '2023-01-01T00:00:00Z',
            'changeStatus': 'changed',
            'visibility': 'visible',
            'diff': {
                'text': diff_text,
                'json': {
                    'files': [{
                        'from': None,
                        'to': None,
                        'chunks': [chunk],
                    }],
                },
            },
            'json': {
                'title': {
                    'previous': 'Old Title',
                    'current': 'New Title',
                },
            },
        }
        mock_post.return_value = self._ok_response({
            'markdown': 'Test markdown content',
            'changeTracking': tracking,
        })

        requested = {
            'formats': ['markdown', 'changeTracking'],
            'changeTrackingOptions': {
                'modes': ['git-diff', 'json'],
                'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}},
            },
        }
        result = self._make_app().scrape_url('https://example.com', requested)

        # Inspect the keyword arguments of the most recent requests.post call.
        _, sent = mock_post.call_args
        body = sent['json']
        self.assertEqual(body['formats'], ['markdown', 'changeTracking'])
        self.assertEqual(body['changeTrackingOptions']['modes'], ['git-diff', 'json'])

        returned = result['changeTracking']
        self.assertEqual(returned['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
        self.assertEqual(returned['json']['title']['previous'], 'Old Title')
        self.assertEqual(returned['json']['title']['current'], 'New Title')
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
from unittest.mock import patch, MagicMock
|
|
3
|
+
import os
|
|
4
|
+
from firecrawl import FirecrawlApp
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TestTimeoutConversion(unittest.TestCase):
    """Check the `timeout` keyword FirecrawlApp hands to a mocked `requests.post`.

    The expected values look like "milliseconds converted to seconds, plus 5"
    (60000 -> 65.0, 1000 -> 6.0, 0 -> 5.0) -- presumably a fixed buffer added by
    the client; confirm against FirecrawlApp before relying on that reading.
    """

    @staticmethod
    def _response(payload=None):
        """Return a mocked HTTP 200 response; set its JSON body only when `payload` is given."""
        response = MagicMock()
        response.status_code = 200
        if payload is not None:
            response.json.return_value = payload
        return response

    @staticmethod
    def _scrape_payload():
        """Return a fresh successful scrape body containing only markdown content."""
        return {
            'success': True,
            'data': {
                'markdown': 'Test content',
            },
        }

    @staticmethod
    def _make_app():
        """Create a FirecrawlApp using TEST_API_KEY from the environment, or a dummy key."""
        key = os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing')
        return FirecrawlApp(api_key=key)

    @patch('requests.post')
    def test_scrape_url_timeout_conversion(self, mock_post):
        """scrape_url(timeout=60000) results in a request timeout of 65.0."""
        mock_post.return_value = self._response(self._scrape_payload())

        self._make_app().scrape_url('https://example.com', timeout=60000)

        _, sent = mock_post.call_args
        self.assertEqual(sent['timeout'], 65.0)

    @patch('requests.post')
    def test_scrape_url_default_timeout(self, mock_post):
        """scrape_url without a timeout argument results in a request timeout of 35.0."""
        mock_post.return_value = self._response(self._scrape_payload())

        self._make_app().scrape_url('https://example.com')

        _, sent = mock_post.call_args
        self.assertEqual(sent['timeout'], 35.0)

    @patch('requests.post')
    def test_post_request_timeout_conversion(self, mock_post):
        """_post_request with a body timeout of 30000 results in a request timeout of 35.0."""
        mock_post.return_value = self._response()
        app = self._make_app()

        body = {'timeout': 30000}
        request_headers = {'Content-Type': 'application/json'}
        app._post_request('https://example.com/api', body, request_headers)

        _, sent = mock_post.call_args
        self.assertEqual(sent['timeout'], 35.0)

    @patch('requests.post')
    def test_post_request_default_timeout(self, mock_post):
        """_post_request with a body holding both timeout=30000 and a url yields 35.0."""
        # NOTE(review): despite the name, the body still carries an explicit
        # 'timeout' key, so this mirrors the conversion test -- confirm intent.
        mock_post.return_value = self._response()
        app = self._make_app()

        body = {'timeout': 30000, 'url': 'https://example.com'}
        request_headers = {'Content-Type': 'application/json'}
        app._post_request('https://example.com/api', body, request_headers)

        _, sent = mock_post.call_args
        self.assertEqual(sent['timeout'], 35.0)

    @patch('requests.post')
    def test_timeout_edge_cases(self, mock_post):
        """Small timeouts: 1000 yields 6.0 and 0 yields 5.0."""
        mock_post.return_value = self._response(self._scrape_payload())
        app = self._make_app()

        app.scrape_url('https://example.com', timeout=1000)
        _, sent = mock_post.call_args
        self.assertEqual(sent['timeout'], 6.0)

        # call_args always reflects the most recent call, so re-read it.
        app.scrape_url('https://example.com', timeout=0)
        _, sent = mock_post.call_args
        self.assertEqual(sent['timeout'], 5.0)

    @patch('requests.post')
    def test_post_request_no_timeout_key(self, mock_post):
        """_post_request with no 'timeout' key in the body passes timeout=None."""
        mock_post.return_value = self._response()
        app = self._make_app()

        body = {'url': 'https://example.com'}
        request_headers = {'Content-Type': 'application/json'}
        app._post_request('https://example.com/api', body, request_headers)

        _, sent = mock_post.call_args
        self.assertIsNone(sent['timeout'])
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Run the unittest test runner when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|