firecrawl-4.12.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92):
  1. firecrawl/__init__.py +87 -0
  2. firecrawl/__tests__/e2e/v2/aio/conftest.py +62 -0
  3. firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +69 -0
  4. firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +189 -0
  5. firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +39 -0
  6. firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +41 -0
  7. firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +138 -0
  8. firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +249 -0
  9. firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +42 -0
  10. firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +43 -0
  11. firecrawl/__tests__/e2e/v2/conftest.py +73 -0
  12. firecrawl/__tests__/e2e/v2/test_async.py +73 -0
  13. firecrawl/__tests__/e2e/v2/test_batch_scrape.py +106 -0
  14. firecrawl/__tests__/e2e/v2/test_crawl.py +278 -0
  15. firecrawl/__tests__/e2e/v2/test_extract.py +55 -0
  16. firecrawl/__tests__/e2e/v2/test_map.py +61 -0
  17. firecrawl/__tests__/e2e/v2/test_scrape.py +191 -0
  18. firecrawl/__tests__/e2e/v2/test_search.py +270 -0
  19. firecrawl/__tests__/e2e/v2/test_usage.py +26 -0
  20. firecrawl/__tests__/e2e/v2/test_watcher.py +65 -0
  21. firecrawl/__tests__/unit/test_recursive_schema_v1.py +1209 -0
  22. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +12 -0
  23. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +79 -0
  24. firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +12 -0
  25. firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +20 -0
  26. firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +50 -0
  27. firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +64 -0
  28. firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +28 -0
  29. firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +117 -0
  30. firecrawl/__tests__/unit/v2/methods/test_agent.py +367 -0
  31. firecrawl/__tests__/unit/v2/methods/test_agent_request_preparation.py +226 -0
  32. firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +90 -0
  33. firecrawl/__tests__/unit/v2/methods/test_branding.py +214 -0
  34. firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +70 -0
  35. firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +240 -0
  36. firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +107 -0
  37. firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +54 -0
  38. firecrawl/__tests__/unit/v2/methods/test_pagination.py +671 -0
  39. firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +109 -0
  40. firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +169 -0
  41. firecrawl/__tests__/unit/v2/methods/test_search_validation.py +236 -0
  42. firecrawl/__tests__/unit/v2/methods/test_usage_types.py +18 -0
  43. firecrawl/__tests__/unit/v2/methods/test_webhook.py +123 -0
  44. firecrawl/__tests__/unit/v2/utils/test_metadata_extras.py +94 -0
  45. firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py +22 -0
  46. firecrawl/__tests__/unit/v2/utils/test_recursive_schema.py +1133 -0
  47. firecrawl/__tests__/unit/v2/utils/test_validation.py +311 -0
  48. firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +332 -0
  49. firecrawl/client.py +281 -0
  50. firecrawl/firecrawl.backup.py +4635 -0
  51. firecrawl/types.py +167 -0
  52. firecrawl/v1/__init__.py +14 -0
  53. firecrawl/v1/client.py +5164 -0
  54. firecrawl/v2/__init__.py +4 -0
  55. firecrawl/v2/client.py +967 -0
  56. firecrawl/v2/client_async.py +408 -0
  57. firecrawl/v2/methods/agent.py +144 -0
  58. firecrawl/v2/methods/aio/__init__.py +1 -0
  59. firecrawl/v2/methods/aio/agent.py +137 -0
  60. firecrawl/v2/methods/aio/batch.py +188 -0
  61. firecrawl/v2/methods/aio/crawl.py +351 -0
  62. firecrawl/v2/methods/aio/extract.py +133 -0
  63. firecrawl/v2/methods/aio/map.py +65 -0
  64. firecrawl/v2/methods/aio/scrape.py +33 -0
  65. firecrawl/v2/methods/aio/search.py +176 -0
  66. firecrawl/v2/methods/aio/usage.py +89 -0
  67. firecrawl/v2/methods/batch.py +499 -0
  68. firecrawl/v2/methods/crawl.py +592 -0
  69. firecrawl/v2/methods/extract.py +161 -0
  70. firecrawl/v2/methods/map.py +83 -0
  71. firecrawl/v2/methods/scrape.py +64 -0
  72. firecrawl/v2/methods/search.py +215 -0
  73. firecrawl/v2/methods/usage.py +84 -0
  74. firecrawl/v2/types.py +1143 -0
  75. firecrawl/v2/utils/__init__.py +9 -0
  76. firecrawl/v2/utils/error_handler.py +107 -0
  77. firecrawl/v2/utils/get_version.py +15 -0
  78. firecrawl/v2/utils/http_client.py +178 -0
  79. firecrawl/v2/utils/http_client_async.py +69 -0
  80. firecrawl/v2/utils/normalize.py +125 -0
  81. firecrawl/v2/utils/validation.py +692 -0
  82. firecrawl/v2/watcher.py +301 -0
  83. firecrawl/v2/watcher_async.py +243 -0
  84. firecrawl-4.12.0.dist-info/METADATA +234 -0
  85. firecrawl-4.12.0.dist-info/RECORD +92 -0
  86. firecrawl-4.12.0.dist-info/WHEEL +5 -0
  87. firecrawl-4.12.0.dist-info/licenses/LICENSE +21 -0
  88. firecrawl-4.12.0.dist-info/top_level.txt +2 -0
  89. tests/test_agent_integration.py +277 -0
  90. tests/test_api_key_handling.py +44 -0
  91. tests/test_change_tracking.py +98 -0
  92. tests/test_timeout_conversion.py +117 -0
@@ -0,0 +1,44 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
+ import pytest
5
+
6
+ ROOT = Path(__file__).resolve().parents[1]
7
+ if str(ROOT) not in sys.path:
8
+ sys.path.insert(0, str(ROOT))
9
+
10
+ from firecrawl.v2.client import FirecrawlClient
11
+ from firecrawl.v2.client_async import AsyncFirecrawlClient
12
+
13
+
@pytest.fixture(autouse=True)
def clear_firecrawl_api_key_env(monkeypatch):
    """Drop ``FIRECRAWL_API_KEY`` from the environment before each test.

    The fixture is autouse, so every test in this module runs without the
    variable set. ``raising=False`` keeps the removal silent when the
    variable is not present in the first place.
    """
    monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
    yield
18
+
19
+
def test_cloud_requires_api_key():
    """Building a sync client for the hosted API URL with no key raises ``ValueError``."""
    cloud_url = "https://api.firecrawl.dev"
    with pytest.raises(ValueError):
        FirecrawlClient(api_url=cloud_url)
23
+
24
+
def test_self_host_allows_missing_api_key():
    """A sync client pointed at a localhost URL can be built without an API key."""
    self_hosted = FirecrawlClient(api_url="http://localhost:3000")
    stored_key = self_hosted.http_client.api_key
    assert stored_key is None
28
+
29
+
def test_async_cloud_requires_api_key():
    """Building an async client for the hosted API URL with no key raises ``ValueError``."""
    cloud_url = "https://api.firecrawl.dev"
    with pytest.raises(ValueError):
        AsyncFirecrawlClient(api_url=cloud_url)
33
+
34
+
@pytest.mark.asyncio
async def test_async_self_host_allows_missing_api_key():
    """An async client pointed at a localhost URL can be built without an API key.

    The async HTTP client is closed in ``finally`` so it is released exactly
    once whether or not the assertion passes. Closing unconditionally there
    avoids having to inspect the HTTP client's private ``_client`` attribute
    to decide if a second close is needed.
    """
    client = AsyncFirecrawlClient(api_url="http://localhost:3000")
    try:
        assert client.http_client.api_key is None
    finally:
        # Single cleanup point for both the passing and the failing path.
        await client.async_http_client.close()
@@ -0,0 +1,98 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import json
4
+ import os
5
+ from firecrawl import FirecrawlApp
6
+
class TestChangeTracking(unittest.TestCase):
    """Checks change-tracking options and results as they pass through ``scrape_url``.

    ``requests.post`` is patched in every test, so the tests only inspect the
    keyword arguments handed to it and the value ``scrape_url`` returns.
    """

    @staticmethod
    def _ok_response(change_tracking):
        """Return a mocked 200 response whose payload carries *change_tracking*."""
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {
            'success': True,
            'data': {
                'markdown': 'Test markdown content',
                'changeTracking': change_tracking,
            },
        }
        return response

    @staticmethod
    def _make_app():
        """Build the app with ``TEST_API_KEY`` from the environment, or a dummy key."""
        api_key = os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing')
        return FirecrawlApp(api_key=api_key)

    @patch('requests.post')
    def test_change_tracking_format(self, mock_post):
        """The ``changeTracking`` format is sent in the request and its fields are returned."""
        mock_post.return_value = self._ok_response({
            'previousScrapeAt': '2023-01-01T00:00:00Z',
            'changeStatus': 'changed',
            'visibility': 'visible',
        })

        app = self._make_app()
        result = app.scrape_url('https://example.com', {
            'formats': ['markdown', 'changeTracking'],
        })

        # Only the keyword arguments of the patched call are inspected.
        _, sent = mock_post.call_args
        self.assertEqual(sent['json']['formats'], ['markdown', 'changeTracking'])

        tracking = result['changeTracking']
        expected_fields = (
            ('previousScrapeAt', '2023-01-01T00:00:00Z'),
            ('changeStatus', 'changed'),
            ('visibility', 'visible'),
        )
        for field, expected in expected_fields:
            self.assertEqual(tracking[field], expected)

    @patch('requests.post')
    def test_change_tracking_options(self, mock_post):
        """``changeTrackingOptions`` are sent in the request; diff and JSON results are returned."""
        diff_text = '@@ -1,1 +1,1 @@\n-old content\n+new content'
        removed_line = {
            'type': 'del',
            'content': '-old content',
            'del': True,
            'ln': 1,
        }
        added_line = {
            'type': 'add',
            'content': '+new content',
            'add': True,
            'ln': 1,
        }
        chunk = {
            'content': '@@ -1,1 +1,1 @@',
            'changes': [removed_line, added_line],
        }
        mock_post.return_value = self._ok_response({
            'previousScrapeAt': '2023-01-01T00:00:00Z',
            'changeStatus': 'changed',
            'visibility': 'visible',
            'diff': {
                'text': diff_text,
                'json': {
                    'files': [{
                        'from': None,
                        'to': None,
                        'chunks': [chunk],
                    }],
                },
            },
            'json': {
                'title': {
                    'previous': 'Old Title',
                    'current': 'New Title',
                },
            },
        })

        app = self._make_app()
        result = app.scrape_url('https://example.com', {
            'formats': ['markdown', 'changeTracking'],
            'changeTrackingOptions': {
                'modes': ['git-diff', 'json'],
                'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}},
            },
        })

        _, sent = mock_post.call_args
        request_body = sent['json']
        self.assertEqual(request_body['formats'], ['markdown', 'changeTracking'])
        self.assertEqual(request_body['changeTrackingOptions']['modes'], ['git-diff', 'json'])

        self.assertEqual(result['changeTracking']['diff']['text'], diff_text)
        title_change = result['changeTracking']['json']['title']
        self.assertEqual(title_change['previous'], 'Old Title')
        self.assertEqual(title_change['current'], 'New Title')
@@ -0,0 +1,117 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import os
4
+ from firecrawl import FirecrawlApp
5
+
6
+
class TestTimeoutConversion(unittest.TestCase):
    """Checks the ``timeout`` keyword that reaches the patched ``requests.post``.

    The expected values (60000 -> 65.0, 1000 -> 6.0, 0 -> 5.0) are consistent
    with the input being divided by 1000 and 5 being added; the conversion
    itself lives in the client, not in this file.
    """

    URL = 'https://example.com'
    API_URL = 'https://example.com/api'

    @staticmethod
    def _response(with_payload):
        """Return a mocked 200 response, with a scrape payload when *with_payload* is true."""
        response = MagicMock()
        response.status_code = 200
        if with_payload:
            response.json.return_value = {
                'success': True,
                'data': {
                    'markdown': 'Test content',
                },
            }
        return response

    @staticmethod
    def _make_app():
        """Build the app with ``TEST_API_KEY`` from the environment, or a dummy key."""
        api_key = os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing')
        return FirecrawlApp(api_key=api_key)

    @staticmethod
    def _sent_timeout(mock_post):
        """Return the ``timeout`` keyword of the most recent patched call."""
        _, sent = mock_post.call_args
        return sent['timeout']

    @patch('requests.post')
    def test_scrape_url_timeout_conversion(self, mock_post):
        """``scrape_url(timeout=60000)`` posts with ``timeout=65.0``."""
        mock_post.return_value = self._response(with_payload=True)

        app = self._make_app()
        app.scrape_url(self.URL, timeout=60000)

        self.assertEqual(self._sent_timeout(mock_post), 65.0)

    @patch('requests.post')
    def test_scrape_url_default_timeout(self, mock_post):
        """``scrape_url`` without a timeout argument posts with ``timeout=35.0``."""
        mock_post.return_value = self._response(with_payload=True)

        app = self._make_app()
        app.scrape_url(self.URL)

        self.assertEqual(self._sent_timeout(mock_post), 35.0)

    @patch('requests.post')
    def test_post_request_timeout_conversion(self, mock_post):
        """``_post_request`` with ``timeout: 30000`` in the body posts with ``timeout=35.0``."""
        mock_post.return_value = self._response(with_payload=False)

        app = self._make_app()

        payload = {'timeout': 30000}
        request_headers = {'Content-Type': 'application/json'}

        app._post_request(self.API_URL, payload, request_headers)

        self.assertEqual(self._sent_timeout(mock_post), 35.0)

    @patch('requests.post')
    def test_post_request_default_timeout(self, mock_post):
        """``_post_request`` with ``timeout: 30000`` plus a ``url`` key posts with ``timeout=35.0``.

        NOTE(review): despite the name, the body carries an explicit timeout,
        so this does not exercise a default -- confirm the intended scenario.
        """
        mock_post.return_value = self._response(with_payload=False)

        app = self._make_app()

        payload = {'timeout': 30000, 'url': 'https://example.com'}
        request_headers = {'Content-Type': 'application/json'}

        app._post_request(self.API_URL, payload, request_headers)

        self.assertEqual(self._sent_timeout(mock_post), 35.0)

    @patch('requests.post')
    def test_timeout_edge_cases(self, mock_post):
        """Small and zero timeouts post with ``6.0`` and ``5.0`` respectively."""
        mock_post.return_value = self._response(with_payload=True)

        app = self._make_app()

        # Checked in order; each check reads the most recent patched call.
        for requested, expected in ((1000, 6.0), (0, 5.0)):
            app.scrape_url(self.URL, timeout=requested)
            self.assertEqual(self._sent_timeout(mock_post), expected)

    @patch('requests.post')
    def test_post_request_no_timeout_key(self, mock_post):
        """``_post_request`` with no ``timeout`` key in the body posts with ``timeout=None``."""
        mock_post.return_value = self._response(with_payload=False)

        app = self._make_app()

        payload = {'url': 'https://example.com'}
        request_headers = {'Content-Type': 'application/json'}

        app._post_request(self.API_URL, payload, request_headers)

        self.assertIsNone(self._sent_timeout(mock_post))
114
+
115
+
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()