firecrawl 1.14.1__tar.gz → 1.16.0__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.14.1
3
+ Version: 1.16.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp # noqa
15
15
 
16
- __version__ = "1.14.1"
16
+ __version__ = "1.16.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -48,6 +48,8 @@ class DeepResearchParams(pydantic.BaseModel):
48
48
  maxDepth: Optional[int] = 7
49
49
  timeLimit: Optional[int] = 270
50
50
  maxUrls: Optional[int] = 20
51
+ analysisPrompt: Optional[str] = None
52
+ systemPrompt: Optional[str] = None
51
53
  __experimental_streamSteps: Optional[bool] = None
52
54
 
53
55
  class DeepResearchResponse(pydantic.BaseModel):
@@ -73,6 +75,16 @@ class DeepResearchStatusResponse(pydantic.BaseModel):
73
75
  sources: List[Dict[str, Any]]
74
76
  summaries: List[str]
75
77
 
78
+ class ChangeTrackingData(pydantic.BaseModel):
79
+ """
80
+ Data for the change tracking format.
81
+ """
82
+ previousScrapeAt: Optional[str] = None
83
+ changeStatus: str # "new" | "same" | "changed" | "removed"
84
+ visibility: str # "visible" | "hidden"
85
+ diff: Optional[Dict[str, Any]] = None
86
+ json: Optional[Any] = None
87
+
76
88
  class FirecrawlApp:
77
89
  class SearchResponse(pydantic.BaseModel):
78
90
  """
@@ -165,9 +177,13 @@ class FirecrawlApp:
165
177
  json['schema'] = json['schema'].schema()
166
178
  scrape_params['jsonOptions'] = json
167
179
 
180
+ change_tracking = params.get("changeTrackingOptions", {})
181
+ if change_tracking:
182
+ scrape_params['changeTrackingOptions'] = change_tracking
183
+
168
184
  # Include any other params directly at the top level of scrape_params
169
185
  for key, value in params.items():
170
- if key not in ['jsonOptions']:
186
+ if key not in ['jsonOptions', 'changeTrackingOptions']:
171
187
  scrape_params[key] = value
172
188
 
173
189
 
@@ -1098,6 +1114,8 @@ class FirecrawlApp:
1098
1114
 
1099
1115
  if response.status_code == 402:
1100
1116
  message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
1117
+ elif response.status_code == 403:
1118
+ message = f"Website Not Supported: Failed to {action}. {error_message} - {error_details}"
1101
1119
  elif response.status_code == 408:
1102
1120
  message = f"Request Timeout: Failed to {action} as the request timed out. {error_message} - {error_details}"
1103
1121
  elif response.status_code == 409:
@@ -1168,7 +1186,6 @@ class FirecrawlApp:
1168
1186
  time.sleep(2) # Polling interval
1169
1187
 
1170
1188
  return {'success': False, 'error': 'Deep research job terminated unexpectedly'}
1171
-
1172
1189
  def async_deep_research(self, query: str, params: Optional[Union[Dict[str, Any], DeepResearchParams]] = None) -> Dict[str, Any]:
1173
1190
  """
1174
1191
  Initiates an asynchronous deep research operation.
@@ -1192,8 +1209,15 @@ class FirecrawlApp:
1192
1209
  research_params = params
1193
1210
 
1194
1211
  headers = self._prepare_headers()
1212
+
1195
1213
  json_data = {'query': query, **research_params.dict(exclude_none=True)}
1196
1214
 
1215
+ # Handle json options schema if present
1216
+ if 'jsonOptions' in json_data:
1217
+ json_opts = json_data['jsonOptions']
1218
+ if json_opts and 'schema' in json_opts and hasattr(json_opts['schema'], 'schema'):
1219
+ json_data['jsonOptions']['schema'] = json_opts['schema'].schema()
1220
+
1197
1221
  try:
1198
1222
  response = self._post_request(f'{self.api_url}/v1/deep-research', json_data, headers)
1199
1223
  if response.status_code == 200:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.14.1
3
+ Version: 1.16.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -12,4 +12,5 @@ firecrawl.egg-info/top_level.txt
12
12
  firecrawl/__tests__/e2e_withAuth/__init__.py
13
13
  firecrawl/__tests__/e2e_withAuth/test.py
14
14
  firecrawl/__tests__/v1/e2e_withAuth/__init__.py
15
- firecrawl/__tests__/v1/e2e_withAuth/test.py
15
+ firecrawl/__tests__/v1/e2e_withAuth/test.py
16
+ tests/test_change_tracking.py
@@ -0,0 +1,98 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import json
4
+ import os
5
+ from firecrawl import FirecrawlApp
6
+
7
+ class TestChangeTracking(unittest.TestCase):
8
+ @patch('requests.post')
9
+ def test_change_tracking_format(self, mock_post):
10
+ mock_response = MagicMock()
11
+ mock_response.status_code = 200
12
+ mock_response.json.return_value = {
13
+ 'success': True,
14
+ 'data': {
15
+ 'markdown': 'Test markdown content',
16
+ 'changeTracking': {
17
+ 'previousScrapeAt': '2023-01-01T00:00:00Z',
18
+ 'changeStatus': 'changed',
19
+ 'visibility': 'visible'
20
+ }
21
+ }
22
+ }
23
+ mock_post.return_value = mock_response
24
+
25
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
26
+ result = app.scrape_url('https://example.com', {
27
+ 'formats': ['markdown', 'changeTracking']
28
+ })
29
+
30
+ args, kwargs = mock_post.call_args
31
+ self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
32
+
33
+ self.assertEqual(result['changeTracking']['previousScrapeAt'], '2023-01-01T00:00:00Z')
34
+ self.assertEqual(result['changeTracking']['changeStatus'], 'changed')
35
+ self.assertEqual(result['changeTracking']['visibility'], 'visible')
36
+
37
+ @patch('requests.post')
38
+ def test_change_tracking_options(self, mock_post):
39
+ mock_response = MagicMock()
40
+ mock_response.status_code = 200
41
+ mock_response.json.return_value = {
42
+ 'success': True,
43
+ 'data': {
44
+ 'markdown': 'Test markdown content',
45
+ 'changeTracking': {
46
+ 'previousScrapeAt': '2023-01-01T00:00:00Z',
47
+ 'changeStatus': 'changed',
48
+ 'visibility': 'visible',
49
+ 'diff': {
50
+ 'text': '@@ -1,1 +1,1 @@\n-old content\n+new content',
51
+ 'json': {
52
+ 'files': [{
53
+ 'from': None,
54
+ 'to': None,
55
+ 'chunks': [{
56
+ 'content': '@@ -1,1 +1,1 @@',
57
+ 'changes': [{
58
+ 'type': 'del',
59
+ 'content': '-old content',
60
+ 'del': True,
61
+ 'ln': 1
62
+ }, {
63
+ 'type': 'add',
64
+ 'content': '+new content',
65
+ 'add': True,
66
+ 'ln': 1
67
+ }]
68
+ }]
69
+ }]
70
+ }
71
+ },
72
+ 'json': {
73
+ 'title': {
74
+ 'previous': 'Old Title',
75
+ 'current': 'New Title'
76
+ }
77
+ }
78
+ }
79
+ }
80
+ }
81
+ mock_post.return_value = mock_response
82
+
83
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
84
+ result = app.scrape_url('https://example.com', {
85
+ 'formats': ['markdown', 'changeTracking'],
86
+ 'changeTrackingOptions': {
87
+ 'modes': ['git-diff', 'json'],
88
+ 'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}}
89
+ }
90
+ })
91
+
92
+ args, kwargs = mock_post.call_args
93
+ self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
94
+ self.assertEqual(kwargs['json']['changeTrackingOptions']['modes'], ['git-diff', 'json'])
95
+
96
+ self.assertEqual(result['changeTracking']['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
97
+ self.assertEqual(result['changeTracking']['json']['title']['previous'], 'Old Title')
98
+ self.assertEqual(result['changeTracking']['json']['title']['current'], 'New Title')
File without changes
File without changes
File without changes
File without changes
File without changes