firecrawl 1.15.0__tar.gz → 1.17.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.15.0
3
+ Version: 1.17.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp # noqa
15
15
 
16
- __version__ = "1.15.0"
16
+ __version__ = "1.17.0"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -75,6 +75,16 @@ class DeepResearchStatusResponse(pydantic.BaseModel):
75
75
  sources: List[Dict[str, Any]]
76
76
  summaries: List[str]
77
77
 
78
+ class ChangeTrackingData(pydantic.BaseModel):
79
+ """
80
+ Data for the change tracking format.
81
+ """
82
+ previousScrapeAt: Optional[str] = None
83
+ changeStatus: str # "new" | "same" | "changed" | "removed"
84
+ visibility: str # "visible" | "hidden"
85
+ diff: Optional[Dict[str, Any]] = None
86
+ json: Optional[Any] = None
87
+
78
88
  class FirecrawlApp:
79
89
  class SearchResponse(pydantic.BaseModel):
80
90
  """
@@ -97,6 +107,7 @@ class FirecrawlApp:
97
107
  # Just for backwards compatibility
98
108
  enableWebSearch: Optional[bool] = False
99
109
  show_sources: Optional[bool] = False
110
+ agent: Optional[Dict[str, Any]] = None
100
111
 
101
112
 
102
113
 
@@ -167,10 +178,18 @@ class FirecrawlApp:
167
178
  json['schema'] = json['schema'].schema()
168
179
  scrape_params['jsonOptions'] = json
169
180
 
181
+ change_tracking = params.get("changeTrackingOptions", {})
182
+ if change_tracking:
183
+ scrape_params['changeTrackingOptions'] = change_tracking
184
+
170
185
  # Include any other params directly at the top level of scrape_params
171
186
  for key, value in params.items():
172
- if key not in ['jsonOptions']:
187
+ if key not in ['jsonOptions', 'changeTrackingOptions', 'agent']:
173
188
  scrape_params[key] = value
189
+
190
+ agent = params.get('agent')
191
+ if agent:
192
+ scrape_params['agent'] = agent
174
193
 
175
194
 
176
195
  endpoint = f'/v1/scrape'
@@ -692,6 +711,9 @@ class FirecrawlApp:
692
711
  request_data['systemPrompt'] = params['system_prompt']
693
712
  elif params.get('systemPrompt'): # Check legacy field name
694
713
  request_data['systemPrompt'] = params['systemPrompt']
714
+
715
+ if params.get('agent'):
716
+ request_data['agent'] = params['agent']
695
717
 
696
718
  try:
697
719
  # Send the initial extract request
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.15.0
3
+ Version: 1.17.0
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -12,4 +12,5 @@ firecrawl.egg-info/top_level.txt
12
12
  firecrawl/__tests__/e2e_withAuth/__init__.py
13
13
  firecrawl/__tests__/e2e_withAuth/test.py
14
14
  firecrawl/__tests__/v1/e2e_withAuth/__init__.py
15
- firecrawl/__tests__/v1/e2e_withAuth/test.py
15
+ firecrawl/__tests__/v1/e2e_withAuth/test.py
16
+ tests/test_change_tracking.py
@@ -0,0 +1,98 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import json
4
+ import os
5
+ from firecrawl import FirecrawlApp
6
+
7
+ class TestChangeTracking(unittest.TestCase):
8
+ @patch('requests.post')
9
+ def test_change_tracking_format(self, mock_post):
10
+ mock_response = MagicMock()
11
+ mock_response.status_code = 200
12
+ mock_response.json.return_value = {
13
+ 'success': True,
14
+ 'data': {
15
+ 'markdown': 'Test markdown content',
16
+ 'changeTracking': {
17
+ 'previousScrapeAt': '2023-01-01T00:00:00Z',
18
+ 'changeStatus': 'changed',
19
+ 'visibility': 'visible'
20
+ }
21
+ }
22
+ }
23
+ mock_post.return_value = mock_response
24
+
25
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
26
+ result = app.scrape_url('https://example.com', {
27
+ 'formats': ['markdown', 'changeTracking']
28
+ })
29
+
30
+ args, kwargs = mock_post.call_args
31
+ self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
32
+
33
+ self.assertEqual(result['changeTracking']['previousScrapeAt'], '2023-01-01T00:00:00Z')
34
+ self.assertEqual(result['changeTracking']['changeStatus'], 'changed')
35
+ self.assertEqual(result['changeTracking']['visibility'], 'visible')
36
+
37
+ @patch('requests.post')
38
+ def test_change_tracking_options(self, mock_post):
39
+ mock_response = MagicMock()
40
+ mock_response.status_code = 200
41
+ mock_response.json.return_value = {
42
+ 'success': True,
43
+ 'data': {
44
+ 'markdown': 'Test markdown content',
45
+ 'changeTracking': {
46
+ 'previousScrapeAt': '2023-01-01T00:00:00Z',
47
+ 'changeStatus': 'changed',
48
+ 'visibility': 'visible',
49
+ 'diff': {
50
+ 'text': '@@ -1,1 +1,1 @@\n-old content\n+new content',
51
+ 'json': {
52
+ 'files': [{
53
+ 'from': None,
54
+ 'to': None,
55
+ 'chunks': [{
56
+ 'content': '@@ -1,1 +1,1 @@',
57
+ 'changes': [{
58
+ 'type': 'del',
59
+ 'content': '-old content',
60
+ 'del': True,
61
+ 'ln': 1
62
+ }, {
63
+ 'type': 'add',
64
+ 'content': '+new content',
65
+ 'add': True,
66
+ 'ln': 1
67
+ }]
68
+ }]
69
+ }]
70
+ }
71
+ },
72
+ 'json': {
73
+ 'title': {
74
+ 'previous': 'Old Title',
75
+ 'current': 'New Title'
76
+ }
77
+ }
78
+ }
79
+ }
80
+ }
81
+ mock_post.return_value = mock_response
82
+
83
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
84
+ result = app.scrape_url('https://example.com', {
85
+ 'formats': ['markdown', 'changeTracking'],
86
+ 'changeTrackingOptions': {
87
+ 'modes': ['git-diff', 'json'],
88
+ 'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}}
89
+ }
90
+ })
91
+
92
+ args, kwargs = mock_post.call_args
93
+ self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
94
+ self.assertEqual(kwargs['json']['changeTrackingOptions']['modes'], ['git-diff', 'json'])
95
+
96
+ self.assertEqual(result['changeTracking']['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
97
+ self.assertEqual(result['changeTracking']['json']['title']['previous'], 'Old Title')
98
+ self.assertEqual(result['changeTracking']['json']['title']['current'], 'New Title')
File without changes
File without changes
File without changes
File without changes
File without changes