firecrawl-2.1.1-py3-none-any.whl → firecrawl-2.1.2-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl might be problematic. Click here for more details.

@@ -0,0 +1,98 @@
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import json
4
+ import os
5
+ from firecrawl import FirecrawlApp
6
+
7
+ class TestChangeTracking(unittest.TestCase):
8
+ @patch('requests.post')
9
+ def test_change_tracking_format(self, mock_post):
10
+ mock_response = MagicMock()
11
+ mock_response.status_code = 200
12
+ mock_response.json.return_value = {
13
+ 'success': True,
14
+ 'data': {
15
+ 'markdown': 'Test markdown content',
16
+ 'changeTracking': {
17
+ 'previousScrapeAt': '2023-01-01T00:00:00Z',
18
+ 'changeStatus': 'changed',
19
+ 'visibility': 'visible'
20
+ }
21
+ }
22
+ }
23
+ mock_post.return_value = mock_response
24
+
25
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
26
+ result = app.scrape_url('https://example.com', {
27
+ 'formats': ['markdown', 'changeTracking']
28
+ })
29
+
30
+ args, kwargs = mock_post.call_args
31
+ self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
32
+
33
+ self.assertEqual(result['changeTracking']['previousScrapeAt'], '2023-01-01T00:00:00Z')
34
+ self.assertEqual(result['changeTracking']['changeStatus'], 'changed')
35
+ self.assertEqual(result['changeTracking']['visibility'], 'visible')
36
+
37
+ @patch('requests.post')
38
+ def test_change_tracking_options(self, mock_post):
39
+ mock_response = MagicMock()
40
+ mock_response.status_code = 200
41
+ mock_response.json.return_value = {
42
+ 'success': True,
43
+ 'data': {
44
+ 'markdown': 'Test markdown content',
45
+ 'changeTracking': {
46
+ 'previousScrapeAt': '2023-01-01T00:00:00Z',
47
+ 'changeStatus': 'changed',
48
+ 'visibility': 'visible',
49
+ 'diff': {
50
+ 'text': '@@ -1,1 +1,1 @@\n-old content\n+new content',
51
+ 'json': {
52
+ 'files': [{
53
+ 'from': None,
54
+ 'to': None,
55
+ 'chunks': [{
56
+ 'content': '@@ -1,1 +1,1 @@',
57
+ 'changes': [{
58
+ 'type': 'del',
59
+ 'content': '-old content',
60
+ 'del': True,
61
+ 'ln': 1
62
+ }, {
63
+ 'type': 'add',
64
+ 'content': '+new content',
65
+ 'add': True,
66
+ 'ln': 1
67
+ }]
68
+ }]
69
+ }]
70
+ }
71
+ },
72
+ 'json': {
73
+ 'title': {
74
+ 'previous': 'Old Title',
75
+ 'current': 'New Title'
76
+ }
77
+ }
78
+ }
79
+ }
80
+ }
81
+ mock_post.return_value = mock_response
82
+
83
+ app = FirecrawlApp(api_key=os.environ.get('TEST_API_KEY', 'dummy-api-key-for-testing'))
84
+ result = app.scrape_url('https://example.com', {
85
+ 'formats': ['markdown', 'changeTracking'],
86
+ 'changeTrackingOptions': {
87
+ 'modes': ['git-diff', 'json'],
88
+ 'schema': {'type': 'object', 'properties': {'title': {'type': 'string'}}}
89
+ }
90
+ })
91
+
92
+ args, kwargs = mock_post.call_args
93
+ self.assertEqual(kwargs['json']['formats'], ['markdown', 'changeTracking'])
94
+ self.assertEqual(kwargs['json']['changeTrackingOptions']['modes'], ['git-diff', 'json'])
95
+
96
+ self.assertEqual(result['changeTracking']['diff']['text'], '@@ -1,1 +1,1 @@\n-old content\n+new content')
97
+ self.assertEqual(result['changeTracking']['json']['title']['previous'], 'Old Title')
98
+ self.assertEqual(result['changeTracking']['json']['title']['current'], 'New Title')
firecrawl/__init__.py CHANGED
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa
15
15
 
16
- __version__ = "2.1.1"
16
+ __version__ = "2.1.2"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
firecrawl/firecrawl.py CHANGED
@@ -1849,24 +1849,33 @@ class FirecrawlApp:
1849
1849
  show_full_text=show_full_text,
1850
1850
  experimental_stream=experimental_stream
1851
1851
  )
1852
- if not response.get('success') or 'id' not in response:
1853
- return response
1852
+
1853
+ if not response.success or not response.id:
1854
+ return GenerateLLMsTextStatusResponse(
1855
+ success=False,
1856
+ error='Failed to start LLMs.txt generation',
1857
+ status='failed',
1858
+ expiresAt=''
1859
+ )
1854
1860
 
1855
- job_id = response['id']
1861
+ job_id = response.id
1856
1862
  while True:
1857
1863
  status = self.check_generate_llms_text_status(job_id)
1858
1864
 
1859
- if status['status'] == 'completed':
1865
+ if status.status == 'completed':
1860
1866
  return status
1861
- elif status['status'] == 'failed':
1862
- raise Exception(f'LLMs.txt generation failed. Error: {status.get("error")}')
1863
- elif status['status'] != 'processing':
1864
- break
1867
+ elif status.status == 'failed':
1868
+ return status
1869
+ elif status.status != 'processing':
1870
+ return GenerateLLMsTextStatusResponse(
1871
+ success=False,
1872
+ error='LLMs.txt generation job terminated unexpectedly',
1873
+ status='failed',
1874
+ expiresAt=''
1875
+ )
1865
1876
 
1866
1877
  time.sleep(2) # Polling interval
1867
1878
 
1868
- return {'success': False, 'error': 'LLMs.txt generation job terminated unexpectedly'}
1869
-
1870
1879
  def async_generate_llms_text(
1871
1880
  self,
1872
1881
  url: str,
@@ -1903,10 +1912,13 @@ class FirecrawlApp:
1903
1912
  json_data['origin'] = f"python-sdk@{version}"
1904
1913
 
1905
1914
  try:
1906
- response = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers)
1907
- if response.status_code == 200:
1915
+ req = self._post_request(f'{self.api_url}/v1/llmstxt', json_data, headers)
1916
+ response = req.json()
1917
+ print("json_data", json_data)
1918
+ print("response", response)
1919
+ if response.get('success'):
1908
1920
  try:
1909
- return response.json()
1921
+ return GenerateLLMsTextResponse(**response)
1910
1922
  except:
1911
1923
  raise Exception('Failed to parse Firecrawl response as JSON.')
1912
1924
  else:
@@ -1914,7 +1926,10 @@ class FirecrawlApp:
1914
1926
  except Exception as e:
1915
1927
  raise ValueError(str(e))
1916
1928
 
1917
- return {'success': False, 'error': 'Internal server error'}
1929
+ return GenerateLLMsTextResponse(
1930
+ success=False,
1931
+ error='Internal server error'
1932
+ )
1918
1933
 
1919
1934
  def check_generate_llms_text_status(self, id: str) -> GenerateLLMsTextStatusResponse:
1920
1935
  """
@@ -1941,9 +1956,10 @@ class FirecrawlApp:
1941
1956
  response = self._get_request(f'{self.api_url}/v1/llmstxt/{id}', headers)
1942
1957
  if response.status_code == 200:
1943
1958
  try:
1944
- return response.json()
1945
- except:
1946
- raise Exception('Failed to parse Firecrawl response as JSON.')
1959
+ json_data = response.json()
1960
+ return GenerateLLMsTextStatusResponse(**json_data)
1961
+ except Exception as e:
1962
+ raise Exception(f'Failed to parse Firecrawl response as GenerateLLMsTextStatusResponse: {str(e)}')
1947
1963
  elif response.status_code == 404:
1948
1964
  raise Exception('LLMs.txt generation job not found')
1949
1965
  else:
@@ -1951,7 +1967,7 @@ class FirecrawlApp:
1951
1967
  except Exception as e:
1952
1968
  raise ValueError(str(e))
1953
1969
 
1954
- return {'success': False, 'error': 'Internal server error'}
1970
+ return GenerateLLMsTextStatusResponse(success=False, error='Internal server error', status='failed', expiresAt='')
1955
1971
 
1956
1972
  def _prepare_headers(
1957
1973
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 2.1.1
3
+ Version: 2.1.2
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -0,0 +1,26 @@
1
+ build/lib/build/lib/firecrawl/__init__.py,sha256=ULa8YPZxYyfqSWUKB3kuqDpCsteI9xpEEgoQhUDikvg,2570
2
+ build/lib/build/lib/firecrawl/firecrawl.py,sha256=_TpfO8ltda563MIg1NbtPWY1DiWRRAqM2NBcW7DmHXM,178339
3
+ build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ build/lib/build/lib/firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
5
+ build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ build/lib/build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
7
+ build/lib/build/lib/tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
8
+ build/lib/firecrawl/__init__.py,sha256=ULa8YPZxYyfqSWUKB3kuqDpCsteI9xpEEgoQhUDikvg,2570
9
+ build/lib/firecrawl/firecrawl.py,sha256=_TpfO8ltda563MIg1NbtPWY1DiWRRAqM2NBcW7DmHXM,178339
10
+ build/lib/firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ build/lib/firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
12
+ build/lib/firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ build/lib/firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
14
+ build/lib/tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
15
+ firecrawl/__init__.py,sha256=ULa8YPZxYyfqSWUKB3kuqDpCsteI9xpEEgoQhUDikvg,2570
16
+ firecrawl/firecrawl.py,sha256=_TpfO8ltda563MIg1NbtPWY1DiWRRAqM2NBcW7DmHXM,178339
17
+ firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
19
+ firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
21
+ tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
22
+ firecrawl-2.1.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
23
+ firecrawl-2.1.2.dist-info/METADATA,sha256=-RpAnHd-RpdV0syOIhYWzfprdpBZGibZQkdG-CHceDA,10583
24
+ firecrawl-2.1.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
25
+ firecrawl-2.1.2.dist-info/top_level.txt,sha256=ytN_R30g2U2qZYFyIm710Z8QeK9FO1Uwa-WPGHXyqjE,27
26
+ firecrawl-2.1.2.dist-info/RECORD,,
@@ -1,2 +1,4 @@
1
+ build
2
+ dist
1
3
  firecrawl
2
4
  tests
@@ -1,12 +0,0 @@
1
- firecrawl/__init__.py,sha256=NU9Qcom12t48ym3ovFMpCYI4-uH-Ac1jnddqSUzxEIE,2570
2
- firecrawl/firecrawl.py,sha256=bXjJKt2UAdszpoCspBOPen_2lz5ysmVWP5vDMZUbyUo,177726
3
- firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- firecrawl/__tests__/e2e_withAuth/test.py,sha256=-Fq2vPcMo0iQi4dwsUkkCd931ybDaTxMBnZbRfGdDcA,7931
5
- firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=DcCw-cohtnL-t9XPekUtRoQrgg3UCWu8Ikqudf9ory8,19880
7
- tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
8
- firecrawl-2.1.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
9
- firecrawl-2.1.1.dist-info/METADATA,sha256=5_5qIPtR-xSv8jAkZLqBP1i-xefxucWl3rZo2OfPsLo,10583
10
- firecrawl-2.1.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
11
- firecrawl-2.1.1.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
12
- firecrawl-2.1.1.dist-info/RECORD,,