firecrawl 1.9.0__tar.gz → 1.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.9.0
3
+ Version: 1.10.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
@@ -13,7 +13,7 @@ import os
13
13
 
14
14
  from .firecrawl import FirecrawlApp # noqa
15
15
 
16
- __version__ = "1.9.0"
16
+ __version__ = "1.10.1"
17
17
 
18
18
  # Define the logger for the Firecrawl project
19
19
  logger: logging.Logger = logging.getLogger("firecrawl")
@@ -112,6 +112,18 @@ class FirecrawlApp:
112
112
  if key not in ['extract']:
113
113
  scrape_params[key] = value
114
114
 
115
+ json = params.get("jsonOptions", {})
116
+ if json:
117
+ if 'schema' in json and hasattr(json['schema'], 'schema'):
118
+ json['schema'] = json['schema'].schema()
119
+ scrape_params['jsonOptions'] = json
120
+
121
+ # Include any other params directly at the top level of scrape_params
122
+ for key, value in params.items():
123
+ if key not in ['jsonOptions']:
124
+ scrape_params[key] = value
125
+
126
+
115
127
  endpoint = f'/v1/scrape'
116
128
  # Make the POST request with the prepared headers and JSON data
117
129
  response = requests.post(
@@ -120,7 +132,10 @@ class FirecrawlApp:
120
132
  json=scrape_params,
121
133
  )
122
134
  if response.status_code == 200:
123
- response = response.json()
135
+ try:
136
+ response = response.json()
137
+ except:
138
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
124
139
  if response['success'] and 'data' in response:
125
140
  return response['data']
126
141
  elif "error" in response:
@@ -159,7 +174,10 @@ class FirecrawlApp:
159
174
  if response.status_code != 200:
160
175
  raise Exception(f"Request failed with status code {response.status_code}")
161
176
 
162
- return response.json()
177
+ try:
178
+ return response.json()
179
+ except:
180
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
163
181
 
164
182
  def crawl_url(self, url: str,
165
183
  params: Optional[Dict[str, Any]] = None,
@@ -194,7 +212,10 @@ class FirecrawlApp:
194
212
  json_data.update(params)
195
213
  response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
196
214
  if response.status_code == 200:
197
- id = response.json().get('id')
215
+ try:
216
+ id = response.json().get('id')
217
+ except:
218
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
198
219
  return self._monitor_job_status(id, headers, poll_interval)
199
220
 
200
221
  else:
@@ -223,7 +244,10 @@ class FirecrawlApp:
223
244
  json_data.update(params)
224
245
  response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
225
246
  if response.status_code == 200:
226
- return response.json()
247
+ try:
248
+ return response.json()
249
+ except:
250
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
227
251
  else:
228
252
  self._handle_error(response, 'start crawl job')
229
253
 
@@ -245,7 +269,10 @@ class FirecrawlApp:
245
269
  headers = self._prepare_headers()
246
270
  response = self._get_request(f'{self.api_url}{endpoint}', headers)
247
271
  if response.status_code == 200:
248
- status_data = response.json()
272
+ try:
273
+ status_data = response.json()
274
+ except:
275
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
249
276
  if status_data['status'] == 'completed':
250
277
  if 'data' in status_data:
251
278
  data = status_data['data']
@@ -261,7 +288,10 @@ class FirecrawlApp:
261
288
  if status_response.status_code != 200:
262
289
  logger.error(f"Failed to fetch next page: {status_response.status_code}")
263
290
  break
264
- next_data = status_response.json()
291
+ try:
292
+ next_data = status_response.json()
293
+ except:
294
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
265
295
  data.extend(next_data.get('data', []))
266
296
  status_data = next_data
267
297
  except Exception as e:
@@ -291,6 +321,26 @@ class FirecrawlApp:
291
321
  else:
292
322
  self._handle_error(response, 'check crawl status')
293
323
 
324
+ def check_crawl_errors(self, id: str) -> Dict[str, Any]:
325
+ """
326
+ Returns information about crawl errors.
327
+
328
+ Args:
329
+ id (str): The ID of the crawl job.
330
+
331
+ Returns:
332
+ Dict[str, Any]: Information about crawl errors.
333
+ """
334
+ headers = self._prepare_headers()
335
+ response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers)
336
+ if response.status_code == 200:
337
+ try:
338
+ return response.json()
339
+ except:
340
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
341
+ else:
342
+ self._handle_error(response, "check crawl errors")
343
+
294
344
  def cancel_crawl(self, id: str) -> Dict[str, Any]:
295
345
  """
296
346
  Cancel an asynchronous crawl job using the Firecrawl API.
@@ -304,7 +354,10 @@ class FirecrawlApp:
304
354
  headers = self._prepare_headers()
305
355
  response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
306
356
  if response.status_code == 200:
307
- return response.json()
357
+ try:
358
+ return response.json()
359
+ except:
360
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
308
361
  else:
309
362
  self._handle_error(response, "cancel crawl job")
310
363
 
@@ -352,7 +405,10 @@ class FirecrawlApp:
352
405
  json=json_data,
353
406
  )
354
407
  if response.status_code == 200:
355
- response = response.json()
408
+ try:
409
+ response = response.json()
410
+ except:
411
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
356
412
  if response['success'] and 'links' in response:
357
413
  return response
358
414
  elif 'error' in response:
@@ -395,7 +451,10 @@ class FirecrawlApp:
395
451
  json_data.update(params)
396
452
  response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
397
453
  if response.status_code == 200:
398
- id = response.json().get('id')
454
+ try:
455
+ id = response.json().get('id')
456
+ except:
457
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
399
458
  return self._monitor_job_status(id, headers, poll_interval)
400
459
 
401
460
  else:
@@ -424,7 +483,10 @@ class FirecrawlApp:
424
483
  json_data.update(params)
425
484
  response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
426
485
  if response.status_code == 200:
427
- return response.json()
486
+ try:
487
+ return response.json()
488
+ except:
489
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
428
490
  else:
429
491
  self._handle_error(response, 'start batch scrape job')
430
492
 
@@ -464,7 +526,10 @@ class FirecrawlApp:
464
526
  headers = self._prepare_headers()
465
527
  response = self._get_request(f'{self.api_url}{endpoint}', headers)
466
528
  if response.status_code == 200:
467
- status_data = response.json()
529
+ try:
530
+ status_data = response.json()
531
+ except:
532
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
468
533
  if status_data['status'] == 'completed':
469
534
  if 'data' in status_data:
470
535
  data = status_data['data']
@@ -480,7 +545,10 @@ class FirecrawlApp:
480
545
  if status_response.status_code != 200:
481
546
  logger.error(f"Failed to fetch next page: {status_response.status_code}")
482
547
  break
483
- next_data = status_response.json()
548
+ try:
549
+ next_data = status_response.json()
550
+ except:
551
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
484
552
  data.extend(next_data.get('data', []))
485
553
  status_data = next_data
486
554
  except Exception as e:
@@ -510,6 +578,25 @@ class FirecrawlApp:
510
578
  else:
511
579
  self._handle_error(response, 'check batch scrape status')
512
580
 
581
+ def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]:
582
+ """
583
+ Returns information about batch scrape errors.
584
+
585
+ Args:
586
+ id (str): The ID of the batch scrape job.
587
+
588
+ Returns:
589
+ Dict[str, Any]: Information about batch scrape errors.
590
+ """
591
+ headers = self._prepare_headers()
592
+ response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers)
593
+ if response.status_code == 200:
594
+ try:
595
+ return response.json()
596
+ except:
597
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
598
+ else:
599
+ self._handle_error(response, "check batch scrape errors")
513
600
 
514
601
  def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
515
602
  """
@@ -524,8 +611,8 @@ class FirecrawlApp:
524
611
  """
525
612
  headers = self._prepare_headers()
526
613
 
527
- if not params or not params.get('prompt'):
528
- raise ValueError("Prompt is required")
614
+ if not params or (not params.get('prompt') and not params.get('schema')):
615
+ raise ValueError("Either prompt or schema is required")
529
616
 
530
617
  schema = params.get('schema')
531
618
  if schema:
@@ -537,7 +624,8 @@ class FirecrawlApp:
537
624
  jsonData = {'urls': urls, **params}
538
625
  request_data = {
539
626
  **jsonData,
540
- 'allowExternalLinks': params.get('allow_external_links', False),
627
+ 'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)),
628
+ 'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
541
629
  'schema': schema,
542
630
  'origin': 'api-sdk'
543
631
  }
@@ -550,7 +638,10 @@ class FirecrawlApp:
550
638
  headers
551
639
  )
552
640
  if response.status_code == 200:
553
- data = response.json()
641
+ try:
642
+ data = response.json()
643
+ except:
644
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
554
645
  if data['success']:
555
646
  job_id = data.get('id')
556
647
  if not job_id:
@@ -563,7 +654,10 @@ class FirecrawlApp:
563
654
  headers
564
655
  )
565
656
  if status_response.status_code == 200:
566
- status_data = status_response.json()
657
+ try:
658
+ status_data = status_response.json()
659
+ except:
660
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
567
661
  if status_data['status'] == 'completed':
568
662
  if status_data['success']:
569
663
  return status_data
@@ -601,7 +695,10 @@ class FirecrawlApp:
601
695
  try:
602
696
  response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
603
697
  if response.status_code == 200:
604
- return response.json()
698
+ try:
699
+ return response.json()
700
+ except:
701
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
605
702
  else:
606
703
  self._handle_error(response, "get extract status")
607
704
  except Exception as e:
@@ -641,7 +738,10 @@ class FirecrawlApp:
641
738
  try:
642
739
  response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
643
740
  if response.status_code == 200:
644
- return response.json()
741
+ try:
742
+ return response.json()
743
+ except:
744
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
645
745
  else:
646
746
  self._handle_error(response, "async extract")
647
747
  except Exception as e:
@@ -771,16 +871,22 @@ class FirecrawlApp:
771
871
 
772
872
  status_response = self._get_request(api_url, headers)
773
873
  if status_response.status_code == 200:
774
- status_data = status_response.json()
874
+ try:
875
+ status_data = status_response.json()
876
+ except:
877
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
775
878
  if status_data['status'] == 'completed':
776
879
  if 'data' in status_data:
777
880
  data = status_data['data']
778
881
  while 'next' in status_data:
779
- if len(status_data['data']) == 0:
780
- break
781
- status_response = self._get_request(status_data['next'], headers)
782
- status_data = status_response.json()
783
- data.extend(status_data.get('data', []))
882
+ if len(status_data['data']) == 0:
883
+ break
884
+ status_response = self._get_request(status_data['next'], headers)
885
+ try:
886
+ status_data = status_response.json()
887
+ except:
888
+ raise Exception(f'Failed to parse Firecrawl response as JSON.')
889
+ data.extend(status_data.get('data', []))
784
890
  status_data['data'] = data
785
891
  return status_data
786
892
  else:
@@ -804,8 +910,12 @@ class FirecrawlApp:
804
910
  Raises:
805
911
  Exception: An exception with a message containing the status code and error details from the response.
806
912
  """
807
- error_message = response.json().get('error', 'No error message provided.')
808
- error_details = response.json().get('details', 'No additional error details provided.')
913
+ try:
914
+ error_message = response.json().get('error', 'No error message provided.')
915
+ error_details = response.json().get('details', 'No additional error details provided.')
916
+ except:
917
+ raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
918
+
809
919
 
810
920
  if response.status_code == 402:
811
921
  message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: firecrawl
3
- Version: 1.9.0
3
+ Version: 1.10.1
4
4
  Summary: Python SDK for Firecrawl API
5
5
  Home-page: https://github.com/mendableai/firecrawl
6
6
  Author: Mendable.ai
File without changes
File without changes
File without changes
File without changes
File without changes