firecrawl 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- firecrawl/__init__.py +1 -1
- firecrawl/firecrawl.py +137 -27
- {firecrawl-1.9.0.dist-info → firecrawl-1.10.1.dist-info}/METADATA +1 -1
- firecrawl-1.10.1.dist-info/RECORD +11 -0
- firecrawl-1.9.0.dist-info/RECORD +0 -11
- {firecrawl-1.9.0.dist-info → firecrawl-1.10.1.dist-info}/LICENSE +0 -0
- {firecrawl-1.9.0.dist-info → firecrawl-1.10.1.dist-info}/WHEEL +0 -0
- {firecrawl-1.9.0.dist-info → firecrawl-1.10.1.dist-info}/top_level.txt +0 -0
firecrawl/__init__.py
CHANGED
firecrawl/firecrawl.py
CHANGED
|
@@ -112,6 +112,18 @@ class FirecrawlApp:
|
|
|
112
112
|
if key not in ['extract']:
|
|
113
113
|
scrape_params[key] = value
|
|
114
114
|
|
|
115
|
+
json = params.get("jsonOptions", {})
|
|
116
|
+
if json:
|
|
117
|
+
if 'schema' in json and hasattr(json['schema'], 'schema'):
|
|
118
|
+
json['schema'] = json['schema'].schema()
|
|
119
|
+
scrape_params['jsonOptions'] = json
|
|
120
|
+
|
|
121
|
+
# Include any other params directly at the top level of scrape_params
|
|
122
|
+
for key, value in params.items():
|
|
123
|
+
if key not in ['jsonOptions']:
|
|
124
|
+
scrape_params[key] = value
|
|
125
|
+
|
|
126
|
+
|
|
115
127
|
endpoint = f'/v1/scrape'
|
|
116
128
|
# Make the POST request with the prepared headers and JSON data
|
|
117
129
|
response = requests.post(
|
|
@@ -120,7 +132,10 @@ class FirecrawlApp:
|
|
|
120
132
|
json=scrape_params,
|
|
121
133
|
)
|
|
122
134
|
if response.status_code == 200:
|
|
123
|
-
|
|
135
|
+
try:
|
|
136
|
+
response = response.json()
|
|
137
|
+
except:
|
|
138
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
124
139
|
if response['success'] and 'data' in response:
|
|
125
140
|
return response['data']
|
|
126
141
|
elif "error" in response:
|
|
@@ -159,7 +174,10 @@ class FirecrawlApp:
|
|
|
159
174
|
if response.status_code != 200:
|
|
160
175
|
raise Exception(f"Request failed with status code {response.status_code}")
|
|
161
176
|
|
|
162
|
-
|
|
177
|
+
try:
|
|
178
|
+
return response.json()
|
|
179
|
+
except:
|
|
180
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
163
181
|
|
|
164
182
|
def crawl_url(self, url: str,
|
|
165
183
|
params: Optional[Dict[str, Any]] = None,
|
|
@@ -194,7 +212,10 @@ class FirecrawlApp:
|
|
|
194
212
|
json_data.update(params)
|
|
195
213
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
|
196
214
|
if response.status_code == 200:
|
|
197
|
-
|
|
215
|
+
try:
|
|
216
|
+
id = response.json().get('id')
|
|
217
|
+
except:
|
|
218
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
198
219
|
return self._monitor_job_status(id, headers, poll_interval)
|
|
199
220
|
|
|
200
221
|
else:
|
|
@@ -223,7 +244,10 @@ class FirecrawlApp:
|
|
|
223
244
|
json_data.update(params)
|
|
224
245
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
|
225
246
|
if response.status_code == 200:
|
|
226
|
-
|
|
247
|
+
try:
|
|
248
|
+
return response.json()
|
|
249
|
+
except:
|
|
250
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
227
251
|
else:
|
|
228
252
|
self._handle_error(response, 'start crawl job')
|
|
229
253
|
|
|
@@ -245,7 +269,10 @@ class FirecrawlApp:
|
|
|
245
269
|
headers = self._prepare_headers()
|
|
246
270
|
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
|
247
271
|
if response.status_code == 200:
|
|
248
|
-
|
|
272
|
+
try:
|
|
273
|
+
status_data = response.json()
|
|
274
|
+
except:
|
|
275
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
249
276
|
if status_data['status'] == 'completed':
|
|
250
277
|
if 'data' in status_data:
|
|
251
278
|
data = status_data['data']
|
|
@@ -261,7 +288,10 @@ class FirecrawlApp:
|
|
|
261
288
|
if status_response.status_code != 200:
|
|
262
289
|
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
|
263
290
|
break
|
|
264
|
-
|
|
291
|
+
try:
|
|
292
|
+
next_data = status_response.json()
|
|
293
|
+
except:
|
|
294
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
265
295
|
data.extend(next_data.get('data', []))
|
|
266
296
|
status_data = next_data
|
|
267
297
|
except Exception as e:
|
|
@@ -291,6 +321,26 @@ class FirecrawlApp:
|
|
|
291
321
|
else:
|
|
292
322
|
self._handle_error(response, 'check crawl status')
|
|
293
323
|
|
|
324
|
+
def check_crawl_errors(self, id: str) -> Dict[str, Any]:
|
|
325
|
+
"""
|
|
326
|
+
Returns information about crawl errors.
|
|
327
|
+
|
|
328
|
+
Args:
|
|
329
|
+
id (str): The ID of the crawl job.
|
|
330
|
+
|
|
331
|
+
Returns:
|
|
332
|
+
Dict[str, Any]: Information about crawl errors.
|
|
333
|
+
"""
|
|
334
|
+
headers = self._prepare_headers()
|
|
335
|
+
response = self._get_request(f'{self.api_url}/v1/crawl/{id}/errors', headers)
|
|
336
|
+
if response.status_code == 200:
|
|
337
|
+
try:
|
|
338
|
+
return response.json()
|
|
339
|
+
except:
|
|
340
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
341
|
+
else:
|
|
342
|
+
self._handle_error(response, "check crawl errors")
|
|
343
|
+
|
|
294
344
|
def cancel_crawl(self, id: str) -> Dict[str, Any]:
|
|
295
345
|
"""
|
|
296
346
|
Cancel an asynchronous crawl job using the Firecrawl API.
|
|
@@ -304,7 +354,10 @@ class FirecrawlApp:
|
|
|
304
354
|
headers = self._prepare_headers()
|
|
305
355
|
response = self._delete_request(f'{self.api_url}/v1/crawl/{id}', headers)
|
|
306
356
|
if response.status_code == 200:
|
|
307
|
-
|
|
357
|
+
try:
|
|
358
|
+
return response.json()
|
|
359
|
+
except:
|
|
360
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
308
361
|
else:
|
|
309
362
|
self._handle_error(response, "cancel crawl job")
|
|
310
363
|
|
|
@@ -352,7 +405,10 @@ class FirecrawlApp:
|
|
|
352
405
|
json=json_data,
|
|
353
406
|
)
|
|
354
407
|
if response.status_code == 200:
|
|
355
|
-
|
|
408
|
+
try:
|
|
409
|
+
response = response.json()
|
|
410
|
+
except:
|
|
411
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
356
412
|
if response['success'] and 'links' in response:
|
|
357
413
|
return response
|
|
358
414
|
elif 'error' in response:
|
|
@@ -395,7 +451,10 @@ class FirecrawlApp:
|
|
|
395
451
|
json_data.update(params)
|
|
396
452
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
|
397
453
|
if response.status_code == 200:
|
|
398
|
-
|
|
454
|
+
try:
|
|
455
|
+
id = response.json().get('id')
|
|
456
|
+
except:
|
|
457
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
399
458
|
return self._monitor_job_status(id, headers, poll_interval)
|
|
400
459
|
|
|
401
460
|
else:
|
|
@@ -424,7 +483,10 @@ class FirecrawlApp:
|
|
|
424
483
|
json_data.update(params)
|
|
425
484
|
response = self._post_request(f'{self.api_url}{endpoint}', json_data, headers)
|
|
426
485
|
if response.status_code == 200:
|
|
427
|
-
|
|
486
|
+
try:
|
|
487
|
+
return response.json()
|
|
488
|
+
except:
|
|
489
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
428
490
|
else:
|
|
429
491
|
self._handle_error(response, 'start batch scrape job')
|
|
430
492
|
|
|
@@ -464,7 +526,10 @@ class FirecrawlApp:
|
|
|
464
526
|
headers = self._prepare_headers()
|
|
465
527
|
response = self._get_request(f'{self.api_url}{endpoint}', headers)
|
|
466
528
|
if response.status_code == 200:
|
|
467
|
-
|
|
529
|
+
try:
|
|
530
|
+
status_data = response.json()
|
|
531
|
+
except:
|
|
532
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
468
533
|
if status_data['status'] == 'completed':
|
|
469
534
|
if 'data' in status_data:
|
|
470
535
|
data = status_data['data']
|
|
@@ -480,7 +545,10 @@ class FirecrawlApp:
|
|
|
480
545
|
if status_response.status_code != 200:
|
|
481
546
|
logger.error(f"Failed to fetch next page: {status_response.status_code}")
|
|
482
547
|
break
|
|
483
|
-
|
|
548
|
+
try:
|
|
549
|
+
next_data = status_response.json()
|
|
550
|
+
except:
|
|
551
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
484
552
|
data.extend(next_data.get('data', []))
|
|
485
553
|
status_data = next_data
|
|
486
554
|
except Exception as e:
|
|
@@ -510,6 +578,25 @@ class FirecrawlApp:
|
|
|
510
578
|
else:
|
|
511
579
|
self._handle_error(response, 'check batch scrape status')
|
|
512
580
|
|
|
581
|
+
def check_batch_scrape_errors(self, id: str) -> Dict[str, Any]:
|
|
582
|
+
"""
|
|
583
|
+
Returns information about batch scrape errors.
|
|
584
|
+
|
|
585
|
+
Args:
|
|
586
|
+
id (str): The ID of the batch scrape job.
|
|
587
|
+
|
|
588
|
+
Returns:
|
|
589
|
+
Dict[str, Any]: Information about batch scrape errors.
|
|
590
|
+
"""
|
|
591
|
+
headers = self._prepare_headers()
|
|
592
|
+
response = self._get_request(f'{self.api_url}/v1/batch/scrape/{id}/errors', headers)
|
|
593
|
+
if response.status_code == 200:
|
|
594
|
+
try:
|
|
595
|
+
return response.json()
|
|
596
|
+
except:
|
|
597
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
598
|
+
else:
|
|
599
|
+
self._handle_error(response, "check batch scrape errors")
|
|
513
600
|
|
|
514
601
|
def extract(self, urls: List[str], params: Optional[ExtractParams] = None) -> Any:
|
|
515
602
|
"""
|
|
@@ -524,8 +611,8 @@ class FirecrawlApp:
|
|
|
524
611
|
"""
|
|
525
612
|
headers = self._prepare_headers()
|
|
526
613
|
|
|
527
|
-
if not params or not params.get('prompt'):
|
|
528
|
-
raise ValueError("Prompt is required")
|
|
614
|
+
if not params or (not params.get('prompt') and not params.get('schema')):
|
|
615
|
+
raise ValueError("Either prompt or schema is required")
|
|
529
616
|
|
|
530
617
|
schema = params.get('schema')
|
|
531
618
|
if schema:
|
|
@@ -537,7 +624,8 @@ class FirecrawlApp:
|
|
|
537
624
|
jsonData = {'urls': urls, **params}
|
|
538
625
|
request_data = {
|
|
539
626
|
**jsonData,
|
|
540
|
-
'allowExternalLinks': params.get('allow_external_links', False),
|
|
627
|
+
'allowExternalLinks': params.get('allow_external_links', params.get('allowExternalLinks', False)),
|
|
628
|
+
'enableWebSearch': params.get('enable_web_search', params.get('enableWebSearch', False)),
|
|
541
629
|
'schema': schema,
|
|
542
630
|
'origin': 'api-sdk'
|
|
543
631
|
}
|
|
@@ -550,7 +638,10 @@ class FirecrawlApp:
|
|
|
550
638
|
headers
|
|
551
639
|
)
|
|
552
640
|
if response.status_code == 200:
|
|
553
|
-
|
|
641
|
+
try:
|
|
642
|
+
data = response.json()
|
|
643
|
+
except:
|
|
644
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
554
645
|
if data['success']:
|
|
555
646
|
job_id = data.get('id')
|
|
556
647
|
if not job_id:
|
|
@@ -563,7 +654,10 @@ class FirecrawlApp:
|
|
|
563
654
|
headers
|
|
564
655
|
)
|
|
565
656
|
if status_response.status_code == 200:
|
|
566
|
-
|
|
657
|
+
try:
|
|
658
|
+
status_data = status_response.json()
|
|
659
|
+
except:
|
|
660
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
567
661
|
if status_data['status'] == 'completed':
|
|
568
662
|
if status_data['success']:
|
|
569
663
|
return status_data
|
|
@@ -601,7 +695,10 @@ class FirecrawlApp:
|
|
|
601
695
|
try:
|
|
602
696
|
response = self._get_request(f'{self.api_url}/v1/extract/{job_id}', headers)
|
|
603
697
|
if response.status_code == 200:
|
|
604
|
-
|
|
698
|
+
try:
|
|
699
|
+
return response.json()
|
|
700
|
+
except:
|
|
701
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
605
702
|
else:
|
|
606
703
|
self._handle_error(response, "get extract status")
|
|
607
704
|
except Exception as e:
|
|
@@ -641,7 +738,10 @@ class FirecrawlApp:
|
|
|
641
738
|
try:
|
|
642
739
|
response = self._post_request(f'{self.api_url}/v1/extract', request_data, headers)
|
|
643
740
|
if response.status_code == 200:
|
|
644
|
-
|
|
741
|
+
try:
|
|
742
|
+
return response.json()
|
|
743
|
+
except:
|
|
744
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
645
745
|
else:
|
|
646
746
|
self._handle_error(response, "async extract")
|
|
647
747
|
except Exception as e:
|
|
@@ -771,16 +871,22 @@ class FirecrawlApp:
|
|
|
771
871
|
|
|
772
872
|
status_response = self._get_request(api_url, headers)
|
|
773
873
|
if status_response.status_code == 200:
|
|
774
|
-
|
|
874
|
+
try:
|
|
875
|
+
status_data = status_response.json()
|
|
876
|
+
except:
|
|
877
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
775
878
|
if status_data['status'] == 'completed':
|
|
776
879
|
if 'data' in status_data:
|
|
777
880
|
data = status_data['data']
|
|
778
881
|
while 'next' in status_data:
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
882
|
+
if len(status_data['data']) == 0:
|
|
883
|
+
break
|
|
884
|
+
status_response = self._get_request(status_data['next'], headers)
|
|
885
|
+
try:
|
|
886
|
+
status_data = status_response.json()
|
|
887
|
+
except:
|
|
888
|
+
raise Exception(f'Failed to parse Firecrawl response as JSON.')
|
|
889
|
+
data.extend(status_data.get('data', []))
|
|
784
890
|
status_data['data'] = data
|
|
785
891
|
return status_data
|
|
786
892
|
else:
|
|
@@ -804,8 +910,12 @@ class FirecrawlApp:
|
|
|
804
910
|
Raises:
|
|
805
911
|
Exception: An exception with a message containing the status code and error details from the response.
|
|
806
912
|
"""
|
|
807
|
-
|
|
808
|
-
|
|
913
|
+
try:
|
|
914
|
+
error_message = response.json().get('error', 'No error message provided.')
|
|
915
|
+
error_details = response.json().get('details', 'No additional error details provided.')
|
|
916
|
+
except:
|
|
917
|
+
raise requests.exceptions.HTTPError(f'Failed to parse Firecrawl error response as JSON. Status code: {response.status_code}', response=response)
|
|
918
|
+
|
|
809
919
|
|
|
810
920
|
if response.status_code == 402:
|
|
811
921
|
message = f"Payment Required: Failed to {action}. {error_message} - {error_details}"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
firecrawl/__init__.py,sha256=SIBff1i-wnosNeyzLZJy204vGizDLNZgH-F2LNYScbs,2544
|
|
2
|
+
firecrawl/firecrawl.py,sha256=7hvNQSAc59MqgzLlRql0pbGpNO-VoNuK38u8IaPF20U,40629
|
|
3
|
+
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
firecrawl/__tests__/e2e_withAuth/test.py,sha256=6OawnVF4IPeGyXg_Izi3t8U7MyT90roaJBJIG5UfllM,7935
|
|
5
|
+
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=tL5kJJ4el37Wc-Z2TRSuSWwWG2M40h3VPxHYuWijD00,19888
|
|
7
|
+
firecrawl-1.10.1.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
+
firecrawl-1.10.1.dist-info/METADATA,sha256=H8PyHIE5Rjq3KbOT50IQyKLVLyH-_T34nWG3gZSQvUs,10632
|
|
9
|
+
firecrawl-1.10.1.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
+
firecrawl-1.10.1.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
+
firecrawl-1.10.1.dist-info/RECORD,,
|
firecrawl-1.9.0.dist-info/RECORD
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
firecrawl/__init__.py,sha256=5ubhPauv4SGtK_XBudjfb2AgdfGzSMetytrO2nb9QII,2543
|
|
2
|
-
firecrawl/firecrawl.py,sha256=VuSKgvzxF3G-1MWK7INR1NBae3jYx6kES-kDyqkqD40,35962
|
|
3
|
-
firecrawl/__tests__/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
firecrawl/__tests__/e2e_withAuth/test.py,sha256=6OawnVF4IPeGyXg_Izi3t8U7MyT90roaJBJIG5UfllM,7935
|
|
5
|
-
firecrawl/__tests__/v1/e2e_withAuth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
firecrawl/__tests__/v1/e2e_withAuth/test.py,sha256=tL5kJJ4el37Wc-Z2TRSuSWwWG2M40h3VPxHYuWijD00,19888
|
|
7
|
-
firecrawl-1.9.0.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
|
|
8
|
-
firecrawl-1.9.0.dist-info/METADATA,sha256=Hz7bNsZqTKMLpZ-wP3myJXRSM1MOUbCoouQy9DIk78c,10631
|
|
9
|
-
firecrawl-1.9.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
10
|
-
firecrawl-1.9.0.dist-info/top_level.txt,sha256=jTvz79zWhiyAezfmmHe4FQ-hR60C59UU5FrjMjijLu8,10
|
|
11
|
-
firecrawl-1.9.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|