llama-cloud 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +32 -22
- llama_cloud/client.py +0 -3
- llama_cloud/resources/__init__.py +14 -8
- llama_cloud/resources/chat_apps/client.py +99 -133
- llama_cloud/resources/files/client.py +34 -6
- llama_cloud/resources/llama_extract/__init__.py +16 -2
- llama_cloud/resources/llama_extract/client.py +238 -366
- llama_cloud/resources/llama_extract/types/__init__.py +14 -3
- llama_cloud/resources/llama_extract/types/extract_agent_create_data_schema.py +9 -0
- llama_cloud/resources/llama_extract/types/{extract_agent_create_data_schema_value.py → extract_agent_create_data_schema_zero_value.py} +1 -1
- llama_cloud/resources/llama_extract/types/extract_agent_update_data_schema.py +9 -0
- llama_cloud/resources/{extraction/types/extraction_schema_create_data_schema_value.py → llama_extract/types/extract_agent_update_data_schema_zero_value.py} +1 -1
- llama_cloud/resources/llama_extract/types/extract_schema_validate_request_data_schema.py +9 -0
- llama_cloud/resources/llama_extract/types/extract_schema_validate_request_data_schema_zero_value.py +7 -0
- llama_cloud/resources/organizations/client.py +8 -12
- llama_cloud/resources/parsing/client.py +146 -18
- llama_cloud/resources/reports/client.py +30 -26
- llama_cloud/resources/retrievers/client.py +16 -4
- llama_cloud/types/__init__.py +20 -12
- llama_cloud/types/chat_app.py +11 -9
- llama_cloud/types/chat_app_response.py +12 -10
- llama_cloud/types/cloud_mongo_db_atlas_vector_search.py +1 -0
- llama_cloud/types/extract_job.py +3 -1
- llama_cloud/types/extract_job_create.py +4 -2
- llama_cloud/types/extract_job_create_data_schema_override.py +9 -0
- llama_cloud/{resources/extraction/types/extraction_schema_update_data_schema_value.py → types/extract_job_create_data_schema_override_zero_value.py} +1 -1
- llama_cloud/types/extract_resultset.py +2 -6
- llama_cloud/types/extract_run.py +5 -0
- llama_cloud/types/extract_run_data.py +11 -0
- llama_cloud/types/extract_run_data_item_value.py +5 -0
- llama_cloud/types/extract_run_data_zero_value.py +5 -0
- llama_cloud/{resources/llama_extract/types/extract_agent_update_data_schema_value.py → types/extract_run_extraction_metadata_value.py} +1 -1
- llama_cloud/types/{extraction_job.py → extract_schema_validate_response.py} +3 -6
- llama_cloud/types/extract_schema_validate_response_data_schema_value.py +7 -0
- llama_cloud/types/extract_state.py +4 -4
- llama_cloud/types/llama_extract_settings.py +4 -0
- llama_cloud/types/llama_parse_parameters.py +11 -0
- llama_cloud/types/plan.py +4 -0
- llama_cloud/types/{extraction_result.py → preset_composite_retrieval_params.py} +5 -14
- llama_cloud/types/{extraction_schema.py → report_file_info.py} +5 -9
- llama_cloud/types/report_metadata.py +2 -1
- {llama_cloud-0.1.8.dist-info → llama_cloud-0.1.10.dist-info}/METADATA +2 -1
- {llama_cloud-0.1.8.dist-info → llama_cloud-0.1.10.dist-info}/RECORD +45 -42
- {llama_cloud-0.1.8.dist-info → llama_cloud-0.1.10.dist-info}/WHEEL +1 -1
- llama_cloud/resources/extraction/__init__.py +0 -5
- llama_cloud/resources/extraction/client.py +0 -756
- llama_cloud/resources/extraction/types/__init__.py +0 -6
- llama_cloud/types/extract_job_create_data_schema_override_value.py +0 -7
- llama_cloud/types/extraction_result_data_value.py +0 -5
- llama_cloud/types/extraction_schema_data_schema_value.py +0 -7
- {llama_cloud-0.1.8.dist-info → llama_cloud-0.1.10.dist-info}/LICENSE +0 -0
|
@@ -103,8 +103,8 @@ class ParsingClient:
|
|
|
103
103
|
def upload_file(
|
|
104
104
|
self,
|
|
105
105
|
*,
|
|
106
|
-
project_id: typing.Optional[str] = None,
|
|
107
106
|
organization_id: typing.Optional[str] = None,
|
|
107
|
+
project_id: typing.Optional[str] = None,
|
|
108
108
|
file: typing.Optional[str] = OMIT,
|
|
109
109
|
annotate_links: bool,
|
|
110
110
|
auto_mode: bool,
|
|
@@ -137,6 +137,7 @@ class ParsingClient:
|
|
|
137
137
|
html_remove_navigation_elements: bool,
|
|
138
138
|
http_proxy: str,
|
|
139
139
|
input_s_3_path: str,
|
|
140
|
+
input_s_3_region: str,
|
|
140
141
|
input_url: str,
|
|
141
142
|
invalidate_cache: bool,
|
|
142
143
|
is_formatting_instruction: bool,
|
|
@@ -145,12 +146,14 @@ class ParsingClient:
|
|
|
145
146
|
max_pages: typing.Optional[int] = OMIT,
|
|
146
147
|
output_pdf_of_document: bool,
|
|
147
148
|
output_s_3_path_prefix: str,
|
|
149
|
+
output_s_3_region: str,
|
|
148
150
|
page_prefix: str,
|
|
149
151
|
page_separator: str,
|
|
150
152
|
page_suffix: str,
|
|
151
153
|
parsing_instruction: str,
|
|
152
154
|
premium_mode: bool,
|
|
153
155
|
skip_diagonal_text: bool,
|
|
156
|
+
spreadsheet_extract_sub_tables: bool,
|
|
154
157
|
structured_output: bool,
|
|
155
158
|
structured_output_json_schema: str,
|
|
156
159
|
structured_output_json_schema_name: str,
|
|
@@ -163,15 +166,23 @@ class ParsingClient:
|
|
|
163
166
|
bounding_box: str,
|
|
164
167
|
gpt_4_o_mode: bool,
|
|
165
168
|
gpt_4_o_api_key: str,
|
|
169
|
+
job_timeout_in_seconds: float,
|
|
170
|
+
job_timeout_extra_time_per_page_in_seconds: float,
|
|
171
|
+
strict_mode_image_extraction: bool,
|
|
172
|
+
strict_mode_image_ocr: bool,
|
|
173
|
+
strict_mode_reconstruction: bool,
|
|
174
|
+
strict_mode_buggy_font: bool,
|
|
175
|
+
ignore_document_elements_for_layout_detection: bool,
|
|
176
|
+
output_tables_as_html: bool,
|
|
166
177
|
) -> ParsingJob:
|
|
167
178
|
"""
|
|
168
179
|
Upload a file to s3 and create a job. return a job id
|
|
169
180
|
|
|
170
181
|
Parameters:
|
|
171
|
-
- project_id: typing.Optional[str].
|
|
172
|
-
|
|
173
182
|
- organization_id: typing.Optional[str].
|
|
174
183
|
|
|
184
|
+
- project_id: typing.Optional[str].
|
|
185
|
+
|
|
175
186
|
- file: typing.Optional[str].
|
|
176
187
|
|
|
177
188
|
- annotate_links: bool.
|
|
@@ -236,6 +247,8 @@ class ParsingClient:
|
|
|
236
247
|
|
|
237
248
|
- input_s_3_path: str.
|
|
238
249
|
|
|
250
|
+
- input_s_3_region: str.
|
|
251
|
+
|
|
239
252
|
- input_url: str.
|
|
240
253
|
|
|
241
254
|
- invalidate_cache: bool.
|
|
@@ -252,6 +265,8 @@ class ParsingClient:
|
|
|
252
265
|
|
|
253
266
|
- output_s_3_path_prefix: str.
|
|
254
267
|
|
|
268
|
+
- output_s_3_region: str.
|
|
269
|
+
|
|
255
270
|
- page_prefix: str.
|
|
256
271
|
|
|
257
272
|
- page_separator: str.
|
|
@@ -264,6 +279,8 @@ class ParsingClient:
|
|
|
264
279
|
|
|
265
280
|
- skip_diagonal_text: bool.
|
|
266
281
|
|
|
282
|
+
- spreadsheet_extract_sub_tables: bool.
|
|
283
|
+
|
|
267
284
|
- structured_output: bool.
|
|
268
285
|
|
|
269
286
|
- structured_output_json_schema: str.
|
|
@@ -287,6 +304,22 @@ class ParsingClient:
|
|
|
287
304
|
- gpt_4_o_mode: bool.
|
|
288
305
|
|
|
289
306
|
- gpt_4_o_api_key: str.
|
|
307
|
+
|
|
308
|
+
- job_timeout_in_seconds: float.
|
|
309
|
+
|
|
310
|
+
- job_timeout_extra_time_per_page_in_seconds: float.
|
|
311
|
+
|
|
312
|
+
- strict_mode_image_extraction: bool.
|
|
313
|
+
|
|
314
|
+
- strict_mode_image_ocr: bool.
|
|
315
|
+
|
|
316
|
+
- strict_mode_reconstruction: bool.
|
|
317
|
+
|
|
318
|
+
- strict_mode_buggy_font: bool.
|
|
319
|
+
|
|
320
|
+
- ignore_document_elements_for_layout_detection: bool.
|
|
321
|
+
|
|
322
|
+
- output_tables_as_html: bool.
|
|
290
323
|
"""
|
|
291
324
|
_request: typing.Dict[str, typing.Any] = {
|
|
292
325
|
"annotate_links": annotate_links,
|
|
@@ -320,6 +353,7 @@ class ParsingClient:
|
|
|
320
353
|
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
321
354
|
"http_proxy": http_proxy,
|
|
322
355
|
"input_s3_path": input_s_3_path,
|
|
356
|
+
"input_s3_region": input_s_3_region,
|
|
323
357
|
"input_url": input_url,
|
|
324
358
|
"invalidate_cache": invalidate_cache,
|
|
325
359
|
"is_formatting_instruction": is_formatting_instruction,
|
|
@@ -327,12 +361,14 @@ class ParsingClient:
|
|
|
327
361
|
"extract_layout": extract_layout,
|
|
328
362
|
"output_pdf_of_document": output_pdf_of_document,
|
|
329
363
|
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
364
|
+
"output_s3_region": output_s_3_region,
|
|
330
365
|
"page_prefix": page_prefix,
|
|
331
366
|
"page_separator": page_separator,
|
|
332
367
|
"page_suffix": page_suffix,
|
|
333
368
|
"parsing_instruction": parsing_instruction,
|
|
334
369
|
"premium_mode": premium_mode,
|
|
335
370
|
"skip_diagonal_text": skip_diagonal_text,
|
|
371
|
+
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
336
372
|
"structured_output": structured_output,
|
|
337
373
|
"structured_output_json_schema": structured_output_json_schema,
|
|
338
374
|
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
@@ -345,6 +381,14 @@ class ParsingClient:
|
|
|
345
381
|
"bounding_box": bounding_box,
|
|
346
382
|
"gpt4o_mode": gpt_4_o_mode,
|
|
347
383
|
"gpt4o_api_key": gpt_4_o_api_key,
|
|
384
|
+
"job_timeout_in_seconds": job_timeout_in_seconds,
|
|
385
|
+
"job_timeout_extra_time_per_page_in_seconds": job_timeout_extra_time_per_page_in_seconds,
|
|
386
|
+
"strict_mode_image_extraction": strict_mode_image_extraction,
|
|
387
|
+
"strict_mode_image_ocr": strict_mode_image_ocr,
|
|
388
|
+
"strict_mode_reconstruction": strict_mode_reconstruction,
|
|
389
|
+
"strict_mode_buggy_font": strict_mode_buggy_font,
|
|
390
|
+
"ignore_document_elements_for_layout_detection": ignore_document_elements_for_layout_detection,
|
|
391
|
+
"output_tables_as_HTML": output_tables_as_html,
|
|
348
392
|
}
|
|
349
393
|
if file is not OMIT:
|
|
350
394
|
_request["file"] = file
|
|
@@ -353,7 +397,7 @@ class ParsingClient:
|
|
|
353
397
|
_response = self._client_wrapper.httpx_client.request(
|
|
354
398
|
"POST",
|
|
355
399
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
356
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
400
|
+
params=remove_none_from_dict({"organization_id": organization_id, "project_id": project_id}),
|
|
357
401
|
json=jsonable_encoder(_request),
|
|
358
402
|
headers=self._client_wrapper.get_headers(),
|
|
359
403
|
timeout=60,
|
|
@@ -368,11 +412,13 @@ class ParsingClient:
|
|
|
368
412
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
369
413
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
370
414
|
|
|
371
|
-
def usage(self) -> ParsingUsage:
|
|
415
|
+
def usage(self, *, organization_id: typing.Optional[str] = None) -> ParsingUsage:
|
|
372
416
|
"""
|
|
373
417
|
DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
|
|
374
418
|
Get parsing usage for user
|
|
375
419
|
|
|
420
|
+
Parameters:
|
|
421
|
+
- organization_id: typing.Optional[str].
|
|
376
422
|
---
|
|
377
423
|
from llama_cloud.client import LlamaCloud
|
|
378
424
|
|
|
@@ -384,6 +430,7 @@ class ParsingClient:
|
|
|
384
430
|
_response = self._client_wrapper.httpx_client.request(
|
|
385
431
|
"GET",
|
|
386
432
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/usage"),
|
|
433
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
387
434
|
headers=self._client_wrapper.get_headers(),
|
|
388
435
|
timeout=60,
|
|
389
436
|
)
|
|
@@ -461,12 +508,14 @@ class ParsingClient:
|
|
|
461
508
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
462
509
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
463
510
|
|
|
464
|
-
def get_job_text_result(self, job_id: str) -> ParsingJobTextResult:
|
|
511
|
+
def get_job_text_result(self, job_id: str, *, organization_id: typing.Optional[str] = None) -> ParsingJobTextResult:
|
|
465
512
|
"""
|
|
466
513
|
Get a job by id
|
|
467
514
|
|
|
468
515
|
Parameters:
|
|
469
516
|
- job_id: str.
|
|
517
|
+
|
|
518
|
+
- organization_id: typing.Optional[str].
|
|
470
519
|
---
|
|
471
520
|
from llama_cloud.client import LlamaCloud
|
|
472
521
|
|
|
@@ -480,6 +529,7 @@ class ParsingClient:
|
|
|
480
529
|
_response = self._client_wrapper.httpx_client.request(
|
|
481
530
|
"GET",
|
|
482
531
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/text"),
|
|
532
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
483
533
|
headers=self._client_wrapper.get_headers(),
|
|
484
534
|
timeout=60,
|
|
485
535
|
)
|
|
@@ -527,12 +577,16 @@ class ParsingClient:
|
|
|
527
577
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
528
578
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
529
579
|
|
|
530
|
-
def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
580
|
+
def get_job_structured_result(
|
|
581
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
582
|
+
) -> ParsingJobStructuredResult:
|
|
531
583
|
"""
|
|
532
584
|
Get a job by id
|
|
533
585
|
|
|
534
586
|
Parameters:
|
|
535
587
|
- job_id: str.
|
|
588
|
+
|
|
589
|
+
- organization_id: typing.Optional[str].
|
|
536
590
|
---
|
|
537
591
|
from llama_cloud.client import LlamaCloud
|
|
538
592
|
|
|
@@ -548,6 +602,7 @@ class ParsingClient:
|
|
|
548
602
|
urllib.parse.urljoin(
|
|
549
603
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
550
604
|
),
|
|
605
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
551
606
|
headers=self._client_wrapper.get_headers(),
|
|
552
607
|
timeout=60,
|
|
553
608
|
)
|
|
@@ -629,12 +684,14 @@ class ParsingClient:
|
|
|
629
684
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
630
685
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
631
686
|
|
|
632
|
-
def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
|
|
687
|
+
def get_job_result(self, job_id: str, *, organization_id: typing.Optional[str] = None) -> ParsingJobMarkdownResult:
|
|
633
688
|
"""
|
|
634
689
|
Get a job by id
|
|
635
690
|
|
|
636
691
|
Parameters:
|
|
637
692
|
- job_id: str.
|
|
693
|
+
|
|
694
|
+
- organization_id: typing.Optional[str].
|
|
638
695
|
---
|
|
639
696
|
from llama_cloud.client import LlamaCloud
|
|
640
697
|
|
|
@@ -650,6 +707,7 @@ class ParsingClient:
|
|
|
650
707
|
urllib.parse.urljoin(
|
|
651
708
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/markdown"
|
|
652
709
|
),
|
|
710
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
653
711
|
headers=self._client_wrapper.get_headers(),
|
|
654
712
|
timeout=60,
|
|
655
713
|
)
|
|
@@ -697,12 +755,14 @@ class ParsingClient:
|
|
|
697
755
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
698
756
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
699
757
|
|
|
700
|
-
def get_job_json_result(self, job_id: str) -> ParsingJobJsonResult:
|
|
758
|
+
def get_job_json_result(self, job_id: str, *, organization_id: typing.Optional[str] = None) -> ParsingJobJsonResult:
|
|
701
759
|
"""
|
|
702
760
|
Get a job by id
|
|
703
761
|
|
|
704
762
|
Parameters:
|
|
705
763
|
- job_id: str.
|
|
764
|
+
|
|
765
|
+
- organization_id: typing.Optional[str].
|
|
706
766
|
---
|
|
707
767
|
from llama_cloud.client import LlamaCloud
|
|
708
768
|
|
|
@@ -716,6 +776,7 @@ class ParsingClient:
|
|
|
716
776
|
_response = self._client_wrapper.httpx_client.request(
|
|
717
777
|
"GET",
|
|
718
778
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/json"),
|
|
779
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
719
780
|
headers=self._client_wrapper.get_headers(),
|
|
720
781
|
timeout=60,
|
|
721
782
|
)
|
|
@@ -899,8 +960,8 @@ class AsyncParsingClient:
|
|
|
899
960
|
async def upload_file(
|
|
900
961
|
self,
|
|
901
962
|
*,
|
|
902
|
-
project_id: typing.Optional[str] = None,
|
|
903
963
|
organization_id: typing.Optional[str] = None,
|
|
964
|
+
project_id: typing.Optional[str] = None,
|
|
904
965
|
file: typing.Optional[str] = OMIT,
|
|
905
966
|
annotate_links: bool,
|
|
906
967
|
auto_mode: bool,
|
|
@@ -933,6 +994,7 @@ class AsyncParsingClient:
|
|
|
933
994
|
html_remove_navigation_elements: bool,
|
|
934
995
|
http_proxy: str,
|
|
935
996
|
input_s_3_path: str,
|
|
997
|
+
input_s_3_region: str,
|
|
936
998
|
input_url: str,
|
|
937
999
|
invalidate_cache: bool,
|
|
938
1000
|
is_formatting_instruction: bool,
|
|
@@ -941,12 +1003,14 @@ class AsyncParsingClient:
|
|
|
941
1003
|
max_pages: typing.Optional[int] = OMIT,
|
|
942
1004
|
output_pdf_of_document: bool,
|
|
943
1005
|
output_s_3_path_prefix: str,
|
|
1006
|
+
output_s_3_region: str,
|
|
944
1007
|
page_prefix: str,
|
|
945
1008
|
page_separator: str,
|
|
946
1009
|
page_suffix: str,
|
|
947
1010
|
parsing_instruction: str,
|
|
948
1011
|
premium_mode: bool,
|
|
949
1012
|
skip_diagonal_text: bool,
|
|
1013
|
+
spreadsheet_extract_sub_tables: bool,
|
|
950
1014
|
structured_output: bool,
|
|
951
1015
|
structured_output_json_schema: str,
|
|
952
1016
|
structured_output_json_schema_name: str,
|
|
@@ -959,15 +1023,23 @@ class AsyncParsingClient:
|
|
|
959
1023
|
bounding_box: str,
|
|
960
1024
|
gpt_4_o_mode: bool,
|
|
961
1025
|
gpt_4_o_api_key: str,
|
|
1026
|
+
job_timeout_in_seconds: float,
|
|
1027
|
+
job_timeout_extra_time_per_page_in_seconds: float,
|
|
1028
|
+
strict_mode_image_extraction: bool,
|
|
1029
|
+
strict_mode_image_ocr: bool,
|
|
1030
|
+
strict_mode_reconstruction: bool,
|
|
1031
|
+
strict_mode_buggy_font: bool,
|
|
1032
|
+
ignore_document_elements_for_layout_detection: bool,
|
|
1033
|
+
output_tables_as_html: bool,
|
|
962
1034
|
) -> ParsingJob:
|
|
963
1035
|
"""
|
|
964
1036
|
Upload a file to s3 and create a job. return a job id
|
|
965
1037
|
|
|
966
1038
|
Parameters:
|
|
967
|
-
- project_id: typing.Optional[str].
|
|
968
|
-
|
|
969
1039
|
- organization_id: typing.Optional[str].
|
|
970
1040
|
|
|
1041
|
+
- project_id: typing.Optional[str].
|
|
1042
|
+
|
|
971
1043
|
- file: typing.Optional[str].
|
|
972
1044
|
|
|
973
1045
|
- annotate_links: bool.
|
|
@@ -1032,6 +1104,8 @@ class AsyncParsingClient:
|
|
|
1032
1104
|
|
|
1033
1105
|
- input_s_3_path: str.
|
|
1034
1106
|
|
|
1107
|
+
- input_s_3_region: str.
|
|
1108
|
+
|
|
1035
1109
|
- input_url: str.
|
|
1036
1110
|
|
|
1037
1111
|
- invalidate_cache: bool.
|
|
@@ -1048,6 +1122,8 @@ class AsyncParsingClient:
|
|
|
1048
1122
|
|
|
1049
1123
|
- output_s_3_path_prefix: str.
|
|
1050
1124
|
|
|
1125
|
+
- output_s_3_region: str.
|
|
1126
|
+
|
|
1051
1127
|
- page_prefix: str.
|
|
1052
1128
|
|
|
1053
1129
|
- page_separator: str.
|
|
@@ -1060,6 +1136,8 @@ class AsyncParsingClient:
|
|
|
1060
1136
|
|
|
1061
1137
|
- skip_diagonal_text: bool.
|
|
1062
1138
|
|
|
1139
|
+
- spreadsheet_extract_sub_tables: bool.
|
|
1140
|
+
|
|
1063
1141
|
- structured_output: bool.
|
|
1064
1142
|
|
|
1065
1143
|
- structured_output_json_schema: str.
|
|
@@ -1083,6 +1161,22 @@ class AsyncParsingClient:
|
|
|
1083
1161
|
- gpt_4_o_mode: bool.
|
|
1084
1162
|
|
|
1085
1163
|
- gpt_4_o_api_key: str.
|
|
1164
|
+
|
|
1165
|
+
- job_timeout_in_seconds: float.
|
|
1166
|
+
|
|
1167
|
+
- job_timeout_extra_time_per_page_in_seconds: float.
|
|
1168
|
+
|
|
1169
|
+
- strict_mode_image_extraction: bool.
|
|
1170
|
+
|
|
1171
|
+
- strict_mode_image_ocr: bool.
|
|
1172
|
+
|
|
1173
|
+
- strict_mode_reconstruction: bool.
|
|
1174
|
+
|
|
1175
|
+
- strict_mode_buggy_font: bool.
|
|
1176
|
+
|
|
1177
|
+
- ignore_document_elements_for_layout_detection: bool.
|
|
1178
|
+
|
|
1179
|
+
- output_tables_as_html: bool.
|
|
1086
1180
|
"""
|
|
1087
1181
|
_request: typing.Dict[str, typing.Any] = {
|
|
1088
1182
|
"annotate_links": annotate_links,
|
|
@@ -1116,6 +1210,7 @@ class AsyncParsingClient:
|
|
|
1116
1210
|
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
1117
1211
|
"http_proxy": http_proxy,
|
|
1118
1212
|
"input_s3_path": input_s_3_path,
|
|
1213
|
+
"input_s3_region": input_s_3_region,
|
|
1119
1214
|
"input_url": input_url,
|
|
1120
1215
|
"invalidate_cache": invalidate_cache,
|
|
1121
1216
|
"is_formatting_instruction": is_formatting_instruction,
|
|
@@ -1123,12 +1218,14 @@ class AsyncParsingClient:
|
|
|
1123
1218
|
"extract_layout": extract_layout,
|
|
1124
1219
|
"output_pdf_of_document": output_pdf_of_document,
|
|
1125
1220
|
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
1221
|
+
"output_s3_region": output_s_3_region,
|
|
1126
1222
|
"page_prefix": page_prefix,
|
|
1127
1223
|
"page_separator": page_separator,
|
|
1128
1224
|
"page_suffix": page_suffix,
|
|
1129
1225
|
"parsing_instruction": parsing_instruction,
|
|
1130
1226
|
"premium_mode": premium_mode,
|
|
1131
1227
|
"skip_diagonal_text": skip_diagonal_text,
|
|
1228
|
+
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
1132
1229
|
"structured_output": structured_output,
|
|
1133
1230
|
"structured_output_json_schema": structured_output_json_schema,
|
|
1134
1231
|
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
@@ -1141,6 +1238,14 @@ class AsyncParsingClient:
|
|
|
1141
1238
|
"bounding_box": bounding_box,
|
|
1142
1239
|
"gpt4o_mode": gpt_4_o_mode,
|
|
1143
1240
|
"gpt4o_api_key": gpt_4_o_api_key,
|
|
1241
|
+
"job_timeout_in_seconds": job_timeout_in_seconds,
|
|
1242
|
+
"job_timeout_extra_time_per_page_in_seconds": job_timeout_extra_time_per_page_in_seconds,
|
|
1243
|
+
"strict_mode_image_extraction": strict_mode_image_extraction,
|
|
1244
|
+
"strict_mode_image_ocr": strict_mode_image_ocr,
|
|
1245
|
+
"strict_mode_reconstruction": strict_mode_reconstruction,
|
|
1246
|
+
"strict_mode_buggy_font": strict_mode_buggy_font,
|
|
1247
|
+
"ignore_document_elements_for_layout_detection": ignore_document_elements_for_layout_detection,
|
|
1248
|
+
"output_tables_as_HTML": output_tables_as_html,
|
|
1144
1249
|
}
|
|
1145
1250
|
if file is not OMIT:
|
|
1146
1251
|
_request["file"] = file
|
|
@@ -1149,7 +1254,7 @@ class AsyncParsingClient:
|
|
|
1149
1254
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1150
1255
|
"POST",
|
|
1151
1256
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
1152
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1257
|
+
params=remove_none_from_dict({"organization_id": organization_id, "project_id": project_id}),
|
|
1153
1258
|
json=jsonable_encoder(_request),
|
|
1154
1259
|
headers=self._client_wrapper.get_headers(),
|
|
1155
1260
|
timeout=60,
|
|
@@ -1164,11 +1269,13 @@ class AsyncParsingClient:
|
|
|
1164
1269
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1165
1270
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1166
1271
|
|
|
1167
|
-
async def usage(self) -> ParsingUsage:
|
|
1272
|
+
async def usage(self, *, organization_id: typing.Optional[str] = None) -> ParsingUsage:
|
|
1168
1273
|
"""
|
|
1169
1274
|
DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
|
|
1170
1275
|
Get parsing usage for user
|
|
1171
1276
|
|
|
1277
|
+
Parameters:
|
|
1278
|
+
- organization_id: typing.Optional[str].
|
|
1172
1279
|
---
|
|
1173
1280
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1174
1281
|
|
|
@@ -1180,6 +1287,7 @@ class AsyncParsingClient:
|
|
|
1180
1287
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1181
1288
|
"GET",
|
|
1182
1289
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/usage"),
|
|
1290
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1183
1291
|
headers=self._client_wrapper.get_headers(),
|
|
1184
1292
|
timeout=60,
|
|
1185
1293
|
)
|
|
@@ -1257,12 +1365,16 @@ class AsyncParsingClient:
|
|
|
1257
1365
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1258
1366
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1259
1367
|
|
|
1260
|
-
async def get_job_text_result(self, job_id: str) -> ParsingJobTextResult:
|
|
1368
|
+
async def get_job_text_result(
|
|
1369
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1370
|
+
) -> ParsingJobTextResult:
|
|
1261
1371
|
"""
|
|
1262
1372
|
Get a job by id
|
|
1263
1373
|
|
|
1264
1374
|
Parameters:
|
|
1265
1375
|
- job_id: str.
|
|
1376
|
+
|
|
1377
|
+
- organization_id: typing.Optional[str].
|
|
1266
1378
|
---
|
|
1267
1379
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1268
1380
|
|
|
@@ -1276,6 +1388,7 @@ class AsyncParsingClient:
|
|
|
1276
1388
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1277
1389
|
"GET",
|
|
1278
1390
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/text"),
|
|
1391
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1279
1392
|
headers=self._client_wrapper.get_headers(),
|
|
1280
1393
|
timeout=60,
|
|
1281
1394
|
)
|
|
@@ -1323,12 +1436,16 @@ class AsyncParsingClient:
|
|
|
1323
1436
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1324
1437
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1325
1438
|
|
|
1326
|
-
async def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
1439
|
+
async def get_job_structured_result(
|
|
1440
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1441
|
+
) -> ParsingJobStructuredResult:
|
|
1327
1442
|
"""
|
|
1328
1443
|
Get a job by id
|
|
1329
1444
|
|
|
1330
1445
|
Parameters:
|
|
1331
1446
|
- job_id: str.
|
|
1447
|
+
|
|
1448
|
+
- organization_id: typing.Optional[str].
|
|
1332
1449
|
---
|
|
1333
1450
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1334
1451
|
|
|
@@ -1344,6 +1461,7 @@ class AsyncParsingClient:
|
|
|
1344
1461
|
urllib.parse.urljoin(
|
|
1345
1462
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
1346
1463
|
),
|
|
1464
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1347
1465
|
headers=self._client_wrapper.get_headers(),
|
|
1348
1466
|
timeout=60,
|
|
1349
1467
|
)
|
|
@@ -1425,12 +1543,16 @@ class AsyncParsingClient:
|
|
|
1425
1543
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1426
1544
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1427
1545
|
|
|
1428
|
-
async def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
|
|
1546
|
+
async def get_job_result(
|
|
1547
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1548
|
+
) -> ParsingJobMarkdownResult:
|
|
1429
1549
|
"""
|
|
1430
1550
|
Get a job by id
|
|
1431
1551
|
|
|
1432
1552
|
Parameters:
|
|
1433
1553
|
- job_id: str.
|
|
1554
|
+
|
|
1555
|
+
- organization_id: typing.Optional[str].
|
|
1434
1556
|
---
|
|
1435
1557
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1436
1558
|
|
|
@@ -1446,6 +1568,7 @@ class AsyncParsingClient:
|
|
|
1446
1568
|
urllib.parse.urljoin(
|
|
1447
1569
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/markdown"
|
|
1448
1570
|
),
|
|
1571
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1449
1572
|
headers=self._client_wrapper.get_headers(),
|
|
1450
1573
|
timeout=60,
|
|
1451
1574
|
)
|
|
@@ -1493,12 +1616,16 @@ class AsyncParsingClient:
|
|
|
1493
1616
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1494
1617
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1495
1618
|
|
|
1496
|
-
async def get_job_json_result(self, job_id: str) -> ParsingJobJsonResult:
|
|
1619
|
+
async def get_job_json_result(
|
|
1620
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1621
|
+
) -> ParsingJobJsonResult:
|
|
1497
1622
|
"""
|
|
1498
1623
|
Get a job by id
|
|
1499
1624
|
|
|
1500
1625
|
Parameters:
|
|
1501
1626
|
- job_id: str.
|
|
1627
|
+
|
|
1628
|
+
- organization_id: typing.Optional[str].
|
|
1502
1629
|
---
|
|
1503
1630
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1504
1631
|
|
|
@@ -1512,6 +1639,7 @@ class AsyncParsingClient:
|
|
|
1512
1639
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1513
1640
|
"GET",
|
|
1514
1641
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/json"),
|
|
1642
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1515
1643
|
headers=self._client_wrapper.get_headers(),
|
|
1516
1644
|
timeout=60,
|
|
1517
1645
|
)
|
|
@@ -47,9 +47,10 @@ class ReportsClient:
|
|
|
47
47
|
organization_id: typing.Optional[str] = None,
|
|
48
48
|
name: str,
|
|
49
49
|
template_text: str,
|
|
50
|
-
template_instructions: str,
|
|
50
|
+
template_instructions: typing.Optional[str] = OMIT,
|
|
51
|
+
existing_retriever_id: typing.Optional[str] = OMIT,
|
|
51
52
|
files: typing.List[str],
|
|
52
|
-
template_file: typing.IO,
|
|
53
|
+
template_file: typing.Optional[str] = OMIT,
|
|
53
54
|
) -> ReportCreateResponse:
|
|
54
55
|
"""
|
|
55
56
|
Create a new report.
|
|
@@ -63,25 +64,26 @@ class ReportsClient:
|
|
|
63
64
|
|
|
64
65
|
- template_text: str.
|
|
65
66
|
|
|
66
|
-
- template_instructions: str.
|
|
67
|
+
- template_instructions: typing.Optional[str].
|
|
68
|
+
|
|
69
|
+
- existing_retriever_id: typing.Optional[str].
|
|
67
70
|
|
|
68
71
|
- files: typing.List[str].
|
|
69
72
|
|
|
70
|
-
- template_file: typing.IO.
|
|
73
|
+
- template_file: typing.Optional[str].
|
|
71
74
|
"""
|
|
75
|
+
_request: typing.Dict[str, typing.Any] = {"name": name, "template_text": template_text, "files": files}
|
|
76
|
+
if template_instructions is not OMIT:
|
|
77
|
+
_request["template_instructions"] = template_instructions
|
|
78
|
+
if existing_retriever_id is not OMIT:
|
|
79
|
+
_request["existing_retriever_id"] = existing_retriever_id
|
|
80
|
+
if template_file is not OMIT:
|
|
81
|
+
_request["template_file"] = template_file
|
|
72
82
|
_response = self._client_wrapper.httpx_client.request(
|
|
73
83
|
"POST",
|
|
74
84
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/reports"),
|
|
75
85
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
76
|
-
data=jsonable_encoder(
|
77
|
-
{
|
|
78
|
-
"name": name,
|
|
79
|
-
"template_text": template_text,
|
|
80
|
-
"template_instructions": template_instructions,
|
|
81
|
-
"files": files,
|
|
82
|
-
}
|
|
83
|
-
),
|
|
84
|
-
files={"template_file": template_file},
|
|
86
|
+
json=jsonable_encoder(_request),
|
|
85
87
|
headers=self._client_wrapper.get_headers(),
|
|
86
88
|
timeout=60,
|
|
87
89
|
)
|
|
@@ -628,9 +630,10 @@ class AsyncReportsClient:
|
|
|
628
630
|
organization_id: typing.Optional[str] = None,
|
|
629
631
|
name: str,
|
|
630
632
|
template_text: str,
|
|
631
|
-
template_instructions: str,
|
|
633
|
+
template_instructions: typing.Optional[str] = OMIT,
|
|
634
|
+
existing_retriever_id: typing.Optional[str] = OMIT,
|
|
632
635
|
files: typing.List[str],
|
|
633
|
-
template_file: typing.IO,
|
|
636
|
+
template_file: typing.Optional[str] = OMIT,
|
|
634
637
|
) -> ReportCreateResponse:
|
|
635
638
|
"""
|
|
636
639
|
Create a new report.
|
|
@@ -644,25 +647,26 @@ class AsyncReportsClient:
|
|
|
644
647
|
|
|
645
648
|
- template_text: str.
|
|
646
649
|
|
|
647
|
-
- template_instructions: str.
|
|
650
|
+
- template_instructions: typing.Optional[str].
|
|
651
|
+
|
|
652
|
+
- existing_retriever_id: typing.Optional[str].
|
|
648
653
|
|
|
649
654
|
- files: typing.List[str].
|
|
650
655
|
|
|
651
|
-
- template_file: typing.IO.
|
|
656
|
+
- template_file: typing.Optional[str].
|
|
652
657
|
"""
|
|
658
|
+
_request: typing.Dict[str, typing.Any] = {"name": name, "template_text": template_text, "files": files}
|
|
659
|
+
if template_instructions is not OMIT:
|
|
660
|
+
_request["template_instructions"] = template_instructions
|
|
661
|
+
if existing_retriever_id is not OMIT:
|
|
662
|
+
_request["existing_retriever_id"] = existing_retriever_id
|
|
663
|
+
if template_file is not OMIT:
|
|
664
|
+
_request["template_file"] = template_file
|
|
653
665
|
_response = await self._client_wrapper.httpx_client.request(
|
|
654
666
|
"POST",
|
|
655
667
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/reports"),
|
|
656
668
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
657
|
-
data=jsonable_encoder(
|
658
|
-
{
|
|
659
|
-
"name": name,
|
|
660
|
-
"template_text": template_text,
|
|
661
|
-
"template_instructions": template_instructions,
|
|
662
|
-
"files": files,
|
|
663
|
-
}
|
|
664
|
-
),
|
|
665
|
-
files={"template_file": template_file},
|
|
669
|
+
json=jsonable_encoder(_request),
|
|
666
670
|
headers=self._client_wrapper.get_headers(),
|
|
667
671
|
timeout=60,
|
|
668
672
|
)
|