llama-cloud 0.1.7a1__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +16 -16
- llama_cloud/client.py +0 -3
- llama_cloud/resources/__init__.py +0 -5
- llama_cloud/resources/chat_apps/client.py +10 -0
- llama_cloud/resources/files/client.py +305 -6
- llama_cloud/resources/llama_extract/client.py +376 -276
- llama_cloud/resources/parsing/client.py +106 -18
- llama_cloud/resources/reports/client.py +4 -4
- llama_cloud/types/__init__.py +16 -10
- llama_cloud/types/composite_retrieval_result.py +2 -2
- llama_cloud/types/{extraction_job.py → composite_retrieved_text_node_with_score.py} +5 -6
- llama_cloud/types/extract_job.py +3 -0
- llama_cloud/types/extract_resultset.py +2 -6
- llama_cloud/types/extract_run.py +54 -0
- llama_cloud/types/{extraction_result_data_value.py → extract_run_data_schema_value.py} +1 -1
- llama_cloud/types/extract_run_data_value.py +5 -0
- llama_cloud/types/{extraction_schema_data_schema_value.py → extract_run_extraction_metadata_value.py} +1 -1
- llama_cloud/types/extract_state.py +29 -0
- llama_cloud/types/{extraction_result.py → llama_extract_settings.py} +12 -11
- llama_cloud/types/llama_parse_parameters.py +6 -0
- llama_cloud/types/{extraction_schema.py → page_figure_metadata.py} +7 -12
- llama_cloud/types/report_metadata.py +4 -0
- {llama_cloud-0.1.7a1.dist-info → llama_cloud-0.1.9.dist-info}/METADATA +2 -1
- {llama_cloud-0.1.7a1.dist-info → llama_cloud-0.1.9.dist-info}/RECORD +26 -28
- {llama_cloud-0.1.7a1.dist-info → llama_cloud-0.1.9.dist-info}/WHEEL +1 -1
- llama_cloud/resources/extraction/__init__.py +0 -5
- llama_cloud/resources/extraction/client.py +0 -756
- llama_cloud/resources/extraction/types/__init__.py +0 -6
- llama_cloud/resources/extraction/types/extraction_schema_create_data_schema_value.py +0 -7
- llama_cloud/resources/extraction/types/extraction_schema_update_data_schema_value.py +0 -7
- {llama_cloud-0.1.7a1.dist-info → llama_cloud-0.1.9.dist-info}/LICENSE +0 -0
|
@@ -103,8 +103,8 @@ class ParsingClient:
|
|
|
103
103
|
def upload_file(
|
|
104
104
|
self,
|
|
105
105
|
*,
|
|
106
|
-
project_id: typing.Optional[str] = None,
|
|
107
106
|
organization_id: typing.Optional[str] = None,
|
|
107
|
+
project_id: typing.Optional[str] = None,
|
|
108
108
|
file: typing.Optional[str] = OMIT,
|
|
109
109
|
annotate_links: bool,
|
|
110
110
|
auto_mode: bool,
|
|
@@ -120,6 +120,8 @@ class ParsingClient:
|
|
|
120
120
|
bbox_left: float,
|
|
121
121
|
bbox_right: float,
|
|
122
122
|
bbox_top: float,
|
|
123
|
+
complemental_formatting_instruction: str,
|
|
124
|
+
content_guideline_instruction: str,
|
|
123
125
|
continuous_mode: bool,
|
|
124
126
|
disable_ocr: bool,
|
|
125
127
|
disable_reconstruction: bool,
|
|
@@ -128,12 +130,14 @@ class ParsingClient:
|
|
|
128
130
|
do_not_unroll_columns: bool,
|
|
129
131
|
extract_charts: bool,
|
|
130
132
|
fast_mode: bool,
|
|
133
|
+
formatting_instruction: str,
|
|
131
134
|
guess_xlsx_sheet_name: bool,
|
|
132
135
|
html_make_all_elements_visible: bool,
|
|
133
136
|
html_remove_fixed_elements: bool,
|
|
134
137
|
html_remove_navigation_elements: bool,
|
|
135
138
|
http_proxy: str,
|
|
136
139
|
input_s_3_path: str,
|
|
140
|
+
input_s_3_region: str,
|
|
137
141
|
input_url: str,
|
|
138
142
|
invalidate_cache: bool,
|
|
139
143
|
is_formatting_instruction: bool,
|
|
@@ -142,12 +146,14 @@ class ParsingClient:
|
|
|
142
146
|
max_pages: typing.Optional[int] = OMIT,
|
|
143
147
|
output_pdf_of_document: bool,
|
|
144
148
|
output_s_3_path_prefix: str,
|
|
149
|
+
output_s_3_region: str,
|
|
145
150
|
page_prefix: str,
|
|
146
151
|
page_separator: str,
|
|
147
152
|
page_suffix: str,
|
|
148
153
|
parsing_instruction: str,
|
|
149
154
|
premium_mode: bool,
|
|
150
155
|
skip_diagonal_text: bool,
|
|
156
|
+
spreadsheet_extract_sub_tables: bool,
|
|
151
157
|
structured_output: bool,
|
|
152
158
|
structured_output_json_schema: str,
|
|
153
159
|
structured_output_json_schema_name: str,
|
|
@@ -165,10 +171,10 @@ class ParsingClient:
|
|
|
165
171
|
Upload a file to s3 and create a job. return a job id
|
|
166
172
|
|
|
167
173
|
Parameters:
|
|
168
|
-
- project_id: typing.Optional[str].
|
|
169
|
-
|
|
170
174
|
- organization_id: typing.Optional[str].
|
|
171
175
|
|
|
176
|
+
- project_id: typing.Optional[str].
|
|
177
|
+
|
|
172
178
|
- file: typing.Optional[str].
|
|
173
179
|
|
|
174
180
|
- annotate_links: bool.
|
|
@@ -199,6 +205,10 @@ class ParsingClient:
|
|
|
199
205
|
|
|
200
206
|
- bbox_top: float.
|
|
201
207
|
|
|
208
|
+
- complemental_formatting_instruction: str.
|
|
209
|
+
|
|
210
|
+
- content_guideline_instruction: str.
|
|
211
|
+
|
|
202
212
|
- continuous_mode: bool.
|
|
203
213
|
|
|
204
214
|
- disable_ocr: bool.
|
|
@@ -215,6 +225,8 @@ class ParsingClient:
|
|
|
215
225
|
|
|
216
226
|
- fast_mode: bool.
|
|
217
227
|
|
|
228
|
+
- formatting_instruction: str.
|
|
229
|
+
|
|
218
230
|
- guess_xlsx_sheet_name: bool.
|
|
219
231
|
|
|
220
232
|
- html_make_all_elements_visible: bool.
|
|
@@ -227,6 +239,8 @@ class ParsingClient:
|
|
|
227
239
|
|
|
228
240
|
- input_s_3_path: str.
|
|
229
241
|
|
|
242
|
+
- input_s_3_region: str.
|
|
243
|
+
|
|
230
244
|
- input_url: str.
|
|
231
245
|
|
|
232
246
|
- invalidate_cache: bool.
|
|
@@ -243,6 +257,8 @@ class ParsingClient:
|
|
|
243
257
|
|
|
244
258
|
- output_s_3_path_prefix: str.
|
|
245
259
|
|
|
260
|
+
- output_s_3_region: str.
|
|
261
|
+
|
|
246
262
|
- page_prefix: str.
|
|
247
263
|
|
|
248
264
|
- page_separator: str.
|
|
@@ -255,6 +271,8 @@ class ParsingClient:
|
|
|
255
271
|
|
|
256
272
|
- skip_diagonal_text: bool.
|
|
257
273
|
|
|
274
|
+
- spreadsheet_extract_sub_tables: bool.
|
|
275
|
+
|
|
258
276
|
- structured_output: bool.
|
|
259
277
|
|
|
260
278
|
- structured_output_json_schema: str.
|
|
@@ -294,6 +312,8 @@ class ParsingClient:
|
|
|
294
312
|
"bbox_left": bbox_left,
|
|
295
313
|
"bbox_right": bbox_right,
|
|
296
314
|
"bbox_top": bbox_top,
|
|
315
|
+
"complemental_formatting_instruction": complemental_formatting_instruction,
|
|
316
|
+
"content_guideline_instruction": content_guideline_instruction,
|
|
297
317
|
"continuous_mode": continuous_mode,
|
|
298
318
|
"disable_ocr": disable_ocr,
|
|
299
319
|
"disable_reconstruction": disable_reconstruction,
|
|
@@ -302,12 +322,14 @@ class ParsingClient:
|
|
|
302
322
|
"do_not_unroll_columns": do_not_unroll_columns,
|
|
303
323
|
"extract_charts": extract_charts,
|
|
304
324
|
"fast_mode": fast_mode,
|
|
325
|
+
"formatting_instruction": formatting_instruction,
|
|
305
326
|
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
306
327
|
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
307
328
|
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
308
329
|
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
309
330
|
"http_proxy": http_proxy,
|
|
310
331
|
"input_s3_path": input_s_3_path,
|
|
332
|
+
"input_s3_region": input_s_3_region,
|
|
311
333
|
"input_url": input_url,
|
|
312
334
|
"invalidate_cache": invalidate_cache,
|
|
313
335
|
"is_formatting_instruction": is_formatting_instruction,
|
|
@@ -315,12 +337,14 @@ class ParsingClient:
|
|
|
315
337
|
"extract_layout": extract_layout,
|
|
316
338
|
"output_pdf_of_document": output_pdf_of_document,
|
|
317
339
|
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
340
|
+
"output_s3_region": output_s_3_region,
|
|
318
341
|
"page_prefix": page_prefix,
|
|
319
342
|
"page_separator": page_separator,
|
|
320
343
|
"page_suffix": page_suffix,
|
|
321
344
|
"parsing_instruction": parsing_instruction,
|
|
322
345
|
"premium_mode": premium_mode,
|
|
323
346
|
"skip_diagonal_text": skip_diagonal_text,
|
|
347
|
+
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
324
348
|
"structured_output": structured_output,
|
|
325
349
|
"structured_output_json_schema": structured_output_json_schema,
|
|
326
350
|
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
@@ -341,7 +365,7 @@ class ParsingClient:
|
|
|
341
365
|
_response = self._client_wrapper.httpx_client.request(
|
|
342
366
|
"POST",
|
|
343
367
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
344
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
368
|
+
params=remove_none_from_dict({"organization_id": organization_id, "project_id": project_id}),
|
|
345
369
|
json=jsonable_encoder(_request),
|
|
346
370
|
headers=self._client_wrapper.get_headers(),
|
|
347
371
|
timeout=60,
|
|
@@ -356,11 +380,13 @@ class ParsingClient:
|
|
|
356
380
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
357
381
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
358
382
|
|
|
359
|
-
def usage(self) -> ParsingUsage:
|
|
383
|
+
def usage(self, *, organization_id: typing.Optional[str] = None) -> ParsingUsage:
|
|
360
384
|
"""
|
|
361
385
|
DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
|
|
362
386
|
Get parsing usage for user
|
|
363
387
|
|
|
388
|
+
Parameters:
|
|
389
|
+
- organization_id: typing.Optional[str].
|
|
364
390
|
---
|
|
365
391
|
from llama_cloud.client import LlamaCloud
|
|
366
392
|
|
|
@@ -372,6 +398,7 @@ class ParsingClient:
|
|
|
372
398
|
_response = self._client_wrapper.httpx_client.request(
|
|
373
399
|
"GET",
|
|
374
400
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/usage"),
|
|
401
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
375
402
|
headers=self._client_wrapper.get_headers(),
|
|
376
403
|
timeout=60,
|
|
377
404
|
)
|
|
@@ -449,12 +476,14 @@ class ParsingClient:
|
|
|
449
476
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
450
477
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
451
478
|
|
|
452
|
-
def get_job_text_result(self, job_id: str) -> ParsingJobTextResult:
|
|
479
|
+
def get_job_text_result(self, job_id: str, *, organization_id: typing.Optional[str] = None) -> ParsingJobTextResult:
|
|
453
480
|
"""
|
|
454
481
|
Get a job by id
|
|
455
482
|
|
|
456
483
|
Parameters:
|
|
457
484
|
- job_id: str.
|
|
485
|
+
|
|
486
|
+
- organization_id: typing.Optional[str].
|
|
458
487
|
---
|
|
459
488
|
from llama_cloud.client import LlamaCloud
|
|
460
489
|
|
|
@@ -468,6 +497,7 @@ class ParsingClient:
|
|
|
468
497
|
_response = self._client_wrapper.httpx_client.request(
|
|
469
498
|
"GET",
|
|
470
499
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/text"),
|
|
500
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
471
501
|
headers=self._client_wrapper.get_headers(),
|
|
472
502
|
timeout=60,
|
|
473
503
|
)
|
|
@@ -515,12 +545,16 @@ class ParsingClient:
|
|
|
515
545
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
516
546
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
517
547
|
|
|
518
|
-
def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
548
|
+
def get_job_structured_result(
|
|
549
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
550
|
+
) -> ParsingJobStructuredResult:
|
|
519
551
|
"""
|
|
520
552
|
Get a job by id
|
|
521
553
|
|
|
522
554
|
Parameters:
|
|
523
555
|
- job_id: str.
|
|
556
|
+
|
|
557
|
+
- organization_id: typing.Optional[str].
|
|
524
558
|
---
|
|
525
559
|
from llama_cloud.client import LlamaCloud
|
|
526
560
|
|
|
@@ -536,6 +570,7 @@ class ParsingClient:
|
|
|
536
570
|
urllib.parse.urljoin(
|
|
537
571
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
538
572
|
),
|
|
573
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
539
574
|
headers=self._client_wrapper.get_headers(),
|
|
540
575
|
timeout=60,
|
|
541
576
|
)
|
|
@@ -617,12 +652,14 @@ class ParsingClient:
|
|
|
617
652
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
618
653
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
619
654
|
|
|
620
|
-
def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
|
|
655
|
+
def get_job_result(self, job_id: str, *, organization_id: typing.Optional[str] = None) -> ParsingJobMarkdownResult:
|
|
621
656
|
"""
|
|
622
657
|
Get a job by id
|
|
623
658
|
|
|
624
659
|
Parameters:
|
|
625
660
|
- job_id: str.
|
|
661
|
+
|
|
662
|
+
- organization_id: typing.Optional[str].
|
|
626
663
|
---
|
|
627
664
|
from llama_cloud.client import LlamaCloud
|
|
628
665
|
|
|
@@ -638,6 +675,7 @@ class ParsingClient:
|
|
|
638
675
|
urllib.parse.urljoin(
|
|
639
676
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/markdown"
|
|
640
677
|
),
|
|
678
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
641
679
|
headers=self._client_wrapper.get_headers(),
|
|
642
680
|
timeout=60,
|
|
643
681
|
)
|
|
@@ -685,12 +723,14 @@ class ParsingClient:
|
|
|
685
723
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
686
724
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
687
725
|
|
|
688
|
-
def get_job_json_result(self, job_id: str) -> ParsingJobJsonResult:
|
|
726
|
+
def get_job_json_result(self, job_id: str, *, organization_id: typing.Optional[str] = None) -> ParsingJobJsonResult:
|
|
689
727
|
"""
|
|
690
728
|
Get a job by id
|
|
691
729
|
|
|
692
730
|
Parameters:
|
|
693
731
|
- job_id: str.
|
|
732
|
+
|
|
733
|
+
- organization_id: typing.Optional[str].
|
|
694
734
|
---
|
|
695
735
|
from llama_cloud.client import LlamaCloud
|
|
696
736
|
|
|
@@ -704,6 +744,7 @@ class ParsingClient:
|
|
|
704
744
|
_response = self._client_wrapper.httpx_client.request(
|
|
705
745
|
"GET",
|
|
706
746
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/json"),
|
|
747
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
707
748
|
headers=self._client_wrapper.get_headers(),
|
|
708
749
|
timeout=60,
|
|
709
750
|
)
|
|
@@ -887,8 +928,8 @@ class AsyncParsingClient:
|
|
|
887
928
|
async def upload_file(
|
|
888
929
|
self,
|
|
889
930
|
*,
|
|
890
|
-
project_id: typing.Optional[str] = None,
|
|
891
931
|
organization_id: typing.Optional[str] = None,
|
|
932
|
+
project_id: typing.Optional[str] = None,
|
|
892
933
|
file: typing.Optional[str] = OMIT,
|
|
893
934
|
annotate_links: bool,
|
|
894
935
|
auto_mode: bool,
|
|
@@ -904,6 +945,8 @@ class AsyncParsingClient:
|
|
|
904
945
|
bbox_left: float,
|
|
905
946
|
bbox_right: float,
|
|
906
947
|
bbox_top: float,
|
|
948
|
+
complemental_formatting_instruction: str,
|
|
949
|
+
content_guideline_instruction: str,
|
|
907
950
|
continuous_mode: bool,
|
|
908
951
|
disable_ocr: bool,
|
|
909
952
|
disable_reconstruction: bool,
|
|
@@ -912,12 +955,14 @@ class AsyncParsingClient:
|
|
|
912
955
|
do_not_unroll_columns: bool,
|
|
913
956
|
extract_charts: bool,
|
|
914
957
|
fast_mode: bool,
|
|
958
|
+
formatting_instruction: str,
|
|
915
959
|
guess_xlsx_sheet_name: bool,
|
|
916
960
|
html_make_all_elements_visible: bool,
|
|
917
961
|
html_remove_fixed_elements: bool,
|
|
918
962
|
html_remove_navigation_elements: bool,
|
|
919
963
|
http_proxy: str,
|
|
920
964
|
input_s_3_path: str,
|
|
965
|
+
input_s_3_region: str,
|
|
921
966
|
input_url: str,
|
|
922
967
|
invalidate_cache: bool,
|
|
923
968
|
is_formatting_instruction: bool,
|
|
@@ -926,12 +971,14 @@ class AsyncParsingClient:
|
|
|
926
971
|
max_pages: typing.Optional[int] = OMIT,
|
|
927
972
|
output_pdf_of_document: bool,
|
|
928
973
|
output_s_3_path_prefix: str,
|
|
974
|
+
output_s_3_region: str,
|
|
929
975
|
page_prefix: str,
|
|
930
976
|
page_separator: str,
|
|
931
977
|
page_suffix: str,
|
|
932
978
|
parsing_instruction: str,
|
|
933
979
|
premium_mode: bool,
|
|
934
980
|
skip_diagonal_text: bool,
|
|
981
|
+
spreadsheet_extract_sub_tables: bool,
|
|
935
982
|
structured_output: bool,
|
|
936
983
|
structured_output_json_schema: str,
|
|
937
984
|
structured_output_json_schema_name: str,
|
|
@@ -949,10 +996,10 @@ class AsyncParsingClient:
|
|
|
949
996
|
Upload a file to s3 and create a job. return a job id
|
|
950
997
|
|
|
951
998
|
Parameters:
|
|
952
|
-
- project_id: typing.Optional[str].
|
|
953
|
-
|
|
954
999
|
- organization_id: typing.Optional[str].
|
|
955
1000
|
|
|
1001
|
+
- project_id: typing.Optional[str].
|
|
1002
|
+
|
|
956
1003
|
- file: typing.Optional[str].
|
|
957
1004
|
|
|
958
1005
|
- annotate_links: bool.
|
|
@@ -983,6 +1030,10 @@ class AsyncParsingClient:
|
|
|
983
1030
|
|
|
984
1031
|
- bbox_top: float.
|
|
985
1032
|
|
|
1033
|
+
- complemental_formatting_instruction: str.
|
|
1034
|
+
|
|
1035
|
+
- content_guideline_instruction: str.
|
|
1036
|
+
|
|
986
1037
|
- continuous_mode: bool.
|
|
987
1038
|
|
|
988
1039
|
- disable_ocr: bool.
|
|
@@ -999,6 +1050,8 @@ class AsyncParsingClient:
|
|
|
999
1050
|
|
|
1000
1051
|
- fast_mode: bool.
|
|
1001
1052
|
|
|
1053
|
+
- formatting_instruction: str.
|
|
1054
|
+
|
|
1002
1055
|
- guess_xlsx_sheet_name: bool.
|
|
1003
1056
|
|
|
1004
1057
|
- html_make_all_elements_visible: bool.
|
|
@@ -1011,6 +1064,8 @@ class AsyncParsingClient:
|
|
|
1011
1064
|
|
|
1012
1065
|
- input_s_3_path: str.
|
|
1013
1066
|
|
|
1067
|
+
- input_s_3_region: str.
|
|
1068
|
+
|
|
1014
1069
|
- input_url: str.
|
|
1015
1070
|
|
|
1016
1071
|
- invalidate_cache: bool.
|
|
@@ -1027,6 +1082,8 @@ class AsyncParsingClient:
|
|
|
1027
1082
|
|
|
1028
1083
|
- output_s_3_path_prefix: str.
|
|
1029
1084
|
|
|
1085
|
+
- output_s_3_region: str.
|
|
1086
|
+
|
|
1030
1087
|
- page_prefix: str.
|
|
1031
1088
|
|
|
1032
1089
|
- page_separator: str.
|
|
@@ -1039,6 +1096,8 @@ class AsyncParsingClient:
|
|
|
1039
1096
|
|
|
1040
1097
|
- skip_diagonal_text: bool.
|
|
1041
1098
|
|
|
1099
|
+
- spreadsheet_extract_sub_tables: bool.
|
|
1100
|
+
|
|
1042
1101
|
- structured_output: bool.
|
|
1043
1102
|
|
|
1044
1103
|
- structured_output_json_schema: str.
|
|
@@ -1078,6 +1137,8 @@ class AsyncParsingClient:
|
|
|
1078
1137
|
"bbox_left": bbox_left,
|
|
1079
1138
|
"bbox_right": bbox_right,
|
|
1080
1139
|
"bbox_top": bbox_top,
|
|
1140
|
+
"complemental_formatting_instruction": complemental_formatting_instruction,
|
|
1141
|
+
"content_guideline_instruction": content_guideline_instruction,
|
|
1081
1142
|
"continuous_mode": continuous_mode,
|
|
1082
1143
|
"disable_ocr": disable_ocr,
|
|
1083
1144
|
"disable_reconstruction": disable_reconstruction,
|
|
@@ -1086,12 +1147,14 @@ class AsyncParsingClient:
|
|
|
1086
1147
|
"do_not_unroll_columns": do_not_unroll_columns,
|
|
1087
1148
|
"extract_charts": extract_charts,
|
|
1088
1149
|
"fast_mode": fast_mode,
|
|
1150
|
+
"formatting_instruction": formatting_instruction,
|
|
1089
1151
|
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
1090
1152
|
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
1091
1153
|
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
1092
1154
|
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
1093
1155
|
"http_proxy": http_proxy,
|
|
1094
1156
|
"input_s3_path": input_s_3_path,
|
|
1157
|
+
"input_s3_region": input_s_3_region,
|
|
1095
1158
|
"input_url": input_url,
|
|
1096
1159
|
"invalidate_cache": invalidate_cache,
|
|
1097
1160
|
"is_formatting_instruction": is_formatting_instruction,
|
|
@@ -1099,12 +1162,14 @@ class AsyncParsingClient:
|
|
|
1099
1162
|
"extract_layout": extract_layout,
|
|
1100
1163
|
"output_pdf_of_document": output_pdf_of_document,
|
|
1101
1164
|
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
1165
|
+
"output_s3_region": output_s_3_region,
|
|
1102
1166
|
"page_prefix": page_prefix,
|
|
1103
1167
|
"page_separator": page_separator,
|
|
1104
1168
|
"page_suffix": page_suffix,
|
|
1105
1169
|
"parsing_instruction": parsing_instruction,
|
|
1106
1170
|
"premium_mode": premium_mode,
|
|
1107
1171
|
"skip_diagonal_text": skip_diagonal_text,
|
|
1172
|
+
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
1108
1173
|
"structured_output": structured_output,
|
|
1109
1174
|
"structured_output_json_schema": structured_output_json_schema,
|
|
1110
1175
|
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
@@ -1125,7 +1190,7 @@ class AsyncParsingClient:
|
|
|
1125
1190
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1126
1191
|
"POST",
|
|
1127
1192
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
1128
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1193
|
+
params=remove_none_from_dict({"organization_id": organization_id, "project_id": project_id}),
|
|
1129
1194
|
json=jsonable_encoder(_request),
|
|
1130
1195
|
headers=self._client_wrapper.get_headers(),
|
|
1131
1196
|
timeout=60,
|
|
@@ -1140,11 +1205,13 @@ class AsyncParsingClient:
|
|
|
1140
1205
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1141
1206
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1142
1207
|
|
|
1143
|
-
async def usage(self) -> ParsingUsage:
|
|
1208
|
+
async def usage(self, *, organization_id: typing.Optional[str] = None) -> ParsingUsage:
|
|
1144
1209
|
"""
|
|
1145
1210
|
DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
|
|
1146
1211
|
Get parsing usage for user
|
|
1147
1212
|
|
|
1213
|
+
Parameters:
|
|
1214
|
+
- organization_id: typing.Optional[str].
|
|
1148
1215
|
---
|
|
1149
1216
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1150
1217
|
|
|
@@ -1156,6 +1223,7 @@ class AsyncParsingClient:
|
|
|
1156
1223
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1157
1224
|
"GET",
|
|
1158
1225
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/usage"),
|
|
1226
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1159
1227
|
headers=self._client_wrapper.get_headers(),
|
|
1160
1228
|
timeout=60,
|
|
1161
1229
|
)
|
|
@@ -1233,12 +1301,16 @@ class AsyncParsingClient:
|
|
|
1233
1301
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1234
1302
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1235
1303
|
|
|
1236
|
-
async def get_job_text_result(self, job_id: str) -> ParsingJobTextResult:
|
|
1304
|
+
async def get_job_text_result(
|
|
1305
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1306
|
+
) -> ParsingJobTextResult:
|
|
1237
1307
|
"""
|
|
1238
1308
|
Get a job by id
|
|
1239
1309
|
|
|
1240
1310
|
Parameters:
|
|
1241
1311
|
- job_id: str.
|
|
1312
|
+
|
|
1313
|
+
- organization_id: typing.Optional[str].
|
|
1242
1314
|
---
|
|
1243
1315
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1244
1316
|
|
|
@@ -1252,6 +1324,7 @@ class AsyncParsingClient:
|
|
|
1252
1324
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1253
1325
|
"GET",
|
|
1254
1326
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/text"),
|
|
1327
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1255
1328
|
headers=self._client_wrapper.get_headers(),
|
|
1256
1329
|
timeout=60,
|
|
1257
1330
|
)
|
|
@@ -1299,12 +1372,16 @@ class AsyncParsingClient:
|
|
|
1299
1372
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1300
1373
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1301
1374
|
|
|
1302
|
-
async def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
1375
|
+
async def get_job_structured_result(
|
|
1376
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1377
|
+
) -> ParsingJobStructuredResult:
|
|
1303
1378
|
"""
|
|
1304
1379
|
Get a job by id
|
|
1305
1380
|
|
|
1306
1381
|
Parameters:
|
|
1307
1382
|
- job_id: str.
|
|
1383
|
+
|
|
1384
|
+
- organization_id: typing.Optional[str].
|
|
1308
1385
|
---
|
|
1309
1386
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1310
1387
|
|
|
@@ -1320,6 +1397,7 @@ class AsyncParsingClient:
|
|
|
1320
1397
|
urllib.parse.urljoin(
|
|
1321
1398
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
1322
1399
|
),
|
|
1400
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1323
1401
|
headers=self._client_wrapper.get_headers(),
|
|
1324
1402
|
timeout=60,
|
|
1325
1403
|
)
|
|
@@ -1401,12 +1479,16 @@ class AsyncParsingClient:
|
|
|
1401
1479
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1402
1480
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1403
1481
|
|
|
1404
|
-
async def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
|
|
1482
|
+
async def get_job_result(
|
|
1483
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1484
|
+
) -> ParsingJobMarkdownResult:
|
|
1405
1485
|
"""
|
|
1406
1486
|
Get a job by id
|
|
1407
1487
|
|
|
1408
1488
|
Parameters:
|
|
1409
1489
|
- job_id: str.
|
|
1490
|
+
|
|
1491
|
+
- organization_id: typing.Optional[str].
|
|
1410
1492
|
---
|
|
1411
1493
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1412
1494
|
|
|
@@ -1422,6 +1504,7 @@ class AsyncParsingClient:
|
|
|
1422
1504
|
urllib.parse.urljoin(
|
|
1423
1505
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/markdown"
|
|
1424
1506
|
),
|
|
1507
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1425
1508
|
headers=self._client_wrapper.get_headers(),
|
|
1426
1509
|
timeout=60,
|
|
1427
1510
|
)
|
|
@@ -1469,12 +1552,16 @@ class AsyncParsingClient:
|
|
|
1469
1552
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1470
1553
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1471
1554
|
|
|
1472
|
-
async def get_job_json_result(self, job_id: str) -> ParsingJobJsonResult:
|
|
1555
|
+
async def get_job_json_result(
|
|
1556
|
+
self, job_id: str, *, organization_id: typing.Optional[str] = None
|
|
1557
|
+
) -> ParsingJobJsonResult:
|
|
1473
1558
|
"""
|
|
1474
1559
|
Get a job by id
|
|
1475
1560
|
|
|
1476
1561
|
Parameters:
|
|
1477
1562
|
- job_id: str.
|
|
1563
|
+
|
|
1564
|
+
- organization_id: typing.Optional[str].
|
|
1478
1565
|
---
|
|
1479
1566
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1480
1567
|
|
|
@@ -1488,6 +1575,7 @@ class AsyncParsingClient:
|
|
|
1488
1575
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1489
1576
|
"GET",
|
|
1490
1577
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/json"),
|
|
1578
|
+
params=remove_none_from_dict({"organization_id": organization_id}),
|
|
1491
1579
|
headers=self._client_wrapper.get_headers(),
|
|
1492
1580
|
timeout=60,
|
|
1493
1581
|
)
|
|
@@ -581,7 +581,7 @@ class ReportsClient:
|
|
|
581
581
|
self, report_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
582
582
|
) -> typing.Any:
|
|
583
583
|
"""
|
|
584
|
-
Restart a report.
|
|
584
|
+
Restart a report from scratch.
|
|
585
585
|
|
|
586
586
|
Parameters:
|
|
587
587
|
- report_id: str.
|
|
@@ -601,7 +601,7 @@ class ReportsClient:
|
|
|
601
601
|
"""
|
|
602
602
|
_response = self._client_wrapper.httpx_client.request(
|
|
603
603
|
"POST",
|
|
604
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/reports/{report_id}/
|
|
604
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/reports/{report_id}/restart"),
|
|
605
605
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
606
606
|
headers=self._client_wrapper.get_headers(),
|
|
607
607
|
timeout=60,
|
|
@@ -1162,7 +1162,7 @@ class AsyncReportsClient:
|
|
|
1162
1162
|
self, report_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1163
1163
|
) -> typing.Any:
|
|
1164
1164
|
"""
|
|
1165
|
-
Restart a report.
|
|
1165
|
+
Restart a report from scratch.
|
|
1166
1166
|
|
|
1167
1167
|
Parameters:
|
|
1168
1168
|
- report_id: str.
|
|
@@ -1182,7 +1182,7 @@ class AsyncReportsClient:
|
|
|
1182
1182
|
"""
|
|
1183
1183
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1184
1184
|
"POST",
|
|
1185
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/reports/{report_id}/
|
|
1185
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/reports/{report_id}/restart"),
|
|
1186
1186
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1187
1187
|
headers=self._client_wrapper.get_headers(),
|
|
1188
1188
|
timeout=60,
|
llama_cloud/types/__init__.py
CHANGED
|
@@ -53,6 +53,7 @@ from .cohere_embedding_config import CohereEmbeddingConfig
|
|
|
53
53
|
from .composite_retrieval_mode import CompositeRetrievalMode
|
|
54
54
|
from .composite_retrieval_result import CompositeRetrievalResult
|
|
55
55
|
from .composite_retrieved_text_node import CompositeRetrievedTextNode
|
|
56
|
+
from .composite_retrieved_text_node_with_score import CompositeRetrievedTextNodeWithScore
|
|
56
57
|
from .configurable_data_sink_names import ConfigurableDataSinkNames
|
|
57
58
|
from .configurable_data_source_names import ConfigurableDataSourceNames
|
|
58
59
|
from .configurable_transformation_definition import ConfigurableTransformationDefinition
|
|
@@ -116,11 +117,11 @@ from .extract_resultset_data import ExtractResultsetData
|
|
|
116
117
|
from .extract_resultset_data_item_value import ExtractResultsetDataItemValue
|
|
117
118
|
from .extract_resultset_data_zero_value import ExtractResultsetDataZeroValue
|
|
118
119
|
from .extract_resultset_extraction_metadata_value import ExtractResultsetExtractionMetadataValue
|
|
119
|
-
from .extraction_job import ExtractionJob
|
|
120
|
-
from .extraction_result import ExtractionResult
|
|
121
|
-
from .extraction_result_data_value import ExtractionResultDataValue
|
|
122
|
-
from .extraction_schema import ExtractionSchema
|
|
123
|
-
from .extraction_schema_data_schema_value import ExtractionSchemaDataSchemaValue
|
|
120
|
+
from .extract_run import ExtractRun
|
|
121
|
+
from .extract_run_data_schema_value import ExtractRunDataSchemaValue
|
|
122
|
+
from .extract_run_data_value import ExtractRunDataValue
|
|
123
|
+
from .extract_run_extraction_metadata_value import ExtractRunExtractionMetadataValue
|
|
124
|
+
from .extract_state import ExtractState
|
|
124
125
|
from .file import File
|
|
125
126
|
from .file_permission_info_value import FilePermissionInfoValue
|
|
126
127
|
from .file_resource_info_value import FileResourceInfoValue
|
|
@@ -140,6 +141,7 @@ from .job_name_mapping import JobNameMapping
|
|
|
140
141
|
from .job_names import JobNames
|
|
141
142
|
from .job_record import JobRecord
|
|
142
143
|
from .job_record_with_usage_metrics import JobRecordWithUsageMetrics
|
|
144
|
+
from .llama_extract_settings import LlamaExtractSettings
|
|
143
145
|
from .llama_index_core_base_llms_types_chat_message import LlamaIndexCoreBaseLlmsTypesChatMessage
|
|
144
146
|
from .llama_index_core_base_llms_types_chat_message_blocks_item import (
|
|
145
147
|
LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem,
|
|
@@ -174,6 +176,7 @@ from .open_ai_embedding import OpenAiEmbedding
|
|
|
174
176
|
from .open_ai_embedding_config import OpenAiEmbeddingConfig
|
|
175
177
|
from .organization import Organization
|
|
176
178
|
from .organization_create import OrganizationCreate
|
|
179
|
+
from .page_figure_metadata import PageFigureMetadata
|
|
177
180
|
from .page_screenshot_metadata import PageScreenshotMetadata
|
|
178
181
|
from .page_screenshot_node_with_score import PageScreenshotNodeWithScore
|
|
179
182
|
from .page_segmentation_config import PageSegmentationConfig
|
|
@@ -350,6 +353,7 @@ __all__ = [
|
|
|
350
353
|
"CompositeRetrievalMode",
|
|
351
354
|
"CompositeRetrievalResult",
|
|
352
355
|
"CompositeRetrievedTextNode",
|
|
356
|
+
"CompositeRetrievedTextNodeWithScore",
|
|
353
357
|
"ConfigurableDataSinkNames",
|
|
354
358
|
"ConfigurableDataSourceNames",
|
|
355
359
|
"ConfigurableTransformationDefinition",
|
|
@@ -409,11 +413,11 @@ __all__ = [
|
|
|
409
413
|
"ExtractResultsetDataItemValue",
|
|
410
414
|
"ExtractResultsetDataZeroValue",
|
|
411
415
|
"ExtractResultsetExtractionMetadataValue",
|
|
412
|
-
"ExtractionJob",
|
|
413
|
-
"ExtractionResult",
|
|
414
|
-
"ExtractionResultDataValue",
|
|
415
|
-
"ExtractionSchema",
|
|
416
|
-
"ExtractionSchemaDataSchemaValue",
|
|
416
|
+
"ExtractRun",
|
|
417
|
+
"ExtractRunDataSchemaValue",
|
|
418
|
+
"ExtractRunDataValue",
|
|
419
|
+
"ExtractRunExtractionMetadataValue",
|
|
420
|
+
"ExtractState",
|
|
417
421
|
"File",
|
|
418
422
|
"FilePermissionInfoValue",
|
|
419
423
|
"FileResourceInfoValue",
|
|
@@ -433,6 +437,7 @@ __all__ = [
|
|
|
433
437
|
"JobNames",
|
|
434
438
|
"JobRecord",
|
|
435
439
|
"JobRecordWithUsageMetrics",
|
|
440
|
+
"LlamaExtractSettings",
|
|
436
441
|
"LlamaIndexCoreBaseLlmsTypesChatMessage",
|
|
437
442
|
"LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem",
|
|
438
443
|
"LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image",
|
|
@@ -465,6 +470,7 @@ __all__ = [
|
|
|
465
470
|
"OpenAiEmbeddingConfig",
|
|
466
471
|
"Organization",
|
|
467
472
|
"OrganizationCreate",
|
|
473
|
+
"PageFigureMetadata",
|
|
468
474
|
"PageScreenshotMetadata",
|
|
469
475
|
"PageScreenshotNodeWithScore",
|
|
470
476
|
"PageSegmentationConfig",
|
|
@@ -4,7 +4,7 @@ import datetime as dt
|
|
|
4
4
|
import typing
|
|
5
5
|
|
|
6
6
|
from ..core.datetime_utils import serialize_datetime
|
|
7
|
-
from .
|
|
7
|
+
from .composite_retrieved_text_node_with_score import CompositeRetrievedTextNodeWithScore
|
|
8
8
|
from .page_screenshot_node_with_score import PageScreenshotNodeWithScore
|
|
9
9
|
|
|
10
10
|
try:
|
|
@@ -17,7 +17,7 @@ except ImportError:
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class CompositeRetrievalResult(pydantic.BaseModel):
|
|
20
|
-
nodes: typing.Optional[typing.List[
|
|
20
|
+
nodes: typing.Optional[typing.List[CompositeRetrievedTextNodeWithScore]] = pydantic.Field(
|
|
21
21
|
description="The retrieved nodes from the composite retrieval."
|
|
22
22
|
)
|
|
23
23
|
image_nodes: typing.Optional[typing.List[PageScreenshotNodeWithScore]] = pydantic.Field(
|