llama-cloud 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +138 -2
- llama_cloud/client.py +15 -0
- llama_cloud/resources/__init__.py +17 -1
- llama_cloud/resources/chat_apps/__init__.py +2 -0
- llama_cloud/resources/chat_apps/client.py +630 -0
- llama_cloud/resources/data_sinks/client.py +2 -2
- llama_cloud/resources/data_sources/client.py +2 -2
- llama_cloud/resources/embedding_model_configs/client.py +4 -4
- llama_cloud/resources/files/__init__.py +2 -2
- llama_cloud/resources/files/client.py +21 -0
- llama_cloud/resources/files/types/__init__.py +2 -1
- llama_cloud/resources/files/types/file_create_permission_info_value.py +7 -0
- llama_cloud/resources/jobs/__init__.py +2 -0
- llama_cloud/resources/jobs/client.py +148 -0
- llama_cloud/resources/llama_extract/__init__.py +5 -0
- llama_cloud/resources/llama_extract/client.py +1038 -0
- llama_cloud/resources/llama_extract/types/__init__.py +6 -0
- llama_cloud/resources/llama_extract/types/extract_agent_create_data_schema_value.py +7 -0
- llama_cloud/resources/llama_extract/types/extract_agent_update_data_schema_value.py +7 -0
- llama_cloud/resources/organizations/client.py +14 -14
- llama_cloud/resources/parsing/client.py +480 -229
- llama_cloud/resources/pipelines/client.py +182 -126
- llama_cloud/resources/projects/client.py +210 -102
- llama_cloud/resources/reports/__init__.py +5 -0
- llama_cloud/resources/reports/client.py +1198 -0
- llama_cloud/resources/reports/types/__init__.py +7 -0
- llama_cloud/resources/reports/types/update_report_plan_api_v_1_reports_report_id_plan_patch_request_action.py +25 -0
- llama_cloud/resources/retrievers/__init__.py +2 -0
- llama_cloud/resources/retrievers/client.py +654 -0
- llama_cloud/types/__init__.py +124 -2
- llama_cloud/types/{chat_message.py → app_schema_chat_chat_message.py} +2 -2
- llama_cloud/types/chat_app.py +44 -0
- llama_cloud/types/chat_app_response.py +41 -0
- llama_cloud/types/cloud_az_storage_blob_data_source.py +1 -0
- llama_cloud/types/cloud_box_data_source.py +1 -0
- llama_cloud/types/cloud_confluence_data_source.py +1 -0
- llama_cloud/types/cloud_google_drive_data_source.py +1 -0
- llama_cloud/types/cloud_jira_data_source.py +1 -0
- llama_cloud/types/cloud_notion_page_data_source.py +1 -0
- llama_cloud/types/cloud_one_drive_data_source.py +1 -0
- llama_cloud/types/cloud_postgres_vector_store.py +1 -0
- llama_cloud/types/cloud_s_3_data_source.py +1 -0
- llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
- llama_cloud/types/cloud_slack_data_source.py +1 -0
- llama_cloud/types/composite_retrieval_mode.py +21 -0
- llama_cloud/types/composite_retrieval_result.py +38 -0
- llama_cloud/types/composite_retrieved_text_node.py +42 -0
- llama_cloud/types/data_sink.py +1 -1
- llama_cloud/types/data_sink_create.py +1 -1
- llama_cloud/types/data_source.py +1 -1
- llama_cloud/types/data_source_create.py +1 -1
- llama_cloud/types/edit_suggestion.py +39 -0
- llama_cloud/types/eval_dataset_job_record.py +1 -0
- llama_cloud/types/extract_agent.py +45 -0
- llama_cloud/types/extract_agent_data_schema_value.py +5 -0
- llama_cloud/types/extract_config.py +40 -0
- llama_cloud/types/extract_job.py +35 -0
- llama_cloud/types/extract_job_create.py +40 -0
- llama_cloud/types/extract_job_create_data_schema_override_value.py +7 -0
- llama_cloud/types/extract_mode.py +17 -0
- llama_cloud/types/extract_resultset.py +46 -0
- llama_cloud/types/extract_resultset_data.py +11 -0
- llama_cloud/types/extract_resultset_data_item_value.py +7 -0
- llama_cloud/types/extract_resultset_data_zero_value.py +7 -0
- llama_cloud/types/extract_resultset_extraction_metadata_value.py +7 -0
- llama_cloud/types/file.py +3 -0
- llama_cloud/types/file_permission_info_value.py +5 -0
- llama_cloud/types/filter_condition.py +9 -1
- llama_cloud/types/filter_operator.py +4 -0
- llama_cloud/types/image_block.py +35 -0
- llama_cloud/types/input_message.py +1 -1
- llama_cloud/types/job_name_mapping.py +4 -0
- llama_cloud/types/job_names.py +89 -0
- llama_cloud/types/job_record.py +57 -0
- llama_cloud/types/job_record_with_usage_metrics.py +36 -0
- llama_cloud/types/llama_index_core_base_llms_types_chat_message.py +39 -0
- llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +33 -0
- llama_cloud/types/llama_parse_parameters.py +15 -0
- llama_cloud/types/llm.py +1 -0
- llama_cloud/types/llm_model_data.py +1 -0
- llama_cloud/types/llm_parameters.py +1 -0
- llama_cloud/types/managed_ingestion_status.py +4 -0
- llama_cloud/types/managed_ingestion_status_response.py +1 -0
- llama_cloud/types/object_type.py +4 -0
- llama_cloud/types/organization.py +5 -0
- llama_cloud/types/paginated_jobs_history_with_metrics.py +35 -0
- llama_cloud/types/paginated_report_response.py +35 -0
- llama_cloud/types/parse_plan_level.py +21 -0
- llama_cloud/types/parsing_job_structured_result.py +32 -0
- llama_cloud/types/pipeline_create.py +3 -1
- llama_cloud/types/pipeline_data_source.py +1 -1
- llama_cloud/types/pipeline_file.py +3 -0
- llama_cloud/types/pipeline_file_permission_info_value.py +7 -0
- llama_cloud/types/playground_session.py +2 -2
- llama_cloud/types/preset_retrieval_params.py +1 -0
- llama_cloud/types/progress_event.py +44 -0
- llama_cloud/types/progress_event_status.py +33 -0
- llama_cloud/types/prompt_spec.py +2 -2
- llama_cloud/types/related_node_info.py +2 -2
- llama_cloud/types/related_node_info_node_type.py +7 -0
- llama_cloud/types/report.py +33 -0
- llama_cloud/types/report_block.py +34 -0
- llama_cloud/types/report_block_dependency.py +29 -0
- llama_cloud/types/report_create_response.py +31 -0
- llama_cloud/types/report_event_item.py +40 -0
- llama_cloud/types/report_event_item_event_data.py +45 -0
- llama_cloud/types/report_event_type.py +37 -0
- llama_cloud/types/report_metadata.py +43 -0
- llama_cloud/types/report_plan.py +36 -0
- llama_cloud/types/report_plan_block.py +36 -0
- llama_cloud/types/report_query.py +33 -0
- llama_cloud/types/report_response.py +41 -0
- llama_cloud/types/report_state.py +37 -0
- llama_cloud/types/report_state_event.py +38 -0
- llama_cloud/types/report_update_event.py +38 -0
- llama_cloud/types/retrieve_results.py +1 -1
- llama_cloud/types/retriever.py +45 -0
- llama_cloud/types/retriever_create.py +37 -0
- llama_cloud/types/retriever_pipeline.py +37 -0
- llama_cloud/types/status_enum.py +4 -0
- llama_cloud/types/supported_llm_model_names.py +4 -0
- llama_cloud/types/text_block.py +31 -0
- llama_cloud/types/text_node.py +13 -6
- llama_cloud/types/usage_metric_response.py +34 -0
- llama_cloud/types/user_job_record.py +32 -0
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.7.dist-info}/METADATA +3 -1
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.7.dist-info}/RECORD +129 -59
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.7.dist-info}/WHEEL +1 -1
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.7.dist-info}/LICENSE +0 -0
|
@@ -16,6 +16,7 @@ from ...types.parsing_history_item import ParsingHistoryItem
|
|
|
16
16
|
from ...types.parsing_job import ParsingJob
|
|
17
17
|
from ...types.parsing_job_json_result import ParsingJobJsonResult
|
|
18
18
|
from ...types.parsing_job_markdown_result import ParsingJobMarkdownResult
|
|
19
|
+
from ...types.parsing_job_structured_result import ParsingJobStructuredResult
|
|
19
20
|
from ...types.parsing_job_text_result import ParsingJobTextResult
|
|
20
21
|
from ...types.parsing_usage import ParsingUsage
|
|
21
22
|
from ...types.presigned_url import PresignedUrl
|
|
@@ -104,47 +105,61 @@ class ParsingClient:
|
|
|
104
105
|
*,
|
|
105
106
|
project_id: typing.Optional[str] = None,
|
|
106
107
|
organization_id: typing.Optional[str] = None,
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
108
|
+
file: typing.Optional[str] = OMIT,
|
|
109
|
+
annotate_links: bool,
|
|
110
|
+
auto_mode: bool,
|
|
111
|
+
auto_mode_trigger_on_image_in_page: bool,
|
|
112
|
+
auto_mode_trigger_on_table_in_page: bool,
|
|
113
|
+
auto_mode_trigger_on_text_in_page: str,
|
|
114
|
+
auto_mode_trigger_on_regexp_in_page: str,
|
|
115
|
+
azure_openai_api_version: str,
|
|
116
|
+
azure_openai_deployment_name: str,
|
|
117
|
+
azure_openai_endpoint: str,
|
|
118
|
+
azure_openai_key: str,
|
|
119
|
+
bbox_bottom: float,
|
|
120
|
+
bbox_left: float,
|
|
121
|
+
bbox_right: float,
|
|
122
|
+
bbox_top: float,
|
|
115
123
|
continuous_mode: bool,
|
|
116
|
-
|
|
124
|
+
disable_ocr: bool,
|
|
125
|
+
disable_reconstruction: bool,
|
|
126
|
+
disable_image_extraction: bool,
|
|
127
|
+
do_not_cache: bool,
|
|
117
128
|
do_not_unroll_columns: bool,
|
|
129
|
+
extract_charts: bool,
|
|
130
|
+
fast_mode: bool,
|
|
118
131
|
guess_xlsx_sheet_name: bool,
|
|
132
|
+
html_make_all_elements_visible: bool,
|
|
133
|
+
html_remove_fixed_elements: bool,
|
|
134
|
+
html_remove_navigation_elements: bool,
|
|
135
|
+
http_proxy: str,
|
|
136
|
+
input_s_3_path: str,
|
|
137
|
+
input_url: str,
|
|
138
|
+
invalidate_cache: bool,
|
|
139
|
+
is_formatting_instruction: bool,
|
|
140
|
+
language: typing.List[ParserLanguages],
|
|
141
|
+
extract_layout: bool,
|
|
142
|
+
max_pages: typing.Optional[int] = OMIT,
|
|
143
|
+
output_pdf_of_document: bool,
|
|
144
|
+
output_s_3_path_prefix: str,
|
|
145
|
+
page_prefix: str,
|
|
119
146
|
page_separator: str,
|
|
120
|
-
|
|
147
|
+
page_suffix: str,
|
|
148
|
+
parsing_instruction: str,
|
|
149
|
+
premium_mode: bool,
|
|
150
|
+
skip_diagonal_text: bool,
|
|
151
|
+
structured_output: bool,
|
|
152
|
+
structured_output_json_schema: str,
|
|
153
|
+
structured_output_json_schema_name: str,
|
|
154
|
+
take_screenshot: bool,
|
|
121
155
|
target_pages: str,
|
|
122
156
|
use_vendor_multimodal_model: bool,
|
|
123
|
-
vendor_multimodal_model_name: str,
|
|
124
157
|
vendor_multimodal_api_key: str,
|
|
125
|
-
|
|
126
|
-
page_suffix: str,
|
|
158
|
+
vendor_multimodal_model_name: str,
|
|
127
159
|
webhook_url: str,
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
annotate_links: bool,
|
|
132
|
-
disable_reconstruction: bool,
|
|
133
|
-
disable_image_extraction: bool,
|
|
134
|
-
input_s_3_path: str,
|
|
135
|
-
output_s_3_path_prefix: str,
|
|
136
|
-
azure_openai_deployment_name: str,
|
|
137
|
-
azure_openai_endpoint: str,
|
|
138
|
-
azure_openai_api_version: str,
|
|
139
|
-
azure_openai_key: str,
|
|
140
|
-
auto_mode: bool,
|
|
141
|
-
auto_mode_trigger_on_regexp_in_page: str,
|
|
142
|
-
auto_mode_trigger_on_text_in_page: str,
|
|
143
|
-
auto_mode_trigger_on_table_in_page: bool,
|
|
144
|
-
auto_mode_trigger_on_image_in_page: bool,
|
|
145
|
-
file: typing.Optional[str] = OMIT,
|
|
146
|
-
input_url: str,
|
|
147
|
-
http_proxy: str,
|
|
160
|
+
bounding_box: str,
|
|
161
|
+
gpt_4_o_mode: bool,
|
|
162
|
+
gpt_4_o_api_key: str,
|
|
148
163
|
) -> ParsingJob:
|
|
149
164
|
"""
|
|
150
165
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -154,132 +169,175 @@ class ParsingClient:
|
|
|
154
169
|
|
|
155
170
|
- organization_id: typing.Optional[str].
|
|
156
171
|
|
|
157
|
-
-
|
|
172
|
+
- file: typing.Optional[str].
|
|
158
173
|
|
|
159
|
-
-
|
|
174
|
+
- annotate_links: bool.
|
|
160
175
|
|
|
161
|
-
-
|
|
176
|
+
- auto_mode: bool.
|
|
162
177
|
|
|
163
|
-
-
|
|
178
|
+
- auto_mode_trigger_on_image_in_page: bool.
|
|
164
179
|
|
|
165
|
-
-
|
|
180
|
+
- auto_mode_trigger_on_table_in_page: bool.
|
|
166
181
|
|
|
167
|
-
-
|
|
182
|
+
- auto_mode_trigger_on_text_in_page: str.
|
|
168
183
|
|
|
169
|
-
-
|
|
184
|
+
- auto_mode_trigger_on_regexp_in_page: str.
|
|
170
185
|
|
|
171
|
-
-
|
|
186
|
+
- azure_openai_api_version: str.
|
|
187
|
+
|
|
188
|
+
- azure_openai_deployment_name: str.
|
|
189
|
+
|
|
190
|
+
- azure_openai_endpoint: str.
|
|
191
|
+
|
|
192
|
+
- azure_openai_key: str.
|
|
193
|
+
|
|
194
|
+
- bbox_bottom: float.
|
|
195
|
+
|
|
196
|
+
- bbox_left: float.
|
|
197
|
+
|
|
198
|
+
- bbox_right: float.
|
|
199
|
+
|
|
200
|
+
- bbox_top: float.
|
|
172
201
|
|
|
173
202
|
- continuous_mode: bool.
|
|
174
203
|
|
|
175
|
-
-
|
|
204
|
+
- disable_ocr: bool.
|
|
205
|
+
|
|
206
|
+
- disable_reconstruction: bool.
|
|
207
|
+
|
|
208
|
+
- disable_image_extraction: bool.
|
|
209
|
+
|
|
210
|
+
- do_not_cache: bool.
|
|
176
211
|
|
|
177
212
|
- do_not_unroll_columns: bool.
|
|
178
213
|
|
|
214
|
+
- extract_charts: bool.
|
|
215
|
+
|
|
216
|
+
- fast_mode: bool.
|
|
217
|
+
|
|
179
218
|
- guess_xlsx_sheet_name: bool.
|
|
180
219
|
|
|
181
|
-
-
|
|
220
|
+
- html_make_all_elements_visible: bool.
|
|
182
221
|
|
|
183
|
-
-
|
|
222
|
+
- html_remove_fixed_elements: bool.
|
|
184
223
|
|
|
185
|
-
-
|
|
224
|
+
- html_remove_navigation_elements: bool.
|
|
186
225
|
|
|
187
|
-
-
|
|
226
|
+
- http_proxy: str.
|
|
188
227
|
|
|
189
|
-
-
|
|
228
|
+
- input_s_3_path: str.
|
|
190
229
|
|
|
191
|
-
-
|
|
230
|
+
- input_url: str.
|
|
192
231
|
|
|
193
|
-
-
|
|
232
|
+
- invalidate_cache: bool.
|
|
194
233
|
|
|
195
|
-
-
|
|
234
|
+
- is_formatting_instruction: bool.
|
|
196
235
|
|
|
197
|
-
-
|
|
236
|
+
- language: typing.List[ParserLanguages].
|
|
198
237
|
|
|
199
|
-
-
|
|
238
|
+
- extract_layout: bool.
|
|
200
239
|
|
|
201
|
-
-
|
|
240
|
+
- max_pages: typing.Optional[int].
|
|
202
241
|
|
|
203
|
-
-
|
|
242
|
+
- output_pdf_of_document: bool.
|
|
204
243
|
|
|
205
|
-
-
|
|
244
|
+
- output_s_3_path_prefix: str.
|
|
206
245
|
|
|
207
|
-
-
|
|
246
|
+
- page_prefix: str.
|
|
208
247
|
|
|
209
|
-
-
|
|
248
|
+
- page_separator: str.
|
|
210
249
|
|
|
211
|
-
-
|
|
250
|
+
- page_suffix: str.
|
|
212
251
|
|
|
213
|
-
-
|
|
252
|
+
- parsing_instruction: str.
|
|
214
253
|
|
|
215
|
-
-
|
|
254
|
+
- premium_mode: bool.
|
|
216
255
|
|
|
217
|
-
-
|
|
256
|
+
- skip_diagonal_text: bool.
|
|
218
257
|
|
|
219
|
-
-
|
|
258
|
+
- structured_output: bool.
|
|
220
259
|
|
|
221
|
-
-
|
|
260
|
+
- structured_output_json_schema: str.
|
|
222
261
|
|
|
223
|
-
-
|
|
262
|
+
- structured_output_json_schema_name: str.
|
|
224
263
|
|
|
225
|
-
-
|
|
264
|
+
- take_screenshot: bool.
|
|
226
265
|
|
|
227
|
-
-
|
|
266
|
+
- target_pages: str.
|
|
228
267
|
|
|
229
|
-
-
|
|
268
|
+
- use_vendor_multimodal_model: bool.
|
|
230
269
|
|
|
231
|
-
-
|
|
270
|
+
- vendor_multimodal_api_key: str.
|
|
232
271
|
|
|
233
|
-
-
|
|
272
|
+
- vendor_multimodal_model_name: str.
|
|
234
273
|
|
|
235
|
-
-
|
|
274
|
+
- webhook_url: str.
|
|
236
275
|
|
|
237
|
-
-
|
|
276
|
+
- bounding_box: str.
|
|
277
|
+
|
|
278
|
+
- gpt_4_o_mode: bool.
|
|
279
|
+
|
|
280
|
+
- gpt_4_o_api_key: str.
|
|
238
281
|
"""
|
|
239
282
|
_request: typing.Dict[str, typing.Any] = {
|
|
240
|
-
"
|
|
241
|
-
"
|
|
242
|
-
"
|
|
243
|
-
"
|
|
244
|
-
"
|
|
245
|
-
"
|
|
246
|
-
"
|
|
247
|
-
"
|
|
283
|
+
"annotate_links": annotate_links,
|
|
284
|
+
"auto_mode": auto_mode,
|
|
285
|
+
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
286
|
+
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
287
|
+
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
288
|
+
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
289
|
+
"azure_openai_api_version": azure_openai_api_version,
|
|
290
|
+
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
291
|
+
"azure_openai_endpoint": azure_openai_endpoint,
|
|
292
|
+
"azure_openai_key": azure_openai_key,
|
|
293
|
+
"bbox_bottom": bbox_bottom,
|
|
294
|
+
"bbox_left": bbox_left,
|
|
295
|
+
"bbox_right": bbox_right,
|
|
296
|
+
"bbox_top": bbox_top,
|
|
248
297
|
"continuous_mode": continuous_mode,
|
|
249
|
-
"
|
|
298
|
+
"disable_ocr": disable_ocr,
|
|
299
|
+
"disable_reconstruction": disable_reconstruction,
|
|
300
|
+
"disable_image_extraction": disable_image_extraction,
|
|
301
|
+
"do_not_cache": do_not_cache,
|
|
250
302
|
"do_not_unroll_columns": do_not_unroll_columns,
|
|
303
|
+
"extract_charts": extract_charts,
|
|
304
|
+
"fast_mode": fast_mode,
|
|
251
305
|
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
306
|
+
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
307
|
+
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
308
|
+
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
309
|
+
"http_proxy": http_proxy,
|
|
310
|
+
"input_s3_path": input_s_3_path,
|
|
311
|
+
"input_url": input_url,
|
|
312
|
+
"invalidate_cache": invalidate_cache,
|
|
313
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
314
|
+
"language": language,
|
|
315
|
+
"extract_layout": extract_layout,
|
|
316
|
+
"output_pdf_of_document": output_pdf_of_document,
|
|
317
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
318
|
+
"page_prefix": page_prefix,
|
|
252
319
|
"page_separator": page_separator,
|
|
253
|
-
"
|
|
320
|
+
"page_suffix": page_suffix,
|
|
321
|
+
"parsing_instruction": parsing_instruction,
|
|
322
|
+
"premium_mode": premium_mode,
|
|
323
|
+
"skip_diagonal_text": skip_diagonal_text,
|
|
324
|
+
"structured_output": structured_output,
|
|
325
|
+
"structured_output_json_schema": structured_output_json_schema,
|
|
326
|
+
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
327
|
+
"take_screenshot": take_screenshot,
|
|
254
328
|
"target_pages": target_pages,
|
|
255
329
|
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
256
|
-
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
257
330
|
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
258
|
-
"
|
|
259
|
-
"page_suffix": page_suffix,
|
|
331
|
+
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
260
332
|
"webhook_url": webhook_url,
|
|
261
|
-
"
|
|
262
|
-
"
|
|
263
|
-
"
|
|
264
|
-
"annotate_links": annotate_links,
|
|
265
|
-
"disable_reconstruction": disable_reconstruction,
|
|
266
|
-
"disable_image_extraction": disable_image_extraction,
|
|
267
|
-
"input_s3_path": input_s_3_path,
|
|
268
|
-
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
269
|
-
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
270
|
-
"azure_openai_endpoint": azure_openai_endpoint,
|
|
271
|
-
"azure_openai_api_version": azure_openai_api_version,
|
|
272
|
-
"azure_openai_key": azure_openai_key,
|
|
273
|
-
"auto_mode": auto_mode,
|
|
274
|
-
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
275
|
-
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
276
|
-
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
277
|
-
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
278
|
-
"input_url": input_url,
|
|
279
|
-
"http_proxy": http_proxy,
|
|
333
|
+
"bounding_box": bounding_box,
|
|
334
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
335
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
280
336
|
}
|
|
281
337
|
if file is not OMIT:
|
|
282
338
|
_request["file"] = file
|
|
339
|
+
if max_pages is not OMIT:
|
|
340
|
+
_request["max_pages"] = max_pages
|
|
283
341
|
_response = self._client_wrapper.httpx_client.request(
|
|
284
342
|
"POST",
|
|
285
343
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
@@ -442,7 +500,7 @@ class ParsingClient:
|
|
|
442
500
|
_response = self._client_wrapper.httpx_client.request(
|
|
443
501
|
"GET",
|
|
444
502
|
urllib.parse.urljoin(
|
|
445
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/
|
|
503
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
|
|
446
504
|
),
|
|
447
505
|
headers=self._client_wrapper.get_headers(),
|
|
448
506
|
timeout=60,
|
|
@@ -457,7 +515,7 @@ class ParsingClient:
|
|
|
457
515
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
458
516
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
459
517
|
|
|
460
|
-
def
|
|
518
|
+
def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
461
519
|
"""
|
|
462
520
|
Get a job by id
|
|
463
521
|
|
|
@@ -469,20 +527,20 @@ class ParsingClient:
|
|
|
469
527
|
client = LlamaCloud(
|
|
470
528
|
token="YOUR_TOKEN",
|
|
471
529
|
)
|
|
472
|
-
client.parsing.
|
|
530
|
+
client.parsing.get_job_structured_result(
|
|
473
531
|
job_id="string",
|
|
474
532
|
)
|
|
475
533
|
"""
|
|
476
534
|
_response = self._client_wrapper.httpx_client.request(
|
|
477
535
|
"GET",
|
|
478
536
|
urllib.parse.urljoin(
|
|
479
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/
|
|
537
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
480
538
|
),
|
|
481
539
|
headers=self._client_wrapper.get_headers(),
|
|
482
540
|
timeout=60,
|
|
483
541
|
)
|
|
484
542
|
if 200 <= _response.status_code < 300:
|
|
485
|
-
return pydantic.parse_obj_as(
|
|
543
|
+
return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
|
|
486
544
|
if _response.status_code == 422:
|
|
487
545
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
488
546
|
try:
|
|
@@ -491,7 +549,7 @@ class ParsingClient:
|
|
|
491
549
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
492
550
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
493
551
|
|
|
494
|
-
def
|
|
552
|
+
def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
|
|
495
553
|
"""
|
|
496
554
|
Get a job by id
|
|
497
555
|
|
|
@@ -503,20 +561,20 @@ class ParsingClient:
|
|
|
503
561
|
client = LlamaCloud(
|
|
504
562
|
token="YOUR_TOKEN",
|
|
505
563
|
)
|
|
506
|
-
client.parsing.
|
|
564
|
+
client.parsing.get_job_raw_structured_result(
|
|
507
565
|
job_id="string",
|
|
508
566
|
)
|
|
509
567
|
"""
|
|
510
568
|
_response = self._client_wrapper.httpx_client.request(
|
|
511
569
|
"GET",
|
|
512
570
|
urllib.parse.urljoin(
|
|
513
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/
|
|
571
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
|
|
514
572
|
),
|
|
515
573
|
headers=self._client_wrapper.get_headers(),
|
|
516
574
|
timeout=60,
|
|
517
575
|
)
|
|
518
576
|
if 200 <= _response.status_code < 300:
|
|
519
|
-
return pydantic.parse_obj_as(
|
|
577
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
520
578
|
if _response.status_code == 422:
|
|
521
579
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
522
580
|
try:
|
|
@@ -525,7 +583,7 @@ class ParsingClient:
|
|
|
525
583
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
526
584
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
527
585
|
|
|
528
|
-
def
|
|
586
|
+
def get_job_raw_xlsx_result(self, job_id: str) -> typing.Any:
|
|
529
587
|
"""
|
|
530
588
|
Get a job by id
|
|
531
589
|
|
|
@@ -537,14 +595,14 @@ class ParsingClient:
|
|
|
537
595
|
client = LlamaCloud(
|
|
538
596
|
token="YOUR_TOKEN",
|
|
539
597
|
)
|
|
540
|
-
client.parsing.
|
|
598
|
+
client.parsing.get_job_raw_xlsx_result(
|
|
541
599
|
job_id="string",
|
|
542
600
|
)
|
|
543
601
|
"""
|
|
544
602
|
_response = self._client_wrapper.httpx_client.request(
|
|
545
603
|
"GET",
|
|
546
604
|
urllib.parse.urljoin(
|
|
547
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/
|
|
605
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/xlsx"
|
|
548
606
|
),
|
|
549
607
|
headers=self._client_wrapper.get_headers(),
|
|
550
608
|
timeout=60,
|
|
@@ -559,7 +617,7 @@ class ParsingClient:
|
|
|
559
617
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
560
618
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
561
619
|
|
|
562
|
-
def
|
|
620
|
+
def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
|
|
563
621
|
"""
|
|
564
622
|
Get a job by id
|
|
565
623
|
|
|
@@ -571,18 +629,20 @@ class ParsingClient:
|
|
|
571
629
|
client = LlamaCloud(
|
|
572
630
|
token="YOUR_TOKEN",
|
|
573
631
|
)
|
|
574
|
-
client.parsing.
|
|
632
|
+
client.parsing.get_job_result(
|
|
575
633
|
job_id="string",
|
|
576
634
|
)
|
|
577
635
|
"""
|
|
578
636
|
_response = self._client_wrapper.httpx_client.request(
|
|
579
637
|
"GET",
|
|
580
|
-
urllib.parse.urljoin(
|
|
638
|
+
urllib.parse.urljoin(
|
|
639
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/markdown"
|
|
640
|
+
),
|
|
581
641
|
headers=self._client_wrapper.get_headers(),
|
|
582
642
|
timeout=60,
|
|
583
643
|
)
|
|
584
644
|
if 200 <= _response.status_code < 300:
|
|
585
|
-
return pydantic.parse_obj_as(
|
|
645
|
+
return pydantic.parse_obj_as(ParsingJobMarkdownResult, _response.json()) # type: ignore
|
|
586
646
|
if _response.status_code == 422:
|
|
587
647
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
588
648
|
try:
|
|
@@ -591,7 +651,7 @@ class ParsingClient:
|
|
|
591
651
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
592
652
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
593
653
|
|
|
594
|
-
def
|
|
654
|
+
def get_job_raw_md_result(self, job_id: str) -> typing.Any:
|
|
595
655
|
"""
|
|
596
656
|
Get a job by id
|
|
597
657
|
|
|
@@ -603,14 +663,14 @@ class ParsingClient:
|
|
|
603
663
|
client = LlamaCloud(
|
|
604
664
|
token="YOUR_TOKEN",
|
|
605
665
|
)
|
|
606
|
-
client.parsing.
|
|
666
|
+
client.parsing.get_job_raw_md_result(
|
|
607
667
|
job_id="string",
|
|
608
668
|
)
|
|
609
669
|
"""
|
|
610
670
|
_response = self._client_wrapper.httpx_client.request(
|
|
611
671
|
"GET",
|
|
612
672
|
urllib.parse.urljoin(
|
|
613
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/
|
|
673
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/markdown"
|
|
614
674
|
),
|
|
615
675
|
headers=self._client_wrapper.get_headers(),
|
|
616
676
|
timeout=60,
|
|
@@ -625,11 +685,77 @@ class ParsingClient:
|
|
|
625
685
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
626
686
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
627
687
|
|
|
628
|
-
def
|
|
688
|
+
def get_job_json_result(self, job_id: str) -> ParsingJobJsonResult:
|
|
629
689
|
"""
|
|
630
|
-
Get
|
|
690
|
+
Get a job by id
|
|
631
691
|
|
|
632
|
-
|
|
692
|
+
Parameters:
|
|
693
|
+
- job_id: str.
|
|
694
|
+
---
|
|
695
|
+
from llama_cloud.client import LlamaCloud
|
|
696
|
+
|
|
697
|
+
client = LlamaCloud(
|
|
698
|
+
token="YOUR_TOKEN",
|
|
699
|
+
)
|
|
700
|
+
client.parsing.get_job_json_result(
|
|
701
|
+
job_id="string",
|
|
702
|
+
)
|
|
703
|
+
"""
|
|
704
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
705
|
+
"GET",
|
|
706
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/json"),
|
|
707
|
+
headers=self._client_wrapper.get_headers(),
|
|
708
|
+
timeout=60,
|
|
709
|
+
)
|
|
710
|
+
if 200 <= _response.status_code < 300:
|
|
711
|
+
return pydantic.parse_obj_as(ParsingJobJsonResult, _response.json()) # type: ignore
|
|
712
|
+
if _response.status_code == 422:
|
|
713
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
714
|
+
try:
|
|
715
|
+
_response_json = _response.json()
|
|
716
|
+
except JSONDecodeError:
|
|
717
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
718
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
719
|
+
|
|
720
|
+
def get_job_json_raw_result(self, job_id: str) -> typing.Any:
|
|
721
|
+
"""
|
|
722
|
+
Get a job by id
|
|
723
|
+
|
|
724
|
+
Parameters:
|
|
725
|
+
- job_id: str.
|
|
726
|
+
---
|
|
727
|
+
from llama_cloud.client import LlamaCloud
|
|
728
|
+
|
|
729
|
+
client = LlamaCloud(
|
|
730
|
+
token="YOUR_TOKEN",
|
|
731
|
+
)
|
|
732
|
+
client.parsing.get_job_json_raw_result(
|
|
733
|
+
job_id="string",
|
|
734
|
+
)
|
|
735
|
+
"""
|
|
736
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
737
|
+
"GET",
|
|
738
|
+
urllib.parse.urljoin(
|
|
739
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/json"
|
|
740
|
+
),
|
|
741
|
+
headers=self._client_wrapper.get_headers(),
|
|
742
|
+
timeout=60,
|
|
743
|
+
)
|
|
744
|
+
if 200 <= _response.status_code < 300:
|
|
745
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
746
|
+
if _response.status_code == 422:
|
|
747
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
748
|
+
try:
|
|
749
|
+
_response_json = _response.json()
|
|
750
|
+
except JSONDecodeError:
|
|
751
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
752
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
753
|
+
|
|
754
|
+
def get_parsing_history_result(self) -> typing.List[ParsingHistoryItem]:
|
|
755
|
+
"""
|
|
756
|
+
Get parsing history for user
|
|
757
|
+
|
|
758
|
+
---
|
|
633
759
|
from llama_cloud.client import LlamaCloud
|
|
634
760
|
|
|
635
761
|
client = LlamaCloud(
|
|
@@ -763,47 +889,61 @@ class AsyncParsingClient:
|
|
|
763
889
|
*,
|
|
764
890
|
project_id: typing.Optional[str] = None,
|
|
765
891
|
organization_id: typing.Optional[str] = None,
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
892
|
+
file: typing.Optional[str] = OMIT,
|
|
893
|
+
annotate_links: bool,
|
|
894
|
+
auto_mode: bool,
|
|
895
|
+
auto_mode_trigger_on_image_in_page: bool,
|
|
896
|
+
auto_mode_trigger_on_table_in_page: bool,
|
|
897
|
+
auto_mode_trigger_on_text_in_page: str,
|
|
898
|
+
auto_mode_trigger_on_regexp_in_page: str,
|
|
899
|
+
azure_openai_api_version: str,
|
|
900
|
+
azure_openai_deployment_name: str,
|
|
901
|
+
azure_openai_endpoint: str,
|
|
902
|
+
azure_openai_key: str,
|
|
903
|
+
bbox_bottom: float,
|
|
904
|
+
bbox_left: float,
|
|
905
|
+
bbox_right: float,
|
|
906
|
+
bbox_top: float,
|
|
774
907
|
continuous_mode: bool,
|
|
775
|
-
|
|
908
|
+
disable_ocr: bool,
|
|
909
|
+
disable_reconstruction: bool,
|
|
910
|
+
disable_image_extraction: bool,
|
|
911
|
+
do_not_cache: bool,
|
|
776
912
|
do_not_unroll_columns: bool,
|
|
913
|
+
extract_charts: bool,
|
|
914
|
+
fast_mode: bool,
|
|
777
915
|
guess_xlsx_sheet_name: bool,
|
|
916
|
+
html_make_all_elements_visible: bool,
|
|
917
|
+
html_remove_fixed_elements: bool,
|
|
918
|
+
html_remove_navigation_elements: bool,
|
|
919
|
+
http_proxy: str,
|
|
920
|
+
input_s_3_path: str,
|
|
921
|
+
input_url: str,
|
|
922
|
+
invalidate_cache: bool,
|
|
923
|
+
is_formatting_instruction: bool,
|
|
924
|
+
language: typing.List[ParserLanguages],
|
|
925
|
+
extract_layout: bool,
|
|
926
|
+
max_pages: typing.Optional[int] = OMIT,
|
|
927
|
+
output_pdf_of_document: bool,
|
|
928
|
+
output_s_3_path_prefix: str,
|
|
929
|
+
page_prefix: str,
|
|
778
930
|
page_separator: str,
|
|
779
|
-
|
|
931
|
+
page_suffix: str,
|
|
932
|
+
parsing_instruction: str,
|
|
933
|
+
premium_mode: bool,
|
|
934
|
+
skip_diagonal_text: bool,
|
|
935
|
+
structured_output: bool,
|
|
936
|
+
structured_output_json_schema: str,
|
|
937
|
+
structured_output_json_schema_name: str,
|
|
938
|
+
take_screenshot: bool,
|
|
780
939
|
target_pages: str,
|
|
781
940
|
use_vendor_multimodal_model: bool,
|
|
782
|
-
vendor_multimodal_model_name: str,
|
|
783
941
|
vendor_multimodal_api_key: str,
|
|
784
|
-
|
|
785
|
-
page_suffix: str,
|
|
942
|
+
vendor_multimodal_model_name: str,
|
|
786
943
|
webhook_url: str,
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
annotate_links: bool,
|
|
791
|
-
disable_reconstruction: bool,
|
|
792
|
-
disable_image_extraction: bool,
|
|
793
|
-
input_s_3_path: str,
|
|
794
|
-
output_s_3_path_prefix: str,
|
|
795
|
-
azure_openai_deployment_name: str,
|
|
796
|
-
azure_openai_endpoint: str,
|
|
797
|
-
azure_openai_api_version: str,
|
|
798
|
-
azure_openai_key: str,
|
|
799
|
-
auto_mode: bool,
|
|
800
|
-
auto_mode_trigger_on_regexp_in_page: str,
|
|
801
|
-
auto_mode_trigger_on_text_in_page: str,
|
|
802
|
-
auto_mode_trigger_on_table_in_page: bool,
|
|
803
|
-
auto_mode_trigger_on_image_in_page: bool,
|
|
804
|
-
file: typing.Optional[str] = OMIT,
|
|
805
|
-
input_url: str,
|
|
806
|
-
http_proxy: str,
|
|
944
|
+
bounding_box: str,
|
|
945
|
+
gpt_4_o_mode: bool,
|
|
946
|
+
gpt_4_o_api_key: str,
|
|
807
947
|
) -> ParsingJob:
|
|
808
948
|
"""
|
|
809
949
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -813,132 +953,175 @@ class AsyncParsingClient:
|
|
|
813
953
|
|
|
814
954
|
- organization_id: typing.Optional[str].
|
|
815
955
|
|
|
816
|
-
-
|
|
956
|
+
- file: typing.Optional[str].
|
|
817
957
|
|
|
818
|
-
-
|
|
958
|
+
- annotate_links: bool.
|
|
819
959
|
|
|
820
|
-
-
|
|
960
|
+
- auto_mode: bool.
|
|
821
961
|
|
|
822
|
-
-
|
|
962
|
+
- auto_mode_trigger_on_image_in_page: bool.
|
|
823
963
|
|
|
824
|
-
-
|
|
964
|
+
- auto_mode_trigger_on_table_in_page: bool.
|
|
825
965
|
|
|
826
|
-
-
|
|
966
|
+
- auto_mode_trigger_on_text_in_page: str.
|
|
827
967
|
|
|
828
|
-
-
|
|
968
|
+
- auto_mode_trigger_on_regexp_in_page: str.
|
|
829
969
|
|
|
830
|
-
-
|
|
970
|
+
- azure_openai_api_version: str.
|
|
971
|
+
|
|
972
|
+
- azure_openai_deployment_name: str.
|
|
973
|
+
|
|
974
|
+
- azure_openai_endpoint: str.
|
|
975
|
+
|
|
976
|
+
- azure_openai_key: str.
|
|
977
|
+
|
|
978
|
+
- bbox_bottom: float.
|
|
979
|
+
|
|
980
|
+
- bbox_left: float.
|
|
981
|
+
|
|
982
|
+
- bbox_right: float.
|
|
983
|
+
|
|
984
|
+
- bbox_top: float.
|
|
831
985
|
|
|
832
986
|
- continuous_mode: bool.
|
|
833
987
|
|
|
834
|
-
-
|
|
988
|
+
- disable_ocr: bool.
|
|
989
|
+
|
|
990
|
+
- disable_reconstruction: bool.
|
|
991
|
+
|
|
992
|
+
- disable_image_extraction: bool.
|
|
993
|
+
|
|
994
|
+
- do_not_cache: bool.
|
|
835
995
|
|
|
836
996
|
- do_not_unroll_columns: bool.
|
|
837
997
|
|
|
998
|
+
- extract_charts: bool.
|
|
999
|
+
|
|
1000
|
+
- fast_mode: bool.
|
|
1001
|
+
|
|
838
1002
|
- guess_xlsx_sheet_name: bool.
|
|
839
1003
|
|
|
840
|
-
-
|
|
1004
|
+
- html_make_all_elements_visible: bool.
|
|
841
1005
|
|
|
842
|
-
-
|
|
1006
|
+
- html_remove_fixed_elements: bool.
|
|
843
1007
|
|
|
844
|
-
-
|
|
1008
|
+
- html_remove_navigation_elements: bool.
|
|
845
1009
|
|
|
846
|
-
-
|
|
1010
|
+
- http_proxy: str.
|
|
847
1011
|
|
|
848
|
-
-
|
|
1012
|
+
- input_s_3_path: str.
|
|
849
1013
|
|
|
850
|
-
-
|
|
1014
|
+
- input_url: str.
|
|
851
1015
|
|
|
852
|
-
-
|
|
1016
|
+
- invalidate_cache: bool.
|
|
853
1017
|
|
|
854
|
-
-
|
|
1018
|
+
- is_formatting_instruction: bool.
|
|
855
1019
|
|
|
856
|
-
-
|
|
1020
|
+
- language: typing.List[ParserLanguages].
|
|
857
1021
|
|
|
858
|
-
-
|
|
1022
|
+
- extract_layout: bool.
|
|
859
1023
|
|
|
860
|
-
-
|
|
1024
|
+
- max_pages: typing.Optional[int].
|
|
861
1025
|
|
|
862
|
-
-
|
|
1026
|
+
- output_pdf_of_document: bool.
|
|
863
1027
|
|
|
864
|
-
-
|
|
1028
|
+
- output_s_3_path_prefix: str.
|
|
865
1029
|
|
|
866
|
-
-
|
|
1030
|
+
- page_prefix: str.
|
|
867
1031
|
|
|
868
|
-
-
|
|
1032
|
+
- page_separator: str.
|
|
869
1033
|
|
|
870
|
-
-
|
|
1034
|
+
- page_suffix: str.
|
|
871
1035
|
|
|
872
|
-
-
|
|
1036
|
+
- parsing_instruction: str.
|
|
873
1037
|
|
|
874
|
-
-
|
|
1038
|
+
- premium_mode: bool.
|
|
875
1039
|
|
|
876
|
-
-
|
|
1040
|
+
- skip_diagonal_text: bool.
|
|
877
1041
|
|
|
878
|
-
-
|
|
1042
|
+
- structured_output: bool.
|
|
879
1043
|
|
|
880
|
-
-
|
|
1044
|
+
- structured_output_json_schema: str.
|
|
881
1045
|
|
|
882
|
-
-
|
|
1046
|
+
- structured_output_json_schema_name: str.
|
|
883
1047
|
|
|
884
|
-
-
|
|
1048
|
+
- take_screenshot: bool.
|
|
885
1049
|
|
|
886
|
-
-
|
|
1050
|
+
- target_pages: str.
|
|
887
1051
|
|
|
888
|
-
-
|
|
1052
|
+
- use_vendor_multimodal_model: bool.
|
|
889
1053
|
|
|
890
|
-
-
|
|
1054
|
+
- vendor_multimodal_api_key: str.
|
|
891
1055
|
|
|
892
|
-
-
|
|
1056
|
+
- vendor_multimodal_model_name: str.
|
|
893
1057
|
|
|
894
|
-
-
|
|
1058
|
+
- webhook_url: str.
|
|
895
1059
|
|
|
896
|
-
-
|
|
1060
|
+
- bounding_box: str.
|
|
1061
|
+
|
|
1062
|
+
- gpt_4_o_mode: bool.
|
|
1063
|
+
|
|
1064
|
+
- gpt_4_o_api_key: str.
|
|
897
1065
|
"""
|
|
898
1066
|
_request: typing.Dict[str, typing.Any] = {
|
|
899
|
-
"
|
|
900
|
-
"
|
|
901
|
-
"
|
|
902
|
-
"
|
|
903
|
-
"
|
|
904
|
-
"
|
|
905
|
-
"
|
|
906
|
-
"
|
|
1067
|
+
"annotate_links": annotate_links,
|
|
1068
|
+
"auto_mode": auto_mode,
|
|
1069
|
+
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
1070
|
+
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
1071
|
+
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
1072
|
+
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
1073
|
+
"azure_openai_api_version": azure_openai_api_version,
|
|
1074
|
+
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
1075
|
+
"azure_openai_endpoint": azure_openai_endpoint,
|
|
1076
|
+
"azure_openai_key": azure_openai_key,
|
|
1077
|
+
"bbox_bottom": bbox_bottom,
|
|
1078
|
+
"bbox_left": bbox_left,
|
|
1079
|
+
"bbox_right": bbox_right,
|
|
1080
|
+
"bbox_top": bbox_top,
|
|
907
1081
|
"continuous_mode": continuous_mode,
|
|
908
|
-
"
|
|
1082
|
+
"disable_ocr": disable_ocr,
|
|
1083
|
+
"disable_reconstruction": disable_reconstruction,
|
|
1084
|
+
"disable_image_extraction": disable_image_extraction,
|
|
1085
|
+
"do_not_cache": do_not_cache,
|
|
909
1086
|
"do_not_unroll_columns": do_not_unroll_columns,
|
|
1087
|
+
"extract_charts": extract_charts,
|
|
1088
|
+
"fast_mode": fast_mode,
|
|
910
1089
|
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
1090
|
+
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
1091
|
+
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
1092
|
+
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
1093
|
+
"http_proxy": http_proxy,
|
|
1094
|
+
"input_s3_path": input_s_3_path,
|
|
1095
|
+
"input_url": input_url,
|
|
1096
|
+
"invalidate_cache": invalidate_cache,
|
|
1097
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
1098
|
+
"language": language,
|
|
1099
|
+
"extract_layout": extract_layout,
|
|
1100
|
+
"output_pdf_of_document": output_pdf_of_document,
|
|
1101
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
1102
|
+
"page_prefix": page_prefix,
|
|
911
1103
|
"page_separator": page_separator,
|
|
912
|
-
"
|
|
1104
|
+
"page_suffix": page_suffix,
|
|
1105
|
+
"parsing_instruction": parsing_instruction,
|
|
1106
|
+
"premium_mode": premium_mode,
|
|
1107
|
+
"skip_diagonal_text": skip_diagonal_text,
|
|
1108
|
+
"structured_output": structured_output,
|
|
1109
|
+
"structured_output_json_schema": structured_output_json_schema,
|
|
1110
|
+
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
1111
|
+
"take_screenshot": take_screenshot,
|
|
913
1112
|
"target_pages": target_pages,
|
|
914
1113
|
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
915
|
-
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
916
1114
|
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
917
|
-
"
|
|
918
|
-
"page_suffix": page_suffix,
|
|
1115
|
+
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
919
1116
|
"webhook_url": webhook_url,
|
|
920
|
-
"
|
|
921
|
-
"
|
|
922
|
-
"
|
|
923
|
-
"annotate_links": annotate_links,
|
|
924
|
-
"disable_reconstruction": disable_reconstruction,
|
|
925
|
-
"disable_image_extraction": disable_image_extraction,
|
|
926
|
-
"input_s3_path": input_s_3_path,
|
|
927
|
-
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
928
|
-
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
929
|
-
"azure_openai_endpoint": azure_openai_endpoint,
|
|
930
|
-
"azure_openai_api_version": azure_openai_api_version,
|
|
931
|
-
"azure_openai_key": azure_openai_key,
|
|
932
|
-
"auto_mode": auto_mode,
|
|
933
|
-
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
934
|
-
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
935
|
-
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
936
|
-
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
937
|
-
"input_url": input_url,
|
|
938
|
-
"http_proxy": http_proxy,
|
|
1117
|
+
"bounding_box": bounding_box,
|
|
1118
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
1119
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
939
1120
|
}
|
|
940
1121
|
if file is not OMIT:
|
|
941
1122
|
_request["file"] = file
|
|
1123
|
+
if max_pages is not OMIT:
|
|
1124
|
+
_request["max_pages"] = max_pages
|
|
942
1125
|
_response = await self._client_wrapper.httpx_client.request(
|
|
943
1126
|
"POST",
|
|
944
1127
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
@@ -1101,7 +1284,75 @@ class AsyncParsingClient:
|
|
|
1101
1284
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1102
1285
|
"GET",
|
|
1103
1286
|
urllib.parse.urljoin(
|
|
1104
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/
|
|
1287
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
|
|
1288
|
+
),
|
|
1289
|
+
headers=self._client_wrapper.get_headers(),
|
|
1290
|
+
timeout=60,
|
|
1291
|
+
)
|
|
1292
|
+
if 200 <= _response.status_code < 300:
|
|
1293
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
1294
|
+
if _response.status_code == 422:
|
|
1295
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1296
|
+
try:
|
|
1297
|
+
_response_json = _response.json()
|
|
1298
|
+
except JSONDecodeError:
|
|
1299
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1300
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1301
|
+
|
|
1302
|
+
async def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
1303
|
+
"""
|
|
1304
|
+
Get a job by id
|
|
1305
|
+
|
|
1306
|
+
Parameters:
|
|
1307
|
+
- job_id: str.
|
|
1308
|
+
---
|
|
1309
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1310
|
+
|
|
1311
|
+
client = AsyncLlamaCloud(
|
|
1312
|
+
token="YOUR_TOKEN",
|
|
1313
|
+
)
|
|
1314
|
+
await client.parsing.get_job_structured_result(
|
|
1315
|
+
job_id="string",
|
|
1316
|
+
)
|
|
1317
|
+
"""
|
|
1318
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1319
|
+
"GET",
|
|
1320
|
+
urllib.parse.urljoin(
|
|
1321
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
1322
|
+
),
|
|
1323
|
+
headers=self._client_wrapper.get_headers(),
|
|
1324
|
+
timeout=60,
|
|
1325
|
+
)
|
|
1326
|
+
if 200 <= _response.status_code < 300:
|
|
1327
|
+
return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
|
|
1328
|
+
if _response.status_code == 422:
|
|
1329
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1330
|
+
try:
|
|
1331
|
+
_response_json = _response.json()
|
|
1332
|
+
except JSONDecodeError:
|
|
1333
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1334
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1335
|
+
|
|
1336
|
+
async def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
|
|
1337
|
+
"""
|
|
1338
|
+
Get a job by id
|
|
1339
|
+
|
|
1340
|
+
Parameters:
|
|
1341
|
+
- job_id: str.
|
|
1342
|
+
---
|
|
1343
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1344
|
+
|
|
1345
|
+
client = AsyncLlamaCloud(
|
|
1346
|
+
token="YOUR_TOKEN",
|
|
1347
|
+
)
|
|
1348
|
+
await client.parsing.get_job_raw_structured_result(
|
|
1349
|
+
job_id="string",
|
|
1350
|
+
)
|
|
1351
|
+
"""
|
|
1352
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1353
|
+
"GET",
|
|
1354
|
+
urllib.parse.urljoin(
|
|
1355
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
|
|
1105
1356
|
),
|
|
1106
1357
|
headers=self._client_wrapper.get_headers(),
|
|
1107
1358
|
timeout=60,
|