llama-cloud 0.1.6__py3-none-any.whl → 0.1.7a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +140 -6
- llama_cloud/client.py +15 -0
- llama_cloud/environment.py +1 -1
- llama_cloud/resources/__init__.py +15 -0
- llama_cloud/{types/token.py → resources/chat_apps/__init__.py} +0 -3
- llama_cloud/resources/chat_apps/client.py +620 -0
- llama_cloud/resources/data_sinks/client.py +12 -12
- llama_cloud/resources/data_sources/client.py +14 -14
- llama_cloud/resources/embedding_model_configs/client.py +20 -76
- llama_cloud/resources/evals/client.py +26 -36
- llama_cloud/resources/extraction/client.py +32 -32
- llama_cloud/resources/files/client.py +40 -44
- llama_cloud/resources/jobs/__init__.py +2 -0
- llama_cloud/resources/jobs/client.py +148 -0
- llama_cloud/resources/llama_extract/__init__.py +5 -0
- llama_cloud/resources/llama_extract/client.py +1038 -0
- llama_cloud/resources/llama_extract/types/__init__.py +6 -0
- llama_cloud/resources/llama_extract/types/extract_agent_create_data_schema_value.py +7 -0
- llama_cloud/resources/llama_extract/types/extract_agent_update_data_schema_value.py +7 -0
- llama_cloud/resources/organizations/client.py +66 -70
- llama_cloud/resources/parsing/client.py +448 -428
- llama_cloud/resources/pipelines/client.py +256 -344
- llama_cloud/resources/projects/client.py +34 -60
- llama_cloud/resources/reports/__init__.py +5 -0
- llama_cloud/resources/reports/client.py +1198 -0
- llama_cloud/resources/reports/types/__init__.py +7 -0
- llama_cloud/resources/reports/types/update_report_plan_api_v_1_reports_report_id_plan_patch_request_action.py +25 -0
- llama_cloud/resources/retrievers/__init__.py +2 -0
- llama_cloud/resources/retrievers/client.py +654 -0
- llama_cloud/types/__init__.py +128 -6
- llama_cloud/types/{chat_message.py → app_schema_chat_chat_message.py} +3 -3
- llama_cloud/types/azure_open_ai_embedding.py +6 -12
- llama_cloud/types/base_prompt_template.py +2 -6
- llama_cloud/types/bedrock_embedding.py +6 -12
- llama_cloud/types/character_splitter.py +2 -4
- llama_cloud/types/chat_app.py +44 -0
- llama_cloud/types/chat_app_response.py +41 -0
- llama_cloud/types/cloud_az_storage_blob_data_source.py +7 -15
- llama_cloud/types/cloud_box_data_source.py +6 -12
- llama_cloud/types/cloud_confluence_data_source.py +6 -6
- llama_cloud/types/cloud_document.py +1 -3
- llama_cloud/types/cloud_document_create.py +1 -3
- llama_cloud/types/cloud_jira_data_source.py +4 -6
- llama_cloud/types/cloud_notion_page_data_source.py +2 -2
- llama_cloud/types/cloud_one_drive_data_source.py +3 -5
- llama_cloud/types/cloud_postgres_vector_store.py +1 -0
- llama_cloud/types/cloud_s_3_data_source.py +4 -8
- llama_cloud/types/cloud_sharepoint_data_source.py +6 -8
- llama_cloud/types/cloud_slack_data_source.py +6 -6
- llama_cloud/types/code_splitter.py +1 -1
- llama_cloud/types/cohere_embedding.py +3 -7
- llama_cloud/types/composite_retrieval_mode.py +21 -0
- llama_cloud/types/composite_retrieval_result.py +38 -0
- llama_cloud/types/composite_retrieved_text_node.py +42 -0
- llama_cloud/types/data_sink.py +4 -4
- llama_cloud/types/data_sink_component.py +20 -0
- llama_cloud/types/data_source.py +5 -7
- llama_cloud/types/data_source_component.py +28 -0
- llama_cloud/types/data_source_create.py +1 -3
- llama_cloud/types/edit_suggestion.py +39 -0
- llama_cloud/types/embedding_model_config.py +2 -2
- llama_cloud/types/embedding_model_config_update.py +2 -4
- llama_cloud/types/eval_dataset.py +2 -2
- llama_cloud/types/eval_dataset_job_record.py +8 -13
- llama_cloud/types/eval_execution_params_override.py +2 -6
- llama_cloud/types/eval_question.py +2 -2
- llama_cloud/types/extract_agent.py +45 -0
- llama_cloud/types/extract_agent_data_schema_value.py +5 -0
- llama_cloud/types/extract_config.py +40 -0
- llama_cloud/types/extract_job.py +35 -0
- llama_cloud/types/extract_job_create.py +40 -0
- llama_cloud/types/extract_job_create_data_schema_override_value.py +7 -0
- llama_cloud/types/extract_mode.py +17 -0
- llama_cloud/types/extract_resultset.py +46 -0
- llama_cloud/types/extract_resultset_data.py +11 -0
- llama_cloud/types/extract_resultset_data_item_value.py +7 -0
- llama_cloud/types/extract_resultset_data_zero_value.py +7 -0
- llama_cloud/types/extract_resultset_extraction_metadata_value.py +7 -0
- llama_cloud/types/extraction_result.py +2 -2
- llama_cloud/types/extraction_schema.py +3 -5
- llama_cloud/types/file.py +9 -14
- llama_cloud/types/filter_condition.py +9 -1
- llama_cloud/types/filter_operator.py +6 -2
- llama_cloud/types/gemini_embedding.py +6 -10
- llama_cloud/types/hugging_face_inference_api_embedding.py +11 -27
- llama_cloud/types/hugging_face_inference_api_embedding_token.py +5 -0
- llama_cloud/types/image_block.py +35 -0
- llama_cloud/types/input_message.py +2 -4
- llama_cloud/types/job_names.py +89 -0
- llama_cloud/types/job_record.py +57 -0
- llama_cloud/types/job_record_with_usage_metrics.py +36 -0
- llama_cloud/types/llama_index_core_base_llms_types_chat_message.py +39 -0
- llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +33 -0
- llama_cloud/types/llama_parse_parameters.py +4 -0
- llama_cloud/types/llm.py +3 -4
- llama_cloud/types/llm_model_data.py +1 -0
- llama_cloud/types/llm_parameters.py +3 -5
- llama_cloud/types/local_eval.py +8 -10
- llama_cloud/types/local_eval_results.py +1 -1
- llama_cloud/types/managed_ingestion_status.py +4 -0
- llama_cloud/types/managed_ingestion_status_response.py +4 -5
- llama_cloud/types/markdown_element_node_parser.py +3 -5
- llama_cloud/types/markdown_node_parser.py +1 -1
- llama_cloud/types/metadata_filter.py +2 -2
- llama_cloud/types/metadata_filter_value.py +5 -0
- llama_cloud/types/metric_result.py +3 -3
- llama_cloud/types/node_parser.py +1 -1
- llama_cloud/types/object_type.py +4 -0
- llama_cloud/types/open_ai_embedding.py +6 -12
- llama_cloud/types/organization.py +7 -2
- llama_cloud/types/page_splitter_node_parser.py +2 -2
- llama_cloud/types/paginated_jobs_history_with_metrics.py +35 -0
- llama_cloud/types/paginated_report_response.py +35 -0
- llama_cloud/types/parse_plan_level.py +21 -0
- llama_cloud/types/permission.py +3 -3
- llama_cloud/types/pipeline.py +7 -17
- llama_cloud/types/pipeline_configuration_hashes.py +3 -3
- llama_cloud/types/pipeline_create.py +8 -16
- llama_cloud/types/pipeline_data_source.py +7 -13
- llama_cloud/types/pipeline_data_source_component.py +28 -0
- llama_cloud/types/pipeline_data_source_create.py +1 -3
- llama_cloud/types/pipeline_deployment.py +4 -4
- llama_cloud/types/pipeline_file.py +13 -24
- llama_cloud/types/pipeline_file_create.py +1 -3
- llama_cloud/types/playground_session.py +4 -4
- llama_cloud/types/preset_retrieval_params.py +8 -14
- llama_cloud/types/presigned_url.py +1 -3
- llama_cloud/types/progress_event.py +44 -0
- llama_cloud/types/progress_event_status.py +33 -0
- llama_cloud/types/project.py +2 -2
- llama_cloud/types/prompt_mixin_prompts.py +1 -1
- llama_cloud/types/prompt_spec.py +3 -5
- llama_cloud/types/related_node_info.py +2 -2
- llama_cloud/types/related_node_info_node_type.py +7 -0
- llama_cloud/types/report.py +33 -0
- llama_cloud/types/report_block.py +34 -0
- llama_cloud/types/report_block_dependency.py +29 -0
- llama_cloud/types/report_create_response.py +31 -0
- llama_cloud/types/report_event_item.py +40 -0
- llama_cloud/types/report_event_item_event_data.py +45 -0
- llama_cloud/types/report_event_type.py +37 -0
- llama_cloud/types/report_metadata.py +39 -0
- llama_cloud/types/report_plan.py +36 -0
- llama_cloud/types/report_plan_block.py +36 -0
- llama_cloud/types/report_query.py +33 -0
- llama_cloud/types/report_response.py +41 -0
- llama_cloud/types/report_state.py +37 -0
- llama_cloud/types/report_state_event.py +38 -0
- llama_cloud/types/report_update_event.py +38 -0
- llama_cloud/types/retrieve_results.py +1 -1
- llama_cloud/types/retriever.py +45 -0
- llama_cloud/types/retriever_create.py +37 -0
- llama_cloud/types/retriever_pipeline.py +37 -0
- llama_cloud/types/role.py +3 -3
- llama_cloud/types/sentence_splitter.py +2 -4
- llama_cloud/types/status_enum.py +4 -0
- llama_cloud/types/supported_llm_model_names.py +4 -0
- llama_cloud/types/text_block.py +31 -0
- llama_cloud/types/text_node.py +15 -8
- llama_cloud/types/token_text_splitter.py +1 -1
- llama_cloud/types/usage_metric_response.py +34 -0
- llama_cloud/types/user_job_record.py +32 -0
- llama_cloud/types/user_organization.py +5 -9
- llama_cloud/types/user_organization_create.py +4 -4
- llama_cloud/types/user_organization_delete.py +2 -2
- llama_cloud/types/user_organization_role.py +2 -2
- llama_cloud/types/vertex_text_embedding.py +5 -9
- {llama_cloud-0.1.6.dist-info → llama_cloud-0.1.7a1.dist-info}/METADATA +2 -1
- llama_cloud-0.1.7a1.dist-info/RECORD +310 -0
- llama_cloud/types/value.py +0 -5
- llama_cloud-0.1.6.dist-info/RECORD +0 -241
- {llama_cloud-0.1.6.dist-info → llama_cloud-0.1.7a1.dist-info}/LICENSE +0 -0
- {llama_cloud-0.1.6.dist-info → llama_cloud-0.1.7a1.dist-info}/WHEEL +0 -0
|
@@ -37,7 +37,7 @@ class ParsingClient:
|
|
|
37
37
|
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
38
38
|
self._client_wrapper = client_wrapper
|
|
39
39
|
|
|
40
|
-
def get_job_image_result(self, job_id: str, name: str) ->
|
|
40
|
+
def get_job_image_result(self, job_id: str, name: str) -> None:
|
|
41
41
|
"""
|
|
42
42
|
Get a job by id
|
|
43
43
|
|
|
@@ -45,29 +45,34 @@ class ParsingClient:
|
|
|
45
45
|
- job_id: str.
|
|
46
46
|
|
|
47
47
|
- name: str.
|
|
48
|
+
---
|
|
49
|
+
from llama_cloud.client import LlamaCloud
|
|
50
|
+
|
|
51
|
+
client = LlamaCloud(
|
|
52
|
+
token="YOUR_TOKEN",
|
|
53
|
+
)
|
|
54
|
+
client.parsing.get_job_image_result(
|
|
55
|
+
job_id="string",
|
|
56
|
+
name="string",
|
|
57
|
+
)
|
|
48
58
|
"""
|
|
49
|
-
|
|
59
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
50
60
|
"GET",
|
|
51
61
|
urllib.parse.urljoin(
|
|
52
62
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
|
|
53
63
|
),
|
|
54
64
|
headers=self._client_wrapper.get_headers(),
|
|
55
65
|
timeout=60,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
try:
|
|
67
|
-
_response_json = _response.json()
|
|
68
|
-
except JSONDecodeError:
|
|
69
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
70
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
66
|
+
)
|
|
67
|
+
if 200 <= _response.status_code < 300:
|
|
68
|
+
return
|
|
69
|
+
if _response.status_code == 422:
|
|
70
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
71
|
+
try:
|
|
72
|
+
_response_json = _response.json()
|
|
73
|
+
except JSONDecodeError:
|
|
74
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
75
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
71
76
|
|
|
72
77
|
def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
|
|
73
78
|
"""
|
|
@@ -100,58 +105,61 @@ class ParsingClient:
|
|
|
100
105
|
*,
|
|
101
106
|
project_id: typing.Optional[str] = None,
|
|
102
107
|
organization_id: typing.Optional[str] = None,
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
108
|
+
file: typing.Optional[str] = OMIT,
|
|
109
|
+
annotate_links: bool,
|
|
110
|
+
auto_mode: bool,
|
|
111
|
+
auto_mode_trigger_on_image_in_page: bool,
|
|
112
|
+
auto_mode_trigger_on_table_in_page: bool,
|
|
113
|
+
auto_mode_trigger_on_text_in_page: str,
|
|
114
|
+
auto_mode_trigger_on_regexp_in_page: str,
|
|
115
|
+
azure_openai_api_version: str,
|
|
116
|
+
azure_openai_deployment_name: str,
|
|
117
|
+
azure_openai_endpoint: str,
|
|
118
|
+
azure_openai_key: str,
|
|
119
|
+
bbox_bottom: float,
|
|
120
|
+
bbox_left: float,
|
|
121
|
+
bbox_right: float,
|
|
122
|
+
bbox_top: float,
|
|
123
|
+
continuous_mode: bool,
|
|
124
|
+
disable_ocr: bool,
|
|
125
|
+
disable_reconstruction: bool,
|
|
126
|
+
disable_image_extraction: bool,
|
|
127
|
+
do_not_cache: bool,
|
|
128
|
+
do_not_unroll_columns: bool,
|
|
129
|
+
extract_charts: bool,
|
|
130
|
+
fast_mode: bool,
|
|
131
|
+
guess_xlsx_sheet_name: bool,
|
|
132
|
+
html_make_all_elements_visible: bool,
|
|
133
|
+
html_remove_fixed_elements: bool,
|
|
134
|
+
html_remove_navigation_elements: bool,
|
|
135
|
+
http_proxy: str,
|
|
136
|
+
input_s_3_path: str,
|
|
137
|
+
input_url: str,
|
|
138
|
+
invalidate_cache: bool,
|
|
139
|
+
is_formatting_instruction: bool,
|
|
140
|
+
language: typing.List[ParserLanguages],
|
|
141
|
+
extract_layout: bool,
|
|
142
|
+
max_pages: typing.Optional[int] = OMIT,
|
|
143
|
+
output_pdf_of_document: bool,
|
|
144
|
+
output_s_3_path_prefix: str,
|
|
145
|
+
page_prefix: str,
|
|
146
|
+
page_separator: str,
|
|
147
|
+
page_suffix: str,
|
|
148
|
+
parsing_instruction: str,
|
|
149
|
+
premium_mode: bool,
|
|
150
|
+
skip_diagonal_text: bool,
|
|
151
|
+
structured_output: bool,
|
|
152
|
+
structured_output_json_schema: str,
|
|
153
|
+
structured_output_json_schema_name: str,
|
|
154
|
+
take_screenshot: bool,
|
|
155
|
+
target_pages: str,
|
|
156
|
+
use_vendor_multimodal_model: bool,
|
|
157
|
+
vendor_multimodal_api_key: str,
|
|
158
|
+
vendor_multimodal_model_name: str,
|
|
159
|
+
webhook_url: str,
|
|
160
|
+
bounding_box: str,
|
|
161
|
+
gpt_4_o_mode: bool,
|
|
162
|
+
gpt_4_o_api_key: str,
|
|
155
163
|
) -> ParsingJob:
|
|
156
164
|
"""
|
|
157
165
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -161,178 +169,180 @@ class ParsingClient:
|
|
|
161
169
|
|
|
162
170
|
- organization_id: typing.Optional[str].
|
|
163
171
|
|
|
164
|
-
-
|
|
165
|
-
|
|
166
|
-
- parsing_instruction: typing.Optional[str].
|
|
167
|
-
|
|
168
|
-
- skip_diagonal_text: typing.Optional[bool].
|
|
169
|
-
|
|
170
|
-
- invalidate_cache: typing.Optional[bool].
|
|
171
|
-
|
|
172
|
-
- output_pdf_of_document: typing.Optional[bool].
|
|
173
|
-
|
|
174
|
-
- do_not_cache: typing.Optional[bool].
|
|
175
|
-
|
|
176
|
-
- gpt_4_o_mode: typing.Optional[bool].
|
|
177
|
-
|
|
178
|
-
- fast_mode: typing.Optional[bool].
|
|
179
|
-
|
|
180
|
-
- premium_mode: typing.Optional[bool].
|
|
181
|
-
|
|
182
|
-
- continuous_mode: typing.Optional[bool].
|
|
183
|
-
|
|
184
|
-
- gpt_4_o_api_key: typing.Optional[str].
|
|
185
|
-
|
|
186
|
-
- do_not_unroll_columns: typing.Optional[bool].
|
|
187
|
-
|
|
188
|
-
- html_make_all_elements_visible: typing.Optional[bool].
|
|
189
|
-
|
|
190
|
-
- html_remove_fixed_elements: typing.Optional[bool].
|
|
191
|
-
|
|
192
|
-
- guess_xlsx_sheet_name: typing.Optional[bool].
|
|
193
|
-
|
|
194
|
-
- page_separator: typing.Optional[str].
|
|
195
|
-
|
|
196
|
-
- bounding_box: typing.Optional[str].
|
|
197
|
-
|
|
198
|
-
- bbox_top: typing.Optional[float].
|
|
172
|
+
- file: typing.Optional[str].
|
|
199
173
|
|
|
200
|
-
-
|
|
174
|
+
- annotate_links: bool.
|
|
201
175
|
|
|
202
|
-
-
|
|
176
|
+
- auto_mode: bool.
|
|
203
177
|
|
|
204
|
-
-
|
|
178
|
+
- auto_mode_trigger_on_image_in_page: bool.
|
|
205
179
|
|
|
206
|
-
-
|
|
180
|
+
- auto_mode_trigger_on_table_in_page: bool.
|
|
207
181
|
|
|
208
|
-
-
|
|
182
|
+
- auto_mode_trigger_on_text_in_page: str.
|
|
209
183
|
|
|
210
|
-
-
|
|
184
|
+
- auto_mode_trigger_on_regexp_in_page: str.
|
|
211
185
|
|
|
212
|
-
-
|
|
186
|
+
- azure_openai_api_version: str.
|
|
213
187
|
|
|
214
|
-
-
|
|
188
|
+
- azure_openai_deployment_name: str.
|
|
215
189
|
|
|
216
|
-
-
|
|
190
|
+
- azure_openai_endpoint: str.
|
|
217
191
|
|
|
218
|
-
-
|
|
192
|
+
- azure_openai_key: str.
|
|
219
193
|
|
|
220
|
-
-
|
|
194
|
+
- bbox_bottom: float.
|
|
221
195
|
|
|
222
|
-
-
|
|
196
|
+
- bbox_left: float.
|
|
223
197
|
|
|
224
|
-
-
|
|
198
|
+
- bbox_right: float.
|
|
225
199
|
|
|
226
|
-
-
|
|
200
|
+
- bbox_top: float.
|
|
227
201
|
|
|
228
|
-
-
|
|
202
|
+
- continuous_mode: bool.
|
|
229
203
|
|
|
230
|
-
-
|
|
204
|
+
- disable_ocr: bool.
|
|
231
205
|
|
|
232
|
-
-
|
|
206
|
+
- disable_reconstruction: bool.
|
|
233
207
|
|
|
234
|
-
-
|
|
208
|
+
- disable_image_extraction: bool.
|
|
235
209
|
|
|
236
|
-
-
|
|
210
|
+
- do_not_cache: bool.
|
|
237
211
|
|
|
238
|
-
-
|
|
212
|
+
- do_not_unroll_columns: bool.
|
|
239
213
|
|
|
240
|
-
-
|
|
214
|
+
- extract_charts: bool.
|
|
241
215
|
|
|
242
|
-
-
|
|
216
|
+
- fast_mode: bool.
|
|
243
217
|
|
|
244
|
-
-
|
|
218
|
+
- guess_xlsx_sheet_name: bool.
|
|
245
219
|
|
|
246
|
-
-
|
|
220
|
+
- html_make_all_elements_visible: bool.
|
|
247
221
|
|
|
248
|
-
-
|
|
222
|
+
- html_remove_fixed_elements: bool.
|
|
249
223
|
|
|
250
|
-
-
|
|
224
|
+
- html_remove_navigation_elements: bool.
|
|
251
225
|
|
|
252
|
-
-
|
|
226
|
+
- http_proxy: str.
|
|
253
227
|
|
|
254
|
-
-
|
|
228
|
+
- input_s_3_path: str.
|
|
255
229
|
|
|
256
|
-
- input_url:
|
|
230
|
+
- input_url: str.
|
|
257
231
|
|
|
258
|
-
-
|
|
232
|
+
- invalidate_cache: bool.
|
|
259
233
|
|
|
260
|
-
-
|
|
234
|
+
- is_formatting_instruction: bool.
|
|
261
235
|
|
|
262
|
-
-
|
|
236
|
+
- language: typing.List[ParserLanguages].
|
|
263
237
|
|
|
264
|
-
-
|
|
238
|
+
- extract_layout: bool.
|
|
265
239
|
|
|
266
240
|
- max_pages: typing.Optional[int].
|
|
267
|
-
---
|
|
268
|
-
from llama_cloud.client import LlamaCloud
|
|
269
241
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
242
|
+
- output_pdf_of_document: bool.
|
|
243
|
+
|
|
244
|
+
- output_s_3_path_prefix: str.
|
|
245
|
+
|
|
246
|
+
- page_prefix: str.
|
|
247
|
+
|
|
248
|
+
- page_separator: str.
|
|
249
|
+
|
|
250
|
+
- page_suffix: str.
|
|
251
|
+
|
|
252
|
+
- parsing_instruction: str.
|
|
253
|
+
|
|
254
|
+
- premium_mode: bool.
|
|
255
|
+
|
|
256
|
+
- skip_diagonal_text: bool.
|
|
257
|
+
|
|
258
|
+
- structured_output: bool.
|
|
259
|
+
|
|
260
|
+
- structured_output_json_schema: str.
|
|
261
|
+
|
|
262
|
+
- structured_output_json_schema_name: str.
|
|
263
|
+
|
|
264
|
+
- take_screenshot: bool.
|
|
265
|
+
|
|
266
|
+
- target_pages: str.
|
|
267
|
+
|
|
268
|
+
- use_vendor_multimodal_model: bool.
|
|
269
|
+
|
|
270
|
+
- vendor_multimodal_api_key: str.
|
|
271
|
+
|
|
272
|
+
- vendor_multimodal_model_name: str.
|
|
273
|
+
|
|
274
|
+
- webhook_url: str.
|
|
275
|
+
|
|
276
|
+
- bounding_box: str.
|
|
277
|
+
|
|
278
|
+
- gpt_4_o_mode: bool.
|
|
279
|
+
|
|
280
|
+
- gpt_4_o_api_key: str.
|
|
281
|
+
"""
|
|
282
|
+
_request: typing.Dict[str, typing.Any] = {
|
|
283
|
+
"annotate_links": annotate_links,
|
|
284
|
+
"auto_mode": auto_mode,
|
|
285
|
+
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
286
|
+
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
287
|
+
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
288
|
+
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
289
|
+
"azure_openai_api_version": azure_openai_api_version,
|
|
290
|
+
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
291
|
+
"azure_openai_endpoint": azure_openai_endpoint,
|
|
292
|
+
"azure_openai_key": azure_openai_key,
|
|
293
|
+
"bbox_bottom": bbox_bottom,
|
|
294
|
+
"bbox_left": bbox_left,
|
|
295
|
+
"bbox_right": bbox_right,
|
|
296
|
+
"bbox_top": bbox_top,
|
|
297
|
+
"continuous_mode": continuous_mode,
|
|
298
|
+
"disable_ocr": disable_ocr,
|
|
299
|
+
"disable_reconstruction": disable_reconstruction,
|
|
300
|
+
"disable_image_extraction": disable_image_extraction,
|
|
301
|
+
"do_not_cache": do_not_cache,
|
|
302
|
+
"do_not_unroll_columns": do_not_unroll_columns,
|
|
303
|
+
"extract_charts": extract_charts,
|
|
304
|
+
"fast_mode": fast_mode,
|
|
305
|
+
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
306
|
+
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
307
|
+
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
308
|
+
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
309
|
+
"http_proxy": http_proxy,
|
|
310
|
+
"input_s3_path": input_s_3_path,
|
|
311
|
+
"input_url": input_url,
|
|
312
|
+
"invalidate_cache": invalidate_cache,
|
|
313
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
314
|
+
"language": language,
|
|
315
|
+
"extract_layout": extract_layout,
|
|
316
|
+
"output_pdf_of_document": output_pdf_of_document,
|
|
317
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
318
|
+
"page_prefix": page_prefix,
|
|
319
|
+
"page_separator": page_separator,
|
|
320
|
+
"page_suffix": page_suffix,
|
|
321
|
+
"parsing_instruction": parsing_instruction,
|
|
322
|
+
"premium_mode": premium_mode,
|
|
323
|
+
"skip_diagonal_text": skip_diagonal_text,
|
|
324
|
+
"structured_output": structured_output,
|
|
325
|
+
"structured_output_json_schema": structured_output_json_schema,
|
|
326
|
+
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
327
|
+
"take_screenshot": take_screenshot,
|
|
328
|
+
"target_pages": target_pages,
|
|
329
|
+
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
330
|
+
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
331
|
+
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
332
|
+
"webhook_url": webhook_url,
|
|
333
|
+
"bounding_box": bounding_box,
|
|
334
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
335
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
336
|
+
}
|
|
337
|
+
if file is not OMIT:
|
|
338
|
+
_request["file"] = file
|
|
339
|
+
if max_pages is not OMIT:
|
|
340
|
+
_request["max_pages"] = max_pages
|
|
275
341
|
_response = self._client_wrapper.httpx_client.request(
|
|
276
342
|
"POST",
|
|
277
343
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
278
344
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
279
|
-
|
|
280
|
-
{
|
|
281
|
-
"language": language,
|
|
282
|
-
"parsing_instruction": parsing_instruction,
|
|
283
|
-
"skip_diagonal_text": skip_diagonal_text,
|
|
284
|
-
"invalidate_cache": invalidate_cache,
|
|
285
|
-
"output_pdf_of_document": output_pdf_of_document,
|
|
286
|
-
"do_not_cache": do_not_cache,
|
|
287
|
-
"gpt4o_mode": gpt_4_o_mode,
|
|
288
|
-
"fast_mode": fast_mode,
|
|
289
|
-
"premium_mode": premium_mode,
|
|
290
|
-
"continuous_mode": continuous_mode,
|
|
291
|
-
"gpt4o_api_key": gpt_4_o_api_key,
|
|
292
|
-
"do_not_unroll_columns": do_not_unroll_columns,
|
|
293
|
-
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
294
|
-
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
295
|
-
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
296
|
-
"page_separator": page_separator,
|
|
297
|
-
"bounding_box": bounding_box,
|
|
298
|
-
"bbox_top": bbox_top,
|
|
299
|
-
"bbox_right": bbox_right,
|
|
300
|
-
"bbox_bottom": bbox_bottom,
|
|
301
|
-
"bbox_left": bbox_left,
|
|
302
|
-
"target_pages": target_pages,
|
|
303
|
-
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
304
|
-
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
305
|
-
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
306
|
-
"page_prefix": page_prefix,
|
|
307
|
-
"page_suffix": page_suffix,
|
|
308
|
-
"webhook_url": webhook_url,
|
|
309
|
-
"take_screenshot": take_screenshot,
|
|
310
|
-
"is_formatting_instruction": is_formatting_instruction,
|
|
311
|
-
"disable_ocr": disable_ocr,
|
|
312
|
-
"annotate_links": annotate_links,
|
|
313
|
-
"disable_reconstruction": disable_reconstruction,
|
|
314
|
-
"disable_image_extraction": disable_image_extraction,
|
|
315
|
-
"input_s3_path": input_s_3_path,
|
|
316
|
-
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
317
|
-
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
318
|
-
"azure_openai_endpoint": azure_openai_endpoint,
|
|
319
|
-
"azure_openai_api_version": azure_openai_api_version,
|
|
320
|
-
"azure_openai_key": azure_openai_key,
|
|
321
|
-
"auto_mode": auto_mode,
|
|
322
|
-
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
323
|
-
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
324
|
-
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
325
|
-
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
326
|
-
"file": file,
|
|
327
|
-
"input_url": input_url,
|
|
328
|
-
"http_proxy": http_proxy,
|
|
329
|
-
"structured_output": structured_output,
|
|
330
|
-
"structured_output_json_schema": structured_output_json_schema,
|
|
331
|
-
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
332
|
-
"max_pages": max_pages,
|
|
333
|
-
}
|
|
334
|
-
),
|
|
335
|
-
files={},
|
|
345
|
+
json=jsonable_encoder(_request),
|
|
336
346
|
headers=self._client_wrapper.get_headers(),
|
|
337
347
|
timeout=60,
|
|
338
348
|
)
|
|
@@ -388,7 +398,7 @@ class ParsingClient:
|
|
|
388
398
|
token="YOUR_TOKEN",
|
|
389
399
|
)
|
|
390
400
|
client.parsing.get_job(
|
|
391
|
-
job_id="
|
|
401
|
+
job_id="string",
|
|
392
402
|
)
|
|
393
403
|
"""
|
|
394
404
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -420,7 +430,7 @@ class ParsingClient:
|
|
|
420
430
|
token="YOUR_TOKEN",
|
|
421
431
|
)
|
|
422
432
|
client.parsing.get_parsing_job_details(
|
|
423
|
-
job_id="
|
|
433
|
+
job_id="string",
|
|
424
434
|
)
|
|
425
435
|
"""
|
|
426
436
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -452,7 +462,7 @@ class ParsingClient:
|
|
|
452
462
|
token="YOUR_TOKEN",
|
|
453
463
|
)
|
|
454
464
|
client.parsing.get_job_text_result(
|
|
455
|
-
job_id="
|
|
465
|
+
job_id="string",
|
|
456
466
|
)
|
|
457
467
|
"""
|
|
458
468
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -484,7 +494,7 @@ class ParsingClient:
|
|
|
484
494
|
token="YOUR_TOKEN",
|
|
485
495
|
)
|
|
486
496
|
client.parsing.get_job_raw_text_result(
|
|
487
|
-
job_id="
|
|
497
|
+
job_id="string",
|
|
488
498
|
)
|
|
489
499
|
"""
|
|
490
500
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -518,7 +528,7 @@ class ParsingClient:
|
|
|
518
528
|
token="YOUR_TOKEN",
|
|
519
529
|
)
|
|
520
530
|
client.parsing.get_job_structured_result(
|
|
521
|
-
job_id="
|
|
531
|
+
job_id="string",
|
|
522
532
|
)
|
|
523
533
|
"""
|
|
524
534
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -552,7 +562,7 @@ class ParsingClient:
|
|
|
552
562
|
token="YOUR_TOKEN",
|
|
553
563
|
)
|
|
554
564
|
client.parsing.get_job_raw_structured_result(
|
|
555
|
-
job_id="
|
|
565
|
+
job_id="string",
|
|
556
566
|
)
|
|
557
567
|
"""
|
|
558
568
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -586,7 +596,7 @@ class ParsingClient:
|
|
|
586
596
|
token="YOUR_TOKEN",
|
|
587
597
|
)
|
|
588
598
|
client.parsing.get_job_raw_xlsx_result(
|
|
589
|
-
job_id="
|
|
599
|
+
job_id="string",
|
|
590
600
|
)
|
|
591
601
|
"""
|
|
592
602
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -620,7 +630,7 @@ class ParsingClient:
|
|
|
620
630
|
token="YOUR_TOKEN",
|
|
621
631
|
)
|
|
622
632
|
client.parsing.get_job_result(
|
|
623
|
-
job_id="
|
|
633
|
+
job_id="string",
|
|
624
634
|
)
|
|
625
635
|
"""
|
|
626
636
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -654,7 +664,7 @@ class ParsingClient:
|
|
|
654
664
|
token="YOUR_TOKEN",
|
|
655
665
|
)
|
|
656
666
|
client.parsing.get_job_raw_md_result(
|
|
657
|
-
job_id="
|
|
667
|
+
job_id="string",
|
|
658
668
|
)
|
|
659
669
|
"""
|
|
660
670
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -688,7 +698,7 @@ class ParsingClient:
|
|
|
688
698
|
token="YOUR_TOKEN",
|
|
689
699
|
)
|
|
690
700
|
client.parsing.get_job_json_result(
|
|
691
|
-
job_id="
|
|
701
|
+
job_id="string",
|
|
692
702
|
)
|
|
693
703
|
"""
|
|
694
704
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -720,7 +730,7 @@ class ParsingClient:
|
|
|
720
730
|
token="YOUR_TOKEN",
|
|
721
731
|
)
|
|
722
732
|
client.parsing.get_job_json_raw_result(
|
|
723
|
-
job_id="
|
|
733
|
+
job_id="string",
|
|
724
734
|
)
|
|
725
735
|
"""
|
|
726
736
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -784,8 +794,8 @@ class ParsingClient:
|
|
|
784
794
|
token="YOUR_TOKEN",
|
|
785
795
|
)
|
|
786
796
|
client.parsing.generate_presigned_url(
|
|
787
|
-
job_id="
|
|
788
|
-
filename="
|
|
797
|
+
job_id="string",
|
|
798
|
+
filename="string",
|
|
789
799
|
)
|
|
790
800
|
"""
|
|
791
801
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -811,7 +821,7 @@ class AsyncParsingClient:
|
|
|
811
821
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
812
822
|
self._client_wrapper = client_wrapper
|
|
813
823
|
|
|
814
|
-
async def get_job_image_result(self, job_id: str, name: str) ->
|
|
824
|
+
async def get_job_image_result(self, job_id: str, name: str) -> None:
|
|
815
825
|
"""
|
|
816
826
|
Get a job by id
|
|
817
827
|
|
|
@@ -819,29 +829,34 @@ class AsyncParsingClient:
|
|
|
819
829
|
- job_id: str.
|
|
820
830
|
|
|
821
831
|
- name: str.
|
|
832
|
+
---
|
|
833
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
834
|
+
|
|
835
|
+
client = AsyncLlamaCloud(
|
|
836
|
+
token="YOUR_TOKEN",
|
|
837
|
+
)
|
|
838
|
+
await client.parsing.get_job_image_result(
|
|
839
|
+
job_id="string",
|
|
840
|
+
name="string",
|
|
841
|
+
)
|
|
822
842
|
"""
|
|
823
|
-
|
|
843
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
824
844
|
"GET",
|
|
825
845
|
urllib.parse.urljoin(
|
|
826
846
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
|
|
827
847
|
),
|
|
828
848
|
headers=self._client_wrapper.get_headers(),
|
|
829
849
|
timeout=60,
|
|
830
|
-
)
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
try:
|
|
841
|
-
_response_json = _response.json()
|
|
842
|
-
except JSONDecodeError:
|
|
843
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
844
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
850
|
+
)
|
|
851
|
+
if 200 <= _response.status_code < 300:
|
|
852
|
+
return
|
|
853
|
+
if _response.status_code == 422:
|
|
854
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
855
|
+
try:
|
|
856
|
+
_response_json = _response.json()
|
|
857
|
+
except JSONDecodeError:
|
|
858
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
859
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
845
860
|
|
|
846
861
|
async def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
|
|
847
862
|
"""
|
|
@@ -874,58 +889,61 @@ class AsyncParsingClient:
|
|
|
874
889
|
*,
|
|
875
890
|
project_id: typing.Optional[str] = None,
|
|
876
891
|
organization_id: typing.Optional[str] = None,
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
892
|
+
file: typing.Optional[str] = OMIT,
|
|
893
|
+
annotate_links: bool,
|
|
894
|
+
auto_mode: bool,
|
|
895
|
+
auto_mode_trigger_on_image_in_page: bool,
|
|
896
|
+
auto_mode_trigger_on_table_in_page: bool,
|
|
897
|
+
auto_mode_trigger_on_text_in_page: str,
|
|
898
|
+
auto_mode_trigger_on_regexp_in_page: str,
|
|
899
|
+
azure_openai_api_version: str,
|
|
900
|
+
azure_openai_deployment_name: str,
|
|
901
|
+
azure_openai_endpoint: str,
|
|
902
|
+
azure_openai_key: str,
|
|
903
|
+
bbox_bottom: float,
|
|
904
|
+
bbox_left: float,
|
|
905
|
+
bbox_right: float,
|
|
906
|
+
bbox_top: float,
|
|
907
|
+
continuous_mode: bool,
|
|
908
|
+
disable_ocr: bool,
|
|
909
|
+
disable_reconstruction: bool,
|
|
910
|
+
disable_image_extraction: bool,
|
|
911
|
+
do_not_cache: bool,
|
|
912
|
+
do_not_unroll_columns: bool,
|
|
913
|
+
extract_charts: bool,
|
|
914
|
+
fast_mode: bool,
|
|
915
|
+
guess_xlsx_sheet_name: bool,
|
|
916
|
+
html_make_all_elements_visible: bool,
|
|
917
|
+
html_remove_fixed_elements: bool,
|
|
918
|
+
html_remove_navigation_elements: bool,
|
|
919
|
+
http_proxy: str,
|
|
920
|
+
input_s_3_path: str,
|
|
921
|
+
input_url: str,
|
|
922
|
+
invalidate_cache: bool,
|
|
923
|
+
is_formatting_instruction: bool,
|
|
924
|
+
language: typing.List[ParserLanguages],
|
|
925
|
+
extract_layout: bool,
|
|
926
|
+
max_pages: typing.Optional[int] = OMIT,
|
|
927
|
+
output_pdf_of_document: bool,
|
|
928
|
+
output_s_3_path_prefix: str,
|
|
929
|
+
page_prefix: str,
|
|
930
|
+
page_separator: str,
|
|
931
|
+
page_suffix: str,
|
|
932
|
+
parsing_instruction: str,
|
|
933
|
+
premium_mode: bool,
|
|
934
|
+
skip_diagonal_text: bool,
|
|
935
|
+
structured_output: bool,
|
|
936
|
+
structured_output_json_schema: str,
|
|
937
|
+
structured_output_json_schema_name: str,
|
|
938
|
+
take_screenshot: bool,
|
|
939
|
+
target_pages: str,
|
|
940
|
+
use_vendor_multimodal_model: bool,
|
|
941
|
+
vendor_multimodal_api_key: str,
|
|
942
|
+
vendor_multimodal_model_name: str,
|
|
943
|
+
webhook_url: str,
|
|
944
|
+
bounding_box: str,
|
|
945
|
+
gpt_4_o_mode: bool,
|
|
946
|
+
gpt_4_o_api_key: str,
|
|
929
947
|
) -> ParsingJob:
|
|
930
948
|
"""
|
|
931
949
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -935,178 +953,180 @@ class AsyncParsingClient:
|
|
|
935
953
|
|
|
936
954
|
- organization_id: typing.Optional[str].
|
|
937
955
|
|
|
938
|
-
-
|
|
939
|
-
|
|
940
|
-
- parsing_instruction: typing.Optional[str].
|
|
941
|
-
|
|
942
|
-
- skip_diagonal_text: typing.Optional[bool].
|
|
943
|
-
|
|
944
|
-
- invalidate_cache: typing.Optional[bool].
|
|
945
|
-
|
|
946
|
-
- output_pdf_of_document: typing.Optional[bool].
|
|
947
|
-
|
|
948
|
-
- do_not_cache: typing.Optional[bool].
|
|
949
|
-
|
|
950
|
-
- gpt_4_o_mode: typing.Optional[bool].
|
|
951
|
-
|
|
952
|
-
- fast_mode: typing.Optional[bool].
|
|
953
|
-
|
|
954
|
-
- premium_mode: typing.Optional[bool].
|
|
955
|
-
|
|
956
|
-
- continuous_mode: typing.Optional[bool].
|
|
957
|
-
|
|
958
|
-
- gpt_4_o_api_key: typing.Optional[str].
|
|
959
|
-
|
|
960
|
-
- do_not_unroll_columns: typing.Optional[bool].
|
|
961
|
-
|
|
962
|
-
- html_make_all_elements_visible: typing.Optional[bool].
|
|
963
|
-
|
|
964
|
-
- html_remove_fixed_elements: typing.Optional[bool].
|
|
965
|
-
|
|
966
|
-
- guess_xlsx_sheet_name: typing.Optional[bool].
|
|
967
|
-
|
|
968
|
-
- page_separator: typing.Optional[str].
|
|
969
|
-
|
|
970
|
-
- bounding_box: typing.Optional[str].
|
|
971
|
-
|
|
972
|
-
- bbox_top: typing.Optional[float].
|
|
956
|
+
- file: typing.Optional[str].
|
|
973
957
|
|
|
974
|
-
-
|
|
958
|
+
- annotate_links: bool.
|
|
975
959
|
|
|
976
|
-
-
|
|
960
|
+
- auto_mode: bool.
|
|
977
961
|
|
|
978
|
-
-
|
|
962
|
+
- auto_mode_trigger_on_image_in_page: bool.
|
|
979
963
|
|
|
980
|
-
-
|
|
964
|
+
- auto_mode_trigger_on_table_in_page: bool.
|
|
981
965
|
|
|
982
|
-
-
|
|
966
|
+
- auto_mode_trigger_on_text_in_page: str.
|
|
983
967
|
|
|
984
|
-
-
|
|
968
|
+
- auto_mode_trigger_on_regexp_in_page: str.
|
|
985
969
|
|
|
986
|
-
-
|
|
970
|
+
- azure_openai_api_version: str.
|
|
987
971
|
|
|
988
|
-
-
|
|
972
|
+
- azure_openai_deployment_name: str.
|
|
989
973
|
|
|
990
|
-
-
|
|
974
|
+
- azure_openai_endpoint: str.
|
|
991
975
|
|
|
992
|
-
-
|
|
976
|
+
- azure_openai_key: str.
|
|
993
977
|
|
|
994
|
-
-
|
|
978
|
+
- bbox_bottom: float.
|
|
995
979
|
|
|
996
|
-
-
|
|
980
|
+
- bbox_left: float.
|
|
997
981
|
|
|
998
|
-
-
|
|
982
|
+
- bbox_right: float.
|
|
999
983
|
|
|
1000
|
-
-
|
|
984
|
+
- bbox_top: float.
|
|
1001
985
|
|
|
1002
|
-
-
|
|
986
|
+
- continuous_mode: bool.
|
|
1003
987
|
|
|
1004
|
-
-
|
|
988
|
+
- disable_ocr: bool.
|
|
1005
989
|
|
|
1006
|
-
-
|
|
990
|
+
- disable_reconstruction: bool.
|
|
1007
991
|
|
|
1008
|
-
-
|
|
992
|
+
- disable_image_extraction: bool.
|
|
1009
993
|
|
|
1010
|
-
-
|
|
994
|
+
- do_not_cache: bool.
|
|
1011
995
|
|
|
1012
|
-
-
|
|
996
|
+
- do_not_unroll_columns: bool.
|
|
1013
997
|
|
|
1014
|
-
-
|
|
998
|
+
- extract_charts: bool.
|
|
1015
999
|
|
|
1016
|
-
-
|
|
1000
|
+
- fast_mode: bool.
|
|
1017
1001
|
|
|
1018
|
-
-
|
|
1002
|
+
- guess_xlsx_sheet_name: bool.
|
|
1019
1003
|
|
|
1020
|
-
-
|
|
1004
|
+
- html_make_all_elements_visible: bool.
|
|
1021
1005
|
|
|
1022
|
-
-
|
|
1006
|
+
- html_remove_fixed_elements: bool.
|
|
1023
1007
|
|
|
1024
|
-
-
|
|
1008
|
+
- html_remove_navigation_elements: bool.
|
|
1025
1009
|
|
|
1026
|
-
-
|
|
1010
|
+
- http_proxy: str.
|
|
1027
1011
|
|
|
1028
|
-
-
|
|
1012
|
+
- input_s_3_path: str.
|
|
1029
1013
|
|
|
1030
|
-
- input_url:
|
|
1014
|
+
- input_url: str.
|
|
1031
1015
|
|
|
1032
|
-
-
|
|
1016
|
+
- invalidate_cache: bool.
|
|
1033
1017
|
|
|
1034
|
-
-
|
|
1018
|
+
- is_formatting_instruction: bool.
|
|
1035
1019
|
|
|
1036
|
-
-
|
|
1020
|
+
- language: typing.List[ParserLanguages].
|
|
1037
1021
|
|
|
1038
|
-
-
|
|
1022
|
+
- extract_layout: bool.
|
|
1039
1023
|
|
|
1040
1024
|
- max_pages: typing.Optional[int].
|
|
1041
|
-
---
|
|
1042
|
-
from llama_cloud.client import AsyncLlamaCloud
|
|
1043
1025
|
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1026
|
+
- output_pdf_of_document: bool.
|
|
1027
|
+
|
|
1028
|
+
- output_s_3_path_prefix: str.
|
|
1029
|
+
|
|
1030
|
+
- page_prefix: str.
|
|
1031
|
+
|
|
1032
|
+
- page_separator: str.
|
|
1033
|
+
|
|
1034
|
+
- page_suffix: str.
|
|
1035
|
+
|
|
1036
|
+
- parsing_instruction: str.
|
|
1037
|
+
|
|
1038
|
+
- premium_mode: bool.
|
|
1039
|
+
|
|
1040
|
+
- skip_diagonal_text: bool.
|
|
1041
|
+
|
|
1042
|
+
- structured_output: bool.
|
|
1043
|
+
|
|
1044
|
+
- structured_output_json_schema: str.
|
|
1045
|
+
|
|
1046
|
+
- structured_output_json_schema_name: str.
|
|
1047
|
+
|
|
1048
|
+
- take_screenshot: bool.
|
|
1049
|
+
|
|
1050
|
+
- target_pages: str.
|
|
1051
|
+
|
|
1052
|
+
- use_vendor_multimodal_model: bool.
|
|
1053
|
+
|
|
1054
|
+
- vendor_multimodal_api_key: str.
|
|
1055
|
+
|
|
1056
|
+
- vendor_multimodal_model_name: str.
|
|
1057
|
+
|
|
1058
|
+
- webhook_url: str.
|
|
1059
|
+
|
|
1060
|
+
- bounding_box: str.
|
|
1061
|
+
|
|
1062
|
+
- gpt_4_o_mode: bool.
|
|
1063
|
+
|
|
1064
|
+
- gpt_4_o_api_key: str.
|
|
1065
|
+
"""
|
|
1066
|
+
_request: typing.Dict[str, typing.Any] = {
|
|
1067
|
+
"annotate_links": annotate_links,
|
|
1068
|
+
"auto_mode": auto_mode,
|
|
1069
|
+
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
1070
|
+
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
1071
|
+
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
1072
|
+
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
1073
|
+
"azure_openai_api_version": azure_openai_api_version,
|
|
1074
|
+
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
1075
|
+
"azure_openai_endpoint": azure_openai_endpoint,
|
|
1076
|
+
"azure_openai_key": azure_openai_key,
|
|
1077
|
+
"bbox_bottom": bbox_bottom,
|
|
1078
|
+
"bbox_left": bbox_left,
|
|
1079
|
+
"bbox_right": bbox_right,
|
|
1080
|
+
"bbox_top": bbox_top,
|
|
1081
|
+
"continuous_mode": continuous_mode,
|
|
1082
|
+
"disable_ocr": disable_ocr,
|
|
1083
|
+
"disable_reconstruction": disable_reconstruction,
|
|
1084
|
+
"disable_image_extraction": disable_image_extraction,
|
|
1085
|
+
"do_not_cache": do_not_cache,
|
|
1086
|
+
"do_not_unroll_columns": do_not_unroll_columns,
|
|
1087
|
+
"extract_charts": extract_charts,
|
|
1088
|
+
"fast_mode": fast_mode,
|
|
1089
|
+
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
1090
|
+
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
1091
|
+
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
1092
|
+
"html_remove_navigation_elements": html_remove_navigation_elements,
|
|
1093
|
+
"http_proxy": http_proxy,
|
|
1094
|
+
"input_s3_path": input_s_3_path,
|
|
1095
|
+
"input_url": input_url,
|
|
1096
|
+
"invalidate_cache": invalidate_cache,
|
|
1097
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
1098
|
+
"language": language,
|
|
1099
|
+
"extract_layout": extract_layout,
|
|
1100
|
+
"output_pdf_of_document": output_pdf_of_document,
|
|
1101
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
1102
|
+
"page_prefix": page_prefix,
|
|
1103
|
+
"page_separator": page_separator,
|
|
1104
|
+
"page_suffix": page_suffix,
|
|
1105
|
+
"parsing_instruction": parsing_instruction,
|
|
1106
|
+
"premium_mode": premium_mode,
|
|
1107
|
+
"skip_diagonal_text": skip_diagonal_text,
|
|
1108
|
+
"structured_output": structured_output,
|
|
1109
|
+
"structured_output_json_schema": structured_output_json_schema,
|
|
1110
|
+
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
1111
|
+
"take_screenshot": take_screenshot,
|
|
1112
|
+
"target_pages": target_pages,
|
|
1113
|
+
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
1114
|
+
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
1115
|
+
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
1116
|
+
"webhook_url": webhook_url,
|
|
1117
|
+
"bounding_box": bounding_box,
|
|
1118
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
1119
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
1120
|
+
}
|
|
1121
|
+
if file is not OMIT:
|
|
1122
|
+
_request["file"] = file
|
|
1123
|
+
if max_pages is not OMIT:
|
|
1124
|
+
_request["max_pages"] = max_pages
|
|
1049
1125
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1050
1126
|
"POST",
|
|
1051
1127
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
1052
1128
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1053
|
-
|
|
1054
|
-
{
|
|
1055
|
-
"language": language,
|
|
1056
|
-
"parsing_instruction": parsing_instruction,
|
|
1057
|
-
"skip_diagonal_text": skip_diagonal_text,
|
|
1058
|
-
"invalidate_cache": invalidate_cache,
|
|
1059
|
-
"output_pdf_of_document": output_pdf_of_document,
|
|
1060
|
-
"do_not_cache": do_not_cache,
|
|
1061
|
-
"gpt4o_mode": gpt_4_o_mode,
|
|
1062
|
-
"fast_mode": fast_mode,
|
|
1063
|
-
"premium_mode": premium_mode,
|
|
1064
|
-
"continuous_mode": continuous_mode,
|
|
1065
|
-
"gpt4o_api_key": gpt_4_o_api_key,
|
|
1066
|
-
"do_not_unroll_columns": do_not_unroll_columns,
|
|
1067
|
-
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
1068
|
-
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
1069
|
-
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
1070
|
-
"page_separator": page_separator,
|
|
1071
|
-
"bounding_box": bounding_box,
|
|
1072
|
-
"bbox_top": bbox_top,
|
|
1073
|
-
"bbox_right": bbox_right,
|
|
1074
|
-
"bbox_bottom": bbox_bottom,
|
|
1075
|
-
"bbox_left": bbox_left,
|
|
1076
|
-
"target_pages": target_pages,
|
|
1077
|
-
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
1078
|
-
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
1079
|
-
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
1080
|
-
"page_prefix": page_prefix,
|
|
1081
|
-
"page_suffix": page_suffix,
|
|
1082
|
-
"webhook_url": webhook_url,
|
|
1083
|
-
"take_screenshot": take_screenshot,
|
|
1084
|
-
"is_formatting_instruction": is_formatting_instruction,
|
|
1085
|
-
"disable_ocr": disable_ocr,
|
|
1086
|
-
"annotate_links": annotate_links,
|
|
1087
|
-
"disable_reconstruction": disable_reconstruction,
|
|
1088
|
-
"disable_image_extraction": disable_image_extraction,
|
|
1089
|
-
"input_s3_path": input_s_3_path,
|
|
1090
|
-
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
1091
|
-
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
1092
|
-
"azure_openai_endpoint": azure_openai_endpoint,
|
|
1093
|
-
"azure_openai_api_version": azure_openai_api_version,
|
|
1094
|
-
"azure_openai_key": azure_openai_key,
|
|
1095
|
-
"auto_mode": auto_mode,
|
|
1096
|
-
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
1097
|
-
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
1098
|
-
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
1099
|
-
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
1100
|
-
"file": file,
|
|
1101
|
-
"input_url": input_url,
|
|
1102
|
-
"http_proxy": http_proxy,
|
|
1103
|
-
"structured_output": structured_output,
|
|
1104
|
-
"structured_output_json_schema": structured_output_json_schema,
|
|
1105
|
-
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
1106
|
-
"max_pages": max_pages,
|
|
1107
|
-
}
|
|
1108
|
-
),
|
|
1109
|
-
files={},
|
|
1129
|
+
json=jsonable_encoder(_request),
|
|
1110
1130
|
headers=self._client_wrapper.get_headers(),
|
|
1111
1131
|
timeout=60,
|
|
1112
1132
|
)
|
|
@@ -1162,7 +1182,7 @@ class AsyncParsingClient:
|
|
|
1162
1182
|
token="YOUR_TOKEN",
|
|
1163
1183
|
)
|
|
1164
1184
|
await client.parsing.get_job(
|
|
1165
|
-
job_id="
|
|
1185
|
+
job_id="string",
|
|
1166
1186
|
)
|
|
1167
1187
|
"""
|
|
1168
1188
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1194,7 +1214,7 @@ class AsyncParsingClient:
|
|
|
1194
1214
|
token="YOUR_TOKEN",
|
|
1195
1215
|
)
|
|
1196
1216
|
await client.parsing.get_parsing_job_details(
|
|
1197
|
-
job_id="
|
|
1217
|
+
job_id="string",
|
|
1198
1218
|
)
|
|
1199
1219
|
"""
|
|
1200
1220
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1226,7 +1246,7 @@ class AsyncParsingClient:
|
|
|
1226
1246
|
token="YOUR_TOKEN",
|
|
1227
1247
|
)
|
|
1228
1248
|
await client.parsing.get_job_text_result(
|
|
1229
|
-
job_id="
|
|
1249
|
+
job_id="string",
|
|
1230
1250
|
)
|
|
1231
1251
|
"""
|
|
1232
1252
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1258,7 +1278,7 @@ class AsyncParsingClient:
|
|
|
1258
1278
|
token="YOUR_TOKEN",
|
|
1259
1279
|
)
|
|
1260
1280
|
await client.parsing.get_job_raw_text_result(
|
|
1261
|
-
job_id="
|
|
1281
|
+
job_id="string",
|
|
1262
1282
|
)
|
|
1263
1283
|
"""
|
|
1264
1284
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1292,7 +1312,7 @@ class AsyncParsingClient:
|
|
|
1292
1312
|
token="YOUR_TOKEN",
|
|
1293
1313
|
)
|
|
1294
1314
|
await client.parsing.get_job_structured_result(
|
|
1295
|
-
job_id="
|
|
1315
|
+
job_id="string",
|
|
1296
1316
|
)
|
|
1297
1317
|
"""
|
|
1298
1318
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1326,7 +1346,7 @@ class AsyncParsingClient:
|
|
|
1326
1346
|
token="YOUR_TOKEN",
|
|
1327
1347
|
)
|
|
1328
1348
|
await client.parsing.get_job_raw_structured_result(
|
|
1329
|
-
job_id="
|
|
1349
|
+
job_id="string",
|
|
1330
1350
|
)
|
|
1331
1351
|
"""
|
|
1332
1352
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1360,7 +1380,7 @@ class AsyncParsingClient:
|
|
|
1360
1380
|
token="YOUR_TOKEN",
|
|
1361
1381
|
)
|
|
1362
1382
|
await client.parsing.get_job_raw_xlsx_result(
|
|
1363
|
-
job_id="
|
|
1383
|
+
job_id="string",
|
|
1364
1384
|
)
|
|
1365
1385
|
"""
|
|
1366
1386
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1394,7 +1414,7 @@ class AsyncParsingClient:
|
|
|
1394
1414
|
token="YOUR_TOKEN",
|
|
1395
1415
|
)
|
|
1396
1416
|
await client.parsing.get_job_result(
|
|
1397
|
-
job_id="
|
|
1417
|
+
job_id="string",
|
|
1398
1418
|
)
|
|
1399
1419
|
"""
|
|
1400
1420
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1428,7 +1448,7 @@ class AsyncParsingClient:
|
|
|
1428
1448
|
token="YOUR_TOKEN",
|
|
1429
1449
|
)
|
|
1430
1450
|
await client.parsing.get_job_raw_md_result(
|
|
1431
|
-
job_id="
|
|
1451
|
+
job_id="string",
|
|
1432
1452
|
)
|
|
1433
1453
|
"""
|
|
1434
1454
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1462,7 +1482,7 @@ class AsyncParsingClient:
|
|
|
1462
1482
|
token="YOUR_TOKEN",
|
|
1463
1483
|
)
|
|
1464
1484
|
await client.parsing.get_job_json_result(
|
|
1465
|
-
job_id="
|
|
1485
|
+
job_id="string",
|
|
1466
1486
|
)
|
|
1467
1487
|
"""
|
|
1468
1488
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1494,7 +1514,7 @@ class AsyncParsingClient:
|
|
|
1494
1514
|
token="YOUR_TOKEN",
|
|
1495
1515
|
)
|
|
1496
1516
|
await client.parsing.get_job_json_raw_result(
|
|
1497
|
-
job_id="
|
|
1517
|
+
job_id="string",
|
|
1498
1518
|
)
|
|
1499
1519
|
"""
|
|
1500
1520
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1558,8 +1578,8 @@ class AsyncParsingClient:
|
|
|
1558
1578
|
token="YOUR_TOKEN",
|
|
1559
1579
|
)
|
|
1560
1580
|
await client.parsing.generate_presigned_url(
|
|
1561
|
-
job_id="
|
|
1562
|
-
filename="
|
|
1581
|
+
job_id="string",
|
|
1582
|
+
filename="string",
|
|
1563
1583
|
)
|
|
1564
1584
|
"""
|
|
1565
1585
|
_response = await self._client_wrapper.httpx_client.request(
|