llama-cloud 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +12 -10
- llama_cloud/environment.py +1 -1
- llama_cloud/resources/__init__.py +2 -1
- llama_cloud/resources/data_sinks/client.py +14 -14
- llama_cloud/resources/data_sources/client.py +16 -16
- llama_cloud/resources/embedding_model_configs/client.py +80 -24
- llama_cloud/resources/evals/client.py +36 -26
- llama_cloud/resources/extraction/client.py +32 -32
- llama_cloud/resources/files/__init__.py +2 -2
- llama_cloud/resources/files/client.py +53 -28
- llama_cloud/resources/files/types/__init__.py +2 -1
- llama_cloud/resources/files/types/file_create_permission_info_value.py +7 -0
- llama_cloud/resources/organizations/client.py +60 -56
- llama_cloud/resources/parsing/client.py +555 -324
- llama_cloud/resources/pipelines/client.py +446 -302
- llama_cloud/resources/projects/client.py +270 -136
- llama_cloud/types/__init__.py +10 -10
- llama_cloud/types/azure_open_ai_embedding.py +12 -6
- llama_cloud/types/base_prompt_template.py +6 -2
- llama_cloud/types/bedrock_embedding.py +12 -6
- llama_cloud/types/character_splitter.py +4 -2
- llama_cloud/types/chat_message.py +1 -1
- llama_cloud/types/cloud_az_storage_blob_data_source.py +16 -7
- llama_cloud/types/cloud_box_data_source.py +13 -6
- llama_cloud/types/cloud_confluence_data_source.py +7 -6
- llama_cloud/types/cloud_document.py +3 -1
- llama_cloud/types/cloud_document_create.py +3 -1
- llama_cloud/types/cloud_google_drive_data_source.py +1 -0
- llama_cloud/types/cloud_jira_data_source.py +7 -4
- llama_cloud/types/cloud_notion_page_data_source.py +3 -2
- llama_cloud/types/cloud_one_drive_data_source.py +6 -3
- llama_cloud/types/cloud_s_3_data_source.py +9 -4
- llama_cloud/types/cloud_sharepoint_data_source.py +9 -6
- llama_cloud/types/cloud_slack_data_source.py +7 -6
- llama_cloud/types/code_splitter.py +1 -1
- llama_cloud/types/cohere_embedding.py +7 -3
- llama_cloud/types/data_sink.py +4 -4
- llama_cloud/types/data_sink_create.py +1 -1
- llama_cloud/types/data_source.py +7 -5
- llama_cloud/types/data_source_create.py +4 -2
- llama_cloud/types/embedding_model_config.py +2 -2
- llama_cloud/types/embedding_model_config_update.py +4 -2
- llama_cloud/types/eval_dataset.py +2 -2
- llama_cloud/types/eval_dataset_job_record.py +13 -7
- llama_cloud/types/eval_execution_params_override.py +6 -2
- llama_cloud/types/eval_question.py +2 -2
- llama_cloud/types/extraction_result.py +2 -2
- llama_cloud/types/extraction_schema.py +5 -3
- llama_cloud/types/file.py +15 -7
- llama_cloud/types/file_permission_info_value.py +5 -0
- llama_cloud/types/filter_operator.py +2 -2
- llama_cloud/types/gemini_embedding.py +10 -6
- llama_cloud/types/hugging_face_inference_api_embedding.py +27 -11
- llama_cloud/types/input_message.py +3 -1
- llama_cloud/types/job_name_mapping.py +4 -0
- llama_cloud/types/llama_parse_parameters.py +11 -0
- llama_cloud/types/llm.py +4 -2
- llama_cloud/types/llm_parameters.py +5 -2
- llama_cloud/types/local_eval.py +10 -8
- llama_cloud/types/local_eval_results.py +1 -1
- llama_cloud/types/managed_ingestion_status_response.py +5 -3
- llama_cloud/types/markdown_element_node_parser.py +5 -3
- llama_cloud/types/markdown_node_parser.py +1 -1
- llama_cloud/types/metadata_filter.py +2 -2
- llama_cloud/types/metric_result.py +3 -3
- llama_cloud/types/node_parser.py +1 -1
- llama_cloud/types/open_ai_embedding.py +12 -6
- llama_cloud/types/organization.py +2 -2
- llama_cloud/types/page_splitter_node_parser.py +2 -2
- llama_cloud/types/parsing_job_structured_result.py +32 -0
- llama_cloud/types/permission.py +3 -3
- llama_cloud/types/pipeline.py +17 -7
- llama_cloud/types/pipeline_configuration_hashes.py +3 -3
- llama_cloud/types/pipeline_create.py +15 -5
- llama_cloud/types/pipeline_data_source.py +13 -7
- llama_cloud/types/pipeline_data_source_create.py +3 -1
- llama_cloud/types/pipeline_deployment.py +4 -4
- llama_cloud/types/pipeline_file.py +25 -11
- llama_cloud/types/pipeline_file_create.py +3 -1
- llama_cloud/types/pipeline_file_permission_info_value.py +7 -0
- llama_cloud/types/playground_session.py +2 -2
- llama_cloud/types/preset_retrieval_params.py +14 -7
- llama_cloud/types/presigned_url.py +3 -1
- llama_cloud/types/project.py +2 -2
- llama_cloud/types/prompt_mixin_prompts.py +1 -1
- llama_cloud/types/prompt_spec.py +4 -2
- llama_cloud/types/role.py +3 -3
- llama_cloud/types/sentence_splitter.py +4 -2
- llama_cloud/types/text_node.py +3 -3
- llama_cloud/types/{hugging_face_inference_api_embedding_token.py → token.py} +1 -1
- llama_cloud/types/token_text_splitter.py +1 -1
- llama_cloud/types/user_organization.py +9 -5
- llama_cloud/types/user_organization_create.py +4 -4
- llama_cloud/types/user_organization_delete.py +2 -2
- llama_cloud/types/user_organization_role.py +2 -2
- llama_cloud/types/value.py +5 -0
- llama_cloud/types/vertex_text_embedding.py +9 -5
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/METADATA +2 -1
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/RECORD +101 -100
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/WHEEL +1 -1
- llama_cloud/types/data_sink_component.py +0 -20
- llama_cloud/types/data_source_component.py +0 -28
- llama_cloud/types/metadata_filter_value.py +0 -5
- llama_cloud/types/pipeline_data_source_component.py +0 -28
- {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/LICENSE +0 -0
|
@@ -16,6 +16,7 @@ from ...types.parsing_history_item import ParsingHistoryItem
|
|
|
16
16
|
from ...types.parsing_job import ParsingJob
|
|
17
17
|
from ...types.parsing_job_json_result import ParsingJobJsonResult
|
|
18
18
|
from ...types.parsing_job_markdown_result import ParsingJobMarkdownResult
|
|
19
|
+
from ...types.parsing_job_structured_result import ParsingJobStructuredResult
|
|
19
20
|
from ...types.parsing_job_text_result import ParsingJobTextResult
|
|
20
21
|
from ...types.parsing_usage import ParsingUsage
|
|
21
22
|
from ...types.presigned_url import PresignedUrl
|
|
@@ -36,7 +37,7 @@ class ParsingClient:
|
|
|
36
37
|
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
37
38
|
self._client_wrapper = client_wrapper
|
|
38
39
|
|
|
39
|
-
def get_job_image_result(self, job_id: str, name: str) ->
|
|
40
|
+
def get_job_image_result(self, job_id: str, name: str) -> typing.Iterator[bytes]:
|
|
40
41
|
"""
|
|
41
42
|
Get a job by id
|
|
42
43
|
|
|
@@ -44,34 +45,29 @@ class ParsingClient:
|
|
|
44
45
|
- job_id: str.
|
|
45
46
|
|
|
46
47
|
- name: str.
|
|
47
|
-
---
|
|
48
|
-
from llama_cloud.client import LlamaCloud
|
|
49
|
-
|
|
50
|
-
client = LlamaCloud(
|
|
51
|
-
token="YOUR_TOKEN",
|
|
52
|
-
)
|
|
53
|
-
client.parsing.get_job_image_result(
|
|
54
|
-
job_id="string",
|
|
55
|
-
name="string",
|
|
56
|
-
)
|
|
57
48
|
"""
|
|
58
|
-
|
|
49
|
+
with self._client_wrapper.httpx_client.stream(
|
|
59
50
|
"GET",
|
|
60
51
|
urllib.parse.urljoin(
|
|
61
52
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
|
|
62
53
|
),
|
|
63
54
|
headers=self._client_wrapper.get_headers(),
|
|
64
55
|
timeout=60,
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
56
|
+
) as _response:
|
|
57
|
+
if 200 <= _response.status_code < 300:
|
|
58
|
+
for _chunk in _response.iter_bytes():
|
|
59
|
+
yield _chunk
|
|
60
|
+
return
|
|
61
|
+
_response.read()
|
|
62
|
+
if _response.status_code == 422:
|
|
63
|
+
raise UnprocessableEntityError(
|
|
64
|
+
pydantic.parse_obj_as(HttpValidationError, _response.json()) # type: ignore
|
|
65
|
+
)
|
|
66
|
+
try:
|
|
67
|
+
_response_json = _response.json()
|
|
68
|
+
except JSONDecodeError:
|
|
69
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
70
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
75
71
|
|
|
76
72
|
def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
|
|
77
73
|
"""
|
|
@@ -104,47 +100,58 @@ class ParsingClient:
|
|
|
104
100
|
*,
|
|
105
101
|
project_id: typing.Optional[str] = None,
|
|
106
102
|
organization_id: typing.Optional[str] = None,
|
|
107
|
-
language: typing.List[ParserLanguages],
|
|
108
|
-
parsing_instruction: str,
|
|
109
|
-
skip_diagonal_text: bool,
|
|
110
|
-
invalidate_cache: bool,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
103
|
+
language: typing.Optional[typing.List[ParserLanguages]] = None,
|
|
104
|
+
parsing_instruction: typing.Optional[str] = None,
|
|
105
|
+
skip_diagonal_text: typing.Optional[bool] = None,
|
|
106
|
+
invalidate_cache: typing.Optional[bool] = None,
|
|
107
|
+
output_pdf_of_document: typing.Optional[bool] = None,
|
|
108
|
+
do_not_cache: typing.Optional[bool] = None,
|
|
109
|
+
gpt_4_o_mode: typing.Optional[bool] = None,
|
|
110
|
+
fast_mode: typing.Optional[bool] = None,
|
|
111
|
+
premium_mode: typing.Optional[bool] = None,
|
|
112
|
+
continuous_mode: typing.Optional[bool] = None,
|
|
113
|
+
gpt_4_o_api_key: typing.Optional[str] = None,
|
|
114
|
+
do_not_unroll_columns: typing.Optional[bool] = None,
|
|
115
|
+
html_make_all_elements_visible: typing.Optional[bool] = None,
|
|
116
|
+
html_remove_fixed_elements: typing.Optional[bool] = None,
|
|
117
|
+
guess_xlsx_sheet_name: typing.Optional[bool] = None,
|
|
118
|
+
page_separator: typing.Optional[str] = None,
|
|
119
|
+
bounding_box: typing.Optional[str] = None,
|
|
120
|
+
bbox_top: typing.Optional[float] = None,
|
|
121
|
+
bbox_right: typing.Optional[float] = None,
|
|
122
|
+
bbox_bottom: typing.Optional[float] = None,
|
|
123
|
+
bbox_left: typing.Optional[float] = None,
|
|
124
|
+
target_pages: typing.Optional[str] = None,
|
|
125
|
+
use_vendor_multimodal_model: typing.Optional[bool] = None,
|
|
126
|
+
vendor_multimodal_model_name: typing.Optional[str] = None,
|
|
127
|
+
vendor_multimodal_api_key: typing.Optional[str] = None,
|
|
128
|
+
page_prefix: typing.Optional[str] = None,
|
|
129
|
+
page_suffix: typing.Optional[str] = None,
|
|
130
|
+
webhook_url: typing.Optional[str] = None,
|
|
131
|
+
take_screenshot: typing.Optional[bool] = None,
|
|
132
|
+
is_formatting_instruction: typing.Optional[bool] = None,
|
|
133
|
+
disable_ocr: typing.Optional[bool] = None,
|
|
134
|
+
annotate_links: typing.Optional[bool] = None,
|
|
135
|
+
disable_reconstruction: typing.Optional[bool] = None,
|
|
136
|
+
disable_image_extraction: typing.Optional[bool] = None,
|
|
137
|
+
input_s_3_path: typing.Optional[str] = None,
|
|
138
|
+
output_s_3_path_prefix: typing.Optional[str] = None,
|
|
139
|
+
azure_openai_deployment_name: typing.Optional[str] = None,
|
|
140
|
+
azure_openai_endpoint: typing.Optional[str] = None,
|
|
141
|
+
azure_openai_api_version: typing.Optional[str] = None,
|
|
142
|
+
azure_openai_key: typing.Optional[str] = None,
|
|
143
|
+
auto_mode: typing.Optional[bool] = None,
|
|
144
|
+
auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = None,
|
|
145
|
+
auto_mode_trigger_on_text_in_page: typing.Optional[str] = None,
|
|
146
|
+
auto_mode_trigger_on_table_in_page: typing.Optional[bool] = None,
|
|
147
|
+
auto_mode_trigger_on_image_in_page: typing.Optional[bool] = None,
|
|
148
|
+
file: typing.Optional[str] = None,
|
|
149
|
+
input_url: typing.Optional[str] = None,
|
|
150
|
+
http_proxy: typing.Optional[str] = None,
|
|
151
|
+
structured_output: typing.Optional[bool] = None,
|
|
152
|
+
structured_output_json_schema: typing.Optional[str] = None,
|
|
153
|
+
structured_output_json_schema_name: typing.Optional[str] = None,
|
|
154
|
+
max_pages: typing.Optional[int] = None,
|
|
148
155
|
) -> ParsingJob:
|
|
149
156
|
"""
|
|
150
157
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -154,137 +161,178 @@ class ParsingClient:
|
|
|
154
161
|
|
|
155
162
|
- organization_id: typing.Optional[str].
|
|
156
163
|
|
|
157
|
-
- language: typing.List[ParserLanguages].
|
|
164
|
+
- language: typing.Optional[typing.List[ParserLanguages]].
|
|
165
|
+
|
|
166
|
+
- parsing_instruction: typing.Optional[str].
|
|
167
|
+
|
|
168
|
+
- skip_diagonal_text: typing.Optional[bool].
|
|
169
|
+
|
|
170
|
+
- invalidate_cache: typing.Optional[bool].
|
|
171
|
+
|
|
172
|
+
- output_pdf_of_document: typing.Optional[bool].
|
|
173
|
+
|
|
174
|
+
- do_not_cache: typing.Optional[bool].
|
|
175
|
+
|
|
176
|
+
- gpt_4_o_mode: typing.Optional[bool].
|
|
177
|
+
|
|
178
|
+
- fast_mode: typing.Optional[bool].
|
|
158
179
|
|
|
159
|
-
-
|
|
180
|
+
- premium_mode: typing.Optional[bool].
|
|
160
181
|
|
|
161
|
-
-
|
|
182
|
+
- continuous_mode: typing.Optional[bool].
|
|
162
183
|
|
|
163
|
-
-
|
|
184
|
+
- gpt_4_o_api_key: typing.Optional[str].
|
|
164
185
|
|
|
165
|
-
-
|
|
186
|
+
- do_not_unroll_columns: typing.Optional[bool].
|
|
166
187
|
|
|
167
|
-
-
|
|
188
|
+
- html_make_all_elements_visible: typing.Optional[bool].
|
|
168
189
|
|
|
169
|
-
-
|
|
190
|
+
- html_remove_fixed_elements: typing.Optional[bool].
|
|
170
191
|
|
|
171
|
-
-
|
|
192
|
+
- guess_xlsx_sheet_name: typing.Optional[bool].
|
|
172
193
|
|
|
173
|
-
-
|
|
194
|
+
- page_separator: typing.Optional[str].
|
|
174
195
|
|
|
175
|
-
-
|
|
196
|
+
- bounding_box: typing.Optional[str].
|
|
176
197
|
|
|
177
|
-
-
|
|
198
|
+
- bbox_top: typing.Optional[float].
|
|
178
199
|
|
|
179
|
-
-
|
|
200
|
+
- bbox_right: typing.Optional[float].
|
|
180
201
|
|
|
181
|
-
-
|
|
202
|
+
- bbox_bottom: typing.Optional[float].
|
|
182
203
|
|
|
183
|
-
-
|
|
204
|
+
- bbox_left: typing.Optional[float].
|
|
184
205
|
|
|
185
|
-
- target_pages: str.
|
|
206
|
+
- target_pages: typing.Optional[str].
|
|
186
207
|
|
|
187
|
-
- use_vendor_multimodal_model: bool.
|
|
208
|
+
- use_vendor_multimodal_model: typing.Optional[bool].
|
|
188
209
|
|
|
189
|
-
- vendor_multimodal_model_name: str.
|
|
210
|
+
- vendor_multimodal_model_name: typing.Optional[str].
|
|
190
211
|
|
|
191
|
-
- vendor_multimodal_api_key: str.
|
|
212
|
+
- vendor_multimodal_api_key: typing.Optional[str].
|
|
192
213
|
|
|
193
|
-
- page_prefix: str.
|
|
214
|
+
- page_prefix: typing.Optional[str].
|
|
194
215
|
|
|
195
|
-
- page_suffix: str.
|
|
216
|
+
- page_suffix: typing.Optional[str].
|
|
196
217
|
|
|
197
|
-
- webhook_url: str.
|
|
218
|
+
- webhook_url: typing.Optional[str].
|
|
198
219
|
|
|
199
|
-
- take_screenshot: bool.
|
|
220
|
+
- take_screenshot: typing.Optional[bool].
|
|
200
221
|
|
|
201
|
-
- is_formatting_instruction: bool.
|
|
222
|
+
- is_formatting_instruction: typing.Optional[bool].
|
|
202
223
|
|
|
203
|
-
- disable_ocr: bool.
|
|
224
|
+
- disable_ocr: typing.Optional[bool].
|
|
204
225
|
|
|
205
|
-
- annotate_links: bool.
|
|
226
|
+
- annotate_links: typing.Optional[bool].
|
|
206
227
|
|
|
207
|
-
- disable_reconstruction: bool.
|
|
228
|
+
- disable_reconstruction: typing.Optional[bool].
|
|
208
229
|
|
|
209
|
-
- disable_image_extraction: bool.
|
|
230
|
+
- disable_image_extraction: typing.Optional[bool].
|
|
210
231
|
|
|
211
|
-
- input_s_3_path: str.
|
|
232
|
+
- input_s_3_path: typing.Optional[str].
|
|
212
233
|
|
|
213
|
-
- output_s_3_path_prefix: str.
|
|
234
|
+
- output_s_3_path_prefix: typing.Optional[str].
|
|
214
235
|
|
|
215
|
-
- azure_openai_deployment_name: str.
|
|
236
|
+
- azure_openai_deployment_name: typing.Optional[str].
|
|
216
237
|
|
|
217
|
-
- azure_openai_endpoint: str.
|
|
238
|
+
- azure_openai_endpoint: typing.Optional[str].
|
|
218
239
|
|
|
219
|
-
- azure_openai_api_version: str.
|
|
240
|
+
- azure_openai_api_version: typing.Optional[str].
|
|
220
241
|
|
|
221
|
-
- azure_openai_key: str.
|
|
242
|
+
- azure_openai_key: typing.Optional[str].
|
|
222
243
|
|
|
223
|
-
- auto_mode: bool.
|
|
244
|
+
- auto_mode: typing.Optional[bool].
|
|
224
245
|
|
|
225
|
-
- auto_mode_trigger_on_regexp_in_page: str.
|
|
246
|
+
- auto_mode_trigger_on_regexp_in_page: typing.Optional[str].
|
|
226
247
|
|
|
227
|
-
- auto_mode_trigger_on_text_in_page: str.
|
|
248
|
+
- auto_mode_trigger_on_text_in_page: typing.Optional[str].
|
|
228
249
|
|
|
229
|
-
- auto_mode_trigger_on_table_in_page: bool.
|
|
250
|
+
- auto_mode_trigger_on_table_in_page: typing.Optional[bool].
|
|
230
251
|
|
|
231
|
-
- auto_mode_trigger_on_image_in_page: bool.
|
|
252
|
+
- auto_mode_trigger_on_image_in_page: typing.Optional[bool].
|
|
232
253
|
|
|
233
254
|
- file: typing.Optional[str].
|
|
234
255
|
|
|
235
|
-
- input_url: str.
|
|
236
|
-
|
|
237
|
-
- http_proxy: str.
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
"
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
"target_pages": target_pages,
|
|
255
|
-
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
256
|
-
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
257
|
-
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
258
|
-
"page_prefix": page_prefix,
|
|
259
|
-
"page_suffix": page_suffix,
|
|
260
|
-
"webhook_url": webhook_url,
|
|
261
|
-
"take_screenshot": take_screenshot,
|
|
262
|
-
"is_formatting_instruction": is_formatting_instruction,
|
|
263
|
-
"disable_ocr": disable_ocr,
|
|
264
|
-
"annotate_links": annotate_links,
|
|
265
|
-
"disable_reconstruction": disable_reconstruction,
|
|
266
|
-
"disable_image_extraction": disable_image_extraction,
|
|
267
|
-
"input_s3_path": input_s_3_path,
|
|
268
|
-
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
269
|
-
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
270
|
-
"azure_openai_endpoint": azure_openai_endpoint,
|
|
271
|
-
"azure_openai_api_version": azure_openai_api_version,
|
|
272
|
-
"azure_openai_key": azure_openai_key,
|
|
273
|
-
"auto_mode": auto_mode,
|
|
274
|
-
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
275
|
-
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
276
|
-
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
277
|
-
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
278
|
-
"input_url": input_url,
|
|
279
|
-
"http_proxy": http_proxy,
|
|
280
|
-
}
|
|
281
|
-
if file is not OMIT:
|
|
282
|
-
_request["file"] = file
|
|
256
|
+
- input_url: typing.Optional[str].
|
|
257
|
+
|
|
258
|
+
- http_proxy: typing.Optional[str].
|
|
259
|
+
|
|
260
|
+
- structured_output: typing.Optional[bool].
|
|
261
|
+
|
|
262
|
+
- structured_output_json_schema: typing.Optional[str].
|
|
263
|
+
|
|
264
|
+
- structured_output_json_schema_name: typing.Optional[str].
|
|
265
|
+
|
|
266
|
+
- max_pages: typing.Optional[int].
|
|
267
|
+
---
|
|
268
|
+
from llama_cloud.client import LlamaCloud
|
|
269
|
+
|
|
270
|
+
client = LlamaCloud(
|
|
271
|
+
token="YOUR_TOKEN",
|
|
272
|
+
)
|
|
273
|
+
client.parsing.upload_file()
|
|
274
|
+
"""
|
|
283
275
|
_response = self._client_wrapper.httpx_client.request(
|
|
284
276
|
"POST",
|
|
285
277
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
286
278
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
287
|
-
|
|
279
|
+
data=jsonable_encoder(
|
|
280
|
+
{
|
|
281
|
+
"language": language,
|
|
282
|
+
"parsing_instruction": parsing_instruction,
|
|
283
|
+
"skip_diagonal_text": skip_diagonal_text,
|
|
284
|
+
"invalidate_cache": invalidate_cache,
|
|
285
|
+
"output_pdf_of_document": output_pdf_of_document,
|
|
286
|
+
"do_not_cache": do_not_cache,
|
|
287
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
288
|
+
"fast_mode": fast_mode,
|
|
289
|
+
"premium_mode": premium_mode,
|
|
290
|
+
"continuous_mode": continuous_mode,
|
|
291
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
292
|
+
"do_not_unroll_columns": do_not_unroll_columns,
|
|
293
|
+
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
294
|
+
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
295
|
+
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
296
|
+
"page_separator": page_separator,
|
|
297
|
+
"bounding_box": bounding_box,
|
|
298
|
+
"bbox_top": bbox_top,
|
|
299
|
+
"bbox_right": bbox_right,
|
|
300
|
+
"bbox_bottom": bbox_bottom,
|
|
301
|
+
"bbox_left": bbox_left,
|
|
302
|
+
"target_pages": target_pages,
|
|
303
|
+
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
304
|
+
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
305
|
+
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
306
|
+
"page_prefix": page_prefix,
|
|
307
|
+
"page_suffix": page_suffix,
|
|
308
|
+
"webhook_url": webhook_url,
|
|
309
|
+
"take_screenshot": take_screenshot,
|
|
310
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
311
|
+
"disable_ocr": disable_ocr,
|
|
312
|
+
"annotate_links": annotate_links,
|
|
313
|
+
"disable_reconstruction": disable_reconstruction,
|
|
314
|
+
"disable_image_extraction": disable_image_extraction,
|
|
315
|
+
"input_s3_path": input_s_3_path,
|
|
316
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
317
|
+
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
318
|
+
"azure_openai_endpoint": azure_openai_endpoint,
|
|
319
|
+
"azure_openai_api_version": azure_openai_api_version,
|
|
320
|
+
"azure_openai_key": azure_openai_key,
|
|
321
|
+
"auto_mode": auto_mode,
|
|
322
|
+
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
323
|
+
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
324
|
+
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
325
|
+
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
326
|
+
"file": file,
|
|
327
|
+
"input_url": input_url,
|
|
328
|
+
"http_proxy": http_proxy,
|
|
329
|
+
"structured_output": structured_output,
|
|
330
|
+
"structured_output_json_schema": structured_output_json_schema,
|
|
331
|
+
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
332
|
+
"max_pages": max_pages,
|
|
333
|
+
}
|
|
334
|
+
),
|
|
335
|
+
files={},
|
|
288
336
|
headers=self._client_wrapper.get_headers(),
|
|
289
337
|
timeout=60,
|
|
290
338
|
)
|
|
@@ -340,7 +388,7 @@ class ParsingClient:
|
|
|
340
388
|
token="YOUR_TOKEN",
|
|
341
389
|
)
|
|
342
390
|
client.parsing.get_job(
|
|
343
|
-
job_id="
|
|
391
|
+
job_id="job_id",
|
|
344
392
|
)
|
|
345
393
|
"""
|
|
346
394
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -372,7 +420,7 @@ class ParsingClient:
|
|
|
372
420
|
token="YOUR_TOKEN",
|
|
373
421
|
)
|
|
374
422
|
client.parsing.get_parsing_job_details(
|
|
375
|
-
job_id="
|
|
423
|
+
job_id="job_id",
|
|
376
424
|
)
|
|
377
425
|
"""
|
|
378
426
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -404,7 +452,7 @@ class ParsingClient:
|
|
|
404
452
|
token="YOUR_TOKEN",
|
|
405
453
|
)
|
|
406
454
|
client.parsing.get_job_text_result(
|
|
407
|
-
job_id="
|
|
455
|
+
job_id="job_id",
|
|
408
456
|
)
|
|
409
457
|
"""
|
|
410
458
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -436,13 +484,81 @@ class ParsingClient:
|
|
|
436
484
|
token="YOUR_TOKEN",
|
|
437
485
|
)
|
|
438
486
|
client.parsing.get_job_raw_text_result(
|
|
439
|
-
job_id="
|
|
487
|
+
job_id="job_id",
|
|
488
|
+
)
|
|
489
|
+
"""
|
|
490
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
491
|
+
"GET",
|
|
492
|
+
urllib.parse.urljoin(
|
|
493
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
|
|
494
|
+
),
|
|
495
|
+
headers=self._client_wrapper.get_headers(),
|
|
496
|
+
timeout=60,
|
|
497
|
+
)
|
|
498
|
+
if 200 <= _response.status_code < 300:
|
|
499
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
500
|
+
if _response.status_code == 422:
|
|
501
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
502
|
+
try:
|
|
503
|
+
_response_json = _response.json()
|
|
504
|
+
except JSONDecodeError:
|
|
505
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
506
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
507
|
+
|
|
508
|
+
def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
509
|
+
"""
|
|
510
|
+
Get a job by id
|
|
511
|
+
|
|
512
|
+
Parameters:
|
|
513
|
+
- job_id: str.
|
|
514
|
+
---
|
|
515
|
+
from llama_cloud.client import LlamaCloud
|
|
516
|
+
|
|
517
|
+
client = LlamaCloud(
|
|
518
|
+
token="YOUR_TOKEN",
|
|
519
|
+
)
|
|
520
|
+
client.parsing.get_job_structured_result(
|
|
521
|
+
job_id="job_id",
|
|
522
|
+
)
|
|
523
|
+
"""
|
|
524
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
525
|
+
"GET",
|
|
526
|
+
urllib.parse.urljoin(
|
|
527
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
528
|
+
),
|
|
529
|
+
headers=self._client_wrapper.get_headers(),
|
|
530
|
+
timeout=60,
|
|
531
|
+
)
|
|
532
|
+
if 200 <= _response.status_code < 300:
|
|
533
|
+
return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
|
|
534
|
+
if _response.status_code == 422:
|
|
535
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
536
|
+
try:
|
|
537
|
+
_response_json = _response.json()
|
|
538
|
+
except JSONDecodeError:
|
|
539
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
540
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
541
|
+
|
|
542
|
+
def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
|
|
543
|
+
"""
|
|
544
|
+
Get a job by id
|
|
545
|
+
|
|
546
|
+
Parameters:
|
|
547
|
+
- job_id: str.
|
|
548
|
+
---
|
|
549
|
+
from llama_cloud.client import LlamaCloud
|
|
550
|
+
|
|
551
|
+
client = LlamaCloud(
|
|
552
|
+
token="YOUR_TOKEN",
|
|
553
|
+
)
|
|
554
|
+
client.parsing.get_job_raw_structured_result(
|
|
555
|
+
job_id="job_id",
|
|
440
556
|
)
|
|
441
557
|
"""
|
|
442
558
|
_response = self._client_wrapper.httpx_client.request(
|
|
443
559
|
"GET",
|
|
444
560
|
urllib.parse.urljoin(
|
|
445
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/
|
|
561
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
|
|
446
562
|
),
|
|
447
563
|
headers=self._client_wrapper.get_headers(),
|
|
448
564
|
timeout=60,
|
|
@@ -470,7 +586,7 @@ class ParsingClient:
|
|
|
470
586
|
token="YOUR_TOKEN",
|
|
471
587
|
)
|
|
472
588
|
client.parsing.get_job_raw_xlsx_result(
|
|
473
|
-
job_id="
|
|
589
|
+
job_id="job_id",
|
|
474
590
|
)
|
|
475
591
|
"""
|
|
476
592
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -504,7 +620,7 @@ class ParsingClient:
|
|
|
504
620
|
token="YOUR_TOKEN",
|
|
505
621
|
)
|
|
506
622
|
client.parsing.get_job_result(
|
|
507
|
-
job_id="
|
|
623
|
+
job_id="job_id",
|
|
508
624
|
)
|
|
509
625
|
"""
|
|
510
626
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -538,7 +654,7 @@ class ParsingClient:
|
|
|
538
654
|
token="YOUR_TOKEN",
|
|
539
655
|
)
|
|
540
656
|
client.parsing.get_job_raw_md_result(
|
|
541
|
-
job_id="
|
|
657
|
+
job_id="job_id",
|
|
542
658
|
)
|
|
543
659
|
"""
|
|
544
660
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -572,7 +688,7 @@ class ParsingClient:
|
|
|
572
688
|
token="YOUR_TOKEN",
|
|
573
689
|
)
|
|
574
690
|
client.parsing.get_job_json_result(
|
|
575
|
-
job_id="
|
|
691
|
+
job_id="job_id",
|
|
576
692
|
)
|
|
577
693
|
"""
|
|
578
694
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -604,7 +720,7 @@ class ParsingClient:
|
|
|
604
720
|
token="YOUR_TOKEN",
|
|
605
721
|
)
|
|
606
722
|
client.parsing.get_job_json_raw_result(
|
|
607
|
-
job_id="
|
|
723
|
+
job_id="job_id",
|
|
608
724
|
)
|
|
609
725
|
"""
|
|
610
726
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -668,8 +784,8 @@ class ParsingClient:
|
|
|
668
784
|
token="YOUR_TOKEN",
|
|
669
785
|
)
|
|
670
786
|
client.parsing.generate_presigned_url(
|
|
671
|
-
job_id="
|
|
672
|
-
filename="
|
|
787
|
+
job_id="job_id",
|
|
788
|
+
filename="filename",
|
|
673
789
|
)
|
|
674
790
|
"""
|
|
675
791
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -695,7 +811,7 @@ class AsyncParsingClient:
|
|
|
695
811
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
696
812
|
self._client_wrapper = client_wrapper
|
|
697
813
|
|
|
698
|
-
async def get_job_image_result(self, job_id: str, name: str) ->
|
|
814
|
+
async def get_job_image_result(self, job_id: str, name: str) -> typing.AsyncIterator[bytes]:
|
|
699
815
|
"""
|
|
700
816
|
Get a job by id
|
|
701
817
|
|
|
@@ -703,34 +819,29 @@ class AsyncParsingClient:
|
|
|
703
819
|
- job_id: str.
|
|
704
820
|
|
|
705
821
|
- name: str.
|
|
706
|
-
---
|
|
707
|
-
from llama_cloud.client import AsyncLlamaCloud
|
|
708
|
-
|
|
709
|
-
client = AsyncLlamaCloud(
|
|
710
|
-
token="YOUR_TOKEN",
|
|
711
|
-
)
|
|
712
|
-
await client.parsing.get_job_image_result(
|
|
713
|
-
job_id="string",
|
|
714
|
-
name="string",
|
|
715
|
-
)
|
|
716
822
|
"""
|
|
717
|
-
|
|
823
|
+
async with self._client_wrapper.httpx_client.stream(
|
|
718
824
|
"GET",
|
|
719
825
|
urllib.parse.urljoin(
|
|
720
826
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
|
|
721
827
|
),
|
|
722
828
|
headers=self._client_wrapper.get_headers(),
|
|
723
829
|
timeout=60,
|
|
724
|
-
)
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
830
|
+
) as _response:
|
|
831
|
+
if 200 <= _response.status_code < 300:
|
|
832
|
+
async for _chunk in _response.aiter_bytes():
|
|
833
|
+
yield _chunk
|
|
834
|
+
return
|
|
835
|
+
await _response.aread()
|
|
836
|
+
if _response.status_code == 422:
|
|
837
|
+
raise UnprocessableEntityError(
|
|
838
|
+
pydantic.parse_obj_as(HttpValidationError, _response.json()) # type: ignore
|
|
839
|
+
)
|
|
840
|
+
try:
|
|
841
|
+
_response_json = _response.json()
|
|
842
|
+
except JSONDecodeError:
|
|
843
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
844
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
734
845
|
|
|
735
846
|
async def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
|
|
736
847
|
"""
|
|
@@ -763,47 +874,58 @@ class AsyncParsingClient:
|
|
|
763
874
|
*,
|
|
764
875
|
project_id: typing.Optional[str] = None,
|
|
765
876
|
organization_id: typing.Optional[str] = None,
|
|
766
|
-
language: typing.List[ParserLanguages],
|
|
767
|
-
parsing_instruction: str,
|
|
768
|
-
skip_diagonal_text: bool,
|
|
769
|
-
invalidate_cache: bool,
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
877
|
+
language: typing.Optional[typing.List[ParserLanguages]] = None,
|
|
878
|
+
parsing_instruction: typing.Optional[str] = None,
|
|
879
|
+
skip_diagonal_text: typing.Optional[bool] = None,
|
|
880
|
+
invalidate_cache: typing.Optional[bool] = None,
|
|
881
|
+
output_pdf_of_document: typing.Optional[bool] = None,
|
|
882
|
+
do_not_cache: typing.Optional[bool] = None,
|
|
883
|
+
gpt_4_o_mode: typing.Optional[bool] = None,
|
|
884
|
+
fast_mode: typing.Optional[bool] = None,
|
|
885
|
+
premium_mode: typing.Optional[bool] = None,
|
|
886
|
+
continuous_mode: typing.Optional[bool] = None,
|
|
887
|
+
gpt_4_o_api_key: typing.Optional[str] = None,
|
|
888
|
+
do_not_unroll_columns: typing.Optional[bool] = None,
|
|
889
|
+
html_make_all_elements_visible: typing.Optional[bool] = None,
|
|
890
|
+
html_remove_fixed_elements: typing.Optional[bool] = None,
|
|
891
|
+
guess_xlsx_sheet_name: typing.Optional[bool] = None,
|
|
892
|
+
page_separator: typing.Optional[str] = None,
|
|
893
|
+
bounding_box: typing.Optional[str] = None,
|
|
894
|
+
bbox_top: typing.Optional[float] = None,
|
|
895
|
+
bbox_right: typing.Optional[float] = None,
|
|
896
|
+
bbox_bottom: typing.Optional[float] = None,
|
|
897
|
+
bbox_left: typing.Optional[float] = None,
|
|
898
|
+
target_pages: typing.Optional[str] = None,
|
|
899
|
+
use_vendor_multimodal_model: typing.Optional[bool] = None,
|
|
900
|
+
vendor_multimodal_model_name: typing.Optional[str] = None,
|
|
901
|
+
vendor_multimodal_api_key: typing.Optional[str] = None,
|
|
902
|
+
page_prefix: typing.Optional[str] = None,
|
|
903
|
+
page_suffix: typing.Optional[str] = None,
|
|
904
|
+
webhook_url: typing.Optional[str] = None,
|
|
905
|
+
take_screenshot: typing.Optional[bool] = None,
|
|
906
|
+
is_formatting_instruction: typing.Optional[bool] = None,
|
|
907
|
+
disable_ocr: typing.Optional[bool] = None,
|
|
908
|
+
annotate_links: typing.Optional[bool] = None,
|
|
909
|
+
disable_reconstruction: typing.Optional[bool] = None,
|
|
910
|
+
disable_image_extraction: typing.Optional[bool] = None,
|
|
911
|
+
input_s_3_path: typing.Optional[str] = None,
|
|
912
|
+
output_s_3_path_prefix: typing.Optional[str] = None,
|
|
913
|
+
azure_openai_deployment_name: typing.Optional[str] = None,
|
|
914
|
+
azure_openai_endpoint: typing.Optional[str] = None,
|
|
915
|
+
azure_openai_api_version: typing.Optional[str] = None,
|
|
916
|
+
azure_openai_key: typing.Optional[str] = None,
|
|
917
|
+
auto_mode: typing.Optional[bool] = None,
|
|
918
|
+
auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = None,
|
|
919
|
+
auto_mode_trigger_on_text_in_page: typing.Optional[str] = None,
|
|
920
|
+
auto_mode_trigger_on_table_in_page: typing.Optional[bool] = None,
|
|
921
|
+
auto_mode_trigger_on_image_in_page: typing.Optional[bool] = None,
|
|
922
|
+
file: typing.Optional[str] = None,
|
|
923
|
+
input_url: typing.Optional[str] = None,
|
|
924
|
+
http_proxy: typing.Optional[str] = None,
|
|
925
|
+
structured_output: typing.Optional[bool] = None,
|
|
926
|
+
structured_output_json_schema: typing.Optional[str] = None,
|
|
927
|
+
structured_output_json_schema_name: typing.Optional[str] = None,
|
|
928
|
+
max_pages: typing.Optional[int] = None,
|
|
807
929
|
) -> ParsingJob:
|
|
808
930
|
"""
|
|
809
931
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -813,137 +935,178 @@ class AsyncParsingClient:
|
|
|
813
935
|
|
|
814
936
|
- organization_id: typing.Optional[str].
|
|
815
937
|
|
|
816
|
-
- language: typing.List[ParserLanguages].
|
|
938
|
+
- language: typing.Optional[typing.List[ParserLanguages]].
|
|
939
|
+
|
|
940
|
+
- parsing_instruction: typing.Optional[str].
|
|
941
|
+
|
|
942
|
+
- skip_diagonal_text: typing.Optional[bool].
|
|
943
|
+
|
|
944
|
+
- invalidate_cache: typing.Optional[bool].
|
|
945
|
+
|
|
946
|
+
- output_pdf_of_document: typing.Optional[bool].
|
|
947
|
+
|
|
948
|
+
- do_not_cache: typing.Optional[bool].
|
|
949
|
+
|
|
950
|
+
- gpt_4_o_mode: typing.Optional[bool].
|
|
951
|
+
|
|
952
|
+
- fast_mode: typing.Optional[bool].
|
|
817
953
|
|
|
818
|
-
-
|
|
954
|
+
- premium_mode: typing.Optional[bool].
|
|
819
955
|
|
|
820
|
-
-
|
|
956
|
+
- continuous_mode: typing.Optional[bool].
|
|
821
957
|
|
|
822
|
-
-
|
|
958
|
+
- gpt_4_o_api_key: typing.Optional[str].
|
|
823
959
|
|
|
824
|
-
-
|
|
960
|
+
- do_not_unroll_columns: typing.Optional[bool].
|
|
825
961
|
|
|
826
|
-
-
|
|
962
|
+
- html_make_all_elements_visible: typing.Optional[bool].
|
|
827
963
|
|
|
828
|
-
-
|
|
964
|
+
- html_remove_fixed_elements: typing.Optional[bool].
|
|
829
965
|
|
|
830
|
-
-
|
|
966
|
+
- guess_xlsx_sheet_name: typing.Optional[bool].
|
|
831
967
|
|
|
832
|
-
-
|
|
968
|
+
- page_separator: typing.Optional[str].
|
|
833
969
|
|
|
834
|
-
-
|
|
970
|
+
- bounding_box: typing.Optional[str].
|
|
835
971
|
|
|
836
|
-
-
|
|
972
|
+
- bbox_top: typing.Optional[float].
|
|
837
973
|
|
|
838
|
-
-
|
|
974
|
+
- bbox_right: typing.Optional[float].
|
|
839
975
|
|
|
840
|
-
-
|
|
976
|
+
- bbox_bottom: typing.Optional[float].
|
|
841
977
|
|
|
842
|
-
-
|
|
978
|
+
- bbox_left: typing.Optional[float].
|
|
843
979
|
|
|
844
|
-
- target_pages: str.
|
|
980
|
+
- target_pages: typing.Optional[str].
|
|
845
981
|
|
|
846
|
-
- use_vendor_multimodal_model: bool.
|
|
982
|
+
- use_vendor_multimodal_model: typing.Optional[bool].
|
|
847
983
|
|
|
848
|
-
- vendor_multimodal_model_name: str.
|
|
984
|
+
- vendor_multimodal_model_name: typing.Optional[str].
|
|
849
985
|
|
|
850
|
-
- vendor_multimodal_api_key: str.
|
|
986
|
+
- vendor_multimodal_api_key: typing.Optional[str].
|
|
851
987
|
|
|
852
|
-
- page_prefix: str.
|
|
988
|
+
- page_prefix: typing.Optional[str].
|
|
853
989
|
|
|
854
|
-
- page_suffix: str.
|
|
990
|
+
- page_suffix: typing.Optional[str].
|
|
855
991
|
|
|
856
|
-
- webhook_url: str.
|
|
992
|
+
- webhook_url: typing.Optional[str].
|
|
857
993
|
|
|
858
|
-
- take_screenshot: bool.
|
|
994
|
+
- take_screenshot: typing.Optional[bool].
|
|
859
995
|
|
|
860
|
-
- is_formatting_instruction: bool.
|
|
996
|
+
- is_formatting_instruction: typing.Optional[bool].
|
|
861
997
|
|
|
862
|
-
- disable_ocr: bool.
|
|
998
|
+
- disable_ocr: typing.Optional[bool].
|
|
863
999
|
|
|
864
|
-
- annotate_links: bool.
|
|
1000
|
+
- annotate_links: typing.Optional[bool].
|
|
865
1001
|
|
|
866
|
-
- disable_reconstruction: bool.
|
|
1002
|
+
- disable_reconstruction: typing.Optional[bool].
|
|
867
1003
|
|
|
868
|
-
- disable_image_extraction: bool.
|
|
1004
|
+
- disable_image_extraction: typing.Optional[bool].
|
|
869
1005
|
|
|
870
|
-
- input_s_3_path: str.
|
|
1006
|
+
- input_s_3_path: typing.Optional[str].
|
|
871
1007
|
|
|
872
|
-
- output_s_3_path_prefix: str.
|
|
1008
|
+
- output_s_3_path_prefix: typing.Optional[str].
|
|
873
1009
|
|
|
874
|
-
- azure_openai_deployment_name: str.
|
|
1010
|
+
- azure_openai_deployment_name: typing.Optional[str].
|
|
875
1011
|
|
|
876
|
-
- azure_openai_endpoint: str.
|
|
1012
|
+
- azure_openai_endpoint: typing.Optional[str].
|
|
877
1013
|
|
|
878
|
-
- azure_openai_api_version: str.
|
|
1014
|
+
- azure_openai_api_version: typing.Optional[str].
|
|
879
1015
|
|
|
880
|
-
- azure_openai_key: str.
|
|
1016
|
+
- azure_openai_key: typing.Optional[str].
|
|
881
1017
|
|
|
882
|
-
- auto_mode: bool.
|
|
1018
|
+
- auto_mode: typing.Optional[bool].
|
|
883
1019
|
|
|
884
|
-
- auto_mode_trigger_on_regexp_in_page: str.
|
|
1020
|
+
- auto_mode_trigger_on_regexp_in_page: typing.Optional[str].
|
|
885
1021
|
|
|
886
|
-
- auto_mode_trigger_on_text_in_page: str.
|
|
1022
|
+
- auto_mode_trigger_on_text_in_page: typing.Optional[str].
|
|
887
1023
|
|
|
888
|
-
- auto_mode_trigger_on_table_in_page: bool.
|
|
1024
|
+
- auto_mode_trigger_on_table_in_page: typing.Optional[bool].
|
|
889
1025
|
|
|
890
|
-
- auto_mode_trigger_on_image_in_page: bool.
|
|
1026
|
+
- auto_mode_trigger_on_image_in_page: typing.Optional[bool].
|
|
891
1027
|
|
|
892
1028
|
- file: typing.Optional[str].
|
|
893
1029
|
|
|
894
|
-
- input_url: str.
|
|
895
|
-
|
|
896
|
-
- http_proxy: str.
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
"
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
"target_pages": target_pages,
|
|
914
|
-
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
915
|
-
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
916
|
-
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
917
|
-
"page_prefix": page_prefix,
|
|
918
|
-
"page_suffix": page_suffix,
|
|
919
|
-
"webhook_url": webhook_url,
|
|
920
|
-
"take_screenshot": take_screenshot,
|
|
921
|
-
"is_formatting_instruction": is_formatting_instruction,
|
|
922
|
-
"disable_ocr": disable_ocr,
|
|
923
|
-
"annotate_links": annotate_links,
|
|
924
|
-
"disable_reconstruction": disable_reconstruction,
|
|
925
|
-
"disable_image_extraction": disable_image_extraction,
|
|
926
|
-
"input_s3_path": input_s_3_path,
|
|
927
|
-
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
928
|
-
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
929
|
-
"azure_openai_endpoint": azure_openai_endpoint,
|
|
930
|
-
"azure_openai_api_version": azure_openai_api_version,
|
|
931
|
-
"azure_openai_key": azure_openai_key,
|
|
932
|
-
"auto_mode": auto_mode,
|
|
933
|
-
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
934
|
-
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
935
|
-
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
936
|
-
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
937
|
-
"input_url": input_url,
|
|
938
|
-
"http_proxy": http_proxy,
|
|
939
|
-
}
|
|
940
|
-
if file is not OMIT:
|
|
941
|
-
_request["file"] = file
|
|
1030
|
+
- input_url: typing.Optional[str].
|
|
1031
|
+
|
|
1032
|
+
- http_proxy: typing.Optional[str].
|
|
1033
|
+
|
|
1034
|
+
- structured_output: typing.Optional[bool].
|
|
1035
|
+
|
|
1036
|
+
- structured_output_json_schema: typing.Optional[str].
|
|
1037
|
+
|
|
1038
|
+
- structured_output_json_schema_name: typing.Optional[str].
|
|
1039
|
+
|
|
1040
|
+
- max_pages: typing.Optional[int].
|
|
1041
|
+
---
|
|
1042
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1043
|
+
|
|
1044
|
+
client = AsyncLlamaCloud(
|
|
1045
|
+
token="YOUR_TOKEN",
|
|
1046
|
+
)
|
|
1047
|
+
await client.parsing.upload_file()
|
|
1048
|
+
"""
|
|
942
1049
|
_response = await self._client_wrapper.httpx_client.request(
|
|
943
1050
|
"POST",
|
|
944
1051
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
945
1052
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
946
|
-
|
|
1053
|
+
data=jsonable_encoder(
|
|
1054
|
+
{
|
|
1055
|
+
"language": language,
|
|
1056
|
+
"parsing_instruction": parsing_instruction,
|
|
1057
|
+
"skip_diagonal_text": skip_diagonal_text,
|
|
1058
|
+
"invalidate_cache": invalidate_cache,
|
|
1059
|
+
"output_pdf_of_document": output_pdf_of_document,
|
|
1060
|
+
"do_not_cache": do_not_cache,
|
|
1061
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
1062
|
+
"fast_mode": fast_mode,
|
|
1063
|
+
"premium_mode": premium_mode,
|
|
1064
|
+
"continuous_mode": continuous_mode,
|
|
1065
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
1066
|
+
"do_not_unroll_columns": do_not_unroll_columns,
|
|
1067
|
+
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
1068
|
+
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
1069
|
+
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
1070
|
+
"page_separator": page_separator,
|
|
1071
|
+
"bounding_box": bounding_box,
|
|
1072
|
+
"bbox_top": bbox_top,
|
|
1073
|
+
"bbox_right": bbox_right,
|
|
1074
|
+
"bbox_bottom": bbox_bottom,
|
|
1075
|
+
"bbox_left": bbox_left,
|
|
1076
|
+
"target_pages": target_pages,
|
|
1077
|
+
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
1078
|
+
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
1079
|
+
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
1080
|
+
"page_prefix": page_prefix,
|
|
1081
|
+
"page_suffix": page_suffix,
|
|
1082
|
+
"webhook_url": webhook_url,
|
|
1083
|
+
"take_screenshot": take_screenshot,
|
|
1084
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
1085
|
+
"disable_ocr": disable_ocr,
|
|
1086
|
+
"annotate_links": annotate_links,
|
|
1087
|
+
"disable_reconstruction": disable_reconstruction,
|
|
1088
|
+
"disable_image_extraction": disable_image_extraction,
|
|
1089
|
+
"input_s3_path": input_s_3_path,
|
|
1090
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
1091
|
+
"azure_openai_deployment_name": azure_openai_deployment_name,
|
|
1092
|
+
"azure_openai_endpoint": azure_openai_endpoint,
|
|
1093
|
+
"azure_openai_api_version": azure_openai_api_version,
|
|
1094
|
+
"azure_openai_key": azure_openai_key,
|
|
1095
|
+
"auto_mode": auto_mode,
|
|
1096
|
+
"auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
|
|
1097
|
+
"auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
|
|
1098
|
+
"auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
|
|
1099
|
+
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
1100
|
+
"file": file,
|
|
1101
|
+
"input_url": input_url,
|
|
1102
|
+
"http_proxy": http_proxy,
|
|
1103
|
+
"structured_output": structured_output,
|
|
1104
|
+
"structured_output_json_schema": structured_output_json_schema,
|
|
1105
|
+
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
1106
|
+
"max_pages": max_pages,
|
|
1107
|
+
}
|
|
1108
|
+
),
|
|
1109
|
+
files={},
|
|
947
1110
|
headers=self._client_wrapper.get_headers(),
|
|
948
1111
|
timeout=60,
|
|
949
1112
|
)
|
|
@@ -999,7 +1162,7 @@ class AsyncParsingClient:
|
|
|
999
1162
|
token="YOUR_TOKEN",
|
|
1000
1163
|
)
|
|
1001
1164
|
await client.parsing.get_job(
|
|
1002
|
-
job_id="
|
|
1165
|
+
job_id="job_id",
|
|
1003
1166
|
)
|
|
1004
1167
|
"""
|
|
1005
1168
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1031,7 +1194,7 @@ class AsyncParsingClient:
|
|
|
1031
1194
|
token="YOUR_TOKEN",
|
|
1032
1195
|
)
|
|
1033
1196
|
await client.parsing.get_parsing_job_details(
|
|
1034
|
-
job_id="
|
|
1197
|
+
job_id="job_id",
|
|
1035
1198
|
)
|
|
1036
1199
|
"""
|
|
1037
1200
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1063,7 +1226,7 @@ class AsyncParsingClient:
|
|
|
1063
1226
|
token="YOUR_TOKEN",
|
|
1064
1227
|
)
|
|
1065
1228
|
await client.parsing.get_job_text_result(
|
|
1066
|
-
job_id="
|
|
1229
|
+
job_id="job_id",
|
|
1067
1230
|
)
|
|
1068
1231
|
"""
|
|
1069
1232
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1095,13 +1258,81 @@ class AsyncParsingClient:
|
|
|
1095
1258
|
token="YOUR_TOKEN",
|
|
1096
1259
|
)
|
|
1097
1260
|
await client.parsing.get_job_raw_text_result(
|
|
1098
|
-
job_id="
|
|
1261
|
+
job_id="job_id",
|
|
1262
|
+
)
|
|
1263
|
+
"""
|
|
1264
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1265
|
+
"GET",
|
|
1266
|
+
urllib.parse.urljoin(
|
|
1267
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
|
|
1268
|
+
),
|
|
1269
|
+
headers=self._client_wrapper.get_headers(),
|
|
1270
|
+
timeout=60,
|
|
1271
|
+
)
|
|
1272
|
+
if 200 <= _response.status_code < 300:
|
|
1273
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
1274
|
+
if _response.status_code == 422:
|
|
1275
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1276
|
+
try:
|
|
1277
|
+
_response_json = _response.json()
|
|
1278
|
+
except JSONDecodeError:
|
|
1279
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1280
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1281
|
+
|
|
1282
|
+
async def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
|
|
1283
|
+
"""
|
|
1284
|
+
Get a job by id
|
|
1285
|
+
|
|
1286
|
+
Parameters:
|
|
1287
|
+
- job_id: str.
|
|
1288
|
+
---
|
|
1289
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1290
|
+
|
|
1291
|
+
client = AsyncLlamaCloud(
|
|
1292
|
+
token="YOUR_TOKEN",
|
|
1293
|
+
)
|
|
1294
|
+
await client.parsing.get_job_structured_result(
|
|
1295
|
+
job_id="job_id",
|
|
1296
|
+
)
|
|
1297
|
+
"""
|
|
1298
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1299
|
+
"GET",
|
|
1300
|
+
urllib.parse.urljoin(
|
|
1301
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
|
|
1302
|
+
),
|
|
1303
|
+
headers=self._client_wrapper.get_headers(),
|
|
1304
|
+
timeout=60,
|
|
1305
|
+
)
|
|
1306
|
+
if 200 <= _response.status_code < 300:
|
|
1307
|
+
return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
|
|
1308
|
+
if _response.status_code == 422:
|
|
1309
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1310
|
+
try:
|
|
1311
|
+
_response_json = _response.json()
|
|
1312
|
+
except JSONDecodeError:
|
|
1313
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1314
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1315
|
+
|
|
1316
|
+
async def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
|
|
1317
|
+
"""
|
|
1318
|
+
Get a job by id
|
|
1319
|
+
|
|
1320
|
+
Parameters:
|
|
1321
|
+
- job_id: str.
|
|
1322
|
+
---
|
|
1323
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1324
|
+
|
|
1325
|
+
client = AsyncLlamaCloud(
|
|
1326
|
+
token="YOUR_TOKEN",
|
|
1327
|
+
)
|
|
1328
|
+
await client.parsing.get_job_raw_structured_result(
|
|
1329
|
+
job_id="job_id",
|
|
1099
1330
|
)
|
|
1100
1331
|
"""
|
|
1101
1332
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1102
1333
|
"GET",
|
|
1103
1334
|
urllib.parse.urljoin(
|
|
1104
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/
|
|
1335
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
|
|
1105
1336
|
),
|
|
1106
1337
|
headers=self._client_wrapper.get_headers(),
|
|
1107
1338
|
timeout=60,
|
|
@@ -1129,7 +1360,7 @@ class AsyncParsingClient:
|
|
|
1129
1360
|
token="YOUR_TOKEN",
|
|
1130
1361
|
)
|
|
1131
1362
|
await client.parsing.get_job_raw_xlsx_result(
|
|
1132
|
-
job_id="
|
|
1363
|
+
job_id="job_id",
|
|
1133
1364
|
)
|
|
1134
1365
|
"""
|
|
1135
1366
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1163,7 +1394,7 @@ class AsyncParsingClient:
|
|
|
1163
1394
|
token="YOUR_TOKEN",
|
|
1164
1395
|
)
|
|
1165
1396
|
await client.parsing.get_job_result(
|
|
1166
|
-
job_id="
|
|
1397
|
+
job_id="job_id",
|
|
1167
1398
|
)
|
|
1168
1399
|
"""
|
|
1169
1400
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1197,7 +1428,7 @@ class AsyncParsingClient:
|
|
|
1197
1428
|
token="YOUR_TOKEN",
|
|
1198
1429
|
)
|
|
1199
1430
|
await client.parsing.get_job_raw_md_result(
|
|
1200
|
-
job_id="
|
|
1431
|
+
job_id="job_id",
|
|
1201
1432
|
)
|
|
1202
1433
|
"""
|
|
1203
1434
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1231,7 +1462,7 @@ class AsyncParsingClient:
|
|
|
1231
1462
|
token="YOUR_TOKEN",
|
|
1232
1463
|
)
|
|
1233
1464
|
await client.parsing.get_job_json_result(
|
|
1234
|
-
job_id="
|
|
1465
|
+
job_id="job_id",
|
|
1235
1466
|
)
|
|
1236
1467
|
"""
|
|
1237
1468
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1263,7 +1494,7 @@ class AsyncParsingClient:
|
|
|
1263
1494
|
token="YOUR_TOKEN",
|
|
1264
1495
|
)
|
|
1265
1496
|
await client.parsing.get_job_json_raw_result(
|
|
1266
|
-
job_id="
|
|
1497
|
+
job_id="job_id",
|
|
1267
1498
|
)
|
|
1268
1499
|
"""
|
|
1269
1500
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -1327,8 +1558,8 @@ class AsyncParsingClient:
|
|
|
1327
1558
|
token="YOUR_TOKEN",
|
|
1328
1559
|
)
|
|
1329
1560
|
await client.parsing.generate_presigned_url(
|
|
1330
|
-
job_id="
|
|
1331
|
-
filename="
|
|
1561
|
+
job_id="job_id",
|
|
1562
|
+
filename="filename",
|
|
1332
1563
|
)
|
|
1333
1564
|
"""
|
|
1334
1565
|
_response = await self._client_wrapper.httpx_client.request(
|