llama-cloud 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. Click here for more details.

Files changed (105)
  1. llama_cloud/__init__.py +12 -10
  2. llama_cloud/environment.py +1 -1
  3. llama_cloud/resources/__init__.py +2 -1
  4. llama_cloud/resources/data_sinks/client.py +14 -14
  5. llama_cloud/resources/data_sources/client.py +16 -16
  6. llama_cloud/resources/embedding_model_configs/client.py +80 -24
  7. llama_cloud/resources/evals/client.py +36 -26
  8. llama_cloud/resources/extraction/client.py +32 -32
  9. llama_cloud/resources/files/__init__.py +2 -2
  10. llama_cloud/resources/files/client.py +53 -28
  11. llama_cloud/resources/files/types/__init__.py +2 -1
  12. llama_cloud/resources/files/types/file_create_permission_info_value.py +7 -0
  13. llama_cloud/resources/organizations/client.py +60 -56
  14. llama_cloud/resources/parsing/client.py +555 -324
  15. llama_cloud/resources/pipelines/client.py +446 -302
  16. llama_cloud/resources/projects/client.py +270 -136
  17. llama_cloud/types/__init__.py +10 -10
  18. llama_cloud/types/azure_open_ai_embedding.py +12 -6
  19. llama_cloud/types/base_prompt_template.py +6 -2
  20. llama_cloud/types/bedrock_embedding.py +12 -6
  21. llama_cloud/types/character_splitter.py +4 -2
  22. llama_cloud/types/chat_message.py +1 -1
  23. llama_cloud/types/cloud_az_storage_blob_data_source.py +16 -7
  24. llama_cloud/types/cloud_box_data_source.py +13 -6
  25. llama_cloud/types/cloud_confluence_data_source.py +7 -6
  26. llama_cloud/types/cloud_document.py +3 -1
  27. llama_cloud/types/cloud_document_create.py +3 -1
  28. llama_cloud/types/cloud_google_drive_data_source.py +1 -0
  29. llama_cloud/types/cloud_jira_data_source.py +7 -4
  30. llama_cloud/types/cloud_notion_page_data_source.py +3 -2
  31. llama_cloud/types/cloud_one_drive_data_source.py +6 -3
  32. llama_cloud/types/cloud_s_3_data_source.py +9 -4
  33. llama_cloud/types/cloud_sharepoint_data_source.py +9 -6
  34. llama_cloud/types/cloud_slack_data_source.py +7 -6
  35. llama_cloud/types/code_splitter.py +1 -1
  36. llama_cloud/types/cohere_embedding.py +7 -3
  37. llama_cloud/types/data_sink.py +4 -4
  38. llama_cloud/types/data_sink_create.py +1 -1
  39. llama_cloud/types/data_source.py +7 -5
  40. llama_cloud/types/data_source_create.py +4 -2
  41. llama_cloud/types/embedding_model_config.py +2 -2
  42. llama_cloud/types/embedding_model_config_update.py +4 -2
  43. llama_cloud/types/eval_dataset.py +2 -2
  44. llama_cloud/types/eval_dataset_job_record.py +13 -7
  45. llama_cloud/types/eval_execution_params_override.py +6 -2
  46. llama_cloud/types/eval_question.py +2 -2
  47. llama_cloud/types/extraction_result.py +2 -2
  48. llama_cloud/types/extraction_schema.py +5 -3
  49. llama_cloud/types/file.py +15 -7
  50. llama_cloud/types/file_permission_info_value.py +5 -0
  51. llama_cloud/types/filter_operator.py +2 -2
  52. llama_cloud/types/gemini_embedding.py +10 -6
  53. llama_cloud/types/hugging_face_inference_api_embedding.py +27 -11
  54. llama_cloud/types/input_message.py +3 -1
  55. llama_cloud/types/job_name_mapping.py +4 -0
  56. llama_cloud/types/llama_parse_parameters.py +11 -0
  57. llama_cloud/types/llm.py +4 -2
  58. llama_cloud/types/llm_parameters.py +5 -2
  59. llama_cloud/types/local_eval.py +10 -8
  60. llama_cloud/types/local_eval_results.py +1 -1
  61. llama_cloud/types/managed_ingestion_status_response.py +5 -3
  62. llama_cloud/types/markdown_element_node_parser.py +5 -3
  63. llama_cloud/types/markdown_node_parser.py +1 -1
  64. llama_cloud/types/metadata_filter.py +2 -2
  65. llama_cloud/types/metric_result.py +3 -3
  66. llama_cloud/types/node_parser.py +1 -1
  67. llama_cloud/types/open_ai_embedding.py +12 -6
  68. llama_cloud/types/organization.py +2 -2
  69. llama_cloud/types/page_splitter_node_parser.py +2 -2
  70. llama_cloud/types/parsing_job_structured_result.py +32 -0
  71. llama_cloud/types/permission.py +3 -3
  72. llama_cloud/types/pipeline.py +17 -7
  73. llama_cloud/types/pipeline_configuration_hashes.py +3 -3
  74. llama_cloud/types/pipeline_create.py +15 -5
  75. llama_cloud/types/pipeline_data_source.py +13 -7
  76. llama_cloud/types/pipeline_data_source_create.py +3 -1
  77. llama_cloud/types/pipeline_deployment.py +4 -4
  78. llama_cloud/types/pipeline_file.py +25 -11
  79. llama_cloud/types/pipeline_file_create.py +3 -1
  80. llama_cloud/types/pipeline_file_permission_info_value.py +7 -0
  81. llama_cloud/types/playground_session.py +2 -2
  82. llama_cloud/types/preset_retrieval_params.py +14 -7
  83. llama_cloud/types/presigned_url.py +3 -1
  84. llama_cloud/types/project.py +2 -2
  85. llama_cloud/types/prompt_mixin_prompts.py +1 -1
  86. llama_cloud/types/prompt_spec.py +4 -2
  87. llama_cloud/types/role.py +3 -3
  88. llama_cloud/types/sentence_splitter.py +4 -2
  89. llama_cloud/types/text_node.py +3 -3
  90. llama_cloud/types/{hugging_face_inference_api_embedding_token.py → token.py} +1 -1
  91. llama_cloud/types/token_text_splitter.py +1 -1
  92. llama_cloud/types/user_organization.py +9 -5
  93. llama_cloud/types/user_organization_create.py +4 -4
  94. llama_cloud/types/user_organization_delete.py +2 -2
  95. llama_cloud/types/user_organization_role.py +2 -2
  96. llama_cloud/types/value.py +5 -0
  97. llama_cloud/types/vertex_text_embedding.py +9 -5
  98. {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/METADATA +2 -1
  99. {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/RECORD +101 -100
  100. {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/WHEEL +1 -1
  101. llama_cloud/types/data_sink_component.py +0 -20
  102. llama_cloud/types/data_source_component.py +0 -28
  103. llama_cloud/types/metadata_filter_value.py +0 -5
  104. llama_cloud/types/pipeline_data_source_component.py +0 -28
  105. {llama_cloud-0.1.5.dist-info → llama_cloud-0.1.6.dist-info}/LICENSE +0 -0
@@ -16,6 +16,7 @@ from ...types.parsing_history_item import ParsingHistoryItem
16
16
  from ...types.parsing_job import ParsingJob
17
17
  from ...types.parsing_job_json_result import ParsingJobJsonResult
18
18
  from ...types.parsing_job_markdown_result import ParsingJobMarkdownResult
19
+ from ...types.parsing_job_structured_result import ParsingJobStructuredResult
19
20
  from ...types.parsing_job_text_result import ParsingJobTextResult
20
21
  from ...types.parsing_usage import ParsingUsage
21
22
  from ...types.presigned_url import PresignedUrl
@@ -36,7 +37,7 @@ class ParsingClient:
36
37
  def __init__(self, *, client_wrapper: SyncClientWrapper):
37
38
  self._client_wrapper = client_wrapper
38
39
 
39
- def get_job_image_result(self, job_id: str, name: str) -> None:
40
+ def get_job_image_result(self, job_id: str, name: str) -> typing.Iterator[bytes]:
40
41
  """
41
42
  Get a job by id
42
43
 
@@ -44,34 +45,29 @@ class ParsingClient:
44
45
  - job_id: str.
45
46
 
46
47
  - name: str.
47
- ---
48
- from llama_cloud.client import LlamaCloud
49
-
50
- client = LlamaCloud(
51
- token="YOUR_TOKEN",
52
- )
53
- client.parsing.get_job_image_result(
54
- job_id="string",
55
- name="string",
56
- )
57
48
  """
58
- _response = self._client_wrapper.httpx_client.request(
49
+ with self._client_wrapper.httpx_client.stream(
59
50
  "GET",
60
51
  urllib.parse.urljoin(
61
52
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
62
53
  ),
63
54
  headers=self._client_wrapper.get_headers(),
64
55
  timeout=60,
65
- )
66
- if 200 <= _response.status_code < 300:
67
- return
68
- if _response.status_code == 422:
69
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
70
- try:
71
- _response_json = _response.json()
72
- except JSONDecodeError:
73
- raise ApiError(status_code=_response.status_code, body=_response.text)
74
- raise ApiError(status_code=_response.status_code, body=_response_json)
56
+ ) as _response:
57
+ if 200 <= _response.status_code < 300:
58
+ for _chunk in _response.iter_bytes():
59
+ yield _chunk
60
+ return
61
+ _response.read()
62
+ if _response.status_code == 422:
63
+ raise UnprocessableEntityError(
64
+ pydantic.parse_obj_as(HttpValidationError, _response.json()) # type: ignore
65
+ )
66
+ try:
67
+ _response_json = _response.json()
68
+ except JSONDecodeError:
69
+ raise ApiError(status_code=_response.status_code, body=_response.text)
70
+ raise ApiError(status_code=_response.status_code, body=_response_json)
75
71
 
76
72
  def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
77
73
  """
@@ -104,47 +100,58 @@ class ParsingClient:
104
100
  *,
105
101
  project_id: typing.Optional[str] = None,
106
102
  organization_id: typing.Optional[str] = None,
107
- language: typing.List[ParserLanguages],
108
- parsing_instruction: str,
109
- skip_diagonal_text: bool,
110
- invalidate_cache: bool,
111
- do_not_cache: bool,
112
- gpt_4_o_mode: bool,
113
- fast_mode: bool,
114
- premium_mode: bool,
115
- continuous_mode: bool,
116
- gpt_4_o_api_key: str,
117
- do_not_unroll_columns: bool,
118
- guess_xlsx_sheet_name: bool,
119
- page_separator: str,
120
- bounding_box: str,
121
- target_pages: str,
122
- use_vendor_multimodal_model: bool,
123
- vendor_multimodal_model_name: str,
124
- vendor_multimodal_api_key: str,
125
- page_prefix: str,
126
- page_suffix: str,
127
- webhook_url: str,
128
- take_screenshot: bool,
129
- is_formatting_instruction: bool,
130
- disable_ocr: bool,
131
- annotate_links: bool,
132
- disable_reconstruction: bool,
133
- disable_image_extraction: bool,
134
- input_s_3_path: str,
135
- output_s_3_path_prefix: str,
136
- azure_openai_deployment_name: str,
137
- azure_openai_endpoint: str,
138
- azure_openai_api_version: str,
139
- azure_openai_key: str,
140
- auto_mode: bool,
141
- auto_mode_trigger_on_regexp_in_page: str,
142
- auto_mode_trigger_on_text_in_page: str,
143
- auto_mode_trigger_on_table_in_page: bool,
144
- auto_mode_trigger_on_image_in_page: bool,
145
- file: typing.Optional[str] = OMIT,
146
- input_url: str,
147
- http_proxy: str,
103
+ language: typing.Optional[typing.List[ParserLanguages]] = None,
104
+ parsing_instruction: typing.Optional[str] = None,
105
+ skip_diagonal_text: typing.Optional[bool] = None,
106
+ invalidate_cache: typing.Optional[bool] = None,
107
+ output_pdf_of_document: typing.Optional[bool] = None,
108
+ do_not_cache: typing.Optional[bool] = None,
109
+ gpt_4_o_mode: typing.Optional[bool] = None,
110
+ fast_mode: typing.Optional[bool] = None,
111
+ premium_mode: typing.Optional[bool] = None,
112
+ continuous_mode: typing.Optional[bool] = None,
113
+ gpt_4_o_api_key: typing.Optional[str] = None,
114
+ do_not_unroll_columns: typing.Optional[bool] = None,
115
+ html_make_all_elements_visible: typing.Optional[bool] = None,
116
+ html_remove_fixed_elements: typing.Optional[bool] = None,
117
+ guess_xlsx_sheet_name: typing.Optional[bool] = None,
118
+ page_separator: typing.Optional[str] = None,
119
+ bounding_box: typing.Optional[str] = None,
120
+ bbox_top: typing.Optional[float] = None,
121
+ bbox_right: typing.Optional[float] = None,
122
+ bbox_bottom: typing.Optional[float] = None,
123
+ bbox_left: typing.Optional[float] = None,
124
+ target_pages: typing.Optional[str] = None,
125
+ use_vendor_multimodal_model: typing.Optional[bool] = None,
126
+ vendor_multimodal_model_name: typing.Optional[str] = None,
127
+ vendor_multimodal_api_key: typing.Optional[str] = None,
128
+ page_prefix: typing.Optional[str] = None,
129
+ page_suffix: typing.Optional[str] = None,
130
+ webhook_url: typing.Optional[str] = None,
131
+ take_screenshot: typing.Optional[bool] = None,
132
+ is_formatting_instruction: typing.Optional[bool] = None,
133
+ disable_ocr: typing.Optional[bool] = None,
134
+ annotate_links: typing.Optional[bool] = None,
135
+ disable_reconstruction: typing.Optional[bool] = None,
136
+ disable_image_extraction: typing.Optional[bool] = None,
137
+ input_s_3_path: typing.Optional[str] = None,
138
+ output_s_3_path_prefix: typing.Optional[str] = None,
139
+ azure_openai_deployment_name: typing.Optional[str] = None,
140
+ azure_openai_endpoint: typing.Optional[str] = None,
141
+ azure_openai_api_version: typing.Optional[str] = None,
142
+ azure_openai_key: typing.Optional[str] = None,
143
+ auto_mode: typing.Optional[bool] = None,
144
+ auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = None,
145
+ auto_mode_trigger_on_text_in_page: typing.Optional[str] = None,
146
+ auto_mode_trigger_on_table_in_page: typing.Optional[bool] = None,
147
+ auto_mode_trigger_on_image_in_page: typing.Optional[bool] = None,
148
+ file: typing.Optional[str] = None,
149
+ input_url: typing.Optional[str] = None,
150
+ http_proxy: typing.Optional[str] = None,
151
+ structured_output: typing.Optional[bool] = None,
152
+ structured_output_json_schema: typing.Optional[str] = None,
153
+ structured_output_json_schema_name: typing.Optional[str] = None,
154
+ max_pages: typing.Optional[int] = None,
148
155
  ) -> ParsingJob:
149
156
  """
150
157
  Upload a file to s3 and create a job. return a job id
@@ -154,137 +161,178 @@ class ParsingClient:
154
161
 
155
162
  - organization_id: typing.Optional[str].
156
163
 
157
- - language: typing.List[ParserLanguages].
164
+ - language: typing.Optional[typing.List[ParserLanguages]].
165
+
166
+ - parsing_instruction: typing.Optional[str].
167
+
168
+ - skip_diagonal_text: typing.Optional[bool].
169
+
170
+ - invalidate_cache: typing.Optional[bool].
171
+
172
+ - output_pdf_of_document: typing.Optional[bool].
173
+
174
+ - do_not_cache: typing.Optional[bool].
175
+
176
+ - gpt_4_o_mode: typing.Optional[bool].
177
+
178
+ - fast_mode: typing.Optional[bool].
158
179
 
159
- - parsing_instruction: str.
180
+ - premium_mode: typing.Optional[bool].
160
181
 
161
- - skip_diagonal_text: bool.
182
+ - continuous_mode: typing.Optional[bool].
162
183
 
163
- - invalidate_cache: bool.
184
+ - gpt_4_o_api_key: typing.Optional[str].
164
185
 
165
- - do_not_cache: bool.
186
+ - do_not_unroll_columns: typing.Optional[bool].
166
187
 
167
- - gpt_4_o_mode: bool.
188
+ - html_make_all_elements_visible: typing.Optional[bool].
168
189
 
169
- - fast_mode: bool.
190
+ - html_remove_fixed_elements: typing.Optional[bool].
170
191
 
171
- - premium_mode: bool.
192
+ - guess_xlsx_sheet_name: typing.Optional[bool].
172
193
 
173
- - continuous_mode: bool.
194
+ - page_separator: typing.Optional[str].
174
195
 
175
- - gpt_4_o_api_key: str.
196
+ - bounding_box: typing.Optional[str].
176
197
 
177
- - do_not_unroll_columns: bool.
198
+ - bbox_top: typing.Optional[float].
178
199
 
179
- - guess_xlsx_sheet_name: bool.
200
+ - bbox_right: typing.Optional[float].
180
201
 
181
- - page_separator: str.
202
+ - bbox_bottom: typing.Optional[float].
182
203
 
183
- - bounding_box: str.
204
+ - bbox_left: typing.Optional[float].
184
205
 
185
- - target_pages: str.
206
+ - target_pages: typing.Optional[str].
186
207
 
187
- - use_vendor_multimodal_model: bool.
208
+ - use_vendor_multimodal_model: typing.Optional[bool].
188
209
 
189
- - vendor_multimodal_model_name: str.
210
+ - vendor_multimodal_model_name: typing.Optional[str].
190
211
 
191
- - vendor_multimodal_api_key: str.
212
+ - vendor_multimodal_api_key: typing.Optional[str].
192
213
 
193
- - page_prefix: str.
214
+ - page_prefix: typing.Optional[str].
194
215
 
195
- - page_suffix: str.
216
+ - page_suffix: typing.Optional[str].
196
217
 
197
- - webhook_url: str.
218
+ - webhook_url: typing.Optional[str].
198
219
 
199
- - take_screenshot: bool.
220
+ - take_screenshot: typing.Optional[bool].
200
221
 
201
- - is_formatting_instruction: bool.
222
+ - is_formatting_instruction: typing.Optional[bool].
202
223
 
203
- - disable_ocr: bool.
224
+ - disable_ocr: typing.Optional[bool].
204
225
 
205
- - annotate_links: bool.
226
+ - annotate_links: typing.Optional[bool].
206
227
 
207
- - disable_reconstruction: bool.
228
+ - disable_reconstruction: typing.Optional[bool].
208
229
 
209
- - disable_image_extraction: bool.
230
+ - disable_image_extraction: typing.Optional[bool].
210
231
 
211
- - input_s_3_path: str.
232
+ - input_s_3_path: typing.Optional[str].
212
233
 
213
- - output_s_3_path_prefix: str.
234
+ - output_s_3_path_prefix: typing.Optional[str].
214
235
 
215
- - azure_openai_deployment_name: str.
236
+ - azure_openai_deployment_name: typing.Optional[str].
216
237
 
217
- - azure_openai_endpoint: str.
238
+ - azure_openai_endpoint: typing.Optional[str].
218
239
 
219
- - azure_openai_api_version: str.
240
+ - azure_openai_api_version: typing.Optional[str].
220
241
 
221
- - azure_openai_key: str.
242
+ - azure_openai_key: typing.Optional[str].
222
243
 
223
- - auto_mode: bool.
244
+ - auto_mode: typing.Optional[bool].
224
245
 
225
- - auto_mode_trigger_on_regexp_in_page: str.
246
+ - auto_mode_trigger_on_regexp_in_page: typing.Optional[str].
226
247
 
227
- - auto_mode_trigger_on_text_in_page: str.
248
+ - auto_mode_trigger_on_text_in_page: typing.Optional[str].
228
249
 
229
- - auto_mode_trigger_on_table_in_page: bool.
250
+ - auto_mode_trigger_on_table_in_page: typing.Optional[bool].
230
251
 
231
- - auto_mode_trigger_on_image_in_page: bool.
252
+ - auto_mode_trigger_on_image_in_page: typing.Optional[bool].
232
253
 
233
254
  - file: typing.Optional[str].
234
255
 
235
- - input_url: str.
236
-
237
- - http_proxy: str.
238
- """
239
- _request: typing.Dict[str, typing.Any] = {
240
- "language": language,
241
- "parsing_instruction": parsing_instruction,
242
- "skip_diagonal_text": skip_diagonal_text,
243
- "invalidate_cache": invalidate_cache,
244
- "do_not_cache": do_not_cache,
245
- "gpt4o_mode": gpt_4_o_mode,
246
- "fast_mode": fast_mode,
247
- "premium_mode": premium_mode,
248
- "continuous_mode": continuous_mode,
249
- "gpt4o_api_key": gpt_4_o_api_key,
250
- "do_not_unroll_columns": do_not_unroll_columns,
251
- "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
252
- "page_separator": page_separator,
253
- "bounding_box": bounding_box,
254
- "target_pages": target_pages,
255
- "use_vendor_multimodal_model": use_vendor_multimodal_model,
256
- "vendor_multimodal_model_name": vendor_multimodal_model_name,
257
- "vendor_multimodal_api_key": vendor_multimodal_api_key,
258
- "page_prefix": page_prefix,
259
- "page_suffix": page_suffix,
260
- "webhook_url": webhook_url,
261
- "take_screenshot": take_screenshot,
262
- "is_formatting_instruction": is_formatting_instruction,
263
- "disable_ocr": disable_ocr,
264
- "annotate_links": annotate_links,
265
- "disable_reconstruction": disable_reconstruction,
266
- "disable_image_extraction": disable_image_extraction,
267
- "input_s3_path": input_s_3_path,
268
- "output_s3_path_prefix": output_s_3_path_prefix,
269
- "azure_openai_deployment_name": azure_openai_deployment_name,
270
- "azure_openai_endpoint": azure_openai_endpoint,
271
- "azure_openai_api_version": azure_openai_api_version,
272
- "azure_openai_key": azure_openai_key,
273
- "auto_mode": auto_mode,
274
- "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
275
- "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
276
- "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
277
- "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
278
- "input_url": input_url,
279
- "http_proxy": http_proxy,
280
- }
281
- if file is not OMIT:
282
- _request["file"] = file
256
+ - input_url: typing.Optional[str].
257
+
258
+ - http_proxy: typing.Optional[str].
259
+
260
+ - structured_output: typing.Optional[bool].
261
+
262
+ - structured_output_json_schema: typing.Optional[str].
263
+
264
+ - structured_output_json_schema_name: typing.Optional[str].
265
+
266
+ - max_pages: typing.Optional[int].
267
+ ---
268
+ from llama_cloud.client import LlamaCloud
269
+
270
+ client = LlamaCloud(
271
+ token="YOUR_TOKEN",
272
+ )
273
+ client.parsing.upload_file()
274
+ """
283
275
  _response = self._client_wrapper.httpx_client.request(
284
276
  "POST",
285
277
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
286
278
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
287
- json=jsonable_encoder(_request),
279
+ data=jsonable_encoder(
280
+ {
281
+ "language": language,
282
+ "parsing_instruction": parsing_instruction,
283
+ "skip_diagonal_text": skip_diagonal_text,
284
+ "invalidate_cache": invalidate_cache,
285
+ "output_pdf_of_document": output_pdf_of_document,
286
+ "do_not_cache": do_not_cache,
287
+ "gpt4o_mode": gpt_4_o_mode,
288
+ "fast_mode": fast_mode,
289
+ "premium_mode": premium_mode,
290
+ "continuous_mode": continuous_mode,
291
+ "gpt4o_api_key": gpt_4_o_api_key,
292
+ "do_not_unroll_columns": do_not_unroll_columns,
293
+ "html_make_all_elements_visible": html_make_all_elements_visible,
294
+ "html_remove_fixed_elements": html_remove_fixed_elements,
295
+ "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
296
+ "page_separator": page_separator,
297
+ "bounding_box": bounding_box,
298
+ "bbox_top": bbox_top,
299
+ "bbox_right": bbox_right,
300
+ "bbox_bottom": bbox_bottom,
301
+ "bbox_left": bbox_left,
302
+ "target_pages": target_pages,
303
+ "use_vendor_multimodal_model": use_vendor_multimodal_model,
304
+ "vendor_multimodal_model_name": vendor_multimodal_model_name,
305
+ "vendor_multimodal_api_key": vendor_multimodal_api_key,
306
+ "page_prefix": page_prefix,
307
+ "page_suffix": page_suffix,
308
+ "webhook_url": webhook_url,
309
+ "take_screenshot": take_screenshot,
310
+ "is_formatting_instruction": is_formatting_instruction,
311
+ "disable_ocr": disable_ocr,
312
+ "annotate_links": annotate_links,
313
+ "disable_reconstruction": disable_reconstruction,
314
+ "disable_image_extraction": disable_image_extraction,
315
+ "input_s3_path": input_s_3_path,
316
+ "output_s3_path_prefix": output_s_3_path_prefix,
317
+ "azure_openai_deployment_name": azure_openai_deployment_name,
318
+ "azure_openai_endpoint": azure_openai_endpoint,
319
+ "azure_openai_api_version": azure_openai_api_version,
320
+ "azure_openai_key": azure_openai_key,
321
+ "auto_mode": auto_mode,
322
+ "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
323
+ "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
324
+ "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
325
+ "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
326
+ "file": file,
327
+ "input_url": input_url,
328
+ "http_proxy": http_proxy,
329
+ "structured_output": structured_output,
330
+ "structured_output_json_schema": structured_output_json_schema,
331
+ "structured_output_json_schema_name": structured_output_json_schema_name,
332
+ "max_pages": max_pages,
333
+ }
334
+ ),
335
+ files={},
288
336
  headers=self._client_wrapper.get_headers(),
289
337
  timeout=60,
290
338
  )
@@ -340,7 +388,7 @@ class ParsingClient:
340
388
  token="YOUR_TOKEN",
341
389
  )
342
390
  client.parsing.get_job(
343
- job_id="string",
391
+ job_id="job_id",
344
392
  )
345
393
  """
346
394
  _response = self._client_wrapper.httpx_client.request(
@@ -372,7 +420,7 @@ class ParsingClient:
372
420
  token="YOUR_TOKEN",
373
421
  )
374
422
  client.parsing.get_parsing_job_details(
375
- job_id="string",
423
+ job_id="job_id",
376
424
  )
377
425
  """
378
426
  _response = self._client_wrapper.httpx_client.request(
@@ -404,7 +452,7 @@ class ParsingClient:
404
452
  token="YOUR_TOKEN",
405
453
  )
406
454
  client.parsing.get_job_text_result(
407
- job_id="string",
455
+ job_id="job_id",
408
456
  )
409
457
  """
410
458
  _response = self._client_wrapper.httpx_client.request(
@@ -436,13 +484,81 @@ class ParsingClient:
436
484
  token="YOUR_TOKEN",
437
485
  )
438
486
  client.parsing.get_job_raw_text_result(
439
- job_id="string",
487
+ job_id="job_id",
488
+ )
489
+ """
490
+ _response = self._client_wrapper.httpx_client.request(
491
+ "GET",
492
+ urllib.parse.urljoin(
493
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
494
+ ),
495
+ headers=self._client_wrapper.get_headers(),
496
+ timeout=60,
497
+ )
498
+ if 200 <= _response.status_code < 300:
499
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
500
+ if _response.status_code == 422:
501
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
502
+ try:
503
+ _response_json = _response.json()
504
+ except JSONDecodeError:
505
+ raise ApiError(status_code=_response.status_code, body=_response.text)
506
+ raise ApiError(status_code=_response.status_code, body=_response_json)
507
+
508
+ def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
509
+ """
510
+ Get a job by id
511
+
512
+ Parameters:
513
+ - job_id: str.
514
+ ---
515
+ from llama_cloud.client import LlamaCloud
516
+
517
+ client = LlamaCloud(
518
+ token="YOUR_TOKEN",
519
+ )
520
+ client.parsing.get_job_structured_result(
521
+ job_id="job_id",
522
+ )
523
+ """
524
+ _response = self._client_wrapper.httpx_client.request(
525
+ "GET",
526
+ urllib.parse.urljoin(
527
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
528
+ ),
529
+ headers=self._client_wrapper.get_headers(),
530
+ timeout=60,
531
+ )
532
+ if 200 <= _response.status_code < 300:
533
+ return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
534
+ if _response.status_code == 422:
535
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
536
+ try:
537
+ _response_json = _response.json()
538
+ except JSONDecodeError:
539
+ raise ApiError(status_code=_response.status_code, body=_response.text)
540
+ raise ApiError(status_code=_response.status_code, body=_response_json)
541
+
542
+ def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
543
+ """
544
+ Get a job by id
545
+
546
+ Parameters:
547
+ - job_id: str.
548
+ ---
549
+ from llama_cloud.client import LlamaCloud
550
+
551
+ client = LlamaCloud(
552
+ token="YOUR_TOKEN",
553
+ )
554
+ client.parsing.get_job_raw_structured_result(
555
+ job_id="job_id",
440
556
  )
441
557
  """
442
558
  _response = self._client_wrapper.httpx_client.request(
443
559
  "GET",
444
560
  urllib.parse.urljoin(
445
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/text"
561
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
446
562
  ),
447
563
  headers=self._client_wrapper.get_headers(),
448
564
  timeout=60,
@@ -470,7 +586,7 @@ class ParsingClient:
470
586
  token="YOUR_TOKEN",
471
587
  )
472
588
  client.parsing.get_job_raw_xlsx_result(
473
- job_id="string",
589
+ job_id="job_id",
474
590
  )
475
591
  """
476
592
  _response = self._client_wrapper.httpx_client.request(
@@ -504,7 +620,7 @@ class ParsingClient:
504
620
  token="YOUR_TOKEN",
505
621
  )
506
622
  client.parsing.get_job_result(
507
- job_id="string",
623
+ job_id="job_id",
508
624
  )
509
625
  """
510
626
  _response = self._client_wrapper.httpx_client.request(
@@ -538,7 +654,7 @@ class ParsingClient:
538
654
  token="YOUR_TOKEN",
539
655
  )
540
656
  client.parsing.get_job_raw_md_result(
541
- job_id="string",
657
+ job_id="job_id",
542
658
  )
543
659
  """
544
660
  _response = self._client_wrapper.httpx_client.request(
@@ -572,7 +688,7 @@ class ParsingClient:
572
688
  token="YOUR_TOKEN",
573
689
  )
574
690
  client.parsing.get_job_json_result(
575
- job_id="string",
691
+ job_id="job_id",
576
692
  )
577
693
  """
578
694
  _response = self._client_wrapper.httpx_client.request(
@@ -604,7 +720,7 @@ class ParsingClient:
604
720
  token="YOUR_TOKEN",
605
721
  )
606
722
  client.parsing.get_job_json_raw_result(
607
- job_id="string",
723
+ job_id="job_id",
608
724
  )
609
725
  """
610
726
  _response = self._client_wrapper.httpx_client.request(
@@ -668,8 +784,8 @@ class ParsingClient:
668
784
  token="YOUR_TOKEN",
669
785
  )
670
786
  client.parsing.generate_presigned_url(
671
- job_id="string",
672
- filename="string",
787
+ job_id="job_id",
788
+ filename="filename",
673
789
  )
674
790
  """
675
791
  _response = self._client_wrapper.httpx_client.request(
@@ -695,7 +811,7 @@ class AsyncParsingClient:
695
811
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
696
812
  self._client_wrapper = client_wrapper
697
813
 
698
- async def get_job_image_result(self, job_id: str, name: str) -> None:
814
+ async def get_job_image_result(self, job_id: str, name: str) -> typing.AsyncIterator[bytes]:
699
815
  """
700
816
  Get a job by id
701
817
 
@@ -703,34 +819,29 @@ class AsyncParsingClient:
703
819
  - job_id: str.
704
820
 
705
821
  - name: str.
706
- ---
707
- from llama_cloud.client import AsyncLlamaCloud
708
-
709
- client = AsyncLlamaCloud(
710
- token="YOUR_TOKEN",
711
- )
712
- await client.parsing.get_job_image_result(
713
- job_id="string",
714
- name="string",
715
- )
716
822
  """
717
- _response = await self._client_wrapper.httpx_client.request(
823
+ async with self._client_wrapper.httpx_client.stream(
718
824
  "GET",
719
825
  urllib.parse.urljoin(
720
826
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
721
827
  ),
722
828
  headers=self._client_wrapper.get_headers(),
723
829
  timeout=60,
724
- )
725
- if 200 <= _response.status_code < 300:
726
- return
727
- if _response.status_code == 422:
728
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
729
- try:
730
- _response_json = _response.json()
731
- except JSONDecodeError:
732
- raise ApiError(status_code=_response.status_code, body=_response.text)
733
- raise ApiError(status_code=_response.status_code, body=_response_json)
830
+ ) as _response:
831
+ if 200 <= _response.status_code < 300:
832
+ async for _chunk in _response.aiter_bytes():
833
+ yield _chunk
834
+ return
835
+ await _response.aread()
836
+ if _response.status_code == 422:
837
+ raise UnprocessableEntityError(
838
+ pydantic.parse_obj_as(HttpValidationError, _response.json()) # type: ignore
839
+ )
840
+ try:
841
+ _response_json = _response.json()
842
+ except JSONDecodeError:
843
+ raise ApiError(status_code=_response.status_code, body=_response.text)
844
+ raise ApiError(status_code=_response.status_code, body=_response_json)
734
845
 
735
846
  async def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
736
847
  """
@@ -763,47 +874,58 @@ class AsyncParsingClient:
763
874
  *,
764
875
  project_id: typing.Optional[str] = None,
765
876
  organization_id: typing.Optional[str] = None,
766
- language: typing.List[ParserLanguages],
767
- parsing_instruction: str,
768
- skip_diagonal_text: bool,
769
- invalidate_cache: bool,
770
- do_not_cache: bool,
771
- gpt_4_o_mode: bool,
772
- fast_mode: bool,
773
- premium_mode: bool,
774
- continuous_mode: bool,
775
- gpt_4_o_api_key: str,
776
- do_not_unroll_columns: bool,
777
- guess_xlsx_sheet_name: bool,
778
- page_separator: str,
779
- bounding_box: str,
780
- target_pages: str,
781
- use_vendor_multimodal_model: bool,
782
- vendor_multimodal_model_name: str,
783
- vendor_multimodal_api_key: str,
784
- page_prefix: str,
785
- page_suffix: str,
786
- webhook_url: str,
787
- take_screenshot: bool,
788
- is_formatting_instruction: bool,
789
- disable_ocr: bool,
790
- annotate_links: bool,
791
- disable_reconstruction: bool,
792
- disable_image_extraction: bool,
793
- input_s_3_path: str,
794
- output_s_3_path_prefix: str,
795
- azure_openai_deployment_name: str,
796
- azure_openai_endpoint: str,
797
- azure_openai_api_version: str,
798
- azure_openai_key: str,
799
- auto_mode: bool,
800
- auto_mode_trigger_on_regexp_in_page: str,
801
- auto_mode_trigger_on_text_in_page: str,
802
- auto_mode_trigger_on_table_in_page: bool,
803
- auto_mode_trigger_on_image_in_page: bool,
804
- file: typing.Optional[str] = OMIT,
805
- input_url: str,
806
- http_proxy: str,
877
+ language: typing.Optional[typing.List[ParserLanguages]] = None,
878
+ parsing_instruction: typing.Optional[str] = None,
879
+ skip_diagonal_text: typing.Optional[bool] = None,
880
+ invalidate_cache: typing.Optional[bool] = None,
881
+ output_pdf_of_document: typing.Optional[bool] = None,
882
+ do_not_cache: typing.Optional[bool] = None,
883
+ gpt_4_o_mode: typing.Optional[bool] = None,
884
+ fast_mode: typing.Optional[bool] = None,
885
+ premium_mode: typing.Optional[bool] = None,
886
+ continuous_mode: typing.Optional[bool] = None,
887
+ gpt_4_o_api_key: typing.Optional[str] = None,
888
+ do_not_unroll_columns: typing.Optional[bool] = None,
889
+ html_make_all_elements_visible: typing.Optional[bool] = None,
890
+ html_remove_fixed_elements: typing.Optional[bool] = None,
891
+ guess_xlsx_sheet_name: typing.Optional[bool] = None,
892
+ page_separator: typing.Optional[str] = None,
893
+ bounding_box: typing.Optional[str] = None,
894
+ bbox_top: typing.Optional[float] = None,
895
+ bbox_right: typing.Optional[float] = None,
896
+ bbox_bottom: typing.Optional[float] = None,
897
+ bbox_left: typing.Optional[float] = None,
898
+ target_pages: typing.Optional[str] = None,
899
+ use_vendor_multimodal_model: typing.Optional[bool] = None,
900
+ vendor_multimodal_model_name: typing.Optional[str] = None,
901
+ vendor_multimodal_api_key: typing.Optional[str] = None,
902
+ page_prefix: typing.Optional[str] = None,
903
+ page_suffix: typing.Optional[str] = None,
904
+ webhook_url: typing.Optional[str] = None,
905
+ take_screenshot: typing.Optional[bool] = None,
906
+ is_formatting_instruction: typing.Optional[bool] = None,
907
+ disable_ocr: typing.Optional[bool] = None,
908
+ annotate_links: typing.Optional[bool] = None,
909
+ disable_reconstruction: typing.Optional[bool] = None,
910
+ disable_image_extraction: typing.Optional[bool] = None,
911
+ input_s_3_path: typing.Optional[str] = None,
912
+ output_s_3_path_prefix: typing.Optional[str] = None,
913
+ azure_openai_deployment_name: typing.Optional[str] = None,
914
+ azure_openai_endpoint: typing.Optional[str] = None,
915
+ azure_openai_api_version: typing.Optional[str] = None,
916
+ azure_openai_key: typing.Optional[str] = None,
917
+ auto_mode: typing.Optional[bool] = None,
918
+ auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = None,
919
+ auto_mode_trigger_on_text_in_page: typing.Optional[str] = None,
920
+ auto_mode_trigger_on_table_in_page: typing.Optional[bool] = None,
921
+ auto_mode_trigger_on_image_in_page: typing.Optional[bool] = None,
922
+ file: typing.Optional[str] = None,
923
+ input_url: typing.Optional[str] = None,
924
+ http_proxy: typing.Optional[str] = None,
925
+ structured_output: typing.Optional[bool] = None,
926
+ structured_output_json_schema: typing.Optional[str] = None,
927
+ structured_output_json_schema_name: typing.Optional[str] = None,
928
+ max_pages: typing.Optional[int] = None,
807
929
  ) -> ParsingJob:
808
930
  """
809
931
  Upload a file to s3 and create a job. return a job id
@@ -813,137 +935,178 @@ class AsyncParsingClient:
813
935
 
814
936
  - organization_id: typing.Optional[str].
815
937
 
816
- - language: typing.List[ParserLanguages].
938
+ - language: typing.Optional[typing.List[ParserLanguages]].
939
+
940
+ - parsing_instruction: typing.Optional[str].
941
+
942
+ - skip_diagonal_text: typing.Optional[bool].
943
+
944
+ - invalidate_cache: typing.Optional[bool].
945
+
946
+ - output_pdf_of_document: typing.Optional[bool].
947
+
948
+ - do_not_cache: typing.Optional[bool].
949
+
950
+ - gpt_4_o_mode: typing.Optional[bool].
951
+
952
+ - fast_mode: typing.Optional[bool].
817
953
 
818
- - parsing_instruction: str.
954
+ - premium_mode: typing.Optional[bool].
819
955
 
820
- - skip_diagonal_text: bool.
956
+ - continuous_mode: typing.Optional[bool].
821
957
 
822
- - invalidate_cache: bool.
958
+ - gpt_4_o_api_key: typing.Optional[str].
823
959
 
824
- - do_not_cache: bool.
960
+ - do_not_unroll_columns: typing.Optional[bool].
825
961
 
826
- - gpt_4_o_mode: bool.
962
+ - html_make_all_elements_visible: typing.Optional[bool].
827
963
 
828
- - fast_mode: bool.
964
+ - html_remove_fixed_elements: typing.Optional[bool].
829
965
 
830
- - premium_mode: bool.
966
+ - guess_xlsx_sheet_name: typing.Optional[bool].
831
967
 
832
- - continuous_mode: bool.
968
+ - page_separator: typing.Optional[str].
833
969
 
834
- - gpt_4_o_api_key: str.
970
+ - bounding_box: typing.Optional[str].
835
971
 
836
- - do_not_unroll_columns: bool.
972
+ - bbox_top: typing.Optional[float].
837
973
 
838
- - guess_xlsx_sheet_name: bool.
974
+ - bbox_right: typing.Optional[float].
839
975
 
840
- - page_separator: str.
976
+ - bbox_bottom: typing.Optional[float].
841
977
 
842
- - bounding_box: str.
978
+ - bbox_left: typing.Optional[float].
843
979
 
844
- - target_pages: str.
980
+ - target_pages: typing.Optional[str].
845
981
 
846
- - use_vendor_multimodal_model: bool.
982
+ - use_vendor_multimodal_model: typing.Optional[bool].
847
983
 
848
- - vendor_multimodal_model_name: str.
984
+ - vendor_multimodal_model_name: typing.Optional[str].
849
985
 
850
- - vendor_multimodal_api_key: str.
986
+ - vendor_multimodal_api_key: typing.Optional[str].
851
987
 
852
- - page_prefix: str.
988
+ - page_prefix: typing.Optional[str].
853
989
 
854
- - page_suffix: str.
990
+ - page_suffix: typing.Optional[str].
855
991
 
856
- - webhook_url: str.
992
+ - webhook_url: typing.Optional[str].
857
993
 
858
- - take_screenshot: bool.
994
+ - take_screenshot: typing.Optional[bool].
859
995
 
860
- - is_formatting_instruction: bool.
996
+ - is_formatting_instruction: typing.Optional[bool].
861
997
 
862
- - disable_ocr: bool.
998
+ - disable_ocr: typing.Optional[bool].
863
999
 
864
- - annotate_links: bool.
1000
+ - annotate_links: typing.Optional[bool].
865
1001
 
866
- - disable_reconstruction: bool.
1002
+ - disable_reconstruction: typing.Optional[bool].
867
1003
 
868
- - disable_image_extraction: bool.
1004
+ - disable_image_extraction: typing.Optional[bool].
869
1005
 
870
- - input_s_3_path: str.
1006
+ - input_s_3_path: typing.Optional[str].
871
1007
 
872
- - output_s_3_path_prefix: str.
1008
+ - output_s_3_path_prefix: typing.Optional[str].
873
1009
 
874
- - azure_openai_deployment_name: str.
1010
+ - azure_openai_deployment_name: typing.Optional[str].
875
1011
 
876
- - azure_openai_endpoint: str.
1012
+ - azure_openai_endpoint: typing.Optional[str].
877
1013
 
878
- - azure_openai_api_version: str.
1014
+ - azure_openai_api_version: typing.Optional[str].
879
1015
 
880
- - azure_openai_key: str.
1016
+ - azure_openai_key: typing.Optional[str].
881
1017
 
882
- - auto_mode: bool.
1018
+ - auto_mode: typing.Optional[bool].
883
1019
 
884
- - auto_mode_trigger_on_regexp_in_page: str.
1020
+ - auto_mode_trigger_on_regexp_in_page: typing.Optional[str].
885
1021
 
886
- - auto_mode_trigger_on_text_in_page: str.
1022
+ - auto_mode_trigger_on_text_in_page: typing.Optional[str].
887
1023
 
888
- - auto_mode_trigger_on_table_in_page: bool.
1024
+ - auto_mode_trigger_on_table_in_page: typing.Optional[bool].
889
1025
 
890
- - auto_mode_trigger_on_image_in_page: bool.
1026
+ - auto_mode_trigger_on_image_in_page: typing.Optional[bool].
891
1027
 
892
1028
  - file: typing.Optional[str].
893
1029
 
894
- - input_url: str.
895
-
896
- - http_proxy: str.
897
- """
898
- _request: typing.Dict[str, typing.Any] = {
899
- "language": language,
900
- "parsing_instruction": parsing_instruction,
901
- "skip_diagonal_text": skip_diagonal_text,
902
- "invalidate_cache": invalidate_cache,
903
- "do_not_cache": do_not_cache,
904
- "gpt4o_mode": gpt_4_o_mode,
905
- "fast_mode": fast_mode,
906
- "premium_mode": premium_mode,
907
- "continuous_mode": continuous_mode,
908
- "gpt4o_api_key": gpt_4_o_api_key,
909
- "do_not_unroll_columns": do_not_unroll_columns,
910
- "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
911
- "page_separator": page_separator,
912
- "bounding_box": bounding_box,
913
- "target_pages": target_pages,
914
- "use_vendor_multimodal_model": use_vendor_multimodal_model,
915
- "vendor_multimodal_model_name": vendor_multimodal_model_name,
916
- "vendor_multimodal_api_key": vendor_multimodal_api_key,
917
- "page_prefix": page_prefix,
918
- "page_suffix": page_suffix,
919
- "webhook_url": webhook_url,
920
- "take_screenshot": take_screenshot,
921
- "is_formatting_instruction": is_formatting_instruction,
922
- "disable_ocr": disable_ocr,
923
- "annotate_links": annotate_links,
924
- "disable_reconstruction": disable_reconstruction,
925
- "disable_image_extraction": disable_image_extraction,
926
- "input_s3_path": input_s_3_path,
927
- "output_s3_path_prefix": output_s_3_path_prefix,
928
- "azure_openai_deployment_name": azure_openai_deployment_name,
929
- "azure_openai_endpoint": azure_openai_endpoint,
930
- "azure_openai_api_version": azure_openai_api_version,
931
- "azure_openai_key": azure_openai_key,
932
- "auto_mode": auto_mode,
933
- "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
934
- "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
935
- "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
936
- "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
937
- "input_url": input_url,
938
- "http_proxy": http_proxy,
939
- }
940
- if file is not OMIT:
941
- _request["file"] = file
1030
+ - input_url: typing.Optional[str].
1031
+
1032
+ - http_proxy: typing.Optional[str].
1033
+
1034
+ - structured_output: typing.Optional[bool].
1035
+
1036
+ - structured_output_json_schema: typing.Optional[str].
1037
+
1038
+ - structured_output_json_schema_name: typing.Optional[str].
1039
+
1040
+ - max_pages: typing.Optional[int].
1041
+ ---
1042
+ from llama_cloud.client import AsyncLlamaCloud
1043
+
1044
+ client = AsyncLlamaCloud(
1045
+ token="YOUR_TOKEN",
1046
+ )
1047
+ await client.parsing.upload_file()
1048
+ """
942
1049
  _response = await self._client_wrapper.httpx_client.request(
943
1050
  "POST",
944
1051
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
945
1052
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
946
- json=jsonable_encoder(_request),
1053
+ data=jsonable_encoder(
1054
+ {
1055
+ "language": language,
1056
+ "parsing_instruction": parsing_instruction,
1057
+ "skip_diagonal_text": skip_diagonal_text,
1058
+ "invalidate_cache": invalidate_cache,
1059
+ "output_pdf_of_document": output_pdf_of_document,
1060
+ "do_not_cache": do_not_cache,
1061
+ "gpt4o_mode": gpt_4_o_mode,
1062
+ "fast_mode": fast_mode,
1063
+ "premium_mode": premium_mode,
1064
+ "continuous_mode": continuous_mode,
1065
+ "gpt4o_api_key": gpt_4_o_api_key,
1066
+ "do_not_unroll_columns": do_not_unroll_columns,
1067
+ "html_make_all_elements_visible": html_make_all_elements_visible,
1068
+ "html_remove_fixed_elements": html_remove_fixed_elements,
1069
+ "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
1070
+ "page_separator": page_separator,
1071
+ "bounding_box": bounding_box,
1072
+ "bbox_top": bbox_top,
1073
+ "bbox_right": bbox_right,
1074
+ "bbox_bottom": bbox_bottom,
1075
+ "bbox_left": bbox_left,
1076
+ "target_pages": target_pages,
1077
+ "use_vendor_multimodal_model": use_vendor_multimodal_model,
1078
+ "vendor_multimodal_model_name": vendor_multimodal_model_name,
1079
+ "vendor_multimodal_api_key": vendor_multimodal_api_key,
1080
+ "page_prefix": page_prefix,
1081
+ "page_suffix": page_suffix,
1082
+ "webhook_url": webhook_url,
1083
+ "take_screenshot": take_screenshot,
1084
+ "is_formatting_instruction": is_formatting_instruction,
1085
+ "disable_ocr": disable_ocr,
1086
+ "annotate_links": annotate_links,
1087
+ "disable_reconstruction": disable_reconstruction,
1088
+ "disable_image_extraction": disable_image_extraction,
1089
+ "input_s3_path": input_s_3_path,
1090
+ "output_s3_path_prefix": output_s_3_path_prefix,
1091
+ "azure_openai_deployment_name": azure_openai_deployment_name,
1092
+ "azure_openai_endpoint": azure_openai_endpoint,
1093
+ "azure_openai_api_version": azure_openai_api_version,
1094
+ "azure_openai_key": azure_openai_key,
1095
+ "auto_mode": auto_mode,
1096
+ "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
1097
+ "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
1098
+ "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
1099
+ "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
1100
+ "file": file,
1101
+ "input_url": input_url,
1102
+ "http_proxy": http_proxy,
1103
+ "structured_output": structured_output,
1104
+ "structured_output_json_schema": structured_output_json_schema,
1105
+ "structured_output_json_schema_name": structured_output_json_schema_name,
1106
+ "max_pages": max_pages,
1107
+ }
1108
+ ),
1109
+ files={},
947
1110
  headers=self._client_wrapper.get_headers(),
948
1111
  timeout=60,
949
1112
  )
@@ -999,7 +1162,7 @@ class AsyncParsingClient:
999
1162
  token="YOUR_TOKEN",
1000
1163
  )
1001
1164
  await client.parsing.get_job(
1002
- job_id="string",
1165
+ job_id="job_id",
1003
1166
  )
1004
1167
  """
1005
1168
  _response = await self._client_wrapper.httpx_client.request(
@@ -1031,7 +1194,7 @@ class AsyncParsingClient:
1031
1194
  token="YOUR_TOKEN",
1032
1195
  )
1033
1196
  await client.parsing.get_parsing_job_details(
1034
- job_id="string",
1197
+ job_id="job_id",
1035
1198
  )
1036
1199
  """
1037
1200
  _response = await self._client_wrapper.httpx_client.request(
@@ -1063,7 +1226,7 @@ class AsyncParsingClient:
1063
1226
  token="YOUR_TOKEN",
1064
1227
  )
1065
1228
  await client.parsing.get_job_text_result(
1066
- job_id="string",
1229
+ job_id="job_id",
1067
1230
  )
1068
1231
  """
1069
1232
  _response = await self._client_wrapper.httpx_client.request(
@@ -1095,13 +1258,81 @@ class AsyncParsingClient:
1095
1258
  token="YOUR_TOKEN",
1096
1259
  )
1097
1260
  await client.parsing.get_job_raw_text_result(
1098
- job_id="string",
1261
+ job_id="job_id",
1262
+ )
1263
+ """
1264
+ _response = await self._client_wrapper.httpx_client.request(
1265
+ "GET",
1266
+ urllib.parse.urljoin(
1267
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
1268
+ ),
1269
+ headers=self._client_wrapper.get_headers(),
1270
+ timeout=60,
1271
+ )
1272
+ if 200 <= _response.status_code < 300:
1273
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
1274
+ if _response.status_code == 422:
1275
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1276
+ try:
1277
+ _response_json = _response.json()
1278
+ except JSONDecodeError:
1279
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1280
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1281
+
1282
+ async def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
1283
+ """
1284
+ Get a job by id
1285
+
1286
+ Parameters:
1287
+ - job_id: str.
1288
+ ---
1289
+ from llama_cloud.client import AsyncLlamaCloud
1290
+
1291
+ client = AsyncLlamaCloud(
1292
+ token="YOUR_TOKEN",
1293
+ )
1294
+ await client.parsing.get_job_structured_result(
1295
+ job_id="job_id",
1296
+ )
1297
+ """
1298
+ _response = await self._client_wrapper.httpx_client.request(
1299
+ "GET",
1300
+ urllib.parse.urljoin(
1301
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
1302
+ ),
1303
+ headers=self._client_wrapper.get_headers(),
1304
+ timeout=60,
1305
+ )
1306
+ if 200 <= _response.status_code < 300:
1307
+ return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
1308
+ if _response.status_code == 422:
1309
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1310
+ try:
1311
+ _response_json = _response.json()
1312
+ except JSONDecodeError:
1313
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1314
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1315
+
1316
+ async def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
1317
+ """
1318
+ Get a job by id
1319
+
1320
+ Parameters:
1321
+ - job_id: str.
1322
+ ---
1323
+ from llama_cloud.client import AsyncLlamaCloud
1324
+
1325
+ client = AsyncLlamaCloud(
1326
+ token="YOUR_TOKEN",
1327
+ )
1328
+ await client.parsing.get_job_raw_structured_result(
1329
+ job_id="job_id",
1099
1330
  )
1100
1331
  """
1101
1332
  _response = await self._client_wrapper.httpx_client.request(
1102
1333
  "GET",
1103
1334
  urllib.parse.urljoin(
1104
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/text"
1335
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
1105
1336
  ),
1106
1337
  headers=self._client_wrapper.get_headers(),
1107
1338
  timeout=60,
@@ -1129,7 +1360,7 @@ class AsyncParsingClient:
1129
1360
  token="YOUR_TOKEN",
1130
1361
  )
1131
1362
  await client.parsing.get_job_raw_xlsx_result(
1132
- job_id="string",
1363
+ job_id="job_id",
1133
1364
  )
1134
1365
  """
1135
1366
  _response = await self._client_wrapper.httpx_client.request(
@@ -1163,7 +1394,7 @@ class AsyncParsingClient:
1163
1394
  token="YOUR_TOKEN",
1164
1395
  )
1165
1396
  await client.parsing.get_job_result(
1166
- job_id="string",
1397
+ job_id="job_id",
1167
1398
  )
1168
1399
  """
1169
1400
  _response = await self._client_wrapper.httpx_client.request(
@@ -1197,7 +1428,7 @@ class AsyncParsingClient:
1197
1428
  token="YOUR_TOKEN",
1198
1429
  )
1199
1430
  await client.parsing.get_job_raw_md_result(
1200
- job_id="string",
1431
+ job_id="job_id",
1201
1432
  )
1202
1433
  """
1203
1434
  _response = await self._client_wrapper.httpx_client.request(
@@ -1231,7 +1462,7 @@ class AsyncParsingClient:
1231
1462
  token="YOUR_TOKEN",
1232
1463
  )
1233
1464
  await client.parsing.get_job_json_result(
1234
- job_id="string",
1465
+ job_id="job_id",
1235
1466
  )
1236
1467
  """
1237
1468
  _response = await self._client_wrapper.httpx_client.request(
@@ -1263,7 +1494,7 @@ class AsyncParsingClient:
1263
1494
  token="YOUR_TOKEN",
1264
1495
  )
1265
1496
  await client.parsing.get_job_json_raw_result(
1266
- job_id="string",
1497
+ job_id="job_id",
1267
1498
  )
1268
1499
  """
1269
1500
  _response = await self._client_wrapper.httpx_client.request(
@@ -1327,8 +1558,8 @@ class AsyncParsingClient:
1327
1558
  token="YOUR_TOKEN",
1328
1559
  )
1329
1560
  await client.parsing.generate_presigned_url(
1330
- job_id="string",
1331
- filename="string",
1561
+ job_id="job_id",
1562
+ filename="filename",
1332
1563
  )
1333
1564
  """
1334
1565
  _response = await self._client_wrapper.httpx_client.request(