llama-cloud 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. Click here for more details.

Files changed (117) hide show
  1. llama_cloud/__init__.py +76 -10
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/environment.py +1 -1
  4. llama_cloud/resources/__init__.py +23 -1
  5. llama_cloud/resources/data_sinks/client.py +26 -20
  6. llama_cloud/resources/data_sources/client.py +16 -16
  7. llama_cloud/resources/embedding_model_configs/__init__.py +23 -0
  8. llama_cloud/resources/embedding_model_configs/client.py +416 -0
  9. llama_cloud/resources/embedding_model_configs/types/__init__.py +23 -0
  10. llama_cloud/resources/embedding_model_configs/types/embedding_model_config_create_embedding_config.py +89 -0
  11. llama_cloud/resources/evals/client.py +36 -26
  12. llama_cloud/resources/extraction/client.py +32 -32
  13. llama_cloud/resources/files/__init__.py +2 -2
  14. llama_cloud/resources/files/client.py +310 -54
  15. llama_cloud/resources/files/types/__init__.py +3 -1
  16. llama_cloud/resources/files/types/file_create_from_url_resource_info_value.py +7 -0
  17. llama_cloud/resources/files/types/file_create_permission_info_value.py +7 -0
  18. llama_cloud/resources/organizations/client.py +125 -56
  19. llama_cloud/resources/parsing/client.py +652 -264
  20. llama_cloud/resources/pipelines/client.py +617 -310
  21. llama_cloud/resources/projects/client.py +341 -136
  22. llama_cloud/types/__init__.py +58 -10
  23. llama_cloud/types/azure_open_ai_embedding.py +12 -6
  24. llama_cloud/types/base_prompt_template.py +6 -2
  25. llama_cloud/types/bedrock_embedding.py +12 -6
  26. llama_cloud/types/character_splitter.py +4 -2
  27. llama_cloud/types/chat_message.py +1 -1
  28. llama_cloud/types/cloud_az_storage_blob_data_source.py +16 -7
  29. llama_cloud/types/cloud_box_data_source.py +13 -6
  30. llama_cloud/types/cloud_confluence_data_source.py +7 -6
  31. llama_cloud/types/cloud_document.py +3 -1
  32. llama_cloud/types/cloud_document_create.py +3 -1
  33. llama_cloud/types/cloud_google_drive_data_source.py +1 -0
  34. llama_cloud/types/cloud_jira_data_source.py +7 -4
  35. llama_cloud/types/cloud_notion_page_data_source.py +3 -2
  36. llama_cloud/types/cloud_one_drive_data_source.py +6 -2
  37. llama_cloud/types/cloud_postgres_vector_store.py +1 -1
  38. llama_cloud/types/cloud_s_3_data_source.py +9 -4
  39. llama_cloud/types/cloud_sharepoint_data_source.py +9 -5
  40. llama_cloud/types/cloud_slack_data_source.py +7 -6
  41. llama_cloud/types/code_splitter.py +1 -1
  42. llama_cloud/types/cohere_embedding.py +7 -3
  43. llama_cloud/types/data_sink.py +4 -4
  44. llama_cloud/types/data_sink_create.py +1 -1
  45. llama_cloud/types/data_source.py +7 -5
  46. llama_cloud/types/data_source_create.py +4 -2
  47. llama_cloud/types/embedding_model_config.py +43 -0
  48. llama_cloud/types/embedding_model_config_embedding_config.py +89 -0
  49. llama_cloud/types/embedding_model_config_update.py +35 -0
  50. llama_cloud/types/embedding_model_config_update_embedding_config.py +89 -0
  51. llama_cloud/types/eval_dataset.py +2 -2
  52. llama_cloud/types/eval_dataset_job_record.py +13 -7
  53. llama_cloud/types/eval_execution_params_override.py +6 -2
  54. llama_cloud/types/eval_question.py +2 -2
  55. llama_cloud/types/extraction_result.py +2 -2
  56. llama_cloud/types/extraction_schema.py +5 -3
  57. llama_cloud/types/file.py +15 -7
  58. llama_cloud/types/file_permission_info_value.py +5 -0
  59. llama_cloud/types/filter_operator.py +2 -2
  60. llama_cloud/types/gemini_embedding.py +10 -6
  61. llama_cloud/types/hugging_face_inference_api_embedding.py +27 -11
  62. llama_cloud/types/input_message.py +3 -1
  63. llama_cloud/types/interval_usage_and_plan.py +36 -0
  64. llama_cloud/types/job_name_mapping.py +4 -0
  65. llama_cloud/types/llama_parse_parameters.py +21 -0
  66. llama_cloud/types/llm.py +4 -2
  67. llama_cloud/types/llm_parameters.py +5 -2
  68. llama_cloud/types/local_eval.py +10 -8
  69. llama_cloud/types/local_eval_results.py +1 -1
  70. llama_cloud/types/managed_ingestion_status_response.py +5 -3
  71. llama_cloud/types/markdown_element_node_parser.py +5 -3
  72. llama_cloud/types/markdown_node_parser.py +3 -2
  73. llama_cloud/types/metadata_filter.py +2 -2
  74. llama_cloud/types/metric_result.py +3 -3
  75. llama_cloud/types/node_parser.py +1 -1
  76. llama_cloud/types/open_ai_embedding.py +12 -6
  77. llama_cloud/types/organization.py +2 -2
  78. llama_cloud/types/page_splitter_node_parser.py +2 -2
  79. llama_cloud/types/paginated_list_pipeline_files_response.py +35 -0
  80. llama_cloud/types/parsing_job_structured_result.py +32 -0
  81. llama_cloud/types/permission.py +3 -3
  82. llama_cloud/types/pipeline.py +17 -6
  83. llama_cloud/types/pipeline_configuration_hashes.py +3 -3
  84. llama_cloud/types/pipeline_create.py +15 -4
  85. llama_cloud/types/pipeline_data_source.py +13 -7
  86. llama_cloud/types/pipeline_data_source_create.py +3 -1
  87. llama_cloud/types/pipeline_deployment.py +4 -4
  88. llama_cloud/types/pipeline_file.py +25 -10
  89. llama_cloud/types/pipeline_file_create.py +3 -1
  90. llama_cloud/types/pipeline_file_permission_info_value.py +7 -0
  91. llama_cloud/types/plan.py +40 -0
  92. llama_cloud/types/playground_session.py +2 -2
  93. llama_cloud/types/preset_retrieval_params.py +14 -7
  94. llama_cloud/types/presigned_url.py +3 -1
  95. llama_cloud/types/project.py +2 -2
  96. llama_cloud/types/prompt_mixin_prompts.py +1 -1
  97. llama_cloud/types/prompt_spec.py +4 -2
  98. llama_cloud/types/role.py +3 -3
  99. llama_cloud/types/sentence_splitter.py +4 -2
  100. llama_cloud/types/text_node.py +3 -3
  101. llama_cloud/types/{hugging_face_inference_api_embedding_token.py → token.py} +1 -1
  102. llama_cloud/types/token_text_splitter.py +1 -1
  103. llama_cloud/types/usage.py +41 -0
  104. llama_cloud/types/user_organization.py +9 -5
  105. llama_cloud/types/user_organization_create.py +4 -4
  106. llama_cloud/types/user_organization_delete.py +2 -2
  107. llama_cloud/types/user_organization_role.py +2 -2
  108. llama_cloud/types/value.py +5 -0
  109. llama_cloud/types/vertex_text_embedding.py +9 -5
  110. {llama_cloud-0.1.4.dist-info → llama_cloud-0.1.6.dist-info}/METADATA +1 -1
  111. {llama_cloud-0.1.4.dist-info → llama_cloud-0.1.6.dist-info}/RECORD +113 -99
  112. llama_cloud/types/data_sink_component.py +0 -20
  113. llama_cloud/types/data_source_component.py +0 -28
  114. llama_cloud/types/metadata_filter_value.py +0 -5
  115. llama_cloud/types/pipeline_data_source_component.py +0 -28
  116. {llama_cloud-0.1.4.dist-info → llama_cloud-0.1.6.dist-info}/LICENSE +0 -0
  117. {llama_cloud-0.1.4.dist-info → llama_cloud-0.1.6.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@ from json.decoder import JSONDecodeError
7
7
  from ...core.api_error import ApiError
8
8
  from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
9
9
  from ...core.jsonable_encoder import jsonable_encoder
10
+ from ...core.remove_none_from_dict import remove_none_from_dict
10
11
  from ...errors.unprocessable_entity_error import UnprocessableEntityError
11
12
  from ...types.http_validation_error import HttpValidationError
12
13
  from ...types.llama_parse_supported_file_extensions import LlamaParseSupportedFileExtensions
@@ -15,6 +16,7 @@ from ...types.parsing_history_item import ParsingHistoryItem
15
16
  from ...types.parsing_job import ParsingJob
16
17
  from ...types.parsing_job_json_result import ParsingJobJsonResult
17
18
  from ...types.parsing_job_markdown_result import ParsingJobMarkdownResult
19
+ from ...types.parsing_job_structured_result import ParsingJobStructuredResult
18
20
  from ...types.parsing_job_text_result import ParsingJobTextResult
19
21
  from ...types.parsing_usage import ParsingUsage
20
22
  from ...types.presigned_url import PresignedUrl
@@ -35,7 +37,7 @@ class ParsingClient:
35
37
  def __init__(self, *, client_wrapper: SyncClientWrapper):
36
38
  self._client_wrapper = client_wrapper
37
39
 
38
- def get_job_image_result(self, job_id: str, name: str) -> None:
40
+ def get_job_image_result(self, job_id: str, name: str) -> typing.Iterator[bytes]:
39
41
  """
40
42
  Get a job by id
41
43
 
@@ -43,34 +45,29 @@ class ParsingClient:
43
45
  - job_id: str.
44
46
 
45
47
  - name: str.
46
- ---
47
- from llama_cloud.client import LlamaCloud
48
-
49
- client = LlamaCloud(
50
- token="YOUR_TOKEN",
51
- )
52
- client.parsing.get_job_image_result(
53
- job_id="string",
54
- name="string",
55
- )
56
48
  """
57
- _response = self._client_wrapper.httpx_client.request(
49
+ with self._client_wrapper.httpx_client.stream(
58
50
  "GET",
59
51
  urllib.parse.urljoin(
60
52
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
61
53
  ),
62
54
  headers=self._client_wrapper.get_headers(),
63
55
  timeout=60,
64
- )
65
- if 200 <= _response.status_code < 300:
66
- return
67
- if _response.status_code == 422:
68
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
69
- try:
70
- _response_json = _response.json()
71
- except JSONDecodeError:
72
- raise ApiError(status_code=_response.status_code, body=_response.text)
73
- raise ApiError(status_code=_response.status_code, body=_response_json)
56
+ ) as _response:
57
+ if 200 <= _response.status_code < 300:
58
+ for _chunk in _response.iter_bytes():
59
+ yield _chunk
60
+ return
61
+ _response.read()
62
+ if _response.status_code == 422:
63
+ raise UnprocessableEntityError(
64
+ pydantic.parse_obj_as(HttpValidationError, _response.json()) # type: ignore
65
+ )
66
+ try:
67
+ _response_json = _response.json()
68
+ except JSONDecodeError:
69
+ raise ApiError(status_code=_response.status_code, body=_response.text)
70
+ raise ApiError(status_code=_response.status_code, body=_response_json)
74
71
 
75
72
  def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
76
73
  """
@@ -101,146 +98,241 @@ class ParsingClient:
101
98
  def upload_file(
102
99
  self,
103
100
  *,
104
- language: typing.List[ParserLanguages],
105
- parsing_instruction: str,
106
- skip_diagonal_text: bool,
107
- invalidate_cache: bool,
108
- do_not_cache: bool,
109
- gpt_4_o_mode: bool,
110
- fast_mode: bool,
111
- premium_mode: bool,
112
- continuous_mode: bool,
113
- gpt_4_o_api_key: str,
114
- do_not_unroll_columns: bool,
115
- page_separator: str,
116
- bounding_box: str,
117
- target_pages: str,
118
- use_vendor_multimodal_model: bool,
119
- vendor_multimodal_model_name: str,
120
- vendor_multimodal_api_key: str,
121
- page_prefix: str,
122
- page_suffix: str,
123
- webhook_url: str,
124
- take_screenshot: bool,
125
- is_formatting_instruction: bool,
126
- disable_ocr: bool,
127
- annotate_links: bool,
128
- disable_reconstruction: bool,
129
- input_s_3_path: str,
130
- output_s_3_path_prefix: str,
131
- azure_openai_deployment_name: str,
132
- azure_openai_endpoint: str,
133
- azure_openai_api_version: str,
134
- azure_openai_key: str,
135
- file: typing.Optional[str] = OMIT,
101
+ project_id: typing.Optional[str] = None,
102
+ organization_id: typing.Optional[str] = None,
103
+ language: typing.Optional[typing.List[ParserLanguages]] = None,
104
+ parsing_instruction: typing.Optional[str] = None,
105
+ skip_diagonal_text: typing.Optional[bool] = None,
106
+ invalidate_cache: typing.Optional[bool] = None,
107
+ output_pdf_of_document: typing.Optional[bool] = None,
108
+ do_not_cache: typing.Optional[bool] = None,
109
+ gpt_4_o_mode: typing.Optional[bool] = None,
110
+ fast_mode: typing.Optional[bool] = None,
111
+ premium_mode: typing.Optional[bool] = None,
112
+ continuous_mode: typing.Optional[bool] = None,
113
+ gpt_4_o_api_key: typing.Optional[str] = None,
114
+ do_not_unroll_columns: typing.Optional[bool] = None,
115
+ html_make_all_elements_visible: typing.Optional[bool] = None,
116
+ html_remove_fixed_elements: typing.Optional[bool] = None,
117
+ guess_xlsx_sheet_name: typing.Optional[bool] = None,
118
+ page_separator: typing.Optional[str] = None,
119
+ bounding_box: typing.Optional[str] = None,
120
+ bbox_top: typing.Optional[float] = None,
121
+ bbox_right: typing.Optional[float] = None,
122
+ bbox_bottom: typing.Optional[float] = None,
123
+ bbox_left: typing.Optional[float] = None,
124
+ target_pages: typing.Optional[str] = None,
125
+ use_vendor_multimodal_model: typing.Optional[bool] = None,
126
+ vendor_multimodal_model_name: typing.Optional[str] = None,
127
+ vendor_multimodal_api_key: typing.Optional[str] = None,
128
+ page_prefix: typing.Optional[str] = None,
129
+ page_suffix: typing.Optional[str] = None,
130
+ webhook_url: typing.Optional[str] = None,
131
+ take_screenshot: typing.Optional[bool] = None,
132
+ is_formatting_instruction: typing.Optional[bool] = None,
133
+ disable_ocr: typing.Optional[bool] = None,
134
+ annotate_links: typing.Optional[bool] = None,
135
+ disable_reconstruction: typing.Optional[bool] = None,
136
+ disable_image_extraction: typing.Optional[bool] = None,
137
+ input_s_3_path: typing.Optional[str] = None,
138
+ output_s_3_path_prefix: typing.Optional[str] = None,
139
+ azure_openai_deployment_name: typing.Optional[str] = None,
140
+ azure_openai_endpoint: typing.Optional[str] = None,
141
+ azure_openai_api_version: typing.Optional[str] = None,
142
+ azure_openai_key: typing.Optional[str] = None,
143
+ auto_mode: typing.Optional[bool] = None,
144
+ auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = None,
145
+ auto_mode_trigger_on_text_in_page: typing.Optional[str] = None,
146
+ auto_mode_trigger_on_table_in_page: typing.Optional[bool] = None,
147
+ auto_mode_trigger_on_image_in_page: typing.Optional[bool] = None,
148
+ file: typing.Optional[str] = None,
149
+ input_url: typing.Optional[str] = None,
150
+ http_proxy: typing.Optional[str] = None,
151
+ structured_output: typing.Optional[bool] = None,
152
+ structured_output_json_schema: typing.Optional[str] = None,
153
+ structured_output_json_schema_name: typing.Optional[str] = None,
154
+ max_pages: typing.Optional[int] = None,
136
155
  ) -> ParsingJob:
137
156
  """
138
157
  Upload a file to s3 and create a job. return a job id
139
158
 
140
159
  Parameters:
141
- - language: typing.List[ParserLanguages].
160
+ - project_id: typing.Optional[str].
142
161
 
143
- - parsing_instruction: str.
162
+ - organization_id: typing.Optional[str].
144
163
 
145
- - skip_diagonal_text: bool.
164
+ - language: typing.Optional[typing.List[ParserLanguages]].
146
165
 
147
- - invalidate_cache: bool.
166
+ - parsing_instruction: typing.Optional[str].
148
167
 
149
- - do_not_cache: bool.
168
+ - skip_diagonal_text: typing.Optional[bool].
150
169
 
151
- - gpt_4_o_mode: bool.
170
+ - invalidate_cache: typing.Optional[bool].
152
171
 
153
- - fast_mode: bool.
172
+ - output_pdf_of_document: typing.Optional[bool].
154
173
 
155
- - premium_mode: bool.
174
+ - do_not_cache: typing.Optional[bool].
156
175
 
157
- - continuous_mode: bool.
176
+ - gpt_4_o_mode: typing.Optional[bool].
158
177
 
159
- - gpt_4_o_api_key: str.
178
+ - fast_mode: typing.Optional[bool].
160
179
 
161
- - do_not_unroll_columns: bool.
180
+ - premium_mode: typing.Optional[bool].
162
181
 
163
- - page_separator: str.
182
+ - continuous_mode: typing.Optional[bool].
164
183
 
165
- - bounding_box: str.
184
+ - gpt_4_o_api_key: typing.Optional[str].
166
185
 
167
- - target_pages: str.
186
+ - do_not_unroll_columns: typing.Optional[bool].
168
187
 
169
- - use_vendor_multimodal_model: bool.
188
+ - html_make_all_elements_visible: typing.Optional[bool].
170
189
 
171
- - vendor_multimodal_model_name: str.
190
+ - html_remove_fixed_elements: typing.Optional[bool].
172
191
 
173
- - vendor_multimodal_api_key: str.
192
+ - guess_xlsx_sheet_name: typing.Optional[bool].
174
193
 
175
- - page_prefix: str.
194
+ - page_separator: typing.Optional[str].
176
195
 
177
- - page_suffix: str.
196
+ - bounding_box: typing.Optional[str].
178
197
 
179
- - webhook_url: str.
198
+ - bbox_top: typing.Optional[float].
180
199
 
181
- - take_screenshot: bool.
200
+ - bbox_right: typing.Optional[float].
182
201
 
183
- - is_formatting_instruction: bool.
202
+ - bbox_bottom: typing.Optional[float].
184
203
 
185
- - disable_ocr: bool.
204
+ - bbox_left: typing.Optional[float].
186
205
 
187
- - annotate_links: bool.
206
+ - target_pages: typing.Optional[str].
188
207
 
189
- - disable_reconstruction: bool.
208
+ - use_vendor_multimodal_model: typing.Optional[bool].
190
209
 
191
- - input_s_3_path: str.
210
+ - vendor_multimodal_model_name: typing.Optional[str].
192
211
 
193
- - output_s_3_path_prefix: str.
212
+ - vendor_multimodal_api_key: typing.Optional[str].
194
213
 
195
- - azure_openai_deployment_name: str.
214
+ - page_prefix: typing.Optional[str].
196
215
 
197
- - azure_openai_endpoint: str.
216
+ - page_suffix: typing.Optional[str].
198
217
 
199
- - azure_openai_api_version: str.
218
+ - webhook_url: typing.Optional[str].
200
219
 
201
- - azure_openai_key: str.
220
+ - take_screenshot: typing.Optional[bool].
221
+
222
+ - is_formatting_instruction: typing.Optional[bool].
223
+
224
+ - disable_ocr: typing.Optional[bool].
225
+
226
+ - annotate_links: typing.Optional[bool].
227
+
228
+ - disable_reconstruction: typing.Optional[bool].
229
+
230
+ - disable_image_extraction: typing.Optional[bool].
231
+
232
+ - input_s_3_path: typing.Optional[str].
233
+
234
+ - output_s_3_path_prefix: typing.Optional[str].
235
+
236
+ - azure_openai_deployment_name: typing.Optional[str].
237
+
238
+ - azure_openai_endpoint: typing.Optional[str].
239
+
240
+ - azure_openai_api_version: typing.Optional[str].
241
+
242
+ - azure_openai_key: typing.Optional[str].
243
+
244
+ - auto_mode: typing.Optional[bool].
245
+
246
+ - auto_mode_trigger_on_regexp_in_page: typing.Optional[str].
247
+
248
+ - auto_mode_trigger_on_text_in_page: typing.Optional[str].
249
+
250
+ - auto_mode_trigger_on_table_in_page: typing.Optional[bool].
251
+
252
+ - auto_mode_trigger_on_image_in_page: typing.Optional[bool].
202
253
 
203
254
  - file: typing.Optional[str].
255
+
256
+ - input_url: typing.Optional[str].
257
+
258
+ - http_proxy: typing.Optional[str].
259
+
260
+ - structured_output: typing.Optional[bool].
261
+
262
+ - structured_output_json_schema: typing.Optional[str].
263
+
264
+ - structured_output_json_schema_name: typing.Optional[str].
265
+
266
+ - max_pages: typing.Optional[int].
267
+ ---
268
+ from llama_cloud.client import LlamaCloud
269
+
270
+ client = LlamaCloud(
271
+ token="YOUR_TOKEN",
272
+ )
273
+ client.parsing.upload_file()
204
274
  """
205
- _request: typing.Dict[str, typing.Any] = {
206
- "language": language,
207
- "parsing_instruction": parsing_instruction,
208
- "skip_diagonal_text": skip_diagonal_text,
209
- "invalidate_cache": invalidate_cache,
210
- "do_not_cache": do_not_cache,
211
- "gpt4o_mode": gpt_4_o_mode,
212
- "fast_mode": fast_mode,
213
- "premium_mode": premium_mode,
214
- "continuous_mode": continuous_mode,
215
- "gpt4o_api_key": gpt_4_o_api_key,
216
- "do_not_unroll_columns": do_not_unroll_columns,
217
- "page_separator": page_separator,
218
- "bounding_box": bounding_box,
219
- "target_pages": target_pages,
220
- "use_vendor_multimodal_model": use_vendor_multimodal_model,
221
- "vendor_multimodal_model_name": vendor_multimodal_model_name,
222
- "vendor_multimodal_api_key": vendor_multimodal_api_key,
223
- "page_prefix": page_prefix,
224
- "page_suffix": page_suffix,
225
- "webhook_url": webhook_url,
226
- "take_screenshot": take_screenshot,
227
- "is_formatting_instruction": is_formatting_instruction,
228
- "disable_ocr": disable_ocr,
229
- "annotate_links": annotate_links,
230
- "disable_reconstruction": disable_reconstruction,
231
- "input_s3_path": input_s_3_path,
232
- "output_s3_path_prefix": output_s_3_path_prefix,
233
- "azure_openai_deployment_name": azure_openai_deployment_name,
234
- "azure_openai_endpoint": azure_openai_endpoint,
235
- "azure_openai_api_version": azure_openai_api_version,
236
- "azure_openai_key": azure_openai_key,
237
- }
238
- if file is not OMIT:
239
- _request["file"] = file
240
275
  _response = self._client_wrapper.httpx_client.request(
241
276
  "POST",
242
277
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
243
- json=jsonable_encoder(_request),
278
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
279
+ data=jsonable_encoder(
280
+ {
281
+ "language": language,
282
+ "parsing_instruction": parsing_instruction,
283
+ "skip_diagonal_text": skip_diagonal_text,
284
+ "invalidate_cache": invalidate_cache,
285
+ "output_pdf_of_document": output_pdf_of_document,
286
+ "do_not_cache": do_not_cache,
287
+ "gpt4o_mode": gpt_4_o_mode,
288
+ "fast_mode": fast_mode,
289
+ "premium_mode": premium_mode,
290
+ "continuous_mode": continuous_mode,
291
+ "gpt4o_api_key": gpt_4_o_api_key,
292
+ "do_not_unroll_columns": do_not_unroll_columns,
293
+ "html_make_all_elements_visible": html_make_all_elements_visible,
294
+ "html_remove_fixed_elements": html_remove_fixed_elements,
295
+ "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
296
+ "page_separator": page_separator,
297
+ "bounding_box": bounding_box,
298
+ "bbox_top": bbox_top,
299
+ "bbox_right": bbox_right,
300
+ "bbox_bottom": bbox_bottom,
301
+ "bbox_left": bbox_left,
302
+ "target_pages": target_pages,
303
+ "use_vendor_multimodal_model": use_vendor_multimodal_model,
304
+ "vendor_multimodal_model_name": vendor_multimodal_model_name,
305
+ "vendor_multimodal_api_key": vendor_multimodal_api_key,
306
+ "page_prefix": page_prefix,
307
+ "page_suffix": page_suffix,
308
+ "webhook_url": webhook_url,
309
+ "take_screenshot": take_screenshot,
310
+ "is_formatting_instruction": is_formatting_instruction,
311
+ "disable_ocr": disable_ocr,
312
+ "annotate_links": annotate_links,
313
+ "disable_reconstruction": disable_reconstruction,
314
+ "disable_image_extraction": disable_image_extraction,
315
+ "input_s3_path": input_s_3_path,
316
+ "output_s3_path_prefix": output_s_3_path_prefix,
317
+ "azure_openai_deployment_name": azure_openai_deployment_name,
318
+ "azure_openai_endpoint": azure_openai_endpoint,
319
+ "azure_openai_api_version": azure_openai_api_version,
320
+ "azure_openai_key": azure_openai_key,
321
+ "auto_mode": auto_mode,
322
+ "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
323
+ "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
324
+ "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
325
+ "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
326
+ "file": file,
327
+ "input_url": input_url,
328
+ "http_proxy": http_proxy,
329
+ "structured_output": structured_output,
330
+ "structured_output_json_schema": structured_output_json_schema,
331
+ "structured_output_json_schema_name": structured_output_json_schema_name,
332
+ "max_pages": max_pages,
333
+ }
334
+ ),
335
+ files={},
244
336
  headers=self._client_wrapper.get_headers(),
245
337
  timeout=60,
246
338
  )
@@ -256,6 +348,7 @@ class ParsingClient:
256
348
 
257
349
  def usage(self) -> ParsingUsage:
258
350
  """
351
+ DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
259
352
  Get parsing usage for user
260
353
 
261
354
  ---
@@ -295,7 +388,7 @@ class ParsingClient:
295
388
  token="YOUR_TOKEN",
296
389
  )
297
390
  client.parsing.get_job(
298
- job_id="string",
391
+ job_id="job_id",
299
392
  )
300
393
  """
301
394
  _response = self._client_wrapper.httpx_client.request(
@@ -327,7 +420,7 @@ class ParsingClient:
327
420
  token="YOUR_TOKEN",
328
421
  )
329
422
  client.parsing.get_parsing_job_details(
330
- job_id="string",
423
+ job_id="job_id",
331
424
  )
332
425
  """
333
426
  _response = self._client_wrapper.httpx_client.request(
@@ -359,7 +452,7 @@ class ParsingClient:
359
452
  token="YOUR_TOKEN",
360
453
  )
361
454
  client.parsing.get_job_text_result(
362
- job_id="string",
455
+ job_id="job_id",
363
456
  )
364
457
  """
365
458
  _response = self._client_wrapper.httpx_client.request(
@@ -391,13 +484,115 @@ class ParsingClient:
391
484
  token="YOUR_TOKEN",
392
485
  )
393
486
  client.parsing.get_job_raw_text_result(
394
- job_id="string",
487
+ job_id="job_id",
488
+ )
489
+ """
490
+ _response = self._client_wrapper.httpx_client.request(
491
+ "GET",
492
+ urllib.parse.urljoin(
493
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
494
+ ),
495
+ headers=self._client_wrapper.get_headers(),
496
+ timeout=60,
497
+ )
498
+ if 200 <= _response.status_code < 300:
499
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
500
+ if _response.status_code == 422:
501
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
502
+ try:
503
+ _response_json = _response.json()
504
+ except JSONDecodeError:
505
+ raise ApiError(status_code=_response.status_code, body=_response.text)
506
+ raise ApiError(status_code=_response.status_code, body=_response_json)
507
+
508
+ def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
509
+ """
510
+ Get a job by id
511
+
512
+ Parameters:
513
+ - job_id: str.
514
+ ---
515
+ from llama_cloud.client import LlamaCloud
516
+
517
+ client = LlamaCloud(
518
+ token="YOUR_TOKEN",
519
+ )
520
+ client.parsing.get_job_structured_result(
521
+ job_id="job_id",
522
+ )
523
+ """
524
+ _response = self._client_wrapper.httpx_client.request(
525
+ "GET",
526
+ urllib.parse.urljoin(
527
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
528
+ ),
529
+ headers=self._client_wrapper.get_headers(),
530
+ timeout=60,
531
+ )
532
+ if 200 <= _response.status_code < 300:
533
+ return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
534
+ if _response.status_code == 422:
535
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
536
+ try:
537
+ _response_json = _response.json()
538
+ except JSONDecodeError:
539
+ raise ApiError(status_code=_response.status_code, body=_response.text)
540
+ raise ApiError(status_code=_response.status_code, body=_response_json)
541
+
542
+ def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
543
+ """
544
+ Get a job by id
545
+
546
+ Parameters:
547
+ - job_id: str.
548
+ ---
549
+ from llama_cloud.client import LlamaCloud
550
+
551
+ client = LlamaCloud(
552
+ token="YOUR_TOKEN",
553
+ )
554
+ client.parsing.get_job_raw_structured_result(
555
+ job_id="job_id",
556
+ )
557
+ """
558
+ _response = self._client_wrapper.httpx_client.request(
559
+ "GET",
560
+ urllib.parse.urljoin(
561
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
562
+ ),
563
+ headers=self._client_wrapper.get_headers(),
564
+ timeout=60,
565
+ )
566
+ if 200 <= _response.status_code < 300:
567
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
568
+ if _response.status_code == 422:
569
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
570
+ try:
571
+ _response_json = _response.json()
572
+ except JSONDecodeError:
573
+ raise ApiError(status_code=_response.status_code, body=_response.text)
574
+ raise ApiError(status_code=_response.status_code, body=_response_json)
575
+
576
+ def get_job_raw_xlsx_result(self, job_id: str) -> typing.Any:
577
+ """
578
+ Get a job by id
579
+
580
+ Parameters:
581
+ - job_id: str.
582
+ ---
583
+ from llama_cloud.client import LlamaCloud
584
+
585
+ client = LlamaCloud(
586
+ token="YOUR_TOKEN",
587
+ )
588
+ client.parsing.get_job_raw_xlsx_result(
589
+ job_id="job_id",
395
590
  )
396
591
  """
397
592
  _response = self._client_wrapper.httpx_client.request(
398
593
  "GET",
399
594
  urllib.parse.urljoin(
400
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/text"
595
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/xlsx"
401
596
  ),
402
597
  headers=self._client_wrapper.get_headers(),
403
598
  timeout=60,
@@ -425,7 +620,7 @@ class ParsingClient:
425
620
  token="YOUR_TOKEN",
426
621
  )
427
622
  client.parsing.get_job_result(
428
- job_id="string",
623
+ job_id="job_id",
429
624
  )
430
625
  """
431
626
  _response = self._client_wrapper.httpx_client.request(
@@ -459,7 +654,7 @@ class ParsingClient:
459
654
  token="YOUR_TOKEN",
460
655
  )
461
656
  client.parsing.get_job_raw_md_result(
462
- job_id="string",
657
+ job_id="job_id",
463
658
  )
464
659
  """
465
660
  _response = self._client_wrapper.httpx_client.request(
@@ -493,7 +688,7 @@ class ParsingClient:
493
688
  token="YOUR_TOKEN",
494
689
  )
495
690
  client.parsing.get_job_json_result(
496
- job_id="string",
691
+ job_id="job_id",
497
692
  )
498
693
  """
499
694
  _response = self._client_wrapper.httpx_client.request(
@@ -525,7 +720,7 @@ class ParsingClient:
525
720
  token="YOUR_TOKEN",
526
721
  )
527
722
  client.parsing.get_job_json_raw_result(
528
- job_id="string",
723
+ job_id="job_id",
529
724
  )
530
725
  """
531
726
  _response = self._client_wrapper.httpx_client.request(
@@ -589,8 +784,8 @@ class ParsingClient:
589
784
  token="YOUR_TOKEN",
590
785
  )
591
786
  client.parsing.generate_presigned_url(
592
- job_id="string",
593
- filename="string",
787
+ job_id="job_id",
788
+ filename="filename",
594
789
  )
595
790
  """
596
791
  _response = self._client_wrapper.httpx_client.request(
@@ -616,7 +811,7 @@ class AsyncParsingClient:
616
811
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
617
812
  self._client_wrapper = client_wrapper
618
813
 
619
- async def get_job_image_result(self, job_id: str, name: str) -> None:
814
+ async def get_job_image_result(self, job_id: str, name: str) -> typing.AsyncIterator[bytes]:
620
815
  """
621
816
  Get a job by id
622
817
 
@@ -624,34 +819,29 @@ class AsyncParsingClient:
624
819
  - job_id: str.
625
820
 
626
821
  - name: str.
627
- ---
628
- from llama_cloud.client import AsyncLlamaCloud
629
-
630
- client = AsyncLlamaCloud(
631
- token="YOUR_TOKEN",
632
- )
633
- await client.parsing.get_job_image_result(
634
- job_id="string",
635
- name="string",
636
- )
637
822
  """
638
- _response = await self._client_wrapper.httpx_client.request(
823
+ async with self._client_wrapper.httpx_client.stream(
639
824
  "GET",
640
825
  urllib.parse.urljoin(
641
826
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/image/{name}"
642
827
  ),
643
828
  headers=self._client_wrapper.get_headers(),
644
829
  timeout=60,
645
- )
646
- if 200 <= _response.status_code < 300:
647
- return
648
- if _response.status_code == 422:
649
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
650
- try:
651
- _response_json = _response.json()
652
- except JSONDecodeError:
653
- raise ApiError(status_code=_response.status_code, body=_response.text)
654
- raise ApiError(status_code=_response.status_code, body=_response_json)
830
+ ) as _response:
831
+ if 200 <= _response.status_code < 300:
832
+ async for _chunk in _response.aiter_bytes():
833
+ yield _chunk
834
+ return
835
+ await _response.aread()
836
+ if _response.status_code == 422:
837
+ raise UnprocessableEntityError(
838
+ pydantic.parse_obj_as(HttpValidationError, _response.json()) # type: ignore
839
+ )
840
+ try:
841
+ _response_json = _response.json()
842
+ except JSONDecodeError:
843
+ raise ApiError(status_code=_response.status_code, body=_response.text)
844
+ raise ApiError(status_code=_response.status_code, body=_response_json)
655
845
 
656
846
  async def get_supported_file_extensions(self) -> typing.List[LlamaParseSupportedFileExtensions]:
657
847
  """
@@ -682,146 +872,241 @@ class AsyncParsingClient:
682
872
  async def upload_file(
683
873
  self,
684
874
  *,
685
- language: typing.List[ParserLanguages],
686
- parsing_instruction: str,
687
- skip_diagonal_text: bool,
688
- invalidate_cache: bool,
689
- do_not_cache: bool,
690
- gpt_4_o_mode: bool,
691
- fast_mode: bool,
692
- premium_mode: bool,
693
- continuous_mode: bool,
694
- gpt_4_o_api_key: str,
695
- do_not_unroll_columns: bool,
696
- page_separator: str,
697
- bounding_box: str,
698
- target_pages: str,
699
- use_vendor_multimodal_model: bool,
700
- vendor_multimodal_model_name: str,
701
- vendor_multimodal_api_key: str,
702
- page_prefix: str,
703
- page_suffix: str,
704
- webhook_url: str,
705
- take_screenshot: bool,
706
- is_formatting_instruction: bool,
707
- disable_ocr: bool,
708
- annotate_links: bool,
709
- disable_reconstruction: bool,
710
- input_s_3_path: str,
711
- output_s_3_path_prefix: str,
712
- azure_openai_deployment_name: str,
713
- azure_openai_endpoint: str,
714
- azure_openai_api_version: str,
715
- azure_openai_key: str,
716
- file: typing.Optional[str] = OMIT,
875
+ project_id: typing.Optional[str] = None,
876
+ organization_id: typing.Optional[str] = None,
877
+ language: typing.Optional[typing.List[ParserLanguages]] = None,
878
+ parsing_instruction: typing.Optional[str] = None,
879
+ skip_diagonal_text: typing.Optional[bool] = None,
880
+ invalidate_cache: typing.Optional[bool] = None,
881
+ output_pdf_of_document: typing.Optional[bool] = None,
882
+ do_not_cache: typing.Optional[bool] = None,
883
+ gpt_4_o_mode: typing.Optional[bool] = None,
884
+ fast_mode: typing.Optional[bool] = None,
885
+ premium_mode: typing.Optional[bool] = None,
886
+ continuous_mode: typing.Optional[bool] = None,
887
+ gpt_4_o_api_key: typing.Optional[str] = None,
888
+ do_not_unroll_columns: typing.Optional[bool] = None,
889
+ html_make_all_elements_visible: typing.Optional[bool] = None,
890
+ html_remove_fixed_elements: typing.Optional[bool] = None,
891
+ guess_xlsx_sheet_name: typing.Optional[bool] = None,
892
+ page_separator: typing.Optional[str] = None,
893
+ bounding_box: typing.Optional[str] = None,
894
+ bbox_top: typing.Optional[float] = None,
895
+ bbox_right: typing.Optional[float] = None,
896
+ bbox_bottom: typing.Optional[float] = None,
897
+ bbox_left: typing.Optional[float] = None,
898
+ target_pages: typing.Optional[str] = None,
899
+ use_vendor_multimodal_model: typing.Optional[bool] = None,
900
+ vendor_multimodal_model_name: typing.Optional[str] = None,
901
+ vendor_multimodal_api_key: typing.Optional[str] = None,
902
+ page_prefix: typing.Optional[str] = None,
903
+ page_suffix: typing.Optional[str] = None,
904
+ webhook_url: typing.Optional[str] = None,
905
+ take_screenshot: typing.Optional[bool] = None,
906
+ is_formatting_instruction: typing.Optional[bool] = None,
907
+ disable_ocr: typing.Optional[bool] = None,
908
+ annotate_links: typing.Optional[bool] = None,
909
+ disable_reconstruction: typing.Optional[bool] = None,
910
+ disable_image_extraction: typing.Optional[bool] = None,
911
+ input_s_3_path: typing.Optional[str] = None,
912
+ output_s_3_path_prefix: typing.Optional[str] = None,
913
+ azure_openai_deployment_name: typing.Optional[str] = None,
914
+ azure_openai_endpoint: typing.Optional[str] = None,
915
+ azure_openai_api_version: typing.Optional[str] = None,
916
+ azure_openai_key: typing.Optional[str] = None,
917
+ auto_mode: typing.Optional[bool] = None,
918
+ auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = None,
919
+ auto_mode_trigger_on_text_in_page: typing.Optional[str] = None,
920
+ auto_mode_trigger_on_table_in_page: typing.Optional[bool] = None,
921
+ auto_mode_trigger_on_image_in_page: typing.Optional[bool] = None,
922
+ file: typing.Optional[str] = None,
923
+ input_url: typing.Optional[str] = None,
924
+ http_proxy: typing.Optional[str] = None,
925
+ structured_output: typing.Optional[bool] = None,
926
+ structured_output_json_schema: typing.Optional[str] = None,
927
+ structured_output_json_schema_name: typing.Optional[str] = None,
928
+ max_pages: typing.Optional[int] = None,
717
929
  ) -> ParsingJob:
718
930
  """
719
931
  Upload a file to s3 and create a job. return a job id
720
932
 
721
933
  Parameters:
722
- - language: typing.List[ParserLanguages].
934
+ - project_id: typing.Optional[str].
723
935
 
724
- - parsing_instruction: str.
936
+ - organization_id: typing.Optional[str].
725
937
 
726
- - skip_diagonal_text: bool.
938
+ - language: typing.Optional[typing.List[ParserLanguages]].
727
939
 
728
- - invalidate_cache: bool.
940
+ - parsing_instruction: typing.Optional[str].
729
941
 
730
- - do_not_cache: bool.
942
+ - skip_diagonal_text: typing.Optional[bool].
731
943
 
732
- - gpt_4_o_mode: bool.
944
+ - invalidate_cache: typing.Optional[bool].
733
945
 
734
- - fast_mode: bool.
946
+ - output_pdf_of_document: typing.Optional[bool].
735
947
 
736
- - premium_mode: bool.
948
+ - do_not_cache: typing.Optional[bool].
737
949
 
738
- - continuous_mode: bool.
950
+ - gpt_4_o_mode: typing.Optional[bool].
739
951
 
740
- - gpt_4_o_api_key: str.
952
+ - fast_mode: typing.Optional[bool].
741
953
 
742
- - do_not_unroll_columns: bool.
954
+ - premium_mode: typing.Optional[bool].
743
955
 
744
- - page_separator: str.
956
+ - continuous_mode: typing.Optional[bool].
745
957
 
746
- - bounding_box: str.
958
+ - gpt_4_o_api_key: typing.Optional[str].
747
959
 
748
- - target_pages: str.
960
+ - do_not_unroll_columns: typing.Optional[bool].
749
961
 
750
- - use_vendor_multimodal_model: bool.
962
+ - html_make_all_elements_visible: typing.Optional[bool].
751
963
 
752
- - vendor_multimodal_model_name: str.
964
+ - html_remove_fixed_elements: typing.Optional[bool].
753
965
 
754
- - vendor_multimodal_api_key: str.
966
+ - guess_xlsx_sheet_name: typing.Optional[bool].
755
967
 
756
- - page_prefix: str.
968
+ - page_separator: typing.Optional[str].
757
969
 
758
- - page_suffix: str.
970
+ - bounding_box: typing.Optional[str].
759
971
 
760
- - webhook_url: str.
972
+ - bbox_top: typing.Optional[float].
761
973
 
762
- - take_screenshot: bool.
974
+ - bbox_right: typing.Optional[float].
763
975
 
764
- - is_formatting_instruction: bool.
976
+ - bbox_bottom: typing.Optional[float].
765
977
 
766
- - disable_ocr: bool.
978
+ - bbox_left: typing.Optional[float].
767
979
 
768
- - annotate_links: bool.
980
+ - target_pages: typing.Optional[str].
769
981
 
770
- - disable_reconstruction: bool.
982
+ - use_vendor_multimodal_model: typing.Optional[bool].
771
983
 
772
- - input_s_3_path: str.
984
+ - vendor_multimodal_model_name: typing.Optional[str].
773
985
 
774
- - output_s_3_path_prefix: str.
986
+ - vendor_multimodal_api_key: typing.Optional[str].
775
987
 
776
- - azure_openai_deployment_name: str.
988
+ - page_prefix: typing.Optional[str].
777
989
 
778
- - azure_openai_endpoint: str.
990
+ - page_suffix: typing.Optional[str].
779
991
 
780
- - azure_openai_api_version: str.
992
+ - webhook_url: typing.Optional[str].
781
993
 
782
- - azure_openai_key: str.
994
+ - take_screenshot: typing.Optional[bool].
995
+
996
+ - is_formatting_instruction: typing.Optional[bool].
997
+
998
+ - disable_ocr: typing.Optional[bool].
999
+
1000
+ - annotate_links: typing.Optional[bool].
1001
+
1002
+ - disable_reconstruction: typing.Optional[bool].
1003
+
1004
+ - disable_image_extraction: typing.Optional[bool].
1005
+
1006
+ - input_s_3_path: typing.Optional[str].
1007
+
1008
+ - output_s_3_path_prefix: typing.Optional[str].
1009
+
1010
+ - azure_openai_deployment_name: typing.Optional[str].
1011
+
1012
+ - azure_openai_endpoint: typing.Optional[str].
1013
+
1014
+ - azure_openai_api_version: typing.Optional[str].
1015
+
1016
+ - azure_openai_key: typing.Optional[str].
1017
+
1018
+ - auto_mode: typing.Optional[bool].
1019
+
1020
+ - auto_mode_trigger_on_regexp_in_page: typing.Optional[str].
1021
+
1022
+ - auto_mode_trigger_on_text_in_page: typing.Optional[str].
1023
+
1024
+ - auto_mode_trigger_on_table_in_page: typing.Optional[bool].
1025
+
1026
+ - auto_mode_trigger_on_image_in_page: typing.Optional[bool].
783
1027
 
784
1028
  - file: typing.Optional[str].
1029
+
1030
+ - input_url: typing.Optional[str].
1031
+
1032
+ - http_proxy: typing.Optional[str].
1033
+
1034
+ - structured_output: typing.Optional[bool].
1035
+
1036
+ - structured_output_json_schema: typing.Optional[str].
1037
+
1038
+ - structured_output_json_schema_name: typing.Optional[str].
1039
+
1040
+ - max_pages: typing.Optional[int].
1041
+ ---
1042
+ from llama_cloud.client import AsyncLlamaCloud
1043
+
1044
+ client = AsyncLlamaCloud(
1045
+ token="YOUR_TOKEN",
1046
+ )
1047
+ await client.parsing.upload_file()
785
1048
  """
786
- _request: typing.Dict[str, typing.Any] = {
787
- "language": language,
788
- "parsing_instruction": parsing_instruction,
789
- "skip_diagonal_text": skip_diagonal_text,
790
- "invalidate_cache": invalidate_cache,
791
- "do_not_cache": do_not_cache,
792
- "gpt4o_mode": gpt_4_o_mode,
793
- "fast_mode": fast_mode,
794
- "premium_mode": premium_mode,
795
- "continuous_mode": continuous_mode,
796
- "gpt4o_api_key": gpt_4_o_api_key,
797
- "do_not_unroll_columns": do_not_unroll_columns,
798
- "page_separator": page_separator,
799
- "bounding_box": bounding_box,
800
- "target_pages": target_pages,
801
- "use_vendor_multimodal_model": use_vendor_multimodal_model,
802
- "vendor_multimodal_model_name": vendor_multimodal_model_name,
803
- "vendor_multimodal_api_key": vendor_multimodal_api_key,
804
- "page_prefix": page_prefix,
805
- "page_suffix": page_suffix,
806
- "webhook_url": webhook_url,
807
- "take_screenshot": take_screenshot,
808
- "is_formatting_instruction": is_formatting_instruction,
809
- "disable_ocr": disable_ocr,
810
- "annotate_links": annotate_links,
811
- "disable_reconstruction": disable_reconstruction,
812
- "input_s3_path": input_s_3_path,
813
- "output_s3_path_prefix": output_s_3_path_prefix,
814
- "azure_openai_deployment_name": azure_openai_deployment_name,
815
- "azure_openai_endpoint": azure_openai_endpoint,
816
- "azure_openai_api_version": azure_openai_api_version,
817
- "azure_openai_key": azure_openai_key,
818
- }
819
- if file is not OMIT:
820
- _request["file"] = file
821
1049
  _response = await self._client_wrapper.httpx_client.request(
822
1050
  "POST",
823
1051
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
824
- json=jsonable_encoder(_request),
1052
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1053
+ data=jsonable_encoder(
1054
+ {
1055
+ "language": language,
1056
+ "parsing_instruction": parsing_instruction,
1057
+ "skip_diagonal_text": skip_diagonal_text,
1058
+ "invalidate_cache": invalidate_cache,
1059
+ "output_pdf_of_document": output_pdf_of_document,
1060
+ "do_not_cache": do_not_cache,
1061
+ "gpt4o_mode": gpt_4_o_mode,
1062
+ "fast_mode": fast_mode,
1063
+ "premium_mode": premium_mode,
1064
+ "continuous_mode": continuous_mode,
1065
+ "gpt4o_api_key": gpt_4_o_api_key,
1066
+ "do_not_unroll_columns": do_not_unroll_columns,
1067
+ "html_make_all_elements_visible": html_make_all_elements_visible,
1068
+ "html_remove_fixed_elements": html_remove_fixed_elements,
1069
+ "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
1070
+ "page_separator": page_separator,
1071
+ "bounding_box": bounding_box,
1072
+ "bbox_top": bbox_top,
1073
+ "bbox_right": bbox_right,
1074
+ "bbox_bottom": bbox_bottom,
1075
+ "bbox_left": bbox_left,
1076
+ "target_pages": target_pages,
1077
+ "use_vendor_multimodal_model": use_vendor_multimodal_model,
1078
+ "vendor_multimodal_model_name": vendor_multimodal_model_name,
1079
+ "vendor_multimodal_api_key": vendor_multimodal_api_key,
1080
+ "page_prefix": page_prefix,
1081
+ "page_suffix": page_suffix,
1082
+ "webhook_url": webhook_url,
1083
+ "take_screenshot": take_screenshot,
1084
+ "is_formatting_instruction": is_formatting_instruction,
1085
+ "disable_ocr": disable_ocr,
1086
+ "annotate_links": annotate_links,
1087
+ "disable_reconstruction": disable_reconstruction,
1088
+ "disable_image_extraction": disable_image_extraction,
1089
+ "input_s3_path": input_s_3_path,
1090
+ "output_s3_path_prefix": output_s_3_path_prefix,
1091
+ "azure_openai_deployment_name": azure_openai_deployment_name,
1092
+ "azure_openai_endpoint": azure_openai_endpoint,
1093
+ "azure_openai_api_version": azure_openai_api_version,
1094
+ "azure_openai_key": azure_openai_key,
1095
+ "auto_mode": auto_mode,
1096
+ "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
1097
+ "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
1098
+ "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
1099
+ "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
1100
+ "file": file,
1101
+ "input_url": input_url,
1102
+ "http_proxy": http_proxy,
1103
+ "structured_output": structured_output,
1104
+ "structured_output_json_schema": structured_output_json_schema,
1105
+ "structured_output_json_schema_name": structured_output_json_schema_name,
1106
+ "max_pages": max_pages,
1107
+ }
1108
+ ),
1109
+ files={},
825
1110
  headers=self._client_wrapper.get_headers(),
826
1111
  timeout=60,
827
1112
  )
@@ -837,6 +1122,7 @@ class AsyncParsingClient:
837
1122
 
838
1123
  async def usage(self) -> ParsingUsage:
839
1124
  """
1125
+ DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
840
1126
  Get parsing usage for user
841
1127
 
842
1128
  ---
@@ -876,7 +1162,7 @@ class AsyncParsingClient:
876
1162
  token="YOUR_TOKEN",
877
1163
  )
878
1164
  await client.parsing.get_job(
879
- job_id="string",
1165
+ job_id="job_id",
880
1166
  )
881
1167
  """
882
1168
  _response = await self._client_wrapper.httpx_client.request(
@@ -908,7 +1194,7 @@ class AsyncParsingClient:
908
1194
  token="YOUR_TOKEN",
909
1195
  )
910
1196
  await client.parsing.get_parsing_job_details(
911
- job_id="string",
1197
+ job_id="job_id",
912
1198
  )
913
1199
  """
914
1200
  _response = await self._client_wrapper.httpx_client.request(
@@ -940,7 +1226,7 @@ class AsyncParsingClient:
940
1226
  token="YOUR_TOKEN",
941
1227
  )
942
1228
  await client.parsing.get_job_text_result(
943
- job_id="string",
1229
+ job_id="job_id",
944
1230
  )
945
1231
  """
946
1232
  _response = await self._client_wrapper.httpx_client.request(
@@ -972,13 +1258,115 @@ class AsyncParsingClient:
972
1258
  token="YOUR_TOKEN",
973
1259
  )
974
1260
  await client.parsing.get_job_raw_text_result(
975
- job_id="string",
1261
+ job_id="job_id",
1262
+ )
1263
+ """
1264
+ _response = await self._client_wrapper.httpx_client.request(
1265
+ "GET",
1266
+ urllib.parse.urljoin(
1267
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/pdf"
1268
+ ),
1269
+ headers=self._client_wrapper.get_headers(),
1270
+ timeout=60,
1271
+ )
1272
+ if 200 <= _response.status_code < 300:
1273
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
1274
+ if _response.status_code == 422:
1275
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1276
+ try:
1277
+ _response_json = _response.json()
1278
+ except JSONDecodeError:
1279
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1280
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1281
+
1282
+ async def get_job_structured_result(self, job_id: str) -> ParsingJobStructuredResult:
1283
+ """
1284
+ Get a job by id
1285
+
1286
+ Parameters:
1287
+ - job_id: str.
1288
+ ---
1289
+ from llama_cloud.client import AsyncLlamaCloud
1290
+
1291
+ client = AsyncLlamaCloud(
1292
+ token="YOUR_TOKEN",
1293
+ )
1294
+ await client.parsing.get_job_structured_result(
1295
+ job_id="job_id",
1296
+ )
1297
+ """
1298
+ _response = await self._client_wrapper.httpx_client.request(
1299
+ "GET",
1300
+ urllib.parse.urljoin(
1301
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/structured"
1302
+ ),
1303
+ headers=self._client_wrapper.get_headers(),
1304
+ timeout=60,
1305
+ )
1306
+ if 200 <= _response.status_code < 300:
1307
+ return pydantic.parse_obj_as(ParsingJobStructuredResult, _response.json()) # type: ignore
1308
+ if _response.status_code == 422:
1309
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1310
+ try:
1311
+ _response_json = _response.json()
1312
+ except JSONDecodeError:
1313
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1314
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1315
+
1316
+ async def get_job_raw_structured_result(self, job_id: str) -> typing.Any:
1317
+ """
1318
+ Get a job by id
1319
+
1320
+ Parameters:
1321
+ - job_id: str.
1322
+ ---
1323
+ from llama_cloud.client import AsyncLlamaCloud
1324
+
1325
+ client = AsyncLlamaCloud(
1326
+ token="YOUR_TOKEN",
1327
+ )
1328
+ await client.parsing.get_job_raw_structured_result(
1329
+ job_id="job_id",
1330
+ )
1331
+ """
1332
+ _response = await self._client_wrapper.httpx_client.request(
1333
+ "GET",
1334
+ urllib.parse.urljoin(
1335
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/structured"
1336
+ ),
1337
+ headers=self._client_wrapper.get_headers(),
1338
+ timeout=60,
1339
+ )
1340
+ if 200 <= _response.status_code < 300:
1341
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
1342
+ if _response.status_code == 422:
1343
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1344
+ try:
1345
+ _response_json = _response.json()
1346
+ except JSONDecodeError:
1347
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1348
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1349
+
1350
+ async def get_job_raw_xlsx_result(self, job_id: str) -> typing.Any:
1351
+ """
1352
+ Get a job by id
1353
+
1354
+ Parameters:
1355
+ - job_id: str.
1356
+ ---
1357
+ from llama_cloud.client import AsyncLlamaCloud
1358
+
1359
+ client = AsyncLlamaCloud(
1360
+ token="YOUR_TOKEN",
1361
+ )
1362
+ await client.parsing.get_job_raw_xlsx_result(
1363
+ job_id="job_id",
976
1364
  )
977
1365
  """
978
1366
  _response = await self._client_wrapper.httpx_client.request(
979
1367
  "GET",
980
1368
  urllib.parse.urljoin(
981
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/text"
1369
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/xlsx"
982
1370
  ),
983
1371
  headers=self._client_wrapper.get_headers(),
984
1372
  timeout=60,
@@ -1006,7 +1394,7 @@ class AsyncParsingClient:
1006
1394
  token="YOUR_TOKEN",
1007
1395
  )
1008
1396
  await client.parsing.get_job_result(
1009
- job_id="string",
1397
+ job_id="job_id",
1010
1398
  )
1011
1399
  """
1012
1400
  _response = await self._client_wrapper.httpx_client.request(
@@ -1040,7 +1428,7 @@ class AsyncParsingClient:
1040
1428
  token="YOUR_TOKEN",
1041
1429
  )
1042
1430
  await client.parsing.get_job_raw_md_result(
1043
- job_id="string",
1431
+ job_id="job_id",
1044
1432
  )
1045
1433
  """
1046
1434
  _response = await self._client_wrapper.httpx_client.request(
@@ -1074,7 +1462,7 @@ class AsyncParsingClient:
1074
1462
  token="YOUR_TOKEN",
1075
1463
  )
1076
1464
  await client.parsing.get_job_json_result(
1077
- job_id="string",
1465
+ job_id="job_id",
1078
1466
  )
1079
1467
  """
1080
1468
  _response = await self._client_wrapper.httpx_client.request(
@@ -1106,7 +1494,7 @@ class AsyncParsingClient:
1106
1494
  token="YOUR_TOKEN",
1107
1495
  )
1108
1496
  await client.parsing.get_job_json_raw_result(
1109
- job_id="string",
1497
+ job_id="job_id",
1110
1498
  )
1111
1499
  """
1112
1500
  _response = await self._client_wrapper.httpx_client.request(
@@ -1170,8 +1558,8 @@ class AsyncParsingClient:
1170
1558
  token="YOUR_TOKEN",
1171
1559
  )
1172
1560
  await client.parsing.generate_presigned_url(
1173
- job_id="string",
1174
- filename="string",
1561
+ job_id="job_id",
1562
+ filename="filename",
1175
1563
  )
1176
1564
  """
1177
1565
  _response = await self._client_wrapper.httpx_client.request(