llama-cloud 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +38 -12
- llama_cloud/resources/__init__.py +0 -14
- llama_cloud/resources/llama_extract/__init__.py +0 -17
- llama_cloud/resources/llama_extract/client.py +113 -314
- llama_cloud/resources/organizations/client.py +15 -5
- llama_cloud/resources/parsing/client.py +153 -86
- llama_cloud/resources/pipelines/client.py +145 -10
- llama_cloud/resources/projects/client.py +25 -9
- llama_cloud/resources/reports/client.py +16 -6
- llama_cloud/types/__init__.py +44 -6
- llama_cloud/types/{plan.py → base_plan.py} +16 -13
- llama_cloud/types/base_plan_metronome_plan_type.py +17 -0
- llama_cloud/types/base_plan_name.py +45 -0
- llama_cloud/types/base_plan_plan_frequency.py +25 -0
- llama_cloud/types/billing_period.py +32 -0
- llama_cloud/types/{base.py → credit_type.py} +4 -1
- llama_cloud/types/data_source.py +1 -0
- llama_cloud/types/eval_dataset_job_record.py +1 -2
- llama_cloud/types/extract_agent_create.py +39 -0
- llama_cloud/types/extract_agent_update.py +38 -0
- llama_cloud/types/extract_schema_validate_request.py +32 -0
- llama_cloud/types/free_credits_usage.py +34 -0
- llama_cloud/types/job_record.py +2 -3
- llama_cloud/types/llama_parse_parameters.py +9 -0
- llama_cloud/types/llm_parameters.py +1 -0
- llama_cloud/types/page_screenshot_metadata.py +1 -0
- llama_cloud/types/paginated_list_cloud_documents_response.py +35 -0
- llama_cloud/types/parsing_mode.py +37 -0
- llama_cloud/types/pipeline_data_source.py +1 -0
- llama_cloud/types/pipeline_file.py +1 -0
- llama_cloud/types/plan_limits.py +52 -0
- llama_cloud/types/recurring_credit_grant.py +44 -0
- llama_cloud/types/usage.py +5 -4
- llama_cloud/types/usage_active_alerts_item.py +25 -0
- llama_cloud/types/{interval_usage_and_plan.py → usage_and_plan.py} +4 -6
- {llama_cloud-0.1.12.dist-info → llama_cloud-0.1.14.dist-info}/METADATA +2 -1
- {llama_cloud-0.1.12.dist-info → llama_cloud-0.1.14.dist-info}/RECORD +45 -33
- {llama_cloud-0.1.12.dist-info → llama_cloud-0.1.14.dist-info}/WHEEL +1 -1
- llama_cloud/resources/llama_extract/types/__init__.py +0 -17
- /llama_cloud/{resources/llama_extract/types → types}/extract_agent_create_data_schema.py +0 -0
- /llama_cloud/{resources/llama_extract/types → types}/extract_agent_create_data_schema_zero_value.py +0 -0
- /llama_cloud/{resources/llama_extract/types → types}/extract_agent_update_data_schema.py +0 -0
- /llama_cloud/{resources/llama_extract/types → types}/extract_agent_update_data_schema_zero_value.py +0 -0
- /llama_cloud/{resources/llama_extract/types → types}/extract_schema_validate_request_data_schema.py +0 -0
- /llama_cloud/{resources/llama_extract/types → types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
- {llama_cloud-0.1.12.dist-info → llama_cloud-0.1.14.dist-info}/LICENSE +0 -0
|
@@ -10,11 +10,11 @@ from ...core.jsonable_encoder import jsonable_encoder
|
|
|
10
10
|
from ...core.remove_none_from_dict import remove_none_from_dict
|
|
11
11
|
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
|
12
12
|
from ...types.http_validation_error import HttpValidationError
|
|
13
|
-
from ...types.interval_usage_and_plan import IntervalUsageAndPlan
|
|
14
13
|
from ...types.organization import Organization
|
|
15
14
|
from ...types.organization_create import OrganizationCreate
|
|
16
15
|
from ...types.project import Project
|
|
17
16
|
from ...types.role import Role
|
|
17
|
+
from ...types.usage_and_plan import UsageAndPlan
|
|
18
18
|
from ...types.user_organization import UserOrganization
|
|
19
19
|
from ...types.user_organization_create import UserOrganizationCreate
|
|
20
20
|
from ...types.user_organization_delete import UserOrganizationDelete
|
|
@@ -297,12 +297,16 @@ class OrganizationsClient:
|
|
|
297
297
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
298
298
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
299
299
|
|
|
300
|
-
def get_organization_usage(
|
|
300
|
+
def get_organization_usage(
|
|
301
|
+
self, organization_id: typing.Optional[str], *, get_current_invoice_total: typing.Optional[bool] = None
|
|
302
|
+
) -> UsageAndPlan:
|
|
301
303
|
"""
|
|
302
304
|
Get usage for a project
|
|
303
305
|
|
|
304
306
|
Parameters:
|
|
305
307
|
- organization_id: typing.Optional[str].
|
|
308
|
+
|
|
309
|
+
- get_current_invoice_total: typing.Optional[bool].
|
|
306
310
|
---
|
|
307
311
|
from llama_cloud.client import LlamaCloud
|
|
308
312
|
|
|
@@ -316,11 +320,12 @@ class OrganizationsClient:
|
|
|
316
320
|
urllib.parse.urljoin(
|
|
317
321
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/organizations/{organization_id}/usage"
|
|
318
322
|
),
|
|
323
|
+
params=remove_none_from_dict({"get_current_invoice_total": get_current_invoice_total}),
|
|
319
324
|
headers=self._client_wrapper.get_headers(),
|
|
320
325
|
timeout=60,
|
|
321
326
|
)
|
|
322
327
|
if 200 <= _response.status_code < 300:
|
|
323
|
-
return pydantic.parse_obj_as(
|
|
328
|
+
return pydantic.parse_obj_as(UsageAndPlan, _response.json()) # type: ignore
|
|
324
329
|
if _response.status_code == 422:
|
|
325
330
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
326
331
|
try:
|
|
@@ -988,12 +993,16 @@ class AsyncOrganizationsClient:
|
|
|
988
993
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
989
994
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
990
995
|
|
|
991
|
-
async def get_organization_usage(
|
|
996
|
+
async def get_organization_usage(
|
|
997
|
+
self, organization_id: typing.Optional[str], *, get_current_invoice_total: typing.Optional[bool] = None
|
|
998
|
+
) -> UsageAndPlan:
|
|
992
999
|
"""
|
|
993
1000
|
Get usage for a project
|
|
994
1001
|
|
|
995
1002
|
Parameters:
|
|
996
1003
|
- organization_id: typing.Optional[str].
|
|
1004
|
+
|
|
1005
|
+
- get_current_invoice_total: typing.Optional[bool].
|
|
997
1006
|
---
|
|
998
1007
|
from llama_cloud.client import AsyncLlamaCloud
|
|
999
1008
|
|
|
@@ -1007,11 +1016,12 @@ class AsyncOrganizationsClient:
|
|
|
1007
1016
|
urllib.parse.urljoin(
|
|
1008
1017
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/organizations/{organization_id}/usage"
|
|
1009
1018
|
),
|
|
1019
|
+
params=remove_none_from_dict({"get_current_invoice_total": get_current_invoice_total}),
|
|
1010
1020
|
headers=self._client_wrapper.get_headers(),
|
|
1011
1021
|
timeout=60,
|
|
1012
1022
|
)
|
|
1013
1023
|
if 200 <= _response.status_code < 300:
|
|
1014
|
-
return pydantic.parse_obj_as(
|
|
1024
|
+
return pydantic.parse_obj_as(UsageAndPlan, _response.json()) # type: ignore
|
|
1015
1025
|
if _response.status_code == 422:
|
|
1016
1026
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1017
1027
|
try:
|
|
@@ -18,6 +18,7 @@ from ...types.parsing_job_json_result import ParsingJobJsonResult
|
|
|
18
18
|
from ...types.parsing_job_markdown_result import ParsingJobMarkdownResult
|
|
19
19
|
from ...types.parsing_job_structured_result import ParsingJobStructuredResult
|
|
20
20
|
from ...types.parsing_job_text_result import ParsingJobTextResult
|
|
21
|
+
from ...types.parsing_mode import ParsingMode
|
|
21
22
|
from ...types.parsing_usage import ParsingUsage
|
|
22
23
|
from ...types.presigned_url import PresignedUrl
|
|
23
24
|
|
|
@@ -196,6 +197,7 @@ class ParsingClient:
|
|
|
196
197
|
organization_id: typing.Optional[str] = None,
|
|
197
198
|
project_id: typing.Optional[str] = None,
|
|
198
199
|
file: typing.Optional[str] = OMIT,
|
|
200
|
+
adaptive_long_table: bool,
|
|
199
201
|
annotate_links: bool,
|
|
200
202
|
auto_mode: bool,
|
|
201
203
|
auto_mode_trigger_on_image_in_page: bool,
|
|
@@ -210,17 +212,13 @@ class ParsingClient:
|
|
|
210
212
|
bbox_left: float,
|
|
211
213
|
bbox_right: float,
|
|
212
214
|
bbox_top: float,
|
|
213
|
-
|
|
214
|
-
content_guideline_instruction: str,
|
|
215
|
-
continuous_mode: bool,
|
|
215
|
+
compact_markdown_table: bool,
|
|
216
216
|
disable_ocr: bool,
|
|
217
217
|
disable_reconstruction: bool,
|
|
218
218
|
disable_image_extraction: bool,
|
|
219
219
|
do_not_cache: bool,
|
|
220
220
|
do_not_unroll_columns: bool,
|
|
221
221
|
extract_charts: bool,
|
|
222
|
-
fast_mode: bool,
|
|
223
|
-
formatting_instruction: str,
|
|
224
222
|
guess_xlsx_sheet_name: bool,
|
|
225
223
|
html_make_all_elements_visible: bool,
|
|
226
224
|
html_remove_fixed_elements: bool,
|
|
@@ -230,7 +228,6 @@ class ParsingClient:
|
|
|
230
228
|
input_s_3_region: str,
|
|
231
229
|
input_url: str,
|
|
232
230
|
invalidate_cache: bool,
|
|
233
|
-
is_formatting_instruction: bool,
|
|
234
231
|
language: typing.List[ParserLanguages],
|
|
235
232
|
extract_layout: bool,
|
|
236
233
|
max_pages: typing.Optional[int] = OMIT,
|
|
@@ -240,8 +237,7 @@ class ParsingClient:
|
|
|
240
237
|
page_prefix: str,
|
|
241
238
|
page_separator: str,
|
|
242
239
|
page_suffix: str,
|
|
243
|
-
|
|
244
|
-
premium_mode: bool,
|
|
240
|
+
preserve_layout_alignment_across_pages: bool,
|
|
245
241
|
skip_diagonal_text: bool,
|
|
246
242
|
spreadsheet_extract_sub_tables: bool,
|
|
247
243
|
structured_output: bool,
|
|
@@ -249,13 +245,14 @@ class ParsingClient:
|
|
|
249
245
|
structured_output_json_schema_name: str,
|
|
250
246
|
take_screenshot: bool,
|
|
251
247
|
target_pages: str,
|
|
252
|
-
use_vendor_multimodal_model: bool,
|
|
253
248
|
vendor_multimodal_api_key: str,
|
|
254
249
|
vendor_multimodal_model_name: str,
|
|
250
|
+
model: str,
|
|
255
251
|
webhook_url: str,
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
252
|
+
parse_mode: typing.Optional[ParsingMode] = OMIT,
|
|
253
|
+
system_prompt: str,
|
|
254
|
+
system_prompt_append: str,
|
|
255
|
+
user_prompt: str,
|
|
259
256
|
job_timeout_in_seconds: float,
|
|
260
257
|
job_timeout_extra_time_per_page_in_seconds: float,
|
|
261
258
|
strict_mode_image_extraction: bool,
|
|
@@ -264,6 +261,18 @@ class ParsingClient:
|
|
|
264
261
|
strict_mode_buggy_font: bool,
|
|
265
262
|
ignore_document_elements_for_layout_detection: bool,
|
|
266
263
|
output_tables_as_html: bool,
|
|
264
|
+
use_vendor_multimodal_model: bool,
|
|
265
|
+
bounding_box: str,
|
|
266
|
+
gpt_4_o_mode: bool,
|
|
267
|
+
gpt_4_o_api_key: str,
|
|
268
|
+
complemental_formatting_instruction: str,
|
|
269
|
+
content_guideline_instruction: str,
|
|
270
|
+
premium_mode: bool,
|
|
271
|
+
is_formatting_instruction: bool,
|
|
272
|
+
continuous_mode: bool,
|
|
273
|
+
parsing_instruction: str,
|
|
274
|
+
fast_mode: bool,
|
|
275
|
+
formatting_instruction: str,
|
|
267
276
|
) -> ParsingJob:
|
|
268
277
|
"""
|
|
269
278
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -275,6 +284,8 @@ class ParsingClient:
|
|
|
275
284
|
|
|
276
285
|
- file: typing.Optional[str].
|
|
277
286
|
|
|
287
|
+
- adaptive_long_table: bool.
|
|
288
|
+
|
|
278
289
|
- annotate_links: bool.
|
|
279
290
|
|
|
280
291
|
- auto_mode: bool.
|
|
@@ -303,11 +314,7 @@ class ParsingClient:
|
|
|
303
314
|
|
|
304
315
|
- bbox_top: float.
|
|
305
316
|
|
|
306
|
-
-
|
|
307
|
-
|
|
308
|
-
- content_guideline_instruction: str.
|
|
309
|
-
|
|
310
|
-
- continuous_mode: bool.
|
|
317
|
+
- compact_markdown_table: bool.
|
|
311
318
|
|
|
312
319
|
- disable_ocr: bool.
|
|
313
320
|
|
|
@@ -321,10 +328,6 @@ class ParsingClient:
|
|
|
321
328
|
|
|
322
329
|
- extract_charts: bool.
|
|
323
330
|
|
|
324
|
-
- fast_mode: bool.
|
|
325
|
-
|
|
326
|
-
- formatting_instruction: str.
|
|
327
|
-
|
|
328
331
|
- guess_xlsx_sheet_name: bool.
|
|
329
332
|
|
|
330
333
|
- html_make_all_elements_visible: bool.
|
|
@@ -343,8 +346,6 @@ class ParsingClient:
|
|
|
343
346
|
|
|
344
347
|
- invalidate_cache: bool.
|
|
345
348
|
|
|
346
|
-
- is_formatting_instruction: bool.
|
|
347
|
-
|
|
348
349
|
- language: typing.List[ParserLanguages].
|
|
349
350
|
|
|
350
351
|
- extract_layout: bool.
|
|
@@ -363,9 +364,7 @@ class ParsingClient:
|
|
|
363
364
|
|
|
364
365
|
- page_suffix: str.
|
|
365
366
|
|
|
366
|
-
-
|
|
367
|
-
|
|
368
|
-
- premium_mode: bool.
|
|
367
|
+
- preserve_layout_alignment_across_pages: bool.
|
|
369
368
|
|
|
370
369
|
- skip_diagonal_text: bool.
|
|
371
370
|
|
|
@@ -381,19 +380,21 @@ class ParsingClient:
|
|
|
381
380
|
|
|
382
381
|
- target_pages: str.
|
|
383
382
|
|
|
384
|
-
- use_vendor_multimodal_model: bool.
|
|
385
|
-
|
|
386
383
|
- vendor_multimodal_api_key: str.
|
|
387
384
|
|
|
388
385
|
- vendor_multimodal_model_name: str.
|
|
389
386
|
|
|
387
|
+
- model: str.
|
|
388
|
+
|
|
390
389
|
- webhook_url: str.
|
|
391
390
|
|
|
392
|
-
-
|
|
391
|
+
- parse_mode: typing.Optional[ParsingMode].
|
|
393
392
|
|
|
394
|
-
-
|
|
393
|
+
- system_prompt: str.
|
|
395
394
|
|
|
396
|
-
-
|
|
395
|
+
- system_prompt_append: str.
|
|
396
|
+
|
|
397
|
+
- user_prompt: str.
|
|
397
398
|
|
|
398
399
|
- job_timeout_in_seconds: float.
|
|
399
400
|
|
|
@@ -410,8 +411,33 @@ class ParsingClient:
|
|
|
410
411
|
- ignore_document_elements_for_layout_detection: bool.
|
|
411
412
|
|
|
412
413
|
- output_tables_as_html: bool.
|
|
414
|
+
|
|
415
|
+
- use_vendor_multimodal_model: bool.
|
|
416
|
+
|
|
417
|
+
- bounding_box: str.
|
|
418
|
+
|
|
419
|
+
- gpt_4_o_mode: bool.
|
|
420
|
+
|
|
421
|
+
- gpt_4_o_api_key: str.
|
|
422
|
+
|
|
423
|
+
- complemental_formatting_instruction: str.
|
|
424
|
+
|
|
425
|
+
- content_guideline_instruction: str.
|
|
426
|
+
|
|
427
|
+
- premium_mode: bool.
|
|
428
|
+
|
|
429
|
+
- is_formatting_instruction: bool.
|
|
430
|
+
|
|
431
|
+
- continuous_mode: bool.
|
|
432
|
+
|
|
433
|
+
- parsing_instruction: str.
|
|
434
|
+
|
|
435
|
+
- fast_mode: bool.
|
|
436
|
+
|
|
437
|
+
- formatting_instruction: str.
|
|
413
438
|
"""
|
|
414
439
|
_request: typing.Dict[str, typing.Any] = {
|
|
440
|
+
"adaptive_long_table": adaptive_long_table,
|
|
415
441
|
"annotate_links": annotate_links,
|
|
416
442
|
"auto_mode": auto_mode,
|
|
417
443
|
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
@@ -426,17 +452,13 @@ class ParsingClient:
|
|
|
426
452
|
"bbox_left": bbox_left,
|
|
427
453
|
"bbox_right": bbox_right,
|
|
428
454
|
"bbox_top": bbox_top,
|
|
429
|
-
"
|
|
430
|
-
"content_guideline_instruction": content_guideline_instruction,
|
|
431
|
-
"continuous_mode": continuous_mode,
|
|
455
|
+
"compact_markdown_table": compact_markdown_table,
|
|
432
456
|
"disable_ocr": disable_ocr,
|
|
433
457
|
"disable_reconstruction": disable_reconstruction,
|
|
434
458
|
"disable_image_extraction": disable_image_extraction,
|
|
435
459
|
"do_not_cache": do_not_cache,
|
|
436
460
|
"do_not_unroll_columns": do_not_unroll_columns,
|
|
437
461
|
"extract_charts": extract_charts,
|
|
438
|
-
"fast_mode": fast_mode,
|
|
439
|
-
"formatting_instruction": formatting_instruction,
|
|
440
462
|
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
441
463
|
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
442
464
|
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
@@ -446,7 +468,6 @@ class ParsingClient:
|
|
|
446
468
|
"input_s3_region": input_s_3_region,
|
|
447
469
|
"input_url": input_url,
|
|
448
470
|
"invalidate_cache": invalidate_cache,
|
|
449
|
-
"is_formatting_instruction": is_formatting_instruction,
|
|
450
471
|
"language": language,
|
|
451
472
|
"extract_layout": extract_layout,
|
|
452
473
|
"output_pdf_of_document": output_pdf_of_document,
|
|
@@ -455,8 +476,7 @@ class ParsingClient:
|
|
|
455
476
|
"page_prefix": page_prefix,
|
|
456
477
|
"page_separator": page_separator,
|
|
457
478
|
"page_suffix": page_suffix,
|
|
458
|
-
"
|
|
459
|
-
"premium_mode": premium_mode,
|
|
479
|
+
"preserve_layout_alignment_across_pages": preserve_layout_alignment_across_pages,
|
|
460
480
|
"skip_diagonal_text": skip_diagonal_text,
|
|
461
481
|
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
462
482
|
"structured_output": structured_output,
|
|
@@ -464,13 +484,13 @@ class ParsingClient:
|
|
|
464
484
|
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
465
485
|
"take_screenshot": take_screenshot,
|
|
466
486
|
"target_pages": target_pages,
|
|
467
|
-
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
468
487
|
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
469
488
|
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
489
|
+
"model": model,
|
|
470
490
|
"webhook_url": webhook_url,
|
|
471
|
-
"
|
|
472
|
-
"
|
|
473
|
-
"
|
|
491
|
+
"system_prompt": system_prompt,
|
|
492
|
+
"system_prompt_append": system_prompt_append,
|
|
493
|
+
"user_prompt": user_prompt,
|
|
474
494
|
"job_timeout_in_seconds": job_timeout_in_seconds,
|
|
475
495
|
"job_timeout_extra_time_per_page_in_seconds": job_timeout_extra_time_per_page_in_seconds,
|
|
476
496
|
"strict_mode_image_extraction": strict_mode_image_extraction,
|
|
@@ -479,11 +499,25 @@ class ParsingClient:
|
|
|
479
499
|
"strict_mode_buggy_font": strict_mode_buggy_font,
|
|
480
500
|
"ignore_document_elements_for_layout_detection": ignore_document_elements_for_layout_detection,
|
|
481
501
|
"output_tables_as_HTML": output_tables_as_html,
|
|
502
|
+
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
503
|
+
"bounding_box": bounding_box,
|
|
504
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
505
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
506
|
+
"complemental_formatting_instruction": complemental_formatting_instruction,
|
|
507
|
+
"content_guideline_instruction": content_guideline_instruction,
|
|
508
|
+
"premium_mode": premium_mode,
|
|
509
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
510
|
+
"continuous_mode": continuous_mode,
|
|
511
|
+
"parsing_instruction": parsing_instruction,
|
|
512
|
+
"fast_mode": fast_mode,
|
|
513
|
+
"formatting_instruction": formatting_instruction,
|
|
482
514
|
}
|
|
483
515
|
if file is not OMIT:
|
|
484
516
|
_request["file"] = file
|
|
485
517
|
if max_pages is not OMIT:
|
|
486
518
|
_request["max_pages"] = max_pages
|
|
519
|
+
if parse_mode is not OMIT:
|
|
520
|
+
_request["parse_mode"] = parse_mode
|
|
487
521
|
_response = self._client_wrapper.httpx_client.request(
|
|
488
522
|
"POST",
|
|
489
523
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|
|
@@ -1143,6 +1177,7 @@ class AsyncParsingClient:
|
|
|
1143
1177
|
organization_id: typing.Optional[str] = None,
|
|
1144
1178
|
project_id: typing.Optional[str] = None,
|
|
1145
1179
|
file: typing.Optional[str] = OMIT,
|
|
1180
|
+
adaptive_long_table: bool,
|
|
1146
1181
|
annotate_links: bool,
|
|
1147
1182
|
auto_mode: bool,
|
|
1148
1183
|
auto_mode_trigger_on_image_in_page: bool,
|
|
@@ -1157,17 +1192,13 @@ class AsyncParsingClient:
|
|
|
1157
1192
|
bbox_left: float,
|
|
1158
1193
|
bbox_right: float,
|
|
1159
1194
|
bbox_top: float,
|
|
1160
|
-
|
|
1161
|
-
content_guideline_instruction: str,
|
|
1162
|
-
continuous_mode: bool,
|
|
1195
|
+
compact_markdown_table: bool,
|
|
1163
1196
|
disable_ocr: bool,
|
|
1164
1197
|
disable_reconstruction: bool,
|
|
1165
1198
|
disable_image_extraction: bool,
|
|
1166
1199
|
do_not_cache: bool,
|
|
1167
1200
|
do_not_unroll_columns: bool,
|
|
1168
1201
|
extract_charts: bool,
|
|
1169
|
-
fast_mode: bool,
|
|
1170
|
-
formatting_instruction: str,
|
|
1171
1202
|
guess_xlsx_sheet_name: bool,
|
|
1172
1203
|
html_make_all_elements_visible: bool,
|
|
1173
1204
|
html_remove_fixed_elements: bool,
|
|
@@ -1177,7 +1208,6 @@ class AsyncParsingClient:
|
|
|
1177
1208
|
input_s_3_region: str,
|
|
1178
1209
|
input_url: str,
|
|
1179
1210
|
invalidate_cache: bool,
|
|
1180
|
-
is_formatting_instruction: bool,
|
|
1181
1211
|
language: typing.List[ParserLanguages],
|
|
1182
1212
|
extract_layout: bool,
|
|
1183
1213
|
max_pages: typing.Optional[int] = OMIT,
|
|
@@ -1187,8 +1217,7 @@ class AsyncParsingClient:
|
|
|
1187
1217
|
page_prefix: str,
|
|
1188
1218
|
page_separator: str,
|
|
1189
1219
|
page_suffix: str,
|
|
1190
|
-
|
|
1191
|
-
premium_mode: bool,
|
|
1220
|
+
preserve_layout_alignment_across_pages: bool,
|
|
1192
1221
|
skip_diagonal_text: bool,
|
|
1193
1222
|
spreadsheet_extract_sub_tables: bool,
|
|
1194
1223
|
structured_output: bool,
|
|
@@ -1196,13 +1225,14 @@ class AsyncParsingClient:
|
|
|
1196
1225
|
structured_output_json_schema_name: str,
|
|
1197
1226
|
take_screenshot: bool,
|
|
1198
1227
|
target_pages: str,
|
|
1199
|
-
use_vendor_multimodal_model: bool,
|
|
1200
1228
|
vendor_multimodal_api_key: str,
|
|
1201
1229
|
vendor_multimodal_model_name: str,
|
|
1230
|
+
model: str,
|
|
1202
1231
|
webhook_url: str,
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1232
|
+
parse_mode: typing.Optional[ParsingMode] = OMIT,
|
|
1233
|
+
system_prompt: str,
|
|
1234
|
+
system_prompt_append: str,
|
|
1235
|
+
user_prompt: str,
|
|
1206
1236
|
job_timeout_in_seconds: float,
|
|
1207
1237
|
job_timeout_extra_time_per_page_in_seconds: float,
|
|
1208
1238
|
strict_mode_image_extraction: bool,
|
|
@@ -1211,6 +1241,18 @@ class AsyncParsingClient:
|
|
|
1211
1241
|
strict_mode_buggy_font: bool,
|
|
1212
1242
|
ignore_document_elements_for_layout_detection: bool,
|
|
1213
1243
|
output_tables_as_html: bool,
|
|
1244
|
+
use_vendor_multimodal_model: bool,
|
|
1245
|
+
bounding_box: str,
|
|
1246
|
+
gpt_4_o_mode: bool,
|
|
1247
|
+
gpt_4_o_api_key: str,
|
|
1248
|
+
complemental_formatting_instruction: str,
|
|
1249
|
+
content_guideline_instruction: str,
|
|
1250
|
+
premium_mode: bool,
|
|
1251
|
+
is_formatting_instruction: bool,
|
|
1252
|
+
continuous_mode: bool,
|
|
1253
|
+
parsing_instruction: str,
|
|
1254
|
+
fast_mode: bool,
|
|
1255
|
+
formatting_instruction: str,
|
|
1214
1256
|
) -> ParsingJob:
|
|
1215
1257
|
"""
|
|
1216
1258
|
Upload a file to s3 and create a job. return a job id
|
|
@@ -1222,6 +1264,8 @@ class AsyncParsingClient:
|
|
|
1222
1264
|
|
|
1223
1265
|
- file: typing.Optional[str].
|
|
1224
1266
|
|
|
1267
|
+
- adaptive_long_table: bool.
|
|
1268
|
+
|
|
1225
1269
|
- annotate_links: bool.
|
|
1226
1270
|
|
|
1227
1271
|
- auto_mode: bool.
|
|
@@ -1250,11 +1294,7 @@ class AsyncParsingClient:
|
|
|
1250
1294
|
|
|
1251
1295
|
- bbox_top: float.
|
|
1252
1296
|
|
|
1253
|
-
-
|
|
1254
|
-
|
|
1255
|
-
- content_guideline_instruction: str.
|
|
1256
|
-
|
|
1257
|
-
- continuous_mode: bool.
|
|
1297
|
+
- compact_markdown_table: bool.
|
|
1258
1298
|
|
|
1259
1299
|
- disable_ocr: bool.
|
|
1260
1300
|
|
|
@@ -1268,10 +1308,6 @@ class AsyncParsingClient:
|
|
|
1268
1308
|
|
|
1269
1309
|
- extract_charts: bool.
|
|
1270
1310
|
|
|
1271
|
-
- fast_mode: bool.
|
|
1272
|
-
|
|
1273
|
-
- formatting_instruction: str.
|
|
1274
|
-
|
|
1275
1311
|
- guess_xlsx_sheet_name: bool.
|
|
1276
1312
|
|
|
1277
1313
|
- html_make_all_elements_visible: bool.
|
|
@@ -1290,8 +1326,6 @@ class AsyncParsingClient:
|
|
|
1290
1326
|
|
|
1291
1327
|
- invalidate_cache: bool.
|
|
1292
1328
|
|
|
1293
|
-
- is_formatting_instruction: bool.
|
|
1294
|
-
|
|
1295
1329
|
- language: typing.List[ParserLanguages].
|
|
1296
1330
|
|
|
1297
1331
|
- extract_layout: bool.
|
|
@@ -1310,9 +1344,7 @@ class AsyncParsingClient:
|
|
|
1310
1344
|
|
|
1311
1345
|
- page_suffix: str.
|
|
1312
1346
|
|
|
1313
|
-
-
|
|
1314
|
-
|
|
1315
|
-
- premium_mode: bool.
|
|
1347
|
+
- preserve_layout_alignment_across_pages: bool.
|
|
1316
1348
|
|
|
1317
1349
|
- skip_diagonal_text: bool.
|
|
1318
1350
|
|
|
@@ -1328,19 +1360,21 @@ class AsyncParsingClient:
|
|
|
1328
1360
|
|
|
1329
1361
|
- target_pages: str.
|
|
1330
1362
|
|
|
1331
|
-
- use_vendor_multimodal_model: bool.
|
|
1332
|
-
|
|
1333
1363
|
- vendor_multimodal_api_key: str.
|
|
1334
1364
|
|
|
1335
1365
|
- vendor_multimodal_model_name: str.
|
|
1336
1366
|
|
|
1367
|
+
- model: str.
|
|
1368
|
+
|
|
1337
1369
|
- webhook_url: str.
|
|
1338
1370
|
|
|
1339
|
-
-
|
|
1371
|
+
- parse_mode: typing.Optional[ParsingMode].
|
|
1340
1372
|
|
|
1341
|
-
-
|
|
1373
|
+
- system_prompt: str.
|
|
1342
1374
|
|
|
1343
|
-
-
|
|
1375
|
+
- system_prompt_append: str.
|
|
1376
|
+
|
|
1377
|
+
- user_prompt: str.
|
|
1344
1378
|
|
|
1345
1379
|
- job_timeout_in_seconds: float.
|
|
1346
1380
|
|
|
@@ -1357,8 +1391,33 @@ class AsyncParsingClient:
|
|
|
1357
1391
|
- ignore_document_elements_for_layout_detection: bool.
|
|
1358
1392
|
|
|
1359
1393
|
- output_tables_as_html: bool.
|
|
1394
|
+
|
|
1395
|
+
- use_vendor_multimodal_model: bool.
|
|
1396
|
+
|
|
1397
|
+
- bounding_box: str.
|
|
1398
|
+
|
|
1399
|
+
- gpt_4_o_mode: bool.
|
|
1400
|
+
|
|
1401
|
+
- gpt_4_o_api_key: str.
|
|
1402
|
+
|
|
1403
|
+
- complemental_formatting_instruction: str.
|
|
1404
|
+
|
|
1405
|
+
- content_guideline_instruction: str.
|
|
1406
|
+
|
|
1407
|
+
- premium_mode: bool.
|
|
1408
|
+
|
|
1409
|
+
- is_formatting_instruction: bool.
|
|
1410
|
+
|
|
1411
|
+
- continuous_mode: bool.
|
|
1412
|
+
|
|
1413
|
+
- parsing_instruction: str.
|
|
1414
|
+
|
|
1415
|
+
- fast_mode: bool.
|
|
1416
|
+
|
|
1417
|
+
- formatting_instruction: str.
|
|
1360
1418
|
"""
|
|
1361
1419
|
_request: typing.Dict[str, typing.Any] = {
|
|
1420
|
+
"adaptive_long_table": adaptive_long_table,
|
|
1362
1421
|
"annotate_links": annotate_links,
|
|
1363
1422
|
"auto_mode": auto_mode,
|
|
1364
1423
|
"auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
|
|
@@ -1373,17 +1432,13 @@ class AsyncParsingClient:
|
|
|
1373
1432
|
"bbox_left": bbox_left,
|
|
1374
1433
|
"bbox_right": bbox_right,
|
|
1375
1434
|
"bbox_top": bbox_top,
|
|
1376
|
-
"
|
|
1377
|
-
"content_guideline_instruction": content_guideline_instruction,
|
|
1378
|
-
"continuous_mode": continuous_mode,
|
|
1435
|
+
"compact_markdown_table": compact_markdown_table,
|
|
1379
1436
|
"disable_ocr": disable_ocr,
|
|
1380
1437
|
"disable_reconstruction": disable_reconstruction,
|
|
1381
1438
|
"disable_image_extraction": disable_image_extraction,
|
|
1382
1439
|
"do_not_cache": do_not_cache,
|
|
1383
1440
|
"do_not_unroll_columns": do_not_unroll_columns,
|
|
1384
1441
|
"extract_charts": extract_charts,
|
|
1385
|
-
"fast_mode": fast_mode,
|
|
1386
|
-
"formatting_instruction": formatting_instruction,
|
|
1387
1442
|
"guess_xlsx_sheet_name": guess_xlsx_sheet_name,
|
|
1388
1443
|
"html_make_all_elements_visible": html_make_all_elements_visible,
|
|
1389
1444
|
"html_remove_fixed_elements": html_remove_fixed_elements,
|
|
@@ -1393,7 +1448,6 @@ class AsyncParsingClient:
|
|
|
1393
1448
|
"input_s3_region": input_s_3_region,
|
|
1394
1449
|
"input_url": input_url,
|
|
1395
1450
|
"invalidate_cache": invalidate_cache,
|
|
1396
|
-
"is_formatting_instruction": is_formatting_instruction,
|
|
1397
1451
|
"language": language,
|
|
1398
1452
|
"extract_layout": extract_layout,
|
|
1399
1453
|
"output_pdf_of_document": output_pdf_of_document,
|
|
@@ -1402,8 +1456,7 @@ class AsyncParsingClient:
|
|
|
1402
1456
|
"page_prefix": page_prefix,
|
|
1403
1457
|
"page_separator": page_separator,
|
|
1404
1458
|
"page_suffix": page_suffix,
|
|
1405
|
-
"
|
|
1406
|
-
"premium_mode": premium_mode,
|
|
1459
|
+
"preserve_layout_alignment_across_pages": preserve_layout_alignment_across_pages,
|
|
1407
1460
|
"skip_diagonal_text": skip_diagonal_text,
|
|
1408
1461
|
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
1409
1462
|
"structured_output": structured_output,
|
|
@@ -1411,13 +1464,13 @@ class AsyncParsingClient:
|
|
|
1411
1464
|
"structured_output_json_schema_name": structured_output_json_schema_name,
|
|
1412
1465
|
"take_screenshot": take_screenshot,
|
|
1413
1466
|
"target_pages": target_pages,
|
|
1414
|
-
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
1415
1467
|
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
1416
1468
|
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
1469
|
+
"model": model,
|
|
1417
1470
|
"webhook_url": webhook_url,
|
|
1418
|
-
"
|
|
1419
|
-
"
|
|
1420
|
-
"
|
|
1471
|
+
"system_prompt": system_prompt,
|
|
1472
|
+
"system_prompt_append": system_prompt_append,
|
|
1473
|
+
"user_prompt": user_prompt,
|
|
1421
1474
|
"job_timeout_in_seconds": job_timeout_in_seconds,
|
|
1422
1475
|
"job_timeout_extra_time_per_page_in_seconds": job_timeout_extra_time_per_page_in_seconds,
|
|
1423
1476
|
"strict_mode_image_extraction": strict_mode_image_extraction,
|
|
@@ -1426,11 +1479,25 @@ class AsyncParsingClient:
|
|
|
1426
1479
|
"strict_mode_buggy_font": strict_mode_buggy_font,
|
|
1427
1480
|
"ignore_document_elements_for_layout_detection": ignore_document_elements_for_layout_detection,
|
|
1428
1481
|
"output_tables_as_HTML": output_tables_as_html,
|
|
1482
|
+
"use_vendor_multimodal_model": use_vendor_multimodal_model,
|
|
1483
|
+
"bounding_box": bounding_box,
|
|
1484
|
+
"gpt4o_mode": gpt_4_o_mode,
|
|
1485
|
+
"gpt4o_api_key": gpt_4_o_api_key,
|
|
1486
|
+
"complemental_formatting_instruction": complemental_formatting_instruction,
|
|
1487
|
+
"content_guideline_instruction": content_guideline_instruction,
|
|
1488
|
+
"premium_mode": premium_mode,
|
|
1489
|
+
"is_formatting_instruction": is_formatting_instruction,
|
|
1490
|
+
"continuous_mode": continuous_mode,
|
|
1491
|
+
"parsing_instruction": parsing_instruction,
|
|
1492
|
+
"fast_mode": fast_mode,
|
|
1493
|
+
"formatting_instruction": formatting_instruction,
|
|
1429
1494
|
}
|
|
1430
1495
|
if file is not OMIT:
|
|
1431
1496
|
_request["file"] = file
|
|
1432
1497
|
if max_pages is not OMIT:
|
|
1433
1498
|
_request["max_pages"] = max_pages
|
|
1499
|
+
if parse_mode is not OMIT:
|
|
1500
|
+
_request["parse_mode"] = parse_mode
|
|
1434
1501
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1435
1502
|
"POST",
|
|
1436
1503
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
|