llama-cloud 0.1.34__py3-none-any.whl → 0.1.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +34 -0
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +6 -0
- llama_cloud/resources/beta/client.py +211 -8
- llama_cloud/resources/files/client.py +226 -0
- llama_cloud/resources/llama_extract/__init__.py +4 -0
- llama_cloud/resources/llama_extract/client.py +179 -0
- llama_cloud/resources/llama_extract/types/__init__.py +4 -0
- llama_cloud/resources/llama_extract/types/extract_stateless_request_data_schema.py +9 -0
- llama_cloud/resources/llama_extract/types/extract_stateless_request_data_schema_zero_value.py +7 -0
- llama_cloud/resources/parsing/client.py +24 -0
- llama_cloud/resources/users/__init__.py +2 -0
- llama_cloud/resources/users/client.py +155 -0
- llama_cloud/types/__init__.py +28 -0
- llama_cloud/types/data_source_reader_version_metadata.py +2 -1
- llama_cloud/types/data_source_reader_version_metadata_reader_version.py +17 -0
- llama_cloud/types/extract_agent.py +3 -0
- llama_cloud/types/extract_config.py +4 -0
- llama_cloud/types/file_data.py +36 -0
- llama_cloud/types/legacy_parse_job_config.py +3 -0
- llama_cloud/types/llama_extract_settings.py +4 -0
- llama_cloud/types/llama_parse_parameters.py +3 -0
- llama_cloud/types/managed_open_ai_embedding.py +36 -0
- llama_cloud/types/managed_open_ai_embedding_config.py +34 -0
- llama_cloud/types/multimodal_parse_resolution.py +17 -0
- llama_cloud/types/paginated_response_quota_configuration.py +36 -0
- llama_cloud/types/parse_job_config.py +3 -0
- llama_cloud/types/pipeline_embedding_config.py +11 -0
- llama_cloud/types/quota_configuration.py +53 -0
- llama_cloud/types/quota_configuration_configuration_type.py +33 -0
- llama_cloud/types/quota_configuration_status.py +21 -0
- llama_cloud/types/quota_rate_limit_configuration_value.py +38 -0
- llama_cloud/types/quota_rate_limit_configuration_value_denominator_units.py +29 -0
- llama_cloud/types/update_user_response.py +33 -0
- llama_cloud/types/usage_response_active_alerts_item.py +4 -0
- llama_cloud/types/user_summary.py +38 -0
- llama_cloud/types/webhook_configuration_webhook_events_item.py +20 -0
- {llama_cloud-0.1.34.dist-info → llama_cloud-0.1.35.dist-info}/METADATA +1 -1
- {llama_cloud-0.1.34.dist-info → llama_cloud-0.1.35.dist-info}/RECORD +41 -24
- {llama_cloud-0.1.34.dist-info → llama_cloud-0.1.35.dist-info}/LICENSE +0 -0
- {llama_cloud-0.1.34.dist-info → llama_cloud-0.1.35.dist-info}/WHEEL +0 -0
|
@@ -207,6 +207,9 @@ class FilesClient:
|
|
|
207
207
|
"""
|
|
208
208
|
Create a presigned url for uploading a file.
|
|
209
209
|
|
|
210
|
+
The presigned url is valid for a limited time period, after which it will expire.
|
|
211
|
+
Be careful on accidental exposure of the presigned url, as it may allow unauthorized access to the file before the expiration.
|
|
212
|
+
|
|
210
213
|
Parameters:
|
|
211
214
|
- expires_at_seconds: typing.Optional[int].
|
|
212
215
|
|
|
@@ -658,6 +661,116 @@ class FilesClient:
|
|
|
658
661
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
659
662
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
660
663
|
|
|
664
|
+
def generate_file_page_screenshot_presigned_url(
|
|
665
|
+
self,
|
|
666
|
+
id: str,
|
|
667
|
+
page_index: int,
|
|
668
|
+
*,
|
|
669
|
+
project_id: typing.Optional[str] = None,
|
|
670
|
+
organization_id: typing.Optional[str] = None,
|
|
671
|
+
) -> PresignedUrl:
|
|
672
|
+
"""
|
|
673
|
+
Returns a presigned url to read a page screenshot.
|
|
674
|
+
|
|
675
|
+
The presigned url is valid for a limited time period, after which it will expire.
|
|
676
|
+
Be careful on accidental exposure of the presigned url, as it may allow unauthorized access to the file before the expiration.
|
|
677
|
+
|
|
678
|
+
Parameters:
|
|
679
|
+
- id: str.
|
|
680
|
+
|
|
681
|
+
- page_index: int.
|
|
682
|
+
|
|
683
|
+
- project_id: typing.Optional[str].
|
|
684
|
+
|
|
685
|
+
- organization_id: typing.Optional[str].
|
|
686
|
+
---
|
|
687
|
+
from llama_cloud.client import LlamaCloud
|
|
688
|
+
|
|
689
|
+
client = LlamaCloud(
|
|
690
|
+
token="YOUR_TOKEN",
|
|
691
|
+
)
|
|
692
|
+
client.files.generate_file_page_screenshot_presigned_url(
|
|
693
|
+
id="string",
|
|
694
|
+
page_index=1,
|
|
695
|
+
)
|
|
696
|
+
"""
|
|
697
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
698
|
+
"POST",
|
|
699
|
+
urllib.parse.urljoin(
|
|
700
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
701
|
+
f"api/v1/files/{id}/page_screenshots/{page_index}/presigned_url",
|
|
702
|
+
),
|
|
703
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
704
|
+
headers=self._client_wrapper.get_headers(),
|
|
705
|
+
timeout=60,
|
|
706
|
+
)
|
|
707
|
+
if 200 <= _response.status_code < 300:
|
|
708
|
+
return pydantic.parse_obj_as(PresignedUrl, _response.json()) # type: ignore
|
|
709
|
+
if _response.status_code == 422:
|
|
710
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
711
|
+
try:
|
|
712
|
+
_response_json = _response.json()
|
|
713
|
+
except JSONDecodeError:
|
|
714
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
715
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
716
|
+
|
|
717
|
+
def generate_file_page_figure_presigned_url(
|
|
718
|
+
self,
|
|
719
|
+
id: str,
|
|
720
|
+
page_index: int,
|
|
721
|
+
figure_name: str,
|
|
722
|
+
*,
|
|
723
|
+
project_id: typing.Optional[str] = None,
|
|
724
|
+
organization_id: typing.Optional[str] = None,
|
|
725
|
+
) -> PresignedUrl:
|
|
726
|
+
"""
|
|
727
|
+
Returns a presigned url to read a page figure.
|
|
728
|
+
|
|
729
|
+
The presigned url is valid for a limited time period, after which it will expire.
|
|
730
|
+
Be careful on accidental exposure of the presigned url, as it may allow unauthorized access to the file before the expiration.
|
|
731
|
+
|
|
732
|
+
Parameters:
|
|
733
|
+
- id: str.
|
|
734
|
+
|
|
735
|
+
- page_index: int.
|
|
736
|
+
|
|
737
|
+
- figure_name: str.
|
|
738
|
+
|
|
739
|
+
- project_id: typing.Optional[str].
|
|
740
|
+
|
|
741
|
+
- organization_id: typing.Optional[str].
|
|
742
|
+
---
|
|
743
|
+
from llama_cloud.client import LlamaCloud
|
|
744
|
+
|
|
745
|
+
client = LlamaCloud(
|
|
746
|
+
token="YOUR_TOKEN",
|
|
747
|
+
)
|
|
748
|
+
client.files.generate_file_page_figure_presigned_url(
|
|
749
|
+
id="string",
|
|
750
|
+
page_index=1,
|
|
751
|
+
figure_name="string",
|
|
752
|
+
)
|
|
753
|
+
"""
|
|
754
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
755
|
+
"POST",
|
|
756
|
+
urllib.parse.urljoin(
|
|
757
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
758
|
+
f"api/v1/files/{id}/page-figures/{page_index}/{figure_name}/presigned_url",
|
|
759
|
+
),
|
|
760
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
761
|
+
headers=self._client_wrapper.get_headers(),
|
|
762
|
+
timeout=60,
|
|
763
|
+
)
|
|
764
|
+
if 200 <= _response.status_code < 300:
|
|
765
|
+
return pydantic.parse_obj_as(PresignedUrl, _response.json()) # type: ignore
|
|
766
|
+
if _response.status_code == 422:
|
|
767
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
768
|
+
try:
|
|
769
|
+
_response_json = _response.json()
|
|
770
|
+
except JSONDecodeError:
|
|
771
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
772
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
773
|
+
|
|
661
774
|
|
|
662
775
|
class AsyncFilesClient:
|
|
663
776
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
@@ -834,6 +947,9 @@ class AsyncFilesClient:
|
|
|
834
947
|
"""
|
|
835
948
|
Create a presigned url for uploading a file.
|
|
836
949
|
|
|
950
|
+
The presigned url is valid for a limited time period, after which it will expire.
|
|
951
|
+
Be careful on accidental exposure of the presigned url, as it may allow unauthorized access to the file before the expiration.
|
|
952
|
+
|
|
837
953
|
Parameters:
|
|
838
954
|
- expires_at_seconds: typing.Optional[int].
|
|
839
955
|
|
|
@@ -1284,3 +1400,113 @@ class AsyncFilesClient:
|
|
|
1284
1400
|
except JSONDecodeError:
|
|
1285
1401
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1286
1402
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1403
|
+
|
|
1404
|
+
async def generate_file_page_screenshot_presigned_url(
|
|
1405
|
+
self,
|
|
1406
|
+
id: str,
|
|
1407
|
+
page_index: int,
|
|
1408
|
+
*,
|
|
1409
|
+
project_id: typing.Optional[str] = None,
|
|
1410
|
+
organization_id: typing.Optional[str] = None,
|
|
1411
|
+
) -> PresignedUrl:
|
|
1412
|
+
"""
|
|
1413
|
+
Returns a presigned url to read a page screenshot.
|
|
1414
|
+
|
|
1415
|
+
The presigned url is valid for a limited time period, after which it will expire.
|
|
1416
|
+
Be careful on accidental exposure of the presigned url, as it may allow unauthorized access to the file before the expiration.
|
|
1417
|
+
|
|
1418
|
+
Parameters:
|
|
1419
|
+
- id: str.
|
|
1420
|
+
|
|
1421
|
+
- page_index: int.
|
|
1422
|
+
|
|
1423
|
+
- project_id: typing.Optional[str].
|
|
1424
|
+
|
|
1425
|
+
- organization_id: typing.Optional[str].
|
|
1426
|
+
---
|
|
1427
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1428
|
+
|
|
1429
|
+
client = AsyncLlamaCloud(
|
|
1430
|
+
token="YOUR_TOKEN",
|
|
1431
|
+
)
|
|
1432
|
+
await client.files.generate_file_page_screenshot_presigned_url(
|
|
1433
|
+
id="string",
|
|
1434
|
+
page_index=1,
|
|
1435
|
+
)
|
|
1436
|
+
"""
|
|
1437
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1438
|
+
"POST",
|
|
1439
|
+
urllib.parse.urljoin(
|
|
1440
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
1441
|
+
f"api/v1/files/{id}/page_screenshots/{page_index}/presigned_url",
|
|
1442
|
+
),
|
|
1443
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1444
|
+
headers=self._client_wrapper.get_headers(),
|
|
1445
|
+
timeout=60,
|
|
1446
|
+
)
|
|
1447
|
+
if 200 <= _response.status_code < 300:
|
|
1448
|
+
return pydantic.parse_obj_as(PresignedUrl, _response.json()) # type: ignore
|
|
1449
|
+
if _response.status_code == 422:
|
|
1450
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1451
|
+
try:
|
|
1452
|
+
_response_json = _response.json()
|
|
1453
|
+
except JSONDecodeError:
|
|
1454
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1455
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1456
|
+
|
|
1457
|
+
async def generate_file_page_figure_presigned_url(
|
|
1458
|
+
self,
|
|
1459
|
+
id: str,
|
|
1460
|
+
page_index: int,
|
|
1461
|
+
figure_name: str,
|
|
1462
|
+
*,
|
|
1463
|
+
project_id: typing.Optional[str] = None,
|
|
1464
|
+
organization_id: typing.Optional[str] = None,
|
|
1465
|
+
) -> PresignedUrl:
|
|
1466
|
+
"""
|
|
1467
|
+
Returns a presigned url to read a page figure.
|
|
1468
|
+
|
|
1469
|
+
The presigned url is valid for a limited time period, after which it will expire.
|
|
1470
|
+
Be careful on accidental exposure of the presigned url, as it may allow unauthorized access to the file before the expiration.
|
|
1471
|
+
|
|
1472
|
+
Parameters:
|
|
1473
|
+
- id: str.
|
|
1474
|
+
|
|
1475
|
+
- page_index: int.
|
|
1476
|
+
|
|
1477
|
+
- figure_name: str.
|
|
1478
|
+
|
|
1479
|
+
- project_id: typing.Optional[str].
|
|
1480
|
+
|
|
1481
|
+
- organization_id: typing.Optional[str].
|
|
1482
|
+
---
|
|
1483
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1484
|
+
|
|
1485
|
+
client = AsyncLlamaCloud(
|
|
1486
|
+
token="YOUR_TOKEN",
|
|
1487
|
+
)
|
|
1488
|
+
await client.files.generate_file_page_figure_presigned_url(
|
|
1489
|
+
id="string",
|
|
1490
|
+
page_index=1,
|
|
1491
|
+
figure_name="string",
|
|
1492
|
+
)
|
|
1493
|
+
"""
|
|
1494
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1495
|
+
"POST",
|
|
1496
|
+
urllib.parse.urljoin(
|
|
1497
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
1498
|
+
f"api/v1/files/{id}/page-figures/{page_index}/{figure_name}/presigned_url",
|
|
1499
|
+
),
|
|
1500
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1501
|
+
headers=self._client_wrapper.get_headers(),
|
|
1502
|
+
timeout=60,
|
|
1503
|
+
)
|
|
1504
|
+
if 200 <= _response.status_code < 300:
|
|
1505
|
+
return pydantic.parse_obj_as(PresignedUrl, _response.json()) # type: ignore
|
|
1506
|
+
if _response.status_code == 422:
|
|
1507
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1508
|
+
try:
|
|
1509
|
+
_response_json = _response.json()
|
|
1510
|
+
except JSONDecodeError:
|
|
1511
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1512
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
@@ -9,6 +9,8 @@ from .types import (
|
|
|
9
9
|
ExtractJobCreateBatchDataSchemaOverrideZeroValue,
|
|
10
10
|
ExtractSchemaValidateRequestDataSchema,
|
|
11
11
|
ExtractSchemaValidateRequestDataSchemaZeroValue,
|
|
12
|
+
ExtractStatelessRequestDataSchema,
|
|
13
|
+
ExtractStatelessRequestDataSchemaZeroValue,
|
|
12
14
|
)
|
|
13
15
|
|
|
14
16
|
__all__ = [
|
|
@@ -20,4 +22,6 @@ __all__ = [
|
|
|
20
22
|
"ExtractJobCreateBatchDataSchemaOverrideZeroValue",
|
|
21
23
|
"ExtractSchemaValidateRequestDataSchema",
|
|
22
24
|
"ExtractSchemaValidateRequestDataSchemaZeroValue",
|
|
25
|
+
"ExtractStatelessRequestDataSchema",
|
|
26
|
+
"ExtractStatelessRequestDataSchemaZeroValue",
|
|
23
27
|
]
|
|
@@ -17,13 +17,16 @@ from ...types.extract_resultset import ExtractResultset
|
|
|
17
17
|
from ...types.extract_run import ExtractRun
|
|
18
18
|
from ...types.extract_schema_generate_response import ExtractSchemaGenerateResponse
|
|
19
19
|
from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
|
|
20
|
+
from ...types.file_data import FileData
|
|
20
21
|
from ...types.http_validation_error import HttpValidationError
|
|
21
22
|
from ...types.llama_extract_settings import LlamaExtractSettings
|
|
22
23
|
from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
|
|
24
|
+
from ...types.webhook_configuration import WebhookConfiguration
|
|
23
25
|
from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
|
|
24
26
|
from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
|
|
25
27
|
from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
|
|
26
28
|
from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
29
|
+
from .types.extract_stateless_request_data_schema import ExtractStatelessRequestDataSchema
|
|
27
30
|
|
|
28
31
|
try:
|
|
29
32
|
import pydantic
|
|
@@ -517,6 +520,7 @@ class LlamaExtractClient:
|
|
|
517
520
|
LlamaExtractSettings,
|
|
518
521
|
LlamaParseParameters,
|
|
519
522
|
LlamaParseParametersPriority,
|
|
523
|
+
MultimodalParseResolution,
|
|
520
524
|
ParsingMode,
|
|
521
525
|
)
|
|
522
526
|
from llama_cloud.client import LlamaCloud
|
|
@@ -542,6 +546,7 @@ class LlamaExtractClient:
|
|
|
542
546
|
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
543
547
|
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
544
548
|
),
|
|
549
|
+
multimodal_parse_resolution=MultimodalParseResolution.MEDIUM,
|
|
545
550
|
),
|
|
546
551
|
)
|
|
547
552
|
"""
|
|
@@ -895,6 +900,92 @@ class LlamaExtractClient:
|
|
|
895
900
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
896
901
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
897
902
|
|
|
903
|
+
def extract_stateless(
|
|
904
|
+
self,
|
|
905
|
+
*,
|
|
906
|
+
project_id: typing.Optional[str] = None,
|
|
907
|
+
organization_id: typing.Optional[str] = None,
|
|
908
|
+
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
909
|
+
data_schema: ExtractStatelessRequestDataSchema,
|
|
910
|
+
config: ExtractConfig,
|
|
911
|
+
file_id: typing.Optional[str] = OMIT,
|
|
912
|
+
text: typing.Optional[str] = OMIT,
|
|
913
|
+
file: typing.Optional[FileData] = OMIT,
|
|
914
|
+
) -> ExtractJob:
|
|
915
|
+
"""
|
|
916
|
+
Stateless extraction endpoint that uses a default extraction agent in the user's default project.
|
|
917
|
+
Requires data_schema, config, and either file_id, text, or base64 encoded file data.
|
|
918
|
+
|
|
919
|
+
Parameters:
|
|
920
|
+
- project_id: typing.Optional[str].
|
|
921
|
+
|
|
922
|
+
- organization_id: typing.Optional[str].
|
|
923
|
+
|
|
924
|
+
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
925
|
+
|
|
926
|
+
- data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
|
|
927
|
+
|
|
928
|
+
- config: ExtractConfig. The configuration parameters for the extraction
|
|
929
|
+
|
|
930
|
+
- file_id: typing.Optional[str].
|
|
931
|
+
|
|
932
|
+
- text: typing.Optional[str].
|
|
933
|
+
|
|
934
|
+
- file: typing.Optional[FileData].
|
|
935
|
+
---
|
|
936
|
+
from llama_cloud import (
|
|
937
|
+
DocumentChunkMode,
|
|
938
|
+
ExtractConfig,
|
|
939
|
+
ExtractConfigPriority,
|
|
940
|
+
ExtractMode,
|
|
941
|
+
ExtractTarget,
|
|
942
|
+
FileData,
|
|
943
|
+
)
|
|
944
|
+
from llama_cloud.client import LlamaCloud
|
|
945
|
+
|
|
946
|
+
client = LlamaCloud(
|
|
947
|
+
token="YOUR_TOKEN",
|
|
948
|
+
)
|
|
949
|
+
client.llama_extract.extract_stateless(
|
|
950
|
+
config=ExtractConfig(
|
|
951
|
+
priority=ExtractConfigPriority.LOW,
|
|
952
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
953
|
+
extraction_mode=ExtractMode.FAST,
|
|
954
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
955
|
+
),
|
|
956
|
+
file=FileData(
|
|
957
|
+
data="string",
|
|
958
|
+
mime_type="string",
|
|
959
|
+
),
|
|
960
|
+
)
|
|
961
|
+
"""
|
|
962
|
+
_request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
|
|
963
|
+
if webhook_configurations is not OMIT:
|
|
964
|
+
_request["webhook_configurations"] = webhook_configurations
|
|
965
|
+
if file_id is not OMIT:
|
|
966
|
+
_request["file_id"] = file_id
|
|
967
|
+
if text is not OMIT:
|
|
968
|
+
_request["text"] = text
|
|
969
|
+
if file is not OMIT:
|
|
970
|
+
_request["file"] = file
|
|
971
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
972
|
+
"POST",
|
|
973
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
|
|
974
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
975
|
+
json=jsonable_encoder(_request),
|
|
976
|
+
headers=self._client_wrapper.get_headers(),
|
|
977
|
+
timeout=60,
|
|
978
|
+
)
|
|
979
|
+
if 200 <= _response.status_code < 300:
|
|
980
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
981
|
+
if _response.status_code == 422:
|
|
982
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
983
|
+
try:
|
|
984
|
+
_response_json = _response.json()
|
|
985
|
+
except JSONDecodeError:
|
|
986
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
987
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
988
|
+
|
|
898
989
|
|
|
899
990
|
class AsyncLlamaExtractClient:
|
|
900
991
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
@@ -1376,6 +1467,7 @@ class AsyncLlamaExtractClient:
|
|
|
1376
1467
|
LlamaExtractSettings,
|
|
1377
1468
|
LlamaParseParameters,
|
|
1378
1469
|
LlamaParseParametersPriority,
|
|
1470
|
+
MultimodalParseResolution,
|
|
1379
1471
|
ParsingMode,
|
|
1380
1472
|
)
|
|
1381
1473
|
from llama_cloud.client import AsyncLlamaCloud
|
|
@@ -1401,6 +1493,7 @@ class AsyncLlamaExtractClient:
|
|
|
1401
1493
|
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
1402
1494
|
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
1403
1495
|
),
|
|
1496
|
+
multimodal_parse_resolution=MultimodalParseResolution.MEDIUM,
|
|
1404
1497
|
),
|
|
1405
1498
|
)
|
|
1406
1499
|
"""
|
|
@@ -1753,3 +1846,89 @@ class AsyncLlamaExtractClient:
|
|
|
1753
1846
|
except JSONDecodeError:
|
|
1754
1847
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1755
1848
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1849
|
+
|
|
1850
|
+
async def extract_stateless(
|
|
1851
|
+
self,
|
|
1852
|
+
*,
|
|
1853
|
+
project_id: typing.Optional[str] = None,
|
|
1854
|
+
organization_id: typing.Optional[str] = None,
|
|
1855
|
+
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
1856
|
+
data_schema: ExtractStatelessRequestDataSchema,
|
|
1857
|
+
config: ExtractConfig,
|
|
1858
|
+
file_id: typing.Optional[str] = OMIT,
|
|
1859
|
+
text: typing.Optional[str] = OMIT,
|
|
1860
|
+
file: typing.Optional[FileData] = OMIT,
|
|
1861
|
+
) -> ExtractJob:
|
|
1862
|
+
"""
|
|
1863
|
+
Stateless extraction endpoint that uses a default extraction agent in the user's default project.
|
|
1864
|
+
Requires data_schema, config, and either file_id, text, or base64 encoded file data.
|
|
1865
|
+
|
|
1866
|
+
Parameters:
|
|
1867
|
+
- project_id: typing.Optional[str].
|
|
1868
|
+
|
|
1869
|
+
- organization_id: typing.Optional[str].
|
|
1870
|
+
|
|
1871
|
+
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
1872
|
+
|
|
1873
|
+
- data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
|
|
1874
|
+
|
|
1875
|
+
- config: ExtractConfig. The configuration parameters for the extraction
|
|
1876
|
+
|
|
1877
|
+
- file_id: typing.Optional[str].
|
|
1878
|
+
|
|
1879
|
+
- text: typing.Optional[str].
|
|
1880
|
+
|
|
1881
|
+
- file: typing.Optional[FileData].
|
|
1882
|
+
---
|
|
1883
|
+
from llama_cloud import (
|
|
1884
|
+
DocumentChunkMode,
|
|
1885
|
+
ExtractConfig,
|
|
1886
|
+
ExtractConfigPriority,
|
|
1887
|
+
ExtractMode,
|
|
1888
|
+
ExtractTarget,
|
|
1889
|
+
FileData,
|
|
1890
|
+
)
|
|
1891
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1892
|
+
|
|
1893
|
+
client = AsyncLlamaCloud(
|
|
1894
|
+
token="YOUR_TOKEN",
|
|
1895
|
+
)
|
|
1896
|
+
await client.llama_extract.extract_stateless(
|
|
1897
|
+
config=ExtractConfig(
|
|
1898
|
+
priority=ExtractConfigPriority.LOW,
|
|
1899
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
1900
|
+
extraction_mode=ExtractMode.FAST,
|
|
1901
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
1902
|
+
),
|
|
1903
|
+
file=FileData(
|
|
1904
|
+
data="string",
|
|
1905
|
+
mime_type="string",
|
|
1906
|
+
),
|
|
1907
|
+
)
|
|
1908
|
+
"""
|
|
1909
|
+
_request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
|
|
1910
|
+
if webhook_configurations is not OMIT:
|
|
1911
|
+
_request["webhook_configurations"] = webhook_configurations
|
|
1912
|
+
if file_id is not OMIT:
|
|
1913
|
+
_request["file_id"] = file_id
|
|
1914
|
+
if text is not OMIT:
|
|
1915
|
+
_request["text"] = text
|
|
1916
|
+
if file is not OMIT:
|
|
1917
|
+
_request["file"] = file
|
|
1918
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1919
|
+
"POST",
|
|
1920
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
|
|
1921
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1922
|
+
json=jsonable_encoder(_request),
|
|
1923
|
+
headers=self._client_wrapper.get_headers(),
|
|
1924
|
+
timeout=60,
|
|
1925
|
+
)
|
|
1926
|
+
if 200 <= _response.status_code < 300:
|
|
1927
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1928
|
+
if _response.status_code == 422:
|
|
1929
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1930
|
+
try:
|
|
1931
|
+
_response_json = _response.json()
|
|
1932
|
+
except JSONDecodeError:
|
|
1933
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1934
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
@@ -8,6 +8,8 @@ from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatch
|
|
|
8
8
|
from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
|
|
9
9
|
from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
10
10
|
from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
|
|
11
|
+
from .extract_stateless_request_data_schema import ExtractStatelessRequestDataSchema
|
|
12
|
+
from .extract_stateless_request_data_schema_zero_value import ExtractStatelessRequestDataSchemaZeroValue
|
|
11
13
|
|
|
12
14
|
__all__ = [
|
|
13
15
|
"ExtractAgentCreateDataSchema",
|
|
@@ -18,4 +20,6 @@ __all__ = [
|
|
|
18
20
|
"ExtractJobCreateBatchDataSchemaOverrideZeroValue",
|
|
19
21
|
"ExtractSchemaValidateRequestDataSchema",
|
|
20
22
|
"ExtractSchemaValidateRequestDataSchemaZeroValue",
|
|
23
|
+
"ExtractStatelessRequestDataSchema",
|
|
24
|
+
"ExtractStatelessRequestDataSchemaZeroValue",
|
|
21
25
|
]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
from .extract_stateless_request_data_schema_zero_value import ExtractStatelessRequestDataSchemaZeroValue
|
|
6
|
+
|
|
7
|
+
ExtractStatelessRequestDataSchema = typing.Union[
|
|
8
|
+
typing.Dict[str, typing.Optional[ExtractStatelessRequestDataSchemaZeroValue]], str
|
|
9
|
+
]
|
|
@@ -118,6 +118,7 @@ class ParsingClient:
|
|
|
118
118
|
output_s_3_region: str,
|
|
119
119
|
target_pages: str,
|
|
120
120
|
webhook_url: str,
|
|
121
|
+
webhook_configurations: str,
|
|
121
122
|
job_timeout_in_seconds: float,
|
|
122
123
|
job_timeout_extra_time_per_page_in_seconds: float,
|
|
123
124
|
) -> ParsingJob:
|
|
@@ -151,6 +152,8 @@ class ParsingClient:
|
|
|
151
152
|
|
|
152
153
|
- webhook_url: str.
|
|
153
154
|
|
|
155
|
+
- webhook_configurations: str.
|
|
156
|
+
|
|
154
157
|
- job_timeout_in_seconds: float.
|
|
155
158
|
|
|
156
159
|
- job_timeout_extra_time_per_page_in_seconds: float.
|
|
@@ -166,6 +169,7 @@ class ParsingClient:
|
|
|
166
169
|
"output_s3_region": output_s_3_region,
|
|
167
170
|
"target_pages": target_pages,
|
|
168
171
|
"webhook_url": webhook_url,
|
|
172
|
+
"webhook_configurations": webhook_configurations,
|
|
169
173
|
"job_timeout_in_seconds": job_timeout_in_seconds,
|
|
170
174
|
"job_timeout_extra_time_per_page_in_seconds": job_timeout_extra_time_per_page_in_seconds,
|
|
171
175
|
}
|
|
@@ -242,6 +246,7 @@ class ParsingClient:
|
|
|
242
246
|
page_separator: str,
|
|
243
247
|
page_suffix: str,
|
|
244
248
|
preserve_layout_alignment_across_pages: bool,
|
|
249
|
+
preserve_very_small_text: bool,
|
|
245
250
|
skip_diagonal_text: bool,
|
|
246
251
|
spreadsheet_extract_sub_tables: bool,
|
|
247
252
|
structured_output: bool,
|
|
@@ -253,6 +258,7 @@ class ParsingClient:
|
|
|
253
258
|
vendor_multimodal_model_name: str,
|
|
254
259
|
model: str,
|
|
255
260
|
webhook_url: str,
|
|
261
|
+
webhook_configurations: str,
|
|
256
262
|
preset: str,
|
|
257
263
|
parse_mode: typing.Optional[ParsingMode] = OMIT,
|
|
258
264
|
page_error_tolerance: float,
|
|
@@ -389,6 +395,8 @@ class ParsingClient:
|
|
|
389
395
|
|
|
390
396
|
- preserve_layout_alignment_across_pages: bool.
|
|
391
397
|
|
|
398
|
+
- preserve_very_small_text: bool.
|
|
399
|
+
|
|
392
400
|
- skip_diagonal_text: bool.
|
|
393
401
|
|
|
394
402
|
- spreadsheet_extract_sub_tables: bool.
|
|
@@ -411,6 +419,8 @@ class ParsingClient:
|
|
|
411
419
|
|
|
412
420
|
- webhook_url: str.
|
|
413
421
|
|
|
422
|
+
- webhook_configurations: str.
|
|
423
|
+
|
|
414
424
|
- preset: str.
|
|
415
425
|
|
|
416
426
|
- parse_mode: typing.Optional[ParsingMode].
|
|
@@ -530,6 +540,7 @@ class ParsingClient:
|
|
|
530
540
|
"page_separator": page_separator,
|
|
531
541
|
"page_suffix": page_suffix,
|
|
532
542
|
"preserve_layout_alignment_across_pages": preserve_layout_alignment_across_pages,
|
|
543
|
+
"preserve_very_small_text": preserve_very_small_text,
|
|
533
544
|
"skip_diagonal_text": skip_diagonal_text,
|
|
534
545
|
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
535
546
|
"structured_output": structured_output,
|
|
@@ -541,6 +552,7 @@ class ParsingClient:
|
|
|
541
552
|
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
542
553
|
"model": model,
|
|
543
554
|
"webhook_url": webhook_url,
|
|
555
|
+
"webhook_configurations": webhook_configurations,
|
|
544
556
|
"preset": preset,
|
|
545
557
|
"page_error_tolerance": page_error_tolerance,
|
|
546
558
|
"replace_failed_page_with_error_message_prefix": replace_failed_page_with_error_message_prefix,
|
|
@@ -1278,6 +1290,7 @@ class AsyncParsingClient:
|
|
|
1278
1290
|
output_s_3_region: str,
|
|
1279
1291
|
target_pages: str,
|
|
1280
1292
|
webhook_url: str,
|
|
1293
|
+
webhook_configurations: str,
|
|
1281
1294
|
job_timeout_in_seconds: float,
|
|
1282
1295
|
job_timeout_extra_time_per_page_in_seconds: float,
|
|
1283
1296
|
) -> ParsingJob:
|
|
@@ -1311,6 +1324,8 @@ class AsyncParsingClient:
|
|
|
1311
1324
|
|
|
1312
1325
|
- webhook_url: str.
|
|
1313
1326
|
|
|
1327
|
+
- webhook_configurations: str.
|
|
1328
|
+
|
|
1314
1329
|
- job_timeout_in_seconds: float.
|
|
1315
1330
|
|
|
1316
1331
|
- job_timeout_extra_time_per_page_in_seconds: float.
|
|
@@ -1326,6 +1341,7 @@ class AsyncParsingClient:
|
|
|
1326
1341
|
"output_s3_region": output_s_3_region,
|
|
1327
1342
|
"target_pages": target_pages,
|
|
1328
1343
|
"webhook_url": webhook_url,
|
|
1344
|
+
"webhook_configurations": webhook_configurations,
|
|
1329
1345
|
"job_timeout_in_seconds": job_timeout_in_seconds,
|
|
1330
1346
|
"job_timeout_extra_time_per_page_in_seconds": job_timeout_extra_time_per_page_in_seconds,
|
|
1331
1347
|
}
|
|
@@ -1402,6 +1418,7 @@ class AsyncParsingClient:
|
|
|
1402
1418
|
page_separator: str,
|
|
1403
1419
|
page_suffix: str,
|
|
1404
1420
|
preserve_layout_alignment_across_pages: bool,
|
|
1421
|
+
preserve_very_small_text: bool,
|
|
1405
1422
|
skip_diagonal_text: bool,
|
|
1406
1423
|
spreadsheet_extract_sub_tables: bool,
|
|
1407
1424
|
structured_output: bool,
|
|
@@ -1413,6 +1430,7 @@ class AsyncParsingClient:
|
|
|
1413
1430
|
vendor_multimodal_model_name: str,
|
|
1414
1431
|
model: str,
|
|
1415
1432
|
webhook_url: str,
|
|
1433
|
+
webhook_configurations: str,
|
|
1416
1434
|
preset: str,
|
|
1417
1435
|
parse_mode: typing.Optional[ParsingMode] = OMIT,
|
|
1418
1436
|
page_error_tolerance: float,
|
|
@@ -1549,6 +1567,8 @@ class AsyncParsingClient:
|
|
|
1549
1567
|
|
|
1550
1568
|
- preserve_layout_alignment_across_pages: bool.
|
|
1551
1569
|
|
|
1570
|
+
- preserve_very_small_text: bool.
|
|
1571
|
+
|
|
1552
1572
|
- skip_diagonal_text: bool.
|
|
1553
1573
|
|
|
1554
1574
|
- spreadsheet_extract_sub_tables: bool.
|
|
@@ -1571,6 +1591,8 @@ class AsyncParsingClient:
|
|
|
1571
1591
|
|
|
1572
1592
|
- webhook_url: str.
|
|
1573
1593
|
|
|
1594
|
+
- webhook_configurations: str.
|
|
1595
|
+
|
|
1574
1596
|
- preset: str.
|
|
1575
1597
|
|
|
1576
1598
|
- parse_mode: typing.Optional[ParsingMode].
|
|
@@ -1690,6 +1712,7 @@ class AsyncParsingClient:
|
|
|
1690
1712
|
"page_separator": page_separator,
|
|
1691
1713
|
"page_suffix": page_suffix,
|
|
1692
1714
|
"preserve_layout_alignment_across_pages": preserve_layout_alignment_across_pages,
|
|
1715
|
+
"preserve_very_small_text": preserve_very_small_text,
|
|
1693
1716
|
"skip_diagonal_text": skip_diagonal_text,
|
|
1694
1717
|
"spreadsheet_extract_sub_tables": spreadsheet_extract_sub_tables,
|
|
1695
1718
|
"structured_output": structured_output,
|
|
@@ -1701,6 +1724,7 @@ class AsyncParsingClient:
|
|
|
1701
1724
|
"vendor_multimodal_model_name": vendor_multimodal_model_name,
|
|
1702
1725
|
"model": model,
|
|
1703
1726
|
"webhook_url": webhook_url,
|
|
1727
|
+
"webhook_configurations": webhook_configurations,
|
|
1704
1728
|
"preset": preset,
|
|
1705
1729
|
"page_error_tolerance": page_error_tolerance,
|
|
1706
1730
|
"replace_failed_page_with_error_message_prefix": replace_failed_page_with_error_message_prefix,
|