llama-cloud 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +36 -18
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +20 -0
- llama_cloud/resources/beta/__init__.py +2 -0
- llama_cloud/resources/beta/client.py +371 -0
- llama_cloud/resources/chat_apps/client.py +4 -4
- llama_cloud/resources/embedding_model_configs/client.py +82 -22
- llama_cloud/resources/llama_extract/__init__.py +21 -0
- llama_cloud/resources/llama_extract/client.py +223 -114
- llama_cloud/resources/llama_extract/types/__init__.py +21 -0
- llama_cloud/resources/parsing/client.py +83 -29
- llama_cloud/resources/pipelines/client.py +107 -2
- llama_cloud/resources/projects/client.py +70 -0
- llama_cloud/types/__init__.py +26 -26
- llama_cloud/types/{parsing_usage.py → audio_block.py} +5 -3
- llama_cloud/types/batch.py +47 -0
- llama_cloud/types/batch_item.py +40 -0
- llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
- llama_cloud/types/{extract_schema_validate_request.py → batch_public_output.py} +7 -3
- llama_cloud/types/cloud_confluence_data_source.py +1 -0
- llama_cloud/types/cloud_postgres_vector_store.py +2 -0
- llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
- llama_cloud/types/extract_config.py +2 -0
- llama_cloud/types/fail_page_mode.py +29 -0
- llama_cloud/types/{extract_agent_create.py → file_count_by_status_response.py} +8 -10
- llama_cloud/types/file_parse_public.py +36 -0
- llama_cloud/types/job_names.py +8 -12
- llama_cloud/types/llama_extract_settings.py +2 -2
- llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
- llama_cloud/types/llama_parse_parameters.py +10 -2
- llama_cloud/types/markdown_node_parser.py +4 -0
- llama_cloud/types/message_role.py +4 -0
- llama_cloud/types/pg_vector_distance_method.py +43 -0
- llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py} +12 -9
- llama_cloud/types/pg_vector_vector_type.py +35 -0
- llama_cloud/types/pipeline_create.py +1 -0
- llama_cloud/types/pipeline_data_source.py +3 -0
- llama_cloud/types/pipeline_data_source_status.py +33 -0
- llama_cloud/types/pipeline_file.py +1 -0
- llama_cloud/types/prompt_conf.py +3 -0
- llama_cloud/types/struct_parse_conf.py +4 -1
- llama_cloud/types/supported_llm_model_names.py +0 -12
- llama_cloud/types/token_text_splitter.py +3 -0
- {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
- {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +55 -45
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
- {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
- {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0
|
@@ -10,18 +10,19 @@ from ...core.jsonable_encoder import jsonable_encoder
|
|
|
10
10
|
from ...core.remove_none_from_dict import remove_none_from_dict
|
|
11
11
|
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
|
12
12
|
from ...types.extract_agent import ExtractAgent
|
|
13
|
-
from ...types.extract_agent_create import ExtractAgentCreate
|
|
14
|
-
from ...types.extract_agent_update import ExtractAgentUpdate
|
|
13
|
+
from ...types.extract_config import ExtractConfig
|
|
15
14
|
from ...types.extract_job import ExtractJob
|
|
16
15
|
from ...types.extract_job_create import ExtractJobCreate
|
|
17
|
-
from ...types.extract_job_create_batch import ExtractJobCreateBatch
|
|
18
16
|
from ...types.extract_resultset import ExtractResultset
|
|
19
17
|
from ...types.extract_run import ExtractRun
|
|
20
|
-
from ...types.extract_schema_validate_request import ExtractSchemaValidateRequest
|
|
21
18
|
from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
|
|
22
19
|
from ...types.http_validation_error import HttpValidationError
|
|
23
20
|
from ...types.llama_extract_settings import LlamaExtractSettings
|
|
24
21
|
from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
|
|
22
|
+
from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
|
|
23
|
+
from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
|
|
24
|
+
from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
|
|
25
|
+
from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
25
26
|
|
|
26
27
|
try:
|
|
27
28
|
import pydantic
|
|
@@ -39,17 +40,19 @@ class LlamaExtractClient:
|
|
|
39
40
|
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
40
41
|
self._client_wrapper = client_wrapper
|
|
41
42
|
|
|
42
|
-
def list_extraction_agents(self, *, project_id:
|
|
43
|
+
def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
|
|
43
44
|
"""
|
|
44
45
|
Parameters:
|
|
45
|
-
- project_id:
|
|
46
|
+
- project_id: str.
|
|
46
47
|
---
|
|
47
48
|
from llama_cloud.client import LlamaCloud
|
|
48
49
|
|
|
49
50
|
client = LlamaCloud(
|
|
50
51
|
token="YOUR_TOKEN",
|
|
51
52
|
)
|
|
52
|
-
client.llama_extract.list_extraction_agents(
|
|
53
|
+
client.llama_extract.list_extraction_agents(
|
|
54
|
+
project_id="string",
|
|
55
|
+
)
|
|
53
56
|
"""
|
|
54
57
|
_response = self._client_wrapper.httpx_client.request(
|
|
55
58
|
"GET",
|
|
@@ -73,7 +76,9 @@ class LlamaExtractClient:
|
|
|
73
76
|
*,
|
|
74
77
|
project_id: typing.Optional[str] = None,
|
|
75
78
|
organization_id: typing.Optional[str] = None,
|
|
76
|
-
|
|
79
|
+
name: str,
|
|
80
|
+
data_schema: ExtractAgentCreateDataSchema,
|
|
81
|
+
config: ExtractConfig,
|
|
77
82
|
) -> ExtractAgent:
|
|
78
83
|
"""
|
|
79
84
|
Parameters:
|
|
@@ -81,26 +86,23 @@ class LlamaExtractClient:
|
|
|
81
86
|
|
|
82
87
|
- organization_id: typing.Optional[str].
|
|
83
88
|
|
|
84
|
-
-
|
|
89
|
+
- name: str. The name of the extraction schema
|
|
90
|
+
|
|
91
|
+
- data_schema: ExtractAgentCreateDataSchema. The schema of the data.
|
|
92
|
+
|
|
93
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
85
94
|
---
|
|
86
|
-
from llama_cloud import (
|
|
87
|
-
ExtractAgentCreate,
|
|
88
|
-
ExtractConfig,
|
|
89
|
-
ExtractMode,
|
|
90
|
-
ExtractTarget,
|
|
91
|
-
)
|
|
95
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
92
96
|
from llama_cloud.client import LlamaCloud
|
|
93
97
|
|
|
94
98
|
client = LlamaCloud(
|
|
95
99
|
token="YOUR_TOKEN",
|
|
96
100
|
)
|
|
97
101
|
client.llama_extract.create_extraction_agent(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
extraction_mode=ExtractMode.FAST,
|
|
103
|
-
),
|
|
102
|
+
name="string",
|
|
103
|
+
config=ExtractConfig(
|
|
104
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
105
|
+
extraction_mode=ExtractMode.FAST,
|
|
104
106
|
),
|
|
105
107
|
)
|
|
106
108
|
"""
|
|
@@ -108,7 +110,7 @@ class LlamaExtractClient:
|
|
|
108
110
|
"POST",
|
|
109
111
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
110
112
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
111
|
-
json=jsonable_encoder(
|
|
113
|
+
json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
|
|
112
114
|
headers=self._client_wrapper.get_headers(),
|
|
113
115
|
timeout=60,
|
|
114
116
|
)
|
|
@@ -122,30 +124,29 @@ class LlamaExtractClient:
|
|
|
122
124
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
123
125
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
124
126
|
|
|
125
|
-
def validate_extraction_schema(
|
|
127
|
+
def validate_extraction_schema(
|
|
128
|
+
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
129
|
+
) -> ExtractSchemaValidateResponse:
|
|
126
130
|
"""
|
|
127
131
|
Validates an extraction agent's schema definition.
|
|
128
132
|
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
129
133
|
|
|
130
134
|
Parameters:
|
|
131
|
-
-
|
|
135
|
+
- data_schema: ExtractSchemaValidateRequestDataSchema.
|
|
132
136
|
---
|
|
133
|
-
from llama_cloud import ExtractSchemaValidateRequest
|
|
134
137
|
from llama_cloud.client import LlamaCloud
|
|
135
138
|
|
|
136
139
|
client = LlamaCloud(
|
|
137
140
|
token="YOUR_TOKEN",
|
|
138
141
|
)
|
|
139
|
-
client.llama_extract.validate_extraction_schema(
|
|
140
|
-
request=ExtractSchemaValidateRequest(),
|
|
141
|
-
)
|
|
142
|
+
client.llama_extract.validate_extraction_schema()
|
|
142
143
|
"""
|
|
143
144
|
_response = self._client_wrapper.httpx_client.request(
|
|
144
145
|
"POST",
|
|
145
146
|
urllib.parse.urljoin(
|
|
146
147
|
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
|
|
147
148
|
),
|
|
148
|
-
json=jsonable_encoder(
|
|
149
|
+
json=jsonable_encoder({"data_schema": data_schema}),
|
|
149
150
|
headers=self._client_wrapper.get_headers(),
|
|
150
151
|
timeout=60,
|
|
151
152
|
)
|
|
@@ -226,19 +227,18 @@ class LlamaExtractClient:
|
|
|
226
227
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
227
228
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
228
229
|
|
|
229
|
-
def update_extraction_agent(
|
|
230
|
+
def update_extraction_agent(
|
|
231
|
+
self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
|
|
232
|
+
) -> ExtractAgent:
|
|
230
233
|
"""
|
|
231
234
|
Parameters:
|
|
232
235
|
- extraction_agent_id: str.
|
|
233
236
|
|
|
234
|
-
-
|
|
237
|
+
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
238
|
+
|
|
239
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
235
240
|
---
|
|
236
|
-
from llama_cloud import (
|
|
237
|
-
ExtractAgentUpdate,
|
|
238
|
-
ExtractConfig,
|
|
239
|
-
ExtractMode,
|
|
240
|
-
ExtractTarget,
|
|
241
|
-
)
|
|
241
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
242
242
|
from llama_cloud.client import LlamaCloud
|
|
243
243
|
|
|
244
244
|
client = LlamaCloud(
|
|
@@ -246,11 +246,9 @@ class LlamaExtractClient:
|
|
|
246
246
|
)
|
|
247
247
|
client.llama_extract.update_extraction_agent(
|
|
248
248
|
extraction_agent_id="string",
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
extraction_mode=ExtractMode.FAST,
|
|
253
|
-
),
|
|
249
|
+
config=ExtractConfig(
|
|
250
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
251
|
+
extraction_mode=ExtractMode.FAST,
|
|
254
252
|
),
|
|
255
253
|
)
|
|
256
254
|
"""
|
|
@@ -259,7 +257,7 @@ class LlamaExtractClient:
|
|
|
259
257
|
urllib.parse.urljoin(
|
|
260
258
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
261
259
|
),
|
|
262
|
-
json=jsonable_encoder(
|
|
260
|
+
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
263
261
|
headers=self._client_wrapper.get_headers(),
|
|
264
262
|
timeout=60,
|
|
265
263
|
)
|
|
@@ -434,6 +432,7 @@ class LlamaExtractClient:
|
|
|
434
432
|
ExtractJobCreate,
|
|
435
433
|
ExtractMode,
|
|
436
434
|
ExtractTarget,
|
|
435
|
+
FailPageMode,
|
|
437
436
|
LlamaExtractSettings,
|
|
438
437
|
LlamaParseParameters,
|
|
439
438
|
ParsingMode,
|
|
@@ -456,6 +455,7 @@ class LlamaExtractClient:
|
|
|
456
455
|
chunk_mode=ChunkMode.PAGE,
|
|
457
456
|
llama_parse_params=LlamaParseParameters(
|
|
458
457
|
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
458
|
+
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
459
459
|
),
|
|
460
460
|
),
|
|
461
461
|
)
|
|
@@ -481,42 +481,98 @@ class LlamaExtractClient:
|
|
|
481
481
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
482
482
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
483
483
|
|
|
484
|
+
def run_job_on_file(
|
|
485
|
+
self,
|
|
486
|
+
*,
|
|
487
|
+
from_ui: typing.Optional[bool] = None,
|
|
488
|
+
extraction_agent_id: str,
|
|
489
|
+
file: typing.IO,
|
|
490
|
+
data_schema_override: typing.Optional[str] = None,
|
|
491
|
+
config_override: typing.Optional[str] = None,
|
|
492
|
+
) -> ExtractJob:
|
|
493
|
+
"""
|
|
494
|
+
Parameters:
|
|
495
|
+
- from_ui: typing.Optional[bool].
|
|
496
|
+
|
|
497
|
+
- extraction_agent_id: str.
|
|
498
|
+
|
|
499
|
+
- file: typing.IO.
|
|
500
|
+
|
|
501
|
+
- data_schema_override: typing.Optional[str].
|
|
502
|
+
|
|
503
|
+
- config_override: typing.Optional[str].
|
|
504
|
+
"""
|
|
505
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
506
|
+
"POST",
|
|
507
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
508
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
509
|
+
data=jsonable_encoder(
|
|
510
|
+
{
|
|
511
|
+
"extraction_agent_id": extraction_agent_id,
|
|
512
|
+
"data_schema_override": data_schema_override,
|
|
513
|
+
"config_override": config_override,
|
|
514
|
+
}
|
|
515
|
+
),
|
|
516
|
+
files={"file": file},
|
|
517
|
+
headers=self._client_wrapper.get_headers(),
|
|
518
|
+
timeout=60,
|
|
519
|
+
)
|
|
520
|
+
if 200 <= _response.status_code < 300:
|
|
521
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
522
|
+
if _response.status_code == 422:
|
|
523
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
524
|
+
try:
|
|
525
|
+
_response_json = _response.json()
|
|
526
|
+
except JSONDecodeError:
|
|
527
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
528
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
529
|
+
|
|
484
530
|
def run_batch_jobs(
|
|
485
|
-
self,
|
|
531
|
+
self,
|
|
532
|
+
*,
|
|
533
|
+
from_ui: typing.Optional[bool] = None,
|
|
534
|
+
extraction_agent_id: str,
|
|
535
|
+
file_ids: typing.List[str],
|
|
536
|
+
data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
|
|
537
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
486
538
|
) -> typing.List[ExtractJob]:
|
|
487
539
|
"""
|
|
488
540
|
Parameters:
|
|
489
541
|
- from_ui: typing.Optional[bool].
|
|
490
542
|
|
|
491
|
-
-
|
|
543
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
544
|
+
|
|
545
|
+
- file_ids: typing.List[str]. The ids of the files
|
|
546
|
+
|
|
547
|
+
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
548
|
+
|
|
549
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
492
550
|
---
|
|
493
|
-
from llama_cloud import (
|
|
494
|
-
ExtractConfig,
|
|
495
|
-
ExtractJobCreateBatch,
|
|
496
|
-
ExtractMode,
|
|
497
|
-
ExtractTarget,
|
|
498
|
-
)
|
|
551
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
499
552
|
from llama_cloud.client import LlamaCloud
|
|
500
553
|
|
|
501
554
|
client = LlamaCloud(
|
|
502
555
|
token="YOUR_TOKEN",
|
|
503
556
|
)
|
|
504
557
|
client.llama_extract.run_batch_jobs(
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
extraction_mode=ExtractMode.FAST,
|
|
511
|
-
),
|
|
558
|
+
extraction_agent_id="string",
|
|
559
|
+
file_ids=[],
|
|
560
|
+
config_override=ExtractConfig(
|
|
561
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
562
|
+
extraction_mode=ExtractMode.FAST,
|
|
512
563
|
),
|
|
513
564
|
)
|
|
514
565
|
"""
|
|
566
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
567
|
+
if data_schema_override is not OMIT:
|
|
568
|
+
_request["data_schema_override"] = data_schema_override
|
|
569
|
+
if config_override is not OMIT:
|
|
570
|
+
_request["config_override"] = config_override
|
|
515
571
|
_response = self._client_wrapper.httpx_client.request(
|
|
516
572
|
"POST",
|
|
517
573
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
|
|
518
574
|
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
519
|
-
json=jsonable_encoder(
|
|
575
|
+
json=jsonable_encoder(_request),
|
|
520
576
|
headers=self._client_wrapper.get_headers(),
|
|
521
577
|
timeout=60,
|
|
522
578
|
)
|
|
@@ -723,17 +779,19 @@ class AsyncLlamaExtractClient:
|
|
|
723
779
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
724
780
|
self._client_wrapper = client_wrapper
|
|
725
781
|
|
|
726
|
-
async def list_extraction_agents(self, *, project_id:
|
|
782
|
+
async def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
|
|
727
783
|
"""
|
|
728
784
|
Parameters:
|
|
729
|
-
- project_id:
|
|
785
|
+
- project_id: str.
|
|
730
786
|
---
|
|
731
787
|
from llama_cloud.client import AsyncLlamaCloud
|
|
732
788
|
|
|
733
789
|
client = AsyncLlamaCloud(
|
|
734
790
|
token="YOUR_TOKEN",
|
|
735
791
|
)
|
|
736
|
-
await client.llama_extract.list_extraction_agents(
|
|
792
|
+
await client.llama_extract.list_extraction_agents(
|
|
793
|
+
project_id="string",
|
|
794
|
+
)
|
|
737
795
|
"""
|
|
738
796
|
_response = await self._client_wrapper.httpx_client.request(
|
|
739
797
|
"GET",
|
|
@@ -757,7 +815,9 @@ class AsyncLlamaExtractClient:
|
|
|
757
815
|
*,
|
|
758
816
|
project_id: typing.Optional[str] = None,
|
|
759
817
|
organization_id: typing.Optional[str] = None,
|
|
760
|
-
|
|
818
|
+
name: str,
|
|
819
|
+
data_schema: ExtractAgentCreateDataSchema,
|
|
820
|
+
config: ExtractConfig,
|
|
761
821
|
) -> ExtractAgent:
|
|
762
822
|
"""
|
|
763
823
|
Parameters:
|
|
@@ -765,26 +825,23 @@ class AsyncLlamaExtractClient:
|
|
|
765
825
|
|
|
766
826
|
- organization_id: typing.Optional[str].
|
|
767
827
|
|
|
768
|
-
-
|
|
828
|
+
- name: str. The name of the extraction schema
|
|
829
|
+
|
|
830
|
+
- data_schema: ExtractAgentCreateDataSchema. The schema of the data.
|
|
831
|
+
|
|
832
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
769
833
|
---
|
|
770
|
-
from llama_cloud import (
|
|
771
|
-
ExtractAgentCreate,
|
|
772
|
-
ExtractConfig,
|
|
773
|
-
ExtractMode,
|
|
774
|
-
ExtractTarget,
|
|
775
|
-
)
|
|
834
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
776
835
|
from llama_cloud.client import AsyncLlamaCloud
|
|
777
836
|
|
|
778
837
|
client = AsyncLlamaCloud(
|
|
779
838
|
token="YOUR_TOKEN",
|
|
780
839
|
)
|
|
781
840
|
await client.llama_extract.create_extraction_agent(
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
extraction_mode=ExtractMode.FAST,
|
|
787
|
-
),
|
|
841
|
+
name="string",
|
|
842
|
+
config=ExtractConfig(
|
|
843
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
844
|
+
extraction_mode=ExtractMode.FAST,
|
|
788
845
|
),
|
|
789
846
|
)
|
|
790
847
|
"""
|
|
@@ -792,7 +849,7 @@ class AsyncLlamaExtractClient:
|
|
|
792
849
|
"POST",
|
|
793
850
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
794
851
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
795
|
-
json=jsonable_encoder(
|
|
852
|
+
json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
|
|
796
853
|
headers=self._client_wrapper.get_headers(),
|
|
797
854
|
timeout=60,
|
|
798
855
|
)
|
|
@@ -807,31 +864,28 @@ class AsyncLlamaExtractClient:
|
|
|
807
864
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
808
865
|
|
|
809
866
|
async def validate_extraction_schema(
|
|
810
|
-
self, *,
|
|
867
|
+
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
811
868
|
) -> ExtractSchemaValidateResponse:
|
|
812
869
|
"""
|
|
813
870
|
Validates an extraction agent's schema definition.
|
|
814
871
|
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
815
872
|
|
|
816
873
|
Parameters:
|
|
817
|
-
-
|
|
874
|
+
- data_schema: ExtractSchemaValidateRequestDataSchema.
|
|
818
875
|
---
|
|
819
|
-
from llama_cloud import ExtractSchemaValidateRequest
|
|
820
876
|
from llama_cloud.client import AsyncLlamaCloud
|
|
821
877
|
|
|
822
878
|
client = AsyncLlamaCloud(
|
|
823
879
|
token="YOUR_TOKEN",
|
|
824
880
|
)
|
|
825
|
-
await client.llama_extract.validate_extraction_schema(
|
|
826
|
-
request=ExtractSchemaValidateRequest(),
|
|
827
|
-
)
|
|
881
|
+
await client.llama_extract.validate_extraction_schema()
|
|
828
882
|
"""
|
|
829
883
|
_response = await self._client_wrapper.httpx_client.request(
|
|
830
884
|
"POST",
|
|
831
885
|
urllib.parse.urljoin(
|
|
832
886
|
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
|
|
833
887
|
),
|
|
834
|
-
json=jsonable_encoder(
|
|
888
|
+
json=jsonable_encoder({"data_schema": data_schema}),
|
|
835
889
|
headers=self._client_wrapper.get_headers(),
|
|
836
890
|
timeout=60,
|
|
837
891
|
)
|
|
@@ -912,19 +966,18 @@ class AsyncLlamaExtractClient:
|
|
|
912
966
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
913
967
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
914
968
|
|
|
915
|
-
async def update_extraction_agent(
|
|
969
|
+
async def update_extraction_agent(
|
|
970
|
+
self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
|
|
971
|
+
) -> ExtractAgent:
|
|
916
972
|
"""
|
|
917
973
|
Parameters:
|
|
918
974
|
- extraction_agent_id: str.
|
|
919
975
|
|
|
920
|
-
-
|
|
976
|
+
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
977
|
+
|
|
978
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
921
979
|
---
|
|
922
|
-
from llama_cloud import (
|
|
923
|
-
ExtractAgentUpdate,
|
|
924
|
-
ExtractConfig,
|
|
925
|
-
ExtractMode,
|
|
926
|
-
ExtractTarget,
|
|
927
|
-
)
|
|
980
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
928
981
|
from llama_cloud.client import AsyncLlamaCloud
|
|
929
982
|
|
|
930
983
|
client = AsyncLlamaCloud(
|
|
@@ -932,11 +985,9 @@ class AsyncLlamaExtractClient:
|
|
|
932
985
|
)
|
|
933
986
|
await client.llama_extract.update_extraction_agent(
|
|
934
987
|
extraction_agent_id="string",
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
extraction_mode=ExtractMode.FAST,
|
|
939
|
-
),
|
|
988
|
+
config=ExtractConfig(
|
|
989
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
990
|
+
extraction_mode=ExtractMode.FAST,
|
|
940
991
|
),
|
|
941
992
|
)
|
|
942
993
|
"""
|
|
@@ -945,7 +996,7 @@ class AsyncLlamaExtractClient:
|
|
|
945
996
|
urllib.parse.urljoin(
|
|
946
997
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
947
998
|
),
|
|
948
|
-
json=jsonable_encoder(
|
|
999
|
+
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
949
1000
|
headers=self._client_wrapper.get_headers(),
|
|
950
1001
|
timeout=60,
|
|
951
1002
|
)
|
|
@@ -1120,6 +1171,7 @@ class AsyncLlamaExtractClient:
|
|
|
1120
1171
|
ExtractJobCreate,
|
|
1121
1172
|
ExtractMode,
|
|
1122
1173
|
ExtractTarget,
|
|
1174
|
+
FailPageMode,
|
|
1123
1175
|
LlamaExtractSettings,
|
|
1124
1176
|
LlamaParseParameters,
|
|
1125
1177
|
ParsingMode,
|
|
@@ -1142,6 +1194,7 @@ class AsyncLlamaExtractClient:
|
|
|
1142
1194
|
chunk_mode=ChunkMode.PAGE,
|
|
1143
1195
|
llama_parse_params=LlamaParseParameters(
|
|
1144
1196
|
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
1197
|
+
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
1145
1198
|
),
|
|
1146
1199
|
),
|
|
1147
1200
|
)
|
|
@@ -1167,42 +1220,98 @@ class AsyncLlamaExtractClient:
|
|
|
1167
1220
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1168
1221
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1169
1222
|
|
|
1223
|
+
async def run_job_on_file(
|
|
1224
|
+
self,
|
|
1225
|
+
*,
|
|
1226
|
+
from_ui: typing.Optional[bool] = None,
|
|
1227
|
+
extraction_agent_id: str,
|
|
1228
|
+
file: typing.IO,
|
|
1229
|
+
data_schema_override: typing.Optional[str] = None,
|
|
1230
|
+
config_override: typing.Optional[str] = None,
|
|
1231
|
+
) -> ExtractJob:
|
|
1232
|
+
"""
|
|
1233
|
+
Parameters:
|
|
1234
|
+
- from_ui: typing.Optional[bool].
|
|
1235
|
+
|
|
1236
|
+
- extraction_agent_id: str.
|
|
1237
|
+
|
|
1238
|
+
- file: typing.IO.
|
|
1239
|
+
|
|
1240
|
+
- data_schema_override: typing.Optional[str].
|
|
1241
|
+
|
|
1242
|
+
- config_override: typing.Optional[str].
|
|
1243
|
+
"""
|
|
1244
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1245
|
+
"POST",
|
|
1246
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
1247
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1248
|
+
data=jsonable_encoder(
|
|
1249
|
+
{
|
|
1250
|
+
"extraction_agent_id": extraction_agent_id,
|
|
1251
|
+
"data_schema_override": data_schema_override,
|
|
1252
|
+
"config_override": config_override,
|
|
1253
|
+
}
|
|
1254
|
+
),
|
|
1255
|
+
files={"file": file},
|
|
1256
|
+
headers=self._client_wrapper.get_headers(),
|
|
1257
|
+
timeout=60,
|
|
1258
|
+
)
|
|
1259
|
+
if 200 <= _response.status_code < 300:
|
|
1260
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1261
|
+
if _response.status_code == 422:
|
|
1262
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1263
|
+
try:
|
|
1264
|
+
_response_json = _response.json()
|
|
1265
|
+
except JSONDecodeError:
|
|
1266
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1267
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1268
|
+
|
|
1170
1269
|
async def run_batch_jobs(
|
|
1171
|
-
self,
|
|
1270
|
+
self,
|
|
1271
|
+
*,
|
|
1272
|
+
from_ui: typing.Optional[bool] = None,
|
|
1273
|
+
extraction_agent_id: str,
|
|
1274
|
+
file_ids: typing.List[str],
|
|
1275
|
+
data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
|
|
1276
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
1172
1277
|
) -> typing.List[ExtractJob]:
|
|
1173
1278
|
"""
|
|
1174
1279
|
Parameters:
|
|
1175
1280
|
- from_ui: typing.Optional[bool].
|
|
1176
1281
|
|
|
1177
|
-
-
|
|
1282
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
1283
|
+
|
|
1284
|
+
- file_ids: typing.List[str]. The ids of the files
|
|
1285
|
+
|
|
1286
|
+
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
1287
|
+
|
|
1288
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
1178
1289
|
---
|
|
1179
|
-
from llama_cloud import (
|
|
1180
|
-
ExtractConfig,
|
|
1181
|
-
ExtractJobCreateBatch,
|
|
1182
|
-
ExtractMode,
|
|
1183
|
-
ExtractTarget,
|
|
1184
|
-
)
|
|
1290
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
1185
1291
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1186
1292
|
|
|
1187
1293
|
client = AsyncLlamaCloud(
|
|
1188
1294
|
token="YOUR_TOKEN",
|
|
1189
1295
|
)
|
|
1190
1296
|
await client.llama_extract.run_batch_jobs(
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
extraction_mode=ExtractMode.FAST,
|
|
1197
|
-
),
|
|
1297
|
+
extraction_agent_id="string",
|
|
1298
|
+
file_ids=[],
|
|
1299
|
+
config_override=ExtractConfig(
|
|
1300
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
1301
|
+
extraction_mode=ExtractMode.FAST,
|
|
1198
1302
|
),
|
|
1199
1303
|
)
|
|
1200
1304
|
"""
|
|
1305
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
1306
|
+
if data_schema_override is not OMIT:
|
|
1307
|
+
_request["data_schema_override"] = data_schema_override
|
|
1308
|
+
if config_override is not OMIT:
|
|
1309
|
+
_request["config_override"] = config_override
|
|
1201
1310
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1202
1311
|
"POST",
|
|
1203
1312
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
|
|
1204
1313
|
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1205
|
-
json=jsonable_encoder(
|
|
1314
|
+
json=jsonable_encoder(_request),
|
|
1206
1315
|
headers=self._client_wrapper.get_headers(),
|
|
1207
1316
|
timeout=60,
|
|
1208
1317
|
)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
|
|
4
|
+
from .extract_agent_create_data_schema_zero_value import ExtractAgentCreateDataSchemaZeroValue
|
|
5
|
+
from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
|
|
6
|
+
from .extract_agent_update_data_schema_zero_value import ExtractAgentUpdateDataSchemaZeroValue
|
|
7
|
+
from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
|
|
8
|
+
from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
|
|
9
|
+
from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
10
|
+
from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"ExtractAgentCreateDataSchema",
|
|
14
|
+
"ExtractAgentCreateDataSchemaZeroValue",
|
|
15
|
+
"ExtractAgentUpdateDataSchema",
|
|
16
|
+
"ExtractAgentUpdateDataSchemaZeroValue",
|
|
17
|
+
"ExtractJobCreateBatchDataSchemaOverride",
|
|
18
|
+
"ExtractJobCreateBatchDataSchemaOverrideZeroValue",
|
|
19
|
+
"ExtractSchemaValidateRequestDataSchema",
|
|
20
|
+
"ExtractSchemaValidateRequestDataSchemaZeroValue",
|
|
21
|
+
]
|