llama-cloud: llama_cloud-0.1.18-py3-none-any.whl → llama_cloud-0.1.19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. (The original page linked to further details here; the link target is not preserved in this extract.)
- llama_cloud/__init__.py +36 -16
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +20 -0
- llama_cloud/resources/beta/__init__.py +2 -0
- llama_cloud/resources/beta/client.py +371 -0
- llama_cloud/resources/embedding_model_configs/client.py +82 -22
- llama_cloud/resources/llama_extract/__init__.py +21 -0
- llama_cloud/resources/llama_extract/client.py +227 -114
- llama_cloud/resources/llama_extract/types/__init__.py +21 -0
- llama_cloud/resources/parsing/client.py +115 -4
- llama_cloud/resources/pipelines/client.py +105 -0
- llama_cloud/types/__init__.py +26 -24
- llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
- llama_cloud/types/batch.py +47 -0
- llama_cloud/types/batch_item.py +40 -0
- llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
- llama_cloud/types/{extract_agent_create.py → batch_public_output.py} +7 -10
- llama_cloud/types/cloud_confluence_data_source.py +1 -0
- llama_cloud/types/cloud_postgres_vector_store.py +2 -0
- llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
- llama_cloud/types/extract_config.py +2 -0
- llama_cloud/types/extract_job_create.py +1 -2
- llama_cloud/types/fail_page_mode.py +29 -0
- llama_cloud/types/{extract_job_create_batch.py → file_count_by_status_response.py} +7 -12
- llama_cloud/types/file_parse_public.py +36 -0
- llama_cloud/types/job_names.py +8 -12
- llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
- llama_cloud/types/llama_parse_parameters.py +7 -0
- llama_cloud/types/markdown_node_parser.py +4 -0
- llama_cloud/types/message_role.py +4 -0
- llama_cloud/types/pg_vector_distance_method.py +43 -0
- llama_cloud/types/pg_vector_hnsw_settings.py +45 -0
- llama_cloud/types/pg_vector_vector_type.py +35 -0
- llama_cloud/types/pipeline_create.py +1 -0
- llama_cloud/types/pipeline_data_source.py +3 -0
- llama_cloud/types/pipeline_data_source_status.py +33 -0
- llama_cloud/types/pipeline_file.py +1 -0
- llama_cloud/types/prompt_conf.py +3 -0
- llama_cloud/types/struct_parse_conf.py +4 -1
- llama_cloud/types/token_text_splitter.py +3 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +52 -41
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0
|
@@ -10,18 +10,19 @@ from ...core.jsonable_encoder import jsonable_encoder
|
|
|
10
10
|
from ...core.remove_none_from_dict import remove_none_from_dict
|
|
11
11
|
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
|
12
12
|
from ...types.extract_agent import ExtractAgent
|
|
13
|
-
from ...types.extract_agent_create import ExtractAgentCreate
|
|
14
|
-
from ...types.extract_agent_update import ExtractAgentUpdate
|
|
13
|
+
from ...types.extract_config import ExtractConfig
|
|
15
14
|
from ...types.extract_job import ExtractJob
|
|
16
15
|
from ...types.extract_job_create import ExtractJobCreate
|
|
17
|
-
from ...types.extract_job_create_batch import ExtractJobCreateBatch
|
|
18
16
|
from ...types.extract_resultset import ExtractResultset
|
|
19
17
|
from ...types.extract_run import ExtractRun
|
|
20
|
-
from ...types.extract_schema_validate_request import ExtractSchemaValidateRequest
|
|
21
18
|
from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
|
|
22
19
|
from ...types.http_validation_error import HttpValidationError
|
|
23
20
|
from ...types.llama_extract_settings import LlamaExtractSettings
|
|
24
21
|
from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
|
|
22
|
+
from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
|
|
23
|
+
from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
|
|
24
|
+
from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
|
|
25
|
+
from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
25
26
|
|
|
26
27
|
try:
|
|
27
28
|
import pydantic
|
|
@@ -39,17 +40,19 @@ class LlamaExtractClient:
|
|
|
39
40
|
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
40
41
|
self._client_wrapper = client_wrapper
|
|
41
42
|
|
|
42
|
-
def list_extraction_agents(self, *, project_id:
|
|
43
|
+
def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
|
|
43
44
|
"""
|
|
44
45
|
Parameters:
|
|
45
|
-
- project_id:
|
|
46
|
+
- project_id: str.
|
|
46
47
|
---
|
|
47
48
|
from llama_cloud.client import LlamaCloud
|
|
48
49
|
|
|
49
50
|
client = LlamaCloud(
|
|
50
51
|
token="YOUR_TOKEN",
|
|
51
52
|
)
|
|
52
|
-
client.llama_extract.list_extraction_agents(
|
|
53
|
+
client.llama_extract.list_extraction_agents(
|
|
54
|
+
project_id="string",
|
|
55
|
+
)
|
|
53
56
|
"""
|
|
54
57
|
_response = self._client_wrapper.httpx_client.request(
|
|
55
58
|
"GET",
|
|
@@ -73,7 +76,9 @@ class LlamaExtractClient:
|
|
|
73
76
|
*,
|
|
74
77
|
project_id: typing.Optional[str] = None,
|
|
75
78
|
organization_id: typing.Optional[str] = None,
|
|
76
|
-
|
|
79
|
+
name: str,
|
|
80
|
+
data_schema: ExtractAgentCreateDataSchema,
|
|
81
|
+
config: ExtractConfig,
|
|
77
82
|
) -> ExtractAgent:
|
|
78
83
|
"""
|
|
79
84
|
Parameters:
|
|
@@ -81,26 +86,23 @@ class LlamaExtractClient:
|
|
|
81
86
|
|
|
82
87
|
- organization_id: typing.Optional[str].
|
|
83
88
|
|
|
84
|
-
-
|
|
89
|
+
- name: str. The name of the extraction schema
|
|
90
|
+
|
|
91
|
+
- data_schema: ExtractAgentCreateDataSchema. The schema of the data.
|
|
92
|
+
|
|
93
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
85
94
|
---
|
|
86
|
-
from llama_cloud import (
|
|
87
|
-
ExtractAgentCreate,
|
|
88
|
-
ExtractConfig,
|
|
89
|
-
ExtractMode,
|
|
90
|
-
ExtractTarget,
|
|
91
|
-
)
|
|
95
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
92
96
|
from llama_cloud.client import LlamaCloud
|
|
93
97
|
|
|
94
98
|
client = LlamaCloud(
|
|
95
99
|
token="YOUR_TOKEN",
|
|
96
100
|
)
|
|
97
101
|
client.llama_extract.create_extraction_agent(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
extraction_mode=ExtractMode.FAST,
|
|
103
|
-
),
|
|
102
|
+
name="string",
|
|
103
|
+
config=ExtractConfig(
|
|
104
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
105
|
+
extraction_mode=ExtractMode.FAST,
|
|
104
106
|
),
|
|
105
107
|
)
|
|
106
108
|
"""
|
|
@@ -108,7 +110,7 @@ class LlamaExtractClient:
|
|
|
108
110
|
"POST",
|
|
109
111
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
110
112
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
111
|
-
json=jsonable_encoder(
|
|
113
|
+
json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
|
|
112
114
|
headers=self._client_wrapper.get_headers(),
|
|
113
115
|
timeout=60,
|
|
114
116
|
)
|
|
@@ -122,30 +124,29 @@ class LlamaExtractClient:
|
|
|
122
124
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
123
125
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
124
126
|
|
|
125
|
-
def validate_extraction_schema(
|
|
127
|
+
def validate_extraction_schema(
|
|
128
|
+
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
129
|
+
) -> ExtractSchemaValidateResponse:
|
|
126
130
|
"""
|
|
127
131
|
Validates an extraction agent's schema definition.
|
|
128
132
|
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
129
133
|
|
|
130
134
|
Parameters:
|
|
131
|
-
-
|
|
135
|
+
- data_schema: ExtractSchemaValidateRequestDataSchema.
|
|
132
136
|
---
|
|
133
|
-
from llama_cloud import ExtractSchemaValidateRequest
|
|
134
137
|
from llama_cloud.client import LlamaCloud
|
|
135
138
|
|
|
136
139
|
client = LlamaCloud(
|
|
137
140
|
token="YOUR_TOKEN",
|
|
138
141
|
)
|
|
139
|
-
client.llama_extract.validate_extraction_schema(
|
|
140
|
-
request=ExtractSchemaValidateRequest(),
|
|
141
|
-
)
|
|
142
|
+
client.llama_extract.validate_extraction_schema()
|
|
142
143
|
"""
|
|
143
144
|
_response = self._client_wrapper.httpx_client.request(
|
|
144
145
|
"POST",
|
|
145
146
|
urllib.parse.urljoin(
|
|
146
147
|
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
|
|
147
148
|
),
|
|
148
|
-
json=jsonable_encoder(
|
|
149
|
+
json=jsonable_encoder({"data_schema": data_schema}),
|
|
149
150
|
headers=self._client_wrapper.get_headers(),
|
|
150
151
|
timeout=60,
|
|
151
152
|
)
|
|
@@ -226,19 +227,18 @@ class LlamaExtractClient:
|
|
|
226
227
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
227
228
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
228
229
|
|
|
229
|
-
def update_extraction_agent(
|
|
230
|
+
def update_extraction_agent(
|
|
231
|
+
self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
|
|
232
|
+
) -> ExtractAgent:
|
|
230
233
|
"""
|
|
231
234
|
Parameters:
|
|
232
235
|
- extraction_agent_id: str.
|
|
233
236
|
|
|
234
|
-
-
|
|
237
|
+
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
238
|
+
|
|
239
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
235
240
|
---
|
|
236
|
-
from llama_cloud import (
|
|
237
|
-
ExtractAgentUpdate,
|
|
238
|
-
ExtractConfig,
|
|
239
|
-
ExtractMode,
|
|
240
|
-
ExtractTarget,
|
|
241
|
-
)
|
|
241
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
242
242
|
from llama_cloud.client import LlamaCloud
|
|
243
243
|
|
|
244
244
|
client = LlamaCloud(
|
|
@@ -246,11 +246,9 @@ class LlamaExtractClient:
|
|
|
246
246
|
)
|
|
247
247
|
client.llama_extract.update_extraction_agent(
|
|
248
248
|
extraction_agent_id="string",
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
extraction_mode=ExtractMode.FAST,
|
|
253
|
-
),
|
|
249
|
+
config=ExtractConfig(
|
|
250
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
251
|
+
extraction_mode=ExtractMode.FAST,
|
|
254
252
|
),
|
|
255
253
|
)
|
|
256
254
|
"""
|
|
@@ -259,7 +257,7 @@ class LlamaExtractClient:
|
|
|
259
257
|
urllib.parse.urljoin(
|
|
260
258
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
261
259
|
),
|
|
262
|
-
json=jsonable_encoder(
|
|
260
|
+
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
263
261
|
headers=self._client_wrapper.get_headers(),
|
|
264
262
|
timeout=60,
|
|
265
263
|
)
|
|
@@ -357,6 +355,7 @@ class LlamaExtractClient:
|
|
|
357
355
|
client.llama_extract.run_job(
|
|
358
356
|
request=ExtractJobCreate(
|
|
359
357
|
extraction_agent_id="string",
|
|
358
|
+
file_id="string",
|
|
360
359
|
config_override=ExtractConfig(
|
|
361
360
|
extraction_target=ExtractTarget.PER_DOC,
|
|
362
361
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -433,6 +432,7 @@ class LlamaExtractClient:
|
|
|
433
432
|
ExtractJobCreate,
|
|
434
433
|
ExtractMode,
|
|
435
434
|
ExtractTarget,
|
|
435
|
+
FailPageMode,
|
|
436
436
|
LlamaExtractSettings,
|
|
437
437
|
LlamaParseParameters,
|
|
438
438
|
ParsingMode,
|
|
@@ -445,6 +445,7 @@ class LlamaExtractClient:
|
|
|
445
445
|
client.llama_extract.run_job_test_user(
|
|
446
446
|
job_create=ExtractJobCreate(
|
|
447
447
|
extraction_agent_id="string",
|
|
448
|
+
file_id="string",
|
|
448
449
|
config_override=ExtractConfig(
|
|
449
450
|
extraction_target=ExtractTarget.PER_DOC,
|
|
450
451
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -454,6 +455,7 @@ class LlamaExtractClient:
|
|
|
454
455
|
chunk_mode=ChunkMode.PAGE,
|
|
455
456
|
llama_parse_params=LlamaParseParameters(
|
|
456
457
|
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
458
|
+
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
457
459
|
),
|
|
458
460
|
),
|
|
459
461
|
)
|
|
@@ -479,42 +481,98 @@ class LlamaExtractClient:
|
|
|
479
481
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
480
482
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
481
483
|
|
|
484
|
+
def run_job_on_file(
|
|
485
|
+
self,
|
|
486
|
+
*,
|
|
487
|
+
from_ui: typing.Optional[bool] = None,
|
|
488
|
+
extraction_agent_id: str,
|
|
489
|
+
file: typing.IO,
|
|
490
|
+
data_schema_override: typing.Optional[str] = None,
|
|
491
|
+
config_override: typing.Optional[str] = None,
|
|
492
|
+
) -> ExtractJob:
|
|
493
|
+
"""
|
|
494
|
+
Parameters:
|
|
495
|
+
- from_ui: typing.Optional[bool].
|
|
496
|
+
|
|
497
|
+
- extraction_agent_id: str.
|
|
498
|
+
|
|
499
|
+
- file: typing.IO.
|
|
500
|
+
|
|
501
|
+
- data_schema_override: typing.Optional[str].
|
|
502
|
+
|
|
503
|
+
- config_override: typing.Optional[str].
|
|
504
|
+
"""
|
|
505
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
506
|
+
"POST",
|
|
507
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
508
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
509
|
+
data=jsonable_encoder(
|
|
510
|
+
{
|
|
511
|
+
"extraction_agent_id": extraction_agent_id,
|
|
512
|
+
"data_schema_override": data_schema_override,
|
|
513
|
+
"config_override": config_override,
|
|
514
|
+
}
|
|
515
|
+
),
|
|
516
|
+
files={"file": file},
|
|
517
|
+
headers=self._client_wrapper.get_headers(),
|
|
518
|
+
timeout=60,
|
|
519
|
+
)
|
|
520
|
+
if 200 <= _response.status_code < 300:
|
|
521
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
522
|
+
if _response.status_code == 422:
|
|
523
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
524
|
+
try:
|
|
525
|
+
_response_json = _response.json()
|
|
526
|
+
except JSONDecodeError:
|
|
527
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
528
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
529
|
+
|
|
482
530
|
def run_batch_jobs(
|
|
483
|
-
self,
|
|
531
|
+
self,
|
|
532
|
+
*,
|
|
533
|
+
from_ui: typing.Optional[bool] = None,
|
|
534
|
+
extraction_agent_id: str,
|
|
535
|
+
file_ids: typing.List[str],
|
|
536
|
+
data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
|
|
537
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
484
538
|
) -> typing.List[ExtractJob]:
|
|
485
539
|
"""
|
|
486
540
|
Parameters:
|
|
487
541
|
- from_ui: typing.Optional[bool].
|
|
488
542
|
|
|
489
|
-
-
|
|
543
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
544
|
+
|
|
545
|
+
- file_ids: typing.List[str]. The ids of the files
|
|
546
|
+
|
|
547
|
+
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
548
|
+
|
|
549
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
490
550
|
---
|
|
491
|
-
from llama_cloud import (
|
|
492
|
-
ExtractConfig,
|
|
493
|
-
ExtractJobCreateBatch,
|
|
494
|
-
ExtractMode,
|
|
495
|
-
ExtractTarget,
|
|
496
|
-
)
|
|
551
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
497
552
|
from llama_cloud.client import LlamaCloud
|
|
498
553
|
|
|
499
554
|
client = LlamaCloud(
|
|
500
555
|
token="YOUR_TOKEN",
|
|
501
556
|
)
|
|
502
557
|
client.llama_extract.run_batch_jobs(
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
extraction_mode=ExtractMode.FAST,
|
|
509
|
-
),
|
|
558
|
+
extraction_agent_id="string",
|
|
559
|
+
file_ids=[],
|
|
560
|
+
config_override=ExtractConfig(
|
|
561
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
562
|
+
extraction_mode=ExtractMode.FAST,
|
|
510
563
|
),
|
|
511
564
|
)
|
|
512
565
|
"""
|
|
566
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
567
|
+
if data_schema_override is not OMIT:
|
|
568
|
+
_request["data_schema_override"] = data_schema_override
|
|
569
|
+
if config_override is not OMIT:
|
|
570
|
+
_request["config_override"] = config_override
|
|
513
571
|
_response = self._client_wrapper.httpx_client.request(
|
|
514
572
|
"POST",
|
|
515
573
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
|
|
516
574
|
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
517
|
-
json=jsonable_encoder(
|
|
575
|
+
json=jsonable_encoder(_request),
|
|
518
576
|
headers=self._client_wrapper.get_headers(),
|
|
519
577
|
timeout=60,
|
|
520
578
|
)
|
|
@@ -721,17 +779,19 @@ class AsyncLlamaExtractClient:
|
|
|
721
779
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
722
780
|
self._client_wrapper = client_wrapper
|
|
723
781
|
|
|
724
|
-
async def list_extraction_agents(self, *, project_id:
|
|
782
|
+
async def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
|
|
725
783
|
"""
|
|
726
784
|
Parameters:
|
|
727
|
-
- project_id:
|
|
785
|
+
- project_id: str.
|
|
728
786
|
---
|
|
729
787
|
from llama_cloud.client import AsyncLlamaCloud
|
|
730
788
|
|
|
731
789
|
client = AsyncLlamaCloud(
|
|
732
790
|
token="YOUR_TOKEN",
|
|
733
791
|
)
|
|
734
|
-
await client.llama_extract.list_extraction_agents(
|
|
792
|
+
await client.llama_extract.list_extraction_agents(
|
|
793
|
+
project_id="string",
|
|
794
|
+
)
|
|
735
795
|
"""
|
|
736
796
|
_response = await self._client_wrapper.httpx_client.request(
|
|
737
797
|
"GET",
|
|
@@ -755,7 +815,9 @@ class AsyncLlamaExtractClient:
|
|
|
755
815
|
*,
|
|
756
816
|
project_id: typing.Optional[str] = None,
|
|
757
817
|
organization_id: typing.Optional[str] = None,
|
|
758
|
-
|
|
818
|
+
name: str,
|
|
819
|
+
data_schema: ExtractAgentCreateDataSchema,
|
|
820
|
+
config: ExtractConfig,
|
|
759
821
|
) -> ExtractAgent:
|
|
760
822
|
"""
|
|
761
823
|
Parameters:
|
|
@@ -763,26 +825,23 @@ class AsyncLlamaExtractClient:
|
|
|
763
825
|
|
|
764
826
|
- organization_id: typing.Optional[str].
|
|
765
827
|
|
|
766
|
-
-
|
|
828
|
+
- name: str. The name of the extraction schema
|
|
829
|
+
|
|
830
|
+
- data_schema: ExtractAgentCreateDataSchema. The schema of the data.
|
|
831
|
+
|
|
832
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
767
833
|
---
|
|
768
|
-
from llama_cloud import (
|
|
769
|
-
ExtractAgentCreate,
|
|
770
|
-
ExtractConfig,
|
|
771
|
-
ExtractMode,
|
|
772
|
-
ExtractTarget,
|
|
773
|
-
)
|
|
834
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
774
835
|
from llama_cloud.client import AsyncLlamaCloud
|
|
775
836
|
|
|
776
837
|
client = AsyncLlamaCloud(
|
|
777
838
|
token="YOUR_TOKEN",
|
|
778
839
|
)
|
|
779
840
|
await client.llama_extract.create_extraction_agent(
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
extraction_mode=ExtractMode.FAST,
|
|
785
|
-
),
|
|
841
|
+
name="string",
|
|
842
|
+
config=ExtractConfig(
|
|
843
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
844
|
+
extraction_mode=ExtractMode.FAST,
|
|
786
845
|
),
|
|
787
846
|
)
|
|
788
847
|
"""
|
|
@@ -790,7 +849,7 @@ class AsyncLlamaExtractClient:
|
|
|
790
849
|
"POST",
|
|
791
850
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
792
851
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
793
|
-
json=jsonable_encoder(
|
|
852
|
+
json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
|
|
794
853
|
headers=self._client_wrapper.get_headers(),
|
|
795
854
|
timeout=60,
|
|
796
855
|
)
|
|
@@ -805,31 +864,28 @@ class AsyncLlamaExtractClient:
|
|
|
805
864
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
806
865
|
|
|
807
866
|
async def validate_extraction_schema(
|
|
808
|
-
self, *,
|
|
867
|
+
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
809
868
|
) -> ExtractSchemaValidateResponse:
|
|
810
869
|
"""
|
|
811
870
|
Validates an extraction agent's schema definition.
|
|
812
871
|
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
813
872
|
|
|
814
873
|
Parameters:
|
|
815
|
-
-
|
|
874
|
+
- data_schema: ExtractSchemaValidateRequestDataSchema.
|
|
816
875
|
---
|
|
817
|
-
from llama_cloud import ExtractSchemaValidateRequest
|
|
818
876
|
from llama_cloud.client import AsyncLlamaCloud
|
|
819
877
|
|
|
820
878
|
client = AsyncLlamaCloud(
|
|
821
879
|
token="YOUR_TOKEN",
|
|
822
880
|
)
|
|
823
|
-
await client.llama_extract.validate_extraction_schema(
|
|
824
|
-
request=ExtractSchemaValidateRequest(),
|
|
825
|
-
)
|
|
881
|
+
await client.llama_extract.validate_extraction_schema()
|
|
826
882
|
"""
|
|
827
883
|
_response = await self._client_wrapper.httpx_client.request(
|
|
828
884
|
"POST",
|
|
829
885
|
urllib.parse.urljoin(
|
|
830
886
|
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
|
|
831
887
|
),
|
|
832
|
-
json=jsonable_encoder(
|
|
888
|
+
json=jsonable_encoder({"data_schema": data_schema}),
|
|
833
889
|
headers=self._client_wrapper.get_headers(),
|
|
834
890
|
timeout=60,
|
|
835
891
|
)
|
|
@@ -910,19 +966,18 @@ class AsyncLlamaExtractClient:
|
|
|
910
966
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
911
967
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
912
968
|
|
|
913
|
-
async def update_extraction_agent(
|
|
969
|
+
async def update_extraction_agent(
|
|
970
|
+
self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
|
|
971
|
+
) -> ExtractAgent:
|
|
914
972
|
"""
|
|
915
973
|
Parameters:
|
|
916
974
|
- extraction_agent_id: str.
|
|
917
975
|
|
|
918
|
-
-
|
|
976
|
+
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
977
|
+
|
|
978
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
919
979
|
---
|
|
920
|
-
from llama_cloud import (
|
|
921
|
-
ExtractAgentUpdate,
|
|
922
|
-
ExtractConfig,
|
|
923
|
-
ExtractMode,
|
|
924
|
-
ExtractTarget,
|
|
925
|
-
)
|
|
980
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
926
981
|
from llama_cloud.client import AsyncLlamaCloud
|
|
927
982
|
|
|
928
983
|
client = AsyncLlamaCloud(
|
|
@@ -930,11 +985,9 @@ class AsyncLlamaExtractClient:
|
|
|
930
985
|
)
|
|
931
986
|
await client.llama_extract.update_extraction_agent(
|
|
932
987
|
extraction_agent_id="string",
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
extraction_mode=ExtractMode.FAST,
|
|
937
|
-
),
|
|
988
|
+
config=ExtractConfig(
|
|
989
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
990
|
+
extraction_mode=ExtractMode.FAST,
|
|
938
991
|
),
|
|
939
992
|
)
|
|
940
993
|
"""
|
|
@@ -943,7 +996,7 @@ class AsyncLlamaExtractClient:
|
|
|
943
996
|
urllib.parse.urljoin(
|
|
944
997
|
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
945
998
|
),
|
|
946
|
-
json=jsonable_encoder(
|
|
999
|
+
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
947
1000
|
headers=self._client_wrapper.get_headers(),
|
|
948
1001
|
timeout=60,
|
|
949
1002
|
)
|
|
@@ -1041,6 +1094,7 @@ class AsyncLlamaExtractClient:
|
|
|
1041
1094
|
await client.llama_extract.run_job(
|
|
1042
1095
|
request=ExtractJobCreate(
|
|
1043
1096
|
extraction_agent_id="string",
|
|
1097
|
+
file_id="string",
|
|
1044
1098
|
config_override=ExtractConfig(
|
|
1045
1099
|
extraction_target=ExtractTarget.PER_DOC,
|
|
1046
1100
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -1117,6 +1171,7 @@ class AsyncLlamaExtractClient:
|
|
|
1117
1171
|
ExtractJobCreate,
|
|
1118
1172
|
ExtractMode,
|
|
1119
1173
|
ExtractTarget,
|
|
1174
|
+
FailPageMode,
|
|
1120
1175
|
LlamaExtractSettings,
|
|
1121
1176
|
LlamaParseParameters,
|
|
1122
1177
|
ParsingMode,
|
|
@@ -1129,6 +1184,7 @@ class AsyncLlamaExtractClient:
|
|
|
1129
1184
|
await client.llama_extract.run_job_test_user(
|
|
1130
1185
|
job_create=ExtractJobCreate(
|
|
1131
1186
|
extraction_agent_id="string",
|
|
1187
|
+
file_id="string",
|
|
1132
1188
|
config_override=ExtractConfig(
|
|
1133
1189
|
extraction_target=ExtractTarget.PER_DOC,
|
|
1134
1190
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -1138,6 +1194,7 @@ class AsyncLlamaExtractClient:
|
|
|
1138
1194
|
chunk_mode=ChunkMode.PAGE,
|
|
1139
1195
|
llama_parse_params=LlamaParseParameters(
|
|
1140
1196
|
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
1197
|
+
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
1141
1198
|
),
|
|
1142
1199
|
),
|
|
1143
1200
|
)
|
|
@@ -1163,42 +1220,98 @@ class AsyncLlamaExtractClient:
|
|
|
1163
1220
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1164
1221
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1165
1222
|
|
|
1223
|
+
async def run_job_on_file(
|
|
1224
|
+
self,
|
|
1225
|
+
*,
|
|
1226
|
+
from_ui: typing.Optional[bool] = None,
|
|
1227
|
+
extraction_agent_id: str,
|
|
1228
|
+
file: typing.IO,
|
|
1229
|
+
data_schema_override: typing.Optional[str] = None,
|
|
1230
|
+
config_override: typing.Optional[str] = None,
|
|
1231
|
+
) -> ExtractJob:
|
|
1232
|
+
"""
|
|
1233
|
+
Parameters:
|
|
1234
|
+
- from_ui: typing.Optional[bool].
|
|
1235
|
+
|
|
1236
|
+
- extraction_agent_id: str.
|
|
1237
|
+
|
|
1238
|
+
- file: typing.IO.
|
|
1239
|
+
|
|
1240
|
+
- data_schema_override: typing.Optional[str].
|
|
1241
|
+
|
|
1242
|
+
- config_override: typing.Optional[str].
|
|
1243
|
+
"""
|
|
1244
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1245
|
+
"POST",
|
|
1246
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
1247
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1248
|
+
data=jsonable_encoder(
|
|
1249
|
+
{
|
|
1250
|
+
"extraction_agent_id": extraction_agent_id,
|
|
1251
|
+
"data_schema_override": data_schema_override,
|
|
1252
|
+
"config_override": config_override,
|
|
1253
|
+
}
|
|
1254
|
+
),
|
|
1255
|
+
files={"file": file},
|
|
1256
|
+
headers=self._client_wrapper.get_headers(),
|
|
1257
|
+
timeout=60,
|
|
1258
|
+
)
|
|
1259
|
+
if 200 <= _response.status_code < 300:
|
|
1260
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1261
|
+
if _response.status_code == 422:
|
|
1262
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1263
|
+
try:
|
|
1264
|
+
_response_json = _response.json()
|
|
1265
|
+
except JSONDecodeError:
|
|
1266
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1267
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1268
|
+
|
|
1166
1269
|
async def run_batch_jobs(
|
|
1167
|
-
self,
|
|
1270
|
+
self,
|
|
1271
|
+
*,
|
|
1272
|
+
from_ui: typing.Optional[bool] = None,
|
|
1273
|
+
extraction_agent_id: str,
|
|
1274
|
+
file_ids: typing.List[str],
|
|
1275
|
+
data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
|
|
1276
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
1168
1277
|
) -> typing.List[ExtractJob]:
|
|
1169
1278
|
"""
|
|
1170
1279
|
Parameters:
|
|
1171
1280
|
- from_ui: typing.Optional[bool].
|
|
1172
1281
|
|
|
1173
|
-
-
|
|
1282
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
1283
|
+
|
|
1284
|
+
- file_ids: typing.List[str]. The ids of the files
|
|
1285
|
+
|
|
1286
|
+
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
1287
|
+
|
|
1288
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
1174
1289
|
---
|
|
1175
|
-
from llama_cloud import (
|
|
1176
|
-
ExtractConfig,
|
|
1177
|
-
ExtractJobCreateBatch,
|
|
1178
|
-
ExtractMode,
|
|
1179
|
-
ExtractTarget,
|
|
1180
|
-
)
|
|
1290
|
+
from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
|
|
1181
1291
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1182
1292
|
|
|
1183
1293
|
client = AsyncLlamaCloud(
|
|
1184
1294
|
token="YOUR_TOKEN",
|
|
1185
1295
|
)
|
|
1186
1296
|
await client.llama_extract.run_batch_jobs(
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
extraction_mode=ExtractMode.FAST,
|
|
1193
|
-
),
|
|
1297
|
+
extraction_agent_id="string",
|
|
1298
|
+
file_ids=[],
|
|
1299
|
+
config_override=ExtractConfig(
|
|
1300
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
1301
|
+
extraction_mode=ExtractMode.FAST,
|
|
1194
1302
|
),
|
|
1195
1303
|
)
|
|
1196
1304
|
"""
|
|
1305
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
1306
|
+
if data_schema_override is not OMIT:
|
|
1307
|
+
_request["data_schema_override"] = data_schema_override
|
|
1308
|
+
if config_override is not OMIT:
|
|
1309
|
+
_request["config_override"] = config_override
|
|
1197
1310
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1198
1311
|
"POST",
|
|
1199
1312
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
|
|
1200
1313
|
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1201
|
-
json=jsonable_encoder(
|
|
1314
|
+
json=jsonable_encoder(_request),
|
|
1202
1315
|
headers=self._client_wrapper.get_headers(),
|
|
1203
1316
|
timeout=60,
|
|
1204
1317
|
)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
|
|
4
|
+
from .extract_agent_create_data_schema_zero_value import ExtractAgentCreateDataSchemaZeroValue
|
|
5
|
+
from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
|
|
6
|
+
from .extract_agent_update_data_schema_zero_value import ExtractAgentUpdateDataSchemaZeroValue
|
|
7
|
+
from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
|
|
8
|
+
from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
|
|
9
|
+
from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
10
|
+
from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"ExtractAgentCreateDataSchema",
|
|
14
|
+
"ExtractAgentCreateDataSchemaZeroValue",
|
|
15
|
+
"ExtractAgentUpdateDataSchema",
|
|
16
|
+
"ExtractAgentUpdateDataSchemaZeroValue",
|
|
17
|
+
"ExtractJobCreateBatchDataSchemaOverride",
|
|
18
|
+
"ExtractJobCreateBatchDataSchemaOverrideZeroValue",
|
|
19
|
+
"ExtractSchemaValidateRequestDataSchema",
|
|
20
|
+
"ExtractSchemaValidateRequestDataSchemaZeroValue",
|
|
21
|
+
]
|