llama-cloud 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +19 -19
- llama_cloud/resources/__init__.py +6 -0
- llama_cloud/resources/alpha/client.py +14 -30
- llama_cloud/resources/beta/client.py +1045 -59
- llama_cloud/resources/jobs/client.py +0 -8
- llama_cloud/resources/llama_extract/__init__.py +6 -0
- llama_cloud/resources/llama_extract/client.py +825 -941
- llama_cloud/resources/llama_extract/types/__init__.py +6 -0
- llama_cloud/resources/organizations/client.py +18 -4
- llama_cloud/resources/parsing/client.py +56 -0
- llama_cloud/resources/pipelines/client.py +164 -0
- llama_cloud/types/__init__.py +16 -22
- llama_cloud/types/agent_data.py +1 -1
- llama_cloud/types/agent_deployment_summary.py +1 -2
- llama_cloud/types/{prompt_conf.py → api_key.py} +14 -9
- llama_cloud/types/{extract_job_create.py → api_key_query_response.py} +6 -14
- llama_cloud/types/api_key_type.py +17 -0
- llama_cloud/types/delete_response.py +35 -0
- llama_cloud/types/extract_config.py +1 -0
- llama_cloud/types/extract_models.py +4 -0
- llama_cloud/types/extracted_table.py +40 -0
- llama_cloud/types/legacy_parse_job_config.py +3 -0
- llama_cloud/types/llama_parse_parameters.py +7 -0
- llama_cloud/types/organization.py +1 -0
- llama_cloud/types/paginated_response_spreadsheet_job.py +34 -0
- llama_cloud/types/parse_job_config.py +7 -0
- llama_cloud/types/public_model_name.py +4 -0
- llama_cloud/types/quota_configuration_configuration_type.py +4 -0
- llama_cloud/types/spreadsheet_job.py +50 -0
- llama_cloud/types/spreadsheet_parsing_config.py +35 -0
- {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/METADATA +1 -1
- {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/RECORD +37 -37
- llama_cloud/types/chunk_mode.py +0 -29
- llama_cloud/types/llama_extract_settings.py +0 -67
- llama_cloud/types/multimodal_parse_resolution.py +0 -17
- llama_cloud/types/schema_relax_mode.py +0 -25
- llama_cloud/types/struct_mode.py +0 -33
- llama_cloud/types/struct_parse_conf.py +0 -63
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_data_schema_override.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_data_schema_override_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_priority.py +0 -0
- {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/LICENSE +0 -0
- {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/WHEEL +0 -0
|
@@ -12,19 +12,19 @@ from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
|
|
12
12
|
from ...types.extract_agent import ExtractAgent
|
|
13
13
|
from ...types.extract_config import ExtractConfig
|
|
14
14
|
from ...types.extract_job import ExtractJob
|
|
15
|
-
from ...types.extract_job_create import ExtractJobCreate
|
|
16
15
|
from ...types.extract_resultset import ExtractResultset
|
|
17
16
|
from ...types.extract_run import ExtractRun
|
|
18
17
|
from ...types.extract_schema_generate_response import ExtractSchemaGenerateResponse
|
|
19
18
|
from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
|
|
20
19
|
from ...types.file_data import FileData
|
|
21
20
|
from ...types.http_validation_error import HttpValidationError
|
|
22
|
-
from ...types.llama_extract_settings import LlamaExtractSettings
|
|
23
21
|
from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
|
|
24
22
|
from ...types.webhook_configuration import WebhookConfiguration
|
|
25
23
|
from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
|
|
26
24
|
from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
|
|
27
25
|
from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
|
|
26
|
+
from .types.extract_job_create_data_schema_override import ExtractJobCreateDataSchemaOverride
|
|
27
|
+
from .types.extract_job_create_priority import ExtractJobCreatePriority
|
|
28
28
|
from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
|
|
29
29
|
from .types.extract_stateless_request_data_schema import ExtractStatelessRequestDataSchema
|
|
30
30
|
|
|
@@ -44,39 +44,29 @@ class LlamaExtractClient:
|
|
|
44
44
|
def __init__(self, *, client_wrapper: SyncClientWrapper):
|
|
45
45
|
self._client_wrapper = client_wrapper
|
|
46
46
|
|
|
47
|
-
def
|
|
48
|
-
self,
|
|
49
|
-
*,
|
|
50
|
-
include_default: typing.Optional[bool] = None,
|
|
51
|
-
project_id: typing.Optional[str] = None,
|
|
52
|
-
organization_id: typing.Optional[str] = None,
|
|
53
|
-
) -> typing.List[ExtractAgent]:
|
|
47
|
+
def list_jobs(self, *, extraction_agent_id: str) -> typing.List[ExtractJob]:
|
|
54
48
|
"""
|
|
55
49
|
Parameters:
|
|
56
|
-
-
|
|
57
|
-
|
|
58
|
-
- project_id: typing.Optional[str].
|
|
59
|
-
|
|
60
|
-
- organization_id: typing.Optional[str].
|
|
50
|
+
- extraction_agent_id: str.
|
|
61
51
|
---
|
|
62
52
|
from llama_cloud.client import LlamaCloud
|
|
63
53
|
|
|
64
54
|
client = LlamaCloud(
|
|
65
55
|
token="YOUR_TOKEN",
|
|
66
56
|
)
|
|
67
|
-
client.llama_extract.
|
|
57
|
+
client.llama_extract.list_jobs(
|
|
58
|
+
extraction_agent_id="string",
|
|
59
|
+
)
|
|
68
60
|
"""
|
|
69
61
|
_response = self._client_wrapper.httpx_client.request(
|
|
70
62
|
"GET",
|
|
71
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
72
|
-
params=remove_none_from_dict(
|
|
73
|
-
{"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
|
|
74
|
-
),
|
|
63
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
|
|
64
|
+
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
|
|
75
65
|
headers=self._client_wrapper.get_headers(),
|
|
76
66
|
timeout=60,
|
|
77
67
|
)
|
|
78
68
|
if 200 <= _response.status_code < 300:
|
|
79
|
-
return pydantic.parse_obj_as(typing.List[
|
|
69
|
+
return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
|
|
80
70
|
if _response.status_code == 422:
|
|
81
71
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
82
72
|
try:
|
|
@@ -85,31 +75,38 @@ class LlamaExtractClient:
|
|
|
85
75
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
86
76
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
87
77
|
|
|
88
|
-
def
|
|
78
|
+
def run_job(
|
|
89
79
|
self,
|
|
90
80
|
*,
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
81
|
+
from_ui: typing.Optional[bool] = None,
|
|
82
|
+
priority: typing.Optional[ExtractJobCreatePriority] = OMIT,
|
|
83
|
+
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
84
|
+
extraction_agent_id: str,
|
|
85
|
+
file_id: str,
|
|
86
|
+
data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride] = OMIT,
|
|
87
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
88
|
+
) -> ExtractJob:
|
|
97
89
|
"""
|
|
98
90
|
Parameters:
|
|
99
|
-
-
|
|
91
|
+
- from_ui: typing.Optional[bool].
|
|
100
92
|
|
|
101
|
-
-
|
|
93
|
+
- priority: typing.Optional[ExtractJobCreatePriority].
|
|
102
94
|
|
|
103
|
-
-
|
|
95
|
+
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
104
96
|
|
|
105
|
-
-
|
|
97
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
106
98
|
|
|
107
|
-
-
|
|
99
|
+
- file_id: str. The id of the file
|
|
100
|
+
|
|
101
|
+
- data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
102
|
+
|
|
103
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
108
104
|
---
|
|
109
105
|
from llama_cloud import (
|
|
110
106
|
DocumentChunkMode,
|
|
111
107
|
ExtractConfig,
|
|
112
108
|
ExtractConfigPriority,
|
|
109
|
+
ExtractJobCreatePriority,
|
|
113
110
|
ExtractMode,
|
|
114
111
|
ExtractModels,
|
|
115
112
|
ExtractTarget,
|
|
@@ -120,9 +117,11 @@ class LlamaExtractClient:
|
|
|
120
117
|
client = LlamaCloud(
|
|
121
118
|
token="YOUR_TOKEN",
|
|
122
119
|
)
|
|
123
|
-
client.llama_extract.
|
|
124
|
-
|
|
125
|
-
|
|
120
|
+
client.llama_extract.run_job(
|
|
121
|
+
priority=ExtractJobCreatePriority.LOW,
|
|
122
|
+
extraction_agent_id="string",
|
|
123
|
+
file_id="string",
|
|
124
|
+
config_override=ExtractConfig(
|
|
126
125
|
priority=ExtractConfigPriority.LOW,
|
|
127
126
|
extraction_target=ExtractTarget.PER_DOC,
|
|
128
127
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -132,16 +131,25 @@ class LlamaExtractClient:
|
|
|
132
131
|
),
|
|
133
132
|
)
|
|
134
133
|
"""
|
|
134
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_id": file_id}
|
|
135
|
+
if priority is not OMIT:
|
|
136
|
+
_request["priority"] = priority
|
|
137
|
+
if webhook_configurations is not OMIT:
|
|
138
|
+
_request["webhook_configurations"] = webhook_configurations
|
|
139
|
+
if data_schema_override is not OMIT:
|
|
140
|
+
_request["data_schema_override"] = data_schema_override
|
|
141
|
+
if config_override is not OMIT:
|
|
142
|
+
_request["config_override"] = config_override
|
|
135
143
|
_response = self._client_wrapper.httpx_client.request(
|
|
136
144
|
"POST",
|
|
137
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
138
|
-
params=remove_none_from_dict({"
|
|
139
|
-
json=jsonable_encoder(
|
|
145
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
|
|
146
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
147
|
+
json=jsonable_encoder(_request),
|
|
140
148
|
headers=self._client_wrapper.get_headers(),
|
|
141
149
|
timeout=60,
|
|
142
150
|
)
|
|
143
151
|
if 200 <= _response.status_code < 300:
|
|
144
|
-
return pydantic.parse_obj_as(
|
|
152
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
145
153
|
if _response.status_code == 422:
|
|
146
154
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
147
155
|
try:
|
|
@@ -150,34 +158,74 @@ class LlamaExtractClient:
|
|
|
150
158
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
151
159
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
152
160
|
|
|
153
|
-
def
|
|
154
|
-
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
155
|
-
) -> ExtractSchemaValidateResponse:
|
|
161
|
+
def get_job(self, job_id: str) -> ExtractJob:
|
|
156
162
|
"""
|
|
157
|
-
Validates an extraction agent's schema definition.
|
|
158
|
-
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
159
|
-
|
|
160
163
|
Parameters:
|
|
161
|
-
-
|
|
164
|
+
- job_id: str.
|
|
162
165
|
---
|
|
163
166
|
from llama_cloud.client import LlamaCloud
|
|
164
167
|
|
|
165
168
|
client = LlamaCloud(
|
|
166
169
|
token="YOUR_TOKEN",
|
|
167
170
|
)
|
|
168
|
-
client.llama_extract.
|
|
171
|
+
client.llama_extract.get_job(
|
|
172
|
+
job_id="string",
|
|
173
|
+
)
|
|
174
|
+
"""
|
|
175
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
176
|
+
"GET",
|
|
177
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
|
|
178
|
+
headers=self._client_wrapper.get_headers(),
|
|
179
|
+
timeout=60,
|
|
180
|
+
)
|
|
181
|
+
if 200 <= _response.status_code < 300:
|
|
182
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
183
|
+
if _response.status_code == 422:
|
|
184
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
185
|
+
try:
|
|
186
|
+
_response_json = _response.json()
|
|
187
|
+
except JSONDecodeError:
|
|
188
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
189
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
190
|
+
|
|
191
|
+
def run_job_on_file(
|
|
192
|
+
self,
|
|
193
|
+
*,
|
|
194
|
+
from_ui: typing.Optional[bool] = None,
|
|
195
|
+
extraction_agent_id: str,
|
|
196
|
+
file: typing.IO,
|
|
197
|
+
data_schema_override: typing.Optional[str] = None,
|
|
198
|
+
config_override: typing.Optional[str] = None,
|
|
199
|
+
) -> ExtractJob:
|
|
200
|
+
"""
|
|
201
|
+
Parameters:
|
|
202
|
+
- from_ui: typing.Optional[bool].
|
|
203
|
+
|
|
204
|
+
- extraction_agent_id: str.
|
|
205
|
+
|
|
206
|
+
- file: typing.IO.
|
|
207
|
+
|
|
208
|
+
- data_schema_override: typing.Optional[str].
|
|
209
|
+
|
|
210
|
+
- config_override: typing.Optional[str].
|
|
169
211
|
"""
|
|
170
212
|
_response = self._client_wrapper.httpx_client.request(
|
|
171
213
|
"POST",
|
|
172
|
-
urllib.parse.urljoin(
|
|
173
|
-
|
|
214
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
215
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
216
|
+
data=jsonable_encoder(
|
|
217
|
+
{
|
|
218
|
+
"extraction_agent_id": extraction_agent_id,
|
|
219
|
+
"data_schema_override": data_schema_override,
|
|
220
|
+
"config_override": config_override,
|
|
221
|
+
}
|
|
174
222
|
),
|
|
175
|
-
|
|
223
|
+
files={"file": file},
|
|
176
224
|
headers=self._client_wrapper.get_headers(),
|
|
177
225
|
timeout=60,
|
|
178
226
|
)
|
|
179
227
|
if 200 <= _response.status_code < 300:
|
|
180
|
-
return pydantic.parse_obj_as(
|
|
228
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
181
229
|
if _response.status_code == 422:
|
|
182
230
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
183
231
|
try:
|
|
@@ -186,50 +234,69 @@ class LlamaExtractClient:
|
|
|
186
234
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
187
235
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
188
236
|
|
|
189
|
-
def
|
|
237
|
+
def run_batch_jobs(
|
|
190
238
|
self,
|
|
191
239
|
*,
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
240
|
+
from_ui: typing.Optional[bool] = None,
|
|
241
|
+
extraction_agent_id: str,
|
|
242
|
+
file_ids: typing.List[str],
|
|
243
|
+
data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
|
|
244
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
245
|
+
) -> typing.List[ExtractJob]:
|
|
197
246
|
"""
|
|
198
|
-
Generates an extraction agent's schema definition from a file and/or natural language prompt.
|
|
199
|
-
|
|
200
247
|
Parameters:
|
|
201
|
-
-
|
|
248
|
+
- from_ui: typing.Optional[bool].
|
|
202
249
|
|
|
203
|
-
-
|
|
250
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
204
251
|
|
|
205
|
-
-
|
|
252
|
+
- file_ids: typing.List[str]. The ids of the files
|
|
206
253
|
|
|
207
|
-
-
|
|
254
|
+
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
255
|
+
|
|
256
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
208
257
|
---
|
|
258
|
+
from llama_cloud import (
|
|
259
|
+
DocumentChunkMode,
|
|
260
|
+
ExtractConfig,
|
|
261
|
+
ExtractConfigPriority,
|
|
262
|
+
ExtractMode,
|
|
263
|
+
ExtractModels,
|
|
264
|
+
ExtractTarget,
|
|
265
|
+
PublicModelName,
|
|
266
|
+
)
|
|
209
267
|
from llama_cloud.client import LlamaCloud
|
|
210
268
|
|
|
211
269
|
client = LlamaCloud(
|
|
212
270
|
token="YOUR_TOKEN",
|
|
213
271
|
)
|
|
214
|
-
client.llama_extract.
|
|
272
|
+
client.llama_extract.run_batch_jobs(
|
|
273
|
+
extraction_agent_id="string",
|
|
274
|
+
file_ids=[],
|
|
275
|
+
config_override=ExtractConfig(
|
|
276
|
+
priority=ExtractConfigPriority.LOW,
|
|
277
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
278
|
+
extraction_mode=ExtractMode.FAST,
|
|
279
|
+
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
280
|
+
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
281
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
282
|
+
),
|
|
283
|
+
)
|
|
215
284
|
"""
|
|
216
|
-
_request: typing.Dict[str, typing.Any] = {}
|
|
217
|
-
if
|
|
218
|
-
_request["
|
|
219
|
-
if
|
|
220
|
-
_request["
|
|
285
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
286
|
+
if data_schema_override is not OMIT:
|
|
287
|
+
_request["data_schema_override"] = data_schema_override
|
|
288
|
+
if config_override is not OMIT:
|
|
289
|
+
_request["config_override"] = config_override
|
|
221
290
|
_response = self._client_wrapper.httpx_client.request(
|
|
222
291
|
"POST",
|
|
223
|
-
urllib.parse.urljoin(
|
|
224
|
-
|
|
225
|
-
),
|
|
226
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
292
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
|
|
293
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
227
294
|
json=jsonable_encoder(_request),
|
|
228
295
|
headers=self._client_wrapper.get_headers(),
|
|
229
296
|
timeout=60,
|
|
230
297
|
)
|
|
231
298
|
if 200 <= _response.status_code < 300:
|
|
232
|
-
return pydantic.parse_obj_as(
|
|
299
|
+
return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
|
|
233
300
|
if _response.status_code == 422:
|
|
234
301
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
235
302
|
try:
|
|
@@ -238,12 +305,12 @@ class LlamaExtractClient:
|
|
|
238
305
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
239
306
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
240
307
|
|
|
241
|
-
def
|
|
242
|
-
self,
|
|
243
|
-
) ->
|
|
308
|
+
def get_job_result(
|
|
309
|
+
self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
310
|
+
) -> ExtractResultset:
|
|
244
311
|
"""
|
|
245
312
|
Parameters:
|
|
246
|
-
-
|
|
313
|
+
- job_id: str.
|
|
247
314
|
|
|
248
315
|
- project_id: typing.Optional[str].
|
|
249
316
|
|
|
@@ -254,21 +321,19 @@ class LlamaExtractClient:
|
|
|
254
321
|
client = LlamaCloud(
|
|
255
322
|
token="YOUR_TOKEN",
|
|
256
323
|
)
|
|
257
|
-
client.llama_extract.
|
|
258
|
-
|
|
324
|
+
client.llama_extract.get_job_result(
|
|
325
|
+
job_id="string",
|
|
259
326
|
)
|
|
260
327
|
"""
|
|
261
328
|
_response = self._client_wrapper.httpx_client.request(
|
|
262
329
|
"GET",
|
|
263
|
-
urllib.parse.urljoin(
|
|
264
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
|
|
265
|
-
),
|
|
330
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}/result"),
|
|
266
331
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
267
332
|
headers=self._client_wrapper.get_headers(),
|
|
268
333
|
timeout=60,
|
|
269
334
|
)
|
|
270
335
|
if 200 <= _response.status_code < 300:
|
|
271
|
-
return pydantic.parse_obj_as(
|
|
336
|
+
return pydantic.parse_obj_as(ExtractResultset, _response.json()) # type: ignore
|
|
272
337
|
if _response.status_code == 422:
|
|
273
338
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
274
339
|
try:
|
|
@@ -277,36 +342,35 @@ class LlamaExtractClient:
|
|
|
277
342
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
278
343
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
279
344
|
|
|
280
|
-
def
|
|
281
|
-
self, *,
|
|
282
|
-
) ->
|
|
345
|
+
def list_extract_runs(
|
|
346
|
+
self, *, extraction_agent_id: str, skip: typing.Optional[int] = None, limit: typing.Optional[int] = None
|
|
347
|
+
) -> PaginatedExtractRunsResponse:
|
|
283
348
|
"""
|
|
284
|
-
Get or create a default extraction agent for the current project.
|
|
285
|
-
The default agent has an empty schema and default configuration.
|
|
286
|
-
|
|
287
349
|
Parameters:
|
|
288
|
-
-
|
|
350
|
+
- extraction_agent_id: str.
|
|
289
351
|
|
|
290
|
-
-
|
|
352
|
+
- skip: typing.Optional[int].
|
|
353
|
+
|
|
354
|
+
- limit: typing.Optional[int].
|
|
291
355
|
---
|
|
292
356
|
from llama_cloud.client import LlamaCloud
|
|
293
357
|
|
|
294
358
|
client = LlamaCloud(
|
|
295
359
|
token="YOUR_TOKEN",
|
|
296
360
|
)
|
|
297
|
-
client.llama_extract.
|
|
361
|
+
client.llama_extract.list_extract_runs(
|
|
362
|
+
extraction_agent_id="string",
|
|
363
|
+
)
|
|
298
364
|
"""
|
|
299
365
|
_response = self._client_wrapper.httpx_client.request(
|
|
300
366
|
"GET",
|
|
301
|
-
urllib.parse.urljoin(
|
|
302
|
-
|
|
303
|
-
),
|
|
304
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
367
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs"),
|
|
368
|
+
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id, "skip": skip, "limit": limit}),
|
|
305
369
|
headers=self._client_wrapper.get_headers(),
|
|
306
370
|
timeout=60,
|
|
307
371
|
)
|
|
308
372
|
if 200 <= _response.status_code < 300:
|
|
309
|
-
return pydantic.parse_obj_as(
|
|
373
|
+
return pydantic.parse_obj_as(PaginatedExtractRunsResponse, _response.json()) # type: ignore
|
|
310
374
|
if _response.status_code == 422:
|
|
311
375
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
312
376
|
try:
|
|
@@ -315,7 +379,7 @@ class LlamaExtractClient:
|
|
|
315
379
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
316
380
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
317
381
|
|
|
318
|
-
def
|
|
382
|
+
def get_latest_run_from_ui(self, *, extraction_agent_id: str) -> typing.Optional[ExtractRun]:
|
|
319
383
|
"""
|
|
320
384
|
Parameters:
|
|
321
385
|
- extraction_agent_id: str.
|
|
@@ -325,20 +389,19 @@ class LlamaExtractClient:
|
|
|
325
389
|
client = LlamaCloud(
|
|
326
390
|
token="YOUR_TOKEN",
|
|
327
391
|
)
|
|
328
|
-
client.llama_extract.
|
|
392
|
+
client.llama_extract.get_latest_run_from_ui(
|
|
329
393
|
extraction_agent_id="string",
|
|
330
394
|
)
|
|
331
395
|
"""
|
|
332
396
|
_response = self._client_wrapper.httpx_client.request(
|
|
333
397
|
"GET",
|
|
334
|
-
urllib.parse.urljoin(
|
|
335
|
-
|
|
336
|
-
),
|
|
398
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs/latest-from-ui"),
|
|
399
|
+
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
|
|
337
400
|
headers=self._client_wrapper.get_headers(),
|
|
338
401
|
timeout=60,
|
|
339
402
|
)
|
|
340
403
|
if 200 <= _response.status_code < 300:
|
|
341
|
-
return pydantic.parse_obj_as(
|
|
404
|
+
return pydantic.parse_obj_as(typing.Optional[ExtractRun], _response.json()) # type: ignore
|
|
342
405
|
if _response.status_code == 422:
|
|
343
406
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
344
407
|
try:
|
|
@@ -347,54 +410,35 @@ class LlamaExtractClient:
|
|
|
347
410
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
348
411
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
349
412
|
|
|
350
|
-
def
|
|
351
|
-
self,
|
|
352
|
-
) ->
|
|
413
|
+
def get_run_by_job_id(
|
|
414
|
+
self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
415
|
+
) -> ExtractRun:
|
|
353
416
|
"""
|
|
354
417
|
Parameters:
|
|
355
|
-
-
|
|
418
|
+
- job_id: str.
|
|
356
419
|
|
|
357
|
-
-
|
|
420
|
+
- project_id: typing.Optional[str].
|
|
358
421
|
|
|
359
|
-
-
|
|
422
|
+
- organization_id: typing.Optional[str].
|
|
360
423
|
---
|
|
361
|
-
from llama_cloud import (
|
|
362
|
-
DocumentChunkMode,
|
|
363
|
-
ExtractConfig,
|
|
364
|
-
ExtractConfigPriority,
|
|
365
|
-
ExtractMode,
|
|
366
|
-
ExtractModels,
|
|
367
|
-
ExtractTarget,
|
|
368
|
-
PublicModelName,
|
|
369
|
-
)
|
|
370
424
|
from llama_cloud.client import LlamaCloud
|
|
371
425
|
|
|
372
426
|
client = LlamaCloud(
|
|
373
427
|
token="YOUR_TOKEN",
|
|
374
428
|
)
|
|
375
|
-
client.llama_extract.
|
|
376
|
-
|
|
377
|
-
config=ExtractConfig(
|
|
378
|
-
priority=ExtractConfigPriority.LOW,
|
|
379
|
-
extraction_target=ExtractTarget.PER_DOC,
|
|
380
|
-
extraction_mode=ExtractMode.FAST,
|
|
381
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
382
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
383
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
384
|
-
),
|
|
429
|
+
client.llama_extract.get_run_by_job_id(
|
|
430
|
+
job_id="string",
|
|
385
431
|
)
|
|
386
432
|
"""
|
|
387
433
|
_response = self._client_wrapper.httpx_client.request(
|
|
388
|
-
"
|
|
389
|
-
urllib.parse.urljoin(
|
|
390
|
-
|
|
391
|
-
),
|
|
392
|
-
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
434
|
+
"GET",
|
|
435
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/by-job/{job_id}"),
|
|
436
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
393
437
|
headers=self._client_wrapper.get_headers(),
|
|
394
438
|
timeout=60,
|
|
395
439
|
)
|
|
396
440
|
if 200 <= _response.status_code < 300:
|
|
397
|
-
return pydantic.parse_obj_as(
|
|
441
|
+
return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
|
|
398
442
|
if _response.status_code == 422:
|
|
399
443
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
400
444
|
try:
|
|
@@ -403,30 +447,35 @@ class LlamaExtractClient:
|
|
|
403
447
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
404
448
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
405
449
|
|
|
406
|
-
def
|
|
450
|
+
def get_run(
|
|
451
|
+
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
452
|
+
) -> ExtractRun:
|
|
407
453
|
"""
|
|
408
454
|
Parameters:
|
|
409
|
-
-
|
|
455
|
+
- run_id: str.
|
|
456
|
+
|
|
457
|
+
- project_id: typing.Optional[str].
|
|
458
|
+
|
|
459
|
+
- organization_id: typing.Optional[str].
|
|
410
460
|
---
|
|
411
461
|
from llama_cloud.client import LlamaCloud
|
|
412
462
|
|
|
413
463
|
client = LlamaCloud(
|
|
414
464
|
token="YOUR_TOKEN",
|
|
415
465
|
)
|
|
416
|
-
client.llama_extract.
|
|
417
|
-
|
|
466
|
+
client.llama_extract.get_run(
|
|
467
|
+
run_id="string",
|
|
418
468
|
)
|
|
419
469
|
"""
|
|
420
470
|
_response = self._client_wrapper.httpx_client.request(
|
|
421
|
-
"
|
|
422
|
-
urllib.parse.urljoin(
|
|
423
|
-
|
|
424
|
-
),
|
|
471
|
+
"GET",
|
|
472
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
|
|
473
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
425
474
|
headers=self._client_wrapper.get_headers(),
|
|
426
475
|
timeout=60,
|
|
427
476
|
)
|
|
428
477
|
if 200 <= _response.status_code < 300:
|
|
429
|
-
return pydantic.parse_obj_as(
|
|
478
|
+
return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
|
|
430
479
|
if _response.status_code == 422:
|
|
431
480
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
432
481
|
try:
|
|
@@ -435,29 +484,35 @@ class LlamaExtractClient:
|
|
|
435
484
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
436
485
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
437
486
|
|
|
438
|
-
def
|
|
487
|
+
def delete_extraction_run(
|
|
488
|
+
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
489
|
+
) -> typing.Any:
|
|
439
490
|
"""
|
|
440
491
|
Parameters:
|
|
441
|
-
-
|
|
492
|
+
- run_id: str.
|
|
493
|
+
|
|
494
|
+
- project_id: typing.Optional[str].
|
|
495
|
+
|
|
496
|
+
- organization_id: typing.Optional[str].
|
|
442
497
|
---
|
|
443
498
|
from llama_cloud.client import LlamaCloud
|
|
444
499
|
|
|
445
500
|
client = LlamaCloud(
|
|
446
501
|
token="YOUR_TOKEN",
|
|
447
502
|
)
|
|
448
|
-
client.llama_extract.
|
|
449
|
-
|
|
503
|
+
client.llama_extract.delete_extraction_run(
|
|
504
|
+
run_id="string",
|
|
450
505
|
)
|
|
451
506
|
"""
|
|
452
507
|
_response = self._client_wrapper.httpx_client.request(
|
|
453
|
-
"
|
|
454
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
455
|
-
params=remove_none_from_dict({"
|
|
508
|
+
"DELETE",
|
|
509
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
|
|
510
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
456
511
|
headers=self._client_wrapper.get_headers(),
|
|
457
512
|
timeout=60,
|
|
458
513
|
)
|
|
459
514
|
if 200 <= _response.status_code < 300:
|
|
460
|
-
return pydantic.parse_obj_as(typing.
|
|
515
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
461
516
|
if _response.status_code == 422:
|
|
462
517
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
463
518
|
try:
|
|
@@ -466,22 +521,47 @@ class LlamaExtractClient:
|
|
|
466
521
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
467
522
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
468
523
|
|
|
469
|
-
def
|
|
524
|
+
def extract_stateless(
|
|
525
|
+
self,
|
|
526
|
+
*,
|
|
527
|
+
project_id: typing.Optional[str] = None,
|
|
528
|
+
organization_id: typing.Optional[str] = None,
|
|
529
|
+
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
530
|
+
data_schema: ExtractStatelessRequestDataSchema,
|
|
531
|
+
config: ExtractConfig,
|
|
532
|
+
file_id: typing.Optional[str] = OMIT,
|
|
533
|
+
text: typing.Optional[str] = OMIT,
|
|
534
|
+
file: typing.Optional[FileData] = OMIT,
|
|
535
|
+
) -> ExtractJob:
|
|
470
536
|
"""
|
|
537
|
+
Stateless extraction endpoint that uses a default extraction agent in the user's default project.
|
|
538
|
+
Requires data_schema, config, and either file_id, text, or base64 encoded file data.
|
|
539
|
+
|
|
471
540
|
Parameters:
|
|
472
|
-
-
|
|
541
|
+
- project_id: typing.Optional[str].
|
|
542
|
+
|
|
543
|
+
- organization_id: typing.Optional[str].
|
|
544
|
+
|
|
545
|
+
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
546
|
+
|
|
547
|
+
- data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
|
|
548
|
+
|
|
549
|
+
- config: ExtractConfig. The configuration parameters for the extraction
|
|
473
550
|
|
|
474
|
-
-
|
|
551
|
+
- file_id: typing.Optional[str].
|
|
552
|
+
|
|
553
|
+
- text: typing.Optional[str].
|
|
554
|
+
|
|
555
|
+
- file: typing.Optional[FileData].
|
|
475
556
|
---
|
|
476
557
|
from llama_cloud import (
|
|
477
558
|
DocumentChunkMode,
|
|
478
559
|
ExtractConfig,
|
|
479
560
|
ExtractConfigPriority,
|
|
480
|
-
ExtractJobCreate,
|
|
481
|
-
ExtractJobCreatePriority,
|
|
482
561
|
ExtractMode,
|
|
483
562
|
ExtractModels,
|
|
484
563
|
ExtractTarget,
|
|
564
|
+
FileData,
|
|
485
565
|
PublicModelName,
|
|
486
566
|
)
|
|
487
567
|
from llama_cloud.client import LlamaCloud
|
|
@@ -489,27 +569,35 @@ class LlamaExtractClient:
|
|
|
489
569
|
client = LlamaCloud(
|
|
490
570
|
token="YOUR_TOKEN",
|
|
491
571
|
)
|
|
492
|
-
client.llama_extract.
|
|
493
|
-
|
|
494
|
-
priority=
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
),
|
|
572
|
+
client.llama_extract.extract_stateless(
|
|
573
|
+
config=ExtractConfig(
|
|
574
|
+
priority=ExtractConfigPriority.LOW,
|
|
575
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
576
|
+
extraction_mode=ExtractMode.FAST,
|
|
577
|
+
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
578
|
+
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
579
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
580
|
+
),
|
|
581
|
+
file=FileData(
|
|
582
|
+
data="string",
|
|
583
|
+
mime_type="string",
|
|
505
584
|
),
|
|
506
585
|
)
|
|
507
586
|
"""
|
|
587
|
+
_request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
|
|
588
|
+
if webhook_configurations is not OMIT:
|
|
589
|
+
_request["webhook_configurations"] = webhook_configurations
|
|
590
|
+
if file_id is not OMIT:
|
|
591
|
+
_request["file_id"] = file_id
|
|
592
|
+
if text is not OMIT:
|
|
593
|
+
_request["text"] = text
|
|
594
|
+
if file is not OMIT:
|
|
595
|
+
_request["file"] = file
|
|
508
596
|
_response = self._client_wrapper.httpx_client.request(
|
|
509
597
|
"POST",
|
|
510
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
511
|
-
params=remove_none_from_dict({"
|
|
512
|
-
json=jsonable_encoder(
|
|
598
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
|
|
599
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
600
|
+
json=jsonable_encoder(_request),
|
|
513
601
|
headers=self._client_wrapper.get_headers(),
|
|
514
602
|
timeout=60,
|
|
515
603
|
)
|
|
@@ -523,28 +611,39 @@ class LlamaExtractClient:
|
|
|
523
611
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
524
612
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
525
613
|
|
|
526
|
-
def
|
|
614
|
+
def list_extraction_agents(
|
|
615
|
+
self,
|
|
616
|
+
*,
|
|
617
|
+
include_default: typing.Optional[bool] = None,
|
|
618
|
+
project_id: typing.Optional[str] = None,
|
|
619
|
+
organization_id: typing.Optional[str] = None,
|
|
620
|
+
) -> typing.List[ExtractAgent]:
|
|
527
621
|
"""
|
|
528
622
|
Parameters:
|
|
529
|
-
-
|
|
623
|
+
- include_default: typing.Optional[bool]. Whether to include default agents in the results
|
|
624
|
+
|
|
625
|
+
- project_id: typing.Optional[str].
|
|
626
|
+
|
|
627
|
+
- organization_id: typing.Optional[str].
|
|
530
628
|
---
|
|
531
629
|
from llama_cloud.client import LlamaCloud
|
|
532
630
|
|
|
533
631
|
client = LlamaCloud(
|
|
534
632
|
token="YOUR_TOKEN",
|
|
535
633
|
)
|
|
536
|
-
client.llama_extract.
|
|
537
|
-
job_id="string",
|
|
538
|
-
)
|
|
634
|
+
client.llama_extract.list_extraction_agents()
|
|
539
635
|
"""
|
|
540
636
|
_response = self._client_wrapper.httpx_client.request(
|
|
541
637
|
"GET",
|
|
542
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/",
|
|
638
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
639
|
+
params=remove_none_from_dict(
|
|
640
|
+
{"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
|
|
641
|
+
),
|
|
543
642
|
headers=self._client_wrapper.get_headers(),
|
|
544
643
|
timeout=60,
|
|
545
644
|
)
|
|
546
645
|
if 200 <= _response.status_code < 300:
|
|
547
|
-
return pydantic.parse_obj_as(
|
|
646
|
+
return pydantic.parse_obj_as(typing.List[ExtractAgent], _response.json()) # type: ignore
|
|
548
647
|
if _response.status_code == 422:
|
|
549
648
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
550
649
|
try:
|
|
@@ -553,37 +652,34 @@ class LlamaExtractClient:
|
|
|
553
652
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
554
653
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
555
654
|
|
|
556
|
-
def
|
|
655
|
+
def create_extraction_agent(
|
|
557
656
|
self,
|
|
558
657
|
*,
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
658
|
+
project_id: typing.Optional[str] = None,
|
|
659
|
+
organization_id: typing.Optional[str] = None,
|
|
660
|
+
name: str,
|
|
661
|
+
data_schema: ExtractAgentCreateDataSchema,
|
|
662
|
+
config: ExtractConfig,
|
|
663
|
+
) -> ExtractAgent:
|
|
563
664
|
"""
|
|
564
665
|
Parameters:
|
|
565
|
-
-
|
|
666
|
+
- project_id: typing.Optional[str].
|
|
667
|
+
|
|
668
|
+
- organization_id: typing.Optional[str].
|
|
566
669
|
|
|
567
|
-
-
|
|
670
|
+
- name: str. The name of the extraction schema
|
|
671
|
+
|
|
672
|
+
- data_schema: ExtractAgentCreateDataSchema. The schema of the data.
|
|
568
673
|
|
|
569
|
-
-
|
|
674
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
570
675
|
---
|
|
571
676
|
from llama_cloud import (
|
|
572
|
-
ChunkMode,
|
|
573
677
|
DocumentChunkMode,
|
|
574
678
|
ExtractConfig,
|
|
575
679
|
ExtractConfigPriority,
|
|
576
|
-
ExtractJobCreate,
|
|
577
|
-
ExtractJobCreatePriority,
|
|
578
680
|
ExtractMode,
|
|
579
681
|
ExtractModels,
|
|
580
682
|
ExtractTarget,
|
|
581
|
-
FailPageMode,
|
|
582
|
-
LlamaExtractSettings,
|
|
583
|
-
LlamaParseParameters,
|
|
584
|
-
LlamaParseParametersPriority,
|
|
585
|
-
MultimodalParseResolution,
|
|
586
|
-
ParsingMode,
|
|
587
683
|
PublicModelName,
|
|
588
684
|
)
|
|
589
685
|
from llama_cloud.client import LlamaCloud
|
|
@@ -591,44 +687,28 @@ class LlamaExtractClient:
|
|
|
591
687
|
client = LlamaCloud(
|
|
592
688
|
token="YOUR_TOKEN",
|
|
593
689
|
)
|
|
594
|
-
client.llama_extract.
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
604
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
605
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
606
|
-
),
|
|
607
|
-
),
|
|
608
|
-
extract_settings=LlamaExtractSettings(
|
|
609
|
-
chunk_mode=ChunkMode.PAGE,
|
|
610
|
-
llama_parse_params=LlamaParseParameters(
|
|
611
|
-
priority=LlamaParseParametersPriority.LOW,
|
|
612
|
-
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
613
|
-
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
614
|
-
),
|
|
615
|
-
multimodal_parse_resolution=MultimodalParseResolution.MEDIUM,
|
|
690
|
+
client.llama_extract.create_extraction_agent(
|
|
691
|
+
name="string",
|
|
692
|
+
config=ExtractConfig(
|
|
693
|
+
priority=ExtractConfigPriority.LOW,
|
|
694
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
695
|
+
extraction_mode=ExtractMode.FAST,
|
|
696
|
+
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
697
|
+
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
698
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
616
699
|
),
|
|
617
700
|
)
|
|
618
701
|
"""
|
|
619
|
-
_request: typing.Dict[str, typing.Any] = {"job_create": job_create}
|
|
620
|
-
if extract_settings is not OMIT:
|
|
621
|
-
_request["extract_settings"] = extract_settings
|
|
622
702
|
_response = self._client_wrapper.httpx_client.request(
|
|
623
703
|
"POST",
|
|
624
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
625
|
-
params=remove_none_from_dict({"
|
|
626
|
-
json=jsonable_encoder(
|
|
704
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
705
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
706
|
+
json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
|
|
627
707
|
headers=self._client_wrapper.get_headers(),
|
|
628
708
|
timeout=60,
|
|
629
709
|
)
|
|
630
710
|
if 200 <= _response.status_code < 300:
|
|
631
|
-
return pydantic.parse_obj_as(
|
|
711
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
632
712
|
if _response.status_code == 422:
|
|
633
713
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
634
714
|
try:
|
|
@@ -637,44 +717,34 @@ class LlamaExtractClient:
|
|
|
637
717
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
638
718
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
639
719
|
|
|
640
|
-
def
|
|
641
|
-
self,
|
|
642
|
-
|
|
643
|
-
from_ui: typing.Optional[bool] = None,
|
|
644
|
-
extraction_agent_id: str,
|
|
645
|
-
file: typing.IO,
|
|
646
|
-
data_schema_override: typing.Optional[str] = None,
|
|
647
|
-
config_override: typing.Optional[str] = None,
|
|
648
|
-
) -> ExtractJob:
|
|
720
|
+
def validate_extraction_schema(
|
|
721
|
+
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
722
|
+
) -> ExtractSchemaValidateResponse:
|
|
649
723
|
"""
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
- extraction_agent_id: str.
|
|
654
|
-
|
|
655
|
-
- file: typing.IO.
|
|
724
|
+
Validates an extraction agent's schema definition.
|
|
725
|
+
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
656
726
|
|
|
657
|
-
|
|
727
|
+
Parameters:
|
|
728
|
+
- data_schema: ExtractSchemaValidateRequestDataSchema.
|
|
729
|
+
---
|
|
730
|
+
from llama_cloud.client import LlamaCloud
|
|
658
731
|
|
|
659
|
-
|
|
732
|
+
client = LlamaCloud(
|
|
733
|
+
token="YOUR_TOKEN",
|
|
734
|
+
)
|
|
735
|
+
client.llama_extract.validate_extraction_schema()
|
|
660
736
|
"""
|
|
661
737
|
_response = self._client_wrapper.httpx_client.request(
|
|
662
738
|
"POST",
|
|
663
|
-
urllib.parse.urljoin(
|
|
664
|
-
|
|
665
|
-
data=jsonable_encoder(
|
|
666
|
-
{
|
|
667
|
-
"extraction_agent_id": extraction_agent_id,
|
|
668
|
-
"data_schema_override": data_schema_override,
|
|
669
|
-
"config_override": config_override,
|
|
670
|
-
}
|
|
739
|
+
urllib.parse.urljoin(
|
|
740
|
+
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
|
|
671
741
|
),
|
|
672
|
-
|
|
742
|
+
json=jsonable_encoder({"data_schema": data_schema}),
|
|
673
743
|
headers=self._client_wrapper.get_headers(),
|
|
674
744
|
timeout=60,
|
|
675
745
|
)
|
|
676
746
|
if 200 <= _response.status_code < 300:
|
|
677
|
-
return pydantic.parse_obj_as(
|
|
747
|
+
return pydantic.parse_obj_as(ExtractSchemaValidateResponse, _response.json()) # type: ignore
|
|
678
748
|
if _response.status_code == 422:
|
|
679
749
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
680
750
|
try:
|
|
@@ -683,69 +753,50 @@ class LlamaExtractClient:
|
|
|
683
753
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
684
754
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
685
755
|
|
|
686
|
-
def
|
|
756
|
+
def generate_extraction_schema(
|
|
687
757
|
self,
|
|
688
758
|
*,
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
) -> typing.List[ExtractJob]:
|
|
759
|
+
project_id: typing.Optional[str] = None,
|
|
760
|
+
organization_id: typing.Optional[str] = None,
|
|
761
|
+
prompt: typing.Optional[str] = OMIT,
|
|
762
|
+
file_id: typing.Optional[str] = OMIT,
|
|
763
|
+
) -> ExtractSchemaGenerateResponse:
|
|
695
764
|
"""
|
|
696
|
-
|
|
697
|
-
- from_ui: typing.Optional[bool].
|
|
765
|
+
Generates an extraction agent's schema definition from a file and/or natural language prompt.
|
|
698
766
|
|
|
699
|
-
|
|
767
|
+
Parameters:
|
|
768
|
+
- project_id: typing.Optional[str].
|
|
700
769
|
|
|
701
|
-
-
|
|
770
|
+
- organization_id: typing.Optional[str].
|
|
702
771
|
|
|
703
|
-
-
|
|
772
|
+
- prompt: typing.Optional[str].
|
|
704
773
|
|
|
705
|
-
-
|
|
774
|
+
- file_id: typing.Optional[str].
|
|
706
775
|
---
|
|
707
|
-
from llama_cloud import (
|
|
708
|
-
DocumentChunkMode,
|
|
709
|
-
ExtractConfig,
|
|
710
|
-
ExtractConfigPriority,
|
|
711
|
-
ExtractMode,
|
|
712
|
-
ExtractModels,
|
|
713
|
-
ExtractTarget,
|
|
714
|
-
PublicModelName,
|
|
715
|
-
)
|
|
716
776
|
from llama_cloud.client import LlamaCloud
|
|
717
777
|
|
|
718
778
|
client = LlamaCloud(
|
|
719
779
|
token="YOUR_TOKEN",
|
|
720
780
|
)
|
|
721
|
-
client.llama_extract.
|
|
722
|
-
extraction_agent_id="string",
|
|
723
|
-
file_ids=[],
|
|
724
|
-
config_override=ExtractConfig(
|
|
725
|
-
priority=ExtractConfigPriority.LOW,
|
|
726
|
-
extraction_target=ExtractTarget.PER_DOC,
|
|
727
|
-
extraction_mode=ExtractMode.FAST,
|
|
728
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
729
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
730
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
731
|
-
),
|
|
732
|
-
)
|
|
781
|
+
client.llama_extract.generate_extraction_schema()
|
|
733
782
|
"""
|
|
734
|
-
_request: typing.Dict[str, typing.Any] = {
|
|
735
|
-
if
|
|
736
|
-
_request["
|
|
737
|
-
if
|
|
738
|
-
_request["
|
|
783
|
+
_request: typing.Dict[str, typing.Any] = {}
|
|
784
|
+
if prompt is not OMIT:
|
|
785
|
+
_request["prompt"] = prompt
|
|
786
|
+
if file_id is not OMIT:
|
|
787
|
+
_request["file_id"] = file_id
|
|
739
788
|
_response = self._client_wrapper.httpx_client.request(
|
|
740
789
|
"POST",
|
|
741
|
-
urllib.parse.urljoin(
|
|
742
|
-
|
|
790
|
+
urllib.parse.urljoin(
|
|
791
|
+
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/generate"
|
|
792
|
+
),
|
|
793
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
743
794
|
json=jsonable_encoder(_request),
|
|
744
795
|
headers=self._client_wrapper.get_headers(),
|
|
745
796
|
timeout=60,
|
|
746
797
|
)
|
|
747
798
|
if 200 <= _response.status_code < 300:
|
|
748
|
-
return pydantic.parse_obj_as(
|
|
799
|
+
return pydantic.parse_obj_as(ExtractSchemaGenerateResponse, _response.json()) # type: ignore
|
|
749
800
|
if _response.status_code == 422:
|
|
750
801
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
751
802
|
try:
|
|
@@ -754,12 +805,12 @@ class LlamaExtractClient:
|
|
|
754
805
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
755
806
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
756
807
|
|
|
757
|
-
def
|
|
758
|
-
self,
|
|
759
|
-
) ->
|
|
808
|
+
def get_extraction_agent_by_name(
|
|
809
|
+
self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
810
|
+
) -> ExtractAgent:
|
|
760
811
|
"""
|
|
761
812
|
Parameters:
|
|
762
|
-
-
|
|
813
|
+
- name: str.
|
|
763
814
|
|
|
764
815
|
- project_id: typing.Optional[str].
|
|
765
816
|
|
|
@@ -770,19 +821,21 @@ class LlamaExtractClient:
|
|
|
770
821
|
client = LlamaCloud(
|
|
771
822
|
token="YOUR_TOKEN",
|
|
772
823
|
)
|
|
773
|
-
client.llama_extract.
|
|
774
|
-
|
|
824
|
+
client.llama_extract.get_extraction_agent_by_name(
|
|
825
|
+
name="string",
|
|
775
826
|
)
|
|
776
827
|
"""
|
|
777
828
|
_response = self._client_wrapper.httpx_client.request(
|
|
778
829
|
"GET",
|
|
779
|
-
urllib.parse.urljoin(
|
|
830
|
+
urllib.parse.urljoin(
|
|
831
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
|
|
832
|
+
),
|
|
780
833
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
781
834
|
headers=self._client_wrapper.get_headers(),
|
|
782
835
|
timeout=60,
|
|
783
836
|
)
|
|
784
837
|
if 200 <= _response.status_code < 300:
|
|
785
|
-
return pydantic.parse_obj_as(
|
|
838
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
786
839
|
if _response.status_code == 422:
|
|
787
840
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
788
841
|
try:
|
|
@@ -791,35 +844,36 @@ class LlamaExtractClient:
|
|
|
791
844
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
792
845
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
793
846
|
|
|
794
|
-
def
|
|
795
|
-
self, *,
|
|
796
|
-
) ->
|
|
847
|
+
def get_or_create_default_extraction_agent(
|
|
848
|
+
self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
849
|
+
) -> ExtractAgent:
|
|
797
850
|
"""
|
|
798
|
-
|
|
799
|
-
|
|
851
|
+
Get or create a default extraction agent for the current project.
|
|
852
|
+
The default agent has an empty schema and default configuration.
|
|
800
853
|
|
|
801
|
-
|
|
854
|
+
Parameters:
|
|
855
|
+
- project_id: typing.Optional[str].
|
|
802
856
|
|
|
803
|
-
-
|
|
857
|
+
- organization_id: typing.Optional[str].
|
|
804
858
|
---
|
|
805
859
|
from llama_cloud.client import LlamaCloud
|
|
806
860
|
|
|
807
861
|
client = LlamaCloud(
|
|
808
862
|
token="YOUR_TOKEN",
|
|
809
863
|
)
|
|
810
|
-
client.llama_extract.
|
|
811
|
-
extraction_agent_id="string",
|
|
812
|
-
)
|
|
864
|
+
client.llama_extract.get_or_create_default_extraction_agent()
|
|
813
865
|
"""
|
|
814
866
|
_response = self._client_wrapper.httpx_client.request(
|
|
815
867
|
"GET",
|
|
816
|
-
urllib.parse.urljoin(
|
|
817
|
-
|
|
868
|
+
urllib.parse.urljoin(
|
|
869
|
+
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
|
|
870
|
+
),
|
|
871
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
818
872
|
headers=self._client_wrapper.get_headers(),
|
|
819
873
|
timeout=60,
|
|
820
874
|
)
|
|
821
875
|
if 200 <= _response.status_code < 300:
|
|
822
|
-
return pydantic.parse_obj_as(
|
|
876
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
823
877
|
if _response.status_code == 422:
|
|
824
878
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
825
879
|
try:
|
|
@@ -828,7 +882,7 @@ class LlamaExtractClient:
|
|
|
828
882
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
829
883
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
830
884
|
|
|
831
|
-
def
|
|
885
|
+
def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
|
|
832
886
|
"""
|
|
833
887
|
Parameters:
|
|
834
888
|
- extraction_agent_id: str.
|
|
@@ -838,19 +892,20 @@ class LlamaExtractClient:
|
|
|
838
892
|
client = LlamaCloud(
|
|
839
893
|
token="YOUR_TOKEN",
|
|
840
894
|
)
|
|
841
|
-
client.llama_extract.
|
|
895
|
+
client.llama_extract.get_extraction_agent(
|
|
842
896
|
extraction_agent_id="string",
|
|
843
897
|
)
|
|
844
898
|
"""
|
|
845
899
|
_response = self._client_wrapper.httpx_client.request(
|
|
846
900
|
"GET",
|
|
847
|
-
urllib.parse.urljoin(
|
|
848
|
-
|
|
901
|
+
urllib.parse.urljoin(
|
|
902
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
903
|
+
),
|
|
849
904
|
headers=self._client_wrapper.get_headers(),
|
|
850
905
|
timeout=60,
|
|
851
906
|
)
|
|
852
907
|
if 200 <= _response.status_code < 300:
|
|
853
|
-
return pydantic.parse_obj_as(
|
|
908
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
854
909
|
if _response.status_code == 422:
|
|
855
910
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
856
911
|
try:
|
|
@@ -859,35 +914,54 @@ class LlamaExtractClient:
|
|
|
859
914
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
860
915
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
861
916
|
|
|
862
|
-
def
|
|
863
|
-
self,
|
|
864
|
-
) ->
|
|
917
|
+
def update_extraction_agent(
|
|
918
|
+
self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
|
|
919
|
+
) -> ExtractAgent:
|
|
865
920
|
"""
|
|
866
921
|
Parameters:
|
|
867
|
-
-
|
|
922
|
+
- extraction_agent_id: str.
|
|
868
923
|
|
|
869
|
-
-
|
|
924
|
+
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
870
925
|
|
|
871
|
-
-
|
|
926
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
872
927
|
---
|
|
928
|
+
from llama_cloud import (
|
|
929
|
+
DocumentChunkMode,
|
|
930
|
+
ExtractConfig,
|
|
931
|
+
ExtractConfigPriority,
|
|
932
|
+
ExtractMode,
|
|
933
|
+
ExtractModels,
|
|
934
|
+
ExtractTarget,
|
|
935
|
+
PublicModelName,
|
|
936
|
+
)
|
|
873
937
|
from llama_cloud.client import LlamaCloud
|
|
874
938
|
|
|
875
939
|
client = LlamaCloud(
|
|
876
940
|
token="YOUR_TOKEN",
|
|
877
941
|
)
|
|
878
|
-
client.llama_extract.
|
|
879
|
-
|
|
942
|
+
client.llama_extract.update_extraction_agent(
|
|
943
|
+
extraction_agent_id="string",
|
|
944
|
+
config=ExtractConfig(
|
|
945
|
+
priority=ExtractConfigPriority.LOW,
|
|
946
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
947
|
+
extraction_mode=ExtractMode.FAST,
|
|
948
|
+
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
949
|
+
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
950
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
951
|
+
),
|
|
880
952
|
)
|
|
881
953
|
"""
|
|
882
954
|
_response = self._client_wrapper.httpx_client.request(
|
|
883
|
-
"
|
|
884
|
-
urllib.parse.urljoin(
|
|
885
|
-
|
|
955
|
+
"PUT",
|
|
956
|
+
urllib.parse.urljoin(
|
|
957
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
958
|
+
),
|
|
959
|
+
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
886
960
|
headers=self._client_wrapper.get_headers(),
|
|
887
961
|
timeout=60,
|
|
888
962
|
)
|
|
889
963
|
if 200 <= _response.status_code < 300:
|
|
890
|
-
return pydantic.parse_obj_as(
|
|
964
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
891
965
|
if _response.status_code == 422:
|
|
892
966
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
893
967
|
try:
|
|
@@ -896,35 +970,30 @@ class LlamaExtractClient:
|
|
|
896
970
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
897
971
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
898
972
|
|
|
899
|
-
def
|
|
900
|
-
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
901
|
-
) -> ExtractRun:
|
|
973
|
+
def delete_extraction_agent(self, extraction_agent_id: str) -> typing.Any:
|
|
902
974
|
"""
|
|
903
975
|
Parameters:
|
|
904
|
-
-
|
|
905
|
-
|
|
906
|
-
- project_id: typing.Optional[str].
|
|
907
|
-
|
|
908
|
-
- organization_id: typing.Optional[str].
|
|
976
|
+
- extraction_agent_id: str.
|
|
909
977
|
---
|
|
910
978
|
from llama_cloud.client import LlamaCloud
|
|
911
979
|
|
|
912
980
|
client = LlamaCloud(
|
|
913
981
|
token="YOUR_TOKEN",
|
|
914
982
|
)
|
|
915
|
-
client.llama_extract.
|
|
916
|
-
|
|
983
|
+
client.llama_extract.delete_extraction_agent(
|
|
984
|
+
extraction_agent_id="string",
|
|
917
985
|
)
|
|
918
986
|
"""
|
|
919
987
|
_response = self._client_wrapper.httpx_client.request(
|
|
920
|
-
"
|
|
921
|
-
urllib.parse.urljoin(
|
|
922
|
-
|
|
988
|
+
"DELETE",
|
|
989
|
+
urllib.parse.urljoin(
|
|
990
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
991
|
+
),
|
|
923
992
|
headers=self._client_wrapper.get_headers(),
|
|
924
993
|
timeout=60,
|
|
925
994
|
)
|
|
926
995
|
if 200 <= _response.status_code < 300:
|
|
927
|
-
return pydantic.parse_obj_as(
|
|
996
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
928
997
|
if _response.status_code == 422:
|
|
929
998
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
930
999
|
try:
|
|
@@ -933,35 +1002,34 @@ class LlamaExtractClient:
|
|
|
933
1002
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
934
1003
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
935
1004
|
|
|
936
|
-
def delete_extraction_run(
|
|
937
|
-
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
938
|
-
) -> typing.Any:
|
|
939
|
-
"""
|
|
940
|
-
Parameters:
|
|
941
|
-
- run_id: str.
|
|
942
1005
|
|
|
943
|
-
|
|
1006
|
+
class AsyncLlamaExtractClient:
|
|
1007
|
+
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
1008
|
+
self._client_wrapper = client_wrapper
|
|
944
1009
|
|
|
945
|
-
|
|
1010
|
+
async def list_jobs(self, *, extraction_agent_id: str) -> typing.List[ExtractJob]:
|
|
1011
|
+
"""
|
|
1012
|
+
Parameters:
|
|
1013
|
+
- extraction_agent_id: str.
|
|
946
1014
|
---
|
|
947
|
-
from llama_cloud.client import
|
|
1015
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
948
1016
|
|
|
949
|
-
client =
|
|
1017
|
+
client = AsyncLlamaCloud(
|
|
950
1018
|
token="YOUR_TOKEN",
|
|
951
1019
|
)
|
|
952
|
-
client.llama_extract.
|
|
953
|
-
|
|
1020
|
+
await client.llama_extract.list_jobs(
|
|
1021
|
+
extraction_agent_id="string",
|
|
954
1022
|
)
|
|
955
1023
|
"""
|
|
956
|
-
_response = self._client_wrapper.httpx_client.request(
|
|
957
|
-
"
|
|
958
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/",
|
|
959
|
-
params=remove_none_from_dict({"
|
|
1024
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1025
|
+
"GET",
|
|
1026
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
|
|
1027
|
+
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
|
|
960
1028
|
headers=self._client_wrapper.get_headers(),
|
|
961
1029
|
timeout=60,
|
|
962
1030
|
)
|
|
963
1031
|
if 200 <= _response.status_code < 300:
|
|
964
|
-
return pydantic.parse_obj_as(typing.
|
|
1032
|
+
return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
|
|
965
1033
|
if _response.status_code == 422:
|
|
966
1034
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
967
1035
|
try:
|
|
@@ -970,56 +1038,53 @@ class LlamaExtractClient:
|
|
|
970
1038
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
971
1039
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
972
1040
|
|
|
973
|
-
def
|
|
1041
|
+
async def run_job(
|
|
974
1042
|
self,
|
|
975
1043
|
*,
|
|
976
|
-
|
|
977
|
-
|
|
1044
|
+
from_ui: typing.Optional[bool] = None,
|
|
1045
|
+
priority: typing.Optional[ExtractJobCreatePriority] = OMIT,
|
|
978
1046
|
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
file: typing.Optional[FileData] = OMIT,
|
|
1047
|
+
extraction_agent_id: str,
|
|
1048
|
+
file_id: str,
|
|
1049
|
+
data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride] = OMIT,
|
|
1050
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
984
1051
|
) -> ExtractJob:
|
|
985
1052
|
"""
|
|
986
|
-
Stateless extraction endpoint that uses a default extraction agent in the user's default project.
|
|
987
|
-
Requires data_schema, config, and either file_id, text, or base64 encoded file data.
|
|
988
|
-
|
|
989
1053
|
Parameters:
|
|
990
|
-
-
|
|
1054
|
+
- from_ui: typing.Optional[bool].
|
|
991
1055
|
|
|
992
|
-
-
|
|
1056
|
+
- priority: typing.Optional[ExtractJobCreatePriority].
|
|
993
1057
|
|
|
994
1058
|
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
995
1059
|
|
|
996
|
-
-
|
|
997
|
-
|
|
998
|
-
- config: ExtractConfig. The configuration parameters for the extraction
|
|
1060
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
999
1061
|
|
|
1000
|
-
- file_id:
|
|
1062
|
+
- file_id: str. The id of the file
|
|
1001
1063
|
|
|
1002
|
-
-
|
|
1064
|
+
- data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
1003
1065
|
|
|
1004
|
-
-
|
|
1066
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
1005
1067
|
---
|
|
1006
1068
|
from llama_cloud import (
|
|
1007
1069
|
DocumentChunkMode,
|
|
1008
1070
|
ExtractConfig,
|
|
1009
1071
|
ExtractConfigPriority,
|
|
1072
|
+
ExtractJobCreatePriority,
|
|
1010
1073
|
ExtractMode,
|
|
1011
1074
|
ExtractModels,
|
|
1012
1075
|
ExtractTarget,
|
|
1013
|
-
FileData,
|
|
1014
1076
|
PublicModelName,
|
|
1015
1077
|
)
|
|
1016
|
-
from llama_cloud.client import
|
|
1078
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1017
1079
|
|
|
1018
|
-
client =
|
|
1080
|
+
client = AsyncLlamaCloud(
|
|
1019
1081
|
token="YOUR_TOKEN",
|
|
1020
1082
|
)
|
|
1021
|
-
client.llama_extract.
|
|
1022
|
-
|
|
1083
|
+
await client.llama_extract.run_job(
|
|
1084
|
+
priority=ExtractJobCreatePriority.LOW,
|
|
1085
|
+
extraction_agent_id="string",
|
|
1086
|
+
file_id="string",
|
|
1087
|
+
config_override=ExtractConfig(
|
|
1023
1088
|
priority=ExtractConfigPriority.LOW,
|
|
1024
1089
|
extraction_target=ExtractTarget.PER_DOC,
|
|
1025
1090
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -1027,25 +1092,21 @@ class LlamaExtractClient:
|
|
|
1027
1092
|
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1028
1093
|
chunk_mode=DocumentChunkMode.PAGE,
|
|
1029
1094
|
),
|
|
1030
|
-
file=FileData(
|
|
1031
|
-
data="string",
|
|
1032
|
-
mime_type="string",
|
|
1033
|
-
),
|
|
1034
1095
|
)
|
|
1035
1096
|
"""
|
|
1036
|
-
_request: typing.Dict[str, typing.Any] = {"
|
|
1097
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_id": file_id}
|
|
1098
|
+
if priority is not OMIT:
|
|
1099
|
+
_request["priority"] = priority
|
|
1037
1100
|
if webhook_configurations is not OMIT:
|
|
1038
1101
|
_request["webhook_configurations"] = webhook_configurations
|
|
1039
|
-
if
|
|
1040
|
-
_request["
|
|
1041
|
-
if
|
|
1042
|
-
_request["
|
|
1043
|
-
|
|
1044
|
-
_request["file"] = file
|
|
1045
|
-
_response = self._client_wrapper.httpx_client.request(
|
|
1102
|
+
if data_schema_override is not OMIT:
|
|
1103
|
+
_request["data_schema_override"] = data_schema_override
|
|
1104
|
+
if config_override is not OMIT:
|
|
1105
|
+
_request["config_override"] = config_override
|
|
1106
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1046
1107
|
"POST",
|
|
1047
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
1048
|
-
params=remove_none_from_dict({"
|
|
1108
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
|
|
1109
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1049
1110
|
json=jsonable_encoder(_request),
|
|
1050
1111
|
headers=self._client_wrapper.get_headers(),
|
|
1051
1112
|
timeout=60,
|
|
@@ -1060,44 +1121,74 @@ class LlamaExtractClient:
|
|
|
1060
1121
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1061
1122
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1062
1123
|
|
|
1063
|
-
|
|
1064
|
-
class AsyncLlamaExtractClient:
|
|
1065
|
-
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
1066
|
-
self._client_wrapper = client_wrapper
|
|
1067
|
-
|
|
1068
|
-
async def list_extraction_agents(
|
|
1069
|
-
self,
|
|
1070
|
-
*,
|
|
1071
|
-
include_default: typing.Optional[bool] = None,
|
|
1072
|
-
project_id: typing.Optional[str] = None,
|
|
1073
|
-
organization_id: typing.Optional[str] = None,
|
|
1074
|
-
) -> typing.List[ExtractAgent]:
|
|
1124
|
+
async def get_job(self, job_id: str) -> ExtractJob:
|
|
1075
1125
|
"""
|
|
1076
1126
|
Parameters:
|
|
1077
|
-
-
|
|
1078
|
-
|
|
1079
|
-
- project_id: typing.Optional[str].
|
|
1080
|
-
|
|
1081
|
-
- organization_id: typing.Optional[str].
|
|
1127
|
+
- job_id: str.
|
|
1082
1128
|
---
|
|
1083
1129
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1084
1130
|
|
|
1085
1131
|
client = AsyncLlamaCloud(
|
|
1086
1132
|
token="YOUR_TOKEN",
|
|
1087
1133
|
)
|
|
1088
|
-
await client.llama_extract.
|
|
1134
|
+
await client.llama_extract.get_job(
|
|
1135
|
+
job_id="string",
|
|
1136
|
+
)
|
|
1089
1137
|
"""
|
|
1090
1138
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1091
1139
|
"GET",
|
|
1092
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
1093
|
-
|
|
1094
|
-
|
|
1140
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
|
|
1141
|
+
headers=self._client_wrapper.get_headers(),
|
|
1142
|
+
timeout=60,
|
|
1143
|
+
)
|
|
1144
|
+
if 200 <= _response.status_code < 300:
|
|
1145
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1146
|
+
if _response.status_code == 422:
|
|
1147
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1148
|
+
try:
|
|
1149
|
+
_response_json = _response.json()
|
|
1150
|
+
except JSONDecodeError:
|
|
1151
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1152
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1153
|
+
|
|
1154
|
+
async def run_job_on_file(
|
|
1155
|
+
self,
|
|
1156
|
+
*,
|
|
1157
|
+
from_ui: typing.Optional[bool] = None,
|
|
1158
|
+
extraction_agent_id: str,
|
|
1159
|
+
file: typing.IO,
|
|
1160
|
+
data_schema_override: typing.Optional[str] = None,
|
|
1161
|
+
config_override: typing.Optional[str] = None,
|
|
1162
|
+
) -> ExtractJob:
|
|
1163
|
+
"""
|
|
1164
|
+
Parameters:
|
|
1165
|
+
- from_ui: typing.Optional[bool].
|
|
1166
|
+
|
|
1167
|
+
- extraction_agent_id: str.
|
|
1168
|
+
|
|
1169
|
+
- file: typing.IO.
|
|
1170
|
+
|
|
1171
|
+
- data_schema_override: typing.Optional[str].
|
|
1172
|
+
|
|
1173
|
+
- config_override: typing.Optional[str].
|
|
1174
|
+
"""
|
|
1175
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1176
|
+
"POST",
|
|
1177
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
1178
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1179
|
+
data=jsonable_encoder(
|
|
1180
|
+
{
|
|
1181
|
+
"extraction_agent_id": extraction_agent_id,
|
|
1182
|
+
"data_schema_override": data_schema_override,
|
|
1183
|
+
"config_override": config_override,
|
|
1184
|
+
}
|
|
1095
1185
|
),
|
|
1186
|
+
files={"file": file},
|
|
1096
1187
|
headers=self._client_wrapper.get_headers(),
|
|
1097
1188
|
timeout=60,
|
|
1098
1189
|
)
|
|
1099
1190
|
if 200 <= _response.status_code < 300:
|
|
1100
|
-
return pydantic.parse_obj_as(
|
|
1191
|
+
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1101
1192
|
if _response.status_code == 422:
|
|
1102
1193
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1103
1194
|
try:
|
|
@@ -1106,26 +1197,26 @@ class AsyncLlamaExtractClient:
|
|
|
1106
1197
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1107
1198
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1108
1199
|
|
|
1109
|
-
async def
|
|
1200
|
+
async def run_batch_jobs(
|
|
1110
1201
|
self,
|
|
1111
1202
|
*,
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
) ->
|
|
1203
|
+
from_ui: typing.Optional[bool] = None,
|
|
1204
|
+
extraction_agent_id: str,
|
|
1205
|
+
file_ids: typing.List[str],
|
|
1206
|
+
data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
|
|
1207
|
+
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
1208
|
+
) -> typing.List[ExtractJob]:
|
|
1118
1209
|
"""
|
|
1119
1210
|
Parameters:
|
|
1120
|
-
-
|
|
1211
|
+
- from_ui: typing.Optional[bool].
|
|
1121
1212
|
|
|
1122
|
-
-
|
|
1213
|
+
- extraction_agent_id: str. The id of the extraction agent
|
|
1123
1214
|
|
|
1124
|
-
-
|
|
1215
|
+
- file_ids: typing.List[str]. The ids of the files
|
|
1125
1216
|
|
|
1126
|
-
-
|
|
1217
|
+
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
|
|
1127
1218
|
|
|
1128
|
-
-
|
|
1219
|
+
- config_override: typing.Optional[ExtractConfig].
|
|
1129
1220
|
---
|
|
1130
1221
|
from llama_cloud import (
|
|
1131
1222
|
DocumentChunkMode,
|
|
@@ -1141,9 +1232,10 @@ class AsyncLlamaExtractClient:
|
|
|
1141
1232
|
client = AsyncLlamaCloud(
|
|
1142
1233
|
token="YOUR_TOKEN",
|
|
1143
1234
|
)
|
|
1144
|
-
await client.llama_extract.
|
|
1145
|
-
|
|
1146
|
-
|
|
1235
|
+
await client.llama_extract.run_batch_jobs(
|
|
1236
|
+
extraction_agent_id="string",
|
|
1237
|
+
file_ids=[],
|
|
1238
|
+
config_override=ExtractConfig(
|
|
1147
1239
|
priority=ExtractConfigPriority.LOW,
|
|
1148
1240
|
extraction_target=ExtractTarget.PER_DOC,
|
|
1149
1241
|
extraction_mode=ExtractMode.FAST,
|
|
@@ -1153,16 +1245,21 @@ class AsyncLlamaExtractClient:
|
|
|
1153
1245
|
),
|
|
1154
1246
|
)
|
|
1155
1247
|
"""
|
|
1248
|
+
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
1249
|
+
if data_schema_override is not OMIT:
|
|
1250
|
+
_request["data_schema_override"] = data_schema_override
|
|
1251
|
+
if config_override is not OMIT:
|
|
1252
|
+
_request["config_override"] = config_override
|
|
1156
1253
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1157
1254
|
"POST",
|
|
1158
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
1159
|
-
params=remove_none_from_dict({"
|
|
1160
|
-
json=jsonable_encoder(
|
|
1255
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
|
|
1256
|
+
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1257
|
+
json=jsonable_encoder(_request),
|
|
1161
1258
|
headers=self._client_wrapper.get_headers(),
|
|
1162
1259
|
timeout=60,
|
|
1163
1260
|
)
|
|
1164
1261
|
if 200 <= _response.status_code < 300:
|
|
1165
|
-
return pydantic.parse_obj_as(
|
|
1262
|
+
return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
|
|
1166
1263
|
if _response.status_code == 422:
|
|
1167
1264
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1168
1265
|
try:
|
|
@@ -1171,34 +1268,35 @@ class AsyncLlamaExtractClient:
|
|
|
1171
1268
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1172
1269
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1173
1270
|
|
|
1174
|
-
async def
|
|
1175
|
-
self, *,
|
|
1176
|
-
) ->
|
|
1271
|
+
async def get_job_result(
|
|
1272
|
+
self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1273
|
+
) -> ExtractResultset:
|
|
1177
1274
|
"""
|
|
1178
|
-
Validates an extraction agent's schema definition.
|
|
1179
|
-
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
1180
|
-
|
|
1181
1275
|
Parameters:
|
|
1182
|
-
-
|
|
1276
|
+
- job_id: str.
|
|
1277
|
+
|
|
1278
|
+
- project_id: typing.Optional[str].
|
|
1279
|
+
|
|
1280
|
+
- organization_id: typing.Optional[str].
|
|
1183
1281
|
---
|
|
1184
1282
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1185
1283
|
|
|
1186
1284
|
client = AsyncLlamaCloud(
|
|
1187
1285
|
token="YOUR_TOKEN",
|
|
1188
1286
|
)
|
|
1189
|
-
await client.llama_extract.
|
|
1287
|
+
await client.llama_extract.get_job_result(
|
|
1288
|
+
job_id="string",
|
|
1289
|
+
)
|
|
1190
1290
|
"""
|
|
1191
1291
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1192
|
-
"
|
|
1193
|
-
urllib.parse.urljoin(
|
|
1194
|
-
|
|
1195
|
-
),
|
|
1196
|
-
json=jsonable_encoder({"data_schema": data_schema}),
|
|
1292
|
+
"GET",
|
|
1293
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}/result"),
|
|
1294
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1197
1295
|
headers=self._client_wrapper.get_headers(),
|
|
1198
1296
|
timeout=60,
|
|
1199
1297
|
)
|
|
1200
1298
|
if 200 <= _response.status_code < 300:
|
|
1201
|
-
return pydantic.parse_obj_as(
|
|
1299
|
+
return pydantic.parse_obj_as(ExtractResultset, _response.json()) # type: ignore
|
|
1202
1300
|
if _response.status_code == 422:
|
|
1203
1301
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1204
1302
|
try:
|
|
@@ -1207,50 +1305,35 @@ class AsyncLlamaExtractClient:
|
|
|
1207
1305
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1208
1306
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1209
1307
|
|
|
1210
|
-
async def
|
|
1211
|
-
self,
|
|
1212
|
-
|
|
1213
|
-
project_id: typing.Optional[str] = None,
|
|
1214
|
-
organization_id: typing.Optional[str] = None,
|
|
1215
|
-
prompt: typing.Optional[str] = OMIT,
|
|
1216
|
-
file_id: typing.Optional[str] = OMIT,
|
|
1217
|
-
) -> ExtractSchemaGenerateResponse:
|
|
1308
|
+
async def list_extract_runs(
|
|
1309
|
+
self, *, extraction_agent_id: str, skip: typing.Optional[int] = None, limit: typing.Optional[int] = None
|
|
1310
|
+
) -> PaginatedExtractRunsResponse:
|
|
1218
1311
|
"""
|
|
1219
|
-
Generates an extraction agent's schema definition from a file and/or natural language prompt.
|
|
1220
|
-
|
|
1221
1312
|
Parameters:
|
|
1222
|
-
-
|
|
1223
|
-
|
|
1224
|
-
- organization_id: typing.Optional[str].
|
|
1313
|
+
- extraction_agent_id: str.
|
|
1225
1314
|
|
|
1226
|
-
-
|
|
1315
|
+
- skip: typing.Optional[int].
|
|
1227
1316
|
|
|
1228
|
-
-
|
|
1317
|
+
- limit: typing.Optional[int].
|
|
1229
1318
|
---
|
|
1230
1319
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1231
1320
|
|
|
1232
1321
|
client = AsyncLlamaCloud(
|
|
1233
1322
|
token="YOUR_TOKEN",
|
|
1234
1323
|
)
|
|
1235
|
-
await client.llama_extract.
|
|
1324
|
+
await client.llama_extract.list_extract_runs(
|
|
1325
|
+
extraction_agent_id="string",
|
|
1326
|
+
)
|
|
1236
1327
|
"""
|
|
1237
|
-
_request: typing.Dict[str, typing.Any] = {}
|
|
1238
|
-
if prompt is not OMIT:
|
|
1239
|
-
_request["prompt"] = prompt
|
|
1240
|
-
if file_id is not OMIT:
|
|
1241
|
-
_request["file_id"] = file_id
|
|
1242
1328
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1243
|
-
"
|
|
1244
|
-
urllib.parse.urljoin(
|
|
1245
|
-
|
|
1246
|
-
),
|
|
1247
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1248
|
-
json=jsonable_encoder(_request),
|
|
1329
|
+
"GET",
|
|
1330
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs"),
|
|
1331
|
+
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id, "skip": skip, "limit": limit}),
|
|
1249
1332
|
headers=self._client_wrapper.get_headers(),
|
|
1250
1333
|
timeout=60,
|
|
1251
1334
|
)
|
|
1252
1335
|
if 200 <= _response.status_code < 300:
|
|
1253
|
-
return pydantic.parse_obj_as(
|
|
1336
|
+
return pydantic.parse_obj_as(PaginatedExtractRunsResponse, _response.json()) # type: ignore
|
|
1254
1337
|
if _response.status_code == 422:
|
|
1255
1338
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1256
1339
|
try:
|
|
@@ -1259,37 +1342,29 @@ class AsyncLlamaExtractClient:
|
|
|
1259
1342
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1260
1343
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1261
1344
|
|
|
1262
|
-
async def
|
|
1263
|
-
self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1264
|
-
) -> ExtractAgent:
|
|
1345
|
+
async def get_latest_run_from_ui(self, *, extraction_agent_id: str) -> typing.Optional[ExtractRun]:
|
|
1265
1346
|
"""
|
|
1266
1347
|
Parameters:
|
|
1267
|
-
-
|
|
1268
|
-
|
|
1269
|
-
- project_id: typing.Optional[str].
|
|
1270
|
-
|
|
1271
|
-
- organization_id: typing.Optional[str].
|
|
1348
|
+
- extraction_agent_id: str.
|
|
1272
1349
|
---
|
|
1273
1350
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1274
1351
|
|
|
1275
1352
|
client = AsyncLlamaCloud(
|
|
1276
1353
|
token="YOUR_TOKEN",
|
|
1277
1354
|
)
|
|
1278
|
-
await client.llama_extract.
|
|
1279
|
-
|
|
1355
|
+
await client.llama_extract.get_latest_run_from_ui(
|
|
1356
|
+
extraction_agent_id="string",
|
|
1280
1357
|
)
|
|
1281
1358
|
"""
|
|
1282
1359
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1283
1360
|
"GET",
|
|
1284
|
-
urllib.parse.urljoin(
|
|
1285
|
-
|
|
1286
|
-
),
|
|
1287
|
-
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1361
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs/latest-from-ui"),
|
|
1362
|
+
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
|
|
1288
1363
|
headers=self._client_wrapper.get_headers(),
|
|
1289
1364
|
timeout=60,
|
|
1290
1365
|
)
|
|
1291
1366
|
if 200 <= _response.status_code < 300:
|
|
1292
|
-
return pydantic.parse_obj_as(
|
|
1367
|
+
return pydantic.parse_obj_as(typing.Optional[ExtractRun], _response.json()) # type: ignore
|
|
1293
1368
|
if _response.status_code == 422:
|
|
1294
1369
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1295
1370
|
try:
|
|
@@ -1298,14 +1373,13 @@ class AsyncLlamaExtractClient:
|
|
|
1298
1373
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1299
1374
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1300
1375
|
|
|
1301
|
-
async def
|
|
1302
|
-
self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1303
|
-
) ->
|
|
1376
|
+
async def get_run_by_job_id(
|
|
1377
|
+
self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1378
|
+
) -> ExtractRun:
|
|
1304
1379
|
"""
|
|
1305
|
-
Get or create a default extraction agent for the current project.
|
|
1306
|
-
The default agent has an empty schema and default configuration.
|
|
1307
|
-
|
|
1308
1380
|
Parameters:
|
|
1381
|
+
- job_id: str.
|
|
1382
|
+
|
|
1309
1383
|
- project_id: typing.Optional[str].
|
|
1310
1384
|
|
|
1311
1385
|
- organization_id: typing.Optional[str].
|
|
@@ -1315,19 +1389,19 @@ class AsyncLlamaExtractClient:
|
|
|
1315
1389
|
client = AsyncLlamaCloud(
|
|
1316
1390
|
token="YOUR_TOKEN",
|
|
1317
1391
|
)
|
|
1318
|
-
await client.llama_extract.
|
|
1392
|
+
await client.llama_extract.get_run_by_job_id(
|
|
1393
|
+
job_id="string",
|
|
1394
|
+
)
|
|
1319
1395
|
"""
|
|
1320
1396
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1321
1397
|
"GET",
|
|
1322
|
-
urllib.parse.urljoin(
|
|
1323
|
-
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
|
|
1324
|
-
),
|
|
1398
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/by-job/{job_id}"),
|
|
1325
1399
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1326
1400
|
headers=self._client_wrapper.get_headers(),
|
|
1327
1401
|
timeout=60,
|
|
1328
1402
|
)
|
|
1329
1403
|
if 200 <= _response.status_code < 300:
|
|
1330
|
-
return pydantic.parse_obj_as(
|
|
1404
|
+
return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
|
|
1331
1405
|
if _response.status_code == 422:
|
|
1332
1406
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1333
1407
|
try:
|
|
@@ -1336,30 +1410,35 @@ class AsyncLlamaExtractClient:
|
|
|
1336
1410
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1337
1411
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1338
1412
|
|
|
1339
|
-
async def
|
|
1413
|
+
async def get_run(
|
|
1414
|
+
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1415
|
+
) -> ExtractRun:
|
|
1340
1416
|
"""
|
|
1341
1417
|
Parameters:
|
|
1342
|
-
-
|
|
1418
|
+
- run_id: str.
|
|
1419
|
+
|
|
1420
|
+
- project_id: typing.Optional[str].
|
|
1421
|
+
|
|
1422
|
+
- organization_id: typing.Optional[str].
|
|
1343
1423
|
---
|
|
1344
1424
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1345
1425
|
|
|
1346
1426
|
client = AsyncLlamaCloud(
|
|
1347
1427
|
token="YOUR_TOKEN",
|
|
1348
1428
|
)
|
|
1349
|
-
await client.llama_extract.
|
|
1350
|
-
|
|
1429
|
+
await client.llama_extract.get_run(
|
|
1430
|
+
run_id="string",
|
|
1351
1431
|
)
|
|
1352
1432
|
"""
|
|
1353
1433
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1354
1434
|
"GET",
|
|
1355
|
-
urllib.parse.urljoin(
|
|
1356
|
-
|
|
1357
|
-
),
|
|
1435
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
|
|
1436
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1358
1437
|
headers=self._client_wrapper.get_headers(),
|
|
1359
1438
|
timeout=60,
|
|
1360
1439
|
)
|
|
1361
1440
|
if 200 <= _response.status_code < 300:
|
|
1362
|
-
return pydantic.parse_obj_as(
|
|
1441
|
+
return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
|
|
1363
1442
|
if _response.status_code == 422:
|
|
1364
1443
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1365
1444
|
try:
|
|
@@ -1368,81 +1447,30 @@ class AsyncLlamaExtractClient:
|
|
|
1368
1447
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1369
1448
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1370
1449
|
|
|
1371
|
-
async def
|
|
1372
|
-
self,
|
|
1373
|
-
) ->
|
|
1450
|
+
async def delete_extraction_run(
|
|
1451
|
+
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1452
|
+
) -> typing.Any:
|
|
1374
1453
|
"""
|
|
1375
1454
|
Parameters:
|
|
1376
|
-
-
|
|
1377
|
-
|
|
1378
|
-
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
1379
|
-
|
|
1380
|
-
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
1381
|
-
---
|
|
1382
|
-
from llama_cloud import (
|
|
1383
|
-
DocumentChunkMode,
|
|
1384
|
-
ExtractConfig,
|
|
1385
|
-
ExtractConfigPriority,
|
|
1386
|
-
ExtractMode,
|
|
1387
|
-
ExtractModels,
|
|
1388
|
-
ExtractTarget,
|
|
1389
|
-
PublicModelName,
|
|
1390
|
-
)
|
|
1391
|
-
from llama_cloud.client import AsyncLlamaCloud
|
|
1455
|
+
- run_id: str.
|
|
1392
1456
|
|
|
1393
|
-
|
|
1394
|
-
token="YOUR_TOKEN",
|
|
1395
|
-
)
|
|
1396
|
-
await client.llama_extract.update_extraction_agent(
|
|
1397
|
-
extraction_agent_id="string",
|
|
1398
|
-
config=ExtractConfig(
|
|
1399
|
-
priority=ExtractConfigPriority.LOW,
|
|
1400
|
-
extraction_target=ExtractTarget.PER_DOC,
|
|
1401
|
-
extraction_mode=ExtractMode.FAST,
|
|
1402
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
1403
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1404
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
1405
|
-
),
|
|
1406
|
-
)
|
|
1407
|
-
"""
|
|
1408
|
-
_response = await self._client_wrapper.httpx_client.request(
|
|
1409
|
-
"PUT",
|
|
1410
|
-
urllib.parse.urljoin(
|
|
1411
|
-
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
1412
|
-
),
|
|
1413
|
-
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
1414
|
-
headers=self._client_wrapper.get_headers(),
|
|
1415
|
-
timeout=60,
|
|
1416
|
-
)
|
|
1417
|
-
if 200 <= _response.status_code < 300:
|
|
1418
|
-
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
1419
|
-
if _response.status_code == 422:
|
|
1420
|
-
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1421
|
-
try:
|
|
1422
|
-
_response_json = _response.json()
|
|
1423
|
-
except JSONDecodeError:
|
|
1424
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1425
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1457
|
+
- project_id: typing.Optional[str].
|
|
1426
1458
|
|
|
1427
|
-
|
|
1428
|
-
"""
|
|
1429
|
-
Parameters:
|
|
1430
|
-
- extraction_agent_id: str.
|
|
1459
|
+
- organization_id: typing.Optional[str].
|
|
1431
1460
|
---
|
|
1432
1461
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1433
1462
|
|
|
1434
1463
|
client = AsyncLlamaCloud(
|
|
1435
1464
|
token="YOUR_TOKEN",
|
|
1436
1465
|
)
|
|
1437
|
-
await client.llama_extract.
|
|
1438
|
-
|
|
1466
|
+
await client.llama_extract.delete_extraction_run(
|
|
1467
|
+
run_id="string",
|
|
1439
1468
|
)
|
|
1440
1469
|
"""
|
|
1441
1470
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1442
1471
|
"DELETE",
|
|
1443
|
-
urllib.parse.urljoin(
|
|
1444
|
-
|
|
1445
|
-
),
|
|
1472
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
|
|
1473
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1446
1474
|
headers=self._client_wrapper.get_headers(),
|
|
1447
1475
|
timeout=60,
|
|
1448
1476
|
)
|
|
@@ -1456,155 +1484,47 @@ class AsyncLlamaExtractClient:
|
|
|
1456
1484
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1457
1485
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1458
1486
|
|
|
1459
|
-
async def
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
)
|
|
1487
|
+
async def extract_stateless(
|
|
1488
|
+
self,
|
|
1489
|
+
*,
|
|
1490
|
+
project_id: typing.Optional[str] = None,
|
|
1491
|
+
organization_id: typing.Optional[str] = None,
|
|
1492
|
+
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
1493
|
+
data_schema: ExtractStatelessRequestDataSchema,
|
|
1494
|
+
config: ExtractConfig,
|
|
1495
|
+
file_id: typing.Optional[str] = OMIT,
|
|
1496
|
+
text: typing.Optional[str] = OMIT,
|
|
1497
|
+
file: typing.Optional[FileData] = OMIT,
|
|
1498
|
+
) -> ExtractJob:
|
|
1472
1499
|
"""
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
|
|
1476
|
-
params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
|
|
1477
|
-
headers=self._client_wrapper.get_headers(),
|
|
1478
|
-
timeout=60,
|
|
1479
|
-
)
|
|
1480
|
-
if 200 <= _response.status_code < 300:
|
|
1481
|
-
return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
|
|
1482
|
-
if _response.status_code == 422:
|
|
1483
|
-
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1484
|
-
try:
|
|
1485
|
-
_response_json = _response.json()
|
|
1486
|
-
except JSONDecodeError:
|
|
1487
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1488
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1500
|
+
Stateless extraction endpoint that uses a default extraction agent in the user's default project.
|
|
1501
|
+
Requires data_schema, config, and either file_id, text, or base64 encoded file data.
|
|
1489
1502
|
|
|
1490
|
-
async def run_job(self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreate) -> ExtractJob:
|
|
1491
|
-
"""
|
|
1492
1503
|
Parameters:
|
|
1493
|
-
-
|
|
1504
|
+
- project_id: typing.Optional[str].
|
|
1494
1505
|
|
|
1495
|
-
-
|
|
1496
|
-
---
|
|
1497
|
-
from llama_cloud import (
|
|
1498
|
-
DocumentChunkMode,
|
|
1499
|
-
ExtractConfig,
|
|
1500
|
-
ExtractConfigPriority,
|
|
1501
|
-
ExtractJobCreate,
|
|
1502
|
-
ExtractJobCreatePriority,
|
|
1503
|
-
ExtractMode,
|
|
1504
|
-
ExtractModels,
|
|
1505
|
-
ExtractTarget,
|
|
1506
|
-
PublicModelName,
|
|
1507
|
-
)
|
|
1508
|
-
from llama_cloud.client import AsyncLlamaCloud
|
|
1506
|
+
- organization_id: typing.Optional[str].
|
|
1509
1507
|
|
|
1510
|
-
|
|
1511
|
-
token="YOUR_TOKEN",
|
|
1512
|
-
)
|
|
1513
|
-
await client.llama_extract.run_job(
|
|
1514
|
-
request=ExtractJobCreate(
|
|
1515
|
-
priority=ExtractJobCreatePriority.LOW,
|
|
1516
|
-
extraction_agent_id="string",
|
|
1517
|
-
file_id="string",
|
|
1518
|
-
config_override=ExtractConfig(
|
|
1519
|
-
priority=ExtractConfigPriority.LOW,
|
|
1520
|
-
extraction_target=ExtractTarget.PER_DOC,
|
|
1521
|
-
extraction_mode=ExtractMode.FAST,
|
|
1522
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
1523
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1524
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
1525
|
-
),
|
|
1526
|
-
),
|
|
1527
|
-
)
|
|
1528
|
-
"""
|
|
1529
|
-
_response = await self._client_wrapper.httpx_client.request(
|
|
1530
|
-
"POST",
|
|
1531
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
|
|
1532
|
-
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1533
|
-
json=jsonable_encoder(request),
|
|
1534
|
-
headers=self._client_wrapper.get_headers(),
|
|
1535
|
-
timeout=60,
|
|
1536
|
-
)
|
|
1537
|
-
if 200 <= _response.status_code < 300:
|
|
1538
|
-
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1539
|
-
if _response.status_code == 422:
|
|
1540
|
-
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1541
|
-
try:
|
|
1542
|
-
_response_json = _response.json()
|
|
1543
|
-
except JSONDecodeError:
|
|
1544
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1545
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1508
|
+
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
1546
1509
|
|
|
1547
|
-
|
|
1548
|
-
"""
|
|
1549
|
-
Parameters:
|
|
1550
|
-
- job_id: str.
|
|
1551
|
-
---
|
|
1552
|
-
from llama_cloud.client import AsyncLlamaCloud
|
|
1510
|
+
- data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
|
|
1553
1511
|
|
|
1554
|
-
|
|
1555
|
-
token="YOUR_TOKEN",
|
|
1556
|
-
)
|
|
1557
|
-
await client.llama_extract.get_job(
|
|
1558
|
-
job_id="string",
|
|
1559
|
-
)
|
|
1560
|
-
"""
|
|
1561
|
-
_response = await self._client_wrapper.httpx_client.request(
|
|
1562
|
-
"GET",
|
|
1563
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
|
|
1564
|
-
headers=self._client_wrapper.get_headers(),
|
|
1565
|
-
timeout=60,
|
|
1566
|
-
)
|
|
1567
|
-
if 200 <= _response.status_code < 300:
|
|
1568
|
-
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1569
|
-
if _response.status_code == 422:
|
|
1570
|
-
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1571
|
-
try:
|
|
1572
|
-
_response_json = _response.json()
|
|
1573
|
-
except JSONDecodeError:
|
|
1574
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1575
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1512
|
+
- config: ExtractConfig. The configuration parameters for the extraction
|
|
1576
1513
|
|
|
1577
|
-
|
|
1578
|
-
self,
|
|
1579
|
-
*,
|
|
1580
|
-
from_ui: typing.Optional[bool] = None,
|
|
1581
|
-
job_create: ExtractJobCreate,
|
|
1582
|
-
extract_settings: typing.Optional[LlamaExtractSettings] = OMIT,
|
|
1583
|
-
) -> ExtractJob:
|
|
1584
|
-
"""
|
|
1585
|
-
Parameters:
|
|
1586
|
-
- from_ui: typing.Optional[bool].
|
|
1514
|
+
- file_id: typing.Optional[str].
|
|
1587
1515
|
|
|
1588
|
-
-
|
|
1516
|
+
- text: typing.Optional[str].
|
|
1589
1517
|
|
|
1590
|
-
-
|
|
1518
|
+
- file: typing.Optional[FileData].
|
|
1591
1519
|
---
|
|
1592
1520
|
from llama_cloud import (
|
|
1593
|
-
ChunkMode,
|
|
1594
1521
|
DocumentChunkMode,
|
|
1595
1522
|
ExtractConfig,
|
|
1596
1523
|
ExtractConfigPriority,
|
|
1597
|
-
ExtractJobCreate,
|
|
1598
|
-
ExtractJobCreatePriority,
|
|
1599
1524
|
ExtractMode,
|
|
1600
1525
|
ExtractModels,
|
|
1601
1526
|
ExtractTarget,
|
|
1602
|
-
|
|
1603
|
-
LlamaExtractSettings,
|
|
1604
|
-
LlamaParseParameters,
|
|
1605
|
-
LlamaParseParametersPriority,
|
|
1606
|
-
MultimodalParseResolution,
|
|
1607
|
-
ParsingMode,
|
|
1527
|
+
FileData,
|
|
1608
1528
|
PublicModelName,
|
|
1609
1529
|
)
|
|
1610
1530
|
from llama_cloud.client import AsyncLlamaCloud
|
|
@@ -1612,38 +1532,34 @@ class AsyncLlamaExtractClient:
|
|
|
1612
1532
|
client = AsyncLlamaCloud(
|
|
1613
1533
|
token="YOUR_TOKEN",
|
|
1614
1534
|
)
|
|
1615
|
-
await client.llama_extract.
|
|
1616
|
-
|
|
1617
|
-
priority=
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
extraction_mode=ExtractMode.FAST,
|
|
1624
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
1625
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1626
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
1627
|
-
),
|
|
1535
|
+
await client.llama_extract.extract_stateless(
|
|
1536
|
+
config=ExtractConfig(
|
|
1537
|
+
priority=ExtractConfigPriority.LOW,
|
|
1538
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
1539
|
+
extraction_mode=ExtractMode.FAST,
|
|
1540
|
+
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
1541
|
+
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1542
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
1628
1543
|
),
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
priority=LlamaParseParametersPriority.LOW,
|
|
1633
|
-
parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
|
|
1634
|
-
replace_failed_page_mode=FailPageMode.RAW_TEXT,
|
|
1635
|
-
),
|
|
1636
|
-
multimodal_parse_resolution=MultimodalParseResolution.MEDIUM,
|
|
1544
|
+
file=FileData(
|
|
1545
|
+
data="string",
|
|
1546
|
+
mime_type="string",
|
|
1637
1547
|
),
|
|
1638
1548
|
)
|
|
1639
1549
|
"""
|
|
1640
|
-
_request: typing.Dict[str, typing.Any] = {"
|
|
1641
|
-
if
|
|
1642
|
-
_request["
|
|
1550
|
+
_request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
|
|
1551
|
+
if webhook_configurations is not OMIT:
|
|
1552
|
+
_request["webhook_configurations"] = webhook_configurations
|
|
1553
|
+
if file_id is not OMIT:
|
|
1554
|
+
_request["file_id"] = file_id
|
|
1555
|
+
if text is not OMIT:
|
|
1556
|
+
_request["text"] = text
|
|
1557
|
+
if file is not OMIT:
|
|
1558
|
+
_request["file"] = file
|
|
1643
1559
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1644
1560
|
"POST",
|
|
1645
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
1646
|
-
params=remove_none_from_dict({"
|
|
1561
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
|
|
1562
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1647
1563
|
json=jsonable_encoder(_request),
|
|
1648
1564
|
headers=self._client_wrapper.get_headers(),
|
|
1649
1565
|
timeout=60,
|
|
@@ -1658,115 +1574,39 @@ class AsyncLlamaExtractClient:
|
|
|
1658
1574
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1659
1575
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1660
1576
|
|
|
1661
|
-
async def
|
|
1662
|
-
self,
|
|
1663
|
-
*,
|
|
1664
|
-
from_ui: typing.Optional[bool] = None,
|
|
1665
|
-
extraction_agent_id: str,
|
|
1666
|
-
file: typing.IO,
|
|
1667
|
-
data_schema_override: typing.Optional[str] = None,
|
|
1668
|
-
config_override: typing.Optional[str] = None,
|
|
1669
|
-
) -> ExtractJob:
|
|
1670
|
-
"""
|
|
1671
|
-
Parameters:
|
|
1672
|
-
- from_ui: typing.Optional[bool].
|
|
1673
|
-
|
|
1674
|
-
- extraction_agent_id: str.
|
|
1675
|
-
|
|
1676
|
-
- file: typing.IO.
|
|
1677
|
-
|
|
1678
|
-
- data_schema_override: typing.Optional[str].
|
|
1679
|
-
|
|
1680
|
-
- config_override: typing.Optional[str].
|
|
1681
|
-
"""
|
|
1682
|
-
_response = await self._client_wrapper.httpx_client.request(
|
|
1683
|
-
"POST",
|
|
1684
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
|
|
1685
|
-
params=remove_none_from_dict({"from_ui": from_ui}),
|
|
1686
|
-
data=jsonable_encoder(
|
|
1687
|
-
{
|
|
1688
|
-
"extraction_agent_id": extraction_agent_id,
|
|
1689
|
-
"data_schema_override": data_schema_override,
|
|
1690
|
-
"config_override": config_override,
|
|
1691
|
-
}
|
|
1692
|
-
),
|
|
1693
|
-
files={"file": file},
|
|
1694
|
-
headers=self._client_wrapper.get_headers(),
|
|
1695
|
-
timeout=60,
|
|
1696
|
-
)
|
|
1697
|
-
if 200 <= _response.status_code < 300:
|
|
1698
|
-
return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
|
|
1699
|
-
if _response.status_code == 422:
|
|
1700
|
-
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1701
|
-
try:
|
|
1702
|
-
_response_json = _response.json()
|
|
1703
|
-
except JSONDecodeError:
|
|
1704
|
-
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1705
|
-
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1706
|
-
|
|
1707
|
-
async def run_batch_jobs(
|
|
1577
|
+
async def list_extraction_agents(
|
|
1708
1578
|
self,
|
|
1709
1579
|
*,
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
config_override: typing.Optional[ExtractConfig] = OMIT,
|
|
1715
|
-
) -> typing.List[ExtractJob]:
|
|
1580
|
+
include_default: typing.Optional[bool] = None,
|
|
1581
|
+
project_id: typing.Optional[str] = None,
|
|
1582
|
+
organization_id: typing.Optional[str] = None,
|
|
1583
|
+
) -> typing.List[ExtractAgent]:
|
|
1716
1584
|
"""
|
|
1717
1585
|
Parameters:
|
|
1718
|
-
-
|
|
1719
|
-
|
|
1720
|
-
- extraction_agent_id: str. The id of the extraction agent
|
|
1721
|
-
|
|
1722
|
-
- file_ids: typing.List[str]. The ids of the files
|
|
1586
|
+
- include_default: typing.Optional[bool]. Whether to include default agents in the results
|
|
1723
1587
|
|
|
1724
|
-
-
|
|
1588
|
+
- project_id: typing.Optional[str].
|
|
1725
1589
|
|
|
1726
|
-
-
|
|
1590
|
+
- organization_id: typing.Optional[str].
|
|
1727
1591
|
---
|
|
1728
|
-
from llama_cloud import (
|
|
1729
|
-
DocumentChunkMode,
|
|
1730
|
-
ExtractConfig,
|
|
1731
|
-
ExtractConfigPriority,
|
|
1732
|
-
ExtractMode,
|
|
1733
|
-
ExtractModels,
|
|
1734
|
-
ExtractTarget,
|
|
1735
|
-
PublicModelName,
|
|
1736
|
-
)
|
|
1737
1592
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1738
1593
|
|
|
1739
1594
|
client = AsyncLlamaCloud(
|
|
1740
1595
|
token="YOUR_TOKEN",
|
|
1741
1596
|
)
|
|
1742
|
-
await client.llama_extract.
|
|
1743
|
-
extraction_agent_id="string",
|
|
1744
|
-
file_ids=[],
|
|
1745
|
-
config_override=ExtractConfig(
|
|
1746
|
-
priority=ExtractConfigPriority.LOW,
|
|
1747
|
-
extraction_target=ExtractTarget.PER_DOC,
|
|
1748
|
-
extraction_mode=ExtractMode.FAST,
|
|
1749
|
-
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
1750
|
-
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1751
|
-
chunk_mode=DocumentChunkMode.PAGE,
|
|
1752
|
-
),
|
|
1753
|
-
)
|
|
1597
|
+
await client.llama_extract.list_extraction_agents()
|
|
1754
1598
|
"""
|
|
1755
|
-
_request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
|
|
1756
|
-
if data_schema_override is not OMIT:
|
|
1757
|
-
_request["data_schema_override"] = data_schema_override
|
|
1758
|
-
if config_override is not OMIT:
|
|
1759
|
-
_request["config_override"] = config_override
|
|
1760
1599
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1761
|
-
"
|
|
1762
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/
|
|
1763
|
-
params=remove_none_from_dict(
|
|
1764
|
-
|
|
1600
|
+
"GET",
|
|
1601
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
1602
|
+
params=remove_none_from_dict(
|
|
1603
|
+
{"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
|
|
1604
|
+
),
|
|
1765
1605
|
headers=self._client_wrapper.get_headers(),
|
|
1766
1606
|
timeout=60,
|
|
1767
1607
|
)
|
|
1768
1608
|
if 200 <= _response.status_code < 300:
|
|
1769
|
-
return pydantic.parse_obj_as(typing.List[
|
|
1609
|
+
return pydantic.parse_obj_as(typing.List[ExtractAgent], _response.json()) # type: ignore
|
|
1770
1610
|
if _response.status_code == 422:
|
|
1771
1611
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1772
1612
|
try:
|
|
@@ -1775,35 +1615,63 @@ class AsyncLlamaExtractClient:
|
|
|
1775
1615
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1776
1616
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1777
1617
|
|
|
1778
|
-
async def
|
|
1779
|
-
self,
|
|
1780
|
-
|
|
1618
|
+
async def create_extraction_agent(
|
|
1619
|
+
self,
|
|
1620
|
+
*,
|
|
1621
|
+
project_id: typing.Optional[str] = None,
|
|
1622
|
+
organization_id: typing.Optional[str] = None,
|
|
1623
|
+
name: str,
|
|
1624
|
+
data_schema: ExtractAgentCreateDataSchema,
|
|
1625
|
+
config: ExtractConfig,
|
|
1626
|
+
) -> ExtractAgent:
|
|
1781
1627
|
"""
|
|
1782
1628
|
Parameters:
|
|
1783
|
-
- job_id: str.
|
|
1784
|
-
|
|
1785
1629
|
- project_id: typing.Optional[str].
|
|
1786
1630
|
|
|
1787
1631
|
- organization_id: typing.Optional[str].
|
|
1632
|
+
|
|
1633
|
+
- name: str. The name of the extraction schema
|
|
1634
|
+
|
|
1635
|
+
- data_schema: ExtractAgentCreateDataSchema. The schema of the data.
|
|
1636
|
+
|
|
1637
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
1788
1638
|
---
|
|
1639
|
+
from llama_cloud import (
|
|
1640
|
+
DocumentChunkMode,
|
|
1641
|
+
ExtractConfig,
|
|
1642
|
+
ExtractConfigPriority,
|
|
1643
|
+
ExtractMode,
|
|
1644
|
+
ExtractModels,
|
|
1645
|
+
ExtractTarget,
|
|
1646
|
+
PublicModelName,
|
|
1647
|
+
)
|
|
1789
1648
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1790
1649
|
|
|
1791
1650
|
client = AsyncLlamaCloud(
|
|
1792
1651
|
token="YOUR_TOKEN",
|
|
1793
1652
|
)
|
|
1794
|
-
await client.llama_extract.
|
|
1795
|
-
|
|
1653
|
+
await client.llama_extract.create_extraction_agent(
|
|
1654
|
+
name="string",
|
|
1655
|
+
config=ExtractConfig(
|
|
1656
|
+
priority=ExtractConfigPriority.LOW,
|
|
1657
|
+
extraction_target=ExtractTarget.PER_DOC,
|
|
1658
|
+
extraction_mode=ExtractMode.FAST,
|
|
1659
|
+
parse_model=PublicModelName.OPENAI_GPT_4_O,
|
|
1660
|
+
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
1661
|
+
chunk_mode=DocumentChunkMode.PAGE,
|
|
1662
|
+
),
|
|
1796
1663
|
)
|
|
1797
1664
|
"""
|
|
1798
1665
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1799
|
-
"
|
|
1800
|
-
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/",
|
|
1666
|
+
"POST",
|
|
1667
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
|
|
1801
1668
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1669
|
+
json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
|
|
1802
1670
|
headers=self._client_wrapper.get_headers(),
|
|
1803
1671
|
timeout=60,
|
|
1804
1672
|
)
|
|
1805
1673
|
if 200 <= _response.status_code < 300:
|
|
1806
|
-
return pydantic.parse_obj_as(
|
|
1674
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
1807
1675
|
if _response.status_code == 422:
|
|
1808
1676
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1809
1677
|
try:
|
|
@@ -1812,35 +1680,34 @@ class AsyncLlamaExtractClient:
|
|
|
1812
1680
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1813
1681
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1814
1682
|
|
|
1815
|
-
async def
|
|
1816
|
-
self, *,
|
|
1817
|
-
) ->
|
|
1683
|
+
async def validate_extraction_schema(
|
|
1684
|
+
self, *, data_schema: ExtractSchemaValidateRequestDataSchema
|
|
1685
|
+
) -> ExtractSchemaValidateResponse:
|
|
1818
1686
|
"""
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
- skip: typing.Optional[int].
|
|
1687
|
+
Validates an extraction agent's schema definition.
|
|
1688
|
+
Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
|
|
1823
1689
|
|
|
1824
|
-
|
|
1690
|
+
Parameters:
|
|
1691
|
+
- data_schema: ExtractSchemaValidateRequestDataSchema.
|
|
1825
1692
|
---
|
|
1826
1693
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1827
1694
|
|
|
1828
1695
|
client = AsyncLlamaCloud(
|
|
1829
1696
|
token="YOUR_TOKEN",
|
|
1830
1697
|
)
|
|
1831
|
-
await client.llama_extract.
|
|
1832
|
-
extraction_agent_id="string",
|
|
1833
|
-
)
|
|
1698
|
+
await client.llama_extract.validate_extraction_schema()
|
|
1834
1699
|
"""
|
|
1835
1700
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1836
|
-
"
|
|
1837
|
-
urllib.parse.urljoin(
|
|
1838
|
-
|
|
1701
|
+
"POST",
|
|
1702
|
+
urllib.parse.urljoin(
|
|
1703
|
+
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
|
|
1704
|
+
),
|
|
1705
|
+
json=jsonable_encoder({"data_schema": data_schema}),
|
|
1839
1706
|
headers=self._client_wrapper.get_headers(),
|
|
1840
1707
|
timeout=60,
|
|
1841
1708
|
)
|
|
1842
1709
|
if 200 <= _response.status_code < 300:
|
|
1843
|
-
return pydantic.parse_obj_as(
|
|
1710
|
+
return pydantic.parse_obj_as(ExtractSchemaValidateResponse, _response.json()) # type: ignore
|
|
1844
1711
|
if _response.status_code == 422:
|
|
1845
1712
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1846
1713
|
try:
|
|
@@ -1849,29 +1716,50 @@ class AsyncLlamaExtractClient:
|
|
|
1849
1716
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1850
1717
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1851
1718
|
|
|
1852
|
-
async def
|
|
1719
|
+
async def generate_extraction_schema(
|
|
1720
|
+
self,
|
|
1721
|
+
*,
|
|
1722
|
+
project_id: typing.Optional[str] = None,
|
|
1723
|
+
organization_id: typing.Optional[str] = None,
|
|
1724
|
+
prompt: typing.Optional[str] = OMIT,
|
|
1725
|
+
file_id: typing.Optional[str] = OMIT,
|
|
1726
|
+
) -> ExtractSchemaGenerateResponse:
|
|
1853
1727
|
"""
|
|
1728
|
+
Generates an extraction agent's schema definition from a file and/or natural language prompt.
|
|
1729
|
+
|
|
1854
1730
|
Parameters:
|
|
1855
|
-
-
|
|
1731
|
+
- project_id: typing.Optional[str].
|
|
1732
|
+
|
|
1733
|
+
- organization_id: typing.Optional[str].
|
|
1734
|
+
|
|
1735
|
+
- prompt: typing.Optional[str].
|
|
1736
|
+
|
|
1737
|
+
- file_id: typing.Optional[str].
|
|
1856
1738
|
---
|
|
1857
1739
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1858
1740
|
|
|
1859
1741
|
client = AsyncLlamaCloud(
|
|
1860
1742
|
token="YOUR_TOKEN",
|
|
1861
1743
|
)
|
|
1862
|
-
await client.llama_extract.
|
|
1863
|
-
extraction_agent_id="string",
|
|
1864
|
-
)
|
|
1744
|
+
await client.llama_extract.generate_extraction_schema()
|
|
1865
1745
|
"""
|
|
1746
|
+
_request: typing.Dict[str, typing.Any] = {}
|
|
1747
|
+
if prompt is not OMIT:
|
|
1748
|
+
_request["prompt"] = prompt
|
|
1749
|
+
if file_id is not OMIT:
|
|
1750
|
+
_request["file_id"] = file_id
|
|
1866
1751
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1867
|
-
"
|
|
1868
|
-
urllib.parse.urljoin(
|
|
1869
|
-
|
|
1752
|
+
"POST",
|
|
1753
|
+
urllib.parse.urljoin(
|
|
1754
|
+
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/generate"
|
|
1755
|
+
),
|
|
1756
|
+
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1757
|
+
json=jsonable_encoder(_request),
|
|
1870
1758
|
headers=self._client_wrapper.get_headers(),
|
|
1871
1759
|
timeout=60,
|
|
1872
1760
|
)
|
|
1873
1761
|
if 200 <= _response.status_code < 300:
|
|
1874
|
-
return pydantic.parse_obj_as(
|
|
1762
|
+
return pydantic.parse_obj_as(ExtractSchemaGenerateResponse, _response.json()) # type: ignore
|
|
1875
1763
|
if _response.status_code == 422:
|
|
1876
1764
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1877
1765
|
try:
|
|
@@ -1880,12 +1768,12 @@ class AsyncLlamaExtractClient:
|
|
|
1880
1768
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1881
1769
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1882
1770
|
|
|
1883
|
-
async def
|
|
1884
|
-
self,
|
|
1885
|
-
) ->
|
|
1771
|
+
async def get_extraction_agent_by_name(
|
|
1772
|
+
self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1773
|
+
) -> ExtractAgent:
|
|
1886
1774
|
"""
|
|
1887
1775
|
Parameters:
|
|
1888
|
-
-
|
|
1776
|
+
- name: str.
|
|
1889
1777
|
|
|
1890
1778
|
- project_id: typing.Optional[str].
|
|
1891
1779
|
|
|
@@ -1896,19 +1784,21 @@ class AsyncLlamaExtractClient:
|
|
|
1896
1784
|
client = AsyncLlamaCloud(
|
|
1897
1785
|
token="YOUR_TOKEN",
|
|
1898
1786
|
)
|
|
1899
|
-
await client.llama_extract.
|
|
1900
|
-
|
|
1787
|
+
await client.llama_extract.get_extraction_agent_by_name(
|
|
1788
|
+
name="string",
|
|
1901
1789
|
)
|
|
1902
1790
|
"""
|
|
1903
1791
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1904
1792
|
"GET",
|
|
1905
|
-
urllib.parse.urljoin(
|
|
1793
|
+
urllib.parse.urljoin(
|
|
1794
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
|
|
1795
|
+
),
|
|
1906
1796
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1907
1797
|
headers=self._client_wrapper.get_headers(),
|
|
1908
1798
|
timeout=60,
|
|
1909
1799
|
)
|
|
1910
1800
|
if 200 <= _response.status_code < 300:
|
|
1911
|
-
return pydantic.parse_obj_as(
|
|
1801
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
1912
1802
|
if _response.status_code == 422:
|
|
1913
1803
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1914
1804
|
try:
|
|
@@ -1917,13 +1807,14 @@ class AsyncLlamaExtractClient:
|
|
|
1917
1807
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1918
1808
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1919
1809
|
|
|
1920
|
-
async def
|
|
1921
|
-
self,
|
|
1922
|
-
) ->
|
|
1810
|
+
async def get_or_create_default_extraction_agent(
|
|
1811
|
+
self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1812
|
+
) -> ExtractAgent:
|
|
1923
1813
|
"""
|
|
1924
|
-
|
|
1925
|
-
|
|
1814
|
+
Get or create a default extraction agent for the current project.
|
|
1815
|
+
The default agent has an empty schema and default configuration.
|
|
1926
1816
|
|
|
1817
|
+
Parameters:
|
|
1927
1818
|
- project_id: typing.Optional[str].
|
|
1928
1819
|
|
|
1929
1820
|
- organization_id: typing.Optional[str].
|
|
@@ -1933,19 +1824,19 @@ class AsyncLlamaExtractClient:
|
|
|
1933
1824
|
client = AsyncLlamaCloud(
|
|
1934
1825
|
token="YOUR_TOKEN",
|
|
1935
1826
|
)
|
|
1936
|
-
await client.llama_extract.
|
|
1937
|
-
run_id="string",
|
|
1938
|
-
)
|
|
1827
|
+
await client.llama_extract.get_or_create_default_extraction_agent()
|
|
1939
1828
|
"""
|
|
1940
1829
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1941
1830
|
"GET",
|
|
1942
|
-
urllib.parse.urljoin(
|
|
1831
|
+
urllib.parse.urljoin(
|
|
1832
|
+
f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
|
|
1833
|
+
),
|
|
1943
1834
|
params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
|
|
1944
1835
|
headers=self._client_wrapper.get_headers(),
|
|
1945
1836
|
timeout=60,
|
|
1946
1837
|
)
|
|
1947
1838
|
if 200 <= _response.status_code < 300:
|
|
1948
|
-
return pydantic.parse_obj_as(
|
|
1839
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
1949
1840
|
if _response.status_code == 422:
|
|
1950
1841
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1951
1842
|
try:
|
|
@@ -1954,35 +1845,30 @@ class AsyncLlamaExtractClient:
|
|
|
1954
1845
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1955
1846
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1956
1847
|
|
|
1957
|
-
async def
|
|
1958
|
-
self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
|
|
1959
|
-
) -> typing.Any:
|
|
1848
|
+
async def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
|
|
1960
1849
|
"""
|
|
1961
1850
|
Parameters:
|
|
1962
|
-
-
|
|
1963
|
-
|
|
1964
|
-
- project_id: typing.Optional[str].
|
|
1965
|
-
|
|
1966
|
-
- organization_id: typing.Optional[str].
|
|
1851
|
+
- extraction_agent_id: str.
|
|
1967
1852
|
---
|
|
1968
1853
|
from llama_cloud.client import AsyncLlamaCloud
|
|
1969
1854
|
|
|
1970
1855
|
client = AsyncLlamaCloud(
|
|
1971
1856
|
token="YOUR_TOKEN",
|
|
1972
1857
|
)
|
|
1973
|
-
await client.llama_extract.
|
|
1974
|
-
|
|
1858
|
+
await client.llama_extract.get_extraction_agent(
|
|
1859
|
+
extraction_agent_id="string",
|
|
1975
1860
|
)
|
|
1976
1861
|
"""
|
|
1977
1862
|
_response = await self._client_wrapper.httpx_client.request(
|
|
1978
|
-
"
|
|
1979
|
-
urllib.parse.urljoin(
|
|
1980
|
-
|
|
1863
|
+
"GET",
|
|
1864
|
+
urllib.parse.urljoin(
|
|
1865
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
1866
|
+
),
|
|
1981
1867
|
headers=self._client_wrapper.get_headers(),
|
|
1982
1868
|
timeout=60,
|
|
1983
1869
|
)
|
|
1984
1870
|
if 200 <= _response.status_code < 300:
|
|
1985
|
-
return pydantic.parse_obj_as(
|
|
1871
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
1986
1872
|
if _response.status_code == 422:
|
|
1987
1873
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1988
1874
|
try:
|
|
@@ -1991,38 +1877,16 @@ class AsyncLlamaExtractClient:
|
|
|
1991
1877
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1992
1878
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1993
1879
|
|
|
1994
|
-
async def
|
|
1995
|
-
self,
|
|
1996
|
-
|
|
1997
|
-
project_id: typing.Optional[str] = None,
|
|
1998
|
-
organization_id: typing.Optional[str] = None,
|
|
1999
|
-
webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
|
|
2000
|
-
data_schema: ExtractStatelessRequestDataSchema,
|
|
2001
|
-
config: ExtractConfig,
|
|
2002
|
-
file_id: typing.Optional[str] = OMIT,
|
|
2003
|
-
text: typing.Optional[str] = OMIT,
|
|
2004
|
-
file: typing.Optional[FileData] = OMIT,
|
|
2005
|
-
) -> ExtractJob:
|
|
1880
|
+
async def update_extraction_agent(
|
|
1881
|
+
self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
|
|
1882
|
+
) -> ExtractAgent:
|
|
2006
1883
|
"""
|
|
2007
|
-
Stateless extraction endpoint that uses a default extraction agent in the user's default project.
|
|
2008
|
-
Requires data_schema, config, and either file_id, text, or base64 encoded file data.
|
|
2009
|
-
|
|
2010
1884
|
Parameters:
|
|
2011
|
-
-
|
|
2012
|
-
|
|
2013
|
-
- organization_id: typing.Optional[str].
|
|
2014
|
-
|
|
2015
|
-
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
|
|
2016
|
-
|
|
2017
|
-
- data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
|
|
2018
|
-
|
|
2019
|
-
- config: ExtractConfig. The configuration parameters for the extraction
|
|
2020
|
-
|
|
2021
|
-
- file_id: typing.Optional[str].
|
|
1885
|
+
- extraction_agent_id: str.
|
|
2022
1886
|
|
|
2023
|
-
-
|
|
1887
|
+
- data_schema: ExtractAgentUpdateDataSchema. The schema of the data
|
|
2024
1888
|
|
|
2025
|
-
-
|
|
1889
|
+
- config: ExtractConfig. The configuration parameters for the extraction agent.
|
|
2026
1890
|
---
|
|
2027
1891
|
from llama_cloud import (
|
|
2028
1892
|
DocumentChunkMode,
|
|
@@ -2031,7 +1895,6 @@ class AsyncLlamaExtractClient:
|
|
|
2031
1895
|
ExtractMode,
|
|
2032
1896
|
ExtractModels,
|
|
2033
1897
|
ExtractTarget,
|
|
2034
|
-
FileData,
|
|
2035
1898
|
PublicModelName,
|
|
2036
1899
|
)
|
|
2037
1900
|
from llama_cloud.client import AsyncLlamaCloud
|
|
@@ -2039,7 +1902,8 @@ class AsyncLlamaExtractClient:
|
|
|
2039
1902
|
client = AsyncLlamaCloud(
|
|
2040
1903
|
token="YOUR_TOKEN",
|
|
2041
1904
|
)
|
|
2042
|
-
await client.llama_extract.
|
|
1905
|
+
await client.llama_extract.update_extraction_agent(
|
|
1906
|
+
extraction_agent_id="string",
|
|
2043
1907
|
config=ExtractConfig(
|
|
2044
1908
|
priority=ExtractConfigPriority.LOW,
|
|
2045
1909
|
extraction_target=ExtractTarget.PER_DOC,
|
|
@@ -2048,31 +1912,51 @@ class AsyncLlamaExtractClient:
|
|
|
2048
1912
|
extract_model=ExtractModels.OPENAI_GPT_4_1,
|
|
2049
1913
|
chunk_mode=DocumentChunkMode.PAGE,
|
|
2050
1914
|
),
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
1915
|
+
)
|
|
1916
|
+
"""
|
|
1917
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
1918
|
+
"PUT",
|
|
1919
|
+
urllib.parse.urljoin(
|
|
1920
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
2054
1921
|
),
|
|
1922
|
+
json=jsonable_encoder({"data_schema": data_schema, "config": config}),
|
|
1923
|
+
headers=self._client_wrapper.get_headers(),
|
|
1924
|
+
timeout=60,
|
|
1925
|
+
)
|
|
1926
|
+
if 200 <= _response.status_code < 300:
|
|
1927
|
+
return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
|
|
1928
|
+
if _response.status_code == 422:
|
|
1929
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1930
|
+
try:
|
|
1931
|
+
_response_json = _response.json()
|
|
1932
|
+
except JSONDecodeError:
|
|
1933
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1934
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1935
|
+
|
|
1936
|
+
async def delete_extraction_agent(self, extraction_agent_id: str) -> typing.Any:
|
|
1937
|
+
"""
|
|
1938
|
+
Parameters:
|
|
1939
|
+
- extraction_agent_id: str.
|
|
1940
|
+
---
|
|
1941
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
1942
|
+
|
|
1943
|
+
client = AsyncLlamaCloud(
|
|
1944
|
+
token="YOUR_TOKEN",
|
|
1945
|
+
)
|
|
1946
|
+
await client.llama_extract.delete_extraction_agent(
|
|
1947
|
+
extraction_agent_id="string",
|
|
2055
1948
|
)
|
|
2056
1949
|
"""
|
|
2057
|
-
_request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
|
|
2058
|
-
if webhook_configurations is not OMIT:
|
|
2059
|
-
_request["webhook_configurations"] = webhook_configurations
|
|
2060
|
-
if file_id is not OMIT:
|
|
2061
|
-
_request["file_id"] = file_id
|
|
2062
|
-
if text is not OMIT:
|
|
2063
|
-
_request["text"] = text
|
|
2064
|
-
if file is not OMIT:
|
|
2065
|
-
_request["file"] = file
|
|
2066
1950
|
_response = await self._client_wrapper.httpx_client.request(
|
|
2067
|
-
"
|
|
2068
|
-
urllib.parse.urljoin(
|
|
2069
|
-
|
|
2070
|
-
|
|
1951
|
+
"DELETE",
|
|
1952
|
+
urllib.parse.urljoin(
|
|
1953
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
|
|
1954
|
+
),
|
|
2071
1955
|
headers=self._client_wrapper.get_headers(),
|
|
2072
1956
|
timeout=60,
|
|
2073
1957
|
)
|
|
2074
1958
|
if 200 <= _response.status_code < 300:
|
|
2075
|
-
return pydantic.parse_obj_as(
|
|
1959
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
2076
1960
|
if _response.status_code == 422:
|
|
2077
1961
|
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
2078
1962
|
try:
|