llama-cloud 0.1.41__py3-none-any.whl → 0.1.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. Click here for more details.

Files changed (43)
  1. llama_cloud/__init__.py +19 -19
  2. llama_cloud/resources/__init__.py +6 -0
  3. llama_cloud/resources/alpha/client.py +14 -30
  4. llama_cloud/resources/beta/client.py +1045 -59
  5. llama_cloud/resources/jobs/client.py +0 -8
  6. llama_cloud/resources/llama_extract/__init__.py +6 -0
  7. llama_cloud/resources/llama_extract/client.py +825 -941
  8. llama_cloud/resources/llama_extract/types/__init__.py +6 -0
  9. llama_cloud/resources/organizations/client.py +18 -4
  10. llama_cloud/resources/parsing/client.py +56 -0
  11. llama_cloud/resources/pipelines/client.py +164 -0
  12. llama_cloud/types/__init__.py +16 -22
  13. llama_cloud/types/agent_data.py +1 -1
  14. llama_cloud/types/agent_deployment_summary.py +1 -2
  15. llama_cloud/types/{prompt_conf.py → api_key.py} +14 -9
  16. llama_cloud/types/{extract_job_create.py → api_key_query_response.py} +6 -14
  17. llama_cloud/types/api_key_type.py +17 -0
  18. llama_cloud/types/delete_response.py +35 -0
  19. llama_cloud/types/extract_config.py +1 -0
  20. llama_cloud/types/extract_models.py +4 -0
  21. llama_cloud/types/extracted_table.py +40 -0
  22. llama_cloud/types/legacy_parse_job_config.py +3 -0
  23. llama_cloud/types/llama_parse_parameters.py +7 -0
  24. llama_cloud/types/organization.py +1 -0
  25. llama_cloud/types/paginated_response_spreadsheet_job.py +34 -0
  26. llama_cloud/types/parse_job_config.py +7 -0
  27. llama_cloud/types/public_model_name.py +4 -0
  28. llama_cloud/types/quota_configuration_configuration_type.py +4 -0
  29. llama_cloud/types/spreadsheet_job.py +50 -0
  30. llama_cloud/types/spreadsheet_parsing_config.py +35 -0
  31. {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/METADATA +1 -1
  32. {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/RECORD +37 -37
  33. llama_cloud/types/chunk_mode.py +0 -29
  34. llama_cloud/types/llama_extract_settings.py +0 -67
  35. llama_cloud/types/multimodal_parse_resolution.py +0 -17
  36. llama_cloud/types/schema_relax_mode.py +0 -25
  37. llama_cloud/types/struct_mode.py +0 -33
  38. llama_cloud/types/struct_parse_conf.py +0 -63
  39. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_data_schema_override.py +0 -0
  40. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_data_schema_override_zero_value.py +0 -0
  41. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_priority.py +0 -0
  42. {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/LICENSE +0 -0
  43. {llama_cloud-0.1.41.dist-info → llama_cloud-0.1.43.dist-info}/WHEEL +0 -0
@@ -12,19 +12,19 @@ from ...errors.unprocessable_entity_error import UnprocessableEntityError
12
12
  from ...types.extract_agent import ExtractAgent
13
13
  from ...types.extract_config import ExtractConfig
14
14
  from ...types.extract_job import ExtractJob
15
- from ...types.extract_job_create import ExtractJobCreate
16
15
  from ...types.extract_resultset import ExtractResultset
17
16
  from ...types.extract_run import ExtractRun
18
17
  from ...types.extract_schema_generate_response import ExtractSchemaGenerateResponse
19
18
  from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
20
19
  from ...types.file_data import FileData
21
20
  from ...types.http_validation_error import HttpValidationError
22
- from ...types.llama_extract_settings import LlamaExtractSettings
23
21
  from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
24
22
  from ...types.webhook_configuration import WebhookConfiguration
25
23
  from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
26
24
  from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
27
25
  from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
26
+ from .types.extract_job_create_data_schema_override import ExtractJobCreateDataSchemaOverride
27
+ from .types.extract_job_create_priority import ExtractJobCreatePriority
28
28
  from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
29
29
  from .types.extract_stateless_request_data_schema import ExtractStatelessRequestDataSchema
30
30
 
@@ -44,39 +44,29 @@ class LlamaExtractClient:
44
44
  def __init__(self, *, client_wrapper: SyncClientWrapper):
45
45
  self._client_wrapper = client_wrapper
46
46
 
47
- def list_extraction_agents(
48
- self,
49
- *,
50
- include_default: typing.Optional[bool] = None,
51
- project_id: typing.Optional[str] = None,
52
- organization_id: typing.Optional[str] = None,
53
- ) -> typing.List[ExtractAgent]:
47
+ def list_jobs(self, *, extraction_agent_id: str) -> typing.List[ExtractJob]:
54
48
  """
55
49
  Parameters:
56
- - include_default: typing.Optional[bool]. Whether to include default agents in the results
57
-
58
- - project_id: typing.Optional[str].
59
-
60
- - organization_id: typing.Optional[str].
50
+ - extraction_agent_id: str.
61
51
  ---
62
52
  from llama_cloud.client import LlamaCloud
63
53
 
64
54
  client = LlamaCloud(
65
55
  token="YOUR_TOKEN",
66
56
  )
67
- client.llama_extract.list_extraction_agents()
57
+ client.llama_extract.list_jobs(
58
+ extraction_agent_id="string",
59
+ )
68
60
  """
69
61
  _response = self._client_wrapper.httpx_client.request(
70
62
  "GET",
71
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
72
- params=remove_none_from_dict(
73
- {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
74
- ),
63
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
64
+ params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
75
65
  headers=self._client_wrapper.get_headers(),
76
66
  timeout=60,
77
67
  )
78
68
  if 200 <= _response.status_code < 300:
79
- return pydantic.parse_obj_as(typing.List[ExtractAgent], _response.json()) # type: ignore
69
+ return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
80
70
  if _response.status_code == 422:
81
71
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
82
72
  try:
@@ -85,31 +75,38 @@ class LlamaExtractClient:
85
75
  raise ApiError(status_code=_response.status_code, body=_response.text)
86
76
  raise ApiError(status_code=_response.status_code, body=_response_json)
87
77
 
88
- def create_extraction_agent(
78
+ def run_job(
89
79
  self,
90
80
  *,
91
- project_id: typing.Optional[str] = None,
92
- organization_id: typing.Optional[str] = None,
93
- name: str,
94
- data_schema: ExtractAgentCreateDataSchema,
95
- config: ExtractConfig,
96
- ) -> ExtractAgent:
81
+ from_ui: typing.Optional[bool] = None,
82
+ priority: typing.Optional[ExtractJobCreatePriority] = OMIT,
83
+ webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
84
+ extraction_agent_id: str,
85
+ file_id: str,
86
+ data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride] = OMIT,
87
+ config_override: typing.Optional[ExtractConfig] = OMIT,
88
+ ) -> ExtractJob:
97
89
  """
98
90
  Parameters:
99
- - project_id: typing.Optional[str].
91
+ - from_ui: typing.Optional[bool].
100
92
 
101
- - organization_id: typing.Optional[str].
93
+ - priority: typing.Optional[ExtractJobCreatePriority].
102
94
 
103
- - name: str. The name of the extraction schema
95
+ - webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
104
96
 
105
- - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
97
+ - extraction_agent_id: str. The id of the extraction agent
106
98
 
107
- - config: ExtractConfig. The configuration parameters for the extraction agent.
99
+ - file_id: str. The id of the file
100
+
101
+ - data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride]. The data schema to override the extraction agent's data schema with
102
+
103
+ - config_override: typing.Optional[ExtractConfig].
108
104
  ---
109
105
  from llama_cloud import (
110
106
  DocumentChunkMode,
111
107
  ExtractConfig,
112
108
  ExtractConfigPriority,
109
+ ExtractJobCreatePriority,
113
110
  ExtractMode,
114
111
  ExtractModels,
115
112
  ExtractTarget,
@@ -120,9 +117,11 @@ class LlamaExtractClient:
120
117
  client = LlamaCloud(
121
118
  token="YOUR_TOKEN",
122
119
  )
123
- client.llama_extract.create_extraction_agent(
124
- name="string",
125
- config=ExtractConfig(
120
+ client.llama_extract.run_job(
121
+ priority=ExtractJobCreatePriority.LOW,
122
+ extraction_agent_id="string",
123
+ file_id="string",
124
+ config_override=ExtractConfig(
126
125
  priority=ExtractConfigPriority.LOW,
127
126
  extraction_target=ExtractTarget.PER_DOC,
128
127
  extraction_mode=ExtractMode.FAST,
@@ -132,16 +131,25 @@ class LlamaExtractClient:
132
131
  ),
133
132
  )
134
133
  """
134
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_id": file_id}
135
+ if priority is not OMIT:
136
+ _request["priority"] = priority
137
+ if webhook_configurations is not OMIT:
138
+ _request["webhook_configurations"] = webhook_configurations
139
+ if data_schema_override is not OMIT:
140
+ _request["data_schema_override"] = data_schema_override
141
+ if config_override is not OMIT:
142
+ _request["config_override"] = config_override
135
143
  _response = self._client_wrapper.httpx_client.request(
136
144
  "POST",
137
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
138
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
139
- json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
145
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
146
+ params=remove_none_from_dict({"from_ui": from_ui}),
147
+ json=jsonable_encoder(_request),
140
148
  headers=self._client_wrapper.get_headers(),
141
149
  timeout=60,
142
150
  )
143
151
  if 200 <= _response.status_code < 300:
144
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
152
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
145
153
  if _response.status_code == 422:
146
154
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
147
155
  try:
@@ -150,34 +158,74 @@ class LlamaExtractClient:
150
158
  raise ApiError(status_code=_response.status_code, body=_response.text)
151
159
  raise ApiError(status_code=_response.status_code, body=_response_json)
152
160
 
153
- def validate_extraction_schema(
154
- self, *, data_schema: ExtractSchemaValidateRequestDataSchema
155
- ) -> ExtractSchemaValidateResponse:
161
+ def get_job(self, job_id: str) -> ExtractJob:
156
162
  """
157
- Validates an extraction agent's schema definition.
158
- Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
159
-
160
163
  Parameters:
161
- - data_schema: ExtractSchemaValidateRequestDataSchema.
164
+ - job_id: str.
162
165
  ---
163
166
  from llama_cloud.client import LlamaCloud
164
167
 
165
168
  client = LlamaCloud(
166
169
  token="YOUR_TOKEN",
167
170
  )
168
- client.llama_extract.validate_extraction_schema()
171
+ client.llama_extract.get_job(
172
+ job_id="string",
173
+ )
174
+ """
175
+ _response = self._client_wrapper.httpx_client.request(
176
+ "GET",
177
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
178
+ headers=self._client_wrapper.get_headers(),
179
+ timeout=60,
180
+ )
181
+ if 200 <= _response.status_code < 300:
182
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
183
+ if _response.status_code == 422:
184
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
185
+ try:
186
+ _response_json = _response.json()
187
+ except JSONDecodeError:
188
+ raise ApiError(status_code=_response.status_code, body=_response.text)
189
+ raise ApiError(status_code=_response.status_code, body=_response_json)
190
+
191
+ def run_job_on_file(
192
+ self,
193
+ *,
194
+ from_ui: typing.Optional[bool] = None,
195
+ extraction_agent_id: str,
196
+ file: typing.IO,
197
+ data_schema_override: typing.Optional[str] = None,
198
+ config_override: typing.Optional[str] = None,
199
+ ) -> ExtractJob:
200
+ """
201
+ Parameters:
202
+ - from_ui: typing.Optional[bool].
203
+
204
+ - extraction_agent_id: str.
205
+
206
+ - file: typing.IO.
207
+
208
+ - data_schema_override: typing.Optional[str].
209
+
210
+ - config_override: typing.Optional[str].
169
211
  """
170
212
  _response = self._client_wrapper.httpx_client.request(
171
213
  "POST",
172
- urllib.parse.urljoin(
173
- f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
214
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
215
+ params=remove_none_from_dict({"from_ui": from_ui}),
216
+ data=jsonable_encoder(
217
+ {
218
+ "extraction_agent_id": extraction_agent_id,
219
+ "data_schema_override": data_schema_override,
220
+ "config_override": config_override,
221
+ }
174
222
  ),
175
- json=jsonable_encoder({"data_schema": data_schema}),
223
+ files={"file": file},
176
224
  headers=self._client_wrapper.get_headers(),
177
225
  timeout=60,
178
226
  )
179
227
  if 200 <= _response.status_code < 300:
180
- return pydantic.parse_obj_as(ExtractSchemaValidateResponse, _response.json()) # type: ignore
228
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
181
229
  if _response.status_code == 422:
182
230
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
183
231
  try:
@@ -186,50 +234,69 @@ class LlamaExtractClient:
186
234
  raise ApiError(status_code=_response.status_code, body=_response.text)
187
235
  raise ApiError(status_code=_response.status_code, body=_response_json)
188
236
 
189
- def generate_extraction_schema(
237
+ def run_batch_jobs(
190
238
  self,
191
239
  *,
192
- project_id: typing.Optional[str] = None,
193
- organization_id: typing.Optional[str] = None,
194
- prompt: typing.Optional[str] = OMIT,
195
- file_id: typing.Optional[str] = OMIT,
196
- ) -> ExtractSchemaGenerateResponse:
240
+ from_ui: typing.Optional[bool] = None,
241
+ extraction_agent_id: str,
242
+ file_ids: typing.List[str],
243
+ data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
244
+ config_override: typing.Optional[ExtractConfig] = OMIT,
245
+ ) -> typing.List[ExtractJob]:
197
246
  """
198
- Generates an extraction agent's schema definition from a file and/or natural language prompt.
199
-
200
247
  Parameters:
201
- - project_id: typing.Optional[str].
248
+ - from_ui: typing.Optional[bool].
202
249
 
203
- - organization_id: typing.Optional[str].
250
+ - extraction_agent_id: str. The id of the extraction agent
204
251
 
205
- - prompt: typing.Optional[str].
252
+ - file_ids: typing.List[str]. The ids of the files
206
253
 
207
- - file_id: typing.Optional[str].
254
+ - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
255
+
256
+ - config_override: typing.Optional[ExtractConfig].
208
257
  ---
258
+ from llama_cloud import (
259
+ DocumentChunkMode,
260
+ ExtractConfig,
261
+ ExtractConfigPriority,
262
+ ExtractMode,
263
+ ExtractModels,
264
+ ExtractTarget,
265
+ PublicModelName,
266
+ )
209
267
  from llama_cloud.client import LlamaCloud
210
268
 
211
269
  client = LlamaCloud(
212
270
  token="YOUR_TOKEN",
213
271
  )
214
- client.llama_extract.generate_extraction_schema()
272
+ client.llama_extract.run_batch_jobs(
273
+ extraction_agent_id="string",
274
+ file_ids=[],
275
+ config_override=ExtractConfig(
276
+ priority=ExtractConfigPriority.LOW,
277
+ extraction_target=ExtractTarget.PER_DOC,
278
+ extraction_mode=ExtractMode.FAST,
279
+ parse_model=PublicModelName.OPENAI_GPT_4_O,
280
+ extract_model=ExtractModels.OPENAI_GPT_4_1,
281
+ chunk_mode=DocumentChunkMode.PAGE,
282
+ ),
283
+ )
215
284
  """
216
- _request: typing.Dict[str, typing.Any] = {}
217
- if prompt is not OMIT:
218
- _request["prompt"] = prompt
219
- if file_id is not OMIT:
220
- _request["file_id"] = file_id
285
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
286
+ if data_schema_override is not OMIT:
287
+ _request["data_schema_override"] = data_schema_override
288
+ if config_override is not OMIT:
289
+ _request["config_override"] = config_override
221
290
  _response = self._client_wrapper.httpx_client.request(
222
291
  "POST",
223
- urllib.parse.urljoin(
224
- f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/generate"
225
- ),
226
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
292
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
293
+ params=remove_none_from_dict({"from_ui": from_ui}),
227
294
  json=jsonable_encoder(_request),
228
295
  headers=self._client_wrapper.get_headers(),
229
296
  timeout=60,
230
297
  )
231
298
  if 200 <= _response.status_code < 300:
232
- return pydantic.parse_obj_as(ExtractSchemaGenerateResponse, _response.json()) # type: ignore
299
+ return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
233
300
  if _response.status_code == 422:
234
301
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
235
302
  try:
@@ -238,12 +305,12 @@ class LlamaExtractClient:
238
305
  raise ApiError(status_code=_response.status_code, body=_response.text)
239
306
  raise ApiError(status_code=_response.status_code, body=_response_json)
240
307
 
241
- def get_extraction_agent_by_name(
242
- self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
243
- ) -> ExtractAgent:
308
+ def get_job_result(
309
+ self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
310
+ ) -> ExtractResultset:
244
311
  """
245
312
  Parameters:
246
- - name: str.
313
+ - job_id: str.
247
314
 
248
315
  - project_id: typing.Optional[str].
249
316
 
@@ -254,21 +321,19 @@ class LlamaExtractClient:
254
321
  client = LlamaCloud(
255
322
  token="YOUR_TOKEN",
256
323
  )
257
- client.llama_extract.get_extraction_agent_by_name(
258
- name="string",
324
+ client.llama_extract.get_job_result(
325
+ job_id="string",
259
326
  )
260
327
  """
261
328
  _response = self._client_wrapper.httpx_client.request(
262
329
  "GET",
263
- urllib.parse.urljoin(
264
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
265
- ),
330
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}/result"),
266
331
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
267
332
  headers=self._client_wrapper.get_headers(),
268
333
  timeout=60,
269
334
  )
270
335
  if 200 <= _response.status_code < 300:
271
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
336
+ return pydantic.parse_obj_as(ExtractResultset, _response.json()) # type: ignore
272
337
  if _response.status_code == 422:
273
338
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
274
339
  try:
@@ -277,36 +342,35 @@ class LlamaExtractClient:
277
342
  raise ApiError(status_code=_response.status_code, body=_response.text)
278
343
  raise ApiError(status_code=_response.status_code, body=_response_json)
279
344
 
280
- def get_or_create_default_extraction_agent(
281
- self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
282
- ) -> ExtractAgent:
345
+ def list_extract_runs(
346
+ self, *, extraction_agent_id: str, skip: typing.Optional[int] = None, limit: typing.Optional[int] = None
347
+ ) -> PaginatedExtractRunsResponse:
283
348
  """
284
- Get or create a default extraction agent for the current project.
285
- The default agent has an empty schema and default configuration.
286
-
287
349
  Parameters:
288
- - project_id: typing.Optional[str].
350
+ - extraction_agent_id: str.
289
351
 
290
- - organization_id: typing.Optional[str].
352
+ - skip: typing.Optional[int].
353
+
354
+ - limit: typing.Optional[int].
291
355
  ---
292
356
  from llama_cloud.client import LlamaCloud
293
357
 
294
358
  client = LlamaCloud(
295
359
  token="YOUR_TOKEN",
296
360
  )
297
- client.llama_extract.get_or_create_default_extraction_agent()
361
+ client.llama_extract.list_extract_runs(
362
+ extraction_agent_id="string",
363
+ )
298
364
  """
299
365
  _response = self._client_wrapper.httpx_client.request(
300
366
  "GET",
301
- urllib.parse.urljoin(
302
- f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
303
- ),
304
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
367
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs"),
368
+ params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id, "skip": skip, "limit": limit}),
305
369
  headers=self._client_wrapper.get_headers(),
306
370
  timeout=60,
307
371
  )
308
372
  if 200 <= _response.status_code < 300:
309
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
373
+ return pydantic.parse_obj_as(PaginatedExtractRunsResponse, _response.json()) # type: ignore
310
374
  if _response.status_code == 422:
311
375
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
312
376
  try:
@@ -315,7 +379,7 @@ class LlamaExtractClient:
315
379
  raise ApiError(status_code=_response.status_code, body=_response.text)
316
380
  raise ApiError(status_code=_response.status_code, body=_response_json)
317
381
 
318
- def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
382
+ def get_latest_run_from_ui(self, *, extraction_agent_id: str) -> typing.Optional[ExtractRun]:
319
383
  """
320
384
  Parameters:
321
385
  - extraction_agent_id: str.
@@ -325,20 +389,19 @@ class LlamaExtractClient:
325
389
  client = LlamaCloud(
326
390
  token="YOUR_TOKEN",
327
391
  )
328
- client.llama_extract.get_extraction_agent(
392
+ client.llama_extract.get_latest_run_from_ui(
329
393
  extraction_agent_id="string",
330
394
  )
331
395
  """
332
396
  _response = self._client_wrapper.httpx_client.request(
333
397
  "GET",
334
- urllib.parse.urljoin(
335
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
336
- ),
398
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs/latest-from-ui"),
399
+ params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
337
400
  headers=self._client_wrapper.get_headers(),
338
401
  timeout=60,
339
402
  )
340
403
  if 200 <= _response.status_code < 300:
341
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
404
+ return pydantic.parse_obj_as(typing.Optional[ExtractRun], _response.json()) # type: ignore
342
405
  if _response.status_code == 422:
343
406
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
344
407
  try:
@@ -347,54 +410,35 @@ class LlamaExtractClient:
347
410
  raise ApiError(status_code=_response.status_code, body=_response.text)
348
411
  raise ApiError(status_code=_response.status_code, body=_response_json)
349
412
 
350
- def update_extraction_agent(
351
- self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
352
- ) -> ExtractAgent:
413
+ def get_run_by_job_id(
414
+ self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
415
+ ) -> ExtractRun:
353
416
  """
354
417
  Parameters:
355
- - extraction_agent_id: str.
418
+ - job_id: str.
356
419
 
357
- - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
420
+ - project_id: typing.Optional[str].
358
421
 
359
- - config: ExtractConfig. The configuration parameters for the extraction agent.
422
+ - organization_id: typing.Optional[str].
360
423
  ---
361
- from llama_cloud import (
362
- DocumentChunkMode,
363
- ExtractConfig,
364
- ExtractConfigPriority,
365
- ExtractMode,
366
- ExtractModels,
367
- ExtractTarget,
368
- PublicModelName,
369
- )
370
424
  from llama_cloud.client import LlamaCloud
371
425
 
372
426
  client = LlamaCloud(
373
427
  token="YOUR_TOKEN",
374
428
  )
375
- client.llama_extract.update_extraction_agent(
376
- extraction_agent_id="string",
377
- config=ExtractConfig(
378
- priority=ExtractConfigPriority.LOW,
379
- extraction_target=ExtractTarget.PER_DOC,
380
- extraction_mode=ExtractMode.FAST,
381
- parse_model=PublicModelName.OPENAI_GPT_4_O,
382
- extract_model=ExtractModels.OPENAI_GPT_4_1,
383
- chunk_mode=DocumentChunkMode.PAGE,
384
- ),
429
+ client.llama_extract.get_run_by_job_id(
430
+ job_id="string",
385
431
  )
386
432
  """
387
433
  _response = self._client_wrapper.httpx_client.request(
388
- "PUT",
389
- urllib.parse.urljoin(
390
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
391
- ),
392
- json=jsonable_encoder({"data_schema": data_schema, "config": config}),
434
+ "GET",
435
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/by-job/{job_id}"),
436
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
393
437
  headers=self._client_wrapper.get_headers(),
394
438
  timeout=60,
395
439
  )
396
440
  if 200 <= _response.status_code < 300:
397
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
441
+ return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
398
442
  if _response.status_code == 422:
399
443
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
400
444
  try:
@@ -403,30 +447,35 @@ class LlamaExtractClient:
403
447
  raise ApiError(status_code=_response.status_code, body=_response.text)
404
448
  raise ApiError(status_code=_response.status_code, body=_response_json)
405
449
 
406
- def delete_extraction_agent(self, extraction_agent_id: str) -> typing.Any:
450
+ def get_run(
451
+ self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
452
+ ) -> ExtractRun:
407
453
  """
408
454
  Parameters:
409
- - extraction_agent_id: str.
455
+ - run_id: str.
456
+
457
+ - project_id: typing.Optional[str].
458
+
459
+ - organization_id: typing.Optional[str].
410
460
  ---
411
461
  from llama_cloud.client import LlamaCloud
412
462
 
413
463
  client = LlamaCloud(
414
464
  token="YOUR_TOKEN",
415
465
  )
416
- client.llama_extract.delete_extraction_agent(
417
- extraction_agent_id="string",
466
+ client.llama_extract.get_run(
467
+ run_id="string",
418
468
  )
419
469
  """
420
470
  _response = self._client_wrapper.httpx_client.request(
421
- "DELETE",
422
- urllib.parse.urljoin(
423
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
424
- ),
471
+ "GET",
472
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
473
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
425
474
  headers=self._client_wrapper.get_headers(),
426
475
  timeout=60,
427
476
  )
428
477
  if 200 <= _response.status_code < 300:
429
- return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
478
+ return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
430
479
  if _response.status_code == 422:
431
480
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
432
481
  try:
@@ -435,29 +484,35 @@ class LlamaExtractClient:
435
484
  raise ApiError(status_code=_response.status_code, body=_response.text)
436
485
  raise ApiError(status_code=_response.status_code, body=_response_json)
437
486
 
438
- def list_jobs(self, *, extraction_agent_id: str) -> typing.List[ExtractJob]:
487
+ def delete_extraction_run(
488
+ self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
489
+ ) -> typing.Any:
439
490
  """
440
491
  Parameters:
441
- - extraction_agent_id: str.
492
+ - run_id: str.
493
+
494
+ - project_id: typing.Optional[str].
495
+
496
+ - organization_id: typing.Optional[str].
442
497
  ---
443
498
  from llama_cloud.client import LlamaCloud
444
499
 
445
500
  client = LlamaCloud(
446
501
  token="YOUR_TOKEN",
447
502
  )
448
- client.llama_extract.list_jobs(
449
- extraction_agent_id="string",
503
+ client.llama_extract.delete_extraction_run(
504
+ run_id="string",
450
505
  )
451
506
  """
452
507
  _response = self._client_wrapper.httpx_client.request(
453
- "GET",
454
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
455
- params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
508
+ "DELETE",
509
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
510
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
456
511
  headers=self._client_wrapper.get_headers(),
457
512
  timeout=60,
458
513
  )
459
514
  if 200 <= _response.status_code < 300:
460
- return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
515
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
461
516
  if _response.status_code == 422:
462
517
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
463
518
  try:
@@ -466,22 +521,47 @@ class LlamaExtractClient:
466
521
  raise ApiError(status_code=_response.status_code, body=_response.text)
467
522
  raise ApiError(status_code=_response.status_code, body=_response_json)
468
523
 
469
- def run_job(self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreate) -> ExtractJob:
524
+ def extract_stateless(
525
+ self,
526
+ *,
527
+ project_id: typing.Optional[str] = None,
528
+ organization_id: typing.Optional[str] = None,
529
+ webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
530
+ data_schema: ExtractStatelessRequestDataSchema,
531
+ config: ExtractConfig,
532
+ file_id: typing.Optional[str] = OMIT,
533
+ text: typing.Optional[str] = OMIT,
534
+ file: typing.Optional[FileData] = OMIT,
535
+ ) -> ExtractJob:
470
536
  """
537
+ Stateless extraction endpoint that uses a default extraction agent in the user's default project.
538
+ Requires data_schema, config, and either file_id, text, or base64 encoded file data.
539
+
471
540
  Parameters:
472
- - from_ui: typing.Optional[bool].
541
+ - project_id: typing.Optional[str].
542
+
543
+ - organization_id: typing.Optional[str].
544
+
545
+ - webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
546
+
547
+ - data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
548
+
549
+ - config: ExtractConfig. The configuration parameters for the extraction
473
550
 
474
- - request: ExtractJobCreate.
551
+ - file_id: typing.Optional[str].
552
+
553
+ - text: typing.Optional[str].
554
+
555
+ - file: typing.Optional[FileData].
475
556
  ---
476
557
  from llama_cloud import (
477
558
  DocumentChunkMode,
478
559
  ExtractConfig,
479
560
  ExtractConfigPriority,
480
- ExtractJobCreate,
481
- ExtractJobCreatePriority,
482
561
  ExtractMode,
483
562
  ExtractModels,
484
563
  ExtractTarget,
564
+ FileData,
485
565
  PublicModelName,
486
566
  )
487
567
  from llama_cloud.client import LlamaCloud
@@ -489,27 +569,35 @@ class LlamaExtractClient:
489
569
  client = LlamaCloud(
490
570
  token="YOUR_TOKEN",
491
571
  )
492
- client.llama_extract.run_job(
493
- request=ExtractJobCreate(
494
- priority=ExtractJobCreatePriority.LOW,
495
- extraction_agent_id="string",
496
- file_id="string",
497
- config_override=ExtractConfig(
498
- priority=ExtractConfigPriority.LOW,
499
- extraction_target=ExtractTarget.PER_DOC,
500
- extraction_mode=ExtractMode.FAST,
501
- parse_model=PublicModelName.OPENAI_GPT_4_O,
502
- extract_model=ExtractModels.OPENAI_GPT_4_1,
503
- chunk_mode=DocumentChunkMode.PAGE,
504
- ),
572
+ client.llama_extract.extract_stateless(
573
+ config=ExtractConfig(
574
+ priority=ExtractConfigPriority.LOW,
575
+ extraction_target=ExtractTarget.PER_DOC,
576
+ extraction_mode=ExtractMode.FAST,
577
+ parse_model=PublicModelName.OPENAI_GPT_4_O,
578
+ extract_model=ExtractModels.OPENAI_GPT_4_1,
579
+ chunk_mode=DocumentChunkMode.PAGE,
580
+ ),
581
+ file=FileData(
582
+ data="string",
583
+ mime_type="string",
505
584
  ),
506
585
  )
507
586
  """
587
+ _request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
588
+ if webhook_configurations is not OMIT:
589
+ _request["webhook_configurations"] = webhook_configurations
590
+ if file_id is not OMIT:
591
+ _request["file_id"] = file_id
592
+ if text is not OMIT:
593
+ _request["text"] = text
594
+ if file is not OMIT:
595
+ _request["file"] = file
508
596
  _response = self._client_wrapper.httpx_client.request(
509
597
  "POST",
510
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
511
- params=remove_none_from_dict({"from_ui": from_ui}),
512
- json=jsonable_encoder(request),
598
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
599
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
600
+ json=jsonable_encoder(_request),
513
601
  headers=self._client_wrapper.get_headers(),
514
602
  timeout=60,
515
603
  )
@@ -523,28 +611,39 @@ class LlamaExtractClient:
523
611
  raise ApiError(status_code=_response.status_code, body=_response.text)
524
612
  raise ApiError(status_code=_response.status_code, body=_response_json)
525
613
 
526
- def get_job(self, job_id: str) -> ExtractJob:
614
+ def list_extraction_agents(
615
+ self,
616
+ *,
617
+ include_default: typing.Optional[bool] = None,
618
+ project_id: typing.Optional[str] = None,
619
+ organization_id: typing.Optional[str] = None,
620
+ ) -> typing.List[ExtractAgent]:
527
621
  """
528
622
  Parameters:
529
- - job_id: str.
623
+ - include_default: typing.Optional[bool]. Whether to include default agents in the results
624
+
625
+ - project_id: typing.Optional[str].
626
+
627
+ - organization_id: typing.Optional[str].
530
628
  ---
531
629
  from llama_cloud.client import LlamaCloud
532
630
 
533
631
  client = LlamaCloud(
534
632
  token="YOUR_TOKEN",
535
633
  )
536
- client.llama_extract.get_job(
537
- job_id="string",
538
- )
634
+ client.llama_extract.list_extraction_agents()
539
635
  """
540
636
  _response = self._client_wrapper.httpx_client.request(
541
637
  "GET",
542
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
638
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
639
+ params=remove_none_from_dict(
640
+ {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
641
+ ),
543
642
  headers=self._client_wrapper.get_headers(),
544
643
  timeout=60,
545
644
  )
546
645
  if 200 <= _response.status_code < 300:
547
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
646
+ return pydantic.parse_obj_as(typing.List[ExtractAgent], _response.json()) # type: ignore
548
647
  if _response.status_code == 422:
549
648
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
550
649
  try:
@@ -553,37 +652,34 @@ class LlamaExtractClient:
553
652
  raise ApiError(status_code=_response.status_code, body=_response.text)
554
653
  raise ApiError(status_code=_response.status_code, body=_response_json)
555
654
 
556
- def run_job_test_user(
655
+ def create_extraction_agent(
557
656
  self,
558
657
  *,
559
- from_ui: typing.Optional[bool] = None,
560
- job_create: ExtractJobCreate,
561
- extract_settings: typing.Optional[LlamaExtractSettings] = OMIT,
562
- ) -> ExtractJob:
658
+ project_id: typing.Optional[str] = None,
659
+ organization_id: typing.Optional[str] = None,
660
+ name: str,
661
+ data_schema: ExtractAgentCreateDataSchema,
662
+ config: ExtractConfig,
663
+ ) -> ExtractAgent:
563
664
  """
564
665
  Parameters:
565
- - from_ui: typing.Optional[bool].
666
+ - project_id: typing.Optional[str].
667
+
668
+ - organization_id: typing.Optional[str].
566
669
 
567
- - job_create: ExtractJobCreate.
670
+ - name: str. The name of the extraction schema
671
+
672
+ - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
568
673
 
569
- - extract_settings: typing.Optional[LlamaExtractSettings].
674
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
570
675
  ---
571
676
  from llama_cloud import (
572
- ChunkMode,
573
677
  DocumentChunkMode,
574
678
  ExtractConfig,
575
679
  ExtractConfigPriority,
576
- ExtractJobCreate,
577
- ExtractJobCreatePriority,
578
680
  ExtractMode,
579
681
  ExtractModels,
580
682
  ExtractTarget,
581
- FailPageMode,
582
- LlamaExtractSettings,
583
- LlamaParseParameters,
584
- LlamaParseParametersPriority,
585
- MultimodalParseResolution,
586
- ParsingMode,
587
683
  PublicModelName,
588
684
  )
589
685
  from llama_cloud.client import LlamaCloud
@@ -591,44 +687,28 @@ class LlamaExtractClient:
591
687
  client = LlamaCloud(
592
688
  token="YOUR_TOKEN",
593
689
  )
594
- client.llama_extract.run_job_test_user(
595
- job_create=ExtractJobCreate(
596
- priority=ExtractJobCreatePriority.LOW,
597
- extraction_agent_id="string",
598
- file_id="string",
599
- config_override=ExtractConfig(
600
- priority=ExtractConfigPriority.LOW,
601
- extraction_target=ExtractTarget.PER_DOC,
602
- extraction_mode=ExtractMode.FAST,
603
- parse_model=PublicModelName.OPENAI_GPT_4_O,
604
- extract_model=ExtractModels.OPENAI_GPT_4_1,
605
- chunk_mode=DocumentChunkMode.PAGE,
606
- ),
607
- ),
608
- extract_settings=LlamaExtractSettings(
609
- chunk_mode=ChunkMode.PAGE,
610
- llama_parse_params=LlamaParseParameters(
611
- priority=LlamaParseParametersPriority.LOW,
612
- parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
613
- replace_failed_page_mode=FailPageMode.RAW_TEXT,
614
- ),
615
- multimodal_parse_resolution=MultimodalParseResolution.MEDIUM,
690
+ client.llama_extract.create_extraction_agent(
691
+ name="string",
692
+ config=ExtractConfig(
693
+ priority=ExtractConfigPriority.LOW,
694
+ extraction_target=ExtractTarget.PER_DOC,
695
+ extraction_mode=ExtractMode.FAST,
696
+ parse_model=PublicModelName.OPENAI_GPT_4_O,
697
+ extract_model=ExtractModels.OPENAI_GPT_4_1,
698
+ chunk_mode=DocumentChunkMode.PAGE,
616
699
  ),
617
700
  )
618
701
  """
619
- _request: typing.Dict[str, typing.Any] = {"job_create": job_create}
620
- if extract_settings is not OMIT:
621
- _request["extract_settings"] = extract_settings
622
702
  _response = self._client_wrapper.httpx_client.request(
623
703
  "POST",
624
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/test"),
625
- params=remove_none_from_dict({"from_ui": from_ui}),
626
- json=jsonable_encoder(_request),
704
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
705
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
706
+ json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
627
707
  headers=self._client_wrapper.get_headers(),
628
708
  timeout=60,
629
709
  )
630
710
  if 200 <= _response.status_code < 300:
631
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
711
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
632
712
  if _response.status_code == 422:
633
713
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
634
714
  try:
@@ -637,44 +717,34 @@ class LlamaExtractClient:
637
717
  raise ApiError(status_code=_response.status_code, body=_response.text)
638
718
  raise ApiError(status_code=_response.status_code, body=_response_json)
639
719
 
640
- def run_job_on_file(
641
- self,
642
- *,
643
- from_ui: typing.Optional[bool] = None,
644
- extraction_agent_id: str,
645
- file: typing.IO,
646
- data_schema_override: typing.Optional[str] = None,
647
- config_override: typing.Optional[str] = None,
648
- ) -> ExtractJob:
720
+ def validate_extraction_schema(
721
+ self, *, data_schema: ExtractSchemaValidateRequestDataSchema
722
+ ) -> ExtractSchemaValidateResponse:
649
723
  """
650
- Parameters:
651
- - from_ui: typing.Optional[bool].
652
-
653
- - extraction_agent_id: str.
654
-
655
- - file: typing.IO.
724
+ Validates an extraction agent's schema definition.
725
+ Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
656
726
 
657
- - data_schema_override: typing.Optional[str].
727
+ Parameters:
728
+ - data_schema: ExtractSchemaValidateRequestDataSchema.
729
+ ---
730
+ from llama_cloud.client import LlamaCloud
658
731
 
659
- - config_override: typing.Optional[str].
732
+ client = LlamaCloud(
733
+ token="YOUR_TOKEN",
734
+ )
735
+ client.llama_extract.validate_extraction_schema()
660
736
  """
661
737
  _response = self._client_wrapper.httpx_client.request(
662
738
  "POST",
663
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
664
- params=remove_none_from_dict({"from_ui": from_ui}),
665
- data=jsonable_encoder(
666
- {
667
- "extraction_agent_id": extraction_agent_id,
668
- "data_schema_override": data_schema_override,
669
- "config_override": config_override,
670
- }
739
+ urllib.parse.urljoin(
740
+ f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
671
741
  ),
672
- files={"file": file},
742
+ json=jsonable_encoder({"data_schema": data_schema}),
673
743
  headers=self._client_wrapper.get_headers(),
674
744
  timeout=60,
675
745
  )
676
746
  if 200 <= _response.status_code < 300:
677
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
747
+ return pydantic.parse_obj_as(ExtractSchemaValidateResponse, _response.json()) # type: ignore
678
748
  if _response.status_code == 422:
679
749
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
680
750
  try:
@@ -683,69 +753,50 @@ class LlamaExtractClient:
683
753
  raise ApiError(status_code=_response.status_code, body=_response.text)
684
754
  raise ApiError(status_code=_response.status_code, body=_response_json)
685
755
 
686
- def run_batch_jobs(
756
+ def generate_extraction_schema(
687
757
  self,
688
758
  *,
689
- from_ui: typing.Optional[bool] = None,
690
- extraction_agent_id: str,
691
- file_ids: typing.List[str],
692
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
693
- config_override: typing.Optional[ExtractConfig] = OMIT,
694
- ) -> typing.List[ExtractJob]:
759
+ project_id: typing.Optional[str] = None,
760
+ organization_id: typing.Optional[str] = None,
761
+ prompt: typing.Optional[str] = OMIT,
762
+ file_id: typing.Optional[str] = OMIT,
763
+ ) -> ExtractSchemaGenerateResponse:
695
764
  """
696
- Parameters:
697
- - from_ui: typing.Optional[bool].
765
+ Generates an extraction agent's schema definition from a file and/or natural language prompt.
698
766
 
699
- - extraction_agent_id: str. The id of the extraction agent
767
+ Parameters:
768
+ - project_id: typing.Optional[str].
700
769
 
701
- - file_ids: typing.List[str]. The ids of the files
770
+ - organization_id: typing.Optional[str].
702
771
 
703
- - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
772
+ - prompt: typing.Optional[str].
704
773
 
705
- - config_override: typing.Optional[ExtractConfig].
774
+ - file_id: typing.Optional[str].
706
775
  ---
707
- from llama_cloud import (
708
- DocumentChunkMode,
709
- ExtractConfig,
710
- ExtractConfigPriority,
711
- ExtractMode,
712
- ExtractModels,
713
- ExtractTarget,
714
- PublicModelName,
715
- )
716
776
  from llama_cloud.client import LlamaCloud
717
777
 
718
778
  client = LlamaCloud(
719
779
  token="YOUR_TOKEN",
720
780
  )
721
- client.llama_extract.run_batch_jobs(
722
- extraction_agent_id="string",
723
- file_ids=[],
724
- config_override=ExtractConfig(
725
- priority=ExtractConfigPriority.LOW,
726
- extraction_target=ExtractTarget.PER_DOC,
727
- extraction_mode=ExtractMode.FAST,
728
- parse_model=PublicModelName.OPENAI_GPT_4_O,
729
- extract_model=ExtractModels.OPENAI_GPT_4_1,
730
- chunk_mode=DocumentChunkMode.PAGE,
731
- ),
732
- )
781
+ client.llama_extract.generate_extraction_schema()
733
782
  """
734
- _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
735
- if data_schema_override is not OMIT:
736
- _request["data_schema_override"] = data_schema_override
737
- if config_override is not OMIT:
738
- _request["config_override"] = config_override
783
+ _request: typing.Dict[str, typing.Any] = {}
784
+ if prompt is not OMIT:
785
+ _request["prompt"] = prompt
786
+ if file_id is not OMIT:
787
+ _request["file_id"] = file_id
739
788
  _response = self._client_wrapper.httpx_client.request(
740
789
  "POST",
741
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
742
- params=remove_none_from_dict({"from_ui": from_ui}),
790
+ urllib.parse.urljoin(
791
+ f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/generate"
792
+ ),
793
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
743
794
  json=jsonable_encoder(_request),
744
795
  headers=self._client_wrapper.get_headers(),
745
796
  timeout=60,
746
797
  )
747
798
  if 200 <= _response.status_code < 300:
748
- return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
799
+ return pydantic.parse_obj_as(ExtractSchemaGenerateResponse, _response.json()) # type: ignore
749
800
  if _response.status_code == 422:
750
801
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
751
802
  try:
@@ -754,12 +805,12 @@ class LlamaExtractClient:
754
805
  raise ApiError(status_code=_response.status_code, body=_response.text)
755
806
  raise ApiError(status_code=_response.status_code, body=_response_json)
756
807
 
757
- def get_job_result(
758
- self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
759
- ) -> ExtractResultset:
808
+ def get_extraction_agent_by_name(
809
+ self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
810
+ ) -> ExtractAgent:
760
811
  """
761
812
  Parameters:
762
- - job_id: str.
813
+ - name: str.
763
814
 
764
815
  - project_id: typing.Optional[str].
765
816
 
@@ -770,19 +821,21 @@ class LlamaExtractClient:
770
821
  client = LlamaCloud(
771
822
  token="YOUR_TOKEN",
772
823
  )
773
- client.llama_extract.get_job_result(
774
- job_id="string",
824
+ client.llama_extract.get_extraction_agent_by_name(
825
+ name="string",
775
826
  )
776
827
  """
777
828
  _response = self._client_wrapper.httpx_client.request(
778
829
  "GET",
779
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}/result"),
830
+ urllib.parse.urljoin(
831
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
832
+ ),
780
833
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
781
834
  headers=self._client_wrapper.get_headers(),
782
835
  timeout=60,
783
836
  )
784
837
  if 200 <= _response.status_code < 300:
785
- return pydantic.parse_obj_as(ExtractResultset, _response.json()) # type: ignore
838
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
786
839
  if _response.status_code == 422:
787
840
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
788
841
  try:
@@ -791,35 +844,36 @@ class LlamaExtractClient:
791
844
  raise ApiError(status_code=_response.status_code, body=_response.text)
792
845
  raise ApiError(status_code=_response.status_code, body=_response_json)
793
846
 
794
- def list_extract_runs(
795
- self, *, extraction_agent_id: str, skip: typing.Optional[int] = None, limit: typing.Optional[int] = None
796
- ) -> PaginatedExtractRunsResponse:
847
+ def get_or_create_default_extraction_agent(
848
+ self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
849
+ ) -> ExtractAgent:
797
850
  """
798
- Parameters:
799
- - extraction_agent_id: str.
851
+ Get or create a default extraction agent for the current project.
852
+ The default agent has an empty schema and default configuration.
800
853
 
801
- - skip: typing.Optional[int].
854
+ Parameters:
855
+ - project_id: typing.Optional[str].
802
856
 
803
- - limit: typing.Optional[int].
857
+ - organization_id: typing.Optional[str].
804
858
  ---
805
859
  from llama_cloud.client import LlamaCloud
806
860
 
807
861
  client = LlamaCloud(
808
862
  token="YOUR_TOKEN",
809
863
  )
810
- client.llama_extract.list_extract_runs(
811
- extraction_agent_id="string",
812
- )
864
+ client.llama_extract.get_or_create_default_extraction_agent()
813
865
  """
814
866
  _response = self._client_wrapper.httpx_client.request(
815
867
  "GET",
816
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs"),
817
- params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id, "skip": skip, "limit": limit}),
868
+ urllib.parse.urljoin(
869
+ f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
870
+ ),
871
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
818
872
  headers=self._client_wrapper.get_headers(),
819
873
  timeout=60,
820
874
  )
821
875
  if 200 <= _response.status_code < 300:
822
- return pydantic.parse_obj_as(PaginatedExtractRunsResponse, _response.json()) # type: ignore
876
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
823
877
  if _response.status_code == 422:
824
878
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
825
879
  try:
@@ -828,7 +882,7 @@ class LlamaExtractClient:
828
882
  raise ApiError(status_code=_response.status_code, body=_response.text)
829
883
  raise ApiError(status_code=_response.status_code, body=_response_json)
830
884
 
831
- def get_latest_run_from_ui(self, *, extraction_agent_id: str) -> typing.Optional[ExtractRun]:
885
+ def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
832
886
  """
833
887
  Parameters:
834
888
  - extraction_agent_id: str.
@@ -838,19 +892,20 @@ class LlamaExtractClient:
838
892
  client = LlamaCloud(
839
893
  token="YOUR_TOKEN",
840
894
  )
841
- client.llama_extract.get_latest_run_from_ui(
895
+ client.llama_extract.get_extraction_agent(
842
896
  extraction_agent_id="string",
843
897
  )
844
898
  """
845
899
  _response = self._client_wrapper.httpx_client.request(
846
900
  "GET",
847
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs/latest-from-ui"),
848
- params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
901
+ urllib.parse.urljoin(
902
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
903
+ ),
849
904
  headers=self._client_wrapper.get_headers(),
850
905
  timeout=60,
851
906
  )
852
907
  if 200 <= _response.status_code < 300:
853
- return pydantic.parse_obj_as(typing.Optional[ExtractRun], _response.json()) # type: ignore
908
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
854
909
  if _response.status_code == 422:
855
910
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
856
911
  try:
@@ -859,35 +914,54 @@ class LlamaExtractClient:
859
914
  raise ApiError(status_code=_response.status_code, body=_response.text)
860
915
  raise ApiError(status_code=_response.status_code, body=_response_json)
861
916
 
862
- def get_run_by_job_id(
863
- self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
864
- ) -> ExtractRun:
917
+ def update_extraction_agent(
918
+ self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
919
+ ) -> ExtractAgent:
865
920
  """
866
921
  Parameters:
867
- - job_id: str.
922
+ - extraction_agent_id: str.
868
923
 
869
- - project_id: typing.Optional[str].
924
+ - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
870
925
 
871
- - organization_id: typing.Optional[str].
926
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
872
927
  ---
928
+ from llama_cloud import (
929
+ DocumentChunkMode,
930
+ ExtractConfig,
931
+ ExtractConfigPriority,
932
+ ExtractMode,
933
+ ExtractModels,
934
+ ExtractTarget,
935
+ PublicModelName,
936
+ )
873
937
  from llama_cloud.client import LlamaCloud
874
938
 
875
939
  client = LlamaCloud(
876
940
  token="YOUR_TOKEN",
877
941
  )
878
- client.llama_extract.get_run_by_job_id(
879
- job_id="string",
942
+ client.llama_extract.update_extraction_agent(
943
+ extraction_agent_id="string",
944
+ config=ExtractConfig(
945
+ priority=ExtractConfigPriority.LOW,
946
+ extraction_target=ExtractTarget.PER_DOC,
947
+ extraction_mode=ExtractMode.FAST,
948
+ parse_model=PublicModelName.OPENAI_GPT_4_O,
949
+ extract_model=ExtractModels.OPENAI_GPT_4_1,
950
+ chunk_mode=DocumentChunkMode.PAGE,
951
+ ),
880
952
  )
881
953
  """
882
954
  _response = self._client_wrapper.httpx_client.request(
883
- "GET",
884
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/by-job/{job_id}"),
885
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
955
+ "PUT",
956
+ urllib.parse.urljoin(
957
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
958
+ ),
959
+ json=jsonable_encoder({"data_schema": data_schema, "config": config}),
886
960
  headers=self._client_wrapper.get_headers(),
887
961
  timeout=60,
888
962
  )
889
963
  if 200 <= _response.status_code < 300:
890
- return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
964
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
891
965
  if _response.status_code == 422:
892
966
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
893
967
  try:
@@ -896,35 +970,30 @@ class LlamaExtractClient:
896
970
  raise ApiError(status_code=_response.status_code, body=_response.text)
897
971
  raise ApiError(status_code=_response.status_code, body=_response_json)
898
972
 
899
- def get_run(
900
- self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
901
- ) -> ExtractRun:
973
+ def delete_extraction_agent(self, extraction_agent_id: str) -> typing.Any:
902
974
  """
903
975
  Parameters:
904
- - run_id: str.
905
-
906
- - project_id: typing.Optional[str].
907
-
908
- - organization_id: typing.Optional[str].
976
+ - extraction_agent_id: str.
909
977
  ---
910
978
  from llama_cloud.client import LlamaCloud
911
979
 
912
980
  client = LlamaCloud(
913
981
  token="YOUR_TOKEN",
914
982
  )
915
- client.llama_extract.get_run(
916
- run_id="string",
983
+ client.llama_extract.delete_extraction_agent(
984
+ extraction_agent_id="string",
917
985
  )
918
986
  """
919
987
  _response = self._client_wrapper.httpx_client.request(
920
- "GET",
921
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
922
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
988
+ "DELETE",
989
+ urllib.parse.urljoin(
990
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
991
+ ),
923
992
  headers=self._client_wrapper.get_headers(),
924
993
  timeout=60,
925
994
  )
926
995
  if 200 <= _response.status_code < 300:
927
- return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
996
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
928
997
  if _response.status_code == 422:
929
998
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
930
999
  try:
@@ -933,35 +1002,34 @@ class LlamaExtractClient:
933
1002
  raise ApiError(status_code=_response.status_code, body=_response.text)
934
1003
  raise ApiError(status_code=_response.status_code, body=_response_json)
935
1004
 
936
- def delete_extraction_run(
937
- self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
938
- ) -> typing.Any:
939
- """
940
- Parameters:
941
- - run_id: str.
942
1005
 
943
- - project_id: typing.Optional[str].
1006
+ class AsyncLlamaExtractClient:
1007
+ def __init__(self, *, client_wrapper: AsyncClientWrapper):
1008
+ self._client_wrapper = client_wrapper
944
1009
 
945
- - organization_id: typing.Optional[str].
1010
+ async def list_jobs(self, *, extraction_agent_id: str) -> typing.List[ExtractJob]:
1011
+ """
1012
+ Parameters:
1013
+ - extraction_agent_id: str.
946
1014
  ---
947
- from llama_cloud.client import LlamaCloud
1015
+ from llama_cloud.client import AsyncLlamaCloud
948
1016
 
949
- client = LlamaCloud(
1017
+ client = AsyncLlamaCloud(
950
1018
  token="YOUR_TOKEN",
951
1019
  )
952
- client.llama_extract.delete_extraction_run(
953
- run_id="string",
1020
+ await client.llama_extract.list_jobs(
1021
+ extraction_agent_id="string",
954
1022
  )
955
1023
  """
956
- _response = self._client_wrapper.httpx_client.request(
957
- "DELETE",
958
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
959
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1024
+ _response = await self._client_wrapper.httpx_client.request(
1025
+ "GET",
1026
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
1027
+ params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
960
1028
  headers=self._client_wrapper.get_headers(),
961
1029
  timeout=60,
962
1030
  )
963
1031
  if 200 <= _response.status_code < 300:
964
- return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
1032
+ return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
965
1033
  if _response.status_code == 422:
966
1034
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
967
1035
  try:
@@ -970,56 +1038,53 @@ class LlamaExtractClient:
970
1038
  raise ApiError(status_code=_response.status_code, body=_response.text)
971
1039
  raise ApiError(status_code=_response.status_code, body=_response_json)
972
1040
 
973
- def extract_stateless(
1041
+ async def run_job(
974
1042
  self,
975
1043
  *,
976
- project_id: typing.Optional[str] = None,
977
- organization_id: typing.Optional[str] = None,
1044
+ from_ui: typing.Optional[bool] = None,
1045
+ priority: typing.Optional[ExtractJobCreatePriority] = OMIT,
978
1046
  webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
979
- data_schema: ExtractStatelessRequestDataSchema,
980
- config: ExtractConfig,
981
- file_id: typing.Optional[str] = OMIT,
982
- text: typing.Optional[str] = OMIT,
983
- file: typing.Optional[FileData] = OMIT,
1047
+ extraction_agent_id: str,
1048
+ file_id: str,
1049
+ data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride] = OMIT,
1050
+ config_override: typing.Optional[ExtractConfig] = OMIT,
984
1051
  ) -> ExtractJob:
985
1052
  """
986
- Stateless extraction endpoint that uses a default extraction agent in the user's default project.
987
- Requires data_schema, config, and either file_id, text, or base64 encoded file data.
988
-
989
1053
  Parameters:
990
- - project_id: typing.Optional[str].
1054
+ - from_ui: typing.Optional[bool].
991
1055
 
992
- - organization_id: typing.Optional[str].
1056
+ - priority: typing.Optional[ExtractJobCreatePriority].
993
1057
 
994
1058
  - webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
995
1059
 
996
- - data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
997
-
998
- - config: ExtractConfig. The configuration parameters for the extraction
1060
+ - extraction_agent_id: str. The id of the extraction agent
999
1061
 
1000
- - file_id: typing.Optional[str].
1062
+ - file_id: str. The id of the file
1001
1063
 
1002
- - text: typing.Optional[str].
1064
+ - data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride]. The data schema to override the extraction agent's data schema with
1003
1065
 
1004
- - file: typing.Optional[FileData].
1066
+ - config_override: typing.Optional[ExtractConfig].
1005
1067
  ---
1006
1068
  from llama_cloud import (
1007
1069
  DocumentChunkMode,
1008
1070
  ExtractConfig,
1009
1071
  ExtractConfigPriority,
1072
+ ExtractJobCreatePriority,
1010
1073
  ExtractMode,
1011
1074
  ExtractModels,
1012
1075
  ExtractTarget,
1013
- FileData,
1014
1076
  PublicModelName,
1015
1077
  )
1016
- from llama_cloud.client import LlamaCloud
1078
+ from llama_cloud.client import AsyncLlamaCloud
1017
1079
 
1018
- client = LlamaCloud(
1080
+ client = AsyncLlamaCloud(
1019
1081
  token="YOUR_TOKEN",
1020
1082
  )
1021
- client.llama_extract.extract_stateless(
1022
- config=ExtractConfig(
1083
+ await client.llama_extract.run_job(
1084
+ priority=ExtractJobCreatePriority.LOW,
1085
+ extraction_agent_id="string",
1086
+ file_id="string",
1087
+ config_override=ExtractConfig(
1023
1088
  priority=ExtractConfigPriority.LOW,
1024
1089
  extraction_target=ExtractTarget.PER_DOC,
1025
1090
  extraction_mode=ExtractMode.FAST,
@@ -1027,25 +1092,21 @@ class LlamaExtractClient:
1027
1092
  extract_model=ExtractModels.OPENAI_GPT_4_1,
1028
1093
  chunk_mode=DocumentChunkMode.PAGE,
1029
1094
  ),
1030
- file=FileData(
1031
- data="string",
1032
- mime_type="string",
1033
- ),
1034
1095
  )
1035
1096
  """
1036
- _request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
1097
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_id": file_id}
1098
+ if priority is not OMIT:
1099
+ _request["priority"] = priority
1037
1100
  if webhook_configurations is not OMIT:
1038
1101
  _request["webhook_configurations"] = webhook_configurations
1039
- if file_id is not OMIT:
1040
- _request["file_id"] = file_id
1041
- if text is not OMIT:
1042
- _request["text"] = text
1043
- if file is not OMIT:
1044
- _request["file"] = file
1045
- _response = self._client_wrapper.httpx_client.request(
1102
+ if data_schema_override is not OMIT:
1103
+ _request["data_schema_override"] = data_schema_override
1104
+ if config_override is not OMIT:
1105
+ _request["config_override"] = config_override
1106
+ _response = await self._client_wrapper.httpx_client.request(
1046
1107
  "POST",
1047
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
1048
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1108
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
1109
+ params=remove_none_from_dict({"from_ui": from_ui}),
1049
1110
  json=jsonable_encoder(_request),
1050
1111
  headers=self._client_wrapper.get_headers(),
1051
1112
  timeout=60,
@@ -1060,44 +1121,74 @@ class LlamaExtractClient:
1060
1121
  raise ApiError(status_code=_response.status_code, body=_response.text)
1061
1122
  raise ApiError(status_code=_response.status_code, body=_response_json)
1062
1123
 
1063
-
1064
- class AsyncLlamaExtractClient:
1065
- def __init__(self, *, client_wrapper: AsyncClientWrapper):
1066
- self._client_wrapper = client_wrapper
1067
-
1068
- async def list_extraction_agents(
1069
- self,
1070
- *,
1071
- include_default: typing.Optional[bool] = None,
1072
- project_id: typing.Optional[str] = None,
1073
- organization_id: typing.Optional[str] = None,
1074
- ) -> typing.List[ExtractAgent]:
1124
+ async def get_job(self, job_id: str) -> ExtractJob:
1075
1125
  """
1076
1126
  Parameters:
1077
- - include_default: typing.Optional[bool]. Whether to include default agents in the results
1078
-
1079
- - project_id: typing.Optional[str].
1080
-
1081
- - organization_id: typing.Optional[str].
1127
+ - job_id: str.
1082
1128
  ---
1083
1129
  from llama_cloud.client import AsyncLlamaCloud
1084
1130
 
1085
1131
  client = AsyncLlamaCloud(
1086
1132
  token="YOUR_TOKEN",
1087
1133
  )
1088
- await client.llama_extract.list_extraction_agents()
1134
+ await client.llama_extract.get_job(
1135
+ job_id="string",
1136
+ )
1089
1137
  """
1090
1138
  _response = await self._client_wrapper.httpx_client.request(
1091
1139
  "GET",
1092
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
1093
- params=remove_none_from_dict(
1094
- {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
1140
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
1141
+ headers=self._client_wrapper.get_headers(),
1142
+ timeout=60,
1143
+ )
1144
+ if 200 <= _response.status_code < 300:
1145
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1146
+ if _response.status_code == 422:
1147
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1148
+ try:
1149
+ _response_json = _response.json()
1150
+ except JSONDecodeError:
1151
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1152
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1153
+
1154
+ async def run_job_on_file(
1155
+ self,
1156
+ *,
1157
+ from_ui: typing.Optional[bool] = None,
1158
+ extraction_agent_id: str,
1159
+ file: typing.IO,
1160
+ data_schema_override: typing.Optional[str] = None,
1161
+ config_override: typing.Optional[str] = None,
1162
+ ) -> ExtractJob:
1163
+ """
1164
+ Parameters:
1165
+ - from_ui: typing.Optional[bool].
1166
+
1167
+ - extraction_agent_id: str.
1168
+
1169
+ - file: typing.IO.
1170
+
1171
+ - data_schema_override: typing.Optional[str].
1172
+
1173
+ - config_override: typing.Optional[str].
1174
+ """
1175
+ _response = await self._client_wrapper.httpx_client.request(
1176
+ "POST",
1177
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
1178
+ params=remove_none_from_dict({"from_ui": from_ui}),
1179
+ data=jsonable_encoder(
1180
+ {
1181
+ "extraction_agent_id": extraction_agent_id,
1182
+ "data_schema_override": data_schema_override,
1183
+ "config_override": config_override,
1184
+ }
1095
1185
  ),
1186
+ files={"file": file},
1096
1187
  headers=self._client_wrapper.get_headers(),
1097
1188
  timeout=60,
1098
1189
  )
1099
1190
  if 200 <= _response.status_code < 300:
1100
- return pydantic.parse_obj_as(typing.List[ExtractAgent], _response.json()) # type: ignore
1191
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1101
1192
  if _response.status_code == 422:
1102
1193
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1103
1194
  try:
@@ -1106,26 +1197,26 @@ class AsyncLlamaExtractClient:
1106
1197
  raise ApiError(status_code=_response.status_code, body=_response.text)
1107
1198
  raise ApiError(status_code=_response.status_code, body=_response_json)
1108
1199
 
1109
- async def create_extraction_agent(
1200
+ async def run_batch_jobs(
1110
1201
  self,
1111
1202
  *,
1112
- project_id: typing.Optional[str] = None,
1113
- organization_id: typing.Optional[str] = None,
1114
- name: str,
1115
- data_schema: ExtractAgentCreateDataSchema,
1116
- config: ExtractConfig,
1117
- ) -> ExtractAgent:
1203
+ from_ui: typing.Optional[bool] = None,
1204
+ extraction_agent_id: str,
1205
+ file_ids: typing.List[str],
1206
+ data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
1207
+ config_override: typing.Optional[ExtractConfig] = OMIT,
1208
+ ) -> typing.List[ExtractJob]:
1118
1209
  """
1119
1210
  Parameters:
1120
- - project_id: typing.Optional[str].
1211
+ - from_ui: typing.Optional[bool].
1121
1212
 
1122
- - organization_id: typing.Optional[str].
1213
+ - extraction_agent_id: str. The id of the extraction agent
1123
1214
 
1124
- - name: str. The name of the extraction schema
1215
+ - file_ids: typing.List[str]. The ids of the files
1125
1216
 
1126
- - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
1217
+ - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
1127
1218
 
1128
- - config: ExtractConfig. The configuration parameters for the extraction agent.
1219
+ - config_override: typing.Optional[ExtractConfig].
1129
1220
  ---
1130
1221
  from llama_cloud import (
1131
1222
  DocumentChunkMode,
@@ -1141,9 +1232,10 @@ class AsyncLlamaExtractClient:
1141
1232
  client = AsyncLlamaCloud(
1142
1233
  token="YOUR_TOKEN",
1143
1234
  )
1144
- await client.llama_extract.create_extraction_agent(
1145
- name="string",
1146
- config=ExtractConfig(
1235
+ await client.llama_extract.run_batch_jobs(
1236
+ extraction_agent_id="string",
1237
+ file_ids=[],
1238
+ config_override=ExtractConfig(
1147
1239
  priority=ExtractConfigPriority.LOW,
1148
1240
  extraction_target=ExtractTarget.PER_DOC,
1149
1241
  extraction_mode=ExtractMode.FAST,
@@ -1153,16 +1245,21 @@ class AsyncLlamaExtractClient:
1153
1245
  ),
1154
1246
  )
1155
1247
  """
1248
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
1249
+ if data_schema_override is not OMIT:
1250
+ _request["data_schema_override"] = data_schema_override
1251
+ if config_override is not OMIT:
1252
+ _request["config_override"] = config_override
1156
1253
  _response = await self._client_wrapper.httpx_client.request(
1157
1254
  "POST",
1158
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
1159
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1160
- json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
1255
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
1256
+ params=remove_none_from_dict({"from_ui": from_ui}),
1257
+ json=jsonable_encoder(_request),
1161
1258
  headers=self._client_wrapper.get_headers(),
1162
1259
  timeout=60,
1163
1260
  )
1164
1261
  if 200 <= _response.status_code < 300:
1165
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1262
+ return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
1166
1263
  if _response.status_code == 422:
1167
1264
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1168
1265
  try:
@@ -1171,34 +1268,35 @@ class AsyncLlamaExtractClient:
1171
1268
  raise ApiError(status_code=_response.status_code, body=_response.text)
1172
1269
  raise ApiError(status_code=_response.status_code, body=_response_json)
1173
1270
 
1174
- async def validate_extraction_schema(
1175
- self, *, data_schema: ExtractSchemaValidateRequestDataSchema
1176
- ) -> ExtractSchemaValidateResponse:
1271
+ async def get_job_result(
1272
+ self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1273
+ ) -> ExtractResultset:
1177
1274
  """
1178
- Validates an extraction agent's schema definition.
1179
- Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
1180
-
1181
1275
  Parameters:
1182
- - data_schema: ExtractSchemaValidateRequestDataSchema.
1276
+ - job_id: str.
1277
+
1278
+ - project_id: typing.Optional[str].
1279
+
1280
+ - organization_id: typing.Optional[str].
1183
1281
  ---
1184
1282
  from llama_cloud.client import AsyncLlamaCloud
1185
1283
 
1186
1284
  client = AsyncLlamaCloud(
1187
1285
  token="YOUR_TOKEN",
1188
1286
  )
1189
- await client.llama_extract.validate_extraction_schema()
1287
+ await client.llama_extract.get_job_result(
1288
+ job_id="string",
1289
+ )
1190
1290
  """
1191
1291
  _response = await self._client_wrapper.httpx_client.request(
1192
- "POST",
1193
- urllib.parse.urljoin(
1194
- f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
1195
- ),
1196
- json=jsonable_encoder({"data_schema": data_schema}),
1292
+ "GET",
1293
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}/result"),
1294
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1197
1295
  headers=self._client_wrapper.get_headers(),
1198
1296
  timeout=60,
1199
1297
  )
1200
1298
  if 200 <= _response.status_code < 300:
1201
- return pydantic.parse_obj_as(ExtractSchemaValidateResponse, _response.json()) # type: ignore
1299
+ return pydantic.parse_obj_as(ExtractResultset, _response.json()) # type: ignore
1202
1300
  if _response.status_code == 422:
1203
1301
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1204
1302
  try:
@@ -1207,50 +1305,35 @@ class AsyncLlamaExtractClient:
1207
1305
  raise ApiError(status_code=_response.status_code, body=_response.text)
1208
1306
  raise ApiError(status_code=_response.status_code, body=_response_json)
1209
1307
 
1210
- async def generate_extraction_schema(
1211
- self,
1212
- *,
1213
- project_id: typing.Optional[str] = None,
1214
- organization_id: typing.Optional[str] = None,
1215
- prompt: typing.Optional[str] = OMIT,
1216
- file_id: typing.Optional[str] = OMIT,
1217
- ) -> ExtractSchemaGenerateResponse:
1308
+ async def list_extract_runs(
1309
+ self, *, extraction_agent_id: str, skip: typing.Optional[int] = None, limit: typing.Optional[int] = None
1310
+ ) -> PaginatedExtractRunsResponse:
1218
1311
  """
1219
- Generates an extraction agent's schema definition from a file and/or natural language prompt.
1220
-
1221
1312
  Parameters:
1222
- - project_id: typing.Optional[str].
1223
-
1224
- - organization_id: typing.Optional[str].
1313
+ - extraction_agent_id: str.
1225
1314
 
1226
- - prompt: typing.Optional[str].
1315
+ - skip: typing.Optional[int].
1227
1316
 
1228
- - file_id: typing.Optional[str].
1317
+ - limit: typing.Optional[int].
1229
1318
  ---
1230
1319
  from llama_cloud.client import AsyncLlamaCloud
1231
1320
 
1232
1321
  client = AsyncLlamaCloud(
1233
1322
  token="YOUR_TOKEN",
1234
1323
  )
1235
- await client.llama_extract.generate_extraction_schema()
1324
+ await client.llama_extract.list_extract_runs(
1325
+ extraction_agent_id="string",
1326
+ )
1236
1327
  """
1237
- _request: typing.Dict[str, typing.Any] = {}
1238
- if prompt is not OMIT:
1239
- _request["prompt"] = prompt
1240
- if file_id is not OMIT:
1241
- _request["file_id"] = file_id
1242
1328
  _response = await self._client_wrapper.httpx_client.request(
1243
- "POST",
1244
- urllib.parse.urljoin(
1245
- f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/generate"
1246
- ),
1247
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1248
- json=jsonable_encoder(_request),
1329
+ "GET",
1330
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs"),
1331
+ params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id, "skip": skip, "limit": limit}),
1249
1332
  headers=self._client_wrapper.get_headers(),
1250
1333
  timeout=60,
1251
1334
  )
1252
1335
  if 200 <= _response.status_code < 300:
1253
- return pydantic.parse_obj_as(ExtractSchemaGenerateResponse, _response.json()) # type: ignore
1336
+ return pydantic.parse_obj_as(PaginatedExtractRunsResponse, _response.json()) # type: ignore
1254
1337
  if _response.status_code == 422:
1255
1338
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1256
1339
  try:
@@ -1259,37 +1342,29 @@ class AsyncLlamaExtractClient:
1259
1342
  raise ApiError(status_code=_response.status_code, body=_response.text)
1260
1343
  raise ApiError(status_code=_response.status_code, body=_response_json)
1261
1344
 
1262
- async def get_extraction_agent_by_name(
1263
- self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1264
- ) -> ExtractAgent:
1345
+ async def get_latest_run_from_ui(self, *, extraction_agent_id: str) -> typing.Optional[ExtractRun]:
1265
1346
  """
1266
1347
  Parameters:
1267
- - name: str.
1268
-
1269
- - project_id: typing.Optional[str].
1270
-
1271
- - organization_id: typing.Optional[str].
1348
+ - extraction_agent_id: str.
1272
1349
  ---
1273
1350
  from llama_cloud.client import AsyncLlamaCloud
1274
1351
 
1275
1352
  client = AsyncLlamaCloud(
1276
1353
  token="YOUR_TOKEN",
1277
1354
  )
1278
- await client.llama_extract.get_extraction_agent_by_name(
1279
- name="string",
1355
+ await client.llama_extract.get_latest_run_from_ui(
1356
+ extraction_agent_id="string",
1280
1357
  )
1281
1358
  """
1282
1359
  _response = await self._client_wrapper.httpx_client.request(
1283
1360
  "GET",
1284
- urllib.parse.urljoin(
1285
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
1286
- ),
1287
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1361
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs/latest-from-ui"),
1362
+ params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
1288
1363
  headers=self._client_wrapper.get_headers(),
1289
1364
  timeout=60,
1290
1365
  )
1291
1366
  if 200 <= _response.status_code < 300:
1292
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1367
+ return pydantic.parse_obj_as(typing.Optional[ExtractRun], _response.json()) # type: ignore
1293
1368
  if _response.status_code == 422:
1294
1369
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1295
1370
  try:
@@ -1298,14 +1373,13 @@ class AsyncLlamaExtractClient:
1298
1373
  raise ApiError(status_code=_response.status_code, body=_response.text)
1299
1374
  raise ApiError(status_code=_response.status_code, body=_response_json)
1300
1375
 
1301
- async def get_or_create_default_extraction_agent(
1302
- self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1303
- ) -> ExtractAgent:
1376
+ async def get_run_by_job_id(
1377
+ self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1378
+ ) -> ExtractRun:
1304
1379
  """
1305
- Get or create a default extraction agent for the current project.
1306
- The default agent has an empty schema and default configuration.
1307
-
1308
1380
  Parameters:
1381
+ - job_id: str.
1382
+
1309
1383
  - project_id: typing.Optional[str].
1310
1384
 
1311
1385
  - organization_id: typing.Optional[str].
@@ -1315,19 +1389,19 @@ class AsyncLlamaExtractClient:
1315
1389
  client = AsyncLlamaCloud(
1316
1390
  token="YOUR_TOKEN",
1317
1391
  )
1318
- await client.llama_extract.get_or_create_default_extraction_agent()
1392
+ await client.llama_extract.get_run_by_job_id(
1393
+ job_id="string",
1394
+ )
1319
1395
  """
1320
1396
  _response = await self._client_wrapper.httpx_client.request(
1321
1397
  "GET",
1322
- urllib.parse.urljoin(
1323
- f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
1324
- ),
1398
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/by-job/{job_id}"),
1325
1399
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1326
1400
  headers=self._client_wrapper.get_headers(),
1327
1401
  timeout=60,
1328
1402
  )
1329
1403
  if 200 <= _response.status_code < 300:
1330
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1404
+ return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
1331
1405
  if _response.status_code == 422:
1332
1406
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1333
1407
  try:
@@ -1336,30 +1410,35 @@ class AsyncLlamaExtractClient:
1336
1410
  raise ApiError(status_code=_response.status_code, body=_response.text)
1337
1411
  raise ApiError(status_code=_response.status_code, body=_response_json)
1338
1412
 
1339
- async def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
1413
+ async def get_run(
1414
+ self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1415
+ ) -> ExtractRun:
1340
1416
  """
1341
1417
  Parameters:
1342
- - extraction_agent_id: str.
1418
+ - run_id: str.
1419
+
1420
+ - project_id: typing.Optional[str].
1421
+
1422
+ - organization_id: typing.Optional[str].
1343
1423
  ---
1344
1424
  from llama_cloud.client import AsyncLlamaCloud
1345
1425
 
1346
1426
  client = AsyncLlamaCloud(
1347
1427
  token="YOUR_TOKEN",
1348
1428
  )
1349
- await client.llama_extract.get_extraction_agent(
1350
- extraction_agent_id="string",
1429
+ await client.llama_extract.get_run(
1430
+ run_id="string",
1351
1431
  )
1352
1432
  """
1353
1433
  _response = await self._client_wrapper.httpx_client.request(
1354
1434
  "GET",
1355
- urllib.parse.urljoin(
1356
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
1357
- ),
1435
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
1436
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1358
1437
  headers=self._client_wrapper.get_headers(),
1359
1438
  timeout=60,
1360
1439
  )
1361
1440
  if 200 <= _response.status_code < 300:
1362
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1441
+ return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
1363
1442
  if _response.status_code == 422:
1364
1443
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1365
1444
  try:
@@ -1368,81 +1447,30 @@ class AsyncLlamaExtractClient:
1368
1447
  raise ApiError(status_code=_response.status_code, body=_response.text)
1369
1448
  raise ApiError(status_code=_response.status_code, body=_response_json)
1370
1449
 
1371
- async def update_extraction_agent(
1372
- self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
1373
- ) -> ExtractAgent:
1450
+ async def delete_extraction_run(
1451
+ self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1452
+ ) -> typing.Any:
1374
1453
  """
1375
1454
  Parameters:
1376
- - extraction_agent_id: str.
1377
-
1378
- - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
1379
-
1380
- - config: ExtractConfig. The configuration parameters for the extraction agent.
1381
- ---
1382
- from llama_cloud import (
1383
- DocumentChunkMode,
1384
- ExtractConfig,
1385
- ExtractConfigPriority,
1386
- ExtractMode,
1387
- ExtractModels,
1388
- ExtractTarget,
1389
- PublicModelName,
1390
- )
1391
- from llama_cloud.client import AsyncLlamaCloud
1455
+ - run_id: str.
1392
1456
 
1393
- client = AsyncLlamaCloud(
1394
- token="YOUR_TOKEN",
1395
- )
1396
- await client.llama_extract.update_extraction_agent(
1397
- extraction_agent_id="string",
1398
- config=ExtractConfig(
1399
- priority=ExtractConfigPriority.LOW,
1400
- extraction_target=ExtractTarget.PER_DOC,
1401
- extraction_mode=ExtractMode.FAST,
1402
- parse_model=PublicModelName.OPENAI_GPT_4_O,
1403
- extract_model=ExtractModels.OPENAI_GPT_4_1,
1404
- chunk_mode=DocumentChunkMode.PAGE,
1405
- ),
1406
- )
1407
- """
1408
- _response = await self._client_wrapper.httpx_client.request(
1409
- "PUT",
1410
- urllib.parse.urljoin(
1411
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
1412
- ),
1413
- json=jsonable_encoder({"data_schema": data_schema, "config": config}),
1414
- headers=self._client_wrapper.get_headers(),
1415
- timeout=60,
1416
- )
1417
- if 200 <= _response.status_code < 300:
1418
- return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1419
- if _response.status_code == 422:
1420
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1421
- try:
1422
- _response_json = _response.json()
1423
- except JSONDecodeError:
1424
- raise ApiError(status_code=_response.status_code, body=_response.text)
1425
- raise ApiError(status_code=_response.status_code, body=_response_json)
1457
+ - project_id: typing.Optional[str].
1426
1458
 
1427
- async def delete_extraction_agent(self, extraction_agent_id: str) -> typing.Any:
1428
- """
1429
- Parameters:
1430
- - extraction_agent_id: str.
1459
+ - organization_id: typing.Optional[str].
1431
1460
  ---
1432
1461
  from llama_cloud.client import AsyncLlamaCloud
1433
1462
 
1434
1463
  client = AsyncLlamaCloud(
1435
1464
  token="YOUR_TOKEN",
1436
1465
  )
1437
- await client.llama_extract.delete_extraction_agent(
1438
- extraction_agent_id="string",
1466
+ await client.llama_extract.delete_extraction_run(
1467
+ run_id="string",
1439
1468
  )
1440
1469
  """
1441
1470
  _response = await self._client_wrapper.httpx_client.request(
1442
1471
  "DELETE",
1443
- urllib.parse.urljoin(
1444
- f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
1445
- ),
1472
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
1473
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1446
1474
  headers=self._client_wrapper.get_headers(),
1447
1475
  timeout=60,
1448
1476
  )
@@ -1456,155 +1484,47 @@ class AsyncLlamaExtractClient:
1456
1484
  raise ApiError(status_code=_response.status_code, body=_response.text)
1457
1485
  raise ApiError(status_code=_response.status_code, body=_response_json)
1458
1486
 
1459
- async def list_jobs(self, *, extraction_agent_id: str) -> typing.List[ExtractJob]:
1460
- """
1461
- Parameters:
1462
- - extraction_agent_id: str.
1463
- ---
1464
- from llama_cloud.client import AsyncLlamaCloud
1465
-
1466
- client = AsyncLlamaCloud(
1467
- token="YOUR_TOKEN",
1468
- )
1469
- await client.llama_extract.list_jobs(
1470
- extraction_agent_id="string",
1471
- )
1487
+ async def extract_stateless(
1488
+ self,
1489
+ *,
1490
+ project_id: typing.Optional[str] = None,
1491
+ organization_id: typing.Optional[str] = None,
1492
+ webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
1493
+ data_schema: ExtractStatelessRequestDataSchema,
1494
+ config: ExtractConfig,
1495
+ file_id: typing.Optional[str] = OMIT,
1496
+ text: typing.Optional[str] = OMIT,
1497
+ file: typing.Optional[FileData] = OMIT,
1498
+ ) -> ExtractJob:
1472
1499
  """
1473
- _response = await self._client_wrapper.httpx_client.request(
1474
- "GET",
1475
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
1476
- params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
1477
- headers=self._client_wrapper.get_headers(),
1478
- timeout=60,
1479
- )
1480
- if 200 <= _response.status_code < 300:
1481
- return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
1482
- if _response.status_code == 422:
1483
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1484
- try:
1485
- _response_json = _response.json()
1486
- except JSONDecodeError:
1487
- raise ApiError(status_code=_response.status_code, body=_response.text)
1488
- raise ApiError(status_code=_response.status_code, body=_response_json)
1500
+ Stateless extraction endpoint that uses a default extraction agent in the user's default project.
1501
+ Requires data_schema, config, and either file_id, text, or base64 encoded file data.
1489
1502
 
1490
- async def run_job(self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreate) -> ExtractJob:
1491
- """
1492
1503
  Parameters:
1493
- - from_ui: typing.Optional[bool].
1504
+ - project_id: typing.Optional[str].
1494
1505
 
1495
- - request: ExtractJobCreate.
1496
- ---
1497
- from llama_cloud import (
1498
- DocumentChunkMode,
1499
- ExtractConfig,
1500
- ExtractConfigPriority,
1501
- ExtractJobCreate,
1502
- ExtractJobCreatePriority,
1503
- ExtractMode,
1504
- ExtractModels,
1505
- ExtractTarget,
1506
- PublicModelName,
1507
- )
1508
- from llama_cloud.client import AsyncLlamaCloud
1506
+ - organization_id: typing.Optional[str].
1509
1507
 
1510
- client = AsyncLlamaCloud(
1511
- token="YOUR_TOKEN",
1512
- )
1513
- await client.llama_extract.run_job(
1514
- request=ExtractJobCreate(
1515
- priority=ExtractJobCreatePriority.LOW,
1516
- extraction_agent_id="string",
1517
- file_id="string",
1518
- config_override=ExtractConfig(
1519
- priority=ExtractConfigPriority.LOW,
1520
- extraction_target=ExtractTarget.PER_DOC,
1521
- extraction_mode=ExtractMode.FAST,
1522
- parse_model=PublicModelName.OPENAI_GPT_4_O,
1523
- extract_model=ExtractModels.OPENAI_GPT_4_1,
1524
- chunk_mode=DocumentChunkMode.PAGE,
1525
- ),
1526
- ),
1527
- )
1528
- """
1529
- _response = await self._client_wrapper.httpx_client.request(
1530
- "POST",
1531
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs"),
1532
- params=remove_none_from_dict({"from_ui": from_ui}),
1533
- json=jsonable_encoder(request),
1534
- headers=self._client_wrapper.get_headers(),
1535
- timeout=60,
1536
- )
1537
- if 200 <= _response.status_code < 300:
1538
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1539
- if _response.status_code == 422:
1540
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1541
- try:
1542
- _response_json = _response.json()
1543
- except JSONDecodeError:
1544
- raise ApiError(status_code=_response.status_code, body=_response.text)
1545
- raise ApiError(status_code=_response.status_code, body=_response_json)
1508
+ - webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
1546
1509
 
1547
- async def get_job(self, job_id: str) -> ExtractJob:
1548
- """
1549
- Parameters:
1550
- - job_id: str.
1551
- ---
1552
- from llama_cloud.client import AsyncLlamaCloud
1510
+ - data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
1553
1511
 
1554
- client = AsyncLlamaCloud(
1555
- token="YOUR_TOKEN",
1556
- )
1557
- await client.llama_extract.get_job(
1558
- job_id="string",
1559
- )
1560
- """
1561
- _response = await self._client_wrapper.httpx_client.request(
1562
- "GET",
1563
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}"),
1564
- headers=self._client_wrapper.get_headers(),
1565
- timeout=60,
1566
- )
1567
- if 200 <= _response.status_code < 300:
1568
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1569
- if _response.status_code == 422:
1570
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1571
- try:
1572
- _response_json = _response.json()
1573
- except JSONDecodeError:
1574
- raise ApiError(status_code=_response.status_code, body=_response.text)
1575
- raise ApiError(status_code=_response.status_code, body=_response_json)
1512
+ - config: ExtractConfig. The configuration parameters for the extraction
1576
1513
 
1577
- async def run_job_test_user(
1578
- self,
1579
- *,
1580
- from_ui: typing.Optional[bool] = None,
1581
- job_create: ExtractJobCreate,
1582
- extract_settings: typing.Optional[LlamaExtractSettings] = OMIT,
1583
- ) -> ExtractJob:
1584
- """
1585
- Parameters:
1586
- - from_ui: typing.Optional[bool].
1514
+ - file_id: typing.Optional[str].
1587
1515
 
1588
- - job_create: ExtractJobCreate.
1516
+ - text: typing.Optional[str].
1589
1517
 
1590
- - extract_settings: typing.Optional[LlamaExtractSettings].
1518
+ - file: typing.Optional[FileData].
1591
1519
  ---
1592
1520
  from llama_cloud import (
1593
- ChunkMode,
1594
1521
  DocumentChunkMode,
1595
1522
  ExtractConfig,
1596
1523
  ExtractConfigPriority,
1597
- ExtractJobCreate,
1598
- ExtractJobCreatePriority,
1599
1524
  ExtractMode,
1600
1525
  ExtractModels,
1601
1526
  ExtractTarget,
1602
- FailPageMode,
1603
- LlamaExtractSettings,
1604
- LlamaParseParameters,
1605
- LlamaParseParametersPriority,
1606
- MultimodalParseResolution,
1607
- ParsingMode,
1527
+ FileData,
1608
1528
  PublicModelName,
1609
1529
  )
1610
1530
  from llama_cloud.client import AsyncLlamaCloud
@@ -1612,38 +1532,34 @@ class AsyncLlamaExtractClient:
1612
1532
  client = AsyncLlamaCloud(
1613
1533
  token="YOUR_TOKEN",
1614
1534
  )
1615
- await client.llama_extract.run_job_test_user(
1616
- job_create=ExtractJobCreate(
1617
- priority=ExtractJobCreatePriority.LOW,
1618
- extraction_agent_id="string",
1619
- file_id="string",
1620
- config_override=ExtractConfig(
1621
- priority=ExtractConfigPriority.LOW,
1622
- extraction_target=ExtractTarget.PER_DOC,
1623
- extraction_mode=ExtractMode.FAST,
1624
- parse_model=PublicModelName.OPENAI_GPT_4_O,
1625
- extract_model=ExtractModels.OPENAI_GPT_4_1,
1626
- chunk_mode=DocumentChunkMode.PAGE,
1627
- ),
1535
+ await client.llama_extract.extract_stateless(
1536
+ config=ExtractConfig(
1537
+ priority=ExtractConfigPriority.LOW,
1538
+ extraction_target=ExtractTarget.PER_DOC,
1539
+ extraction_mode=ExtractMode.FAST,
1540
+ parse_model=PublicModelName.OPENAI_GPT_4_O,
1541
+ extract_model=ExtractModels.OPENAI_GPT_4_1,
1542
+ chunk_mode=DocumentChunkMode.PAGE,
1628
1543
  ),
1629
- extract_settings=LlamaExtractSettings(
1630
- chunk_mode=ChunkMode.PAGE,
1631
- llama_parse_params=LlamaParseParameters(
1632
- priority=LlamaParseParametersPriority.LOW,
1633
- parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
1634
- replace_failed_page_mode=FailPageMode.RAW_TEXT,
1635
- ),
1636
- multimodal_parse_resolution=MultimodalParseResolution.MEDIUM,
1544
+ file=FileData(
1545
+ data="string",
1546
+ mime_type="string",
1637
1547
  ),
1638
1548
  )
1639
1549
  """
1640
- _request: typing.Dict[str, typing.Any] = {"job_create": job_create}
1641
- if extract_settings is not OMIT:
1642
- _request["extract_settings"] = extract_settings
1550
+ _request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
1551
+ if webhook_configurations is not OMIT:
1552
+ _request["webhook_configurations"] = webhook_configurations
1553
+ if file_id is not OMIT:
1554
+ _request["file_id"] = file_id
1555
+ if text is not OMIT:
1556
+ _request["text"] = text
1557
+ if file is not OMIT:
1558
+ _request["file"] = file
1643
1559
  _response = await self._client_wrapper.httpx_client.request(
1644
1560
  "POST",
1645
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/test"),
1646
- params=remove_none_from_dict({"from_ui": from_ui}),
1561
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
1562
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1647
1563
  json=jsonable_encoder(_request),
1648
1564
  headers=self._client_wrapper.get_headers(),
1649
1565
  timeout=60,
@@ -1658,115 +1574,39 @@ class AsyncLlamaExtractClient:
1658
1574
  raise ApiError(status_code=_response.status_code, body=_response.text)
1659
1575
  raise ApiError(status_code=_response.status_code, body=_response_json)
1660
1576
 
1661
- async def run_job_on_file(
1662
- self,
1663
- *,
1664
- from_ui: typing.Optional[bool] = None,
1665
- extraction_agent_id: str,
1666
- file: typing.IO,
1667
- data_schema_override: typing.Optional[str] = None,
1668
- config_override: typing.Optional[str] = None,
1669
- ) -> ExtractJob:
1670
- """
1671
- Parameters:
1672
- - from_ui: typing.Optional[bool].
1673
-
1674
- - extraction_agent_id: str.
1675
-
1676
- - file: typing.IO.
1677
-
1678
- - data_schema_override: typing.Optional[str].
1679
-
1680
- - config_override: typing.Optional[str].
1681
- """
1682
- _response = await self._client_wrapper.httpx_client.request(
1683
- "POST",
1684
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
1685
- params=remove_none_from_dict({"from_ui": from_ui}),
1686
- data=jsonable_encoder(
1687
- {
1688
- "extraction_agent_id": extraction_agent_id,
1689
- "data_schema_override": data_schema_override,
1690
- "config_override": config_override,
1691
- }
1692
- ),
1693
- files={"file": file},
1694
- headers=self._client_wrapper.get_headers(),
1695
- timeout=60,
1696
- )
1697
- if 200 <= _response.status_code < 300:
1698
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1699
- if _response.status_code == 422:
1700
- raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1701
- try:
1702
- _response_json = _response.json()
1703
- except JSONDecodeError:
1704
- raise ApiError(status_code=_response.status_code, body=_response.text)
1705
- raise ApiError(status_code=_response.status_code, body=_response_json)
1706
-
1707
- async def run_batch_jobs(
1577
+ async def list_extraction_agents(
1708
1578
  self,
1709
1579
  *,
1710
- from_ui: typing.Optional[bool] = None,
1711
- extraction_agent_id: str,
1712
- file_ids: typing.List[str],
1713
- data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
1714
- config_override: typing.Optional[ExtractConfig] = OMIT,
1715
- ) -> typing.List[ExtractJob]:
1580
+ include_default: typing.Optional[bool] = None,
1581
+ project_id: typing.Optional[str] = None,
1582
+ organization_id: typing.Optional[str] = None,
1583
+ ) -> typing.List[ExtractAgent]:
1716
1584
  """
1717
1585
  Parameters:
1718
- - from_ui: typing.Optional[bool].
1719
-
1720
- - extraction_agent_id: str. The id of the extraction agent
1721
-
1722
- - file_ids: typing.List[str]. The ids of the files
1586
+ - include_default: typing.Optional[bool]. Whether to include default agents in the results
1723
1587
 
1724
- - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
1588
+ - project_id: typing.Optional[str].
1725
1589
 
1726
- - config_override: typing.Optional[ExtractConfig].
1590
+ - organization_id: typing.Optional[str].
1727
1591
  ---
1728
- from llama_cloud import (
1729
- DocumentChunkMode,
1730
- ExtractConfig,
1731
- ExtractConfigPriority,
1732
- ExtractMode,
1733
- ExtractModels,
1734
- ExtractTarget,
1735
- PublicModelName,
1736
- )
1737
1592
  from llama_cloud.client import AsyncLlamaCloud
1738
1593
 
1739
1594
  client = AsyncLlamaCloud(
1740
1595
  token="YOUR_TOKEN",
1741
1596
  )
1742
- await client.llama_extract.run_batch_jobs(
1743
- extraction_agent_id="string",
1744
- file_ids=[],
1745
- config_override=ExtractConfig(
1746
- priority=ExtractConfigPriority.LOW,
1747
- extraction_target=ExtractTarget.PER_DOC,
1748
- extraction_mode=ExtractMode.FAST,
1749
- parse_model=PublicModelName.OPENAI_GPT_4_O,
1750
- extract_model=ExtractModels.OPENAI_GPT_4_1,
1751
- chunk_mode=DocumentChunkMode.PAGE,
1752
- ),
1753
- )
1597
+ await client.llama_extract.list_extraction_agents()
1754
1598
  """
1755
- _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
1756
- if data_schema_override is not OMIT:
1757
- _request["data_schema_override"] = data_schema_override
1758
- if config_override is not OMIT:
1759
- _request["config_override"] = config_override
1760
1599
  _response = await self._client_wrapper.httpx_client.request(
1761
- "POST",
1762
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
1763
- params=remove_none_from_dict({"from_ui": from_ui}),
1764
- json=jsonable_encoder(_request),
1600
+ "GET",
1601
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
1602
+ params=remove_none_from_dict(
1603
+ {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
1604
+ ),
1765
1605
  headers=self._client_wrapper.get_headers(),
1766
1606
  timeout=60,
1767
1607
  )
1768
1608
  if 200 <= _response.status_code < 300:
1769
- return pydantic.parse_obj_as(typing.List[ExtractJob], _response.json()) # type: ignore
1609
+ return pydantic.parse_obj_as(typing.List[ExtractAgent], _response.json()) # type: ignore
1770
1610
  if _response.status_code == 422:
1771
1611
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1772
1612
  try:
@@ -1775,35 +1615,63 @@ class AsyncLlamaExtractClient:
1775
1615
  raise ApiError(status_code=_response.status_code, body=_response.text)
1776
1616
  raise ApiError(status_code=_response.status_code, body=_response_json)
1777
1617
 
1778
- async def get_job_result(
1779
- self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1780
- ) -> ExtractResultset:
1618
+ async def create_extraction_agent(
1619
+ self,
1620
+ *,
1621
+ project_id: typing.Optional[str] = None,
1622
+ organization_id: typing.Optional[str] = None,
1623
+ name: str,
1624
+ data_schema: ExtractAgentCreateDataSchema,
1625
+ config: ExtractConfig,
1626
+ ) -> ExtractAgent:
1781
1627
  """
1782
1628
  Parameters:
1783
- - job_id: str.
1784
-
1785
1629
  - project_id: typing.Optional[str].
1786
1630
 
1787
1631
  - organization_id: typing.Optional[str].
1632
+
1633
+ - name: str. The name of the extraction schema
1634
+
1635
+ - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
1636
+
1637
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
1788
1638
  ---
1639
+ from llama_cloud import (
1640
+ DocumentChunkMode,
1641
+ ExtractConfig,
1642
+ ExtractConfigPriority,
1643
+ ExtractMode,
1644
+ ExtractModels,
1645
+ ExtractTarget,
1646
+ PublicModelName,
1647
+ )
1789
1648
  from llama_cloud.client import AsyncLlamaCloud
1790
1649
 
1791
1650
  client = AsyncLlamaCloud(
1792
1651
  token="YOUR_TOKEN",
1793
1652
  )
1794
- await client.llama_extract.get_job_result(
1795
- job_id="string",
1653
+ await client.llama_extract.create_extraction_agent(
1654
+ name="string",
1655
+ config=ExtractConfig(
1656
+ priority=ExtractConfigPriority.LOW,
1657
+ extraction_target=ExtractTarget.PER_DOC,
1658
+ extraction_mode=ExtractMode.FAST,
1659
+ parse_model=PublicModelName.OPENAI_GPT_4_O,
1660
+ extract_model=ExtractModels.OPENAI_GPT_4_1,
1661
+ chunk_mode=DocumentChunkMode.PAGE,
1662
+ ),
1796
1663
  )
1797
1664
  """
1798
1665
  _response = await self._client_wrapper.httpx_client.request(
1799
- "GET",
1800
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/jobs/{job_id}/result"),
1666
+ "POST",
1667
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
1801
1668
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1669
+ json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
1802
1670
  headers=self._client_wrapper.get_headers(),
1803
1671
  timeout=60,
1804
1672
  )
1805
1673
  if 200 <= _response.status_code < 300:
1806
- return pydantic.parse_obj_as(ExtractResultset, _response.json()) # type: ignore
1674
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1807
1675
  if _response.status_code == 422:
1808
1676
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1809
1677
  try:
@@ -1812,35 +1680,34 @@ class AsyncLlamaExtractClient:
1812
1680
  raise ApiError(status_code=_response.status_code, body=_response.text)
1813
1681
  raise ApiError(status_code=_response.status_code, body=_response_json)
1814
1682
 
1815
- async def list_extract_runs(
1816
- self, *, extraction_agent_id: str, skip: typing.Optional[int] = None, limit: typing.Optional[int] = None
1817
- ) -> PaginatedExtractRunsResponse:
1683
+ async def validate_extraction_schema(
1684
+ self, *, data_schema: ExtractSchemaValidateRequestDataSchema
1685
+ ) -> ExtractSchemaValidateResponse:
1818
1686
  """
1819
- Parameters:
1820
- - extraction_agent_id: str.
1821
-
1822
- - skip: typing.Optional[int].
1687
+ Validates an extraction agent's schema definition.
1688
+ Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
1823
1689
 
1824
- - limit: typing.Optional[int].
1690
+ Parameters:
1691
+ - data_schema: ExtractSchemaValidateRequestDataSchema.
1825
1692
  ---
1826
1693
  from llama_cloud.client import AsyncLlamaCloud
1827
1694
 
1828
1695
  client = AsyncLlamaCloud(
1829
1696
  token="YOUR_TOKEN",
1830
1697
  )
1831
- await client.llama_extract.list_extract_runs(
1832
- extraction_agent_id="string",
1833
- )
1698
+ await client.llama_extract.validate_extraction_schema()
1834
1699
  """
1835
1700
  _response = await self._client_wrapper.httpx_client.request(
1836
- "GET",
1837
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs"),
1838
- params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id, "skip": skip, "limit": limit}),
1701
+ "POST",
1702
+ urllib.parse.urljoin(
1703
+ f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
1704
+ ),
1705
+ json=jsonable_encoder({"data_schema": data_schema}),
1839
1706
  headers=self._client_wrapper.get_headers(),
1840
1707
  timeout=60,
1841
1708
  )
1842
1709
  if 200 <= _response.status_code < 300:
1843
- return pydantic.parse_obj_as(PaginatedExtractRunsResponse, _response.json()) # type: ignore
1710
+ return pydantic.parse_obj_as(ExtractSchemaValidateResponse, _response.json()) # type: ignore
1844
1711
  if _response.status_code == 422:
1845
1712
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1846
1713
  try:
@@ -1849,29 +1716,50 @@ class AsyncLlamaExtractClient:
1849
1716
  raise ApiError(status_code=_response.status_code, body=_response.text)
1850
1717
  raise ApiError(status_code=_response.status_code, body=_response_json)
1851
1718
 
1852
- async def get_latest_run_from_ui(self, *, extraction_agent_id: str) -> typing.Optional[ExtractRun]:
1719
+ async def generate_extraction_schema(
1720
+ self,
1721
+ *,
1722
+ project_id: typing.Optional[str] = None,
1723
+ organization_id: typing.Optional[str] = None,
1724
+ prompt: typing.Optional[str] = OMIT,
1725
+ file_id: typing.Optional[str] = OMIT,
1726
+ ) -> ExtractSchemaGenerateResponse:
1853
1727
  """
1728
+ Generates an extraction agent's schema definition from a file and/or natural language prompt.
1729
+
1854
1730
  Parameters:
1855
- - extraction_agent_id: str.
1731
+ - project_id: typing.Optional[str].
1732
+
1733
+ - organization_id: typing.Optional[str].
1734
+
1735
+ - prompt: typing.Optional[str].
1736
+
1737
+ - file_id: typing.Optional[str].
1856
1738
  ---
1857
1739
  from llama_cloud.client import AsyncLlamaCloud
1858
1740
 
1859
1741
  client = AsyncLlamaCloud(
1860
1742
  token="YOUR_TOKEN",
1861
1743
  )
1862
- await client.llama_extract.get_latest_run_from_ui(
1863
- extraction_agent_id="string",
1864
- )
1744
+ await client.llama_extract.generate_extraction_schema()
1865
1745
  """
1746
+ _request: typing.Dict[str, typing.Any] = {}
1747
+ if prompt is not OMIT:
1748
+ _request["prompt"] = prompt
1749
+ if file_id is not OMIT:
1750
+ _request["file_id"] = file_id
1866
1751
  _response = await self._client_wrapper.httpx_client.request(
1867
- "GET",
1868
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/runs/latest-from-ui"),
1869
- params=remove_none_from_dict({"extraction_agent_id": extraction_agent_id}),
1752
+ "POST",
1753
+ urllib.parse.urljoin(
1754
+ f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/generate"
1755
+ ),
1756
+ params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1757
+ json=jsonable_encoder(_request),
1870
1758
  headers=self._client_wrapper.get_headers(),
1871
1759
  timeout=60,
1872
1760
  )
1873
1761
  if 200 <= _response.status_code < 300:
1874
- return pydantic.parse_obj_as(typing.Optional[ExtractRun], _response.json()) # type: ignore
1762
+ return pydantic.parse_obj_as(ExtractSchemaGenerateResponse, _response.json()) # type: ignore
1875
1763
  if _response.status_code == 422:
1876
1764
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1877
1765
  try:
@@ -1880,12 +1768,12 @@ class AsyncLlamaExtractClient:
1880
1768
  raise ApiError(status_code=_response.status_code, body=_response.text)
1881
1769
  raise ApiError(status_code=_response.status_code, body=_response_json)
1882
1770
 
1883
- async def get_run_by_job_id(
1884
- self, job_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1885
- ) -> ExtractRun:
1771
+ async def get_extraction_agent_by_name(
1772
+ self, name: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1773
+ ) -> ExtractAgent:
1886
1774
  """
1887
1775
  Parameters:
1888
- - job_id: str.
1776
+ - name: str.
1889
1777
 
1890
1778
  - project_id: typing.Optional[str].
1891
1779
 
@@ -1896,19 +1784,21 @@ class AsyncLlamaExtractClient:
1896
1784
  client = AsyncLlamaCloud(
1897
1785
  token="YOUR_TOKEN",
1898
1786
  )
1899
- await client.llama_extract.get_run_by_job_id(
1900
- job_id="string",
1787
+ await client.llama_extract.get_extraction_agent_by_name(
1788
+ name="string",
1901
1789
  )
1902
1790
  """
1903
1791
  _response = await self._client_wrapper.httpx_client.request(
1904
1792
  "GET",
1905
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/by-job/{job_id}"),
1793
+ urllib.parse.urljoin(
1794
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/by-name/{name}"
1795
+ ),
1906
1796
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1907
1797
  headers=self._client_wrapper.get_headers(),
1908
1798
  timeout=60,
1909
1799
  )
1910
1800
  if 200 <= _response.status_code < 300:
1911
- return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
1801
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1912
1802
  if _response.status_code == 422:
1913
1803
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1914
1804
  try:
@@ -1917,13 +1807,14 @@ class AsyncLlamaExtractClient:
1917
1807
  raise ApiError(status_code=_response.status_code, body=_response.text)
1918
1808
  raise ApiError(status_code=_response.status_code, body=_response_json)
1919
1809
 
1920
- async def get_run(
1921
- self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1922
- ) -> ExtractRun:
1810
+ async def get_or_create_default_extraction_agent(
1811
+ self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1812
+ ) -> ExtractAgent:
1923
1813
  """
1924
- Parameters:
1925
- - run_id: str.
1814
+ Get or create a default extraction agent for the current project.
1815
+ The default agent has an empty schema and default configuration.
1926
1816
 
1817
+ Parameters:
1927
1818
  - project_id: typing.Optional[str].
1928
1819
 
1929
1820
  - organization_id: typing.Optional[str].
@@ -1933,19 +1824,19 @@ class AsyncLlamaExtractClient:
1933
1824
  client = AsyncLlamaCloud(
1934
1825
  token="YOUR_TOKEN",
1935
1826
  )
1936
- await client.llama_extract.get_run(
1937
- run_id="string",
1938
- )
1827
+ await client.llama_extract.get_or_create_default_extraction_agent()
1939
1828
  """
1940
1829
  _response = await self._client_wrapper.httpx_client.request(
1941
1830
  "GET",
1942
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
1831
+ urllib.parse.urljoin(
1832
+ f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
1833
+ ),
1943
1834
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1944
1835
  headers=self._client_wrapper.get_headers(),
1945
1836
  timeout=60,
1946
1837
  )
1947
1838
  if 200 <= _response.status_code < 300:
1948
- return pydantic.parse_obj_as(ExtractRun, _response.json()) # type: ignore
1839
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1949
1840
  if _response.status_code == 422:
1950
1841
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1951
1842
  try:
@@ -1954,35 +1845,30 @@ class AsyncLlamaExtractClient:
1954
1845
  raise ApiError(status_code=_response.status_code, body=_response.text)
1955
1846
  raise ApiError(status_code=_response.status_code, body=_response_json)
1956
1847
 
1957
- async def delete_extraction_run(
1958
- self, run_id: str, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
1959
- ) -> typing.Any:
1848
+ async def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
1960
1849
  """
1961
1850
  Parameters:
1962
- - run_id: str.
1963
-
1964
- - project_id: typing.Optional[str].
1965
-
1966
- - organization_id: typing.Optional[str].
1851
+ - extraction_agent_id: str.
1967
1852
  ---
1968
1853
  from llama_cloud.client import AsyncLlamaCloud
1969
1854
 
1970
1855
  client = AsyncLlamaCloud(
1971
1856
  token="YOUR_TOKEN",
1972
1857
  )
1973
- await client.llama_extract.delete_extraction_run(
1974
- run_id="string",
1858
+ await client.llama_extract.get_extraction_agent(
1859
+ extraction_agent_id="string",
1975
1860
  )
1976
1861
  """
1977
1862
  _response = await self._client_wrapper.httpx_client.request(
1978
- "DELETE",
1979
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/runs/{run_id}"),
1980
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
1863
+ "GET",
1864
+ urllib.parse.urljoin(
1865
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
1866
+ ),
1981
1867
  headers=self._client_wrapper.get_headers(),
1982
1868
  timeout=60,
1983
1869
  )
1984
1870
  if 200 <= _response.status_code < 300:
1985
- return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
1871
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1986
1872
  if _response.status_code == 422:
1987
1873
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1988
1874
  try:
@@ -1991,38 +1877,16 @@ class AsyncLlamaExtractClient:
1991
1877
  raise ApiError(status_code=_response.status_code, body=_response.text)
1992
1878
  raise ApiError(status_code=_response.status_code, body=_response_json)
1993
1879
 
1994
- async def extract_stateless(
1995
- self,
1996
- *,
1997
- project_id: typing.Optional[str] = None,
1998
- organization_id: typing.Optional[str] = None,
1999
- webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]] = OMIT,
2000
- data_schema: ExtractStatelessRequestDataSchema,
2001
- config: ExtractConfig,
2002
- file_id: typing.Optional[str] = OMIT,
2003
- text: typing.Optional[str] = OMIT,
2004
- file: typing.Optional[FileData] = OMIT,
2005
- ) -> ExtractJob:
1880
+ async def update_extraction_agent(
1881
+ self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
1882
+ ) -> ExtractAgent:
2006
1883
  """
2007
- Stateless extraction endpoint that uses a default extraction agent in the user's default project.
2008
- Requires data_schema, config, and either file_id, text, or base64 encoded file data.
2009
-
2010
1884
  Parameters:
2011
- - project_id: typing.Optional[str].
2012
-
2013
- - organization_id: typing.Optional[str].
2014
-
2015
- - webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]].
2016
-
2017
- - data_schema: ExtractStatelessRequestDataSchema. The schema of the data to extract
2018
-
2019
- - config: ExtractConfig. The configuration parameters for the extraction
2020
-
2021
- - file_id: typing.Optional[str].
1885
+ - extraction_agent_id: str.
2022
1886
 
2023
- - text: typing.Optional[str].
1887
+ - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
2024
1888
 
2025
- - file: typing.Optional[FileData].
1889
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
2026
1890
  ---
2027
1891
  from llama_cloud import (
2028
1892
  DocumentChunkMode,
@@ -2031,7 +1895,6 @@ class AsyncLlamaExtractClient:
2031
1895
  ExtractMode,
2032
1896
  ExtractModels,
2033
1897
  ExtractTarget,
2034
- FileData,
2035
1898
  PublicModelName,
2036
1899
  )
2037
1900
  from llama_cloud.client import AsyncLlamaCloud
@@ -2039,7 +1902,8 @@ class AsyncLlamaExtractClient:
2039
1902
  client = AsyncLlamaCloud(
2040
1903
  token="YOUR_TOKEN",
2041
1904
  )
2042
- await client.llama_extract.extract_stateless(
1905
+ await client.llama_extract.update_extraction_agent(
1906
+ extraction_agent_id="string",
2043
1907
  config=ExtractConfig(
2044
1908
  priority=ExtractConfigPriority.LOW,
2045
1909
  extraction_target=ExtractTarget.PER_DOC,
@@ -2048,31 +1912,51 @@ class AsyncLlamaExtractClient:
2048
1912
  extract_model=ExtractModels.OPENAI_GPT_4_1,
2049
1913
  chunk_mode=DocumentChunkMode.PAGE,
2050
1914
  ),
2051
- file=FileData(
2052
- data="string",
2053
- mime_type="string",
1915
+ )
1916
+ """
1917
+ _response = await self._client_wrapper.httpx_client.request(
1918
+ "PUT",
1919
+ urllib.parse.urljoin(
1920
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
2054
1921
  ),
1922
+ json=jsonable_encoder({"data_schema": data_schema, "config": config}),
1923
+ headers=self._client_wrapper.get_headers(),
1924
+ timeout=60,
1925
+ )
1926
+ if 200 <= _response.status_code < 300:
1927
+ return pydantic.parse_obj_as(ExtractAgent, _response.json()) # type: ignore
1928
+ if _response.status_code == 422:
1929
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1930
+ try:
1931
+ _response_json = _response.json()
1932
+ except JSONDecodeError:
1933
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1934
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1935
+
1936
+ async def delete_extraction_agent(self, extraction_agent_id: str) -> typing.Any:
1937
+ """
1938
+ Parameters:
1939
+ - extraction_agent_id: str.
1940
+ ---
1941
+ from llama_cloud.client import AsyncLlamaCloud
1942
+
1943
+ client = AsyncLlamaCloud(
1944
+ token="YOUR_TOKEN",
1945
+ )
1946
+ await client.llama_extract.delete_extraction_agent(
1947
+ extraction_agent_id="string",
2055
1948
  )
2056
1949
  """
2057
- _request: typing.Dict[str, typing.Any] = {"data_schema": data_schema, "config": config}
2058
- if webhook_configurations is not OMIT:
2059
- _request["webhook_configurations"] = webhook_configurations
2060
- if file_id is not OMIT:
2061
- _request["file_id"] = file_id
2062
- if text is not OMIT:
2063
- _request["text"] = text
2064
- if file is not OMIT:
2065
- _request["file"] = file
2066
1950
  _response = await self._client_wrapper.httpx_client.request(
2067
- "POST",
2068
- urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/run"),
2069
- params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
2070
- json=jsonable_encoder(_request),
1951
+ "DELETE",
1952
+ urllib.parse.urljoin(
1953
+ f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
1954
+ ),
2071
1955
  headers=self._client_wrapper.get_headers(),
2072
1956
  timeout=60,
2073
1957
  )
2074
1958
  if 200 <= _response.status_code < 300:
2075
- return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1959
+ return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
2076
1960
  if _response.status_code == 422:
2077
1961
  raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
2078
1962
  try: