llama-cloud 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. More details are linked from the original diff page.

Files changed (55)
  1. llama_cloud/__init__.py +36 -18
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +20 -0
  4. llama_cloud/resources/beta/__init__.py +2 -0
  5. llama_cloud/resources/beta/client.py +371 -0
  6. llama_cloud/resources/chat_apps/client.py +4 -4
  7. llama_cloud/resources/embedding_model_configs/client.py +82 -22
  8. llama_cloud/resources/llama_extract/__init__.py +21 -0
  9. llama_cloud/resources/llama_extract/client.py +223 -114
  10. llama_cloud/resources/llama_extract/types/__init__.py +21 -0
  11. llama_cloud/resources/parsing/client.py +83 -29
  12. llama_cloud/resources/pipelines/client.py +107 -2
  13. llama_cloud/resources/projects/client.py +70 -0
  14. llama_cloud/types/__init__.py +26 -26
  15. llama_cloud/types/{parsing_usage.py → audio_block.py} +5 -3
  16. llama_cloud/types/batch.py +47 -0
  17. llama_cloud/types/batch_item.py +40 -0
  18. llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
  19. llama_cloud/types/{extract_schema_validate_request.py → batch_public_output.py} +7 -3
  20. llama_cloud/types/cloud_confluence_data_source.py +1 -0
  21. llama_cloud/types/cloud_postgres_vector_store.py +2 -0
  22. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  23. llama_cloud/types/extract_config.py +2 -0
  24. llama_cloud/types/fail_page_mode.py +29 -0
  25. llama_cloud/types/{extract_agent_create.py → file_count_by_status_response.py} +8 -10
  26. llama_cloud/types/file_parse_public.py +36 -0
  27. llama_cloud/types/job_names.py +8 -12
  28. llama_cloud/types/llama_extract_settings.py +2 -2
  29. llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
  30. llama_cloud/types/llama_parse_parameters.py +10 -2
  31. llama_cloud/types/markdown_node_parser.py +4 -0
  32. llama_cloud/types/message_role.py +4 -0
  33. llama_cloud/types/pg_vector_distance_method.py +43 -0
  34. llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py} +12 -9
  35. llama_cloud/types/pg_vector_vector_type.py +35 -0
  36. llama_cloud/types/pipeline_create.py +1 -0
  37. llama_cloud/types/pipeline_data_source.py +3 -0
  38. llama_cloud/types/pipeline_data_source_status.py +33 -0
  39. llama_cloud/types/pipeline_file.py +1 -0
  40. llama_cloud/types/prompt_conf.py +3 -0
  41. llama_cloud/types/struct_parse_conf.py +4 -1
  42. llama_cloud/types/supported_llm_model_names.py +0 -12
  43. llama_cloud/types/token_text_splitter.py +3 -0
  44. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
  45. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +55 -45
  46. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
  47. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
  48. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
  49. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
  50. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
  51. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
  52. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
  53. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
  54. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
  55. {llama_cloud-0.1.17.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0
@@ -10,18 +10,19 @@ from ...core.jsonable_encoder import jsonable_encoder
10
10
  from ...core.remove_none_from_dict import remove_none_from_dict
11
11
  from ...errors.unprocessable_entity_error import UnprocessableEntityError
12
12
  from ...types.extract_agent import ExtractAgent
13
- from ...types.extract_agent_create import ExtractAgentCreate
14
- from ...types.extract_agent_update import ExtractAgentUpdate
13
+ from ...types.extract_config import ExtractConfig
15
14
  from ...types.extract_job import ExtractJob
16
15
  from ...types.extract_job_create import ExtractJobCreate
17
- from ...types.extract_job_create_batch import ExtractJobCreateBatch
18
16
  from ...types.extract_resultset import ExtractResultset
19
17
  from ...types.extract_run import ExtractRun
20
- from ...types.extract_schema_validate_request import ExtractSchemaValidateRequest
21
18
  from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
22
19
  from ...types.http_validation_error import HttpValidationError
23
20
  from ...types.llama_extract_settings import LlamaExtractSettings
24
21
  from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
22
+ from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
23
+ from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
24
+ from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
25
+ from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
25
26
 
26
27
  try:
27
28
  import pydantic
@@ -39,17 +40,19 @@ class LlamaExtractClient:
39
40
  def __init__(self, *, client_wrapper: SyncClientWrapper):
40
41
  self._client_wrapper = client_wrapper
41
42
 
42
- def list_extraction_agents(self, *, project_id: typing.Optional[str] = None) -> typing.List[ExtractAgent]:
43
+ def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
43
44
  """
44
45
  Parameters:
45
- - project_id: typing.Optional[str].
46
+ - project_id: str.
46
47
  ---
47
48
  from llama_cloud.client import LlamaCloud
48
49
 
49
50
  client = LlamaCloud(
50
51
  token="YOUR_TOKEN",
51
52
  )
52
- client.llama_extract.list_extraction_agents()
53
+ client.llama_extract.list_extraction_agents(
54
+ project_id="string",
55
+ )
53
56
  """
54
57
  _response = self._client_wrapper.httpx_client.request(
55
58
  "GET",
@@ -73,7 +76,9 @@ class LlamaExtractClient:
73
76
  *,
74
77
  project_id: typing.Optional[str] = None,
75
78
  organization_id: typing.Optional[str] = None,
76
- request: ExtractAgentCreate,
79
+ name: str,
80
+ data_schema: ExtractAgentCreateDataSchema,
81
+ config: ExtractConfig,
77
82
  ) -> ExtractAgent:
78
83
  """
79
84
  Parameters:
@@ -81,26 +86,23 @@ class LlamaExtractClient:
81
86
 
82
87
  - organization_id: typing.Optional[str].
83
88
 
84
- - request: ExtractAgentCreate.
89
+ - name: str. The name of the extraction schema
90
+
91
+ - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
92
+
93
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
85
94
  ---
86
- from llama_cloud import (
87
- ExtractAgentCreate,
88
- ExtractConfig,
89
- ExtractMode,
90
- ExtractTarget,
91
- )
95
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
92
96
  from llama_cloud.client import LlamaCloud
93
97
 
94
98
  client = LlamaCloud(
95
99
  token="YOUR_TOKEN",
96
100
  )
97
101
  client.llama_extract.create_extraction_agent(
98
- request=ExtractAgentCreate(
99
- name="string",
100
- config=ExtractConfig(
101
- extraction_target=ExtractTarget.PER_DOC,
102
- extraction_mode=ExtractMode.FAST,
103
- ),
102
+ name="string",
103
+ config=ExtractConfig(
104
+ extraction_target=ExtractTarget.PER_DOC,
105
+ extraction_mode=ExtractMode.FAST,
104
106
  ),
105
107
  )
106
108
  """
@@ -108,7 +110,7 @@ class LlamaExtractClient:
108
110
  "POST",
109
111
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
110
112
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
111
- json=jsonable_encoder(request),
113
+ json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
112
114
  headers=self._client_wrapper.get_headers(),
113
115
  timeout=60,
114
116
  )
@@ -122,30 +124,29 @@ class LlamaExtractClient:
122
124
  raise ApiError(status_code=_response.status_code, body=_response.text)
123
125
  raise ApiError(status_code=_response.status_code, body=_response_json)
124
126
 
125
- def validate_extraction_schema(self, *, request: ExtractSchemaValidateRequest) -> ExtractSchemaValidateResponse:
127
+ def validate_extraction_schema(
128
+ self, *, data_schema: ExtractSchemaValidateRequestDataSchema
129
+ ) -> ExtractSchemaValidateResponse:
126
130
  """
127
131
  Validates an extraction agent's schema definition.
128
132
  Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
129
133
 
130
134
  Parameters:
131
- - request: ExtractSchemaValidateRequest.
135
+ - data_schema: ExtractSchemaValidateRequestDataSchema.
132
136
  ---
133
- from llama_cloud import ExtractSchemaValidateRequest
134
137
  from llama_cloud.client import LlamaCloud
135
138
 
136
139
  client = LlamaCloud(
137
140
  token="YOUR_TOKEN",
138
141
  )
139
- client.llama_extract.validate_extraction_schema(
140
- request=ExtractSchemaValidateRequest(),
141
- )
142
+ client.llama_extract.validate_extraction_schema()
142
143
  """
143
144
  _response = self._client_wrapper.httpx_client.request(
144
145
  "POST",
145
146
  urllib.parse.urljoin(
146
147
  f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
147
148
  ),
148
- json=jsonable_encoder(request),
149
+ json=jsonable_encoder({"data_schema": data_schema}),
149
150
  headers=self._client_wrapper.get_headers(),
150
151
  timeout=60,
151
152
  )
@@ -226,19 +227,18 @@ class LlamaExtractClient:
226
227
  raise ApiError(status_code=_response.status_code, body=_response.text)
227
228
  raise ApiError(status_code=_response.status_code, body=_response_json)
228
229
 
229
- def update_extraction_agent(self, extraction_agent_id: str, *, request: ExtractAgentUpdate) -> ExtractAgent:
230
+ def update_extraction_agent(
231
+ self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
232
+ ) -> ExtractAgent:
230
233
  """
231
234
  Parameters:
232
235
  - extraction_agent_id: str.
233
236
 
234
- - request: ExtractAgentUpdate.
237
+ - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
238
+
239
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
235
240
  ---
236
- from llama_cloud import (
237
- ExtractAgentUpdate,
238
- ExtractConfig,
239
- ExtractMode,
240
- ExtractTarget,
241
- )
241
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
242
242
  from llama_cloud.client import LlamaCloud
243
243
 
244
244
  client = LlamaCloud(
@@ -246,11 +246,9 @@ class LlamaExtractClient:
246
246
  )
247
247
  client.llama_extract.update_extraction_agent(
248
248
  extraction_agent_id="string",
249
- request=ExtractAgentUpdate(
250
- config=ExtractConfig(
251
- extraction_target=ExtractTarget.PER_DOC,
252
- extraction_mode=ExtractMode.FAST,
253
- ),
249
+ config=ExtractConfig(
250
+ extraction_target=ExtractTarget.PER_DOC,
251
+ extraction_mode=ExtractMode.FAST,
254
252
  ),
255
253
  )
256
254
  """
@@ -259,7 +257,7 @@ class LlamaExtractClient:
259
257
  urllib.parse.urljoin(
260
258
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
261
259
  ),
262
- json=jsonable_encoder(request),
260
+ json=jsonable_encoder({"data_schema": data_schema, "config": config}),
263
261
  headers=self._client_wrapper.get_headers(),
264
262
  timeout=60,
265
263
  )
@@ -434,6 +432,7 @@ class LlamaExtractClient:
434
432
  ExtractJobCreate,
435
433
  ExtractMode,
436
434
  ExtractTarget,
435
+ FailPageMode,
437
436
  LlamaExtractSettings,
438
437
  LlamaParseParameters,
439
438
  ParsingMode,
@@ -456,6 +455,7 @@ class LlamaExtractClient:
456
455
  chunk_mode=ChunkMode.PAGE,
457
456
  llama_parse_params=LlamaParseParameters(
458
457
  parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
458
+ replace_failed_page_mode=FailPageMode.RAW_TEXT,
459
459
  ),
460
460
  ),
461
461
  )
@@ -481,42 +481,98 @@ class LlamaExtractClient:
481
481
  raise ApiError(status_code=_response.status_code, body=_response.text)
482
482
  raise ApiError(status_code=_response.status_code, body=_response_json)
483
483
 
484
+ def run_job_on_file(
485
+ self,
486
+ *,
487
+ from_ui: typing.Optional[bool] = None,
488
+ extraction_agent_id: str,
489
+ file: typing.IO,
490
+ data_schema_override: typing.Optional[str] = None,
491
+ config_override: typing.Optional[str] = None,
492
+ ) -> ExtractJob:
493
+ """
494
+ Parameters:
495
+ - from_ui: typing.Optional[bool].
496
+
497
+ - extraction_agent_id: str.
498
+
499
+ - file: typing.IO.
500
+
501
+ - data_schema_override: typing.Optional[str].
502
+
503
+ - config_override: typing.Optional[str].
504
+ """
505
+ _response = self._client_wrapper.httpx_client.request(
506
+ "POST",
507
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
508
+ params=remove_none_from_dict({"from_ui": from_ui}),
509
+ data=jsonable_encoder(
510
+ {
511
+ "extraction_agent_id": extraction_agent_id,
512
+ "data_schema_override": data_schema_override,
513
+ "config_override": config_override,
514
+ }
515
+ ),
516
+ files={"file": file},
517
+ headers=self._client_wrapper.get_headers(),
518
+ timeout=60,
519
+ )
520
+ if 200 <= _response.status_code < 300:
521
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
522
+ if _response.status_code == 422:
523
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
524
+ try:
525
+ _response_json = _response.json()
526
+ except JSONDecodeError:
527
+ raise ApiError(status_code=_response.status_code, body=_response.text)
528
+ raise ApiError(status_code=_response.status_code, body=_response_json)
529
+
484
530
  def run_batch_jobs(
485
- self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreateBatch
531
+ self,
532
+ *,
533
+ from_ui: typing.Optional[bool] = None,
534
+ extraction_agent_id: str,
535
+ file_ids: typing.List[str],
536
+ data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
537
+ config_override: typing.Optional[ExtractConfig] = OMIT,
486
538
  ) -> typing.List[ExtractJob]:
487
539
  """
488
540
  Parameters:
489
541
  - from_ui: typing.Optional[bool].
490
542
 
491
- - request: ExtractJobCreateBatch.
543
+ - extraction_agent_id: str. The id of the extraction agent
544
+
545
+ - file_ids: typing.List[str]. The ids of the files
546
+
547
+ - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
548
+
549
+ - config_override: typing.Optional[ExtractConfig].
492
550
  ---
493
- from llama_cloud import (
494
- ExtractConfig,
495
- ExtractJobCreateBatch,
496
- ExtractMode,
497
- ExtractTarget,
498
- )
551
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
499
552
  from llama_cloud.client import LlamaCloud
500
553
 
501
554
  client = LlamaCloud(
502
555
  token="YOUR_TOKEN",
503
556
  )
504
557
  client.llama_extract.run_batch_jobs(
505
- request=ExtractJobCreateBatch(
506
- extraction_agent_id="string",
507
- file_ids=[],
508
- config_override=ExtractConfig(
509
- extraction_target=ExtractTarget.PER_DOC,
510
- extraction_mode=ExtractMode.FAST,
511
- ),
558
+ extraction_agent_id="string",
559
+ file_ids=[],
560
+ config_override=ExtractConfig(
561
+ extraction_target=ExtractTarget.PER_DOC,
562
+ extraction_mode=ExtractMode.FAST,
512
563
  ),
513
564
  )
514
565
  """
566
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
567
+ if data_schema_override is not OMIT:
568
+ _request["data_schema_override"] = data_schema_override
569
+ if config_override is not OMIT:
570
+ _request["config_override"] = config_override
515
571
  _response = self._client_wrapper.httpx_client.request(
516
572
  "POST",
517
573
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
518
574
  params=remove_none_from_dict({"from_ui": from_ui}),
519
- json=jsonable_encoder(request),
575
+ json=jsonable_encoder(_request),
520
576
  headers=self._client_wrapper.get_headers(),
521
577
  timeout=60,
522
578
  )
@@ -723,17 +779,19 @@ class AsyncLlamaExtractClient:
723
779
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
724
780
  self._client_wrapper = client_wrapper
725
781
 
726
- async def list_extraction_agents(self, *, project_id: typing.Optional[str] = None) -> typing.List[ExtractAgent]:
782
+ async def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
727
783
  """
728
784
  Parameters:
729
- - project_id: typing.Optional[str].
785
+ - project_id: str.
730
786
  ---
731
787
  from llama_cloud.client import AsyncLlamaCloud
732
788
 
733
789
  client = AsyncLlamaCloud(
734
790
  token="YOUR_TOKEN",
735
791
  )
736
- await client.llama_extract.list_extraction_agents()
792
+ await client.llama_extract.list_extraction_agents(
793
+ project_id="string",
794
+ )
737
795
  """
738
796
  _response = await self._client_wrapper.httpx_client.request(
739
797
  "GET",
@@ -757,7 +815,9 @@ class AsyncLlamaExtractClient:
757
815
  *,
758
816
  project_id: typing.Optional[str] = None,
759
817
  organization_id: typing.Optional[str] = None,
760
- request: ExtractAgentCreate,
818
+ name: str,
819
+ data_schema: ExtractAgentCreateDataSchema,
820
+ config: ExtractConfig,
761
821
  ) -> ExtractAgent:
762
822
  """
763
823
  Parameters:
@@ -765,26 +825,23 @@ class AsyncLlamaExtractClient:
765
825
 
766
826
  - organization_id: typing.Optional[str].
767
827
 
768
- - request: ExtractAgentCreate.
828
+ - name: str. The name of the extraction schema
829
+
830
+ - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
831
+
832
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
769
833
  ---
770
- from llama_cloud import (
771
- ExtractAgentCreate,
772
- ExtractConfig,
773
- ExtractMode,
774
- ExtractTarget,
775
- )
834
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
776
835
  from llama_cloud.client import AsyncLlamaCloud
777
836
 
778
837
  client = AsyncLlamaCloud(
779
838
  token="YOUR_TOKEN",
780
839
  )
781
840
  await client.llama_extract.create_extraction_agent(
782
- request=ExtractAgentCreate(
783
- name="string",
784
- config=ExtractConfig(
785
- extraction_target=ExtractTarget.PER_DOC,
786
- extraction_mode=ExtractMode.FAST,
787
- ),
841
+ name="string",
842
+ config=ExtractConfig(
843
+ extraction_target=ExtractTarget.PER_DOC,
844
+ extraction_mode=ExtractMode.FAST,
788
845
  ),
789
846
  )
790
847
  """
@@ -792,7 +849,7 @@ class AsyncLlamaExtractClient:
792
849
  "POST",
793
850
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
794
851
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
795
- json=jsonable_encoder(request),
852
+ json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
796
853
  headers=self._client_wrapper.get_headers(),
797
854
  timeout=60,
798
855
  )
@@ -807,31 +864,28 @@ class AsyncLlamaExtractClient:
807
864
  raise ApiError(status_code=_response.status_code, body=_response_json)
808
865
 
809
866
  async def validate_extraction_schema(
810
- self, *, request: ExtractSchemaValidateRequest
867
+ self, *, data_schema: ExtractSchemaValidateRequestDataSchema
811
868
  ) -> ExtractSchemaValidateResponse:
812
869
  """
813
870
  Validates an extraction agent's schema definition.
814
871
  Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
815
872
 
816
873
  Parameters:
817
- - request: ExtractSchemaValidateRequest.
874
+ - data_schema: ExtractSchemaValidateRequestDataSchema.
818
875
  ---
819
- from llama_cloud import ExtractSchemaValidateRequest
820
876
  from llama_cloud.client import AsyncLlamaCloud
821
877
 
822
878
  client = AsyncLlamaCloud(
823
879
  token="YOUR_TOKEN",
824
880
  )
825
- await client.llama_extract.validate_extraction_schema(
826
- request=ExtractSchemaValidateRequest(),
827
- )
881
+ await client.llama_extract.validate_extraction_schema()
828
882
  """
829
883
  _response = await self._client_wrapper.httpx_client.request(
830
884
  "POST",
831
885
  urllib.parse.urljoin(
832
886
  f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
833
887
  ),
834
- json=jsonable_encoder(request),
888
+ json=jsonable_encoder({"data_schema": data_schema}),
835
889
  headers=self._client_wrapper.get_headers(),
836
890
  timeout=60,
837
891
  )
@@ -912,19 +966,18 @@ class AsyncLlamaExtractClient:
912
966
  raise ApiError(status_code=_response.status_code, body=_response.text)
913
967
  raise ApiError(status_code=_response.status_code, body=_response_json)
914
968
 
915
- async def update_extraction_agent(self, extraction_agent_id: str, *, request: ExtractAgentUpdate) -> ExtractAgent:
969
+ async def update_extraction_agent(
970
+ self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
971
+ ) -> ExtractAgent:
916
972
  """
917
973
  Parameters:
918
974
  - extraction_agent_id: str.
919
975
 
920
- - request: ExtractAgentUpdate.
976
+ - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
977
+
978
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
921
979
  ---
922
- from llama_cloud import (
923
- ExtractAgentUpdate,
924
- ExtractConfig,
925
- ExtractMode,
926
- ExtractTarget,
927
- )
980
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
928
981
  from llama_cloud.client import AsyncLlamaCloud
929
982
 
930
983
  client = AsyncLlamaCloud(
@@ -932,11 +985,9 @@ class AsyncLlamaExtractClient:
932
985
  )
933
986
  await client.llama_extract.update_extraction_agent(
934
987
  extraction_agent_id="string",
935
- request=ExtractAgentUpdate(
936
- config=ExtractConfig(
937
- extraction_target=ExtractTarget.PER_DOC,
938
- extraction_mode=ExtractMode.FAST,
939
- ),
988
+ config=ExtractConfig(
989
+ extraction_target=ExtractTarget.PER_DOC,
990
+ extraction_mode=ExtractMode.FAST,
940
991
  ),
941
992
  )
942
993
  """
@@ -945,7 +996,7 @@ class AsyncLlamaExtractClient:
945
996
  urllib.parse.urljoin(
946
997
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
947
998
  ),
948
- json=jsonable_encoder(request),
999
+ json=jsonable_encoder({"data_schema": data_schema, "config": config}),
949
1000
  headers=self._client_wrapper.get_headers(),
950
1001
  timeout=60,
951
1002
  )
@@ -1120,6 +1171,7 @@ class AsyncLlamaExtractClient:
1120
1171
  ExtractJobCreate,
1121
1172
  ExtractMode,
1122
1173
  ExtractTarget,
1174
+ FailPageMode,
1123
1175
  LlamaExtractSettings,
1124
1176
  LlamaParseParameters,
1125
1177
  ParsingMode,
@@ -1142,6 +1194,7 @@ class AsyncLlamaExtractClient:
1142
1194
  chunk_mode=ChunkMode.PAGE,
1143
1195
  llama_parse_params=LlamaParseParameters(
1144
1196
  parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
1197
+ replace_failed_page_mode=FailPageMode.RAW_TEXT,
1145
1198
  ),
1146
1199
  ),
1147
1200
  )
@@ -1167,42 +1220,98 @@ class AsyncLlamaExtractClient:
1167
1220
  raise ApiError(status_code=_response.status_code, body=_response.text)
1168
1221
  raise ApiError(status_code=_response.status_code, body=_response_json)
1169
1222
 
1223
+ async def run_job_on_file(
1224
+ self,
1225
+ *,
1226
+ from_ui: typing.Optional[bool] = None,
1227
+ extraction_agent_id: str,
1228
+ file: typing.IO,
1229
+ data_schema_override: typing.Optional[str] = None,
1230
+ config_override: typing.Optional[str] = None,
1231
+ ) -> ExtractJob:
1232
+ """
1233
+ Parameters:
1234
+ - from_ui: typing.Optional[bool].
1235
+
1236
+ - extraction_agent_id: str.
1237
+
1238
+ - file: typing.IO.
1239
+
1240
+ - data_schema_override: typing.Optional[str].
1241
+
1242
+ - config_override: typing.Optional[str].
1243
+ """
1244
+ _response = await self._client_wrapper.httpx_client.request(
1245
+ "POST",
1246
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
1247
+ params=remove_none_from_dict({"from_ui": from_ui}),
1248
+ data=jsonable_encoder(
1249
+ {
1250
+ "extraction_agent_id": extraction_agent_id,
1251
+ "data_schema_override": data_schema_override,
1252
+ "config_override": config_override,
1253
+ }
1254
+ ),
1255
+ files={"file": file},
1256
+ headers=self._client_wrapper.get_headers(),
1257
+ timeout=60,
1258
+ )
1259
+ if 200 <= _response.status_code < 300:
1260
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1261
+ if _response.status_code == 422:
1262
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1263
+ try:
1264
+ _response_json = _response.json()
1265
+ except JSONDecodeError:
1266
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1267
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1268
+
1170
1269
  async def run_batch_jobs(
1171
- self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreateBatch
1270
+ self,
1271
+ *,
1272
+ from_ui: typing.Optional[bool] = None,
1273
+ extraction_agent_id: str,
1274
+ file_ids: typing.List[str],
1275
+ data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
1276
+ config_override: typing.Optional[ExtractConfig] = OMIT,
1172
1277
  ) -> typing.List[ExtractJob]:
1173
1278
  """
1174
1279
  Parameters:
1175
1280
  - from_ui: typing.Optional[bool].
1176
1281
 
1177
- - request: ExtractJobCreateBatch.
1282
+ - extraction_agent_id: str. The id of the extraction agent
1283
+
1284
+ - file_ids: typing.List[str]. The ids of the files
1285
+
1286
+ - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
1287
+
1288
+ - config_override: typing.Optional[ExtractConfig].
1178
1289
  ---
1179
- from llama_cloud import (
1180
- ExtractConfig,
1181
- ExtractJobCreateBatch,
1182
- ExtractMode,
1183
- ExtractTarget,
1184
- )
1290
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
1185
1291
  from llama_cloud.client import AsyncLlamaCloud
1186
1292
 
1187
1293
  client = AsyncLlamaCloud(
1188
1294
  token="YOUR_TOKEN",
1189
1295
  )
1190
1296
  await client.llama_extract.run_batch_jobs(
1191
- request=ExtractJobCreateBatch(
1192
- extraction_agent_id="string",
1193
- file_ids=[],
1194
- config_override=ExtractConfig(
1195
- extraction_target=ExtractTarget.PER_DOC,
1196
- extraction_mode=ExtractMode.FAST,
1197
- ),
1297
+ extraction_agent_id="string",
1298
+ file_ids=[],
1299
+ config_override=ExtractConfig(
1300
+ extraction_target=ExtractTarget.PER_DOC,
1301
+ extraction_mode=ExtractMode.FAST,
1198
1302
  ),
1199
1303
  )
1200
1304
  """
1305
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
1306
+ if data_schema_override is not OMIT:
1307
+ _request["data_schema_override"] = data_schema_override
1308
+ if config_override is not OMIT:
1309
+ _request["config_override"] = config_override
1201
1310
  _response = await self._client_wrapper.httpx_client.request(
1202
1311
  "POST",
1203
1312
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
1204
1313
  params=remove_none_from_dict({"from_ui": from_ui}),
1205
- json=jsonable_encoder(request),
1314
+ json=jsonable_encoder(_request),
1206
1315
  headers=self._client_wrapper.get_headers(),
1207
1316
  timeout=60,
1208
1317
  )
@@ -0,0 +1,21 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
4
+ from .extract_agent_create_data_schema_zero_value import ExtractAgentCreateDataSchemaZeroValue
5
+ from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
6
+ from .extract_agent_update_data_schema_zero_value import ExtractAgentUpdateDataSchemaZeroValue
7
+ from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
8
+ from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
9
+ from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
10
+ from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
11
+
12
+ __all__ = [
13
+ "ExtractAgentCreateDataSchema",
14
+ "ExtractAgentCreateDataSchemaZeroValue",
15
+ "ExtractAgentUpdateDataSchema",
16
+ "ExtractAgentUpdateDataSchemaZeroValue",
17
+ "ExtractJobCreateBatchDataSchemaOverride",
18
+ "ExtractJobCreateBatchDataSchemaOverrideZeroValue",
19
+ "ExtractSchemaValidateRequestDataSchema",
20
+ "ExtractSchemaValidateRequestDataSchemaZeroValue",
21
+ ]