llama-cloud 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. Click here for more details.

Files changed (92)
  1. llama_cloud/__init__.py +202 -42
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +61 -2
  4. llama_cloud/resources/beta/__init__.py +2 -0
  5. llama_cloud/resources/beta/client.py +371 -0
  6. llama_cloud/resources/data_sinks/__init__.py +18 -2
  7. llama_cloud/resources/data_sinks/client.py +2 -94
  8. llama_cloud/resources/data_sinks/types/__init__.py +18 -2
  9. llama_cloud/resources/data_sinks/types/data_sink_update_component.py +65 -7
  10. llama_cloud/resources/data_sources/__init__.py +30 -2
  11. llama_cloud/resources/data_sources/types/__init__.py +28 -1
  12. llama_cloud/resources/data_sources/types/data_source_update_component.py +2 -23
  13. llama_cloud/resources/data_sources/types/data_source_update_component_one.py +122 -0
  14. llama_cloud/resources/embedding_model_configs/client.py +82 -22
  15. llama_cloud/resources/files/client.py +18 -4
  16. llama_cloud/resources/llama_extract/__init__.py +21 -0
  17. llama_cloud/resources/llama_extract/client.py +227 -114
  18. llama_cloud/resources/llama_extract/types/__init__.py +21 -0
  19. llama_cloud/resources/parsing/client.py +123 -4
  20. llama_cloud/resources/pipelines/client.py +116 -11
  21. llama_cloud/types/__init__.py +172 -52
  22. llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
  23. llama_cloud/types/batch.py +47 -0
  24. llama_cloud/types/batch_item.py +40 -0
  25. llama_cloud/types/batch_paginated_list.py +35 -0
  26. llama_cloud/types/{base_prompt_template.py → batch_public_output.py} +7 -7
  27. llama_cloud/types/cloud_confluence_data_source.py +1 -0
  28. llama_cloud/types/cloud_jira_data_source.py +0 -4
  29. llama_cloud/types/cloud_postgres_vector_store.py +2 -0
  30. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  31. llama_cloud/types/data_sink_component.py +65 -7
  32. llama_cloud/types/data_sink_create_component.py +65 -7
  33. llama_cloud/types/data_source_component.py +2 -23
  34. llama_cloud/types/data_source_component_one.py +122 -0
  35. llama_cloud/types/data_source_create_component.py +2 -23
  36. llama_cloud/types/data_source_create_component_one.py +122 -0
  37. llama_cloud/types/{extract_agent_update.py → data_source_update_dispatcher_config.py} +6 -6
  38. llama_cloud/types/{node_parser.py → delete_params.py} +7 -9
  39. llama_cloud/types/{extract_agent_create.py → document_ingestion_job_params.py} +11 -7
  40. llama_cloud/types/extract_config.py +2 -0
  41. llama_cloud/types/extract_job_create.py +1 -2
  42. llama_cloud/types/fail_page_mode.py +29 -0
  43. llama_cloud/types/file_count_by_status_response.py +37 -0
  44. llama_cloud/types/file_parse_public.py +36 -0
  45. llama_cloud/types/job_names.py +8 -12
  46. llama_cloud/types/job_record.py +2 -2
  47. llama_cloud/types/job_record_parameters.py +111 -0
  48. llama_cloud/types/l_lama_parse_transform_config.py +37 -0
  49. llama_cloud/types/legacy_parse_job_config.py +189 -0
  50. llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
  51. llama_cloud/types/llama_parse_parameters.py +8 -0
  52. llama_cloud/types/load_files_job_config.py +35 -0
  53. llama_cloud/types/message_role.py +4 -0
  54. llama_cloud/types/parse_job_config.py +134 -0
  55. llama_cloud/types/pg_vector_distance_method.py +43 -0
  56. llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py} +12 -9
  57. llama_cloud/types/pg_vector_vector_type.py +35 -0
  58. llama_cloud/types/pipeline.py +2 -4
  59. llama_cloud/types/pipeline_create.py +3 -2
  60. llama_cloud/types/pipeline_data_source.py +3 -0
  61. llama_cloud/types/pipeline_data_source_component.py +2 -23
  62. llama_cloud/types/pipeline_data_source_component_one.py +122 -0
  63. llama_cloud/types/pipeline_data_source_status.py +33 -0
  64. llama_cloud/types/pipeline_file.py +1 -0
  65. llama_cloud/types/pipeline_file_update_dispatcher_config.py +38 -0
  66. llama_cloud/types/{markdown_node_parser.py → pipeline_file_updater_config.py} +14 -15
  67. llama_cloud/types/pipeline_managed_ingestion_job_params.py +37 -0
  68. llama_cloud/types/pipeline_metadata_config.py +36 -0
  69. llama_cloud/types/prompt_conf.py +3 -0
  70. llama_cloud/types/struct_parse_conf.py +4 -1
  71. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/METADATA +4 -2
  72. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/RECORD +82 -68
  73. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/WHEEL +1 -1
  74. llama_cloud/types/character_splitter.py +0 -46
  75. llama_cloud/types/code_splitter.py +0 -50
  76. llama_cloud/types/configured_transformation_item.py +0 -46
  77. llama_cloud/types/configured_transformation_item_component.py +0 -22
  78. llama_cloud/types/llm.py +0 -60
  79. llama_cloud/types/markdown_element_node_parser.py +0 -51
  80. llama_cloud/types/page_splitter_node_parser.py +0 -42
  81. llama_cloud/types/pydantic_program_mode.py +0 -41
  82. llama_cloud/types/sentence_splitter.py +0 -50
  83. llama_cloud/types/token_text_splitter.py +0 -47
  84. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
  85. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
  86. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
  87. /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
  88. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
  89. /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
  90. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
  91. /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
  92. {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/LICENSE +0 -0
@@ -10,18 +10,19 @@ from ...core.jsonable_encoder import jsonable_encoder
10
10
  from ...core.remove_none_from_dict import remove_none_from_dict
11
11
  from ...errors.unprocessable_entity_error import UnprocessableEntityError
12
12
  from ...types.extract_agent import ExtractAgent
13
- from ...types.extract_agent_create import ExtractAgentCreate
14
- from ...types.extract_agent_update import ExtractAgentUpdate
13
+ from ...types.extract_config import ExtractConfig
15
14
  from ...types.extract_job import ExtractJob
16
15
  from ...types.extract_job_create import ExtractJobCreate
17
- from ...types.extract_job_create_batch import ExtractJobCreateBatch
18
16
  from ...types.extract_resultset import ExtractResultset
19
17
  from ...types.extract_run import ExtractRun
20
- from ...types.extract_schema_validate_request import ExtractSchemaValidateRequest
21
18
  from ...types.extract_schema_validate_response import ExtractSchemaValidateResponse
22
19
  from ...types.http_validation_error import HttpValidationError
23
20
  from ...types.llama_extract_settings import LlamaExtractSettings
24
21
  from ...types.paginated_extract_runs_response import PaginatedExtractRunsResponse
22
+ from .types.extract_agent_create_data_schema import ExtractAgentCreateDataSchema
23
+ from .types.extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
24
+ from .types.extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
25
+ from .types.extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
25
26
 
26
27
  try:
27
28
  import pydantic
@@ -39,17 +40,19 @@ class LlamaExtractClient:
39
40
  def __init__(self, *, client_wrapper: SyncClientWrapper):
40
41
  self._client_wrapper = client_wrapper
41
42
 
42
- def list_extraction_agents(self, *, project_id: typing.Optional[str] = None) -> typing.List[ExtractAgent]:
43
+ def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
43
44
  """
44
45
  Parameters:
45
- - project_id: typing.Optional[str].
46
+ - project_id: str.
46
47
  ---
47
48
  from llama_cloud.client import LlamaCloud
48
49
 
49
50
  client = LlamaCloud(
50
51
  token="YOUR_TOKEN",
51
52
  )
52
- client.llama_extract.list_extraction_agents()
53
+ client.llama_extract.list_extraction_agents(
54
+ project_id="string",
55
+ )
53
56
  """
54
57
  _response = self._client_wrapper.httpx_client.request(
55
58
  "GET",
@@ -73,7 +76,9 @@ class LlamaExtractClient:
73
76
  *,
74
77
  project_id: typing.Optional[str] = None,
75
78
  organization_id: typing.Optional[str] = None,
76
- request: ExtractAgentCreate,
79
+ name: str,
80
+ data_schema: ExtractAgentCreateDataSchema,
81
+ config: ExtractConfig,
77
82
  ) -> ExtractAgent:
78
83
  """
79
84
  Parameters:
@@ -81,26 +86,23 @@ class LlamaExtractClient:
81
86
 
82
87
  - organization_id: typing.Optional[str].
83
88
 
84
- - request: ExtractAgentCreate.
89
+ - name: str. The name of the extraction schema
90
+
91
+ - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
92
+
93
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
85
94
  ---
86
- from llama_cloud import (
87
- ExtractAgentCreate,
88
- ExtractConfig,
89
- ExtractMode,
90
- ExtractTarget,
91
- )
95
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
92
96
  from llama_cloud.client import LlamaCloud
93
97
 
94
98
  client = LlamaCloud(
95
99
  token="YOUR_TOKEN",
96
100
  )
97
101
  client.llama_extract.create_extraction_agent(
98
- request=ExtractAgentCreate(
99
- name="string",
100
- config=ExtractConfig(
101
- extraction_target=ExtractTarget.PER_DOC,
102
- extraction_mode=ExtractMode.FAST,
103
- ),
102
+ name="string",
103
+ config=ExtractConfig(
104
+ extraction_target=ExtractTarget.PER_DOC,
105
+ extraction_mode=ExtractMode.FAST,
104
106
  ),
105
107
  )
106
108
  """
@@ -108,7 +110,7 @@ class LlamaExtractClient:
108
110
  "POST",
109
111
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
110
112
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
111
- json=jsonable_encoder(request),
113
+ json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
112
114
  headers=self._client_wrapper.get_headers(),
113
115
  timeout=60,
114
116
  )
@@ -122,30 +124,29 @@ class LlamaExtractClient:
122
124
  raise ApiError(status_code=_response.status_code, body=_response.text)
123
125
  raise ApiError(status_code=_response.status_code, body=_response_json)
124
126
 
125
- def validate_extraction_schema(self, *, request: ExtractSchemaValidateRequest) -> ExtractSchemaValidateResponse:
127
+ def validate_extraction_schema(
128
+ self, *, data_schema: ExtractSchemaValidateRequestDataSchema
129
+ ) -> ExtractSchemaValidateResponse:
126
130
  """
127
131
  Validates an extraction agent's schema definition.
128
132
  Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
129
133
 
130
134
  Parameters:
131
- - request: ExtractSchemaValidateRequest.
135
+ - data_schema: ExtractSchemaValidateRequestDataSchema.
132
136
  ---
133
- from llama_cloud import ExtractSchemaValidateRequest
134
137
  from llama_cloud.client import LlamaCloud
135
138
 
136
139
  client = LlamaCloud(
137
140
  token="YOUR_TOKEN",
138
141
  )
139
- client.llama_extract.validate_extraction_schema(
140
- request=ExtractSchemaValidateRequest(),
141
- )
142
+ client.llama_extract.validate_extraction_schema()
142
143
  """
143
144
  _response = self._client_wrapper.httpx_client.request(
144
145
  "POST",
145
146
  urllib.parse.urljoin(
146
147
  f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
147
148
  ),
148
- json=jsonable_encoder(request),
149
+ json=jsonable_encoder({"data_schema": data_schema}),
149
150
  headers=self._client_wrapper.get_headers(),
150
151
  timeout=60,
151
152
  )
@@ -226,19 +227,18 @@ class LlamaExtractClient:
226
227
  raise ApiError(status_code=_response.status_code, body=_response.text)
227
228
  raise ApiError(status_code=_response.status_code, body=_response_json)
228
229
 
229
- def update_extraction_agent(self, extraction_agent_id: str, *, request: ExtractAgentUpdate) -> ExtractAgent:
230
+ def update_extraction_agent(
231
+ self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
232
+ ) -> ExtractAgent:
230
233
  """
231
234
  Parameters:
232
235
  - extraction_agent_id: str.
233
236
 
234
- - request: ExtractAgentUpdate.
237
+ - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
238
+
239
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
235
240
  ---
236
- from llama_cloud import (
237
- ExtractAgentUpdate,
238
- ExtractConfig,
239
- ExtractMode,
240
- ExtractTarget,
241
- )
241
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
242
242
  from llama_cloud.client import LlamaCloud
243
243
 
244
244
  client = LlamaCloud(
@@ -246,11 +246,9 @@ class LlamaExtractClient:
246
246
  )
247
247
  client.llama_extract.update_extraction_agent(
248
248
  extraction_agent_id="string",
249
- request=ExtractAgentUpdate(
250
- config=ExtractConfig(
251
- extraction_target=ExtractTarget.PER_DOC,
252
- extraction_mode=ExtractMode.FAST,
253
- ),
249
+ config=ExtractConfig(
250
+ extraction_target=ExtractTarget.PER_DOC,
251
+ extraction_mode=ExtractMode.FAST,
254
252
  ),
255
253
  )
256
254
  """
@@ -259,7 +257,7 @@ class LlamaExtractClient:
259
257
  urllib.parse.urljoin(
260
258
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
261
259
  ),
262
- json=jsonable_encoder(request),
260
+ json=jsonable_encoder({"data_schema": data_schema, "config": config}),
263
261
  headers=self._client_wrapper.get_headers(),
264
262
  timeout=60,
265
263
  )
@@ -357,6 +355,7 @@ class LlamaExtractClient:
357
355
  client.llama_extract.run_job(
358
356
  request=ExtractJobCreate(
359
357
  extraction_agent_id="string",
358
+ file_id="string",
360
359
  config_override=ExtractConfig(
361
360
  extraction_target=ExtractTarget.PER_DOC,
362
361
  extraction_mode=ExtractMode.FAST,
@@ -433,6 +432,7 @@ class LlamaExtractClient:
433
432
  ExtractJobCreate,
434
433
  ExtractMode,
435
434
  ExtractTarget,
435
+ FailPageMode,
436
436
  LlamaExtractSettings,
437
437
  LlamaParseParameters,
438
438
  ParsingMode,
@@ -445,6 +445,7 @@ class LlamaExtractClient:
445
445
  client.llama_extract.run_job_test_user(
446
446
  job_create=ExtractJobCreate(
447
447
  extraction_agent_id="string",
448
+ file_id="string",
448
449
  config_override=ExtractConfig(
449
450
  extraction_target=ExtractTarget.PER_DOC,
450
451
  extraction_mode=ExtractMode.FAST,
@@ -454,6 +455,7 @@ class LlamaExtractClient:
454
455
  chunk_mode=ChunkMode.PAGE,
455
456
  llama_parse_params=LlamaParseParameters(
456
457
  parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
458
+ replace_failed_page_mode=FailPageMode.RAW_TEXT,
457
459
  ),
458
460
  ),
459
461
  )
@@ -479,42 +481,98 @@ class LlamaExtractClient:
479
481
  raise ApiError(status_code=_response.status_code, body=_response.text)
480
482
  raise ApiError(status_code=_response.status_code, body=_response_json)
481
483
 
484
+ def run_job_on_file(
485
+ self,
486
+ *,
487
+ from_ui: typing.Optional[bool] = None,
488
+ extraction_agent_id: str,
489
+ file: typing.IO,
490
+ data_schema_override: typing.Optional[str] = None,
491
+ config_override: typing.Optional[str] = None,
492
+ ) -> ExtractJob:
493
+ """
494
+ Parameters:
495
+ - from_ui: typing.Optional[bool].
496
+
497
+ - extraction_agent_id: str.
498
+
499
+ - file: typing.IO.
500
+
501
+ - data_schema_override: typing.Optional[str].
502
+
503
+ - config_override: typing.Optional[str].
504
+ """
505
+ _response = self._client_wrapper.httpx_client.request(
506
+ "POST",
507
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
508
+ params=remove_none_from_dict({"from_ui": from_ui}),
509
+ data=jsonable_encoder(
510
+ {
511
+ "extraction_agent_id": extraction_agent_id,
512
+ "data_schema_override": data_schema_override,
513
+ "config_override": config_override,
514
+ }
515
+ ),
516
+ files={"file": file},
517
+ headers=self._client_wrapper.get_headers(),
518
+ timeout=60,
519
+ )
520
+ if 200 <= _response.status_code < 300:
521
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
522
+ if _response.status_code == 422:
523
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
524
+ try:
525
+ _response_json = _response.json()
526
+ except JSONDecodeError:
527
+ raise ApiError(status_code=_response.status_code, body=_response.text)
528
+ raise ApiError(status_code=_response.status_code, body=_response_json)
529
+
482
530
  def run_batch_jobs(
483
- self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreateBatch
531
+ self,
532
+ *,
533
+ from_ui: typing.Optional[bool] = None,
534
+ extraction_agent_id: str,
535
+ file_ids: typing.List[str],
536
+ data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
537
+ config_override: typing.Optional[ExtractConfig] = OMIT,
484
538
  ) -> typing.List[ExtractJob]:
485
539
  """
486
540
  Parameters:
487
541
  - from_ui: typing.Optional[bool].
488
542
 
489
- - request: ExtractJobCreateBatch.
543
+ - extraction_agent_id: str. The id of the extraction agent
544
+
545
+ - file_ids: typing.List[str]. The ids of the files
546
+
547
+ - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
548
+
549
+ - config_override: typing.Optional[ExtractConfig].
490
550
  ---
491
- from llama_cloud import (
492
- ExtractConfig,
493
- ExtractJobCreateBatch,
494
- ExtractMode,
495
- ExtractTarget,
496
- )
551
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
497
552
  from llama_cloud.client import LlamaCloud
498
553
 
499
554
  client = LlamaCloud(
500
555
  token="YOUR_TOKEN",
501
556
  )
502
557
  client.llama_extract.run_batch_jobs(
503
- request=ExtractJobCreateBatch(
504
- extraction_agent_id="string",
505
- file_ids=[],
506
- config_override=ExtractConfig(
507
- extraction_target=ExtractTarget.PER_DOC,
508
- extraction_mode=ExtractMode.FAST,
509
- ),
558
+ extraction_agent_id="string",
559
+ file_ids=[],
560
+ config_override=ExtractConfig(
561
+ extraction_target=ExtractTarget.PER_DOC,
562
+ extraction_mode=ExtractMode.FAST,
510
563
  ),
511
564
  )
512
565
  """
566
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
567
+ if data_schema_override is not OMIT:
568
+ _request["data_schema_override"] = data_schema_override
569
+ if config_override is not OMIT:
570
+ _request["config_override"] = config_override
513
571
  _response = self._client_wrapper.httpx_client.request(
514
572
  "POST",
515
573
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
516
574
  params=remove_none_from_dict({"from_ui": from_ui}),
517
- json=jsonable_encoder(request),
575
+ json=jsonable_encoder(_request),
518
576
  headers=self._client_wrapper.get_headers(),
519
577
  timeout=60,
520
578
  )
@@ -721,17 +779,19 @@ class AsyncLlamaExtractClient:
721
779
  def __init__(self, *, client_wrapper: AsyncClientWrapper):
722
780
  self._client_wrapper = client_wrapper
723
781
 
724
- async def list_extraction_agents(self, *, project_id: typing.Optional[str] = None) -> typing.List[ExtractAgent]:
782
+ async def list_extraction_agents(self, *, project_id: str) -> typing.List[ExtractAgent]:
725
783
  """
726
784
  Parameters:
727
- - project_id: typing.Optional[str].
785
+ - project_id: str.
728
786
  ---
729
787
  from llama_cloud.client import AsyncLlamaCloud
730
788
 
731
789
  client = AsyncLlamaCloud(
732
790
  token="YOUR_TOKEN",
733
791
  )
734
- await client.llama_extract.list_extraction_agents()
792
+ await client.llama_extract.list_extraction_agents(
793
+ project_id="string",
794
+ )
735
795
  """
736
796
  _response = await self._client_wrapper.httpx_client.request(
737
797
  "GET",
@@ -755,7 +815,9 @@ class AsyncLlamaExtractClient:
755
815
  *,
756
816
  project_id: typing.Optional[str] = None,
757
817
  organization_id: typing.Optional[str] = None,
758
- request: ExtractAgentCreate,
818
+ name: str,
819
+ data_schema: ExtractAgentCreateDataSchema,
820
+ config: ExtractConfig,
759
821
  ) -> ExtractAgent:
760
822
  """
761
823
  Parameters:
@@ -763,26 +825,23 @@ class AsyncLlamaExtractClient:
763
825
 
764
826
  - organization_id: typing.Optional[str].
765
827
 
766
- - request: ExtractAgentCreate.
828
+ - name: str. The name of the extraction schema
829
+
830
+ - data_schema: ExtractAgentCreateDataSchema. The schema of the data.
831
+
832
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
767
833
  ---
768
- from llama_cloud import (
769
- ExtractAgentCreate,
770
- ExtractConfig,
771
- ExtractMode,
772
- ExtractTarget,
773
- )
834
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
774
835
  from llama_cloud.client import AsyncLlamaCloud
775
836
 
776
837
  client = AsyncLlamaCloud(
777
838
  token="YOUR_TOKEN",
778
839
  )
779
840
  await client.llama_extract.create_extraction_agent(
780
- request=ExtractAgentCreate(
781
- name="string",
782
- config=ExtractConfig(
783
- extraction_target=ExtractTarget.PER_DOC,
784
- extraction_mode=ExtractMode.FAST,
785
- ),
841
+ name="string",
842
+ config=ExtractConfig(
843
+ extraction_target=ExtractTarget.PER_DOC,
844
+ extraction_mode=ExtractMode.FAST,
786
845
  ),
787
846
  )
788
847
  """
@@ -790,7 +849,7 @@ class AsyncLlamaExtractClient:
790
849
  "POST",
791
850
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
792
851
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
793
- json=jsonable_encoder(request),
852
+ json=jsonable_encoder({"name": name, "data_schema": data_schema, "config": config}),
794
853
  headers=self._client_wrapper.get_headers(),
795
854
  timeout=60,
796
855
  )
@@ -805,31 +864,28 @@ class AsyncLlamaExtractClient:
805
864
  raise ApiError(status_code=_response.status_code, body=_response_json)
806
865
 
807
866
  async def validate_extraction_schema(
808
- self, *, request: ExtractSchemaValidateRequest
867
+ self, *, data_schema: ExtractSchemaValidateRequestDataSchema
809
868
  ) -> ExtractSchemaValidateResponse:
810
869
  """
811
870
  Validates an extraction agent's schema definition.
812
871
  Returns the normalized and validated schema if valid, otherwise raises an HTTP 400.
813
872
 
814
873
  Parameters:
815
- - request: ExtractSchemaValidateRequest.
874
+ - data_schema: ExtractSchemaValidateRequestDataSchema.
816
875
  ---
817
- from llama_cloud import ExtractSchemaValidateRequest
818
876
  from llama_cloud.client import AsyncLlamaCloud
819
877
 
820
878
  client = AsyncLlamaCloud(
821
879
  token="YOUR_TOKEN",
822
880
  )
823
- await client.llama_extract.validate_extraction_schema(
824
- request=ExtractSchemaValidateRequest(),
825
- )
881
+ await client.llama_extract.validate_extraction_schema()
826
882
  """
827
883
  _response = await self._client_wrapper.httpx_client.request(
828
884
  "POST",
829
885
  urllib.parse.urljoin(
830
886
  f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/schema/validation"
831
887
  ),
832
- json=jsonable_encoder(request),
888
+ json=jsonable_encoder({"data_schema": data_schema}),
833
889
  headers=self._client_wrapper.get_headers(),
834
890
  timeout=60,
835
891
  )
@@ -910,19 +966,18 @@ class AsyncLlamaExtractClient:
910
966
  raise ApiError(status_code=_response.status_code, body=_response.text)
911
967
  raise ApiError(status_code=_response.status_code, body=_response_json)
912
968
 
913
- async def update_extraction_agent(self, extraction_agent_id: str, *, request: ExtractAgentUpdate) -> ExtractAgent:
969
+ async def update_extraction_agent(
970
+ self, extraction_agent_id: str, *, data_schema: ExtractAgentUpdateDataSchema, config: ExtractConfig
971
+ ) -> ExtractAgent:
914
972
  """
915
973
  Parameters:
916
974
  - extraction_agent_id: str.
917
975
 
918
- - request: ExtractAgentUpdate.
976
+ - data_schema: ExtractAgentUpdateDataSchema. The schema of the data
977
+
978
+ - config: ExtractConfig. The configuration parameters for the extraction agent.
919
979
  ---
920
- from llama_cloud import (
921
- ExtractAgentUpdate,
922
- ExtractConfig,
923
- ExtractMode,
924
- ExtractTarget,
925
- )
980
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
926
981
  from llama_cloud.client import AsyncLlamaCloud
927
982
 
928
983
  client = AsyncLlamaCloud(
@@ -930,11 +985,9 @@ class AsyncLlamaExtractClient:
930
985
  )
931
986
  await client.llama_extract.update_extraction_agent(
932
987
  extraction_agent_id="string",
933
- request=ExtractAgentUpdate(
934
- config=ExtractConfig(
935
- extraction_target=ExtractTarget.PER_DOC,
936
- extraction_mode=ExtractMode.FAST,
937
- ),
988
+ config=ExtractConfig(
989
+ extraction_target=ExtractTarget.PER_DOC,
990
+ extraction_mode=ExtractMode.FAST,
938
991
  ),
939
992
  )
940
993
  """
@@ -943,7 +996,7 @@ class AsyncLlamaExtractClient:
943
996
  urllib.parse.urljoin(
944
997
  f"{self._client_wrapper.get_base_url()}/", f"api/v1/extraction/extraction-agents/{extraction_agent_id}"
945
998
  ),
946
- json=jsonable_encoder(request),
999
+ json=jsonable_encoder({"data_schema": data_schema, "config": config}),
947
1000
  headers=self._client_wrapper.get_headers(),
948
1001
  timeout=60,
949
1002
  )
@@ -1041,6 +1094,7 @@ class AsyncLlamaExtractClient:
1041
1094
  await client.llama_extract.run_job(
1042
1095
  request=ExtractJobCreate(
1043
1096
  extraction_agent_id="string",
1097
+ file_id="string",
1044
1098
  config_override=ExtractConfig(
1045
1099
  extraction_target=ExtractTarget.PER_DOC,
1046
1100
  extraction_mode=ExtractMode.FAST,
@@ -1117,6 +1171,7 @@ class AsyncLlamaExtractClient:
1117
1171
  ExtractJobCreate,
1118
1172
  ExtractMode,
1119
1173
  ExtractTarget,
1174
+ FailPageMode,
1120
1175
  LlamaExtractSettings,
1121
1176
  LlamaParseParameters,
1122
1177
  ParsingMode,
@@ -1129,6 +1184,7 @@ class AsyncLlamaExtractClient:
1129
1184
  await client.llama_extract.run_job_test_user(
1130
1185
  job_create=ExtractJobCreate(
1131
1186
  extraction_agent_id="string",
1187
+ file_id="string",
1132
1188
  config_override=ExtractConfig(
1133
1189
  extraction_target=ExtractTarget.PER_DOC,
1134
1190
  extraction_mode=ExtractMode.FAST,
@@ -1138,6 +1194,7 @@ class AsyncLlamaExtractClient:
1138
1194
  chunk_mode=ChunkMode.PAGE,
1139
1195
  llama_parse_params=LlamaParseParameters(
1140
1196
  parse_mode=ParsingMode.PARSE_PAGE_WITHOUT_LLM,
1197
+ replace_failed_page_mode=FailPageMode.RAW_TEXT,
1141
1198
  ),
1142
1199
  ),
1143
1200
  )
@@ -1163,42 +1220,98 @@ class AsyncLlamaExtractClient:
1163
1220
  raise ApiError(status_code=_response.status_code, body=_response.text)
1164
1221
  raise ApiError(status_code=_response.status_code, body=_response_json)
1165
1222
 
1223
+ async def run_job_on_file(
1224
+ self,
1225
+ *,
1226
+ from_ui: typing.Optional[bool] = None,
1227
+ extraction_agent_id: str,
1228
+ file: typing.IO,
1229
+ data_schema_override: typing.Optional[str] = None,
1230
+ config_override: typing.Optional[str] = None,
1231
+ ) -> ExtractJob:
1232
+ """
1233
+ Parameters:
1234
+ - from_ui: typing.Optional[bool].
1235
+
1236
+ - extraction_agent_id: str.
1237
+
1238
+ - file: typing.IO.
1239
+
1240
+ - data_schema_override: typing.Optional[str].
1241
+
1242
+ - config_override: typing.Optional[str].
1243
+ """
1244
+ _response = await self._client_wrapper.httpx_client.request(
1245
+ "POST",
1246
+ urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/file"),
1247
+ params=remove_none_from_dict({"from_ui": from_ui}),
1248
+ data=jsonable_encoder(
1249
+ {
1250
+ "extraction_agent_id": extraction_agent_id,
1251
+ "data_schema_override": data_schema_override,
1252
+ "config_override": config_override,
1253
+ }
1254
+ ),
1255
+ files={"file": file},
1256
+ headers=self._client_wrapper.get_headers(),
1257
+ timeout=60,
1258
+ )
1259
+ if 200 <= _response.status_code < 300:
1260
+ return pydantic.parse_obj_as(ExtractJob, _response.json()) # type: ignore
1261
+ if _response.status_code == 422:
1262
+ raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
1263
+ try:
1264
+ _response_json = _response.json()
1265
+ except JSONDecodeError:
1266
+ raise ApiError(status_code=_response.status_code, body=_response.text)
1267
+ raise ApiError(status_code=_response.status_code, body=_response_json)
1268
+
1166
1269
  async def run_batch_jobs(
1167
- self, *, from_ui: typing.Optional[bool] = None, request: ExtractJobCreateBatch
1270
+ self,
1271
+ *,
1272
+ from_ui: typing.Optional[bool] = None,
1273
+ extraction_agent_id: str,
1274
+ file_ids: typing.List[str],
1275
+ data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = OMIT,
1276
+ config_override: typing.Optional[ExtractConfig] = OMIT,
1168
1277
  ) -> typing.List[ExtractJob]:
1169
1278
  """
1170
1279
  Parameters:
1171
1280
  - from_ui: typing.Optional[bool].
1172
1281
 
1173
- - request: ExtractJobCreateBatch.
1282
+ - extraction_agent_id: str. The id of the extraction agent
1283
+
1284
+ - file_ids: typing.List[str]. The ids of the files
1285
+
1286
+ - data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride]. The data schema to override the extraction agent's data schema with
1287
+
1288
+ - config_override: typing.Optional[ExtractConfig].
1174
1289
  ---
1175
- from llama_cloud import (
1176
- ExtractConfig,
1177
- ExtractJobCreateBatch,
1178
- ExtractMode,
1179
- ExtractTarget,
1180
- )
1290
+ from llama_cloud import ExtractConfig, ExtractMode, ExtractTarget
1181
1291
  from llama_cloud.client import AsyncLlamaCloud
1182
1292
 
1183
1293
  client = AsyncLlamaCloud(
1184
1294
  token="YOUR_TOKEN",
1185
1295
  )
1186
1296
  await client.llama_extract.run_batch_jobs(
1187
- request=ExtractJobCreateBatch(
1188
- extraction_agent_id="string",
1189
- file_ids=[],
1190
- config_override=ExtractConfig(
1191
- extraction_target=ExtractTarget.PER_DOC,
1192
- extraction_mode=ExtractMode.FAST,
1193
- ),
1297
+ extraction_agent_id="string",
1298
+ file_ids=[],
1299
+ config_override=ExtractConfig(
1300
+ extraction_target=ExtractTarget.PER_DOC,
1301
+ extraction_mode=ExtractMode.FAST,
1194
1302
  ),
1195
1303
  )
1196
1304
  """
1305
+ _request: typing.Dict[str, typing.Any] = {"extraction_agent_id": extraction_agent_id, "file_ids": file_ids}
1306
+ if data_schema_override is not OMIT:
1307
+ _request["data_schema_override"] = data_schema_override
1308
+ if config_override is not OMIT:
1309
+ _request["config_override"] = config_override
1197
1310
  _response = await self._client_wrapper.httpx_client.request(
1198
1311
  "POST",
1199
1312
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/jobs/batch"),
1200
1313
  params=remove_none_from_dict({"from_ui": from_ui}),
1201
- json=jsonable_encoder(request),
1314
+ json=jsonable_encoder(_request),
1202
1315
  headers=self._client_wrapper.get_headers(),
1203
1316
  timeout=60,
1204
1317
  )
@@ -0,0 +1,21 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ from .extract_agent_create_data_schema import ExtractAgentCreateDataSchema
4
+ from .extract_agent_create_data_schema_zero_value import ExtractAgentCreateDataSchemaZeroValue
5
+ from .extract_agent_update_data_schema import ExtractAgentUpdateDataSchema
6
+ from .extract_agent_update_data_schema_zero_value import ExtractAgentUpdateDataSchemaZeroValue
7
+ from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
8
+ from .extract_job_create_batch_data_schema_override_zero_value import ExtractJobCreateBatchDataSchemaOverrideZeroValue
9
+ from .extract_schema_validate_request_data_schema import ExtractSchemaValidateRequestDataSchema
10
+ from .extract_schema_validate_request_data_schema_zero_value import ExtractSchemaValidateRequestDataSchemaZeroValue
11
+
12
+ __all__ = [
13
+ "ExtractAgentCreateDataSchema",
14
+ "ExtractAgentCreateDataSchemaZeroValue",
15
+ "ExtractAgentUpdateDataSchema",
16
+ "ExtractAgentUpdateDataSchemaZeroValue",
17
+ "ExtractJobCreateBatchDataSchemaOverride",
18
+ "ExtractJobCreateBatchDataSchemaOverrideZeroValue",
19
+ "ExtractSchemaValidateRequestDataSchema",
20
+ "ExtractSchemaValidateRequestDataSchemaZeroValue",
21
+ ]