llama-cloud 0.1.18__py3-none-any.whl → 0.1.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cloud/__init__.py +202 -42
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +61 -2
- llama_cloud/resources/beta/__init__.py +2 -0
- llama_cloud/resources/beta/client.py +371 -0
- llama_cloud/resources/data_sinks/__init__.py +18 -2
- llama_cloud/resources/data_sinks/client.py +2 -94
- llama_cloud/resources/data_sinks/types/__init__.py +18 -2
- llama_cloud/resources/data_sinks/types/data_sink_update_component.py +65 -7
- llama_cloud/resources/data_sources/__init__.py +30 -2
- llama_cloud/resources/data_sources/types/__init__.py +28 -1
- llama_cloud/resources/data_sources/types/data_source_update_component.py +2 -23
- llama_cloud/resources/data_sources/types/data_source_update_component_one.py +122 -0
- llama_cloud/resources/embedding_model_configs/client.py +82 -22
- llama_cloud/resources/files/client.py +18 -4
- llama_cloud/resources/llama_extract/__init__.py +21 -0
- llama_cloud/resources/llama_extract/client.py +227 -114
- llama_cloud/resources/llama_extract/types/__init__.py +21 -0
- llama_cloud/resources/parsing/client.py +123 -4
- llama_cloud/resources/pipelines/client.py +116 -11
- llama_cloud/types/__init__.py +172 -52
- llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
- llama_cloud/types/batch.py +47 -0
- llama_cloud/types/batch_item.py +40 -0
- llama_cloud/types/batch_paginated_list.py +35 -0
- llama_cloud/types/{base_prompt_template.py → batch_public_output.py} +7 -7
- llama_cloud/types/cloud_confluence_data_source.py +1 -0
- llama_cloud/types/cloud_jira_data_source.py +0 -4
- llama_cloud/types/cloud_postgres_vector_store.py +2 -0
- llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
- llama_cloud/types/data_sink_component.py +65 -7
- llama_cloud/types/data_sink_create_component.py +65 -7
- llama_cloud/types/data_source_component.py +2 -23
- llama_cloud/types/data_source_component_one.py +122 -0
- llama_cloud/types/data_source_create_component.py +2 -23
- llama_cloud/types/data_source_create_component_one.py +122 -0
- llama_cloud/types/{extract_agent_update.py → data_source_update_dispatcher_config.py} +6 -6
- llama_cloud/types/{node_parser.py → delete_params.py} +7 -9
- llama_cloud/types/{extract_agent_create.py → document_ingestion_job_params.py} +11 -7
- llama_cloud/types/extract_config.py +2 -0
- llama_cloud/types/extract_job_create.py +1 -2
- llama_cloud/types/fail_page_mode.py +29 -0
- llama_cloud/types/file_count_by_status_response.py +37 -0
- llama_cloud/types/file_parse_public.py +36 -0
- llama_cloud/types/job_names.py +8 -12
- llama_cloud/types/job_record.py +2 -2
- llama_cloud/types/job_record_parameters.py +111 -0
- llama_cloud/types/l_lama_parse_transform_config.py +37 -0
- llama_cloud/types/legacy_parse_job_config.py +189 -0
- llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
- llama_cloud/types/llama_parse_parameters.py +8 -0
- llama_cloud/types/load_files_job_config.py +35 -0
- llama_cloud/types/message_role.py +4 -0
- llama_cloud/types/parse_job_config.py +134 -0
- llama_cloud/types/pg_vector_distance_method.py +43 -0
- llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py} +12 -9
- llama_cloud/types/pg_vector_vector_type.py +35 -0
- llama_cloud/types/pipeline.py +2 -4
- llama_cloud/types/pipeline_create.py +3 -2
- llama_cloud/types/pipeline_data_source.py +3 -0
- llama_cloud/types/pipeline_data_source_component.py +2 -23
- llama_cloud/types/pipeline_data_source_component_one.py +122 -0
- llama_cloud/types/pipeline_data_source_status.py +33 -0
- llama_cloud/types/pipeline_file.py +1 -0
- llama_cloud/types/pipeline_file_update_dispatcher_config.py +38 -0
- llama_cloud/types/{markdown_node_parser.py → pipeline_file_updater_config.py} +14 -15
- llama_cloud/types/pipeline_managed_ingestion_job_params.py +37 -0
- llama_cloud/types/pipeline_metadata_config.py +36 -0
- llama_cloud/types/prompt_conf.py +3 -0
- llama_cloud/types/struct_parse_conf.py +4 -1
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/METADATA +4 -2
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/RECORD +82 -68
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/WHEEL +1 -1
- llama_cloud/types/character_splitter.py +0 -46
- llama_cloud/types/code_splitter.py +0 -50
- llama_cloud/types/configured_transformation_item.py +0 -46
- llama_cloud/types/configured_transformation_item_component.py +0 -22
- llama_cloud/types/llm.py +0 -60
- llama_cloud/types/markdown_element_node_parser.py +0 -51
- llama_cloud/types/page_splitter_node_parser.py +0 -42
- llama_cloud/types/pydantic_program_mode.py +0 -41
- llama_cloud/types/sentence_splitter.py +0 -50
- llama_cloud/types/token_text_splitter.py +0 -47
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.20.dist-info}/LICENSE +0 -0
llama_cloud/types/parse_job_config.py
ADDED

@@ -0,0 +1,134 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .fail_page_mode import FailPageMode
+from .parser_languages import ParserLanguages
+from .parsing_mode import ParsingMode
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ParseJobConfig(pydantic.BaseModel):
+    """
+    Configuration for llamaparse job
+    """
+
+    custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+    resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+    languages: typing.Optional[typing.List[ParserLanguages]]
+    parsing_instruction: typing.Optional[str]
+    disable_ocr: typing.Optional[bool]
+    annotate_links: typing.Optional[bool]
+    adaptive_long_table: typing.Optional[bool]
+    compact_markdown_table: typing.Optional[bool]
+    disable_reconstruction: typing.Optional[bool]
+    disable_image_extraction: typing.Optional[bool]
+    invalidate_cache: typing.Optional[bool]
+    output_pdf_of_document: typing.Optional[bool]
+    do_not_cache: typing.Optional[bool]
+    fast_mode: typing.Optional[bool]
+    skip_diagonal_text: typing.Optional[bool]
+    preserve_layout_alignment_across_pages: typing.Optional[bool]
+    gpt_4_o_mode: typing.Optional[bool] = pydantic.Field(alias="gpt4o_mode")
+    gpt_4_o_api_key: typing.Optional[str] = pydantic.Field(alias="gpt4o_api_key")
+    do_not_unroll_columns: typing.Optional[bool]
+    extract_layout: typing.Optional[bool]
+    html_make_all_elements_visible: typing.Optional[bool]
+    html_remove_navigation_elements: typing.Optional[bool]
+    html_remove_fixed_elements: typing.Optional[bool]
+    guess_xlsx_sheet_name: typing.Optional[bool]
+    page_separator: typing.Optional[str]
+    bounding_box: typing.Optional[str]
+    bbox_top: typing.Optional[float]
+    bbox_right: typing.Optional[float]
+    bbox_bottom: typing.Optional[float]
+    bbox_left: typing.Optional[float]
+    target_pages: typing.Optional[str]
+    use_vendor_multimodal_model: typing.Optional[bool]
+    vendor_multimodal_model_name: typing.Optional[str]
+    model: typing.Optional[str]
+    vendor_multimodal_api_key: typing.Optional[str]
+    page_prefix: typing.Optional[str]
+    page_suffix: typing.Optional[str]
+    webhook_url: typing.Optional[str]
+    preset: typing.Optional[str]
+    take_screenshot: typing.Optional[bool]
+    is_formatting_instruction: typing.Optional[bool]
+    premium_mode: typing.Optional[bool]
+    continuous_mode: typing.Optional[bool]
+    input_s_3_path: typing.Optional[str] = pydantic.Field(alias="input_s3_path")
+    input_s_3_region: typing.Optional[str] = pydantic.Field(alias="input_s3_region")
+    output_s_3_path_prefix: typing.Optional[str] = pydantic.Field(alias="output_s3_path_prefix")
+    output_s_3_region: typing.Optional[str] = pydantic.Field(alias="output_s3_region")
+    project_id: typing.Optional[str]
+    azure_openai_deployment_name: typing.Optional[str]
+    azure_openai_endpoint: typing.Optional[str]
+    azure_openai_api_version: typing.Optional[str]
+    azure_openai_key: typing.Optional[str]
+    input_url: typing.Optional[str]
+    http_proxy: typing.Optional[str]
+    auto_mode: typing.Optional[bool]
+    auto_mode_trigger_on_regexp_in_page: typing.Optional[str]
+    auto_mode_trigger_on_text_in_page: typing.Optional[str]
+    auto_mode_trigger_on_table_in_page: typing.Optional[bool]
+    auto_mode_trigger_on_image_in_page: typing.Optional[bool]
+    auto_mode_configuration_json: typing.Optional[str]
+    structured_output: typing.Optional[bool]
+    structured_output_json_schema: typing.Optional[str]
+    structured_output_json_schema_name: typing.Optional[str]
+    max_pages: typing.Optional[int]
+    max_pages_enforced: typing.Optional[int]
+    extract_charts: typing.Optional[bool]
+    formatting_instruction: typing.Optional[str]
+    complemental_formatting_instruction: typing.Optional[str]
+    content_guideline_instruction: typing.Optional[str]
+    spreadsheet_extract_sub_tables: typing.Optional[bool]
+    job_timeout_in_seconds: typing.Optional[float]
+    job_timeout_extra_time_per_page_in_seconds: typing.Optional[float]
+    strict_mode_image_extraction: typing.Optional[bool]
+    strict_mode_image_ocr: typing.Optional[bool]
+    strict_mode_reconstruction: typing.Optional[bool]
+    strict_mode_buggy_font: typing.Optional[bool]
+    save_images: typing.Optional[bool]
+    ignore_document_elements_for_layout_detection: typing.Optional[bool]
+    output_tables_as_html: typing.Optional[bool] = pydantic.Field(alias="output_tables_as_HTML")
+    internal_is_screenshot_job: typing.Optional[bool]
+    parse_mode: typing.Optional[ParsingMode]
+    system_prompt: typing.Optional[str]
+    system_prompt_append: typing.Optional[str]
+    user_prompt: typing.Optional[str]
+    page_error_tolerance: typing.Optional[float]
+    replace_failed_page_mode: typing.Optional[FailPageMode]
+    replace_failed_page_with_error_message_prefix: typing.Optional[str]
+    replace_failed_page_with_error_message_suffix: typing.Optional[str]
+    markdown_table_multiline_header_separator: typing.Optional[str]
+    file_name: str = pydantic.Field(description="The file name.")
+    original_file_name: str = pydantic.Field(description="The original file name.")
+    file_key: str = pydantic.Field(description="The file key.")
+    lang: str = pydantic.Field(description="The language.")
+    output_bucket: typing.Optional[str] = pydantic.Field(alias="outputBucket")
+    file_id: typing.Optional[str]
+    pipeline_id: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        json_encoders = {dt.datetime: serialize_datetime}
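The model mirrors LlamaParse's request options one-to-one and appends job bookkeeping fields (file_name through pipeline_id); only file_name, original_file_name, file_key, and lang are required. A minimal construction sketch, assuming llama-cloud 0.1.20 is installed and importing from the module path shown above (field values are illustrative):

```python
from llama_cloud.types.parse_job_config import ParseJobConfig

# Optional fields default to None under pydantic v1 semantics, so only the
# four required identity fields must be supplied.
config = ParseJobConfig(
    file_name="report.pdf",
    original_file_name="report.pdf",
    file_key="files/report.pdf",
    lang="en",
    premium_mode=True,
    output_tables_as_html=True,  # emitted under its alias "output_tables_as_HTML"
)

# json() injects by_alias=True and exclude_unset=True, so the payload uses
# the wire aliases and drops every unset optional field.
print(config.json())
```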
llama_cloud/types/pg_vector_distance_method.py
ADDED

@@ -0,0 +1,43 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PgVectorDistanceMethod(str, enum.Enum):
+    """
+    Distance methods for PGVector.
+    Docs:
+    https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+    """
+
+    L_2 = "l2"
+    IP = "ip"
+    COSINE = "cosine"
+    L_1 = "l1"
+    HAMMING = "hamming"
+    JACCARD = "jaccard"
+
+    def visit(
+        self,
+        l_2: typing.Callable[[], T_Result],
+        ip: typing.Callable[[], T_Result],
+        cosine: typing.Callable[[], T_Result],
+        l_1: typing.Callable[[], T_Result],
+        hamming: typing.Callable[[], T_Result],
+        jaccard: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PgVectorDistanceMethod.L_2:
+            return l_2()
+        if self is PgVectorDistanceMethod.IP:
+            return ip()
+        if self is PgVectorDistanceMethod.COSINE:
+            return cosine()
+        if self is PgVectorDistanceMethod.L_1:
+            return l_1()
+        if self is PgVectorDistanceMethod.HAMMING:
+            return hamming()
+        if self is PgVectorDistanceMethod.JACCARD:
+            return jaccard()
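Fern pairs each generated enum with an exhaustive visit() dispatcher. A sketch of using it to pick the pgvector query operator for each method (operator symbols taken from the pgvector README linked in the docstring):

```python
from llama_cloud.types.pg_vector_distance_method import PgVectorDistanceMethod

def to_operator(method: PgVectorDistanceMethod) -> str:
    # visit() requires one callable per member, so a new distance method
    # added upstream fails loudly here instead of falling through silently.
    return method.visit(
        l_2=lambda: "<->",      # Euclidean (L2) distance
        ip=lambda: "<#>",       # negative inner product
        cosine=lambda: "<=>",   # cosine distance
        l_1=lambda: "<+>",      # taxicab (L1) distance
        hamming=lambda: "<~>",  # Hamming distance, bit vectors
        jaccard=lambda: "<%>",  # Jaccard distance, bit vectors
    )

assert to_operator(PgVectorDistanceMethod.COSINE) == "<=>"
```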
llama_cloud/types/{extract_job_create_batch.py → pg_vector_hnsw_settings.py}
RENAMED

@@ -4,8 +4,8 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
-from .
+from .pg_vector_distance_method import PgVectorDistanceMethod
+from .pg_vector_vector_type import PgVectorVectorType
 
 try:
     import pydantic

@@ -16,17 +16,20 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class ExtractJobCreateBatch(pydantic.BaseModel):
+class PgVectorHnswSettings(pydantic.BaseModel):
     """
-
+    HNSW settings for PGVector.
     """
 
-
-
-    data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = pydantic.Field(
-        description="The data schema to override the extraction agent's data schema with"
+    ef_construction: typing.Optional[int] = pydantic.Field(
+        description="The number of edges to use during the construction phase."
     )
-
+    ef_search: typing.Optional[int] = pydantic.Field(description="The number of edges to use during the search phase.")
+    m: typing.Optional[int] = pydantic.Field(
+        description="The number of bi-directional links created for each new element."
+    )
+    vector_type: typing.Optional[PgVectorVectorType] = pydantic.Field(description="The type of vector to use.")
+    distance_method: typing.Optional[PgVectorDistanceMethod] = pydantic.Field(description="The distance method to use.")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
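All five settings are optional, so existing payloads keep validating. A construction sketch with illustrative tuning values (the wiring into the updated CloudPostgresVectorStore, which also changed in this release, is not shown in this hunk):

```python
from llama_cloud.types.pg_vector_distance_method import PgVectorDistanceMethod
from llama_cloud.types.pg_vector_hnsw_settings import PgVectorHnswSettings
from llama_cloud.types.pg_vector_vector_type import PgVectorVectorType

hnsw = PgVectorHnswSettings(
    ef_construction=128,  # edges considered while building the index
    ef_search=64,         # edges considered at query time
    m=16,                 # bi-directional links per new element
    vector_type=PgVectorVectorType.HALF_VEC,
    distance_method=PgVectorDistanceMethod.COSINE,
)
print(hnsw.dict())  # exclude_unset=True keeps the serialized payload minimal
```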
llama_cloud/types/pg_vector_vector_type.py
ADDED

@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PgVectorVectorType(str, enum.Enum):
+    """
+    Vector storage formats for PGVector.
+    Docs:
+    https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+    """
+
+    VECTOR = "vector"
+    HALF_VEC = "half_vec"
+    BIT = "bit"
+    SPARSE_VEC = "sparse_vec"
+
+    def visit(
+        self,
+        vector: typing.Callable[[], T_Result],
+        half_vec: typing.Callable[[], T_Result],
+        bit: typing.Callable[[], T_Result],
+        sparse_vec: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PgVectorVectorType.VECTOR:
+            return vector()
+        if self is PgVectorVectorType.HALF_VEC:
+            return half_vec()
+        if self is PgVectorVectorType.BIT:
+            return bit()
+        if self is PgVectorVectorType.SPARSE_VEC:
+            return sparse_vec()
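Note the enum's wire values keep an underscore that pgvector's SQL type names drop. A sketch of mapping members to column declarations (SQL names per the pgvector README):

```python
from llama_cloud.types.pg_vector_vector_type import PgVectorVectorType

def to_column_type(vector_type: PgVectorVectorType, dims: int) -> str:
    # "half_vec" and "sparse_vec" become "halfvec" and "sparsevec" in SQL.
    sql_name = vector_type.visit(
        vector=lambda: "vector",
        half_vec=lambda: "halfvec",
        bit=lambda: "bit",
        sparse_vec=lambda: "sparsevec",
    )
    return f"{sql_name}({dims})"

assert to_column_type(PgVectorVectorType.HALF_VEC, 1536) == "halfvec(1536)"
```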
llama_cloud/types/pipeline.py
CHANGED
@@ -4,12 +4,12 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .configured_transformation_item import ConfiguredTransformationItem
 from .data_sink import DataSink
 from .eval_execution_params import EvalExecutionParams
 from .llama_parse_parameters import LlamaParseParameters
 from .pipeline_configuration_hashes import PipelineConfigurationHashes
 from .pipeline_embedding_config import PipelineEmbeddingConfig
+from .pipeline_metadata_config import PipelineMetadataConfig
 from .pipeline_transform_config import PipelineTransformConfig
 from .pipeline_type import PipelineType
 from .preset_retrieval_params import PresetRetrievalParams

@@ -39,9 +39,6 @@ class Pipeline(pydantic.BaseModel):
     )
     managed_pipeline_id: typing.Optional[str]
     embedding_config: PipelineEmbeddingConfig
-    configured_transformations: typing.Optional[typing.List[ConfiguredTransformationItem]] = pydantic.Field(
-        description="Deprecated don't use it, List of configured transformations."
-    )
     config_hash: typing.Optional[PipelineConfigurationHashes]
     transform_config: typing.Optional[PipelineTransformConfig] = pydantic.Field(
         description="Configuration for the transformation."

@@ -54,6 +51,7 @@
     )
     llama_parse_parameters: typing.Optional[LlamaParseParameters]
     data_sink: typing.Optional[DataSink]
+    metadata_config: typing.Optional[PipelineMetadataConfig]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_create.py
CHANGED

@@ -4,12 +4,12 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .configured_transformation_item import ConfiguredTransformationItem
 from .data_sink_create import DataSinkCreate
 from .eval_execution_params import EvalExecutionParams
 from .llama_parse_parameters import LlamaParseParameters
 from .pipeline_create_embedding_config import PipelineCreateEmbeddingConfig
 from .pipeline_create_transform_config import PipelineCreateTransformConfig
+from .pipeline_metadata_config import PipelineMetadataConfig
 from .pipeline_type import PipelineType
 from .preset_retrieval_params import PresetRetrievalParams
 

@@ -31,7 +31,6 @@ class PipelineCreate(pydantic.BaseModel):
     transform_config: typing.Optional[PipelineCreateTransformConfig] = pydantic.Field(
         description="Configuration for the transformation."
     )
-    configured_transformations: typing.Optional[typing.List[ConfiguredTransformationItem]]
     data_sink_id: typing.Optional[str]
     embedding_model_config_id: typing.Optional[str]
     data_sink: typing.Optional[DataSinkCreate]

@@ -44,6 +43,8 @@
     llama_parse_parameters: typing.Optional[LlamaParseParameters] = pydantic.Field(
         description="Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline."
     )
+    status: typing.Optional[str]
+    metadata_config: typing.Optional[PipelineMetadataConfig]
     name: str
     pipeline_type: typing.Optional[PipelineType] = pydantic.Field(
         description="Type of pipeline. Either PLAYGROUND or MANAGED."
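Callers migrating from 0.1.18 should note that configured_transformations is gone from both Pipeline and PipelineCreate; since pydantic v1 ignores unknown fields by default, code still passing it will likely have the value dropped silently rather than rejected. A minimal sketch (PipelineMetadataConfig's own fields are not shown in this diff, so it is left unset):

```python
from llama_cloud.types.pipeline_create import PipelineCreate

# name is the only required field; transform_config replaces the removed
# configured_transformations list.
pipeline = PipelineCreate(name="docs-index")
print(pipeline.json())
```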
llama_cloud/types/pipeline_data_source.py
CHANGED

@@ -7,6 +7,7 @@ from ..core.datetime_utils import serialize_datetime
 from .configurable_data_source_names import ConfigurableDataSourceNames
 from .pipeline_data_source_component import PipelineDataSourceComponent
 from .pipeline_data_source_custom_metadata_value import PipelineDataSourceCustomMetadataValue
+from .pipeline_data_source_status import PipelineDataSourceStatus
 
 try:
     import pydantic

@@ -36,6 +37,8 @@ class PipelineDataSource(pydantic.BaseModel):
     last_synced_at: dt.datetime = pydantic.Field(description="The last time the data source was automatically synced.")
     sync_interval: typing.Optional[float]
     sync_schedule_set_by: typing.Optional[str]
+    status: typing.Optional[PipelineDataSourceStatus]
+    status_updated_at: typing.Optional[dt.datetime]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_data_source_component.py
CHANGED

@@ -2,27 +2,6 @@
 
 import typing
 
-from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_box_data_source import CloudBoxDataSource
-from .cloud_confluence_data_source import CloudConfluenceDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
-from .cloud_jira_data_source import CloudJiraDataSource
-from .cloud_notion_page_data_source import CloudNotionPageDataSource
-from .cloud_one_drive_data_source import CloudOneDriveDataSource
-from .cloud_s_3_data_source import CloudS3DataSource
-from .cloud_sharepoint_data_source import CloudSharepointDataSource
-from .cloud_slack_data_source import CloudSlackDataSource
+from .pipeline_data_source_component_one import PipelineDataSourceComponentOne
 
-PipelineDataSourceComponent = typing.Union[
-    typing.Dict[str, typing.Any],
-    CloudS3DataSource,
-    CloudAzStorageBlobDataSource,
-    CloudGoogleDriveDataSource,
-    CloudOneDriveDataSource,
-    CloudSharepointDataSource,
-    CloudSlackDataSource,
-    CloudNotionPageDataSource,
-    CloudConfluenceDataSource,
-    CloudJiraDataSource,
-    CloudBoxDataSource,
-]
+PipelineDataSourceComponent = typing.Union[typing.Dict[str, typing.Any], PipelineDataSourceComponentOne]
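The eleven-way union collapses to a dict-or-typed-variant pair, with the typed half delegated to the new tagged union in the file below. A narrowing sketch:

```python
from llama_cloud.types.pipeline_data_source_component import PipelineDataSourceComponent

def component_kind(component: PipelineDataSourceComponent) -> str:
    # Dict[str, Any] stays first in the union as an escape hatch for
    # payloads that match no typed variant.
    if isinstance(component, dict):
        return "untyped payload"
    return component.type  # the Literal discriminator every typed variant carries
```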
llama_cloud/types/pipeline_data_source_component_one.py
ADDED

@@ -0,0 +1,122 @@
+# This file was auto-generated by Fern from our API Definition.
+
+from __future__ import annotations
+
+import typing
+
+import typing_extensions
+
+from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
+from .cloud_box_data_source import CloudBoxDataSource
+from .cloud_confluence_data_source import CloudConfluenceDataSource
+from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_jira_data_source import CloudJiraDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
+from .cloud_one_drive_data_source import CloudOneDriveDataSource
+from .cloud_s_3_data_source import CloudS3DataSource
+from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource
+
+
+class PipelineDataSourceComponentOne_AzureStorageBlob(CloudAzStorageBlobDataSource):
+    type: typing_extensions.Literal["AZURE_STORAGE_BLOB"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_Box(CloudBoxDataSource):
+    type: typing_extensions.Literal["BOX"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_Confluence(CloudConfluenceDataSource):
+    type: typing_extensions.Literal["CONFLUENCE"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_GoogleDrive(CloudGoogleDriveDataSource):
+    type: typing_extensions.Literal["GOOGLE_DRIVE"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_Jira(CloudJiraDataSource):
+    type: typing_extensions.Literal["JIRA"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_MicrosoftOnedrive(CloudOneDriveDataSource):
+    type: typing_extensions.Literal["MICROSOFT_ONEDRIVE"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_MicrosoftSharepoint(CloudSharepointDataSource):
+    type: typing_extensions.Literal["MICROSOFT_SHAREPOINT"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_NotionPage(CloudNotionPageDataSource):
+    type: typing_extensions.Literal["NOTION_PAGE"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_S3(CloudS3DataSource):
+    type: typing_extensions.Literal["S3"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+class PipelineDataSourceComponentOne_Slack(CloudSlackDataSource):
+    type: typing_extensions.Literal["SLACK"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
+PipelineDataSourceComponentOne = typing.Union[
+    PipelineDataSourceComponentOne_AzureStorageBlob,
+    PipelineDataSourceComponentOne_Box,
+    PipelineDataSourceComponentOne_Confluence,
+    PipelineDataSourceComponentOne_GoogleDrive,
+    PipelineDataSourceComponentOne_Jira,
+    PipelineDataSourceComponentOne_MicrosoftOnedrive,
+    PipelineDataSourceComponentOne_MicrosoftSharepoint,
+    PipelineDataSourceComponentOne_NotionPage,
+    PipelineDataSourceComponentOne_S3,
+    PipelineDataSourceComponentOne_Slack,
+]
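Each subclass pins a Literal type tag, which is what lets smart_union discriminate reliably where the old flat union of look-alike models could not. A construction sketch; the bucket field is an assumption about CloudS3DataSource, whose definition is not part of this diff:

```python
from llama_cloud.types.pipeline_data_source_component_one import (
    PipelineDataSourceComponentOne_S3,
)

component = PipelineDataSourceComponentOne_S3(
    type="S3",           # required Literal discriminator
    bucket="my-bucket",  # assumed CloudS3DataSource field, for illustration only
)
```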
llama_cloud/types/pipeline_data_source_status.py
ADDED

@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PipelineDataSourceStatus(str, enum.Enum):
+    NOT_STARTED = "NOT_STARTED"
+    IN_PROGRESS = "IN_PROGRESS"
+    SUCCESS = "SUCCESS"
+    ERROR = "ERROR"
+    CANCELLED = "CANCELLED"
+
+    def visit(
+        self,
+        not_started: typing.Callable[[], T_Result],
+        in_progress: typing.Callable[[], T_Result],
+        success: typing.Callable[[], T_Result],
+        error: typing.Callable[[], T_Result],
+        cancelled: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PipelineDataSourceStatus.NOT_STARTED:
+            return not_started()
+        if self is PipelineDataSourceStatus.IN_PROGRESS:
+            return in_progress()
+        if self is PipelineDataSourceStatus.SUCCESS:
+            return success()
+        if self is PipelineDataSourceStatus.ERROR:
+            return error()
+        if self is PipelineDataSourceStatus.CANCELLED:
+            return cancelled()
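A sketch of an exhaustive terminal-state predicate on the new enum; because visit() takes one callable per member, a future sixth status surfaces as a TypeError instead of a silent fall-through:

```python
from llama_cloud.types.pipeline_data_source_status import PipelineDataSourceStatus

def is_terminal(status: PipelineDataSourceStatus) -> bool:
    return status.visit(
        not_started=lambda: False,
        in_progress=lambda: False,
        success=lambda: True,
        error=lambda: True,
        cancelled=lambda: True,
    )

assert is_terminal(PipelineDataSourceStatus.SUCCESS)
assert not is_terminal(PipelineDataSourceStatus.IN_PROGRESS)
```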
llama_cloud/types/pipeline_file.py
CHANGED

@@ -42,6 +42,7 @@ class PipelineFile(pydantic.BaseModel):
     config_hash: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileConfigHashValue]]]
     indexed_page_count: typing.Optional[int]
     status: typing.Optional[PipelineFileStatus]
+    status_updated_at: typing.Optional[dt.datetime]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_file_update_dispatcher_config.py
ADDED

@@ -0,0 +1,38 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .delete_params import DeleteParams
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PipelineFileUpdateDispatcherConfig(pydantic.BaseModel):
+    """
+    Schema for the parameters of a load files job.
+    """
+
+    pipeline_file_ids: typing.Optional[typing.List[str]]
+    should_delete: typing.Optional[bool]
+    delete_info: typing.Optional[DeleteParams]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
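A minimal payload sketch; DeleteParams is also new in this release (see delete_params.py in the file list) but its fields are not shown here, so delete_info stays unset and the id is illustrative:

```python
from llama_cloud.types.pipeline_file_update_dispatcher_config import (
    PipelineFileUpdateDispatcherConfig,
)

dispatch = PipelineFileUpdateDispatcherConfig(
    pipeline_file_ids=["3fa85f64-5717-4562-b3fc-2c963f66afa6"],
    should_delete=False,
)
print(dispatch.json())  # unset fields are excluded from the body
```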
llama_cloud/types/{markdown_node_parser.py → pipeline_file_updater_config.py}
RENAMED

@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .delete_params import DeleteParams
 
 try:
     import pydantic

@@ -14,25 +15,23 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class MarkdownNodeParser(pydantic.BaseModel):
+class PipelineFileUpdaterConfig(pydantic.BaseModel):
     """
-
-
-    Splits a document into Nodes using Markdown header-based splitting logic.
-    Each node contains its text content and the path of headers leading to it.
-
-    Args:
-        include_metadata (bool): whether to include metadata in nodes
-        include_prev_next_rel (bool): whether to include prev/next relationships
+    Schema for the parameters of a load files job.
     """
 
-
-
+    custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+    resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+    should_delete: typing.Optional[bool]
+    should_parse: typing.Optional[bool]
+    delete_info: typing.Optional[DeleteParams]
+    is_new_file: typing.Optional[bool] = pydantic.Field(description="Whether the file is new")
+    data_source_project_file_changed: typing.Optional[bool] = pydantic.Field(
+        description="Whether the data source project file has changed"
+    )
+    should_migrate_pipeline_file_to_external_file_id: typing.Optional[bool] = pydantic.Field(
+        description="Whether to migrate the pipeline file to the external file id"
     )
-    include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
-    callback_manager: typing.Optional[typing.Any]
-    id_func: typing.Optional[str]
-    class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
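A sketch of the repurposed per-file update payload (values illustrative):

```python
from llama_cloud.types.pipeline_file_updater_config import PipelineFileUpdaterConfig

update = PipelineFileUpdaterConfig(
    is_new_file=True,
    should_parse=True,
    custom_metadata={"source": "quarterly-report"},
)
print(update.json())
```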
llama_cloud/types/pipeline_managed_ingestion_job_params.py
ADDED

@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .delete_params import DeleteParams
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PipelineManagedIngestionJobParams(pydantic.BaseModel):
+    """
+    Schema for the parameters of a managed pipeline ingestion job.
+    """
+
+    should_delete: typing.Optional[bool]
+    delete_info: typing.Optional[DeleteParams]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
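A final sketch for the managed-ingestion payload; with exclude_unset in play, only explicitly set fields reach the wire:

```python
from llama_cloud.types.pipeline_managed_ingestion_job_params import (
    PipelineManagedIngestionJobParams,
)

params = PipelineManagedIngestionJobParams(should_delete=False)
assert params.dict() == {"should_delete": False}  # delete_info is unset, so omitted
```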