llama-cloud 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +20 -0
- llama_cloud/resources/__init__.py +2 -0
- llama_cloud/resources/files/client.py +159 -0
- llama_cloud/resources/parsing/client.py +40 -0
- llama_cloud/resources/pipelines/__init__.py +2 -0
- llama_cloud/resources/pipelines/client.py +188 -2
- llama_cloud/resources/pipelines/types/__init__.py +2 -0
- llama_cloud/resources/pipelines/types/pipeline_update_embedding_config.py +11 -0
- llama_cloud/types/__init__.py +18 -0
- llama_cloud/types/cloud_az_storage_blob_data_source.py +1 -2
- llama_cloud/types/cloud_postgres_vector_store.py +6 -8
- llama_cloud/types/configurable_transformation_names.py +4 -0
- llama_cloud/types/configured_transformation_item_component_one.py +2 -0
- llama_cloud/types/extend_vertex_text_embedding.py +58 -0
- llama_cloud/types/llama_parse_parameters.py +3 -1
- llama_cloud/types/llm_model_data.py +1 -0
- llama_cloud/types/llm_parameters.py +4 -1
- llama_cloud/types/page_screenshot_metadata.py +33 -0
- llama_cloud/types/page_screenshot_node_with_score.py +38 -0
- llama_cloud/types/pipeline.py +4 -0
- llama_cloud/types/pipeline_configuration_hashes.py +37 -0
- llama_cloud/types/pipeline_create_embedding_config.py +11 -0
- llama_cloud/types/pipeline_data_source.py +7 -0
- llama_cloud/types/pipeline_data_source_create.py +3 -0
- llama_cloud/types/pipeline_embedding_config.py +11 -0
- llama_cloud/types/pipeline_file.py +4 -0
- llama_cloud/types/pipeline_file_config_hash_value.py +5 -0
- llama_cloud/types/preset_retrieval_params.py +1 -0
- llama_cloud/types/retrieve_results.py +4 -0
- llama_cloud/types/vertex_ai_embedding_config.py +34 -0
- llama_cloud/types/vertex_embedding_mode.py +45 -0
- {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/METADATA +1 -1
- {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/RECORD +35 -28
- {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/LICENSE +0 -0
- {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/WHEEL +0 -0
|
@@ -1040,6 +1040,52 @@ class PipelinesClient:
|
|
|
1040
1040
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1041
1041
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1042
1042
|
|
|
1043
|
+
def update_pipeline_data_source(
|
|
1044
|
+
self, pipeline_id: str, data_source_id: str, *, sync_interval: typing.Optional[float] = OMIT
|
|
1045
|
+
) -> PipelineDataSource:
|
|
1046
|
+
"""
|
|
1047
|
+
Update the configuration of a data source in a pipeline.
|
|
1048
|
+
|
|
1049
|
+
Parameters:
|
|
1050
|
+
- pipeline_id: str.
|
|
1051
|
+
|
|
1052
|
+
- data_source_id: str.
|
|
1053
|
+
|
|
1054
|
+
- sync_interval: typing.Optional[float]. The interval at which the data source should be synced.
|
|
1055
|
+
---
|
|
1056
|
+
from llama_cloud.client import LlamaCloud
|
|
1057
|
+
|
|
1058
|
+
client = LlamaCloud(
|
|
1059
|
+
token="YOUR_TOKEN",
|
|
1060
|
+
)
|
|
1061
|
+
client.pipelines.update_pipeline_data_source(
|
|
1062
|
+
pipeline_id="string",
|
|
1063
|
+
data_source_id="string",
|
|
1064
|
+
)
|
|
1065
|
+
"""
|
|
1066
|
+
_request: typing.Dict[str, typing.Any] = {}
|
|
1067
|
+
if sync_interval is not OMIT:
|
|
1068
|
+
_request["sync_interval"] = sync_interval
|
|
1069
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
1070
|
+
"PUT",
|
|
1071
|
+
urllib.parse.urljoin(
|
|
1072
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
1073
|
+
f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}",
|
|
1074
|
+
),
|
|
1075
|
+
json=jsonable_encoder(_request),
|
|
1076
|
+
headers=self._client_wrapper.get_headers(),
|
|
1077
|
+
timeout=60,
|
|
1078
|
+
)
|
|
1079
|
+
if 200 <= _response.status_code < 300:
|
|
1080
|
+
return pydantic.parse_obj_as(PipelineDataSource, _response.json()) # type: ignore
|
|
1081
|
+
if _response.status_code == 422:
|
|
1082
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1083
|
+
try:
|
|
1084
|
+
_response_json = _response.json()
|
|
1085
|
+
except JSONDecodeError:
|
|
1086
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1087
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1088
|
+
|
|
1043
1089
|
def delete_pipeline_data_source(self, pipeline_id: str, data_source_id: str) -> None:
|
|
1044
1090
|
"""
|
|
1045
1091
|
Delete a data source from a pipeline.
|
|
@@ -1116,6 +1162,44 @@ class PipelinesClient:
|
|
|
1116
1162
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1117
1163
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1118
1164
|
|
|
1165
|
+
def get_pipeline_data_source_status(self, pipeline_id: str, data_source_id: str) -> ManagedIngestionStatusResponse:
|
|
1166
|
+
"""
|
|
1167
|
+
Get the status of a data source for a pipeline.
|
|
1168
|
+
|
|
1169
|
+
Parameters:
|
|
1170
|
+
- pipeline_id: str.
|
|
1171
|
+
|
|
1172
|
+
- data_source_id: str.
|
|
1173
|
+
---
|
|
1174
|
+
from llama_cloud.client import LlamaCloud
|
|
1175
|
+
|
|
1176
|
+
client = LlamaCloud(
|
|
1177
|
+
token="YOUR_TOKEN",
|
|
1178
|
+
)
|
|
1179
|
+
client.pipelines.get_pipeline_data_source_status(
|
|
1180
|
+
pipeline_id="string",
|
|
1181
|
+
data_source_id="string",
|
|
1182
|
+
)
|
|
1183
|
+
"""
|
|
1184
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
1185
|
+
"GET",
|
|
1186
|
+
urllib.parse.urljoin(
|
|
1187
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
1188
|
+
f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status",
|
|
1189
|
+
),
|
|
1190
|
+
headers=self._client_wrapper.get_headers(),
|
|
1191
|
+
timeout=60,
|
|
1192
|
+
)
|
|
1193
|
+
if 200 <= _response.status_code < 300:
|
|
1194
|
+
return pydantic.parse_obj_as(ManagedIngestionStatusResponse, _response.json()) # type: ignore
|
|
1195
|
+
if _response.status_code == 422:
|
|
1196
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
1197
|
+
try:
|
|
1198
|
+
_response_json = _response.json()
|
|
1199
|
+
except JSONDecodeError:
|
|
1200
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
1201
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
1202
|
+
|
|
1119
1203
|
def run_search(
|
|
1120
1204
|
self,
|
|
1121
1205
|
pipeline_id: str,
|
|
@@ -1128,6 +1212,7 @@ class PipelinesClient:
|
|
|
1128
1212
|
search_filters: typing.Optional[MetadataFilters] = OMIT,
|
|
1129
1213
|
files_top_k: typing.Optional[int] = OMIT,
|
|
1130
1214
|
retrieval_mode: typing.Optional[RetrievalMode] = OMIT,
|
|
1215
|
+
retrieve_image_nodes: typing.Optional[bool] = OMIT,
|
|
1131
1216
|
query: str,
|
|
1132
1217
|
) -> RetrieveResults:
|
|
1133
1218
|
"""
|
|
@@ -1152,6 +1237,8 @@ class PipelinesClient:
|
|
|
1152
1237
|
|
|
1153
1238
|
- retrieval_mode: typing.Optional[RetrievalMode]. The retrieval mode for the query.
|
|
1154
1239
|
|
|
1240
|
+
- retrieve_image_nodes: typing.Optional[bool]. Whether to retrieve image nodes.
|
|
1241
|
+
|
|
1155
1242
|
- query: str. The query to retrieve against.
|
|
1156
1243
|
---
|
|
1157
1244
|
from llama_cloud import FilterCondition, MetadataFilters, RetrievalMode
|
|
@@ -1187,6 +1274,8 @@ class PipelinesClient:
|
|
|
1187
1274
|
_request["files_top_k"] = files_top_k
|
|
1188
1275
|
if retrieval_mode is not OMIT:
|
|
1189
1276
|
_request["retrieval_mode"] = retrieval_mode
|
|
1277
|
+
if retrieve_image_nodes is not OMIT:
|
|
1278
|
+
_request["retrieve_image_nodes"] = retrieve_image_nodes
|
|
1190
1279
|
_response = self._client_wrapper.httpx_client.request(
|
|
1191
1280
|
"POST",
|
|
1192
1281
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/retrieve"),
|
|
@@ -1334,6 +1423,7 @@ class PipelinesClient:
|
|
|
1334
1423
|
MetadataFilters,
|
|
1335
1424
|
PresetRetrievalParams,
|
|
1336
1425
|
RetrievalMode,
|
|
1426
|
+
SupportedLlmModelNames,
|
|
1337
1427
|
)
|
|
1338
1428
|
from llama_cloud.client import LlamaCloud
|
|
1339
1429
|
|
|
@@ -1350,7 +1440,9 @@ class PipelinesClient:
|
|
|
1350
1440
|
),
|
|
1351
1441
|
retrieval_mode=RetrievalMode.CHUNKS,
|
|
1352
1442
|
),
|
|
1353
|
-
llm_parameters=LlmParameters(
|
|
1443
|
+
llm_parameters=LlmParameters(
|
|
1444
|
+
model_name=SupportedLlmModelNames.GPT_3_5_TURBO,
|
|
1445
|
+
),
|
|
1354
1446
|
),
|
|
1355
1447
|
)
|
|
1356
1448
|
"""
|
|
@@ -2652,6 +2744,52 @@ class AsyncPipelinesClient:
|
|
|
2652
2744
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
2653
2745
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
2654
2746
|
|
|
2747
|
+
async def update_pipeline_data_source(
|
|
2748
|
+
self, pipeline_id: str, data_source_id: str, *, sync_interval: typing.Optional[float] = OMIT
|
|
2749
|
+
) -> PipelineDataSource:
|
|
2750
|
+
"""
|
|
2751
|
+
Update the configuration of a data source in a pipeline.
|
|
2752
|
+
|
|
2753
|
+
Parameters:
|
|
2754
|
+
- pipeline_id: str.
|
|
2755
|
+
|
|
2756
|
+
- data_source_id: str.
|
|
2757
|
+
|
|
2758
|
+
- sync_interval: typing.Optional[float]. The interval at which the data source should be synced.
|
|
2759
|
+
---
|
|
2760
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
2761
|
+
|
|
2762
|
+
client = AsyncLlamaCloud(
|
|
2763
|
+
token="YOUR_TOKEN",
|
|
2764
|
+
)
|
|
2765
|
+
await client.pipelines.update_pipeline_data_source(
|
|
2766
|
+
pipeline_id="string",
|
|
2767
|
+
data_source_id="string",
|
|
2768
|
+
)
|
|
2769
|
+
"""
|
|
2770
|
+
_request: typing.Dict[str, typing.Any] = {}
|
|
2771
|
+
if sync_interval is not OMIT:
|
|
2772
|
+
_request["sync_interval"] = sync_interval
|
|
2773
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
2774
|
+
"PUT",
|
|
2775
|
+
urllib.parse.urljoin(
|
|
2776
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
2777
|
+
f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}",
|
|
2778
|
+
),
|
|
2779
|
+
json=jsonable_encoder(_request),
|
|
2780
|
+
headers=self._client_wrapper.get_headers(),
|
|
2781
|
+
timeout=60,
|
|
2782
|
+
)
|
|
2783
|
+
if 200 <= _response.status_code < 300:
|
|
2784
|
+
return pydantic.parse_obj_as(PipelineDataSource, _response.json()) # type: ignore
|
|
2785
|
+
if _response.status_code == 422:
|
|
2786
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
2787
|
+
try:
|
|
2788
|
+
_response_json = _response.json()
|
|
2789
|
+
except JSONDecodeError:
|
|
2790
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
2791
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
2792
|
+
|
|
2655
2793
|
async def delete_pipeline_data_source(self, pipeline_id: str, data_source_id: str) -> None:
|
|
2656
2794
|
"""
|
|
2657
2795
|
Delete a data source from a pipeline.
|
|
@@ -2728,6 +2866,46 @@ class AsyncPipelinesClient:
|
|
|
2728
2866
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
2729
2867
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
2730
2868
|
|
|
2869
|
+
async def get_pipeline_data_source_status(
|
|
2870
|
+
self, pipeline_id: str, data_source_id: str
|
|
2871
|
+
) -> ManagedIngestionStatusResponse:
|
|
2872
|
+
"""
|
|
2873
|
+
Get the status of a data source for a pipeline.
|
|
2874
|
+
|
|
2875
|
+
Parameters:
|
|
2876
|
+
- pipeline_id: str.
|
|
2877
|
+
|
|
2878
|
+
- data_source_id: str.
|
|
2879
|
+
---
|
|
2880
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
2881
|
+
|
|
2882
|
+
client = AsyncLlamaCloud(
|
|
2883
|
+
token="YOUR_TOKEN",
|
|
2884
|
+
)
|
|
2885
|
+
await client.pipelines.get_pipeline_data_source_status(
|
|
2886
|
+
pipeline_id="string",
|
|
2887
|
+
data_source_id="string",
|
|
2888
|
+
)
|
|
2889
|
+
"""
|
|
2890
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
2891
|
+
"GET",
|
|
2892
|
+
urllib.parse.urljoin(
|
|
2893
|
+
f"{self._client_wrapper.get_base_url()}/",
|
|
2894
|
+
f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status",
|
|
2895
|
+
),
|
|
2896
|
+
headers=self._client_wrapper.get_headers(),
|
|
2897
|
+
timeout=60,
|
|
2898
|
+
)
|
|
2899
|
+
if 200 <= _response.status_code < 300:
|
|
2900
|
+
return pydantic.parse_obj_as(ManagedIngestionStatusResponse, _response.json()) # type: ignore
|
|
2901
|
+
if _response.status_code == 422:
|
|
2902
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
2903
|
+
try:
|
|
2904
|
+
_response_json = _response.json()
|
|
2905
|
+
except JSONDecodeError:
|
|
2906
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
2907
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
2908
|
+
|
|
2731
2909
|
async def run_search(
|
|
2732
2910
|
self,
|
|
2733
2911
|
pipeline_id: str,
|
|
@@ -2740,6 +2918,7 @@ class AsyncPipelinesClient:
|
|
|
2740
2918
|
search_filters: typing.Optional[MetadataFilters] = OMIT,
|
|
2741
2919
|
files_top_k: typing.Optional[int] = OMIT,
|
|
2742
2920
|
retrieval_mode: typing.Optional[RetrievalMode] = OMIT,
|
|
2921
|
+
retrieve_image_nodes: typing.Optional[bool] = OMIT,
|
|
2743
2922
|
query: str,
|
|
2744
2923
|
) -> RetrieveResults:
|
|
2745
2924
|
"""
|
|
@@ -2764,6 +2943,8 @@ class AsyncPipelinesClient:
|
|
|
2764
2943
|
|
|
2765
2944
|
- retrieval_mode: typing.Optional[RetrievalMode]. The retrieval mode for the query.
|
|
2766
2945
|
|
|
2946
|
+
- retrieve_image_nodes: typing.Optional[bool]. Whether to retrieve image nodes.
|
|
2947
|
+
|
|
2767
2948
|
- query: str. The query to retrieve against.
|
|
2768
2949
|
---
|
|
2769
2950
|
from llama_cloud import FilterCondition, MetadataFilters, RetrievalMode
|
|
@@ -2799,6 +2980,8 @@ class AsyncPipelinesClient:
|
|
|
2799
2980
|
_request["files_top_k"] = files_top_k
|
|
2800
2981
|
if retrieval_mode is not OMIT:
|
|
2801
2982
|
_request["retrieval_mode"] = retrieval_mode
|
|
2983
|
+
if retrieve_image_nodes is not OMIT:
|
|
2984
|
+
_request["retrieve_image_nodes"] = retrieve_image_nodes
|
|
2802
2985
|
_response = await self._client_wrapper.httpx_client.request(
|
|
2803
2986
|
"POST",
|
|
2804
2987
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/retrieve"),
|
|
@@ -2946,6 +3129,7 @@ class AsyncPipelinesClient:
|
|
|
2946
3129
|
MetadataFilters,
|
|
2947
3130
|
PresetRetrievalParams,
|
|
2948
3131
|
RetrievalMode,
|
|
3132
|
+
SupportedLlmModelNames,
|
|
2949
3133
|
)
|
|
2950
3134
|
from llama_cloud.client import AsyncLlamaCloud
|
|
2951
3135
|
|
|
@@ -2962,7 +3146,9 @@ class AsyncPipelinesClient:
|
|
|
2962
3146
|
),
|
|
2963
3147
|
retrieval_mode=RetrievalMode.CHUNKS,
|
|
2964
3148
|
),
|
|
2965
|
-
llm_parameters=LlmParameters(
|
|
3149
|
+
llm_parameters=LlmParameters(
|
|
3150
|
+
model_name=SupportedLlmModelNames.GPT_3_5_TURBO,
|
|
3151
|
+
),
|
|
2966
3152
|
),
|
|
2967
3153
|
)
|
|
2968
3154
|
"""
|
|
@@ -9,6 +9,7 @@ from .pipeline_update_embedding_config import (
|
|
|
9
9
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
10
10
|
PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
11
11
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
12
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
12
13
|
)
|
|
13
14
|
from .pipeline_update_transform_config import (
|
|
14
15
|
PipelineUpdateTransformConfig,
|
|
@@ -25,6 +26,7 @@ __all__ = [
|
|
|
25
26
|
"PipelineUpdateEmbeddingConfig_GeminiEmbedding",
|
|
26
27
|
"PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
27
28
|
"PipelineUpdateEmbeddingConfig_OpenaiEmbedding",
|
|
29
|
+
"PipelineUpdateEmbeddingConfig_VertexaiEmbedding",
|
|
28
30
|
"PipelineUpdateTransformConfig",
|
|
29
31
|
"PipelineUpdateTransformConfig_Advanced",
|
|
30
32
|
"PipelineUpdateTransformConfig_Auto",
|
|
@@ -12,6 +12,7 @@ from ....types.cohere_embedding_config import CohereEmbeddingConfig
|
|
|
12
12
|
from ....types.gemini_embedding_config import GeminiEmbeddingConfig
|
|
13
13
|
from ....types.hugging_face_inference_api_embedding_config import HuggingFaceInferenceApiEmbeddingConfig
|
|
14
14
|
from ....types.open_ai_embedding_config import OpenAiEmbeddingConfig
|
|
15
|
+
from ....types.vertex_ai_embedding_config import VertexAiEmbeddingConfig
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class PipelineUpdateEmbeddingConfig_OpenaiEmbedding(OpenAiEmbeddingConfig):
|
|
@@ -68,6 +69,15 @@ class PipelineUpdateEmbeddingConfig_CohereEmbedding(CohereEmbeddingConfig):
|
|
|
68
69
|
allow_population_by_field_name = True
|
|
69
70
|
|
|
70
71
|
|
|
72
|
+
class PipelineUpdateEmbeddingConfig_VertexaiEmbedding(VertexAiEmbeddingConfig):
|
|
73
|
+
type: typing_extensions.Literal["VERTEXAI_EMBEDDING"]
|
|
74
|
+
|
|
75
|
+
class Config:
|
|
76
|
+
frozen = True
|
|
77
|
+
smart_union = True
|
|
78
|
+
allow_population_by_field_name = True
|
|
79
|
+
|
|
80
|
+
|
|
71
81
|
PipelineUpdateEmbeddingConfig = typing.Union[
|
|
72
82
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
73
83
|
PipelineUpdateEmbeddingConfig_AzureEmbedding,
|
|
@@ -75,4 +85,5 @@ PipelineUpdateEmbeddingConfig = typing.Union[
|
|
|
75
85
|
PipelineUpdateEmbeddingConfig_BedrockEmbedding,
|
|
76
86
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
77
87
|
PipelineUpdateEmbeddingConfig_CohereEmbedding,
|
|
88
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
78
89
|
]
|
llama_cloud/types/__init__.py
CHANGED
|
@@ -82,6 +82,7 @@ from .eval_execution_params_override import EvalExecutionParamsOverride
|
|
|
82
82
|
from .eval_question import EvalQuestion
|
|
83
83
|
from .eval_question_create import EvalQuestionCreate
|
|
84
84
|
from .eval_question_result import EvalQuestionResult
|
|
85
|
+
from .extend_vertex_text_embedding import ExtendVertexTextEmbedding
|
|
85
86
|
from .extraction_job import ExtractionJob
|
|
86
87
|
from .extraction_result import ExtractionResult
|
|
87
88
|
from .extraction_result_data_value import ExtractionResultDataValue
|
|
@@ -127,6 +128,8 @@ from .open_ai_embedding import OpenAiEmbedding
|
|
|
127
128
|
from .open_ai_embedding_config import OpenAiEmbeddingConfig
|
|
128
129
|
from .organization import Organization
|
|
129
130
|
from .organization_create import OrganizationCreate
|
|
131
|
+
from .page_screenshot_metadata import PageScreenshotMetadata
|
|
132
|
+
from .page_screenshot_node_with_score import PageScreenshotNodeWithScore
|
|
130
133
|
from .page_segmentation_config import PageSegmentationConfig
|
|
131
134
|
from .page_splitter_node_parser import PageSplitterNodeParser
|
|
132
135
|
from .parser_languages import ParserLanguages
|
|
@@ -137,6 +140,7 @@ from .parsing_job_markdown_result import ParsingJobMarkdownResult
|
|
|
137
140
|
from .parsing_job_text_result import ParsingJobTextResult
|
|
138
141
|
from .parsing_usage import ParsingUsage
|
|
139
142
|
from .pipeline import Pipeline
|
|
143
|
+
from .pipeline_configuration_hashes import PipelineConfigurationHashes
|
|
140
144
|
from .pipeline_create import PipelineCreate
|
|
141
145
|
from .pipeline_create_embedding_config import (
|
|
142
146
|
PipelineCreateEmbeddingConfig,
|
|
@@ -146,6 +150,7 @@ from .pipeline_create_embedding_config import (
|
|
|
146
150
|
PipelineCreateEmbeddingConfig_GeminiEmbedding,
|
|
147
151
|
PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
148
152
|
PipelineCreateEmbeddingConfig_OpenaiEmbedding,
|
|
153
|
+
PipelineCreateEmbeddingConfig_VertexaiEmbedding,
|
|
149
154
|
)
|
|
150
155
|
from .pipeline_create_transform_config import (
|
|
151
156
|
PipelineCreateTransformConfig,
|
|
@@ -166,8 +171,10 @@ from .pipeline_embedding_config import (
|
|
|
166
171
|
PipelineEmbeddingConfig_GeminiEmbedding,
|
|
167
172
|
PipelineEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
168
173
|
PipelineEmbeddingConfig_OpenaiEmbedding,
|
|
174
|
+
PipelineEmbeddingConfig_VertexaiEmbedding,
|
|
169
175
|
)
|
|
170
176
|
from .pipeline_file import PipelineFile
|
|
177
|
+
from .pipeline_file_config_hash_value import PipelineFileConfigHashValue
|
|
171
178
|
from .pipeline_file_create import PipelineFileCreate
|
|
172
179
|
from .pipeline_file_create_custom_metadata_value import PipelineFileCreateCustomMetadataValue
|
|
173
180
|
from .pipeline_file_custom_metadata_value import PipelineFileCustomMetadataValue
|
|
@@ -208,6 +215,8 @@ from .user_organization_create import UserOrganizationCreate
|
|
|
208
215
|
from .user_organization_delete import UserOrganizationDelete
|
|
209
216
|
from .validation_error import ValidationError
|
|
210
217
|
from .validation_error_loc_item import ValidationErrorLocItem
|
|
218
|
+
from .vertex_ai_embedding_config import VertexAiEmbeddingConfig
|
|
219
|
+
from .vertex_embedding_mode import VertexEmbeddingMode
|
|
211
220
|
|
|
212
221
|
__all__ = [
|
|
213
222
|
"AdvancedModeTransformConfig",
|
|
@@ -288,6 +297,7 @@ __all__ = [
|
|
|
288
297
|
"EvalQuestion",
|
|
289
298
|
"EvalQuestionCreate",
|
|
290
299
|
"EvalQuestionResult",
|
|
300
|
+
"ExtendVertexTextEmbedding",
|
|
291
301
|
"ExtractionJob",
|
|
292
302
|
"ExtractionResult",
|
|
293
303
|
"ExtractionResultDataValue",
|
|
@@ -333,6 +343,8 @@ __all__ = [
|
|
|
333
343
|
"OpenAiEmbeddingConfig",
|
|
334
344
|
"Organization",
|
|
335
345
|
"OrganizationCreate",
|
|
346
|
+
"PageScreenshotMetadata",
|
|
347
|
+
"PageScreenshotNodeWithScore",
|
|
336
348
|
"PageSegmentationConfig",
|
|
337
349
|
"PageSplitterNodeParser",
|
|
338
350
|
"ParserLanguages",
|
|
@@ -343,6 +355,7 @@ __all__ = [
|
|
|
343
355
|
"ParsingJobTextResult",
|
|
344
356
|
"ParsingUsage",
|
|
345
357
|
"Pipeline",
|
|
358
|
+
"PipelineConfigurationHashes",
|
|
346
359
|
"PipelineCreate",
|
|
347
360
|
"PipelineCreateEmbeddingConfig",
|
|
348
361
|
"PipelineCreateEmbeddingConfig_AzureEmbedding",
|
|
@@ -351,6 +364,7 @@ __all__ = [
|
|
|
351
364
|
"PipelineCreateEmbeddingConfig_GeminiEmbedding",
|
|
352
365
|
"PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
353
366
|
"PipelineCreateEmbeddingConfig_OpenaiEmbedding",
|
|
367
|
+
"PipelineCreateEmbeddingConfig_VertexaiEmbedding",
|
|
354
368
|
"PipelineCreateTransformConfig",
|
|
355
369
|
"PipelineCreateTransformConfig_Advanced",
|
|
356
370
|
"PipelineCreateTransformConfig_Auto",
|
|
@@ -367,7 +381,9 @@ __all__ = [
|
|
|
367
381
|
"PipelineEmbeddingConfig_GeminiEmbedding",
|
|
368
382
|
"PipelineEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
369
383
|
"PipelineEmbeddingConfig_OpenaiEmbedding",
|
|
384
|
+
"PipelineEmbeddingConfig_VertexaiEmbedding",
|
|
370
385
|
"PipelineFile",
|
|
386
|
+
"PipelineFileConfigHashValue",
|
|
371
387
|
"PipelineFileCreate",
|
|
372
388
|
"PipelineFileCreateCustomMetadataValue",
|
|
373
389
|
"PipelineFileCustomMetadataValue",
|
|
@@ -406,4 +422,6 @@ __all__ = [
|
|
|
406
422
|
"UserOrganizationDelete",
|
|
407
423
|
"ValidationError",
|
|
408
424
|
"ValidationErrorLocItem",
|
|
425
|
+
"VertexAiEmbeddingConfig",
|
|
426
|
+
"VertexEmbeddingMode",
|
|
409
427
|
]
|
|
@@ -21,9 +21,8 @@ class CloudAzStorageBlobDataSource(pydantic.BaseModel):
|
|
|
21
21
|
|
|
22
22
|
container_name: str = pydantic.Field(description="The name of the Azure Storage Blob container to read from.")
|
|
23
23
|
account_url: str = pydantic.Field(description="The Azure Storage Blob account URL to use for authentication.")
|
|
24
|
-
blob: typing.Optional[str] = pydantic.Field(description="The blob name to read from.")
|
|
25
24
|
prefix: typing.Optional[str] = pydantic.Field(
|
|
26
|
-
description="The prefix of the Azure Storage Blob objects to read from."
|
|
25
|
+
description="The prefix of the Azure Storage Blob objects to read from. Use this to filter files at the subdirectory level"
|
|
27
26
|
)
|
|
28
27
|
account_name: typing.Optional[str] = pydantic.Field(
|
|
29
28
|
description="The Azure Storage Blob account name to use for authentication."
|
|
@@ -20,17 +20,15 @@ class CloudPostgresVectorStore(pydantic.BaseModel):
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
supports_nested_metadata_filters: typing.Optional[bool]
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
database: str
|
|
24
|
+
host: str
|
|
25
|
+
password: str
|
|
26
|
+
port: str
|
|
27
|
+
user: str
|
|
25
28
|
table_name: str
|
|
26
29
|
schema_name: str
|
|
27
30
|
embed_dim: int
|
|
28
|
-
hybrid_search: bool
|
|
29
|
-
text_search_config: str
|
|
30
|
-
cache_ok: bool
|
|
31
|
-
perform_setup: bool
|
|
32
|
-
debug: bool
|
|
33
|
-
use_jsonb: bool
|
|
31
|
+
hybrid_search: typing.Optional[bool]
|
|
34
32
|
class_name: typing.Optional[str]
|
|
35
33
|
|
|
36
34
|
def json(self, **kwargs: typing.Any) -> str:
|
|
@@ -24,6 +24,7 @@ class ConfigurableTransformationNames(str, enum.Enum):
|
|
|
24
24
|
BEDROCK_EMBEDDING = "BEDROCK_EMBEDDING"
|
|
25
25
|
HUGGINGFACE_API_EMBEDDING = "HUGGINGFACE_API_EMBEDDING"
|
|
26
26
|
GEMINI_EMBEDDING = "GEMINI_EMBEDDING"
|
|
27
|
+
VERTEXAI_EMBEDDING = "VERTEXAI_EMBEDDING"
|
|
27
28
|
|
|
28
29
|
def visit(
|
|
29
30
|
self,
|
|
@@ -40,6 +41,7 @@ class ConfigurableTransformationNames(str, enum.Enum):
|
|
|
40
41
|
bedrock_embedding: typing.Callable[[], T_Result],
|
|
41
42
|
huggingface_api_embedding: typing.Callable[[], T_Result],
|
|
42
43
|
gemini_embedding: typing.Callable[[], T_Result],
|
|
44
|
+
vertexai_embedding: typing.Callable[[], T_Result],
|
|
43
45
|
) -> T_Result:
|
|
44
46
|
if self is ConfigurableTransformationNames.CHARACTER_SPLITTER:
|
|
45
47
|
return character_splitter()
|
|
@@ -67,3 +69,5 @@ class ConfigurableTransformationNames(str, enum.Enum):
|
|
|
67
69
|
return huggingface_api_embedding()
|
|
68
70
|
if self is ConfigurableTransformationNames.GEMINI_EMBEDDING:
|
|
69
71
|
return gemini_embedding()
|
|
72
|
+
if self is ConfigurableTransformationNames.VERTEXAI_EMBEDDING:
|
|
73
|
+
return vertexai_embedding()
|
|
@@ -7,6 +7,7 @@ from .bedrock_embedding import BedrockEmbedding
|
|
|
7
7
|
from .character_splitter import CharacterSplitter
|
|
8
8
|
from .code_splitter import CodeSplitter
|
|
9
9
|
from .cohere_embedding import CohereEmbedding
|
|
10
|
+
from .extend_vertex_text_embedding import ExtendVertexTextEmbedding
|
|
10
11
|
from .gemini_embedding import GeminiEmbedding
|
|
11
12
|
from .hugging_face_inference_api_embedding import HuggingFaceInferenceApiEmbedding
|
|
12
13
|
from .markdown_element_node_parser import MarkdownElementNodeParser
|
|
@@ -30,4 +31,5 @@ ConfiguredTransformationItemComponentOne = typing.Union[
|
|
|
30
31
|
BedrockEmbedding,
|
|
31
32
|
HuggingFaceInferenceApiEmbedding,
|
|
32
33
|
GeminiEmbedding,
|
|
34
|
+
ExtendVertexTextEmbedding,
|
|
33
35
|
]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
|
7
|
+
from .vertex_embedding_mode import VertexEmbeddingMode
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import pydantic
|
|
11
|
+
if pydantic.__version__.startswith("1."):
|
|
12
|
+
raise ImportError
|
|
13
|
+
import pydantic.v1 as pydantic # type: ignore
|
|
14
|
+
except ImportError:
|
|
15
|
+
import pydantic # type: ignore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ExtendVertexTextEmbedding(pydantic.BaseModel):
|
|
19
|
+
"""
|
|
20
|
+
Base class for embeddings.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
model_name: typing.Optional[str] = pydantic.Field(description="The name of the embedding model.")
|
|
24
|
+
embed_batch_size: typing.Optional[int] = pydantic.Field(description="The batch size for embedding calls.")
|
|
25
|
+
callback_manager: typing.Optional[typing.Dict[str, typing.Any]]
|
|
26
|
+
num_workers: typing.Optional[int] = pydantic.Field(
|
|
27
|
+
description="The number of workers to use for async embedding calls."
|
|
28
|
+
)
|
|
29
|
+
embed_mode: VertexEmbeddingMode = pydantic.Field(description="The embedding mode to use.")
|
|
30
|
+
additional_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(
|
|
31
|
+
description="Additional kwargs for the Vertex."
|
|
32
|
+
)
|
|
33
|
+
client_email: typing.Optional[str] = pydantic.Field(
|
|
34
|
+
description="The client email to use when making Vertex API calls."
|
|
35
|
+
)
|
|
36
|
+
token_uri: typing.Optional[str] = pydantic.Field(description="The token uri to use when making Vertex API calls.")
|
|
37
|
+
private_key_id: typing.Optional[str] = pydantic.Field(
|
|
38
|
+
description="The private key id to use when making Vertex API calls."
|
|
39
|
+
)
|
|
40
|
+
private_key: typing.Optional[str] = pydantic.Field(
|
|
41
|
+
description="The private key to use when making Vertex API calls."
|
|
42
|
+
)
|
|
43
|
+
project: str = pydantic.Field(description="The default GCP project to use when making Vertex API calls.")
|
|
44
|
+
location: str = pydantic.Field(description="The default location to use when making API calls.")
|
|
45
|
+
class_name: typing.Optional[str]
|
|
46
|
+
|
|
47
|
+
def json(self, **kwargs: typing.Any) -> str:
|
|
48
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
|
49
|
+
return super().json(**kwargs_with_defaults)
|
|
50
|
+
|
|
51
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
|
52
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
|
53
|
+
return super().dict(**kwargs_with_defaults)
|
|
54
|
+
|
|
55
|
+
class Config:
|
|
56
|
+
frozen = True
|
|
57
|
+
smart_union = True
|
|
58
|
+
json_encoders = {dt.datetime: serialize_datetime}
|
|
@@ -23,6 +23,7 @@ class LlamaParseParameters(pydantic.BaseModel):
|
|
|
23
23
|
languages: typing.Optional[typing.List[ParserLanguages]]
|
|
24
24
|
parsing_instruction: typing.Optional[str]
|
|
25
25
|
disable_ocr: typing.Optional[bool]
|
|
26
|
+
disable_reconstruction: typing.Optional[bool]
|
|
26
27
|
invalidate_cache: typing.Optional[bool]
|
|
27
28
|
do_not_cache: typing.Optional[bool]
|
|
28
29
|
fast_mode: typing.Optional[bool]
|
|
@@ -33,11 +34,12 @@ class LlamaParseParameters(pydantic.BaseModel):
|
|
|
33
34
|
page_separator: typing.Optional[str]
|
|
34
35
|
bounding_box: typing.Optional[str]
|
|
35
36
|
target_pages: typing.Optional[str]
|
|
36
|
-
use_vendor_multimodal_model: typing.Optional[
|
|
37
|
+
use_vendor_multimodal_model: typing.Optional[bool]
|
|
37
38
|
vendor_multimodal_model_name: typing.Optional[str]
|
|
38
39
|
vendor_multimodal_api_key: typing.Optional[str]
|
|
39
40
|
page_prefix: typing.Optional[str]
|
|
40
41
|
page_suffix: typing.Optional[str]
|
|
42
|
+
webhook_url: typing.Optional[str]
|
|
41
43
|
take_screenshot: typing.Optional[bool]
|
|
42
44
|
s_3_input_path: typing.Optional[str] = pydantic.Field(alias="s3_input_path")
|
|
43
45
|
s_3_output_path_prefix: typing.Optional[str] = pydantic.Field(alias="s3_output_path_prefix")
|
|
@@ -21,6 +21,7 @@ class LlmModelData(pydantic.BaseModel):
|
|
|
21
21
|
|
|
22
22
|
name: str = pydantic.Field(description="The name of the LLM model.")
|
|
23
23
|
description: str = pydantic.Field(description="The description of the LLM model.")
|
|
24
|
+
multi_modal: bool = pydantic.Field(description="Whether the model supports multi-modal image input")
|
|
24
25
|
|
|
25
26
|
def json(self, **kwargs: typing.Any) -> str:
|
|
26
27
|
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
|
@@ -4,6 +4,7 @@ import datetime as dt
|
|
|
4
4
|
import typing
|
|
5
5
|
|
|
6
6
|
from ..core.datetime_utils import serialize_datetime
|
|
7
|
+
from .supported_llm_model_names import SupportedLlmModelNames
|
|
7
8
|
|
|
8
9
|
try:
|
|
9
10
|
import pydantic
|
|
@@ -20,7 +21,9 @@ class LlmParameters(pydantic.BaseModel):
|
|
|
20
21
|
Comes with special serialization logic for types used commonly in platform codebase.
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
|
-
model_name: typing.Optional[
|
|
24
|
+
model_name: typing.Optional[SupportedLlmModelNames] = pydantic.Field(
|
|
25
|
+
description="The name of the model to use for LLM completions."
|
|
26
|
+
)
|
|
24
27
|
system_prompt: typing.Optional[str] = pydantic.Field(description="The system prompt to use for the completion.")
|
|
25
28
|
temperature: typing.Optional[float] = pydantic.Field(description="The temperature value for the model.")
|
|
26
29
|
class_name: typing.Optional[str]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# This file was auto-generated by Fern from our API Definition.
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
import typing
|
|
5
|
+
|
|
6
|
+
from ..core.datetime_utils import serialize_datetime
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import pydantic
|
|
10
|
+
if pydantic.__version__.startswith("1."):
|
|
11
|
+
raise ImportError
|
|
12
|
+
import pydantic.v1 as pydantic # type: ignore
|
|
13
|
+
except ImportError:
|
|
14
|
+
import pydantic # type: ignore
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PageScreenshotMetadata(pydantic.BaseModel):
|
|
18
|
+
page_index: int = pydantic.Field(description="The index of the page for which the screenshot is taken (0-indexed)")
|
|
19
|
+
file_id: str = pydantic.Field(description="The ID of the file that the page screenshot was taken from")
|
|
20
|
+
image_size: int = pydantic.Field(description="The size of the image in bytes")
|
|
21
|
+
|
|
22
|
+
def json(self, **kwargs: typing.Any) -> str:
|
|
23
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
|
24
|
+
return super().json(**kwargs_with_defaults)
|
|
25
|
+
|
|
26
|
+
def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
|
|
27
|
+
kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
|
|
28
|
+
return super().dict(**kwargs_with_defaults)
|
|
29
|
+
|
|
30
|
+
class Config:
|
|
31
|
+
frozen = True
|
|
32
|
+
smart_union = True
|
|
33
|
+
json_encoders = {dt.datetime: serialize_datetime}
|