llama-cloud 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: the registry has flagged this release of llama-cloud as potentially problematic; see the registry's advisory for details.
Files changed (35)
  1. llama_cloud/__init__.py +20 -0
  2. llama_cloud/resources/__init__.py +2 -0
  3. llama_cloud/resources/files/client.py +159 -0
  4. llama_cloud/resources/parsing/client.py +40 -0
  5. llama_cloud/resources/pipelines/__init__.py +2 -0
  6. llama_cloud/resources/pipelines/client.py +188 -2
  7. llama_cloud/resources/pipelines/types/__init__.py +2 -0
  8. llama_cloud/resources/pipelines/types/pipeline_update_embedding_config.py +11 -0
  9. llama_cloud/types/__init__.py +18 -0
  10. llama_cloud/types/cloud_az_storage_blob_data_source.py +1 -2
  11. llama_cloud/types/cloud_postgres_vector_store.py +6 -8
  12. llama_cloud/types/configurable_transformation_names.py +4 -0
  13. llama_cloud/types/configured_transformation_item_component_one.py +2 -0
  14. llama_cloud/types/extend_vertex_text_embedding.py +58 -0
  15. llama_cloud/types/llama_parse_parameters.py +3 -1
  16. llama_cloud/types/llm_model_data.py +1 -0
  17. llama_cloud/types/llm_parameters.py +4 -1
  18. llama_cloud/types/page_screenshot_metadata.py +33 -0
  19. llama_cloud/types/page_screenshot_node_with_score.py +38 -0
  20. llama_cloud/types/pipeline.py +4 -0
  21. llama_cloud/types/pipeline_configuration_hashes.py +37 -0
  22. llama_cloud/types/pipeline_create_embedding_config.py +11 -0
  23. llama_cloud/types/pipeline_data_source.py +7 -0
  24. llama_cloud/types/pipeline_data_source_create.py +3 -0
  25. llama_cloud/types/pipeline_embedding_config.py +11 -0
  26. llama_cloud/types/pipeline_file.py +4 -0
  27. llama_cloud/types/pipeline_file_config_hash_value.py +5 -0
  28. llama_cloud/types/preset_retrieval_params.py +1 -0
  29. llama_cloud/types/retrieve_results.py +4 -0
  30. llama_cloud/types/vertex_ai_embedding_config.py +34 -0
  31. llama_cloud/types/vertex_embedding_mode.py +45 -0
  32. {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/METADATA +1 -1
  33. {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/RECORD +35 -28
  34. {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/LICENSE +0 -0
  35. {llama_cloud-0.0.14.dist-info → llama_cloud-0.0.16.dist-info}/WHEEL +0 -0
@@ -1040,6 +1040,52 @@ class PipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    def update_pipeline_data_source(
+        self, pipeline_id: str, data_source_id: str, *, sync_interval: typing.Optional[float] = OMIT
+    ) -> PipelineDataSource:
+        """
+        Update the configuration of a data source in a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+
+            - data_source_id: str.
+
+            - sync_interval: typing.Optional[float]. The interval at which the data source should be synced.
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.pipelines.update_pipeline_data_source(
+            pipeline_id="string",
+            data_source_id="string",
+        )
+        """
+        _request: typing.Dict[str, typing.Any] = {}
+        if sync_interval is not OMIT:
+            _request["sync_interval"] = sync_interval
+        _response = self._client_wrapper.httpx_client.request(
+            "PUT",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/",
+                f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}",
+            ),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(PipelineDataSource, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def delete_pipeline_data_source(self, pipeline_id: str, data_source_id: str) -> None:
         """
         Delete a data source from a pipeline.
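The generated docstring example above omits the one body field the new endpoint accepts. Below is a minimal sketch of changing a data source's sync cadence; the IDs are placeholders, and the diff does not state the unit of sync_interval (seconds is assumed here).

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token="YOUR_TOKEN")

# Re-sync the data source on a fixed cadence. sync_interval is the only
# field this PUT endpoint sends in 0.0.16; 3600.0 assumes seconds.
data_source = client.pipelines.update_pipeline_data_source(
    pipeline_id="pipeline-id",        # placeholder
    data_source_id="data-source-id",  # placeholder
    sync_interval=3600.0,
)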
@@ -1116,6 +1162,44 @@ class PipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    def get_pipeline_data_source_status(self, pipeline_id: str, data_source_id: str) -> ManagedIngestionStatusResponse:
+        """
+        Get the status of a data source for a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+
+            - data_source_id: str.
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.pipelines.get_pipeline_data_source_status(
+            pipeline_id="string",
+            data_source_id="string",
+        )
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/",
+                f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status",
+            ),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(ManagedIngestionStatusResponse, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def run_search(
         self,
         pipeline_id: str,
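Paired with update_pipeline_data_source above, the status route enables a simple readiness poll. A sketch follows; the concrete status values carried by ManagedIngestionStatusResponse are not visible in this diff, so the field and terminal names below are assumptions.

import time

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token="YOUR_TOKEN")

# Poll the data source until ingestion reaches a terminal state.
# `status`, "SUCCESS", and "ERROR" are assumed names for illustration only.
while True:
    response = client.pipelines.get_pipeline_data_source_status(
        pipeline_id="pipeline-id",        # placeholder
        data_source_id="data-source-id",  # placeholder
    )
    if str(response.status) in ("SUCCESS", "ERROR"):
        break
    time.sleep(5)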
@@ -1128,6 +1212,7 @@ class PipelinesClient:
         search_filters: typing.Optional[MetadataFilters] = OMIT,
         files_top_k: typing.Optional[int] = OMIT,
         retrieval_mode: typing.Optional[RetrievalMode] = OMIT,
+        retrieve_image_nodes: typing.Optional[bool] = OMIT,
         query: str,
     ) -> RetrieveResults:
         """
@@ -1152,6 +1237,8 @@ class PipelinesClient:
 
             - retrieval_mode: typing.Optional[RetrievalMode]. The retrieval mode for the query.
 
+            - retrieve_image_nodes: typing.Optional[bool]. Whether to retrieve image nodes.
+
             - query: str. The query to retrieve against.
         ---
         from llama_cloud import FilterCondition, MetadataFilters, RetrievalMode
@@ -1187,6 +1274,8 @@ class PipelinesClient:
             _request["files_top_k"] = files_top_k
         if retrieval_mode is not OMIT:
            _request["retrieval_mode"] = retrieval_mode
+        if retrieve_image_nodes is not OMIT:
+            _request["retrieve_image_nodes"] = retrieve_image_nodes
         _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/retrieve"),
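The new flag threads straight through to the request body, so opting in is one extra kwarg. A sketch with a placeholder pipeline ID:

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token="YOUR_TOKEN")

# Request page-screenshot image nodes alongside the usual text results;
# see the new PageScreenshotNodeWithScore type added in this release.
results = client.pipelines.run_search(
    pipeline_id="pipeline-id",  # placeholder
    query="What is the refund policy?",
    retrieve_image_nodes=True,
)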
@@ -1334,6 +1423,7 @@ class PipelinesClient:
             MetadataFilters,
             PresetRetrievalParams,
             RetrievalMode,
+            SupportedLlmModelNames,
         )
         from llama_cloud.client import LlamaCloud
 
@@ -1350,7 +1440,9 @@ class PipelinesClient:
                 ),
                 retrieval_mode=RetrievalMode.CHUNKS,
             ),
-            llm_parameters=LlmParameters(),
+            llm_parameters=LlmParameters(
+                model_name=SupportedLlmModelNames.GPT_3_5_TURBO,
+            ),
         ),
     )
     """
@@ -2652,6 +2744,52 @@ class AsyncPipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    async def update_pipeline_data_source(
+        self, pipeline_id: str, data_source_id: str, *, sync_interval: typing.Optional[float] = OMIT
+    ) -> PipelineDataSource:
+        """
+        Update the configuration of a data source in a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+
+            - data_source_id: str.
+
+            - sync_interval: typing.Optional[float]. The interval at which the data source should be synced.
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.pipelines.update_pipeline_data_source(
+            pipeline_id="string",
+            data_source_id="string",
+        )
+        """
+        _request: typing.Dict[str, typing.Any] = {}
+        if sync_interval is not OMIT:
+            _request["sync_interval"] = sync_interval
+        _response = await self._client_wrapper.httpx_client.request(
+            "PUT",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/",
+                f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}",
+            ),
+            json=jsonable_encoder(_request),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(PipelineDataSource, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def delete_pipeline_data_source(self, pipeline_id: str, data_source_id: str) -> None:
         """
         Delete a data source from a pipeline.
@@ -2728,6 +2866,46 @@ class AsyncPipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    async def get_pipeline_data_source_status(
+        self, pipeline_id: str, data_source_id: str
+    ) -> ManagedIngestionStatusResponse:
+        """
+        Get the status of a data source for a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+
+            - data_source_id: str.
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.pipelines.get_pipeline_data_source_status(
+            pipeline_id="string",
+            data_source_id="string",
+        )
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/",
+                f"api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status",
+            ),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(ManagedIngestionStatusResponse, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def run_search(
         self,
         pipeline_id: str,
@@ -2740,6 +2918,7 @@ class AsyncPipelinesClient:
         search_filters: typing.Optional[MetadataFilters] = OMIT,
         files_top_k: typing.Optional[int] = OMIT,
         retrieval_mode: typing.Optional[RetrievalMode] = OMIT,
+        retrieve_image_nodes: typing.Optional[bool] = OMIT,
         query: str,
     ) -> RetrieveResults:
         """
@@ -2764,6 +2943,8 @@ class AsyncPipelinesClient:
 
             - retrieval_mode: typing.Optional[RetrievalMode]. The retrieval mode for the query.
 
+            - retrieve_image_nodes: typing.Optional[bool]. Whether to retrieve image nodes.
+
             - query: str. The query to retrieve against.
         ---
         from llama_cloud import FilterCondition, MetadataFilters, RetrievalMode
@@ -2799,6 +2980,8 @@ class AsyncPipelinesClient:
             _request["files_top_k"] = files_top_k
         if retrieval_mode is not OMIT:
             _request["retrieval_mode"] = retrieval_mode
+        if retrieve_image_nodes is not OMIT:
+            _request["retrieve_image_nodes"] = retrieve_image_nodes
         _response = await self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/retrieve"),
@@ -2946,6 +3129,7 @@ class AsyncPipelinesClient:
             MetadataFilters,
             PresetRetrievalParams,
             RetrievalMode,
+            SupportedLlmModelNames,
         )
         from llama_cloud.client import AsyncLlamaCloud
 
@@ -2962,7 +3146,9 @@ class AsyncPipelinesClient:
                 ),
                 retrieval_mode=RetrievalMode.CHUNKS,
             ),
-            llm_parameters=LlmParameters(),
+            llm_parameters=LlmParameters(
+                model_name=SupportedLlmModelNames.GPT_3_5_TURBO,
+            ),
         ),
     )
     """
@@ -9,6 +9,7 @@ from .pipeline_update_embedding_config import (
     PipelineUpdateEmbeddingConfig_GeminiEmbedding,
     PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding,
     PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
+    PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
 )
 from .pipeline_update_transform_config import (
     PipelineUpdateTransformConfig,
@@ -25,6 +26,7 @@ __all__ = [
     "PipelineUpdateEmbeddingConfig_GeminiEmbedding",
     "PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding",
     "PipelineUpdateEmbeddingConfig_OpenaiEmbedding",
+    "PipelineUpdateEmbeddingConfig_VertexaiEmbedding",
     "PipelineUpdateTransformConfig",
     "PipelineUpdateTransformConfig_Advanced",
     "PipelineUpdateTransformConfig_Auto",
@@ -12,6 +12,7 @@ from ....types.cohere_embedding_config import CohereEmbeddingConfig
 from ....types.gemini_embedding_config import GeminiEmbeddingConfig
 from ....types.hugging_face_inference_api_embedding_config import HuggingFaceInferenceApiEmbeddingConfig
 from ....types.open_ai_embedding_config import OpenAiEmbeddingConfig
+from ....types.vertex_ai_embedding_config import VertexAiEmbeddingConfig
 
 
 class PipelineUpdateEmbeddingConfig_OpenaiEmbedding(OpenAiEmbeddingConfig):
@@ -68,6 +69,15 @@ class PipelineUpdateEmbeddingConfig_CohereEmbedding(CohereEmbeddingConfig):
         allow_population_by_field_name = True
 
 
+class PipelineUpdateEmbeddingConfig_VertexaiEmbedding(VertexAiEmbeddingConfig):
+    type: typing_extensions.Literal["VERTEXAI_EMBEDDING"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
 PipelineUpdateEmbeddingConfig = typing.Union[
     PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
     PipelineUpdateEmbeddingConfig_AzureEmbedding,
@@ -75,4 +85,5 @@ PipelineUpdateEmbeddingConfig = typing.Union[
     PipelineUpdateEmbeddingConfig_BedrockEmbedding,
     PipelineUpdateEmbeddingConfig_GeminiEmbedding,
     PipelineUpdateEmbeddingConfig_CohereEmbedding,
+    PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
 ]
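A sketch of selecting the new discriminated-union member when updating a pipeline's embedding config. The body of vertex_ai_embedding_config.py is not shown in this diff, so the `component` field name is an assumption by analogy with the other embedding-config variants, and the VertexEmbeddingMode member name is likewise assumed.

from llama_cloud import ExtendVertexTextEmbedding, VertexEmbeddingMode
from llama_cloud.resources.pipelines import (
    PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
)

embedding_config = PipelineUpdateEmbeddingConfig_VertexaiEmbedding(
    type="VERTEXAI_EMBEDDING",
    component=ExtendVertexTextEmbedding(  # `component` is an assumed field name
        embed_mode=VertexEmbeddingMode.RETRIEVAL_MODE,  # member name assumed
        project="my-gcp-project",  # placeholder
        location="us-central1",    # placeholder
    ),
)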
@@ -82,6 +82,7 @@ from .eval_execution_params_override import EvalExecutionParamsOverride
 from .eval_question import EvalQuestion
 from .eval_question_create import EvalQuestionCreate
 from .eval_question_result import EvalQuestionResult
+from .extend_vertex_text_embedding import ExtendVertexTextEmbedding
 from .extraction_job import ExtractionJob
 from .extraction_result import ExtractionResult
 from .extraction_result_data_value import ExtractionResultDataValue
@@ -127,6 +128,8 @@ from .open_ai_embedding import OpenAiEmbedding
 from .open_ai_embedding_config import OpenAiEmbeddingConfig
 from .organization import Organization
 from .organization_create import OrganizationCreate
+from .page_screenshot_metadata import PageScreenshotMetadata
+from .page_screenshot_node_with_score import PageScreenshotNodeWithScore
 from .page_segmentation_config import PageSegmentationConfig
 from .page_splitter_node_parser import PageSplitterNodeParser
 from .parser_languages import ParserLanguages
@@ -137,6 +140,7 @@ from .parsing_job_markdown_result import ParsingJobMarkdownResult
 from .parsing_job_text_result import ParsingJobTextResult
 from .parsing_usage import ParsingUsage
 from .pipeline import Pipeline
+from .pipeline_configuration_hashes import PipelineConfigurationHashes
 from .pipeline_create import PipelineCreate
 from .pipeline_create_embedding_config import (
     PipelineCreateEmbeddingConfig,
@@ -146,6 +150,7 @@ from .pipeline_create_embedding_config import (
     PipelineCreateEmbeddingConfig_GeminiEmbedding,
     PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding,
     PipelineCreateEmbeddingConfig_OpenaiEmbedding,
+    PipelineCreateEmbeddingConfig_VertexaiEmbedding,
 )
 from .pipeline_create_transform_config import (
     PipelineCreateTransformConfig,
@@ -166,8 +171,10 @@ from .pipeline_embedding_config import (
     PipelineEmbeddingConfig_GeminiEmbedding,
     PipelineEmbeddingConfig_HuggingfaceApiEmbedding,
     PipelineEmbeddingConfig_OpenaiEmbedding,
+    PipelineEmbeddingConfig_VertexaiEmbedding,
 )
 from .pipeline_file import PipelineFile
+from .pipeline_file_config_hash_value import PipelineFileConfigHashValue
 from .pipeline_file_create import PipelineFileCreate
 from .pipeline_file_create_custom_metadata_value import PipelineFileCreateCustomMetadataValue
 from .pipeline_file_custom_metadata_value import PipelineFileCustomMetadataValue
@@ -208,6 +215,8 @@ from .user_organization_create import UserOrganizationCreate
 from .user_organization_delete import UserOrganizationDelete
 from .validation_error import ValidationError
 from .validation_error_loc_item import ValidationErrorLocItem
+from .vertex_ai_embedding_config import VertexAiEmbeddingConfig
+from .vertex_embedding_mode import VertexEmbeddingMode
 
 __all__ = [
     "AdvancedModeTransformConfig",
@@ -288,6 +297,7 @@ __all__ = [
     "EvalQuestion",
     "EvalQuestionCreate",
     "EvalQuestionResult",
+    "ExtendVertexTextEmbedding",
     "ExtractionJob",
     "ExtractionResult",
     "ExtractionResultDataValue",
@@ -333,6 +343,8 @@ __all__ = [
     "OpenAiEmbeddingConfig",
     "Organization",
     "OrganizationCreate",
+    "PageScreenshotMetadata",
+    "PageScreenshotNodeWithScore",
     "PageSegmentationConfig",
     "PageSplitterNodeParser",
     "ParserLanguages",
@@ -343,6 +355,7 @@ __all__ = [
     "ParsingJobTextResult",
     "ParsingUsage",
     "Pipeline",
+    "PipelineConfigurationHashes",
     "PipelineCreate",
     "PipelineCreateEmbeddingConfig",
     "PipelineCreateEmbeddingConfig_AzureEmbedding",
@@ -351,6 +364,7 @@ __all__ = [
     "PipelineCreateEmbeddingConfig_GeminiEmbedding",
     "PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding",
     "PipelineCreateEmbeddingConfig_OpenaiEmbedding",
+    "PipelineCreateEmbeddingConfig_VertexaiEmbedding",
     "PipelineCreateTransformConfig",
     "PipelineCreateTransformConfig_Advanced",
     "PipelineCreateTransformConfig_Auto",
@@ -367,7 +381,9 @@ __all__ = [
     "PipelineEmbeddingConfig_GeminiEmbedding",
     "PipelineEmbeddingConfig_HuggingfaceApiEmbedding",
     "PipelineEmbeddingConfig_OpenaiEmbedding",
+    "PipelineEmbeddingConfig_VertexaiEmbedding",
     "PipelineFile",
+    "PipelineFileConfigHashValue",
     "PipelineFileCreate",
     "PipelineFileCreateCustomMetadataValue",
     "PipelineFileCustomMetadataValue",
@@ -406,4 +422,6 @@ __all__ = [
     "UserOrganizationDelete",
     "ValidationError",
     "ValidationErrorLocItem",
+    "VertexAiEmbeddingConfig",
+    "VertexEmbeddingMode",
 ]
@@ -21,9 +21,8 @@ class CloudAzStorageBlobDataSource(pydantic.BaseModel):
 
     container_name: str = pydantic.Field(description="The name of the Azure Storage Blob container to read from.")
     account_url: str = pydantic.Field(description="The Azure Storage Blob account URL to use for authentication.")
-    blob: typing.Optional[str] = pydantic.Field(description="The blob name to read from.")
     prefix: typing.Optional[str] = pydantic.Field(
-        description="The prefix of the Azure Storage Blob objects to read from."
+        description="The prefix of the Azure Storage Blob objects to read from. Use this to filter files at the subdirectory level"
     )
     account_name: typing.Optional[str] = pydantic.Field(
         description="The Azure Storage Blob account name to use for authentication."
@@ -20,17 +20,15 @@ class CloudPostgresVectorStore(pydantic.BaseModel):
     """
 
     supports_nested_metadata_filters: typing.Optional[bool]
-    connection_string: str
-    async_connection_string: str
+    database: str
+    host: str
+    password: str
+    port: str
+    user: str
     table_name: str
     schema_name: str
    embed_dim: int
-    hybrid_search: bool
-    text_search_config: str
-    cache_ok: bool
-    perform_setup: bool
-    debug: bool
-    use_jsonb: bool
+    hybrid_search: typing.Optional[bool]
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
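This is a breaking change for CloudPostgresVectorStore: the two connection-string fields are gone in favor of discrete credentials, several tuning flags were dropped, and hybrid_search became optional. A sketch with placeholder values, using only the fields shown in the new schema:

from llama_cloud import CloudPostgresVectorStore

store = CloudPostgresVectorStore(
    database="vectors",       # placeholder credentials throughout
    host="db.example.com",
    password="secret",
    port="5432",              # str in the new schema, not int
    user="llama",
    table_name="embeddings",
    schema_name="public",
    embed_dim=1536,
    hybrid_search=True,       # now typing.Optional[bool]
)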
@@ -24,6 +24,7 @@ class ConfigurableTransformationNames(str, enum.Enum):
     BEDROCK_EMBEDDING = "BEDROCK_EMBEDDING"
     HUGGINGFACE_API_EMBEDDING = "HUGGINGFACE_API_EMBEDDING"
     GEMINI_EMBEDDING = "GEMINI_EMBEDDING"
+    VERTEXAI_EMBEDDING = "VERTEXAI_EMBEDDING"
 
     def visit(
         self,
@@ -40,6 +41,7 @@ class ConfigurableTransformationNames(str, enum.Enum):
         bedrock_embedding: typing.Callable[[], T_Result],
         huggingface_api_embedding: typing.Callable[[], T_Result],
         gemini_embedding: typing.Callable[[], T_Result],
+        vertexai_embedding: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableTransformationNames.CHARACTER_SPLITTER:
             return character_splitter()
@@ -67,3 +69,5 @@ class ConfigurableTransformationNames(str, enum.Enum):
             return huggingface_api_embedding()
         if self is ConfigurableTransformationNames.GEMINI_EMBEDDING:
             return gemini_embedding()
+        if self is ConfigurableTransformationNames.VERTEXAI_EMBEDDING:
+            return vertexai_embedding()
@@ -7,6 +7,7 @@ from .bedrock_embedding import BedrockEmbedding
 from .character_splitter import CharacterSplitter
 from .code_splitter import CodeSplitter
 from .cohere_embedding import CohereEmbedding
+from .extend_vertex_text_embedding import ExtendVertexTextEmbedding
 from .gemini_embedding import GeminiEmbedding
 from .hugging_face_inference_api_embedding import HuggingFaceInferenceApiEmbedding
 from .markdown_element_node_parser import MarkdownElementNodeParser
@@ -30,4 +31,5 @@ ConfiguredTransformationItemComponentOne = typing.Union[
     BedrockEmbedding,
     HuggingFaceInferenceApiEmbedding,
     GeminiEmbedding,
+    ExtendVertexTextEmbedding,
 ]
@@ -0,0 +1,58 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .vertex_embedding_mode import VertexEmbeddingMode
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ExtendVertexTextEmbedding(pydantic.BaseModel):
+    """
+    Base class for embeddings.
+    """
+
+    model_name: typing.Optional[str] = pydantic.Field(description="The name of the embedding model.")
+    embed_batch_size: typing.Optional[int] = pydantic.Field(description="The batch size for embedding calls.")
+    callback_manager: typing.Optional[typing.Dict[str, typing.Any]]
+    num_workers: typing.Optional[int] = pydantic.Field(
+        description="The number of workers to use for async embedding calls."
+    )
+    embed_mode: VertexEmbeddingMode = pydantic.Field(description="The embedding mode to use.")
+    additional_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(
+        description="Additional kwargs for the Vertex."
+    )
+    client_email: typing.Optional[str] = pydantic.Field(
+        description="The client email to use when making Vertex API calls."
+    )
+    token_uri: typing.Optional[str] = pydantic.Field(description="The token uri to use when making Vertex API calls.")
+    private_key_id: typing.Optional[str] = pydantic.Field(
+        description="The private key id to use when making Vertex API calls."
+    )
+    private_key: typing.Optional[str] = pydantic.Field(
+        description="The private key to use when making Vertex API calls."
+    )
+    project: str = pydantic.Field(description="The default GCP project to use when making Vertex API calls.")
+    location: str = pydantic.Field(description="The default location to use when making API calls.")
+    class_name: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
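Of the new model's fields, only embed_mode, project, and location lack defaults; the service-account credentials are optional. A minimal sketch follows (credential strings are placeholders, and the VertexEmbeddingMode member name is assumed since that enum's body is not shown in this diff):

from llama_cloud import ExtendVertexTextEmbedding, VertexEmbeddingMode

embedding = ExtendVertexTextEmbedding(
    embed_mode=VertexEmbeddingMode.RETRIEVAL_MODE,  # member name assumed
    project="my-gcp-project",  # placeholder GCP project
    location="us-central1",    # placeholder region
    client_email="svc@my-gcp-project.iam.gserviceaccount.com",  # optional, placeholder
)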
@@ -23,6 +23,7 @@ class LlamaParseParameters(pydantic.BaseModel):
     languages: typing.Optional[typing.List[ParserLanguages]]
     parsing_instruction: typing.Optional[str]
     disable_ocr: typing.Optional[bool]
+    disable_reconstruction: typing.Optional[bool]
     invalidate_cache: typing.Optional[bool]
     do_not_cache: typing.Optional[bool]
     fast_mode: typing.Optional[bool]
@@ -33,11 +34,12 @@ class LlamaParseParameters(pydantic.BaseModel):
     page_separator: typing.Optional[str]
     bounding_box: typing.Optional[str]
     target_pages: typing.Optional[str]
-    use_vendor_multimodal_model: typing.Optional[str]
+    use_vendor_multimodal_model: typing.Optional[bool]
     vendor_multimodal_model_name: typing.Optional[str]
     vendor_multimodal_api_key: typing.Optional[str]
     page_prefix: typing.Optional[str]
     page_suffix: typing.Optional[str]
+    webhook_url: typing.Optional[str]
     take_screenshot: typing.Optional[bool]
     s_3_input_path: typing.Optional[str] = pydantic.Field(alias="s3_input_path")
     s_3_output_path_prefix: typing.Optional[str] = pydantic.Field(alias="s3_output_path_prefix")
@@ -21,6 +21,7 @@ class LlmModelData(pydantic.BaseModel):
 
     name: str = pydantic.Field(description="The name of the LLM model.")
     description: str = pydantic.Field(description="The description of the LLM model.")
+    multi_modal: bool = pydantic.Field(description="Whether the model supports multi-modal image input")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .supported_llm_model_names import SupportedLlmModelNames
 
 try:
     import pydantic
@@ -20,7 +21,9 @@ class LlmParameters(pydantic.BaseModel):
     Comes with special serialization logic for types used commonly in platform codebase.
     """
 
-    model_name: typing.Optional[str] = pydantic.Field(description="The name of the model to use for LLM completions.")
+    model_name: typing.Optional[SupportedLlmModelNames] = pydantic.Field(
+        description="The name of the model to use for LLM completions."
+    )
     system_prompt: typing.Optional[str] = pydantic.Field(description="The system prompt to use for the completion.")
     temperature: typing.Optional[float] = pydantic.Field(description="The temperature value for the model.")
     class_name: typing.Optional[str]
@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PageScreenshotMetadata(pydantic.BaseModel):
+    page_index: int = pydantic.Field(description="The index of the page for which the screenshot is taken (0-indexed)")
+    file_id: str = pydantic.Field(description="The ID of the file that the page screenshot was taken from")
+    image_size: int = pydantic.Field(description="The size of the image in bytes")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
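Taken together with retrieve_image_nodes on run_search, these metadata fields let a caller trace a retrieved screenshot back to its page of origin. A closing sketch; `image_nodes` on RetrieveResults and the `.node` nesting are assumed names, since the bodies of retrieve_results.py and page_screenshot_node_with_score.py are not shown in this diff:

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token="YOUR_TOKEN")

results = client.pipelines.run_search(
    pipeline_id="pipeline-id",  # placeholder
    query="refund policy",
    retrieve_image_nodes=True,
)
# `image_nodes` and `.node` are assumed attribute names for illustration.
for image_node in results.image_nodes or []:
    meta = image_node.node
    print(meta.file_id, meta.page_index, meta.image_size)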