llama-cloud 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries, and is provided for informational purposes only.
Potentially problematic release: this version of llama-cloud might be problematic.
- llama_cloud/__init__.py +18 -4
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +4 -1
- llama_cloud/resources/data_sinks/types/data_sink_update_component_one.py +2 -0
- llama_cloud/resources/data_sources/types/data_source_update_component_one.py +4 -4
- llama_cloud/resources/extraction/__init__.py +5 -0
- llama_cloud/resources/extraction/client.py +648 -0
- llama_cloud/resources/extraction/types/__init__.py +5 -0
- llama_cloud/resources/extraction/types/extraction_schema_update_data_schema_value.py +7 -0
- llama_cloud/resources/pipelines/client.py +146 -2
- llama_cloud/types/__init__.py +14 -4
- llama_cloud/types/azure_open_ai_embedding.py +3 -0
- llama_cloud/types/{cloud_gcs_data_source.py → cloud_azure_ai_search_vector_store.py} +9 -7
- llama_cloud/types/{cloud_google_drive_data_source.py → cloud_notion_page_data_source.py} +4 -5
- llama_cloud/types/cloud_slack_data_source.py +42 -0
- llama_cloud/types/configurable_data_sink_names.py +4 -0
- llama_cloud/types/configurable_data_source_names.py +8 -8
- llama_cloud/types/data_sink_component_one.py +2 -0
- llama_cloud/types/data_sink_create_component_one.py +2 -0
- llama_cloud/types/data_source_component_one.py +4 -4
- llama_cloud/types/data_source_create_component_one.py +4 -4
- llama_cloud/types/extraction_result.py +42 -0
- llama_cloud/types/extraction_result_data_value.py +5 -0
- llama_cloud/types/extraction_schema.py +44 -0
- llama_cloud/types/extraction_schema_data_schema_value.py +7 -0
- llama_cloud/types/pipeline_data_source_component_one.py +4 -4
- llama_cloud/types/text_node.py +1 -0
- {llama_cloud-0.0.7.dist-info → llama_cloud-0.0.8.dist-info}/METADATA +1 -2
- {llama_cloud-0.0.7.dist-info → llama_cloud-0.0.8.dist-info}/RECORD +31 -22
- {llama_cloud-0.0.7.dist-info → llama_cloud-0.0.8.dist-info}/WHEEL +1 -1
- {llama_cloud-0.0.7.dist-info → llama_cloud-0.0.8.dist-info}/LICENSE +0 -0
llama_cloud/resources/pipelines/client.py CHANGED

@@ -644,12 +644,22 @@ class PipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)

-    def list_pipeline_files(
+    def list_pipeline_files(
+        self,
+        pipeline_id: str,
+        *,
+        data_source_id: typing.Optional[str] = None,
+        only_manually_uploaded: typing.Optional[bool] = None,
+    ) -> typing.List[PipelineFile]:
         """
         Get files for a pipeline.

         Parameters:
             - pipeline_id: str.
+
+            - data_source_id: typing.Optional[str].
+
+            - only_manually_uploaded: typing.Optional[bool].
         ---
         from llama_cloud.client import LlamaCloud

@@ -663,6 +673,9 @@ class PipelinesClient:
         _response = self._client_wrapper.httpx_client.request(
             "GET",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/files"),
+            params=remove_none_from_dict(
+                {"data_source_id": data_source_id, "only_manually_uploaded": only_manually_uploaded}
+            ),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
         )
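The two new filters on list_pipeline_files are optional keyword arguments, so existing callers are unaffected. A minimal usage sketch against the new signature (the pipeline and data source IDs are placeholders):

    from llama_cloud.client import LlamaCloud

    client = LlamaCloud(token="YOUR_TOKEN")

    # All files in the pipeline (same behaviour as before).
    all_files = client.pipelines.list_pipeline_files(pipeline_id="PIPELINE_ID")

    # Only files that came from a specific data source.
    source_files = client.pipelines.list_pipeline_files(
        pipeline_id="PIPELINE_ID",
        data_source_id="DATA_SOURCE_ID",
    )

    # Only files that were uploaded manually.
    manual_files = client.pipelines.list_pipeline_files(
        pipeline_id="PIPELINE_ID",
        only_manually_uploaded=True,
    )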
@@ -837,6 +850,65 @@ class PipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)

+    def import_pipeline_metadata(self, pipeline_id: str, *, upload_file: typing.IO) -> typing.Dict[str, str]:
+        """
+        Import metadata for a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+
+            - upload_file: typing.IO.
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "PUT",
+            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/metadata"),
+            data=jsonable_encoder({}),
+            files={"upload_file": upload_file},
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(typing.Dict[str, str], _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
+    def delete_pipeline_files_metadata(self, pipeline_id: str) -> None:
+        """
+        Delete metadata for all files in a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.pipelines.delete_pipeline_files_metadata(
+            pipeline_id="string",
+        )
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "DELETE",
+            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/metadata"),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def list_pipeline_data_sources(self, pipeline_id: str) -> typing.List[PipelineDataSource]:
         """
         Get data sources for a pipeline.
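Both metadata endpoints hang off the pipelines resource: import_pipeline_metadata uploads a file-like object as multipart form data and returns a string-to-string mapping, while delete_pipeline_files_metadata clears the metadata for every file in the pipeline. A minimal sketch (the local file name and its format are placeholders; the diff does not show what the server expects inside the upload):

    from llama_cloud.client import LlamaCloud

    client = LlamaCloud(token="YOUR_TOKEN")

    # PUT api/v1/pipelines/{pipeline_id}/metadata with a multipart file upload.
    with open("pipeline_file_metadata.json", "rb") as f:
        result = client.pipelines.import_pipeline_metadata(
            pipeline_id="PIPELINE_ID",
            upload_file=f,
        )
    print(result)

    # DELETE api/v1/pipelines/{pipeline_id}/metadata removes metadata for all files.
    client.pipelines.delete_pipeline_files_metadata(pipeline_id="PIPELINE_ID")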
@@ -1968,12 +2040,22 @@ class AsyncPipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)

-    async def list_pipeline_files(
+    async def list_pipeline_files(
+        self,
+        pipeline_id: str,
+        *,
+        data_source_id: typing.Optional[str] = None,
+        only_manually_uploaded: typing.Optional[bool] = None,
+    ) -> typing.List[PipelineFile]:
         """
         Get files for a pipeline.

         Parameters:
             - pipeline_id: str.
+
+            - data_source_id: typing.Optional[str].
+
+            - only_manually_uploaded: typing.Optional[bool].
         ---
         from llama_cloud.client import AsyncLlamaCloud

@@ -1987,6 +2069,9 @@ class AsyncPipelinesClient:
         _response = await self._client_wrapper.httpx_client.request(
             "GET",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/files"),
+            params=remove_none_from_dict(
+                {"data_source_id": data_source_id, "only_manually_uploaded": only_manually_uploaded}
+            ),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
         )

@@ -2161,6 +2246,65 @@ class AsyncPipelinesClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)

+    async def import_pipeline_metadata(self, pipeline_id: str, *, upload_file: typing.IO) -> typing.Dict[str, str]:
+        """
+        Import metadata for a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+
+            - upload_file: typing.IO.
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "PUT",
+            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/metadata"),
+            data=jsonable_encoder({}),
+            files={"upload_file": upload_file},
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(typing.Dict[str, str], _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
+    async def delete_pipeline_files_metadata(self, pipeline_id: str) -> None:
+        """
+        Delete metadata for all files in a pipeline.
+
+        Parameters:
+            - pipeline_id: str.
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.pipelines.delete_pipeline_files_metadata(
+            pipeline_id="string",
+        )
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "DELETE",
+            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/metadata"),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def list_pipeline_data_sources(self, pipeline_id: str) -> typing.List[PipelineDataSource]:
         """
         Get data sources for a pipeline.
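The AsyncPipelinesClient changes mirror the sync client one for one, so the same calls work under asyncio. A brief sketch:

    import asyncio

    from llama_cloud.client import AsyncLlamaCloud


    async def main() -> None:
        client = AsyncLlamaCloud(token="YOUR_TOKEN")
        manual_files = await client.pipelines.list_pipeline_files(
            pipeline_id="PIPELINE_ID",
            only_manually_uploaded=True,
        )
        print(len(manual_files))
        await client.pipelines.delete_pipeline_files_metadata(pipeline_id="PIPELINE_ID")


    asyncio.run(main())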
llama_cloud/types/__init__.py CHANGED
@@ -6,17 +6,18 @@ from .base_prompt_template import BasePromptTemplate
 from .bedrock_embedding import BedrockEmbedding
 from .chat_message import ChatMessage
 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
+from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
 from .cloud_document import CloudDocument
 from .cloud_document_create import CloudDocumentCreate
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
 from .cloud_qdrant_vector_store import CloudQdrantVectorStore
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource
 from .cloud_weaviate_vector_store import CloudWeaviateVectorStore
 from .code_splitter import CodeSplitter
 from .cohere_embedding import CohereEmbedding
@@ -52,6 +53,10 @@ from .eval_llm_model_data import EvalLlmModelData
 from .eval_question import EvalQuestion
 from .eval_question_create import EvalQuestionCreate
 from .eval_question_result import EvalQuestionResult
+from .extraction_result import ExtractionResult
+from .extraction_result_data_value import ExtractionResultDataValue
+from .extraction_schema import ExtractionSchema
+from .extraction_schema_data_schema_value import ExtractionSchemaDataSchemaValue
 from .file import File
 from .file_resource_info_value import FileResourceInfoValue
 from .filter_condition import FilterCondition
@@ -132,17 +137,18 @@ __all__ = [
     "BedrockEmbedding",
     "ChatMessage",
     "CloudAzStorageBlobDataSource",
+    "CloudAzureAiSearchVectorStore",
     "CloudChromaVectorStore",
     "CloudDocument",
     "CloudDocumentCreate",
-    "CloudGcsDataSource",
-    "CloudGoogleDriveDataSource",
+    "CloudNotionPageDataSource",
     "CloudOneDriveDataSource",
     "CloudPineconeVectorStore",
     "CloudPostgresVectorStore",
     "CloudQdrantVectorStore",
     "CloudS3DataSource",
     "CloudSharepointDataSource",
+    "CloudSlackDataSource",
     "CloudWeaviateVectorStore",
     "CodeSplitter",
     "CohereEmbedding",
@@ -178,6 +184,10 @@ __all__ = [
     "EvalQuestion",
     "EvalQuestionCreate",
     "EvalQuestionResult",
+    "ExtractionResult",
+    "ExtractionResultDataValue",
+    "ExtractionSchema",
+    "ExtractionSchemaDataSchemaValue",
     "File",
     "FileResourceInfoValue",
     "FilterCondition",
llama_cloud/types/azure_open_ai_embedding.py CHANGED

@@ -62,6 +62,9 @@ class AzureOpenAiEmbedding(pydantic.BaseModel):
     )
     azure_endpoint: typing.Optional[str] = pydantic.Field(description="The Azure endpoint to use.")
     azure_deployment: typing.Optional[str] = pydantic.Field(description="The Azure deployment to use.")
+    use_azure_ad: bool = pydantic.Field(
+        description="Indicates if Microsoft Entra ID (former Azure AD) is used for token authentication"
+    )
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/{cloud_gcs_data_source.py → cloud_azure_ai_search_vector_store.py} RENAMED

@@ -14,16 +14,18 @@ except ImportError:
     import pydantic  # type: ignore


-class CloudGcsDataSource(pydantic.BaseModel):
+class CloudAzureAiSearchVectorStore(pydantic.BaseModel):
     """
-
+    Cloud Azure AI Search Vector Store.
     """

-
-
-
-
-
+    supports_nested_metadata_filters: typing.Optional[bool]
+    search_service_api_key: str
+    search_service_endpoint: str
+    search_service_api_version: typing.Optional[str]
+    index_name: typing.Optional[str]
+    filterable_metadata_field_keys: typing.Optional[typing.List[str]]
+    embedding_dimension: typing.Optional[int]
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
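On the new model only search_service_api_key and search_service_endpoint are required; the remaining fields are optional. A hypothetical sink configuration with placeholder values (it slots into DataSinkComponentOne and DataSinkCreateComponentOne alongside the existing vector stores, as shown further down):

    from llama_cloud.types import CloudAzureAiSearchVectorStore

    azure_search_sink = CloudAzureAiSearchVectorStore(
        search_service_api_key="AZURE_SEARCH_API_KEY",
        search_service_endpoint="https://my-search-service.search.windows.net",
        index_name="llama-cloud-index",
        embedding_dimension=1536,
    )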
llama_cloud/types/{cloud_google_drive_data_source.py → cloud_notion_page_data_source.py} RENAMED

@@ -14,15 +14,14 @@ except ImportError:
     import pydantic  # type: ignore


-class CloudGoogleDriveDataSource(pydantic.BaseModel):
+class CloudNotionPageDataSource(pydantic.BaseModel):
     """
     Base component object to capture class names.
     """

-
-
-
-    )
+    integration_token: str = pydantic.Field(description="The integration token to use for authentication.")
+    database_ids: typing.Optional[str] = pydantic.Field(description="The Notion Database Id to read content from.")
+    page_ids: typing.Optional[str] = pydantic.Field(description="The Page ID's of the Notion to read from.")
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/cloud_slack_data_source.py ADDED

@@ -0,0 +1,42 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class CloudSlackDataSource(pydantic.BaseModel):
+    """
+    Base component object to capture class names.
+    """
+
+    slack_token: str = pydantic.Field(description="Slack Bot Token.")
+    channel_ids: typing.Optional[str] = pydantic.Field(description="Slack Channel.")
+    latest_date: typing.Optional[str] = pydantic.Field(description="Latest date.")
+    earliest_date: typing.Optional[str] = pydantic.Field(description="Earliest date.")
+    earliest_date_timestamp: typing.Optional[float] = pydantic.Field(description="Earliest date timestamp.")
+    latest_date_timestamp: typing.Optional[float] = pydantic.Field(description="Latest date timestamp.")
+    channel_patterns: typing.Optional[str] = pydantic.Field(description="Slack Channel name pattern.")
+    class_name: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
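CloudSlackDataSource and CloudNotionPageDataSource take the places of the removed GCS and Google Drive components in the data-source unions below. A sketch of how they might be constructed (tokens and IDs are placeholders; note that channel_ids, database_ids, and page_ids are typed as plain strings, not lists):

    from llama_cloud.types import CloudNotionPageDataSource, CloudSlackDataSource

    slack_source = CloudSlackDataSource(
        slack_token="SLACK_BOT_TOKEN",
        channel_ids="C0123456789",
        earliest_date="2024-01-01",
    )

    notion_source = CloudNotionPageDataSource(
        integration_token="NOTION_INTEGRATION_TOKEN",
        page_ids="NOTION_PAGE_ID",
    )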
llama_cloud/types/configurable_data_sink_names.py CHANGED

@@ -16,6 +16,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
     POSTGRES = "POSTGRES"
     QDRANT = "QDRANT"
     WEAVIATE = "WEAVIATE"
+    AZUREAI_SEARCH = "AZUREAI_SEARCH"

     def visit(
         self,
@@ -24,6 +25,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
         postgres: typing.Callable[[], T_Result],
         qdrant: typing.Callable[[], T_Result],
         weaviate: typing.Callable[[], T_Result],
+        azureai_search: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableDataSinkNames.CHROMA:
             return chroma()
@@ -35,3 +37,5 @@ class ConfigurableDataSinkNames(str, enum.Enum):
             return qdrant()
         if self is ConfigurableDataSinkNames.WEAVIATE:
             return weaviate()
+        if self is ConfigurableDataSinkNames.AZUREAI_SEARCH:
+            return azureai_search()
llama_cloud/types/configurable_data_source_names.py CHANGED

@@ -13,29 +13,29 @@ class ConfigurableDataSourceNames(str, enum.Enum):

     S_3 = "S3"
     AZURE_STORAGE_BLOB = "AZURE_STORAGE_BLOB"
-    GCS = "GCS"
-    GOOGLE_DRIVE = "GOOGLE_DRIVE"
     MICROSOFT_ONEDRIVE = "MICROSOFT_ONEDRIVE"
     MICROSOFT_SHAREPOINT = "MICROSOFT_SHAREPOINT"
+    SLACK = "SLACK"
+    NOTION_PAGE = "NOTION_PAGE"

     def visit(
         self,
         s_3: typing.Callable[[], T_Result],
         azure_storage_blob: typing.Callable[[], T_Result],
-        gcs: typing.Callable[[], T_Result],
-        google_drive: typing.Callable[[], T_Result],
         microsoft_onedrive: typing.Callable[[], T_Result],
         microsoft_sharepoint: typing.Callable[[], T_Result],
+        slack: typing.Callable[[], T_Result],
+        notion_page: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableDataSourceNames.S_3:
             return s_3()
         if self is ConfigurableDataSourceNames.AZURE_STORAGE_BLOB:
             return azure_storage_blob()
-        if self is ConfigurableDataSourceNames.GCS:
-            return gcs()
-        if self is ConfigurableDataSourceNames.GOOGLE_DRIVE:
-            return google_drive()
         if self is ConfigurableDataSourceNames.MICROSOFT_ONEDRIVE:
             return microsoft_onedrive()
         if self is ConfigurableDataSourceNames.MICROSOFT_SHAREPOINT:
             return microsoft_sharepoint()
+        if self is ConfigurableDataSourceNames.SLACK:
+            return slack()
+        if self is ConfigurableDataSourceNames.NOTION_PAGE:
+            return notion_page()
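Because visit is exhaustive over the enum members, any caller that previously passed gcs or google_drive handlers must now pass slack and notion_page ones instead, and callers of the sink enum's visit gain an azureai_search parameter in the same way. A sketch against the new data-source signature shown above:

    from llama_cloud.types.configurable_data_source_names import ConfigurableDataSourceNames

    label = ConfigurableDataSourceNames.SLACK.visit(
        s_3=lambda: "Amazon S3",
        azure_storage_blob=lambda: "Azure Storage Blob",
        microsoft_onedrive=lambda: "Microsoft OneDrive",
        microsoft_sharepoint=lambda: "Microsoft SharePoint",
        slack=lambda: "Slack",
        notion_page=lambda: "Notion page",
    )
    assert label == "Slack"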
llama_cloud/types/data_sink_component_one.py CHANGED

@@ -2,6 +2,7 @@

 import typing

+from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
@@ -14,4 +15,5 @@ DataSinkComponentOne = typing.Union[
     CloudPostgresVectorStore,
     CloudQdrantVectorStore,
     CloudWeaviateVectorStore,
+    CloudAzureAiSearchVectorStore,
 ]
llama_cloud/types/data_sink_create_component_one.py CHANGED

@@ -2,6 +2,7 @@

 import typing

+from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
@@ -14,4 +15,5 @@ DataSinkCreateComponentOne = typing.Union[
     CloudPostgresVectorStore,
     CloudQdrantVectorStore,
     CloudWeaviateVectorStore,
+    CloudAzureAiSearchVectorStore,
 ]
llama_cloud/types/data_source_component_one.py CHANGED

@@ -3,17 +3,17 @@
 import typing

 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource

 DataSourceComponentOne = typing.Union[
     CloudS3DataSource,
     CloudAzStorageBlobDataSource,
-    CloudGcsDataSource,
-    CloudGoogleDriveDataSource,
     CloudOneDriveDataSource,
     CloudSharepointDataSource,
+    CloudSlackDataSource,
+    CloudNotionPageDataSource,
 ]
llama_cloud/types/data_source_create_component_one.py CHANGED

@@ -3,17 +3,17 @@
 import typing

 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource

 DataSourceCreateComponentOne = typing.Union[
     CloudS3DataSource,
     CloudAzStorageBlobDataSource,
-    CloudGcsDataSource,
-    CloudGoogleDriveDataSource,
     CloudOneDriveDataSource,
     CloudSharepointDataSource,
+    CloudSlackDataSource,
+    CloudNotionPageDataSource,
 ]
llama_cloud/types/extraction_result.py ADDED

@@ -0,0 +1,42 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .extraction_result_data_value import ExtractionResultDataValue
+from .file import File
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ExtractionResult(pydantic.BaseModel):
+    """
+    Schema for an extraction result.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
+    updated_at: typing.Optional[dt.datetime] = pydantic.Field(description="Update datetime")
+    schema_id: str = pydantic.Field(description="The id of the schema")
+    data: typing.Dict[str, ExtractionResultDataValue] = pydantic.Field(description="The data extracted from the file")
+    file: File = pydantic.Field(description="The file that the extract was extracted from")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/extraction_schema.py ADDED

@@ -0,0 +1,44 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .extraction_schema_data_schema_value import ExtractionSchemaDataSchemaValue
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ExtractionSchema(pydantic.BaseModel):
+    """
+    Schema for extraction schema.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
+    updated_at: typing.Optional[dt.datetime] = pydantic.Field(description="Update datetime")
+    name: str = pydantic.Field(description="The name of the extraction schema")
+    project_id: str = pydantic.Field(description="The ID of the project that the extraction schema belongs to")
+    data_schema: typing.Dict[str, ExtractionSchemaDataSchemaValue] = pydantic.Field(
+        description="The schema of the data"
+    )
+    openai_api_key: str = pydantic.Field(description="The API key for the OpenAI API")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
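ExtractionSchema and ExtractionResult are ordinary pydantic models, so payloads from the new extraction endpoints can be parsed or inspected directly. A sketch that assumes ExtractionSchemaDataSchemaValue accepts arbitrary JSON values (all field values are illustrative):

    from llama_cloud.types import ExtractionSchema

    schema = ExtractionSchema.parse_obj(
        {
            "id": "SCHEMA_ID",
            "name": "invoice-fields",
            "project_id": "PROJECT_ID",
            "data_schema": {"vendor": {"type": "string"}, "total": {"type": "number"}},
            "openai_api_key": "OPENAI_API_KEY",
        }
    )
    print(schema.data_schema)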
llama_cloud/types/pipeline_data_source_component_one.py CHANGED

@@ -3,17 +3,17 @@
 import typing

 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource

 PipelineDataSourceComponentOne = typing.Union[
     CloudS3DataSource,
     CloudAzStorageBlobDataSource,
-    CloudGcsDataSource,
-    CloudGoogleDriveDataSource,
     CloudOneDriveDataSource,
     CloudSharepointDataSource,
+    CloudSlackDataSource,
+    CloudNotionPageDataSource,
 ]
llama_cloud/types/text_node.py CHANGED
@@ -37,6 +37,7 @@ class TextNode(pydantic.BaseModel):
         description="A mapping of relationships to other node information."
     )
     text: typing.Optional[str] = pydantic.Field(description="Text content of the node.")
+    mimetype: typing.Optional[str] = pydantic.Field(description="MIME type of the node content.")
     start_char_idx: typing.Optional[int] = pydantic.Field(description="Start char index of the node.")
     end_char_idx: typing.Optional[int] = pydantic.Field(description="End char index of the node.")
     text_template: typing.Optional[str] = pydantic.Field(
{llama_cloud-0.0.7.dist-info → llama_cloud-0.0.8.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-cloud
-Version: 0.0.7
+Version: 0.0.8
 Summary:
 Author: Logan Markewich
 Author-email: logan@runllama.ai
@@ -10,7 +10,6 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: httpx (>=0.20.0)
 Requires-Dist: pydantic (>=1.10)
 Description-Content-Type: text/markdown