llama-cloud 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic.
- llama_cloud/__init__.py +18 -4
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +4 -1
- llama_cloud/resources/component_definitions/client.py +18 -18
- llama_cloud/resources/data_sinks/client.py +2 -2
- llama_cloud/resources/data_sinks/types/data_sink_update_component_one.py +2 -0
- llama_cloud/resources/data_sources/client.py +2 -2
- llama_cloud/resources/data_sources/types/data_source_update_component_one.py +4 -4
- llama_cloud/resources/evals/client.py +12 -12
- llama_cloud/resources/extraction/__init__.py +5 -0
- llama_cloud/resources/extraction/client.py +648 -0
- llama_cloud/resources/extraction/types/__init__.py +5 -0
- llama_cloud/resources/extraction/types/extraction_schema_update_data_schema_value.py +7 -0
- llama_cloud/resources/files/client.py +8 -8
- llama_cloud/resources/parsing/client.py +16 -0
- llama_cloud/resources/pipelines/client.py +156 -12
- llama_cloud/resources/projects/client.py +24 -24
- llama_cloud/types/__init__.py +14 -4
- llama_cloud/types/azure_open_ai_embedding.py +3 -0
- llama_cloud/types/{cloud_gcs_data_source.py → cloud_azure_ai_search_vector_store.py} +9 -7
- llama_cloud/types/{cloud_google_drive_data_source.py → cloud_notion_page_data_source.py} +4 -5
- llama_cloud/types/cloud_slack_data_source.py +42 -0
- llama_cloud/types/configurable_data_sink_names.py +4 -0
- llama_cloud/types/configurable_data_source_names.py +8 -8
- llama_cloud/types/data_sink_component_one.py +2 -0
- llama_cloud/types/data_sink_create_component_one.py +2 -0
- llama_cloud/types/data_source_component_one.py +4 -4
- llama_cloud/types/data_source_create_component_one.py +4 -4
- llama_cloud/types/eval_dataset_job_record.py +1 -1
- llama_cloud/types/extraction_result.py +42 -0
- llama_cloud/types/extraction_result_data_value.py +5 -0
- llama_cloud/types/extraction_schema.py +44 -0
- llama_cloud/types/extraction_schema_data_schema_value.py +7 -0
- llama_cloud/types/llama_parse_parameters.py +2 -0
- llama_cloud/types/llama_parse_supported_file_extensions.py +124 -0
- llama_cloud/types/pipeline.py +0 -4
- llama_cloud/types/pipeline_data_source_component_one.py +4 -4
- llama_cloud/types/text_node.py +1 -0
- {llama_cloud-0.0.6.dist-info → llama_cloud-0.0.8.dist-info}/METADATA +1 -2
- {llama_cloud-0.0.6.dist-info → llama_cloud-0.0.8.dist-info}/RECORD +42 -33
- {llama_cloud-0.0.6.dist-info → llama_cloud-0.0.8.dist-info}/WHEEL +1 -1
- {llama_cloud-0.0.6.dist-info → llama_cloud-0.0.8.dist-info}/LICENSE +0 -0
llama_cloud/types/{cloud_google_drive_data_source.py → cloud_notion_page_data_source.py}
RENAMED
@@ -14,15 +14,14 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class CloudGoogleDriveDataSource(pydantic.BaseModel):
+class CloudNotionPageDataSource(pydantic.BaseModel):
     """
     Base component object to capture class names.
     """
 
-
-
-
-    )
+    integration_token: str = pydantic.Field(description="The integration token to use for authentication.")
+    database_ids: typing.Optional[str] = pydantic.Field(description="The Notion Database Id to read content from.")
+    page_ids: typing.Optional[str] = pydantic.Field(description="The Page ID's of the Notion to read from.")
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
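
For orientation, a minimal usage sketch for the renamed model, assuming it is re-exported from llama_cloud.types like the other generated types; all field values below are placeholders:

from llama_cloud.types import CloudNotionPageDataSource  # assumed re-export path

notion_source = CloudNotionPageDataSource(
    integration_token="secret-notion-integration-token",  # placeholder
    page_ids="page-id-1,page-id-2",  # placeholder; the field is a plain string in this schema
)
print(notion_source.json())  # the overridden json() applies by_alias / exclude_unset
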
llama_cloud/types/cloud_slack_data_source.py
ADDED
@@ -0,0 +1,42 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class CloudSlackDataSource(pydantic.BaseModel):
+    """
+    Base component object to capture class names.
+    """
+
+    slack_token: str = pydantic.Field(description="Slack Bot Token.")
+    channel_ids: typing.Optional[str] = pydantic.Field(description="Slack Channel.")
+    latest_date: typing.Optional[str] = pydantic.Field(description="Latest date.")
+    earliest_date: typing.Optional[str] = pydantic.Field(description="Earliest date.")
+    earliest_date_timestamp: typing.Optional[float] = pydantic.Field(description="Earliest date timestamp.")
+    latest_date_timestamp: typing.Optional[float] = pydantic.Field(description="Latest date timestamp.")
+    channel_patterns: typing.Optional[str] = pydantic.Field(description="Slack Channel name pattern.")
+    class_name: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
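
A similar hedged sketch for the new Slack source; the import path is again assumed, the values are placeholders, and note that the date fields are plain strings in this generated schema:

from llama_cloud.types import CloudSlackDataSource  # assumed re-export path

slack_source = CloudSlackDataSource(
    slack_token="xoxb-placeholder-token",  # placeholder
    channel_ids="C0123456789",             # placeholder
    earliest_date="2024-01-01",            # plain string, not a datetime
)
payload = slack_source.dict()  # exclude_unset=True drops the optionals left unset
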
llama_cloud/types/configurable_data_sink_names.py
CHANGED
@@ -16,6 +16,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
     POSTGRES = "POSTGRES"
     QDRANT = "QDRANT"
     WEAVIATE = "WEAVIATE"
+    AZUREAI_SEARCH = "AZUREAI_SEARCH"
 
     def visit(
         self,
@@ -24,6 +25,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
         postgres: typing.Callable[[], T_Result],
         qdrant: typing.Callable[[], T_Result],
         weaviate: typing.Callable[[], T_Result],
+        azureai_search: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableDataSinkNames.CHROMA:
             return chroma()
@@ -35,3 +37,5 @@ class ConfigurableDataSinkNames(str, enum.Enum):
             return qdrant()
         if self is ConfigurableDataSinkNames.WEAVIATE:
             return weaviate()
+        if self is ConfigurableDataSinkNames.AZUREAI_SEARCH:
+            return azureai_search()
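
Because ConfigurableDataSinkNames is a str-valued enum, configuration strings can be parsed straight into it, and exhaustive handlers (including callers of visit(), which gained a required parameter) now need an AZUREAI_SEARCH branch; a small sketch using only members visible in this diff, with the import path assumed:

from llama_cloud.types import ConfigurableDataSinkNames  # assumed re-export path

def sink_label(raw_name: str) -> str:
    sink = ConfigurableDataSinkNames(raw_name)  # raises ValueError for unknown names
    if sink is ConfigurableDataSinkNames.AZUREAI_SEARCH:
        return "Azure AI Search"
    return sink.value.title()  # e.g. "Weaviate"

print(sink_label("AZUREAI_SEARCH"))
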
llama_cloud/types/configurable_data_source_names.py
CHANGED
@@ -13,29 +13,29 @@ class ConfigurableDataSourceNames(str, enum.Enum):
 
     S_3 = "S3"
     AZURE_STORAGE_BLOB = "AZURE_STORAGE_BLOB"
-    GCS = "GCS"
-    GOOGLE_DRIVE = "GOOGLE_DRIVE"
     MICROSOFT_ONEDRIVE = "MICROSOFT_ONEDRIVE"
     MICROSOFT_SHAREPOINT = "MICROSOFT_SHAREPOINT"
+    SLACK = "SLACK"
+    NOTION_PAGE = "NOTION_PAGE"
 
     def visit(
         self,
         s_3: typing.Callable[[], T_Result],
         azure_storage_blob: typing.Callable[[], T_Result],
-        gcs: typing.Callable[[], T_Result],
-        google_drive: typing.Callable[[], T_Result],
         microsoft_onedrive: typing.Callable[[], T_Result],
         microsoft_sharepoint: typing.Callable[[], T_Result],
+        slack: typing.Callable[[], T_Result],
+        notion_page: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableDataSourceNames.S_3:
             return s_3()
         if self is ConfigurableDataSourceNames.AZURE_STORAGE_BLOB:
             return azure_storage_blob()
-        if self is ConfigurableDataSourceNames.GCS:
-            return gcs()
-        if self is ConfigurableDataSourceNames.GOOGLE_DRIVE:
-            return google_drive()
         if self is ConfigurableDataSourceNames.MICROSOFT_ONEDRIVE:
             return microsoft_onedrive()
         if self is ConfigurableDataSourceNames.MICROSOFT_SHAREPOINT:
             return microsoft_sharepoint()
+        if self is ConfigurableDataSourceNames.SLACK:
+            return slack()
+        if self is ConfigurableDataSourceNames.NOTION_PAGE:
+            return notion_page()
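
visit() is exhaustive over the enum, so callers that previously passed gcs or google_drive callbacks will now fail; a sketch of the new call shape, using only the keyword parameters visible in this hunk (the top-level import path is assumed):

from llama_cloud.types import ConfigurableDataSourceNames  # assumed re-export path

source = ConfigurableDataSourceNames.NOTION_PAGE
label = source.visit(
    s_3=lambda: "Amazon S3",
    azure_storage_blob=lambda: "Azure Blob Storage",
    microsoft_onedrive=lambda: "Microsoft OneDrive",
    microsoft_sharepoint=lambda: "Microsoft SharePoint",
    slack=lambda: "Slack",
    notion_page=lambda: "Notion pages",
)
print(label)  # "Notion pages"
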
llama_cloud/types/data_sink_component_one.py
CHANGED
@@ -2,6 +2,7 @@
 
 import typing
 
+from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
@@ -14,4 +15,5 @@ DataSinkComponentOne = typing.Union[
     CloudPostgresVectorStore,
     CloudQdrantVectorStore,
     CloudWeaviateVectorStore,
+    CloudAzureAiSearchVectorStore,
 ]
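
Code that narrows this union before touching sink-specific fields has one more case to handle; a minimal sketch, assuming the usual top-level re-exports:

from llama_cloud.types import CloudAzureAiSearchVectorStore, DataSinkComponentOne  # assumed re-exports

def is_azure_ai_search(component: DataSinkComponentOne) -> bool:
    # The alias is a typing.Union, so narrow against the concrete member class.
    return isinstance(component, CloudAzureAiSearchVectorStore)
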
llama_cloud/types/data_sink_create_component_one.py
CHANGED
@@ -2,6 +2,7 @@
 
 import typing
 
+from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
@@ -14,4 +15,5 @@ DataSinkCreateComponentOne = typing.Union[
     CloudPostgresVectorStore,
     CloudQdrantVectorStore,
     CloudWeaviateVectorStore,
+    CloudAzureAiSearchVectorStore,
 ]
llama_cloud/types/data_source_component_one.py
CHANGED
@@ -3,17 +3,17 @@
 import typing
 
 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource
 
 DataSourceComponentOne = typing.Union[
     CloudS3DataSource,
     CloudAzStorageBlobDataSource,
-    CloudGcsDataSource,
-    CloudGoogleDriveDataSource,
     CloudOneDriveDataSource,
     CloudSharepointDataSource,
+    CloudSlackDataSource,
+    CloudNotionPageDataSource,
 ]
llama_cloud/types/data_source_create_component_one.py
CHANGED
@@ -3,17 +3,17 @@
 import typing
 
 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource
 
 DataSourceCreateComponentOne = typing.Union[
     CloudS3DataSource,
     CloudAzStorageBlobDataSource,
-    CloudGcsDataSource,
-    CloudGoogleDriveDataSource,
     CloudOneDriveDataSource,
     CloudSharepointDataSource,
+    CloudSlackDataSource,
+    CloudNotionPageDataSource,
 ]
llama_cloud/types/eval_dataset_job_record.py
CHANGED
@@ -38,13 +38,13 @@ class EvalDatasetJobRecord(pydantic.BaseModel):
         description="The correlation ID for this job. Used for tracking the job across services."
     )
     parent_job_execution_id: typing.Optional[str] = pydantic.Field(description="The ID of the parent job execution.")
+    created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
     id: typing.Optional[str] = pydantic.Field(description="Unique identifier")
     status: StatusEnum
     error_message: typing.Optional[str]
     attempts: typing.Optional[int] = pydantic.Field(description="The number of times this job has been attempted")
     started_at: typing.Optional[dt.datetime]
     ended_at: typing.Optional[dt.datetime]
-    created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
     updated_at: typing.Optional[dt.datetime] = pydantic.Field(description="Update datetime")
     data: typing.Optional[Base] = pydantic.Field(description="Additional metadata for the job execution.")
 
llama_cloud/types/extraction_result.py
ADDED
@@ -0,0 +1,42 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .extraction_result_data_value import ExtractionResultDataValue
+from .file import File
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ExtractionResult(pydantic.BaseModel):
+    """
+    Schema for an extraction result.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
+    updated_at: typing.Optional[dt.datetime] = pydantic.Field(description="Update datetime")
+    schema_id: str = pydantic.Field(description="The id of the schema")
+    data: typing.Dict[str, ExtractionResultDataValue] = pydantic.Field(description="The data extracted from the file")
+    file: File = pydantic.Field(description="The file that the extract was extracted from")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
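
A hedged helper over the new result model, touching only fields defined above (data maps extracted field names to their values; the import path is assumed):

from llama_cloud.types import ExtractionResult  # assumed re-export path

def summarize_extraction(result: ExtractionResult) -> str:
    extracted = ", ".join(f"{key}={value}" for key, value in result.data.items())
    return f"result {result.id} (schema {result.schema_id}): {extracted}"
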
llama_cloud/types/extraction_schema.py
ADDED
@@ -0,0 +1,44 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .extraction_schema_data_schema_value import ExtractionSchemaDataSchemaValue
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ExtractionSchema(pydantic.BaseModel):
+    """
+    Schema for extraction schema.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
+    updated_at: typing.Optional[dt.datetime] = pydantic.Field(description="Update datetime")
+    name: str = pydantic.Field(description="The name of the extraction schema")
+    project_id: str = pydantic.Field(description="The ID of the project that the extraction schema belongs to")
+    data_schema: typing.Dict[str, ExtractionSchemaDataSchemaValue] = pydantic.Field(
+        description="The schema of the data"
+    )
+    openai_api_key: str = pydantic.Field(description="The API key for the OpenAI API")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
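
And a matching sketch for the schema model itself, kept read-only so it stays within the fields shown in this diff (import path assumed):

from llama_cloud.types import ExtractionSchema  # assumed re-export path

def describe_schema(schema: ExtractionSchema) -> str:
    field_names = ", ".join(sorted(schema.data_schema))
    return f"{schema.name} ({schema.id}) in project {schema.project_id}: {field_names}"
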
llama_cloud/types/llama_parse_parameters.py
CHANGED
@@ -31,6 +31,8 @@ class LlamaParseParameters(pydantic.BaseModel):
     gpt_4_o_api_key: typing.Optional[str] = pydantic.Field(alias="gpt4o_api_key")
     do_not_unroll_columns: typing.Optional[bool]
     page_separator: typing.Optional[str]
+    bounding_box: typing.Optional[str]
+    target_pages: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
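
A sketch of setting the two new parsing knobs; both are optional strings on the generated model, the import path is assumed, and the values are placeholders since the exact string format LlamaParse expects is not part of this diff:

from llama_cloud.types import LlamaParseParameters  # assumed re-export path

params = LlamaParseParameters(
    target_pages="0,1,2",        # placeholder page-selection string
    bounding_box="0.1,0,0.1,0",  # placeholder margin string
    page_separator="\n---\n",
)
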
llama_cloud/types/llama_parse_supported_file_extensions.py
CHANGED
@@ -47,6 +47,37 @@ class LlamaParseSupportedFileExtensions(str, enum.Enum):
     EPUB = ".epub"
     HTML = ".html"
     HTM = ".htm"
+    XLS = ".xls"
+    XLSX = ".xlsx"
+    XLSM = ".xlsm"
+    XLSB = ".xlsb"
+    XLW = ".xlw"
+    CSV = ".csv"
+    DIF = ".dif"
+    SYLK = ".sylk"
+    SLK = ".slk"
+    PRN = ".prn"
+    NUMBERS = ".numbers"
+    ET = ".et"
+    ODS = ".ods"
+    FODS = ".fods"
+    UOS_1 = ".uos1"
+    UOS_2 = ".uos2"
+    DBF = ".dbf"
+    WK_1 = ".wk1"
+    WK_2 = ".wk2"
+    WK_3 = ".wk3"
+    WK_4 = ".wk4"
+    WKS = ".wks"
+    WQ_1 = ".wq1"
+    WQ_2 = ".wq2"
+    WB_1 = ".wb1"
+    WB_2 = ".wb2"
+    WB_3 = ".wb3"
+    QPW = ".qpw"
+    XLR = ".xlr"
+    ETH = ".eth"
+    TSV = ".tsv"
 
     def visit(
         self,
@@ -86,6 +117,37 @@ class LlamaParseSupportedFileExtensions(str, enum.Enum):
         epub: typing.Callable[[], T_Result],
         html: typing.Callable[[], T_Result],
         htm: typing.Callable[[], T_Result],
+        xls: typing.Callable[[], T_Result],
+        xlsx: typing.Callable[[], T_Result],
+        xlsm: typing.Callable[[], T_Result],
+        xlsb: typing.Callable[[], T_Result],
+        xlw: typing.Callable[[], T_Result],
+        csv: typing.Callable[[], T_Result],
+        dif: typing.Callable[[], T_Result],
+        sylk: typing.Callable[[], T_Result],
+        slk: typing.Callable[[], T_Result],
+        prn: typing.Callable[[], T_Result],
+        numbers: typing.Callable[[], T_Result],
+        et: typing.Callable[[], T_Result],
+        ods: typing.Callable[[], T_Result],
+        fods: typing.Callable[[], T_Result],
+        uos_1: typing.Callable[[], T_Result],
+        uos_2: typing.Callable[[], T_Result],
+        dbf: typing.Callable[[], T_Result],
+        wk_1: typing.Callable[[], T_Result],
+        wk_2: typing.Callable[[], T_Result],
+        wk_3: typing.Callable[[], T_Result],
+        wk_4: typing.Callable[[], T_Result],
+        wks: typing.Callable[[], T_Result],
+        wq_1: typing.Callable[[], T_Result],
+        wq_2: typing.Callable[[], T_Result],
+        wb_1: typing.Callable[[], T_Result],
+        wb_2: typing.Callable[[], T_Result],
+        wb_3: typing.Callable[[], T_Result],
+        qpw: typing.Callable[[], T_Result],
+        xlr: typing.Callable[[], T_Result],
+        eth: typing.Callable[[], T_Result],
+        tsv: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is LlamaParseSupportedFileExtensions.PDF:
             return pdf()
@@ -159,3 +221,65 @@ class LlamaParseSupportedFileExtensions(str, enum.Enum):
             return html()
         if self is LlamaParseSupportedFileExtensions.HTM:
             return htm()
+        if self is LlamaParseSupportedFileExtensions.XLS:
+            return xls()
+        if self is LlamaParseSupportedFileExtensions.XLSX:
+            return xlsx()
+        if self is LlamaParseSupportedFileExtensions.XLSM:
+            return xlsm()
+        if self is LlamaParseSupportedFileExtensions.XLSB:
+            return xlsb()
+        if self is LlamaParseSupportedFileExtensions.XLW:
+            return xlw()
+        if self is LlamaParseSupportedFileExtensions.CSV:
+            return csv()
+        if self is LlamaParseSupportedFileExtensions.DIF:
+            return dif()
+        if self is LlamaParseSupportedFileExtensions.SYLK:
+            return sylk()
+        if self is LlamaParseSupportedFileExtensions.SLK:
+            return slk()
+        if self is LlamaParseSupportedFileExtensions.PRN:
+            return prn()
+        if self is LlamaParseSupportedFileExtensions.NUMBERS:
+            return numbers()
+        if self is LlamaParseSupportedFileExtensions.ET:
+            return et()
+        if self is LlamaParseSupportedFileExtensions.ODS:
+            return ods()
+        if self is LlamaParseSupportedFileExtensions.FODS:
+            return fods()
+        if self is LlamaParseSupportedFileExtensions.UOS_1:
+            return uos_1()
+        if self is LlamaParseSupportedFileExtensions.UOS_2:
+            return uos_2()
+        if self is LlamaParseSupportedFileExtensions.DBF:
+            return dbf()
+        if self is LlamaParseSupportedFileExtensions.WK_1:
+            return wk_1()
+        if self is LlamaParseSupportedFileExtensions.WK_2:
+            return wk_2()
+        if self is LlamaParseSupportedFileExtensions.WK_3:
+            return wk_3()
+        if self is LlamaParseSupportedFileExtensions.WK_4:
+            return wk_4()
+        if self is LlamaParseSupportedFileExtensions.WKS:
+            return wks()
+        if self is LlamaParseSupportedFileExtensions.WQ_1:
+            return wq_1()
+        if self is LlamaParseSupportedFileExtensions.WQ_2:
+            return wq_2()
+        if self is LlamaParseSupportedFileExtensions.WB_1:
+            return wb_1()
+        if self is LlamaParseSupportedFileExtensions.WB_2:
+            return wb_2()
+        if self is LlamaParseSupportedFileExtensions.WB_3:
+            return wb_3()
+        if self is LlamaParseSupportedFileExtensions.QPW:
+            return qpw()
+        if self is LlamaParseSupportedFileExtensions.XLR:
+            return xlr()
+        if self is LlamaParseSupportedFileExtensions.ETH:
+            return eth()
+        if self is LlamaParseSupportedFileExtensions.TSV:
+            return tsv()
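
Since the enum is str-valued, checking whether a given file is now parseable is a simple set-membership test; a minimal sketch, with the import path assumed:

import pathlib

from llama_cloud.types import LlamaParseSupportedFileExtensions  # assumed re-export path

SUPPORTED_EXTENSIONS = {ext.value for ext in LlamaParseSupportedFileExtensions}

def is_supported(filename: str) -> bool:
    return pathlib.Path(filename).suffix.lower() in SUPPORTED_EXTENSIONS

print(is_supported("report.xlsx"))  # True as of 0.0.8
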
llama_cloud/types/pipeline.py
CHANGED
@@ -8,7 +8,6 @@ from .configured_transformation_item import ConfiguredTransformationItem
 from .data_sink import DataSink
 from .eval_execution_params import EvalExecutionParams
 from .llama_parse_parameters import LlamaParseParameters
-from .managed_ingestion_status import ManagedIngestionStatus
 from .pipeline_type import PipelineType
 from .preset_retrieval_params import PresetRetrievalParams
 
@@ -50,9 +49,6 @@ class Pipeline(pydantic.BaseModel):
     llama_parse_parameters: typing.Optional[LlamaParseParameters] = pydantic.Field(
         description="Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline."
     )
-    managed_ingestion_status: typing.Optional[ManagedIngestionStatus] = pydantic.Field(
-        description="Status of Managed Ingestion."
-    )
     data_sink: typing.Optional[DataSink] = pydantic.Field(
         description="The data sink for the pipeline. If None, the pipeline will use the fully managed data sink."
     )
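
The managed_ingestion_status field is gone from the Pipeline model, so code that read it directly will raise AttributeError on 0.0.8; a defensive sketch for code that has to run against both versions (this diff does not show where, if anywhere, the status moved):

def ingestion_status_or_none(pipeline):
    # Present on 0.0.6 Pipeline objects, absent on 0.0.8.
    return getattr(pipeline, "managed_ingestion_status", None)
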
llama_cloud/types/pipeline_data_source_component_one.py
CHANGED
@@ -3,17 +3,17 @@
 import typing
 
 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
-from .cloud_gcs_data_source import CloudGcsDataSource
-from .cloud_google_drive_data_source import CloudGoogleDriveDataSource
+from .cloud_notion_page_data_source import CloudNotionPageDataSource
 from .cloud_one_drive_data_source import CloudOneDriveDataSource
 from .cloud_s_3_data_source import CloudS3DataSource
 from .cloud_sharepoint_data_source import CloudSharepointDataSource
+from .cloud_slack_data_source import CloudSlackDataSource
 
 PipelineDataSourceComponentOne = typing.Union[
     CloudS3DataSource,
     CloudAzStorageBlobDataSource,
-    CloudGcsDataSource,
-    CloudGoogleDriveDataSource,
     CloudOneDriveDataSource,
     CloudSharepointDataSource,
+    CloudSlackDataSource,
+    CloudNotionPageDataSource,
 ]
llama_cloud/types/text_node.py
CHANGED
@@ -37,6 +37,7 @@ class TextNode(pydantic.BaseModel):
         description="A mapping of relationships to other node information."
     )
     text: typing.Optional[str] = pydantic.Field(description="Text content of the node.")
+    mimetype: typing.Optional[str] = pydantic.Field(description="MIME type of the node content.")
     start_char_idx: typing.Optional[int] = pydantic.Field(description="Start char index of the node.")
     end_char_idx: typing.Optional[int] = pydantic.Field(description="End char index of the node.")
     text_template: typing.Optional[str] = pydantic.Field(
{llama_cloud-0.0.6.dist-info → llama_cloud-0.0.8.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llama-cloud
-Version: 0.0.6
+Version: 0.0.8
 Summary:
 Author: Logan Markewich
 Author-email: logan@runllama.ai
@@ -10,7 +10,6 @@ Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: httpx (>=0.20.0)
 Requires-Dist: pydantic (>=1.10)
 Description-Content-Type: text/markdown