llama-cloud 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff shows the changes between the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release.
- llama_cloud/__init__.py +82 -6
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +13 -2
- llama_cloud/resources/auth/__init__.py +2 -0
- llama_cloud/resources/auth/client.py +124 -0
- llama_cloud/resources/data_sinks/types/data_sink_update_component_one.py +4 -0
- llama_cloud/resources/extraction/__init__.py +2 -2
- llama_cloud/resources/extraction/client.py +139 -48
- llama_cloud/resources/extraction/types/__init__.py +2 -1
- llama_cloud/resources/extraction/types/extraction_schema_create_data_schema_value.py +7 -0
- llama_cloud/resources/pipelines/__init__.py +12 -2
- llama_cloud/resources/pipelines/client.py +58 -2
- llama_cloud/resources/pipelines/types/__init__.py +11 -1
- llama_cloud/resources/pipelines/types/pipeline_update_transform_config.py +31 -0
- llama_cloud/types/__init__.py +78 -6
- llama_cloud/types/advanced_mode_transform_config.py +38 -0
- llama_cloud/types/advanced_mode_transform_config_chunking_config.py +67 -0
- llama_cloud/types/advanced_mode_transform_config_segmentation_config.py +45 -0
- llama_cloud/types/auto_transform_config.py +32 -0
- llama_cloud/types/character_chunking_config.py +32 -0
- llama_cloud/types/{html_node_parser.py → character_splitter.py} +9 -9
- llama_cloud/types/chat_data.py +2 -0
- llama_cloud/types/cloud_az_storage_blob_data_source.py +11 -2
- llama_cloud/types/{simple_file_node_parser.py → cloud_milvus_vector_store.py} +7 -14
- llama_cloud/types/cloud_mongo_db_atlas_vector_search.py +51 -0
- llama_cloud/types/configurable_data_sink_names.py +8 -0
- llama_cloud/types/configurable_transformation_names.py +8 -12
- llama_cloud/types/configured_transformation_item_component_one.py +4 -6
- llama_cloud/types/custom_claims.py +61 -0
- llama_cloud/types/data_sink_component_one.py +4 -0
- llama_cloud/types/data_sink_create_component_one.py +4 -0
- llama_cloud/types/element_segmentation_config.py +29 -0
- llama_cloud/types/embedding_config.py +36 -0
- llama_cloud/types/embedding_config_component.py +7 -0
- llama_cloud/types/embedding_config_component_one.py +19 -0
- llama_cloud/types/embedding_config_type.py +41 -0
- llama_cloud/types/eval_dataset_job_record.py +1 -0
- llama_cloud/types/ingestion_error_response.py +34 -0
- llama_cloud/types/job_name_mapping.py +45 -0
- llama_cloud/types/llama_parse_supported_file_extensions.py +32 -0
- llama_cloud/types/llm_parameters.py +39 -0
- llama_cloud/types/managed_ingestion_status_response.py +6 -0
- llama_cloud/types/none_chunking_config.py +29 -0
- llama_cloud/types/none_segmentation_config.py +29 -0
- llama_cloud/types/page_segmentation_config.py +29 -0
- llama_cloud/types/{json_node_parser.py → page_splitter_node_parser.py} +3 -8
- llama_cloud/types/parsing_job.py +2 -0
- llama_cloud/types/pipeline_create.py +8 -0
- llama_cloud/types/pipeline_create_transform_config.py +31 -0
- llama_cloud/types/semantic_chunking_config.py +32 -0
- llama_cloud/types/sentence_chunking_config.py +34 -0
- llama_cloud/types/token_chunking_config.py +33 -0
- llama_cloud/types/user.py +35 -0
- {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/METADATA +1 -1
- {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/RECORD +57 -30
- {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/LICENSE +0 -0
- {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/WHEEL +0 -0
llama_cloud/types/configurable_transformation_names.py
CHANGED

@@ -11,13 +11,12 @@ class ConfigurableTransformationNames(str, enum.Enum):
     An enumeration.
     """

+    CHARACTER_SPLITTER = "CHARACTER_SPLITTER"
+    PAGE_SPLITTER_NODE_PARSER = "PAGE_SPLITTER_NODE_PARSER"
     CODE_NODE_PARSER = "CODE_NODE_PARSER"
     SENTENCE_AWARE_NODE_PARSER = "SENTENCE_AWARE_NODE_PARSER"
     TOKEN_AWARE_NODE_PARSER = "TOKEN_AWARE_NODE_PARSER"
-    HTML_NODE_PARSER = "HTML_NODE_PARSER"
     MARKDOWN_NODE_PARSER = "MARKDOWN_NODE_PARSER"
-    JSON_NODE_PARSER = "JSON_NODE_PARSER"
-    SIMPLE_FILE_NODE_PARSER = "SIMPLE_FILE_NODE_PARSER"
     MARKDOWN_ELEMENT_NODE_PARSER = "MARKDOWN_ELEMENT_NODE_PARSER"
     OPENAI_EMBEDDING = "OPENAI_EMBEDDING"
     AZURE_EMBEDDING = "AZURE_EMBEDDING"

@@ -28,13 +27,12 @@ class ConfigurableTransformationNames(str, enum.Enum):

     def visit(
         self,
+        character_splitter: typing.Callable[[], T_Result],
+        page_splitter_node_parser: typing.Callable[[], T_Result],
         code_node_parser: typing.Callable[[], T_Result],
         sentence_aware_node_parser: typing.Callable[[], T_Result],
         token_aware_node_parser: typing.Callable[[], T_Result],
-        html_node_parser: typing.Callable[[], T_Result],
         markdown_node_parser: typing.Callable[[], T_Result],
-        json_node_parser: typing.Callable[[], T_Result],
-        simple_file_node_parser: typing.Callable[[], T_Result],
         markdown_element_node_parser: typing.Callable[[], T_Result],
         openai_embedding: typing.Callable[[], T_Result],
         azure_embedding: typing.Callable[[], T_Result],

@@ -43,20 +41,18 @@ class ConfigurableTransformationNames(str, enum.Enum):
         huggingface_api_embedding: typing.Callable[[], T_Result],
         gemini_embedding: typing.Callable[[], T_Result],
     ) -> T_Result:
+        if self is ConfigurableTransformationNames.CHARACTER_SPLITTER:
+            return character_splitter()
+        if self is ConfigurableTransformationNames.PAGE_SPLITTER_NODE_PARSER:
+            return page_splitter_node_parser()
         if self is ConfigurableTransformationNames.CODE_NODE_PARSER:
             return code_node_parser()
         if self is ConfigurableTransformationNames.SENTENCE_AWARE_NODE_PARSER:
             return sentence_aware_node_parser()
         if self is ConfigurableTransformationNames.TOKEN_AWARE_NODE_PARSER:
             return token_aware_node_parser()
-        if self is ConfigurableTransformationNames.HTML_NODE_PARSER:
-            return html_node_parser()
         if self is ConfigurableTransformationNames.MARKDOWN_NODE_PARSER:
             return markdown_node_parser()
-        if self is ConfigurableTransformationNames.JSON_NODE_PARSER:
-            return json_node_parser()
-        if self is ConfigurableTransformationNames.SIMPLE_FILE_NODE_PARSER:
-            return simple_file_node_parser()
         if self is ConfigurableTransformationNames.MARKDOWN_ELEMENT_NODE_PARSER:
             return markdown_element_node_parser()
         if self is ConfigurableTransformationNames.OPENAI_EMBEDDING:
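A minimal sketch (not from the package) of how a caller might branch on the expanded enum; the raw value below is hypothetical:

```python
from llama_cloud.types.configurable_transformation_names import ConfigurableTransformationNames

# Hypothetical raw value, e.g. taken from a pipeline's configured transformations.
raw_name = "CHARACTER_SPLITTER"
name = ConfigurableTransformationNames(raw_name)

if name is ConfigurableTransformationNames.CHARACTER_SPLITTER:
    print("pipeline splits text with the new character splitter")
elif name is ConfigurableTransformationNames.PAGE_SPLITTER_NODE_PARSER:
    print("pipeline splits documents by page")
```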
llama_cloud/types/configured_transformation_item_component_one.py
CHANGED

@@ -4,27 +4,25 @@ import typing

 from .azure_open_ai_embedding import AzureOpenAiEmbedding
 from .bedrock_embedding import BedrockEmbedding
+from .character_splitter import CharacterSplitter
 from .code_splitter import CodeSplitter
 from .cohere_embedding import CohereEmbedding
 from .gemini_embedding import GeminiEmbedding
-from .html_node_parser import HtmlNodeParser
 from .hugging_face_inference_api_embedding import HuggingFaceInferenceApiEmbedding
-from .json_node_parser import JsonNodeParser
 from .markdown_element_node_parser import MarkdownElementNodeParser
 from .markdown_node_parser import MarkdownNodeParser
 from .open_ai_embedding import OpenAiEmbedding
+from .page_splitter_node_parser import PageSplitterNodeParser
 from .sentence_splitter import SentenceSplitter
-from .simple_file_node_parser import SimpleFileNodeParser
 from .token_text_splitter import TokenTextSplitter

 ConfiguredTransformationItemComponentOne = typing.Union[
+    CharacterSplitter,
+    PageSplitterNodeParser,
     CodeSplitter,
     SentenceSplitter,
     TokenTextSplitter,
-    HtmlNodeParser,
     MarkdownNodeParser,
-    JsonNodeParser,
-    SimpleFileNodeParser,
     MarkdownElementNodeParser,
     OpenAiEmbedding,
     AzureOpenAiEmbedding,
llama_cloud/types/custom_claims.py
ADDED

@@ -0,0 +1,61 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class CustomClaims(pydantic.BaseModel):
+    """
+    Custom claims that dictate various limits or allowed behaviors.
+    Currently these claims reside at a per user level. Claims may expand to a per organization level or project in the future.
+    """
+
+    allowed_index: typing.Optional[bool] = pydantic.Field(
+        description="Whether the user is allowed to access the index."
+    )
+    allowed_playground: typing.Optional[bool] = pydantic.Field(description="Deprecated. Use allowed_index instead")
+    usage_pdf_max_pages_per_day: typing.Optional[int] = pydantic.Field(
+        description="The maximum number of PDF pages the user can generate per day."
+    )
+    parse_premium: typing.Optional[bool] = pydantic.Field(
+        description="Whether the user has a LlamaParse premium claim."
+    )
+    usage_index_max_files_per_pipeline: typing.Optional[int] = pydantic.Field(
+        description="The maximum number of files per pipeline the user can index without LlamaParse premium."
+    )
+    max_jobs_in_execution: typing.Optional[int] = pydantic.Field(
+        description="The maximum number of jobs the user can have in execution."
+    )
+    max_jobs_in_execution_per_job_type: typing.Optional[int] = pydantic.Field(
+        description="The maximum number of jobs the user can have in execution per job type."
+    )
+    max_document_ingestion_jobs_in_execution: typing.Optional[int] = pydantic.Field(
+        description="The maximum number of document ingestion jobs the user can have in execution."
+    )
+    allowed_extraction: typing.Optional[bool] = pydantic.Field(
+        description="Whether the user is allowed to use structured data extraction features."
+    )
+    allowed_eval: typing.Optional[bool] = pydantic.Field(description="Whether the user is allowed to run evals.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
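A short sketch (not part of this diff) of how the new CustomClaims model serializes; the field values are hypothetical:

```python
from llama_cloud.types.custom_claims import CustomClaims

# All fields are optional; unset fields are dropped because json()/dict()
# default to exclude_unset=True.
claims = CustomClaims(allowed_index=True, usage_pdf_max_pages_per_day=1000)
print(claims.json())  # {"allowed_index": true, "usage_pdf_max_pages_per_day": 1000}
# Config.frozen = True makes instances immutable and hashable.
```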
llama_cloud/types/data_sink_component_one.py
CHANGED

@@ -4,6 +4,8 @@ import typing

 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
+from .cloud_milvus_vector_store import CloudMilvusVectorStore
+from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
 from .cloud_qdrant_vector_store import CloudQdrantVectorStore

@@ -16,4 +18,6 @@ DataSinkComponentOne = typing.Union[
     CloudQdrantVectorStore,
     CloudWeaviateVectorStore,
     CloudAzureAiSearchVectorStore,
+    CloudMongoDbAtlasVectorSearch,
+    CloudMilvusVectorStore,
 ]
llama_cloud/types/data_sink_create_component_one.py
CHANGED

@@ -4,6 +4,8 @@ import typing

 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_chroma_vector_store import CloudChromaVectorStore
+from .cloud_milvus_vector_store import CloudMilvusVectorStore
+from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
 from .cloud_pinecone_vector_store import CloudPineconeVectorStore
 from .cloud_postgres_vector_store import CloudPostgresVectorStore
 from .cloud_qdrant_vector_store import CloudQdrantVectorStore

@@ -16,4 +18,6 @@ DataSinkCreateComponentOne = typing.Union[
     CloudQdrantVectorStore,
     CloudWeaviateVectorStore,
     CloudAzureAiSearchVectorStore,
+    CloudMongoDbAtlasVectorSearch,
+    CloudMilvusVectorStore,
 ]
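A hedged sketch of dispatching on the widened sink union; construction of the sink components is omitted because their fields are not shown in this diff:

```python
from llama_cloud.types.cloud_milvus_vector_store import CloudMilvusVectorStore
from llama_cloud.types.cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
from llama_cloud.types.data_sink_create_component_one import DataSinkCreateComponentOne


def sink_kind(component: DataSinkCreateComponentOne) -> str:
    # The union now also accepts the two sink types added in this release.
    if isinstance(component, CloudMilvusVectorStore):
        return "milvus"
    if isinstance(component, CloudMongoDbAtlasVectorSearch):
        return "mongodb-atlas"
    return type(component).__name__
```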
llama_cloud/types/element_segmentation_config.py
ADDED

@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ElementSegmentationConfig(pydantic.BaseModel):
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/embedding_config.py
ADDED

@@ -0,0 +1,36 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .embedding_config_component import EmbeddingConfigComponent
+from .embedding_config_type import EmbeddingConfigType
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class EmbeddingConfig(pydantic.BaseModel):
+    type: typing.Optional[EmbeddingConfigType] = pydantic.Field(description="Type of the embedding model.")
+    component: typing.Optional[EmbeddingConfigComponent] = pydantic.Field(
+        description="Configuration for the transformation."
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
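A minimal sketch of building the new EmbeddingConfig; the component is left unset here because the shape of the individual embedding components is not shown in this diff:

```python
from llama_cloud.types.embedding_config import EmbeddingConfig
from llama_cloud.types.embedding_config_type import EmbeddingConfigType

# Both fields are optional, so a type-only config is valid to construct.
config = EmbeddingConfig(type=EmbeddingConfigType.OPENAI_EMBEDDING)
print(config.json())  # {"type": "OPENAI_EMBEDDING"}
```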
llama_cloud/types/embedding_config_component_one.py
ADDED

@@ -0,0 +1,19 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import typing
+
+from .azure_open_ai_embedding import AzureOpenAiEmbedding
+from .bedrock_embedding import BedrockEmbedding
+from .cohere_embedding import CohereEmbedding
+from .gemini_embedding import GeminiEmbedding
+from .hugging_face_inference_api_embedding import HuggingFaceInferenceApiEmbedding
+from .open_ai_embedding import OpenAiEmbedding
+
+EmbeddingConfigComponentOne = typing.Union[
+    OpenAiEmbedding,
+    AzureOpenAiEmbedding,
+    CohereEmbedding,
+    BedrockEmbedding,
+    HuggingFaceInferenceApiEmbedding,
+    GeminiEmbedding,
+]
llama_cloud/types/embedding_config_type.py
ADDED

@@ -0,0 +1,41 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class EmbeddingConfigType(str, enum.Enum):
+    """
+    An enumeration.
+    """
+
+    OPENAI_EMBEDDING = "OPENAI_EMBEDDING"
+    AZURE_EMBEDDING = "AZURE_EMBEDDING"
+    BEDROCK_EMBEDDING = "BEDROCK_EMBEDDING"
+    COHERE_EMBEDDING = "COHERE_EMBEDDING"
+    GEMINI_EMBEDDING = "GEMINI_EMBEDDING"
+    HUGGINGFACE_API_EMBEDDING = "HUGGINGFACE_API_EMBEDDING"
+
+    def visit(
+        self,
+        openai_embedding: typing.Callable[[], T_Result],
+        azure_embedding: typing.Callable[[], T_Result],
+        bedrock_embedding: typing.Callable[[], T_Result],
+        cohere_embedding: typing.Callable[[], T_Result],
+        gemini_embedding: typing.Callable[[], T_Result],
+        huggingface_api_embedding: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is EmbeddingConfigType.OPENAI_EMBEDDING:
+            return openai_embedding()
+        if self is EmbeddingConfigType.AZURE_EMBEDDING:
+            return azure_embedding()
+        if self is EmbeddingConfigType.BEDROCK_EMBEDDING:
+            return bedrock_embedding()
+        if self is EmbeddingConfigType.COHERE_EMBEDDING:
+            return cohere_embedding()
+        if self is EmbeddingConfigType.GEMINI_EMBEDDING:
+            return gemini_embedding()
+        if self is EmbeddingConfigType.HUGGINGFACE_API_EMBEDDING:
+            return huggingface_api_embedding()
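Because the generated visit() requires a callback for every member, dispatch over the new enum looks like this (the return values are illustrative):

```python
from llama_cloud.types.embedding_config_type import EmbeddingConfigType

provider = EmbeddingConfigType.COHERE_EMBEDDING.visit(
    openai_embedding=lambda: "OpenAI",
    azure_embedding=lambda: "Azure OpenAI",
    bedrock_embedding=lambda: "Amazon Bedrock",
    cohere_embedding=lambda: "Cohere",
    gemini_embedding=lambda: "Gemini",
    huggingface_api_embedding=lambda: "Hugging Face Inference API",
)
print(provider)  # Cohere
```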
llama_cloud/types/eval_dataset_job_record.py
CHANGED

@@ -42,6 +42,7 @@ class EvalDatasetJobRecord(pydantic.BaseModel):
     created_at: typing.Optional[dt.datetime] = pydantic.Field(description="Creation datetime")
     id: typing.Optional[str] = pydantic.Field(description="Unique identifier")
     status: StatusEnum
+    error_code: typing.Optional[str]
     error_message: typing.Optional[str]
     attempts: typing.Optional[int] = pydantic.Field(description="The number of times this job has been attempted")
     started_at: typing.Optional[dt.datetime]
llama_cloud/types/ingestion_error_response.py
ADDED

@@ -0,0 +1,34 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .job_name_mapping import JobNameMapping
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class IngestionErrorResponse(pydantic.BaseModel):
+    job_id: str = pydantic.Field(description="ID of the job that failed.")
+    message: str = pydantic.Field(description="List of errors that occurred during ingestion.")
+    step: JobNameMapping = pydantic.Field(description="Name of the job that failed.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/job_name_mapping.py
ADDED

@@ -0,0 +1,45 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class JobNameMapping(str, enum.Enum):
+    """
+    Enum for mapping original job names to readable names.
+    """
+
+    MANAGED_INGESTION = "MANAGED_INGESTION"
+    DATA_SOURCE = "DATA_SOURCE"
+    FILES_UPDATE = "FILES_UPDATE"
+    FILE_UPDATER = "FILE_UPDATER"
+    PARSE = "PARSE"
+    TRANSFORM = "TRANSFORM"
+    INGESTION = "INGESTION"
+
+    def visit(
+        self,
+        managed_ingestion: typing.Callable[[], T_Result],
+        data_source: typing.Callable[[], T_Result],
+        files_update: typing.Callable[[], T_Result],
+        file_updater: typing.Callable[[], T_Result],
+        parse: typing.Callable[[], T_Result],
+        transform: typing.Callable[[], T_Result],
+        ingestion: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is JobNameMapping.MANAGED_INGESTION:
+            return managed_ingestion()
+        if self is JobNameMapping.DATA_SOURCE:
+            return data_source()
+        if self is JobNameMapping.FILES_UPDATE:
+            return files_update()
+        if self is JobNameMapping.FILE_UPDATER:
+            return file_updater()
+        if self is JobNameMapping.PARSE:
+            return parse()
+        if self is JobNameMapping.TRANSFORM:
+            return transform()
+        if self is JobNameMapping.INGESTION:
+            return ingestion()
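A sketch (all values are hypothetical) combining the two new error types and using JobNameMapping.visit() to label the failed step:

```python
from llama_cloud.types.ingestion_error_response import IngestionErrorResponse
from llama_cloud.types.job_name_mapping import JobNameMapping

error = IngestionErrorResponse(
    job_id="00000000-0000-0000-0000-000000000000",  # hypothetical job id
    message="parsing failed",
    step=JobNameMapping.PARSE,
)

label = error.step.visit(
    managed_ingestion=lambda: "managed ingestion",
    data_source=lambda: "data source",
    files_update=lambda: "files update",
    file_updater=lambda: "file updater",
    parse=lambda: "parsing",
    transform=lambda: "transformation",
    ingestion=lambda: "ingestion",
)
print(f"job {error.job_id} failed during {label}: {error.message}")
```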
llama_cloud/types/llama_parse_supported_file_extensions.py
CHANGED

@@ -45,6 +45,14 @@ class LlamaParseSupportedFileExtensions(str, enum.Enum):
     SXI = ".sxi"
     STI = ".sti"
     EPUB = ".epub"
+    JPG = ".jpg"
+    JPEG = ".jpeg"
+    PNG = ".png"
+    GIF = ".gif"
+    BMP = ".bmp"
+    SVG = ".svg"
+    TIFF = ".tiff"
+    WEBP = ".webp"
     HTML = ".html"
     HTM = ".htm"
     XLS = ".xls"

@@ -115,6 +123,14 @@ class LlamaParseSupportedFileExtensions(str, enum.Enum):
         sxi: typing.Callable[[], T_Result],
         sti: typing.Callable[[], T_Result],
         epub: typing.Callable[[], T_Result],
+        jpg: typing.Callable[[], T_Result],
+        jpeg: typing.Callable[[], T_Result],
+        png: typing.Callable[[], T_Result],
+        gif: typing.Callable[[], T_Result],
+        bmp: typing.Callable[[], T_Result],
+        svg: typing.Callable[[], T_Result],
+        tiff: typing.Callable[[], T_Result],
+        webp: typing.Callable[[], T_Result],
         html: typing.Callable[[], T_Result],
         htm: typing.Callable[[], T_Result],
         xls: typing.Callable[[], T_Result],

@@ -217,6 +233,22 @@ class LlamaParseSupportedFileExtensions(str, enum.Enum):
             return sti()
         if self is LlamaParseSupportedFileExtensions.EPUB:
             return epub()
+        if self is LlamaParseSupportedFileExtensions.JPG:
+            return jpg()
+        if self is LlamaParseSupportedFileExtensions.JPEG:
+            return jpeg()
+        if self is LlamaParseSupportedFileExtensions.PNG:
+            return png()
+        if self is LlamaParseSupportedFileExtensions.GIF:
+            return gif()
+        if self is LlamaParseSupportedFileExtensions.BMP:
+            return bmp()
+        if self is LlamaParseSupportedFileExtensions.SVG:
+            return svg()
+        if self is LlamaParseSupportedFileExtensions.TIFF:
+            return tiff()
+        if self is LlamaParseSupportedFileExtensions.WEBP:
+            return webp()
         if self is LlamaParseSupportedFileExtensions.HTML:
             return html()
         if self is LlamaParseSupportedFileExtensions.HTM:
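A small sketch of checking whether a file extension is supported now that the image formats are included; the helper below is not part of the package:

```python
from pathlib import Path

from llama_cloud.types.llama_parse_supported_file_extensions import LlamaParseSupportedFileExtensions

SUPPORTED = {ext.value for ext in LlamaParseSupportedFileExtensions}


def is_supported(path: str) -> bool:
    # .jpg, .png, .webp and the other image formats are new in this release.
    return Path(path).suffix.lower() in SUPPORTED


print(is_supported("scan.webp"))  # True
```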
llama_cloud/types/llm_parameters.py
ADDED

@@ -0,0 +1,39 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class LlmParameters(pydantic.BaseModel):
+    """
+    Base schema model for BaseComponent classes used in the platform.
+    Comes with special serialization logic for types used commonly in platform codebase.
+    """
+
+    model_name: typing.Optional[str] = pydantic.Field(description="The name of the model to use for retrieval.")
+    system_prompt: typing.Optional[str] = pydantic.Field(description="The system prompt to use for the model.")
+    temperature: typing.Optional[float] = pydantic.Field(description="The temperature value for the model.")
+    class_name: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
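A minimal sketch of constructing the new LlmParameters model; all field values below are placeholders:

```python
from llama_cloud.types.llm_parameters import LlmParameters

params = LlmParameters(
    model_name="my-retrieval-model",  # hypothetical model name
    system_prompt="Answer using only the retrieved context.",
    temperature=0.1,
)
print(params.json())
```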
llama_cloud/types/managed_ingestion_status_response.py
CHANGED

@@ -4,6 +4,7 @@ import datetime as dt
 import typing

 from ..core.datetime_utils import serialize_datetime
+from .ingestion_error_response import IngestionErrorResponse
 from .managed_ingestion_status import ManagedIngestionStatus

 try:

@@ -16,7 +17,12 @@ except ImportError:


 class ManagedIngestionStatusResponse(pydantic.BaseModel):
+    job_id: typing.Optional[str] = pydantic.Field(description="ID of the latest job.")
+    deployment_date: typing.Optional[dt.datetime] = pydantic.Field(description="Date of the deployment.")
     status: ManagedIngestionStatus = pydantic.Field(description="Status of the ingestion.")
+    error: typing.Optional[typing.List[IngestionErrorResponse]] = pydantic.Field(
+        description="List of errors that occurred during ingestion."
+    )

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
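A sketch of reading the enriched status response; the helper is illustrative and assumes the response object came back from the API:

```python
from llama_cloud.types.managed_ingestion_status_response import ManagedIngestionStatusResponse


def report_errors(resp: ManagedIngestionStatusResponse) -> None:
    # job_id, deployment_date and error are the fields added in this release.
    print(f"job {resp.job_id}: {resp.status}")
    for err in resp.error or []:
        print(f"  {err.step.value}: {err.message}")
```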
llama_cloud/types/none_chunking_config.py
ADDED

@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class NoneChunkingConfig(pydantic.BaseModel):
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/none_segmentation_config.py
ADDED

@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class NoneSegmentationConfig(pydantic.BaseModel):
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/page_segmentation_config.py
ADDED

@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PageSegmentationConfig(pydantic.BaseModel):
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/{json_node_parser.py → page_splitter_node_parser.py}
RENAMED

@@ -14,15 +14,9 @@ except ImportError:
     import pydantic  # type: ignore


-class JsonNodeParser(pydantic.BaseModel):
+class PageSplitterNodeParser(pydantic.BaseModel):
     """
-
-
-    Splits a document into Nodes using custom JSON splitting logic.
-
-    Args:
-        include_metadata (bool): whether to include metadata in nodes
-        include_prev_next_rel (bool): whether to include prev/next relationships
+    Split text into pages.
     """

     include_metadata: typing.Optional[bool] = pydantic.Field(

@@ -30,6 +24,7 @@ class JsonNodeParser(pydantic.BaseModel):
     )
     include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
     callback_manager: typing.Optional[typing.Dict[str, typing.Any]]
+    page_separator: typing.Optional[str] = pydantic.Field(description="Separator to split text into pages.")
     class_name: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
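A small sketch of the renamed parser model; the page separator value is an assumption, not a package default:

```python
from llama_cloud.types.page_splitter_node_parser import PageSplitterNodeParser

# "\f" (form feed) is only an example separator; use whatever marks page
# boundaries in your source documents.
parser = PageSplitterNodeParser(page_separator="\f", include_metadata=True)
print(parser.json())
```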
llama_cloud/types/parsing_job.py
CHANGED
@@ -18,6 +18,8 @@ except ImportError:
 class ParsingJob(pydantic.BaseModel):
     id: str
     status: StatusEnum
+    error_code: typing.Optional[str]
+    error_message: typing.Optional[str]

     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
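A sketch (not from the package) of surfacing the new error fields on a job object returned by the API:

```python
from llama_cloud.types.parsing_job import ParsingJob


def describe_failure(job: ParsingJob) -> str:
    # error_code and error_message are the optional fields added in this release.
    if job.error_code is None and job.error_message is None:
        return f"job {job.id} reported no error ({job.status})"
    return f"job {job.id} failed with {job.error_code}: {job.error_message}"
```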