llama-cloud 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic.

Files changed (57)
  1. llama_cloud/__init__.py +82 -6
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +13 -2
  4. llama_cloud/resources/auth/__init__.py +2 -0
  5. llama_cloud/resources/auth/client.py +124 -0
  6. llama_cloud/resources/data_sinks/types/data_sink_update_component_one.py +4 -0
  7. llama_cloud/resources/extraction/__init__.py +2 -2
  8. llama_cloud/resources/extraction/client.py +139 -48
  9. llama_cloud/resources/extraction/types/__init__.py +2 -1
  10. llama_cloud/resources/extraction/types/extraction_schema_create_data_schema_value.py +7 -0
  11. llama_cloud/resources/pipelines/__init__.py +12 -2
  12. llama_cloud/resources/pipelines/client.py +58 -2
  13. llama_cloud/resources/pipelines/types/__init__.py +11 -1
  14. llama_cloud/resources/pipelines/types/pipeline_update_transform_config.py +31 -0
  15. llama_cloud/types/__init__.py +78 -6
  16. llama_cloud/types/advanced_mode_transform_config.py +38 -0
  17. llama_cloud/types/advanced_mode_transform_config_chunking_config.py +67 -0
  18. llama_cloud/types/advanced_mode_transform_config_segmentation_config.py +45 -0
  19. llama_cloud/types/auto_transform_config.py +32 -0
  20. llama_cloud/types/character_chunking_config.py +32 -0
  21. llama_cloud/types/{html_node_parser.py → character_splitter.py} +9 -9
  22. llama_cloud/types/chat_data.py +2 -0
  23. llama_cloud/types/cloud_az_storage_blob_data_source.py +11 -2
  24. llama_cloud/types/{simple_file_node_parser.py → cloud_milvus_vector_store.py} +7 -14
  25. llama_cloud/types/cloud_mongo_db_atlas_vector_search.py +51 -0
  26. llama_cloud/types/configurable_data_sink_names.py +8 -0
  27. llama_cloud/types/configurable_transformation_names.py +8 -12
  28. llama_cloud/types/configured_transformation_item_component_one.py +4 -6
  29. llama_cloud/types/custom_claims.py +61 -0
  30. llama_cloud/types/data_sink_component_one.py +4 -0
  31. llama_cloud/types/data_sink_create_component_one.py +4 -0
  32. llama_cloud/types/element_segmentation_config.py +29 -0
  33. llama_cloud/types/embedding_config.py +36 -0
  34. llama_cloud/types/embedding_config_component.py +7 -0
  35. llama_cloud/types/embedding_config_component_one.py +19 -0
  36. llama_cloud/types/embedding_config_type.py +41 -0
  37. llama_cloud/types/eval_dataset_job_record.py +1 -0
  38. llama_cloud/types/ingestion_error_response.py +34 -0
  39. llama_cloud/types/job_name_mapping.py +45 -0
  40. llama_cloud/types/llama_parse_supported_file_extensions.py +32 -0
  41. llama_cloud/types/llm_parameters.py +39 -0
  42. llama_cloud/types/managed_ingestion_status_response.py +6 -0
  43. llama_cloud/types/none_chunking_config.py +29 -0
  44. llama_cloud/types/none_segmentation_config.py +29 -0
  45. llama_cloud/types/page_segmentation_config.py +29 -0
  46. llama_cloud/types/{json_node_parser.py → page_splitter_node_parser.py} +3 -8
  47. llama_cloud/types/parsing_job.py +2 -0
  48. llama_cloud/types/pipeline_create.py +8 -0
  49. llama_cloud/types/pipeline_create_transform_config.py +31 -0
  50. llama_cloud/types/semantic_chunking_config.py +32 -0
  51. llama_cloud/types/sentence_chunking_config.py +34 -0
  52. llama_cloud/types/token_chunking_config.py +33 -0
  53. llama_cloud/types/user.py +35 -0
  54. {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/METADATA +1 -1
  55. {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/RECORD +57 -30
  56. {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/LICENSE +0 -0
  57. {llama_cloud-0.0.10.dist-info → llama_cloud-0.0.12.dist-info}/WHEEL +0 -0
llama_cloud/types/__init__.py
@@ -1,9 +1,27 @@
  # This file was auto-generated by Fern from our API Definition.

+ from .advanced_mode_transform_config import AdvancedModeTransformConfig
+ from .advanced_mode_transform_config_chunking_config import (
+     AdvancedModeTransformConfigChunkingConfig,
+     AdvancedModeTransformConfigChunkingConfig_Character,
+     AdvancedModeTransformConfigChunkingConfig_None,
+     AdvancedModeTransformConfigChunkingConfig_Semantic,
+     AdvancedModeTransformConfigChunkingConfig_Sentence,
+     AdvancedModeTransformConfigChunkingConfig_Token,
+ )
+ from .advanced_mode_transform_config_segmentation_config import (
+     AdvancedModeTransformConfigSegmentationConfig,
+     AdvancedModeTransformConfigSegmentationConfig_Element,
+     AdvancedModeTransformConfigSegmentationConfig_None,
+     AdvancedModeTransformConfigSegmentationConfig_Page,
+ )
+ from .auto_transform_config import AutoTransformConfig
  from .azure_open_ai_embedding import AzureOpenAiEmbedding
  from .base import Base
  from .base_prompt_template import BasePromptTemplate
  from .bedrock_embedding import BedrockEmbedding
+ from .character_chunking_config import CharacterChunkingConfig
+ from .character_splitter import CharacterSplitter
  from .chat_data import ChatData
  from .chat_message import ChatMessage
  from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
@@ -13,6 +31,8 @@ from .cloud_confluence_data_source import CloudConfluenceDataSource
  from .cloud_document import CloudDocument
  from .cloud_document_create import CloudDocumentCreate
  from .cloud_jira_data_source import CloudJiraDataSource
+ from .cloud_milvus_vector_store import CloudMilvusVectorStore
+ from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
  from .cloud_notion_page_data_source import CloudNotionPageDataSource
  from .cloud_one_drive_data_source import CloudOneDriveDataSource
  from .cloud_pinecone_vector_store import CloudPineconeVectorStore
@@ -31,6 +51,7 @@ from .configurable_transformation_names import ConfigurableTransformationNames
  from .configured_transformation_item import ConfiguredTransformationItem
  from .configured_transformation_item_component import ConfiguredTransformationItemComponent
  from .configured_transformation_item_component_one import ConfiguredTransformationItemComponentOne
+ from .custom_claims import CustomClaims
  from .data_sink import DataSink
  from .data_sink_component import DataSinkComponent
  from .data_sink_component_one import DataSinkComponentOne
@@ -47,6 +68,11 @@ from .data_source_create_component_one import DataSourceCreateComponentOne
  from .data_source_create_custom_metadata_value import DataSourceCreateCustomMetadataValue
  from .data_source_custom_metadata_value import DataSourceCustomMetadataValue
  from .data_source_definition import DataSourceDefinition
+ from .element_segmentation_config import ElementSegmentationConfig
+ from .embedding_config import EmbeddingConfig
+ from .embedding_config_component import EmbeddingConfigComponent
+ from .embedding_config_component_one import EmbeddingConfigComponentOne
+ from .embedding_config_type import EmbeddingConfigType
  from .eval_dataset import EvalDataset
  from .eval_dataset_job_params import EvalDatasetJobParams
  from .eval_dataset_job_record import EvalDatasetJobRecord
@@ -66,14 +92,15 @@ from .file_resource_info_value import FileResourceInfoValue
  from .filter_condition import FilterCondition
  from .filter_operator import FilterOperator
  from .gemini_embedding import GeminiEmbedding
- from .html_node_parser import HtmlNodeParser
  from .http_validation_error import HttpValidationError
  from .hugging_face_inference_api_embedding import HuggingFaceInferenceApiEmbedding
  from .hugging_face_inference_api_embedding_token import HuggingFaceInferenceApiEmbeddingToken
- from .json_node_parser import JsonNodeParser
+ from .ingestion_error_response import IngestionErrorResponse
+ from .job_name_mapping import JobNameMapping
  from .llama_parse_parameters import LlamaParseParameters
  from .llama_parse_supported_file_extensions import LlamaParseSupportedFileExtensions
  from .llm import Llm
+ from .llm_parameters import LlmParameters
  from .local_eval import LocalEval
  from .local_eval_results import LocalEvalResults
  from .local_eval_sets import LocalEvalSets
@@ -88,10 +115,14 @@ from .metadata_filters import MetadataFilters
  from .metadata_filters_filters_item import MetadataFiltersFiltersItem
  from .metric_result import MetricResult
  from .node_parser import NodeParser
+ from .none_chunking_config import NoneChunkingConfig
+ from .none_segmentation_config import NoneSegmentationConfig
  from .object_type import ObjectType
  from .open_ai_embedding import OpenAiEmbedding
  from .organization import Organization
  from .organization_create import OrganizationCreate
+ from .page_segmentation_config import PageSegmentationConfig
+ from .page_splitter_node_parser import PageSplitterNodeParser
  from .parser_languages import ParserLanguages
  from .parsing_history_item import ParsingHistoryItem
  from .parsing_job import ParsingJob
@@ -101,6 +132,11 @@ from .parsing_job_text_result import ParsingJobTextResult
  from .parsing_usage import ParsingUsage
  from .pipeline import Pipeline
  from .pipeline_create import PipelineCreate
+ from .pipeline_create_transform_config import (
+     PipelineCreateTransformConfig,
+     PipelineCreateTransformConfig_Advanced,
+     PipelineCreateTransformConfig_Auto,
+ )
  from .pipeline_data_source import PipelineDataSource
  from .pipeline_data_source_component import PipelineDataSourceComponent
  from .pipeline_data_source_component_one import PipelineDataSourceComponentOne
@@ -124,16 +160,19 @@ from .pydantic_program_mode import PydanticProgramMode
  from .related_node_info import RelatedNodeInfo
  from .retrieval_mode import RetrievalMode
  from .retrieve_results import RetrieveResults
+ from .semantic_chunking_config import SemanticChunkingConfig
+ from .sentence_chunking_config import SentenceChunkingConfig
  from .sentence_splitter import SentenceSplitter
- from .simple_file_node_parser import SimpleFileNodeParser
  from .status_enum import StatusEnum
  from .supported_eval_llm_model import SupportedEvalLlmModel
  from .supported_eval_llm_model_names import SupportedEvalLlmModelNames
  from .text_node import TextNode
  from .text_node_relationships_value import TextNodeRelationshipsValue
  from .text_node_with_score import TextNodeWithScore
+ from .token_chunking_config import TokenChunkingConfig
  from .token_text_splitter import TokenTextSplitter
  from .transformation_category_names import TransformationCategoryNames
+ from .user import User
  from .user_organization import UserOrganization
  from .user_organization_create import UserOrganizationCreate
  from .user_organization_delete import UserOrganizationDelete
@@ -141,10 +180,24 @@ from .validation_error import ValidationError
  from .validation_error_loc_item import ValidationErrorLocItem

  __all__ = [
+     "AdvancedModeTransformConfig",
+     "AdvancedModeTransformConfigChunkingConfig",
+     "AdvancedModeTransformConfigChunkingConfig_Character",
+     "AdvancedModeTransformConfigChunkingConfig_None",
+     "AdvancedModeTransformConfigChunkingConfig_Semantic",
+     "AdvancedModeTransformConfigChunkingConfig_Sentence",
+     "AdvancedModeTransformConfigChunkingConfig_Token",
+     "AdvancedModeTransformConfigSegmentationConfig",
+     "AdvancedModeTransformConfigSegmentationConfig_Element",
+     "AdvancedModeTransformConfigSegmentationConfig_None",
+     "AdvancedModeTransformConfigSegmentationConfig_Page",
+     "AutoTransformConfig",
      "AzureOpenAiEmbedding",
      "Base",
      "BasePromptTemplate",
      "BedrockEmbedding",
+     "CharacterChunkingConfig",
+     "CharacterSplitter",
      "ChatData",
      "ChatMessage",
      "CloudAzStorageBlobDataSource",
@@ -154,6 +207,8 @@ __all__ = [
      "CloudDocument",
      "CloudDocumentCreate",
      "CloudJiraDataSource",
+     "CloudMilvusVectorStore",
+     "CloudMongoDbAtlasVectorSearch",
      "CloudNotionPageDataSource",
      "CloudOneDriveDataSource",
      "CloudPineconeVectorStore",
@@ -172,6 +227,7 @@ __all__ = [
      "ConfiguredTransformationItem",
      "ConfiguredTransformationItemComponent",
      "ConfiguredTransformationItemComponentOne",
+     "CustomClaims",
      "DataSink",
      "DataSinkComponent",
      "DataSinkComponentOne",
@@ -188,6 +244,11 @@ __all__ = [
      "DataSourceCreateCustomMetadataValue",
      "DataSourceCustomMetadataValue",
      "DataSourceDefinition",
+     "ElementSegmentationConfig",
+     "EmbeddingConfig",
+     "EmbeddingConfigComponent",
+     "EmbeddingConfigComponentOne",
+     "EmbeddingConfigType",
      "EvalDataset",
      "EvalDatasetJobParams",
      "EvalDatasetJobRecord",
@@ -207,14 +268,15 @@ __all__ = [
      "FilterCondition",
      "FilterOperator",
      "GeminiEmbedding",
-     "HtmlNodeParser",
      "HttpValidationError",
      "HuggingFaceInferenceApiEmbedding",
      "HuggingFaceInferenceApiEmbeddingToken",
-     "JsonNodeParser",
+     "IngestionErrorResponse",
+     "JobNameMapping",
      "LlamaParseParameters",
      "LlamaParseSupportedFileExtensions",
      "Llm",
+     "LlmParameters",
      "LocalEval",
      "LocalEvalResults",
      "LocalEvalSets",
@@ -229,10 +291,14 @@ __all__ = [
      "MetadataFiltersFiltersItem",
      "MetricResult",
      "NodeParser",
+     "NoneChunkingConfig",
+     "NoneSegmentationConfig",
      "ObjectType",
      "OpenAiEmbedding",
      "Organization",
      "OrganizationCreate",
+     "PageSegmentationConfig",
+     "PageSplitterNodeParser",
      "ParserLanguages",
      "ParsingHistoryItem",
      "ParsingJob",
@@ -242,6 +308,9 @@ __all__ = [
      "ParsingUsage",
      "Pipeline",
      "PipelineCreate",
+     "PipelineCreateTransformConfig",
+     "PipelineCreateTransformConfig_Advanced",
+     "PipelineCreateTransformConfig_Auto",
      "PipelineDataSource",
      "PipelineDataSourceComponent",
      "PipelineDataSourceComponentOne",
@@ -265,16 +334,19 @@ __all__ = [
      "RelatedNodeInfo",
      "RetrievalMode",
      "RetrieveResults",
+     "SemanticChunkingConfig",
+     "SentenceChunkingConfig",
      "SentenceSplitter",
-     "SimpleFileNodeParser",
      "StatusEnum",
      "SupportedEvalLlmModel",
      "SupportedEvalLlmModelNames",
      "TextNode",
      "TextNodeRelationshipsValue",
      "TextNodeWithScore",
+     "TokenChunkingConfig",
      "TokenTextSplitter",
      "TransformationCategoryNames",
+     "User",
      "UserOrganization",
      "UserOrganizationCreate",
      "UserOrganizationDelete",

llama_cloud/types/advanced_mode_transform_config.py
@@ -0,0 +1,38 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+ from .advanced_mode_transform_config_chunking_config import AdvancedModeTransformConfigChunkingConfig
+ from .advanced_mode_transform_config_segmentation_config import AdvancedModeTransformConfigSegmentationConfig
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic # type: ignore
+ except ImportError:
+     import pydantic # type: ignore
+
+
+ class AdvancedModeTransformConfig(pydantic.BaseModel):
+     segmentation_config: typing.Optional[AdvancedModeTransformConfigSegmentationConfig] = pydantic.Field(
+         description="Configuration for the segmentation."
+     )
+     chunking_config: typing.Optional[AdvancedModeTransformConfigChunkingConfig] = pydantic.Field(
+         description="Configuration for the chunking."
+     )
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         json_encoders = {dt.datetime: serialize_datetime}

llama_cloud/types/advanced_mode_transform_config_chunking_config.py
@@ -0,0 +1,67 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ from __future__ import annotations
+
+ import typing
+
+ import typing_extensions
+
+ from .character_chunking_config import CharacterChunkingConfig
+ from .none_chunking_config import NoneChunkingConfig
+ from .semantic_chunking_config import SemanticChunkingConfig
+ from .sentence_chunking_config import SentenceChunkingConfig
+ from .token_chunking_config import TokenChunkingConfig
+
+
+ class AdvancedModeTransformConfigChunkingConfig_None(NoneChunkingConfig):
+     mode: typing_extensions.Literal["none"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class AdvancedModeTransformConfigChunkingConfig_Character(CharacterChunkingConfig):
+     mode: typing_extensions.Literal["character"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class AdvancedModeTransformConfigChunkingConfig_Token(TokenChunkingConfig):
+     mode: typing_extensions.Literal["token"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class AdvancedModeTransformConfigChunkingConfig_Sentence(SentenceChunkingConfig):
+     mode: typing_extensions.Literal["sentence"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class AdvancedModeTransformConfigChunkingConfig_Semantic(SemanticChunkingConfig):
+     mode: typing_extensions.Literal["semantic"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ AdvancedModeTransformConfigChunkingConfig = typing.Union[
+     AdvancedModeTransformConfigChunkingConfig_None,
+     AdvancedModeTransformConfigChunkingConfig_Character,
+     AdvancedModeTransformConfigChunkingConfig_Token,
+     AdvancedModeTransformConfigChunkingConfig_Sentence,
+     AdvancedModeTransformConfigChunkingConfig_Semantic,
+ ]

llama_cloud/types/advanced_mode_transform_config_segmentation_config.py
@@ -0,0 +1,45 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ from __future__ import annotations
+
+ import typing
+
+ import typing_extensions
+
+ from .element_segmentation_config import ElementSegmentationConfig
+ from .none_segmentation_config import NoneSegmentationConfig
+ from .page_segmentation_config import PageSegmentationConfig
+
+
+ class AdvancedModeTransformConfigSegmentationConfig_None(NoneSegmentationConfig):
+     mode: typing_extensions.Literal["none"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class AdvancedModeTransformConfigSegmentationConfig_Page(PageSegmentationConfig):
+     mode: typing_extensions.Literal["page"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ class AdvancedModeTransformConfigSegmentationConfig_Element(ElementSegmentationConfig):
+     mode: typing_extensions.Literal["element"]
+
+     class Config:
+         frozen = True
+         smart_union = True
+         allow_population_by_field_name = True
+
+
+ AdvancedModeTransformConfigSegmentationConfig = typing.Union[
+     AdvancedModeTransformConfigSegmentationConfig_None,
+     AdvancedModeTransformConfigSegmentationConfig_Page,
+     AdvancedModeTransformConfigSegmentationConfig_Element,
+ ]
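
Together with advanced_mode_transform_config.py above, these two union modules let a caller pick a segmentation strategy (none, page, element) and a chunking strategy (none, character, token, sentence, semantic), each selected by a literal mode field. A minimal construction sketch, assuming the names are re-exported from llama_cloud.types as the __init__ hunks above indicate; the numeric values are illustrative, not defaults taken from this diff:

from llama_cloud.types import (
    AdvancedModeTransformConfig,
    AdvancedModeTransformConfigChunkingConfig_Character,
    AdvancedModeTransformConfigSegmentationConfig_Page,
)

# Segment by page, then chunk each segment by characters; the `mode` literals are
# the discriminators of the two unions defined above.
advanced_cfg = AdvancedModeTransformConfig(
    segmentation_config=AdvancedModeTransformConfigSegmentationConfig_Page(mode="page"),
    chunking_config=AdvancedModeTransformConfigChunkingConfig_Character(
        mode="character", chunk_size=1024, chunk_overlap=200
    ),
)
print(advanced_cfg.json())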

llama_cloud/types/auto_transform_config.py
@@ -0,0 +1,32 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic # type: ignore
+ except ImportError:
+     import pydantic # type: ignore
+
+
+ class AutoTransformConfig(pydantic.BaseModel):
+     chunk_size: typing.Optional[int] = pydantic.Field(description="Chunk size for the transformation.")
+     chunk_overlap: typing.Optional[int] = pydantic.Field(description="Chunk overlap for the transformation.")
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         json_encoders = {dt.datetime: serialize_datetime}
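
AutoTransformConfig is the coarse-grained counterpart to the advanced mode: only an optional chunk size and overlap. A short sketch with arbitrary values; how this plugs into PipelineCreate is suggested by the new PipelineCreateTransformConfig_Auto/_Advanced union listed in the file index but not shown in this excerpt:

from llama_cloud.types import AutoTransformConfig

auto_cfg = AutoTransformConfig(chunk_size=1024, chunk_overlap=100)
print(auto_cfg.dict())  # {'chunk_size': 1024, 'chunk_overlap': 100}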

llama_cloud/types/character_chunking_config.py
@@ -0,0 +1,32 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic # type: ignore
+ except ImportError:
+     import pydantic # type: ignore
+
+
+ class CharacterChunkingConfig(pydantic.BaseModel):
+     chunk_size: typing.Optional[int]
+     chunk_overlap: typing.Optional[int]
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         json_encoders = {dt.datetime: serialize_datetime}

llama_cloud/types/html_node_parser.py → llama_cloud/types/character_splitter.py
@@ -14,15 +14,9 @@ except ImportError:
      import pydantic # type: ignore


- class HtmlNodeParser(pydantic.BaseModel):
+ class CharacterSplitter(pydantic.BaseModel):
      """
-     HTML node parser.
-
-     Splits a document into Nodes using custom HTML splitting logic.
-
-     Args:
-         include_metadata (bool): whether to include metadata in nodes
-         include_prev_next_rel (bool): whether to include prev/next relationships
+     A splitter that splits text into characters.
      """

      include_metadata: typing.Optional[bool] = pydantic.Field(
@@ -30,7 +24,13 @@ class HtmlNodeParser(pydantic.BaseModel):
      )
      include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
      callback_manager: typing.Optional[typing.Dict[str, typing.Any]]
-     tags: typing.Optional[typing.List[str]] = pydantic.Field(description="HTML tags to extract text from.")
+     chunk_size: typing.Optional[int] = pydantic.Field(description="The token chunk size for each chunk.")
+     chunk_overlap: typing.Optional[int] = pydantic.Field(description="The token overlap of each chunk when splitting.")
+     separator: typing.Optional[str] = pydantic.Field(description="Default separator for splitting into words")
+     paragraph_separator: typing.Optional[str] = pydantic.Field(description="Separator between paragraphs.")
+     secondary_chunking_regex: typing.Optional[str] = pydantic.Field(
+         description="Backup regex for splitting into sentences."
+     )
      class_name: typing.Optional[str]

      def json(self, **kwargs: typing.Any) -> str:
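
The rename drops the HTML-specific tags field and turns the model into a general character splitter whose fields are all optional. A hedged usage sketch, assuming re-export from llama_cloud.types; the values are illustrative, not library defaults:

from llama_cloud.types import CharacterSplitter

splitter = CharacterSplitter(
    chunk_size=512,     # token chunk size for each chunk
    chunk_overlap=64,   # token overlap between consecutive chunks
    separator=" ",      # word-level separator
)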

llama_cloud/types/chat_data.py
@@ -4,6 +4,7 @@ import datetime as dt
  import typing

  from ..core.datetime_utils import serialize_datetime
+ from .llm_parameters import LlmParameters
  from .preset_retrieval_params import PresetRetrievalParams

  try:
@@ -22,6 +23,7 @@ class ChatData(pydantic.BaseModel):
      """

      retrieval_parameters: PresetRetrievalParams
+     llm_parameters: typing.Optional[LlmParameters]
      class_name: typing.Optional[str]

      def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/cloud_az_storage_blob_data_source.py
@@ -25,8 +25,17 @@ class CloudAzStorageBlobDataSource(pydantic.BaseModel):
      prefix: typing.Optional[str] = pydantic.Field(
          description="The prefix of the Azure Storage Blob objects to read from."
      )
-     account_name: str = pydantic.Field(description="The Azure Storage Blob account name to use for authentication.")
-     account_key: str = pydantic.Field(description="The Azure Storage Blob account key to use for authentication.")
+     account_name: typing.Optional[str] = pydantic.Field(
+         description="The Azure Storage Blob account name to use for authentication."
+     )
+     account_key: typing.Optional[str] = pydantic.Field(
+         description="The Azure Storage Blob account key to use for authentication."
+     )
+     tenant_id: typing.Optional[str] = pydantic.Field(description="The Azure AD tenant ID to use for authentication.")
+     client_id: typing.Optional[str] = pydantic.Field(description="The Azure AD client ID to use for authentication.")
+     client_secret: typing.Optional[str] = pydantic.Field(
+         description="The Azure AD client secret to use for authentication."
+     )
      class_name: typing.Optional[str]

      def json(self, **kwargs: typing.Any) -> str:

llama_cloud/types/simple_file_node_parser.py → llama_cloud/types/cloud_milvus_vector_store.py
@@ -14,23 +14,16 @@ except ImportError:
      import pydantic # type: ignore


- class SimpleFileNodeParser(pydantic.BaseModel):
+ class CloudMilvusVectorStore(pydantic.BaseModel):
      """
-     Simple file node parser.
-
-     Splits a document loaded from a file into Nodes using logic based on the file type
-     automatically detects the NodeParser to use based on file type
-
-     Args:
-         include_metadata (bool): whether to include metadata in nodes
-         include_prev_next_rel (bool): whether to include prev/next relationships
+     Cloud Milvus Vector Store.
      """

-     include_metadata: typing.Optional[bool] = pydantic.Field(
-         description="Whether or not to consider metadata when splitting."
-     )
-     include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
-     callback_manager: typing.Optional[typing.Dict[str, typing.Any]]
+     supports_nested_metadata_filters: typing.Optional[bool]
+     uri: str
+     collection_name: typing.Optional[str]
+     token: typing.Optional[str]
+     embedding_dimension: typing.Optional[int]
      class_name: typing.Optional[str]

      def json(self, **kwargs: typing.Any) -> str:
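
CloudMilvusVectorStore replaces SimpleFileNodeParser at this path and is a data-sink configuration rather than a parser; uri is the only required field in the model above. A sketch with placeholder connection details, assuming re-export from llama_cloud.types:

from llama_cloud.types import CloudMilvusVectorStore

milvus_sink = CloudMilvusVectorStore(
    uri="https://my-milvus-host:19530",  # placeholder endpoint
    collection_name="llamacloud_demo",
    token="<milvus-api-key>",            # placeholder credential
    embedding_dimension=1536,            # should match the embedding model in use
)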

llama_cloud/types/cloud_mongo_db_atlas_vector_search.py
@@ -0,0 +1,51 @@
+ # This file was auto-generated by Fern from our API Definition.
+
+ import datetime as dt
+ import typing
+
+ from ..core.datetime_utils import serialize_datetime
+
+ try:
+     import pydantic
+     if pydantic.__version__.startswith("1."):
+         raise ImportError
+     import pydantic.v1 as pydantic # type: ignore
+ except ImportError:
+     import pydantic # type: ignore
+
+
+ class CloudMongoDbAtlasVectorSearch(pydantic.BaseModel):
+     """
+     Cloud MongoDB Atlas Vector Store.
+
+     This class is used to store the configuration for a MongoDB Atlas vector store,
+     so that it can be created and used in LlamaCloud.
+
+     Args:
+         mongodb_uri (str): URI for connecting to MongoDB Atlas
+         db_name (str): name of the MongoDB database
+         collection_name (str): name of the MongoDB collection
+         vector_index_name (str): name of the MongoDB Atlas vector index
+         fulltext_index_name (str): name of the MongoDB Atlas full-text index
+     """
+
+     supports_nested_metadata_filters: typing.Optional[bool]
+     mongodb_uri: str
+     db_name: str
+     collection_name: str
+     vector_index_name: typing.Optional[str]
+     fulltext_index_name: typing.Optional[str]
+     class_name: typing.Optional[str]
+
+     def json(self, **kwargs: typing.Any) -> str:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().json(**kwargs_with_defaults)
+
+     def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+         return super().dict(**kwargs_with_defaults)
+
+     class Config:
+         frozen = True
+         smart_union = True
+         json_encoders = {dt.datetime: serialize_datetime}
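
Per its docstring, this model only carries the connection settings LlamaCloud needs to create and use a MongoDB Atlas vector store: mongodb_uri, db_name and collection_name are required, the index names are optional. A sketch with placeholder values, assuming re-export from llama_cloud.types:

from llama_cloud.types import CloudMongoDbAtlasVectorSearch

mongo_sink = CloudMongoDbAtlasVectorSearch(
    mongodb_uri="mongodb+srv://user:password@cluster0.example.mongodb.net",  # placeholder URI
    db_name="llamacloud",
    collection_name="documents",
    vector_index_name="vector_index",
)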

llama_cloud/types/configurable_data_sink_names.py
@@ -17,6 +17,8 @@ class ConfigurableDataSinkNames(str, enum.Enum):
      QDRANT = "QDRANT"
      WEAVIATE = "WEAVIATE"
      AZUREAI_SEARCH = "AZUREAI_SEARCH"
+     MONGODB_ATLAS = "MONGODB_ATLAS"
+     MILVUS = "MILVUS"

      def visit(
          self,
@@ -26,6 +28,8 @@ class ConfigurableDataSinkNames(str, enum.Enum):
          qdrant: typing.Callable[[], T_Result],
          weaviate: typing.Callable[[], T_Result],
          azureai_search: typing.Callable[[], T_Result],
+         mongodb_atlas: typing.Callable[[], T_Result],
+         milvus: typing.Callable[[], T_Result],
      ) -> T_Result:
          if self is ConfigurableDataSinkNames.CHROMA:
              return chroma()
@@ -39,3 +43,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
              return weaviate()
          if self is ConfigurableDataSinkNames.AZUREAI_SEARCH:
              return azureai_search()
+         if self is ConfigurableDataSinkNames.MONGODB_ATLAS:
+             return mongodb_atlas()
+         if self is ConfigurableDataSinkNames.MILVUS:
+             return milvus()