llama-cloud 0.1.35__py3-none-any.whl → 0.1.36__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.


@@ -45,10 +45,16 @@ class LlamaExtractClient:
         self._client_wrapper = client_wrapper
 
     def list_extraction_agents(
-        self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
+        self,
+        *,
+        include_default: typing.Optional[bool] = None,
+        project_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = None,
     ) -> typing.List[ExtractAgent]:
         """
         Parameters:
+        - include_default: typing.Optional[bool]. Whether to include default agents in the results
+
         - project_id: typing.Optional[str].
 
         - organization_id: typing.Optional[str].
@@ -63,7 +69,9 @@ class LlamaExtractClient:
         _response = self._client_wrapper.httpx_client.request(
             "GET",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
-            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
+            params=remove_none_from_dict(
+                {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
+            ),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
         )
@@ -265,6 +273,44 @@ class LlamaExtractClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    def get_or_create_default_extraction_agent(
+        self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
+    ) -> ExtractAgent:
+        """
+        Get or create a default extraction agent for the current project.
+        The default agent has an empty schema and default configuration.
+
+        Parameters:
+        - project_id: typing.Optional[str].
+
+        - organization_id: typing.Optional[str].
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.llama_extract.get_or_create_default_extraction_agent()
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
+            ),
+            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(ExtractAgent, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
         """
         Parameters:
@@ -992,10 +1038,16 @@ class AsyncLlamaExtractClient:
         self._client_wrapper = client_wrapper
 
     async def list_extraction_agents(
-        self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
+        self,
+        *,
+        include_default: typing.Optional[bool] = None,
+        project_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = None,
     ) -> typing.List[ExtractAgent]:
         """
         Parameters:
+        - include_default: typing.Optional[bool]. Whether to include default agents in the results
+
         - project_id: typing.Optional[str].
 
         - organization_id: typing.Optional[str].
@@ -1010,7 +1062,9 @@ class AsyncLlamaExtractClient:
         _response = await self._client_wrapper.httpx_client.request(
             "GET",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
-            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
+            params=remove_none_from_dict(
+                {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
+            ),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
         )
@@ -1212,6 +1266,44 @@ class AsyncLlamaExtractClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    async def get_or_create_default_extraction_agent(
+        self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
+    ) -> ExtractAgent:
+        """
+        Get or create a default extraction agent for the current project.
+        The default agent has an empty schema and default configuration.
+
+        Parameters:
+        - project_id: typing.Optional[str].
+
+        - organization_id: typing.Optional[str].
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.llama_extract.get_or_create_default_extraction_agent()
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
+            ),
+            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(ExtractAgent, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
         """
         Parameters:
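
The hunks above add an include_default flag to list_extraction_agents and a GET api/v1/extraction/extraction-agents/default endpoint on both the sync and async clients. A minimal usage sketch, mirroring the generated docstring examples (the token value is a placeholder):

from llama_cloud.client import LlamaCloud

client = LlamaCloud(
    token="YOUR_TOKEN",  # placeholder, as in the generated docstrings
)

# Returns the project's default extraction agent (empty schema, default
# configuration), creating it on first call.
default_agent = client.llama_extract.get_or_create_default_extraction_agent()

# The new flag controls whether that default agent appears in listings.
agents = client.llama_extract.list_extraction_agents(include_default=True)
print(len(agents))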
@@ -41,7 +41,12 @@ from .chat_app_response import ChatAppResponse
 from .chat_data import ChatData
 from .chunk_mode import ChunkMode
 from .classification_result import ClassificationResult
-from .classify_response import ClassifyResponse
+from .classifier_rule import ClassifierRule
+from .classify_job import ClassifyJob
+from .classify_job_results import ClassifyJobResults
+from .classify_job_with_status import ClassifyJobWithStatus
+from .classify_parsing_configuration import ClassifyParsingConfiguration
+from .cloud_astra_db_vector_store import CloudAstraDbVectorStore
 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_box_data_source import CloudBoxDataSource
@@ -139,13 +144,17 @@ from .extract_schema_validate_response_data_schema_value import ExtractSchemaVal
 from .extract_state import ExtractState
 from .extract_target import ExtractTarget
 from .fail_page_mode import FailPageMode
+from .failure_handling_config import FailureHandlingConfig
 from .file import File
+from .file_classification import FileClassification
 from .file_count_by_status_response import FileCountByStatusResponse
 from .file_data import FileData
 from .file_id_presigned_url import FileIdPresignedUrl
 from .file_parse_public import FileParsePublic
 from .file_permission_info_value import FilePermissionInfoValue
 from .file_resource_info_value import FileResourceInfoValue
+from .file_store_info_response import FileStoreInfoResponse
+from .file_store_info_response_status import FileStoreInfoResponseStatus
 from .filter_condition import FilterCondition
 from .filter_operation import FilterOperation
 from .filter_operation_eq import FilterOperationEq
@@ -418,7 +427,12 @@ __all__ = [
     "ChatData",
     "ChunkMode",
     "ClassificationResult",
-    "ClassifyResponse",
+    "ClassifierRule",
+    "ClassifyJob",
+    "ClassifyJobResults",
+    "ClassifyJobWithStatus",
+    "ClassifyParsingConfiguration",
+    "CloudAstraDbVectorStore",
     "CloudAzStorageBlobDataSource",
     "CloudAzureAiSearchVectorStore",
     "CloudBoxDataSource",
@@ -512,13 +526,17 @@ __all__ = [
     "ExtractState",
     "ExtractTarget",
    "FailPageMode",
+    "FailureHandlingConfig",
     "File",
+    "FileClassification",
     "FileCountByStatusResponse",
     "FileData",
     "FileIdPresignedUrl",
     "FileParsePublic",
     "FilePermissionInfoValue",
     "FileResourceInfoValue",
+    "FileStoreInfoResponse",
+    "FileStoreInfoResponseStatus",
     "FilterCondition",
     "FilterOperation",
     "FilterOperationEq",
@@ -17,14 +17,13 @@ except ImportError:
 class ClassificationResult(pydantic.BaseModel):
     """
     Result of classifying a single file.
-
-    Contains the classification outcome with confidence score and matched rule info.
     """
 
-    file_id: str = pydantic.Field(description="The ID of the classified file")
-    type: str = pydantic.Field(description="The assigned document type ('unknown' if no rules matched)")
+    reasoning: str = pydantic.Field(
+        description="Step-by-step explanation of why this classification was chosen and the confidence score assigned"
+    )
     confidence: float = pydantic.Field(description="Confidence score of the classification (0.0-1.0)")
-    matched_rule: typing.Optional[str]
+    type: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
@@ -0,0 +1,43 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifierRule(pydantic.BaseModel):
+    """
+    A rule for classifying documents - v0 simplified version.
+
+    This represents a single classification rule that will be applied to documents.
+    All rules are content-based and use natural language descriptions.
+    """
+
+    type: str = pydantic.Field(
+        description="The document type to assign when this rule matches (e.g., 'invoice', 'receipt', 'contract')"
+    )
+    description: str = pydantic.Field(
+        description="Natural language description of what to classify. Be specific about the content characteristics that identify this document type."
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
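
ClassifierRule is a plain pydantic model with two required string fields, so rules are constructed directly. A short sketch, assuming the model is re-exported from llama_cloud.types as the __init__ hunk above indicates (the example rule values are illustrative, not from the API definition):

from llama_cloud.types import ClassifierRule  # assumed re-export per the __init__ hunk

rules = [
    ClassifierRule(
        type="invoice",
        description="A billing document with line items, totals, and payment terms.",
    ),
    ClassifierRule(
        type="contract",
        description="A legal agreement between parties, with clauses and signature blocks.",
    ),
]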
@@ -0,0 +1,45 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .classifier_rule import ClassifierRule
+from .classify_parsing_configuration import ClassifyParsingConfiguration
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifyJob(pydantic.BaseModel):
+    """
+    A classify job.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    rules: typing.List[ClassifierRule] = pydantic.Field(description="The rules to classify the files")
+    user_id: str = pydantic.Field(description="The ID of the user")
+    project_id: str = pydantic.Field(description="The ID of the project")
+    parsing_configuration: typing.Optional[ClassifyParsingConfiguration] = pydantic.Field(
+        description="The configuration for the parsing job"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
@@ -4,7 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .classification_result import ClassificationResult
+from .file_classification import FileClassification
 
 try:
     import pydantic
@@ -15,17 +15,14 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class ClassifyResponse(pydantic.BaseModel):
+class ClassifyJobResults(pydantic.BaseModel):
     """
     Response model for the classify endpoint following AIP-132 pagination standard.
-
-    Contains classification results with pagination support and summary statistics.
     """
 
-    items: typing.List[ClassificationResult] = pydantic.Field(description="The list of items.")
+    items: typing.List[FileClassification] = pydantic.Field(description="The list of items.")
     next_page_token: typing.Optional[str]
     total_size: typing.Optional[int]
-    unknown_count: int = pydantic.Field(description="Number of files that couldn't be classified")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
@@ -0,0 +1,47 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .classifier_rule import ClassifierRule
+from .classify_parsing_configuration import ClassifyParsingConfiguration
+from .status_enum import StatusEnum
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifyJobWithStatus(pydantic.BaseModel):
+    """
+    A classify job with status.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    rules: typing.List[ClassifierRule] = pydantic.Field(description="The rules to classify the files")
+    user_id: str = pydantic.Field(description="The ID of the user")
+    project_id: str = pydantic.Field(description="The ID of the project")
+    parsing_configuration: typing.Optional[ClassifyParsingConfiguration] = pydantic.Field(
+        description="The configuration for the parsing job"
+    )
+    status: StatusEnum = pydantic.Field(description="The status of the classify job")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
@@ -0,0 +1,38 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .parser_languages import ParserLanguages
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifyParsingConfiguration(pydantic.BaseModel):
+    """
+    Parsing configuration for a classify job.
+    """
+
+    lang: typing.Optional[ParserLanguages] = pydantic.Field(description="The language to parse the files in")
+    max_pages: typing.Optional[int]
+    target_pages: typing.Optional[typing.List[int]]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
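
ClassifyJob and ClassifyJobWithStatus both carry a list of ClassifierRule plus this optional ClassifyParsingConfiguration, so the parsing behaviour of a classify job is configured with another small model. A hedged sketch (import path assumed as above; values are illustrative):

from llama_cloud.types import ClassifyParsingConfiguration  # assumed re-export per the __init__ hunk

parsing_config = ClassifyParsingConfiguration(
    max_pages=2,          # only parse the first pages of each file
    target_pages=[0, 1],  # illustrative; the expected page indexing is an assumption here
)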
@@ -0,0 +1,51 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class CloudAstraDbVectorStore(pydantic.BaseModel):
+    """
+    Cloud AstraDB Vector Store.
+
+    This class is used to store the configuration for an AstraDB vector store, so that it can be
+    created and used in LlamaCloud.
+
+    Args:
+        token (str): The Astra DB Application Token to use.
+        api_endpoint (str): The Astra DB JSON API endpoint for your database.
+        collection_name (str): Collection name to use. If not existing, it will be created.
+        embedding_dimension (int): Length of the embedding vectors in use.
+        keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace'
+    """
+
+    supports_nested_metadata_filters: typing.Optional[bool]
+    token: str = pydantic.Field(description="The Astra DB Application Token to use")
+    api_endpoint: str = pydantic.Field(description="The Astra DB JSON API endpoint for your database")
+    collection_name: str = pydantic.Field(description="Collection name to use. If not existing, it will be created")
+    embedding_dimension: int = pydantic.Field(description="Length of the embedding vectors in use")
+    keyspace: typing.Optional[str]
+    class_name: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
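
CloudAstraDbVectorStore follows the same shape as the existing vector store components, and later hunks in this diff register it under the ASTRA_DB data sink name and add it to the DataSinkComponent unions. A construction sketch with placeholder credentials (import path assumed as above):

from llama_cloud.types import CloudAstraDbVectorStore  # assumed re-export per the __init__ hunk

astra_sink = CloudAstraDbVectorStore(
    token="AstraCS:...",  # placeholder Astra DB application token
    api_endpoint="https://<db-id>-<region>.apps.astra.datastax.com",  # placeholder endpoint
    collection_name="llamacloud_demo",  # created if it does not already exist
    embedding_dimension=1536,  # must match the embedding model in use
)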
@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .failure_handling_config import FailureHandlingConfig
 
 try:
     import pydantic
@@ -28,6 +29,20 @@ class CloudConfluenceDataSource(pydantic.BaseModel):
     label: typing.Optional[str]
     index_restricted_pages: typing.Optional[bool] = pydantic.Field(description="Whether to index restricted pages.")
     keep_markdown_format: typing.Optional[bool] = pydantic.Field(description="Whether to keep the markdown format.")
+    failure_handling: typing.Optional[FailureHandlingConfig] = pydantic.Field(
+        description=(
+            "Configuration for handling failures during processing. Key-value object controlling failure handling behaviors.\n"
+            "\n"
+            "Example:\n"
+            "{\n"
+            '"skip_list_failures": true\n'
+            "}\n"
+            "\n"
+            "Currently supports:\n"
+            "\n"
+            "- skip_list_failures: Skip failed batches/lists and continue processing\n"
+        )
+    )
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
@@ -13,6 +13,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
     AZUREAI_SEARCH = "AZUREAI_SEARCH"
     MONGODB_ATLAS = "MONGODB_ATLAS"
     MILVUS = "MILVUS"
+    ASTRA_DB = "ASTRA_DB"
 
     def visit(
         self,
@@ -22,6 +23,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
         azureai_search: typing.Callable[[], T_Result],
         mongodb_atlas: typing.Callable[[], T_Result],
         milvus: typing.Callable[[], T_Result],
+        astra_db: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableDataSinkNames.PINECONE:
             return pinecone()
@@ -35,3 +37,5 @@ class ConfigurableDataSinkNames(str, enum.Enum):
             return mongodb_atlas()
         if self is ConfigurableDataSinkNames.MILVUS:
             return milvus()
+        if self is ConfigurableDataSinkNames.ASTRA_DB:
+            return astra_db()
@@ -2,6 +2,7 @@
 
 import typing
 
+from .cloud_astra_db_vector_store import CloudAstraDbVectorStore
 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_milvus_vector_store import CloudMilvusVectorStore
 from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
@@ -17,4 +18,5 @@ DataSinkComponent = typing.Union[
     CloudAzureAiSearchVectorStore,
     CloudMongoDbAtlasVectorSearch,
     CloudMilvusVectorStore,
+    CloudAstraDbVectorStore,
 ]
@@ -2,6 +2,7 @@
 
 import typing
 
+from .cloud_astra_db_vector_store import CloudAstraDbVectorStore
 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_milvus_vector_store import CloudMilvusVectorStore
 from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
@@ -17,4 +18,5 @@ DataSinkCreateComponent = typing.Union[
     CloudAzureAiSearchVectorStore,
     CloudMongoDbAtlasVectorSearch,
     CloudMilvusVectorStore,
+    CloudAstraDbVectorStore,
 ]
@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FailureHandlingConfig(pydantic.BaseModel):
+    """
+    Configuration for handling different types of failures during data source processing.
+    """
+
+    skip_list_failures: typing.Optional[bool] = pydantic.Field(
+        description="Whether to skip failed batches/lists and continue processing"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
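
FailureHandlingConfig is the model behind the new failure_handling field on CloudConfluenceDataSource shown earlier in this diff. A minimal sketch that reproduces the example from that field's description (import path assumed as above):

from llama_cloud.types import FailureHandlingConfig  # assumed re-export per the __init__ hunk

# Mirrors the documented example: {"skip_list_failures": true}
failure_handling = FailureHandlingConfig(skip_list_failures=True)
print(failure_handling.json())  # {"skip_list_failures": true}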
@@ -0,0 +1,41 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .classification_result import ClassificationResult
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FileClassification(pydantic.BaseModel):
+    """
+    A file classification.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    classify_job_id: str = pydantic.Field(description="The ID of the classify job")
+    file_id: str = pydantic.Field(description="The ID of the classified file")
+    result: typing.Optional[ClassificationResult]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
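
FileClassification is what ClassifyJobResults.items now holds (replacing the flat ClassificationResult list), and the per-file outcome sits behind an optional result. A hedged sketch for reading results out of an already-fetched ClassifyJobResults (import path assumed as above):

from llama_cloud.types import ClassifyJobResults  # assumed re-export per the __init__ hunk


def summarize(results: ClassifyJobResults) -> None:
    """Print one line per classified file from a fetched ClassifyJobResults."""
    for item in results.items:  # each item is a FileClassification
        if item.result is None:  # result is Optional; it may be absent for unprocessed files
            print(item.file_id, "no result")
            continue
        # ClassificationResult now exposes reasoning, confidence, and an optional type.
        print(item.file_id, item.result.type, item.result.confidence)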