llama-cloud 0.1.35__py3-none-any.whl → 0.1.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of llama-cloud might be problematic.
- llama_cloud/__init__.py +20 -2
- llama_cloud/resources/admin/client.py +51 -0
- llama_cloud/resources/classifier/client.py +231 -181
- llama_cloud/resources/data_sinks/types/data_sink_update_component.py +2 -0
- llama_cloud/resources/llama_extract/client.py +96 -4
- llama_cloud/types/__init__.py +20 -2
- llama_cloud/types/classification_result.py +4 -5
- llama_cloud/types/classifier_rule.py +43 -0
- llama_cloud/types/classify_job.py +45 -0
- llama_cloud/types/{classify_response.py → classify_job_results.py} +3 -6
- llama_cloud/types/classify_job_with_status.py +47 -0
- llama_cloud/types/classify_parsing_configuration.py +38 -0
- llama_cloud/types/cloud_astra_db_vector_store.py +51 -0
- llama_cloud/types/cloud_confluence_data_source.py +15 -0
- llama_cloud/types/configurable_data_sink_names.py +4 -0
- llama_cloud/types/data_sink_component.py +2 -0
- llama_cloud/types/data_sink_create_component.py +2 -0
- llama_cloud/types/failure_handling_config.py +37 -0
- llama_cloud/types/file_classification.py +41 -0
- llama_cloud/types/file_store_info_response.py +34 -0
- llama_cloud/types/file_store_info_response_status.py +25 -0
- llama_cloud/types/supported_llm_model_names.py +12 -0
- {llama_cloud-0.1.35.dist-info → llama_cloud-0.1.36.dist-info}/METADATA +2 -4
- {llama_cloud-0.1.35.dist-info → llama_cloud-0.1.36.dist-info}/RECORD +26 -17
- {llama_cloud-0.1.35.dist-info → llama_cloud-0.1.36.dist-info}/WHEEL +1 -1
- {llama_cloud-0.1.35.dist-info → llama_cloud-0.1.36.dist-info}/LICENSE +0 -0
llama_cloud/resources/llama_extract/client.py

@@ -45,10 +45,16 @@ class LlamaExtractClient:
         self._client_wrapper = client_wrapper
 
     def list_extraction_agents(
-        self,
+        self,
+        *,
+        include_default: typing.Optional[bool] = None,
+        project_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = None,
     ) -> typing.List[ExtractAgent]:
         """
         Parameters:
+            - include_default: typing.Optional[bool]. Whether to include default agents in the results
+
             - project_id: typing.Optional[str].
 
             - organization_id: typing.Optional[str].
@@ -63,7 +69,9 @@ class LlamaExtractClient:
         _response = self._client_wrapper.httpx_client.request(
             "GET",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
-            params=remove_none_from_dict(
+            params=remove_none_from_dict(
+                {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
+            ),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
         )
@@ -265,6 +273,44 @@ class LlamaExtractClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    def get_or_create_default_extraction_agent(
+        self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
+    ) -> ExtractAgent:
+        """
+        Get or create a default extraction agent for the current project.
+        The default agent has an empty schema and default configuration.
+
+        Parameters:
+            - project_id: typing.Optional[str].
+
+            - organization_id: typing.Optional[str].
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.llama_extract.get_or_create_default_extraction_agent()
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
+            ),
+            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(ExtractAgent, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
         """
         Parameters:
@@ -992,10 +1038,16 @@ class AsyncLlamaExtractClient:
         self._client_wrapper = client_wrapper
 
     async def list_extraction_agents(
-        self,
+        self,
+        *,
+        include_default: typing.Optional[bool] = None,
+        project_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = None,
    ) -> typing.List[ExtractAgent]:
         """
         Parameters:
+            - include_default: typing.Optional[bool]. Whether to include default agents in the results
+
             - project_id: typing.Optional[str].
 
             - organization_id: typing.Optional[str].
@@ -1010,7 +1062,9 @@ class AsyncLlamaExtractClient:
         _response = await self._client_wrapper.httpx_client.request(
             "GET",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents"),
-            params=remove_none_from_dict(
+            params=remove_none_from_dict(
+                {"include_default": include_default, "project_id": project_id, "organization_id": organization_id}
+            ),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
         )
@@ -1212,6 +1266,44 @@ class AsyncLlamaExtractClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    async def get_or_create_default_extraction_agent(
+        self, *, project_id: typing.Optional[str] = None, organization_id: typing.Optional[str] = None
+    ) -> ExtractAgent:
+        """
+        Get or create a default extraction agent for the current project.
+        The default agent has an empty schema and default configuration.
+
+        Parameters:
+            - project_id: typing.Optional[str].
+
+            - organization_id: typing.Optional[str].
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.llama_extract.get_or_create_default_extraction_agent()
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", "api/v1/extraction/extraction-agents/default"
+            ),
+            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(ExtractAgent, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def get_extraction_agent(self, extraction_agent_id: str) -> ExtractAgent:
         """
         Parameters:
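Taken together, the new keyword-only filters and the default-agent helper can be exercised as below; this is a minimal usage sketch based only on the signatures shown above, with a placeholder token.

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token="YOUR_TOKEN")  # placeholder token

# 0.1.36 adds include_default when listing extraction agents
agents = client.llama_extract.list_extraction_agents(include_default=True)

# 0.1.36 adds a helper that fetches (or lazily creates) the project's default agent
default_agent = client.llama_extract.get_or_create_default_extraction_agent()
print(len(agents), default_agent)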
llama_cloud/types/__init__.py

@@ -41,7 +41,12 @@ from .chat_app_response import ChatAppResponse
 from .chat_data import ChatData
 from .chunk_mode import ChunkMode
 from .classification_result import ClassificationResult
-from .
+from .classifier_rule import ClassifierRule
+from .classify_job import ClassifyJob
+from .classify_job_results import ClassifyJobResults
+from .classify_job_with_status import ClassifyJobWithStatus
+from .classify_parsing_configuration import ClassifyParsingConfiguration
+from .cloud_astra_db_vector_store import CloudAstraDbVectorStore
 from .cloud_az_storage_blob_data_source import CloudAzStorageBlobDataSource
 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_box_data_source import CloudBoxDataSource
@@ -139,13 +144,17 @@ from .extract_schema_validate_response_data_schema_value import ExtractSchemaVal
 from .extract_state import ExtractState
 from .extract_target import ExtractTarget
 from .fail_page_mode import FailPageMode
+from .failure_handling_config import FailureHandlingConfig
 from .file import File
+from .file_classification import FileClassification
 from .file_count_by_status_response import FileCountByStatusResponse
 from .file_data import FileData
 from .file_id_presigned_url import FileIdPresignedUrl
 from .file_parse_public import FileParsePublic
 from .file_permission_info_value import FilePermissionInfoValue
 from .file_resource_info_value import FileResourceInfoValue
+from .file_store_info_response import FileStoreInfoResponse
+from .file_store_info_response_status import FileStoreInfoResponseStatus
 from .filter_condition import FilterCondition
 from .filter_operation import FilterOperation
 from .filter_operation_eq import FilterOperationEq
@@ -418,7 +427,12 @@ __all__ = [
     "ChatData",
     "ChunkMode",
     "ClassificationResult",
-    "
+    "ClassifierRule",
+    "ClassifyJob",
+    "ClassifyJobResults",
+    "ClassifyJobWithStatus",
+    "ClassifyParsingConfiguration",
+    "CloudAstraDbVectorStore",
     "CloudAzStorageBlobDataSource",
     "CloudAzureAiSearchVectorStore",
     "CloudBoxDataSource",
@@ -512,13 +526,17 @@ __all__ = [
     "ExtractState",
     "ExtractTarget",
     "FailPageMode",
+    "FailureHandlingConfig",
     "File",
+    "FileClassification",
     "FileCountByStatusResponse",
     "FileData",
     "FileIdPresignedUrl",
     "FileParsePublic",
     "FilePermissionInfoValue",
     "FileResourceInfoValue",
+    "FileStoreInfoResponse",
+    "FileStoreInfoResponseStatus",
     "FilterCondition",
     "FilterOperation",
     "FilterOperationEq",
llama_cloud/types/classification_result.py

@@ -17,14 +17,13 @@ except ImportError:
 class ClassificationResult(pydantic.BaseModel):
     """
     Result of classifying a single file.
-
-    Contains the classification outcome with confidence score and matched rule info.
     """
 
-
-
+    reasoning: str = pydantic.Field(
+        description="Step-by-step explanation of why this classification was chosen and the confidence score assigned"
+    )
     confidence: float = pydantic.Field(description="Confidence score of the classification (0.0-1.0)")
-
+    type: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/classifier_rule.py

@@ -0,0 +1,43 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifierRule(pydantic.BaseModel):
+    """
+    A rule for classifying documents - v0 simplified version.
+
+    This represents a single classification rule that will be applied to documents.
+    All rules are content-based and use natural language descriptions.
+    """
+
+    type: str = pydantic.Field(
+        description="The document type to assign when this rule matches (e.g., 'invoice', 'receipt', 'contract')"
+    )
+    description: str = pydantic.Field(
+        description="Natural language description of what to classify. Be specific about the content characteristics that identify this document type."
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
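For illustration, two rules built from this model; the type and description values below are made-up examples in the spirit of the field descriptions.

from llama_cloud.types import ClassifierRule

rules = [
    ClassifierRule(
        type="invoice",
        description="Billing documents that list line items, totals, and payment terms",
    ),
    ClassifierRule(
        type="receipt",
        description="Proof-of-purchase documents showing an amount that was already paid",
    ),
]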
llama_cloud/types/classify_job.py

@@ -0,0 +1,45 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .classifier_rule import ClassifierRule
+from .classify_parsing_configuration import ClassifyParsingConfiguration
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifyJob(pydantic.BaseModel):
+    """
+    A classify job.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    rules: typing.List[ClassifierRule] = pydantic.Field(description="The rules to classify the files")
+    user_id: str = pydantic.Field(description="The ID of the user")
+    project_id: str = pydantic.Field(description="The ID of the project")
+    parsing_configuration: typing.Optional[ClassifyParsingConfiguration] = pydantic.Field(
+        description="The configuration for the parsing job"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/{classify_response.py → classify_job_results.py}

@@ -4,7 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .
+from .file_classification import FileClassification
 
 try:
     import pydantic
@@ -15,17 +15,14 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class
+class ClassifyJobResults(pydantic.BaseModel):
     """
     Response model for the classify endpoint following AIP-132 pagination standard.
-
-    Contains classification results with pagination support and summary statistics.
     """
 
-    items: typing.List[
+    items: typing.List[FileClassification] = pydantic.Field(description="The list of items.")
     next_page_token: typing.Optional[str]
     total_size: typing.Optional[int]
-    unknown_count: int = pydantic.Field(description="Number of files that couldn't be classified")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
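A sketch of how one page of results might be consumed; summarize_page is a hypothetical helper, and items/next_page_token come straight from the model above.

from llama_cloud.types import ClassifyJobResults


def summarize_page(page: ClassifyJobResults) -> None:
    # Each item is a FileClassification whose result may be absent
    for item in page.items:
        if item.result is not None:
            print(item.file_id, item.result.type, item.result.confidence)
        else:
            print(item.file_id, "not classified")
    # AIP-132 style pagination: a token means there are more pages to request
    if page.next_page_token:
        print("more pages available")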
llama_cloud/types/classify_job_with_status.py

@@ -0,0 +1,47 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .classifier_rule import ClassifierRule
+from .classify_parsing_configuration import ClassifyParsingConfiguration
+from .status_enum import StatusEnum
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifyJobWithStatus(pydantic.BaseModel):
+    """
+    A classify job with status.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    rules: typing.List[ClassifierRule] = pydantic.Field(description="The rules to classify the files")
+    user_id: str = pydantic.Field(description="The ID of the user")
+    project_id: str = pydantic.Field(description="The ID of the project")
+    parsing_configuration: typing.Optional[ClassifyParsingConfiguration] = pydantic.Field(
+        description="The configuration for the parsing job"
+    )
+    status: StatusEnum = pydantic.Field(description="The status of the classify job")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/classify_parsing_configuration.py

@@ -0,0 +1,38 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .parser_languages import ParserLanguages
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ClassifyParsingConfiguration(pydantic.BaseModel):
+    """
+    Parsing configuration for a classify job.
+    """
+
+    lang: typing.Optional[ParserLanguages] = pydantic.Field(description="The language to parse the files in")
+    max_pages: typing.Optional[int]
+    target_pages: typing.Optional[typing.List[int]]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
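As a sketch, a configuration that only parses part of each file before classification; lang and target_pages are optional and omitted here.

from llama_cloud.types import ClassifyParsingConfiguration

# Parse at most two pages per file before classifying it
parsing_config = ClassifyParsingConfiguration(max_pages=2)
print(parsing_config.json())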
llama_cloud/types/cloud_astra_db_vector_store.py

@@ -0,0 +1,51 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class CloudAstraDbVectorStore(pydantic.BaseModel):
+    """
+    Cloud AstraDB Vector Store.
+
+    This class is used to store the configuration for an AstraDB vector store, so that it can be
+    created and used in LlamaCloud.
+
+    Args:
+        token (str): The Astra DB Application Token to use.
+        api_endpoint (str): The Astra DB JSON API endpoint for your database.
+        collection_name (str): Collection name to use. If not existing, it will be created.
+        embedding_dimension (int): Length of the embedding vectors in use.
+        keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace'
+    """
+
+    supports_nested_metadata_filters: typing.Optional[bool]
+    token: str = pydantic.Field(description="The Astra DB Application Token to use")
+    api_endpoint: str = pydantic.Field(description="The Astra DB JSON API endpoint for your database")
+    collection_name: str = pydantic.Field(description="Collection name to use. If not existing, it will be created")
+    embedding_dimension: int = pydantic.Field(description="Length of the embedding vectors in use")
+    keyspace: typing.Optional[str]
+    class_name: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
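A construction sketch for the new sink type; every value below is a placeholder, and embedding_dimension must match the embedding model used by the pipeline.

from llama_cloud.types import CloudAstraDbVectorStore

astra_store = CloudAstraDbVectorStore(
    token="AstraCS:<application-token>",  # placeholder
    api_endpoint="https://<db-id>-<region>.apps.astra.datastax.com",  # placeholder
    collection_name="llamacloud_documents",  # created if it does not already exist
    embedding_dimension=1536,  # placeholder; must match your embedding model
    keyspace="default_keyspace",  # optional
)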
llama_cloud/types/cloud_confluence_data_source.py

@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .failure_handling_config import FailureHandlingConfig
 
 try:
     import pydantic
@@ -28,6 +29,20 @@ class CloudConfluenceDataSource(pydantic.BaseModel):
     label: typing.Optional[str]
     index_restricted_pages: typing.Optional[bool] = pydantic.Field(description="Whether to index restricted pages.")
     keep_markdown_format: typing.Optional[bool] = pydantic.Field(description="Whether to keep the markdown format.")
+    failure_handling: typing.Optional[FailureHandlingConfig] = pydantic.Field(
+        description=(
+            "Configuration for handling failures during processing. Key-value object controlling failure handling behaviors.\n"
+            "\n"
+            "Example:\n"
+            "{\n"
+            '"skip_list_failures": true\n'
+            "}\n"
+            "\n"
+            "Currently supports:\n"
+            "\n"
+            "- skip_list_failures: Skip failed batches/lists and continue processing\n"
+        )
+    )
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/configurable_data_sink_names.py

@@ -13,6 +13,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
     AZUREAI_SEARCH = "AZUREAI_SEARCH"
     MONGODB_ATLAS = "MONGODB_ATLAS"
     MILVUS = "MILVUS"
+    ASTRA_DB = "ASTRA_DB"
 
     def visit(
         self,
@@ -22,6 +23,7 @@ class ConfigurableDataSinkNames(str, enum.Enum):
         azureai_search: typing.Callable[[], T_Result],
         mongodb_atlas: typing.Callable[[], T_Result],
         milvus: typing.Callable[[], T_Result],
+        astra_db: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is ConfigurableDataSinkNames.PINECONE:
             return pinecone()
@@ -35,3 +37,5 @@ class ConfigurableDataSinkNames(str, enum.Enum):
             return mongodb_atlas()
         if self is ConfigurableDataSinkNames.MILVUS:
             return milvus()
+        if self is ConfigurableDataSinkNames.ASTRA_DB:
+            return astra_db()
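The new member behaves like the existing ones; a trivial check, importing from the module path directly since the package-level re-export is not shown in this diff.

from llama_cloud.types.configurable_data_sink_names import ConfigurableDataSinkNames

sink_name = ConfigurableDataSinkNames.ASTRA_DB
print(sink_name.value)  # "ASTRA_DB"
print(sink_name == "ASTRA_DB")  # True: str-backed enum members compare equal to their values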
llama_cloud/types/data_sink_component.py

@@ -2,6 +2,7 @@
 
 import typing
 
+from .cloud_astra_db_vector_store import CloudAstraDbVectorStore
 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_milvus_vector_store import CloudMilvusVectorStore
 from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
@@ -17,4 +18,5 @@ DataSinkComponent = typing.Union[
     CloudAzureAiSearchVectorStore,
     CloudMongoDbAtlasVectorSearch,
     CloudMilvusVectorStore,
+    CloudAstraDbVectorStore,
 ]
llama_cloud/types/data_sink_create_component.py

@@ -2,6 +2,7 @@
 
 import typing
 
+from .cloud_astra_db_vector_store import CloudAstraDbVectorStore
 from .cloud_azure_ai_search_vector_store import CloudAzureAiSearchVectorStore
 from .cloud_milvus_vector_store import CloudMilvusVectorStore
 from .cloud_mongo_db_atlas_vector_search import CloudMongoDbAtlasVectorSearch
@@ -17,4 +18,5 @@ DataSinkCreateComponent = typing.Union[
     CloudAzureAiSearchVectorStore,
     CloudMongoDbAtlasVectorSearch,
     CloudMilvusVectorStore,
+    CloudAstraDbVectorStore,
 ]
llama_cloud/types/failure_handling_config.py

@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FailureHandlingConfig(pydantic.BaseModel):
+    """
+    Configuration for handling different types of failures during data source processing.
+    """
+
+    skip_list_failures: typing.Optional[bool] = pydantic.Field(
+        description="Whether to skip failed batches/lists and continue processing"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
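A minimal sketch of the new failure-handling knob, which CloudConfluenceDataSource now accepts through its failure_handling field (shown earlier in this diff).

from llama_cloud.types import FailureHandlingConfig

# Skip failed batches/lists and keep processing the rest of the source
failure_handling = FailureHandlingConfig(skip_list_failures=True)
print(failure_handling.json())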
llama_cloud/types/file_classification.py

@@ -0,0 +1,41 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .classification_result import ClassificationResult
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FileClassification(pydantic.BaseModel):
+    """
+    A file classification.
+    """
+
+    id: str = pydantic.Field(description="Unique identifier")
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    classify_job_id: str = pydantic.Field(description="The ID of the classify job")
+    file_id: str = pydantic.Field(description="The ID of the classified file")
+    result: typing.Optional[ClassificationResult]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}