llama-cloud 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release: this version of llama-cloud might be problematic.
- llama_cloud/__init__.py +36 -16
- llama_cloud/client.py +3 -0
- llama_cloud/resources/__init__.py +20 -0
- llama_cloud/resources/beta/__init__.py +2 -0
- llama_cloud/resources/beta/client.py +371 -0
- llama_cloud/resources/embedding_model_configs/client.py +82 -22
- llama_cloud/resources/llama_extract/__init__.py +21 -0
- llama_cloud/resources/llama_extract/client.py +227 -114
- llama_cloud/resources/llama_extract/types/__init__.py +21 -0
- llama_cloud/resources/parsing/client.py +115 -4
- llama_cloud/resources/pipelines/client.py +105 -0
- llama_cloud/types/__init__.py +26 -24
- llama_cloud/types/{extract_schema_validate_request.py → audio_block.py} +5 -3
- llama_cloud/types/batch.py +47 -0
- llama_cloud/types/batch_item.py +40 -0
- llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py} +6 -9
- llama_cloud/types/{extract_agent_create.py → batch_public_output.py} +7 -10
- llama_cloud/types/cloud_confluence_data_source.py +1 -0
- llama_cloud/types/cloud_postgres_vector_store.py +2 -0
- llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
- llama_cloud/types/extract_config.py +2 -0
- llama_cloud/types/extract_job_create.py +1 -2
- llama_cloud/types/fail_page_mode.py +29 -0
- llama_cloud/types/{extract_job_create_batch.py → file_count_by_status_response.py} +7 -12
- llama_cloud/types/file_parse_public.py +36 -0
- llama_cloud/types/job_names.py +8 -12
- llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py +13 -1
- llama_cloud/types/llama_parse_parameters.py +7 -0
- llama_cloud/types/markdown_node_parser.py +4 -0
- llama_cloud/types/message_role.py +4 -0
- llama_cloud/types/pg_vector_distance_method.py +43 -0
- llama_cloud/types/pg_vector_hnsw_settings.py +45 -0
- llama_cloud/types/pg_vector_vector_type.py +35 -0
- llama_cloud/types/pipeline_create.py +1 -0
- llama_cloud/types/pipeline_data_source.py +3 -0
- llama_cloud/types/pipeline_data_source_status.py +33 -0
- llama_cloud/types/pipeline_file.py +1 -0
- llama_cloud/types/prompt_conf.py +3 -0
- llama_cloud/types/struct_parse_conf.py +4 -1
- llama_cloud/types/token_text_splitter.py +3 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/METADATA +1 -1
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/RECORD +52 -41
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_create_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_agent_update_data_schema_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_job_create_batch_data_schema_override_zero_value.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema.py +0 -0
- /llama_cloud/{types → resources/llama_extract/types}/extract_schema_validate_request_data_schema_zero_value.py +0 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/LICENSE +0 -0
- {llama_cloud-0.1.18.dist-info → llama_cloud-0.1.19.dist-info}/WHEEL +0 -0
llama_cloud/types/batch_item.py
ADDED
@@ -0,0 +1,40 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .file_parse_public import FileParsePublic
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class BatchItem(pydantic.BaseModel):
+    id: str = pydantic.Field(description="Unique identifier for the batch item")
+    batch_id: str = pydantic.Field(description="The ID of the batch to which the item belongs")
+    status: str = pydantic.Field(description="The current status of the batch item")
+    status_updated_at: typing.Optional[dt.datetime]
+    created_at: typing.Optional[dt.datetime]
+    updated_at: typing.Optional[dt.datetime]
+    input_file: str = pydantic.Field(description="The input file associated with the batch item")
+    output_file: typing.Optional[str]
+    task: typing.Optional[FileParsePublic]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
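As a quick orientation for the new batch types, here is a minimal usage sketch of BatchItem. It assumes the model is importable from llama_cloud.types.batch_item (matching the file path above); the identifiers and paths are hypothetical, and only the required fields are set since the Optional fields default to None under pydantic v1.

from llama_cloud.types.batch_item import BatchItem  # assumed import path

item = BatchItem(
    id="batch-item-001",               # hypothetical identifiers
    batch_id="batch-001",
    status="pending",
    input_file="files/input-0001.pdf",
)
print(item.json())  # uses the by_alias / exclude_unset defaults defined above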
llama_cloud/types/{extract_agent_update.py → batch_paginated_list.py}
RENAMED
@@ -4,8 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .
-from .extract_config import ExtractConfig
+from .batch import Batch
 
 try:
     import pydantic
@@ -16,13 +15,11 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class
-""
-
-""
-
-    data_schema: ExtractAgentUpdateDataSchema = pydantic.Field(description="The schema of the data")
-    config: ExtractConfig = pydantic.Field(description="The configuration parameters for the extraction agent.")
+class BatchPaginatedList(pydantic.BaseModel):
+    data: typing.List[Batch] = pydantic.Field(description="List of batches")
+    limit: int = pydantic.Field(description="Pagination limit")
+    offset: int = pydantic.Field(description="Pagination offset")
+    total_count: int = pydantic.Field(description="Total number of batches")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/{extract_agent_create.py → batch_public_output.py}
RENAMED
@@ -4,8 +4,9 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .
-from .
+from .batch import Batch
+from .batch_item import BatchItem
+from .managed_ingestion_status_response import ManagedIngestionStatusResponse
 
 try:
     import pydantic
@@ -16,14 +17,10 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class
-
-
-
-
-    name: str = pydantic.Field(description="The name of the extraction schema")
-    data_schema: ExtractAgentCreateDataSchema = pydantic.Field(description="The schema of the data.")
-    config: ExtractConfig = pydantic.Field(description="The configuration parameters for the extraction agent.")
+class BatchPublicOutput(pydantic.BaseModel):
+    batch: Batch
+    batch_items: typing.List[BatchItem]
+    ingestion_status: ManagedIngestionStatusResponse
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/cloud_confluence_data_source.py
CHANGED
@@ -27,6 +27,7 @@ class CloudConfluenceDataSource(pydantic.BaseModel):
     cql: typing.Optional[str]
     label: typing.Optional[str]
     index_restricted_pages: typing.Optional[bool] = pydantic.Field(description="Whether to index restricted pages.")
+    keep_markdown_format: typing.Optional[bool] = pydantic.Field(description="Whether to keep the markdown format.")
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/cloud_postgres_vector_store.py
CHANGED
@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .pg_vector_hnsw_settings import PgVectorHnswSettings
 
 try:
     import pydantic
@@ -26,6 +27,7 @@ class CloudPostgresVectorStore(pydantic.BaseModel):
     embed_dim: int
     hybrid_search: typing.Optional[bool]
     perform_setup: typing.Optional[bool]
+    hnsw_settings: typing.Optional[PgVectorHnswSettings]
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/cloud_sharepoint_data_source.py
CHANGED
@@ -25,6 +25,7 @@ class CloudSharepointDataSource(pydantic.BaseModel):
     client_secret: str = pydantic.Field(description="The client secret to use for authentication.")
     tenant_id: str = pydantic.Field(description="The tenant ID to use for authentication.")
     required_exts: typing.Optional[typing.List[str]]
+    get_permissions: typing.Optional[bool]
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/extract_config.py
CHANGED
@@ -24,6 +24,8 @@ class ExtractConfig(pydantic.BaseModel):
     extraction_target: typing.Optional[ExtractTarget] = pydantic.Field(description="The extraction target specified.")
     extraction_mode: typing.Optional[ExtractMode] = pydantic.Field(description="The extraction mode specified.")
     system_prompt: typing.Optional[str]
+    use_reasoning: typing.Optional[bool] = pydantic.Field(description="Whether to use reasoning for the extraction.")
+    cite_sources: typing.Optional[bool] = pydantic.Field(description="Whether to cite sources for the extraction.")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
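A minimal sketch of the two new ExtractConfig flags, assuming the remaining ExtractConfig fields are optional as the surrounding hunk suggests and that the model is importable from llama_cloud.types.extract_config:

from llama_cloud.types.extract_config import ExtractConfig  # assumed import path

config = ExtractConfig(
    use_reasoning=True,   # new flag: reason about the document before extracting
    cite_sources=True,    # new flag: attach source citations to extracted values
)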
llama_cloud/types/extract_job_create.py
CHANGED
@@ -22,8 +22,7 @@ class ExtractJobCreate(pydantic.BaseModel):
     """
 
     extraction_agent_id: str = pydantic.Field(description="The id of the extraction agent")
-    file_id:
-    file: typing.Optional[str]
+    file_id: str = pydantic.Field(description="The id of the file")
     data_schema_override: typing.Optional[ExtractJobCreateDataSchemaOverride] = pydantic.Field(
         description="The data schema to override the extraction agent's data schema with"
     )
llama_cloud/types/fail_page_mode.py
ADDED
@@ -0,0 +1,29 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class FailPageMode(str, enum.Enum):
+    """
+    Enum for representing the different available page error handling modes
+    """
+
+    RAW_TEXT = "raw_text"
+    BLANK_PAGE = "blank_page"
+    ERROR_MESSAGE = "error_message"
+
+    def visit(
+        self,
+        raw_text: typing.Callable[[], T_Result],
+        blank_page: typing.Callable[[], T_Result],
+        error_message: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is FailPageMode.RAW_TEXT:
+            return raw_text()
+        if self is FailPageMode.BLANK_PAGE:
+            return blank_page()
+        if self is FailPageMode.ERROR_MESSAGE:
+            return error_message()
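The visit() helper on the new enum dispatches on the selected member, which makes exhaustive handling explicit. A small sketch (the import path is assumed from the file location above, and the replacement strings are hypothetical):

from llama_cloud.types.fail_page_mode import FailPageMode  # assumed import path

mode = FailPageMode.ERROR_MESSAGE
replacement_text = mode.visit(
    raw_text=lambda: "<raw text of the failed page>",
    blank_page=lambda: "",
    error_message=lambda: "[this page failed to parse]",
)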
llama_cloud/types/{extract_job_create_batch.py → file_count_by_status_response.py}
RENAMED
@@ -4,8 +4,6 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .extract_config import ExtractConfig
-from .extract_job_create_batch_data_schema_override import ExtractJobCreateBatchDataSchemaOverride
 
 try:
     import pydantic
@@ -16,17 +14,14 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class
-""
-
-
-
-
-
-    data_schema_override: typing.Optional[ExtractJobCreateBatchDataSchemaOverride] = pydantic.Field(
-        description="The data schema to override the extraction agent's data schema with"
+class FileCountByStatusResponse(pydantic.BaseModel):
+    counts: typing.Dict[str, int] = pydantic.Field(description="The counts of files by status")
+    total_count: int = pydantic.Field(description="The total number of files")
+    pipeline_id: typing.Optional[str]
+    data_source_id: typing.Optional[str]
+    only_manually_uploaded: typing.Optional[bool] = pydantic.Field(
+        description="Whether to only count manually uploaded files"
     )
-    config_override: typing.Optional[ExtractConfig]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/file_parse_public.py
ADDED
@@ -0,0 +1,36 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class FileParsePublic(pydantic.BaseModel):
+    created_at: dt.datetime = pydantic.Field(description="The date and time when the file was parsed.")
+    status: str = pydantic.Field(description="The status of the parse task.")
+    started_at: typing.Optional[dt.datetime]
+    ended_at: typing.Optional[dt.datetime]
+    input_path: str = pydantic.Field(description="The path to the input file.")
+    data_path: str = pydantic.Field(description="The path to the data file.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/job_names.py
CHANGED
@@ -14,7 +14,6 @@ class JobNames(str, enum.Enum):
     LOAD_DOCUMENTS_JOB = "load_documents_job"
     LOAD_FILES_JOB = "load_files_job"
     PLAYGROUND_JOB = "playground_job"
-    EVAL_DATASET_JOB = "eval_dataset_job"
     PIPELINE_MANAGED_INGESTION_JOB = "pipeline_managed_ingestion_job"
     DATA_SOURCE_MANAGED_INGESTION_JOB = "data_source_managed_ingestion_job"
     DATA_SOURCE_UPDATE_DISPATCHER_JOB = "data_source_update_dispatcher_job"
@@ -22,20 +21,19 @@ class JobNames(str, enum.Enum):
     PIPELINE_FILE_UPDATER_JOB = "pipeline_file_updater_job"
     FILE_MANAGED_INGESTION_JOB = "file_managed_ingestion_job"
     DOCUMENT_INGESTION_JOB = "document_ingestion_job"
-    PARSE_RAW_FILE_JOB = "parse_raw_file_job"
-    LLAMA_PARSE_TRANSFORM_JOB = "llama_parse_transform_job"
     METADATA_UPDATE_JOB = "metadata_update_job"
     PARSE_RAW_FILE_JOB_CACHED = "parse_raw_file_job_cached"
     EXTRACTION_JOB = "extraction_job"
     EXTRACT_JOB = "extract_job"
     ASYNCIO_TEST_JOB = "asyncio_test_job"
+    PARSE_RAW_FILE_JOB = "parse_raw_file_job"
+    LLAMA_PARSE_TRANSFORM_JOB = "llama_parse_transform_job"
 
     def visit(
         self,
         load_documents_job: typing.Callable[[], T_Result],
         load_files_job: typing.Callable[[], T_Result],
         playground_job: typing.Callable[[], T_Result],
-        eval_dataset_job: typing.Callable[[], T_Result],
         pipeline_managed_ingestion_job: typing.Callable[[], T_Result],
         data_source_managed_ingestion_job: typing.Callable[[], T_Result],
         data_source_update_dispatcher_job: typing.Callable[[], T_Result],
@@ -43,13 +41,13 @@ class JobNames(str, enum.Enum):
         pipeline_file_updater_job: typing.Callable[[], T_Result],
         file_managed_ingestion_job: typing.Callable[[], T_Result],
         document_ingestion_job: typing.Callable[[], T_Result],
-        parse_raw_file_job: typing.Callable[[], T_Result],
-        llama_parse_transform_job: typing.Callable[[], T_Result],
         metadata_update_job: typing.Callable[[], T_Result],
         parse_raw_file_job_cached: typing.Callable[[], T_Result],
         extraction_job: typing.Callable[[], T_Result],
         extract_job: typing.Callable[[], T_Result],
         asyncio_test_job: typing.Callable[[], T_Result],
+        parse_raw_file_job: typing.Callable[[], T_Result],
+        llama_parse_transform_job: typing.Callable[[], T_Result],
     ) -> T_Result:
         if self is JobNames.LOAD_DOCUMENTS_JOB:
             return load_documents_job()
@@ -57,8 +55,6 @@ class JobNames(str, enum.Enum):
             return load_files_job()
         if self is JobNames.PLAYGROUND_JOB:
             return playground_job()
-        if self is JobNames.EVAL_DATASET_JOB:
-            return eval_dataset_job()
         if self is JobNames.PIPELINE_MANAGED_INGESTION_JOB:
             return pipeline_managed_ingestion_job()
         if self is JobNames.DATA_SOURCE_MANAGED_INGESTION_JOB:
@@ -73,10 +69,6 @@ class JobNames(str, enum.Enum):
             return file_managed_ingestion_job()
         if self is JobNames.DOCUMENT_INGESTION_JOB:
             return document_ingestion_job()
-        if self is JobNames.PARSE_RAW_FILE_JOB:
-            return parse_raw_file_job()
-        if self is JobNames.LLAMA_PARSE_TRANSFORM_JOB:
-            return llama_parse_transform_job()
         if self is JobNames.METADATA_UPDATE_JOB:
             return metadata_update_job()
         if self is JobNames.PARSE_RAW_FILE_JOB_CACHED:
@@ -87,3 +79,7 @@ class JobNames(str, enum.Enum):
             return extract_job()
         if self is JobNames.ASYNCIO_TEST_JOB:
             return asyncio_test_job()
+        if self is JobNames.PARSE_RAW_FILE_JOB:
+            return parse_raw_file_job()
+        if self is JobNames.LLAMA_PARSE_TRANSFORM_JOB:
+            return llama_parse_transform_job()
llama_cloud/types/llama_index_core_base_llms_types_chat_message_blocks_item.py
CHANGED
@@ -6,10 +6,20 @@ import typing
 
 import typing_extensions
 
+from .audio_block import AudioBlock
 from .image_block import ImageBlock
 from .text_block import TextBlock
 
 
+class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio(AudioBlock):
+    block_type: typing_extensions.Literal["audio"]
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+
+
 class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image(ImageBlock):
     block_type: typing_extensions.Literal["image"]
 
@@ -29,5 +39,7 @@ class LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text(TextBlock):
 
 
 LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem = typing.Union[
-
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Audio,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Image,
+    LlamaIndexCoreBaseLlmsTypesChatMessageBlocksItem_Text,
 ]
llama_cloud/types/llama_parse_parameters.py
CHANGED
@@ -4,6 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
+from .fail_page_mode import FailPageMode
 from .parser_languages import ParserLanguages
 from .parsing_mode import ParsingMode
 
@@ -94,6 +95,7 @@ class LlamaParseParameters(pydantic.BaseModel):
     strict_mode_image_ocr: typing.Optional[bool]
     strict_mode_reconstruction: typing.Optional[bool]
     strict_mode_buggy_font: typing.Optional[bool]
+    save_images: typing.Optional[bool]
     ignore_document_elements_for_layout_detection: typing.Optional[bool]
     output_tables_as_html: typing.Optional[bool] = pydantic.Field(alias="output_tables_as_HTML")
     internal_is_screenshot_job: typing.Optional[bool]
@@ -101,6 +103,11 @@
     system_prompt: typing.Optional[str]
     system_prompt_append: typing.Optional[str]
     user_prompt: typing.Optional[str]
+    page_error_tolerance: typing.Optional[float]
+    replace_failed_page_mode: typing.Optional[FailPageMode]
+    replace_failed_page_with_error_message_prefix: typing.Optional[str]
+    replace_failed_page_with_error_message_suffix: typing.Optional[str]
+    markdown_table_multiline_header_separator: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
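A hedged sketch of the new failed-page handling options, assuming the other LlamaParseParameters fields are optional as the hunks above suggest; the import paths and values are illustrative only:

from llama_cloud.types.fail_page_mode import FailPageMode                  # assumed import paths
from llama_cloud.types.llama_parse_parameters import LlamaParseParameters

params = LlamaParseParameters(
    save_images=True,                                      # hypothetical values
    page_error_tolerance=0.1,
    replace_failed_page_mode=FailPageMode.ERROR_MESSAGE,
    replace_failed_page_with_error_message_prefix="[parse error: ",
    replace_failed_page_with_error_message_suffix="]",
)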
llama_cloud/types/markdown_node_parser.py
CHANGED
@@ -24,6 +24,7 @@ class MarkdownNodeParser(pydantic.BaseModel):
     Args:
         include_metadata (bool): whether to include metadata in nodes
         include_prev_next_rel (bool): whether to include prev/next relationships
+        header_path_separator (str): separator char used for section header path metadata
     """
 
     include_metadata: typing.Optional[bool] = pydantic.Field(
@@ -32,6 +33,9 @@ class MarkdownNodeParser(pydantic.BaseModel):
     include_prev_next_rel: typing.Optional[bool] = pydantic.Field(description="Include prev/next node relationships.")
     callback_manager: typing.Optional[typing.Any]
     id_func: typing.Optional[str]
+    header_path_separator: typing.Optional[str] = pydantic.Field(
+        description="Separator char used for section header path metadata."
+    )
     class_name: typing.Optional[str]
 
     def json(self, **kwargs: typing.Any) -> str:
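A one-line sketch of the new separator option, assuming MarkdownNodeParser can be constructed with defaults for its other optional fields and is importable from llama_cloud.types.markdown_node_parser; the separator string is hypothetical:

from llama_cloud.types.markdown_node_parser import MarkdownNodeParser  # assumed import path

parser = MarkdownNodeParser(header_path_separator=" > ")  # hypothetical separator for header path metadata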
llama_cloud/types/message_role.py
CHANGED
@@ -12,6 +12,7 @@ class MessageRole(str, enum.Enum):
     """
 
     SYSTEM = "system"
+    DEVELOPER = "developer"
     USER = "user"
     ASSISTANT = "assistant"
     FUNCTION = "function"
@@ -22,6 +23,7 @@ class MessageRole(str, enum.Enum):
     def visit(
         self,
         system: typing.Callable[[], T_Result],
+        developer: typing.Callable[[], T_Result],
         user: typing.Callable[[], T_Result],
         assistant: typing.Callable[[], T_Result],
         function: typing.Callable[[], T_Result],
@@ -31,6 +33,8 @@
     ) -> T_Result:
         if self is MessageRole.SYSTEM:
             return system()
+        if self is MessageRole.DEVELOPER:
+            return developer()
         if self is MessageRole.USER:
             return user()
         if self is MessageRole.ASSISTANT:
llama_cloud/types/pg_vector_distance_method.py
ADDED
@@ -0,0 +1,43 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PgVectorDistanceMethod(str, enum.Enum):
+    """
+    Distance methods for PGVector.
+    Docs:
+    https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+    """
+
+    L_2 = "l2"
+    IP = "ip"
+    COSINE = "cosine"
+    L_1 = "l1"
+    HAMMING = "hamming"
+    JACCARD = "jaccard"
+
+    def visit(
+        self,
+        l_2: typing.Callable[[], T_Result],
+        ip: typing.Callable[[], T_Result],
+        cosine: typing.Callable[[], T_Result],
+        l_1: typing.Callable[[], T_Result],
+        hamming: typing.Callable[[], T_Result],
+        jaccard: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PgVectorDistanceMethod.L_2:
+            return l_2()
+        if self is PgVectorDistanceMethod.IP:
+            return ip()
+        if self is PgVectorDistanceMethod.COSINE:
+            return cosine()
+        if self is PgVectorDistanceMethod.L_1:
+            return l_1()
+        if self is PgVectorDistanceMethod.HAMMING:
+            return hamming()
+        if self is PgVectorDistanceMethod.JACCARD:
+            return jaccard()
llama_cloud/types/pg_vector_hnsw_settings.py
ADDED
@@ -0,0 +1,45 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .pg_vector_distance_method import PgVectorDistanceMethod
+from .pg_vector_vector_type import PgVectorVectorType
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PgVectorHnswSettings(pydantic.BaseModel):
+    """
+    HNSW settings for PGVector.
+    """
+
+    ef_construction: typing.Optional[int] = pydantic.Field(
+        description="The number of edges to use during the construction phase."
+    )
+    ef_search: typing.Optional[int] = pydantic.Field(description="The number of edges to use during the search phase.")
+    m: typing.Optional[int] = pydantic.Field(
+        description="The number of bi-directional links created for each new element."
+    )
+    vector_type: typing.Optional[PgVectorVectorType] = pydantic.Field(description="The type of vector to use.")
+    distance_method: typing.Optional[PgVectorDistanceMethod] = pydantic.Field(description="The distance method to use.")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/pg_vector_vector_type.py
ADDED
@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PgVectorVectorType(str, enum.Enum):
+    """
+    Vector storage formats for PGVector.
+    Docs:
+    https://github.com/pgvector/pgvector?tab=readme-ov-file#query-options
+    """
+
+    VECTOR = "vector"
+    HALF_VEC = "half_vec"
+    BIT = "bit"
+    SPARSE_VEC = "sparse_vec"
+
+    def visit(
+        self,
+        vector: typing.Callable[[], T_Result],
+        half_vec: typing.Callable[[], T_Result],
+        bit: typing.Callable[[], T_Result],
+        sparse_vec: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PgVectorVectorType.VECTOR:
+            return vector()
+        if self is PgVectorVectorType.HALF_VEC:
+            return half_vec()
+        if self is PgVectorVectorType.BIT:
+            return bit()
+        if self is PgVectorVectorType.SPARSE_VEC:
+            return sparse_vec()
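Taken together, the three new pg_vector types describe how the Postgres HNSW index is built and queried. A minimal sketch, with assumed import paths and hypothetical tuning values; all PgVectorHnswSettings fields are Optional per the model above:

from llama_cloud.types.pg_vector_distance_method import PgVectorDistanceMethod  # assumed import paths
from llama_cloud.types.pg_vector_hnsw_settings import PgVectorHnswSettings
from llama_cloud.types.pg_vector_vector_type import PgVectorVectorType

hnsw = PgVectorHnswSettings(
    ef_construction=128,   # hypothetical HNSW tuning values
    ef_search=64,
    m=16,
    vector_type=PgVectorVectorType.HALF_VEC,
    distance_method=PgVectorDistanceMethod.COSINE,
)
# The settings object would then go into the new hnsw_settings field of CloudPostgresVectorStore shown earlier.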
llama_cloud/types/pipeline_create.py
CHANGED
@@ -44,6 +44,7 @@ class PipelineCreate(pydantic.BaseModel):
     llama_parse_parameters: typing.Optional[LlamaParseParameters] = pydantic.Field(
         description="Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline."
     )
+    status: typing.Optional[str]
     name: str
     pipeline_type: typing.Optional[PipelineType] = pydantic.Field(
         description="Type of pipeline. Either PLAYGROUND or MANAGED."
llama_cloud/types/pipeline_data_source.py
CHANGED
@@ -7,6 +7,7 @@ from ..core.datetime_utils import serialize_datetime
 from .configurable_data_source_names import ConfigurableDataSourceNames
 from .pipeline_data_source_component import PipelineDataSourceComponent
 from .pipeline_data_source_custom_metadata_value import PipelineDataSourceCustomMetadataValue
+from .pipeline_data_source_status import PipelineDataSourceStatus
 
 try:
     import pydantic
@@ -36,6 +37,8 @@ class PipelineDataSource(pydantic.BaseModel):
     last_synced_at: dt.datetime = pydantic.Field(description="The last time the data source was automatically synced.")
     sync_interval: typing.Optional[float]
     sync_schedule_set_by: typing.Optional[str]
+    status: typing.Optional[PipelineDataSourceStatus]
+    status_updated_at: typing.Optional[dt.datetime]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_data_source_status.py
ADDED
@@ -0,0 +1,33 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PipelineDataSourceStatus(str, enum.Enum):
+    NOT_STARTED = "NOT_STARTED"
+    IN_PROGRESS = "IN_PROGRESS"
+    SUCCESS = "SUCCESS"
+    ERROR = "ERROR"
+    CANCELLED = "CANCELLED"
+
+    def visit(
+        self,
+        not_started: typing.Callable[[], T_Result],
+        in_progress: typing.Callable[[], T_Result],
+        success: typing.Callable[[], T_Result],
+        error: typing.Callable[[], T_Result],
+        cancelled: typing.Callable[[], T_Result],
+    ) -> T_Result:
+        if self is PipelineDataSourceStatus.NOT_STARTED:
+            return not_started()
+        if self is PipelineDataSourceStatus.IN_PROGRESS:
+            return in_progress()
+        if self is PipelineDataSourceStatus.SUCCESS:
+            return success()
+        if self is PipelineDataSourceStatus.ERROR:
+            return error()
+        if self is PipelineDataSourceStatus.CANCELLED:
+            return cancelled()
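A small sketch of the new status enum's visit() dispatcher, for example to map each member to a display label (import path assumed from the file location above, labels hypothetical):

from llama_cloud.types.pipeline_data_source_status import PipelineDataSourceStatus  # assumed import path

status = PipelineDataSourceStatus.IN_PROGRESS
label = status.visit(
    not_started=lambda: "queued",
    in_progress=lambda: "syncing",
    success=lambda: "synced",
    error=lambda: "failed",
    cancelled=lambda: "cancelled",
)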
llama_cloud/types/pipeline_file.py
CHANGED
@@ -42,6 +42,7 @@ class PipelineFile(pydantic.BaseModel):
     config_hash: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileConfigHashValue]]]
     indexed_page_count: typing.Optional[int]
     status: typing.Optional[PipelineFileStatus]
+    status_updated_at: typing.Optional[dt.datetime]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/prompt_conf.py
CHANGED
@@ -19,6 +19,9 @@ class PromptConf(pydantic.BaseModel):
     extraction_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for the extraction.")
     error_handling_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for error handling.")
     reasoning_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for reasoning.")
+    cite_sources_prompt: typing.Optional[typing.Dict[str, str]] = pydantic.Field(
+        description="The prompt to use for citing sources."
+    )
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/struct_parse_conf.py
CHANGED
@@ -36,7 +36,10 @@ class StructParseConf(pydantic.BaseModel):
         description="Whether to handle missing fields in the schema."
     )
     use_reasoning: typing.Optional[bool] = pydantic.Field(
-        description="Whether to use reasoning for the structured
+        description="Whether to use reasoning for the structured extraction."
+    )
+    cite_sources: typing.Optional[bool] = pydantic.Field(
+        description="Whether to cite sources for the structured extraction."
     )
     prompt_conf: typing.Optional[PromptConf] = pydantic.Field(
         description="The prompt configuration for the structured parsing."