llama-cloud 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cloud/__init__.py +44 -26
- llama_cloud/resources/files/client.py +18 -4
- llama_cloud/resources/parsing/client.py +8 -0
- llama_cloud/resources/pipelines/client.py +25 -11
- llama_cloud/types/__init__.py +46 -26
- llama_cloud/types/{base_prompt_template.py → data_source_update_dispatcher_config.py} +9 -7
- llama_cloud/types/{node_parser.py → delete_params.py} +7 -9
- llama_cloud/types/document_ingestion_job_params.py +43 -0
- llama_cloud/types/extract_config.py +3 -0
- llama_cloud/types/job_record.py +2 -2
- llama_cloud/types/job_record_parameters.py +111 -0
- llama_cloud/types/{page_splitter_node_parser.py → l_lama_parse_transform_config.py} +5 -10
- llama_cloud/types/legacy_parse_job_config.py +189 -0
- llama_cloud/types/llama_parse_parameters.py +1 -0
- llama_cloud/types/load_files_job_config.py +35 -0
- llama_cloud/types/parse_job_config.py +134 -0
- llama_cloud/types/pipeline.py +4 -4
- llama_cloud/types/pipeline_create.py +2 -2
- llama_cloud/types/pipeline_file_update_dispatcher_config.py +38 -0
- llama_cloud/types/{configured_transformation_item.py → pipeline_file_updater_config.py} +13 -12
- llama_cloud/types/pipeline_managed_ingestion_job_params.py +37 -0
- llama_cloud/types/pipeline_metadata_config.py +36 -0
- llama_cloud/types/pipeline_status.py +17 -0
- llama_cloud/types/prompt_conf.py +1 -0
- llama_cloud/types/supported_llm_model.py +1 -2
- {llama_cloud-0.1.19.dist-info → llama_cloud-0.1.21.dist-info}/METADATA +6 -2
- {llama_cloud-0.1.19.dist-info → llama_cloud-0.1.21.dist-info}/RECORD +29 -29
- {llama_cloud-0.1.19.dist-info → llama_cloud-0.1.21.dist-info}/WHEEL +1 -1
- llama_cloud/types/character_splitter.py +0 -46
- llama_cloud/types/code_splitter.py +0 -50
- llama_cloud/types/configured_transformation_item_component.py +0 -22
- llama_cloud/types/llm.py +0 -60
- llama_cloud/types/markdown_element_node_parser.py +0 -51
- llama_cloud/types/markdown_node_parser.py +0 -52
- llama_cloud/types/pydantic_program_mode.py +0 -41
- llama_cloud/types/sentence_splitter.py +0 -50
- llama_cloud/types/token_text_splitter.py +0 -50
- {llama_cloud-0.1.19.dist-info → llama_cloud-0.1.21.dist-info}/LICENSE +0 -0
llama_cloud/types/legacy_parse_job_config.py
ADDED

@@ -0,0 +1,189 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class LegacyParseJobConfig(pydantic.BaseModel):
+    """
+    Configuration for llamaparse job
+    """
+
+    custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+    resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+    user_id: str = pydantic.Field(alias="userId", description="The user ID.")
+    file_name: str = pydantic.Field(alias="fileName", description="The file name.")
+    original_file_name: str = pydantic.Field(alias="originalFileName", description="The original file name.")
+    file_key: str = pydantic.Field(alias="fileKey", description="The file key.")
+    input_url: typing.Optional[str] = pydantic.Field(alias="inputUrl")
+    http_proxy: typing.Optional[str] = pydantic.Field(alias="httpProxy")
+    fast_mode: typing.Optional[bool] = pydantic.Field(alias="fastMode")
+    lang: str = pydantic.Field(description="The language.")
+    template: typing.Optional[str] = pydantic.Field(description="The parsing instruction.")
+    pipeline_id: typing.Optional[str] = pydantic.Field(alias="pipelineId")
+    output_bucket: typing.Optional[str] = pydantic.Field(alias="outputBucket")
+    file_id: typing.Optional[str] = pydantic.Field(alias="fileId")
+    full_file_path: typing.Optional[str] = pydantic.Field(alias="fullFilePath")
+    from_l_lama_cloud: typing.Optional[bool] = pydantic.Field(
+        alias="fromLLamaCloud", description="Whether the file is from LLama cloud."
+    )
+    skip_diagonal_text: typing.Optional[bool] = pydantic.Field(
+        alias="skipDiagonalText", description="Whether to skip diagonal text."
+    )
+    preserve_layout_alignment_across_pages: typing.Optional[bool] = pydantic.Field(
+        alias="preserveLayoutAlignmentAcrossPages", description="Whether to preserve layout alignment across pages."
+    )
+    invalidate_cache: bool = pydantic.Field(alias="invalidateCache", description="Whether to invalidate the cache.")
+    output_pdf_of_document: typing.Optional[bool] = pydantic.Field(alias="outputPDFOfDocument")
+    save_images: typing.Optional[bool] = pydantic.Field(alias="saveImages")
+    gpt_4_o: typing.Optional[bool] = pydantic.Field(alias="gpt4o", description="Whether to use GPT4o.")
+    open_aiapi_key: str = pydantic.Field(alias="openAIAPIKey", description="The OpenAI API key.")
+    do_not_unroll_columns: typing.Optional[bool] = pydantic.Field(
+        alias="doNotUnrollColumns", description="Whether to unroll columns."
+    )
+    spread_sheet_extract_sub_tables: typing.Optional[bool] = pydantic.Field(alias="spreadSheetExtractSubTables")
+    extract_layout: typing.Optional[bool] = pydantic.Field(alias="extractLayout")
+    html_make_all_elements_visible: typing.Optional[bool] = pydantic.Field(alias="htmlMakeAllElementsVisible")
+    html_remove_fixed_elements: typing.Optional[bool] = pydantic.Field(alias="htmlRemoveFixedElements")
+    html_remove_navigation_elements: typing.Optional[bool] = pydantic.Field(alias="htmlRemoveNavigationElements")
+    guess_xlsx_sheet_name: typing.Optional[bool] = pydantic.Field(
+        alias="guessXLSXSheetName", description="Whether to guess the XLSX sheet name when generation output xlsx."
+    )
+    do_not_cache: typing.Optional[bool] = pydantic.Field(alias="doNotCache", description="Whether to cache.")
+    page_separator: typing.Optional[str] = pydantic.Field(alias="pageSeparator")
+    bounding_box: typing.Optional[str] = pydantic.Field(alias="boundingBox")
+    bbox_top: typing.Optional[float] = pydantic.Field(alias="bboxTop")
+    bbox_right: typing.Optional[float] = pydantic.Field(alias="bboxRight")
+    bbox_bottom: typing.Optional[float] = pydantic.Field(alias="bboxBottom")
+    bbox_left: typing.Optional[float] = pydantic.Field(alias="bboxLeft")
+    disable_reconstruction: typing.Optional[bool] = pydantic.Field(alias="disableReconstruction")
+    target_pages: typing.Optional[str] = pydantic.Field(alias="targetPages")
+    multimodal_pipeline: typing.Optional[bool] = pydantic.Field(alias="multimodalPipeline")
+    multimodal_model: typing.Optional[str] = pydantic.Field(alias="multimodalModel")
+    model: typing.Optional[str]
+    vendor_api_key: typing.Optional[str] = pydantic.Field(alias="vendorAPIKey")
+    page_prefix: typing.Optional[str] = pydantic.Field(alias="pagePrefix")
+    page_suffix: typing.Optional[str] = pydantic.Field(alias="pageSuffix")
+    webhook_url: typing.Optional[str] = pydantic.Field(alias="webhookUrl")
+    preset: typing.Optional[str]
+    take_screenshot: typing.Optional[bool] = pydantic.Field(
+        alias="takeScreenshot", description="Force to capture an image of each pages"
+    )
+    is_formatting_instruction: typing.Optional[bool] = pydantic.Field(
+        alias="isFormattingInstruction", description="Allow the parsing instruction to also format the output."
+    )
+    premium_mode: typing.Optional[bool] = pydantic.Field(
+        alias="premiumMode", description="Whether to use premiumMode pipeline."
+    )
+    continuous_mode: typing.Optional[bool] = pydantic.Field(
+        alias="continuousMode", description="Whether to use continuousMode pipeline."
+    )
+    disable_ocr: typing.Optional[bool] = pydantic.Field(
+        alias="disableOcr",
+        description="Disable the OCR on the document. LlamaParse will only extract the copyable text from the document",
+    )
+    disable_image_extraction: typing.Optional[bool] = pydantic.Field(
+        alias="disableImageExtraction",
+        description="Disable the image extraction from the document. LlamaParse will not extract any image from the document.",
+    )
+    annotate_links: typing.Optional[bool] = pydantic.Field(
+        alias="annotateLinks",
+        description="Annotate links in markdown. LlamaParse will try to add links from document into the markdown.",
+    )
+    adaptive_long_table: typing.Optional[bool] = pydantic.Field(
+        alias="adaptiveLongTable",
+        description="Adaptive long table. LlamaParse will try to detect long table and adapt the output.",
+    )
+    compact_markdown_table: typing.Optional[bool] = pydantic.Field(
+        alias="compactMarkdownTable",
+        description="Compact markdown table. LlamaParse will compact the markdown table to not include too many spaces.",
+    )
+    input_s_3_path: typing.Optional[str] = pydantic.Field(alias="inputS3Path")
+    input_s_3_region: typing.Optional[str] = pydantic.Field(alias="inputS3Region")
+    output_s_3_path_prefix: typing.Optional[str] = pydantic.Field(alias="outputS3PathPrefix")
+    output_s_3_region: typing.Optional[str] = pydantic.Field(alias="outputS3Region")
+    project_id: typing.Optional[str] = pydantic.Field(alias="projectId")
+    azure_open_ai_deployment_name: typing.Optional[str] = pydantic.Field(alias="azureOpenAiDeploymentName")
+    azure_open_ai_endpoint: typing.Optional[str] = pydantic.Field(alias="azureOpenAiEndpoint")
+    azure_open_ai_api_version: typing.Optional[str] = pydantic.Field(alias="azureOpenAiApiVersion")
+    azure_open_ai_key: typing.Optional[str] = pydantic.Field(alias="azureOpenAiKey")
+    auto_mode: typing.Optional[bool] = pydantic.Field(alias="autoMode", description="Whether to use auto mode.")
+    auto_mode_trigger_on_table_in_page: typing.Optional[bool] = pydantic.Field(
+        alias="autoModeTriggerOnTableInPage", description="Whether to trigger on table in page."
+    )
+    auto_mode_trigger_on_image_in_page: typing.Optional[bool] = pydantic.Field(
+        alias="autoModeTriggerOnImageInPage", description="Whether to trigger on image in page."
+    )
+    auto_mode_trigger_on_regexp_in_page: typing.Optional[str] = pydantic.Field(alias="autoModeTriggerOnRegexpInPage")
+    auto_mode_trigger_on_text_in_page: typing.Optional[str] = pydantic.Field(alias="autoModeTriggerOnTextInPage")
+    auto_mode_configuration_json: typing.Optional[str] = pydantic.Field(alias="autoModeConfigurationJSON")
+    structured_output: typing.Optional[bool] = pydantic.Field(
+        alias="structuredOutput", description="Whether to use structured output."
+    )
+    structured_output_json_schema: typing.Optional[str] = pydantic.Field(alias="structuredOutputJSONSchema")
+    structured_output_json_schema_name: typing.Optional[str] = pydantic.Field(alias="structuredOutputJSONSchemaName")
+    max_pages: typing.Optional[int] = pydantic.Field(alias="maxPages")
+    extract_charts: typing.Optional[bool] = pydantic.Field(
+        alias="extractCharts", description="Extract charts from the document."
+    )
+    formatting_instruction: typing.Optional[str] = pydantic.Field(alias="formattingInstruction")
+    complemental_formatting_instruction: typing.Optional[str] = pydantic.Field(
+        alias="complementalFormattingInstruction"
+    )
+    content_guideline_instruction: typing.Optional[str] = pydantic.Field(alias="contentGuidelineInstruction")
+    job_timeout_in_seconds: typing.Optional[float] = pydantic.Field(alias="jobTimeoutInSeconds")
+    job_timeout_extra_time_per_page_in_seconds: typing.Optional[float] = pydantic.Field(
+        alias="jobTimeoutExtraTimePerPageInSeconds"
+    )
+    strict_mode_image_extraction: typing.Optional[bool] = pydantic.Field(
+        alias="strictModeImageExtraction",
+        description="If true, the job will fail when we are not able to extract an image from a document.",
+    )
+    strict_mode_image_ocr: typing.Optional[bool] = pydantic.Field(
+        alias="strictModeImageOCR",
+        description="If true, the job will fail when we are not able to OCR an image from a document.",
+    )
+    strict_mode_reconstruction: typing.Optional[bool] = pydantic.Field(
+        alias="strictModeReconstruction",
+        description="If true, the job will fail when we are not able to transform a page to Markdown in a document.",
+    )
+    strict_mode_buggy_font: typing.Optional[bool] = pydantic.Field(
+        alias="strictModeBuggyFont",
+        description="If true, the job will fail when we are not able to extract a glyph from the document due to buggy font.",
+    )
+    ignore_document_elements_for_layout_detection: typing.Optional[bool] = pydantic.Field(
+        alias="ignoreDocumentElementsForLayoutDetection",
+        description="If true, the job will ignore document element for layout detection, and instead just rely on a visual model, only apply to layout detection.",
+    )
+    output_tables_as_html: typing.Optional[bool] = pydantic.Field(
+        alias="outputTablesAsHTML",
+        description="If true, the job will output tables as HTML in the markdown output, useful for merged cells.",
+    )
+    parse_mode: typing.Optional[str] = pydantic.Field(alias="parseMode")
+    system_prompt: typing.Optional[str] = pydantic.Field(alias="systemPrompt")
+    system_prompt_append: typing.Optional[str] = pydantic.Field(alias="systemPromptAppend")
+    user_prompt: typing.Optional[str] = pydantic.Field(alias="userPrompt")
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        json_encoders = {dt.datetime: serialize_datetime}
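Note: a minimal usage sketch of the generated model above (values are placeholders, not from the package). Because the model sets allow_population_by_field_name = True, the snake_case field names work at construction time, while the overridden json()/dict() always serialize with the camelCase aliases and drop unset optionals:

from llama_cloud.types.legacy_parse_job_config import LegacyParseJobConfig

# Only the non-Optional fields are required; everything else defaults to None.
cfg = LegacyParseJobConfig(
    user_id="user-123",
    file_name="report.pdf",
    original_file_name="report.pdf",
    file_key="files/report.pdf",
    lang="en",
    invalidate_cache=False,
    open_aiapi_key="<openai-api-key>",
    premium_mode=True,
)

# Emits camelCase keys ("userId", "fileName", "premiumMode", ...) and omits
# every optional field that was never set.
print(cfg.json())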
llama_cloud/types/llama_parse_parameters.py
CHANGED

@@ -79,6 +79,7 @@ class LlamaParseParameters(pydantic.BaseModel):
     auto_mode_trigger_on_text_in_page: typing.Optional[str]
     auto_mode_trigger_on_table_in_page: typing.Optional[bool]
     auto_mode_trigger_on_image_in_page: typing.Optional[bool]
+    auto_mode_configuration_json: typing.Optional[str]
     structured_output: typing.Optional[bool]
     structured_output_json_schema: typing.Optional[str]
     structured_output_json_schema_name: typing.Optional[str]
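Note: the new auto_mode_configuration_json field is a plain string, so a caller presumably serializes its auto-mode rules itself; the rule shape below is purely illustrative, not a documented schema:

import json

from llama_cloud.types.llama_parse_parameters import LlamaParseParameters

# Hypothetical rule payload -- the SDK only constrains this field to be a string.
params = LlamaParseParameters(
    auto_mode=True,
    auto_mode_configuration_json=json.dumps([{"trigger": "text_in_page", "value": "Grand Total"}]),
)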
llama_cloud/types/load_files_job_config.py
ADDED

@@ -0,0 +1,35 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class LoadFilesJobConfig(pydantic.BaseModel):
+    """
+    Schema for the parameters of a load files job.
+    """
+
+    file_ids: typing.Optional[typing.List[str]]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/parse_job_config.py
ADDED

@@ -0,0 +1,134 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .fail_page_mode import FailPageMode
+from .parser_languages import ParserLanguages
+from .parsing_mode import ParsingMode
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class ParseJobConfig(pydantic.BaseModel):
+    """
+    Configuration for llamaparse job
+    """
+
+    custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+    resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+    languages: typing.Optional[typing.List[ParserLanguages]]
+    parsing_instruction: typing.Optional[str]
+    disable_ocr: typing.Optional[bool]
+    annotate_links: typing.Optional[bool]
+    adaptive_long_table: typing.Optional[bool]
+    compact_markdown_table: typing.Optional[bool]
+    disable_reconstruction: typing.Optional[bool]
+    disable_image_extraction: typing.Optional[bool]
+    invalidate_cache: typing.Optional[bool]
+    output_pdf_of_document: typing.Optional[bool]
+    do_not_cache: typing.Optional[bool]
+    fast_mode: typing.Optional[bool]
+    skip_diagonal_text: typing.Optional[bool]
+    preserve_layout_alignment_across_pages: typing.Optional[bool]
+    gpt_4_o_mode: typing.Optional[bool] = pydantic.Field(alias="gpt4o_mode")
+    gpt_4_o_api_key: typing.Optional[str] = pydantic.Field(alias="gpt4o_api_key")
+    do_not_unroll_columns: typing.Optional[bool]
+    extract_layout: typing.Optional[bool]
+    html_make_all_elements_visible: typing.Optional[bool]
+    html_remove_navigation_elements: typing.Optional[bool]
+    html_remove_fixed_elements: typing.Optional[bool]
+    guess_xlsx_sheet_name: typing.Optional[bool]
+    page_separator: typing.Optional[str]
+    bounding_box: typing.Optional[str]
+    bbox_top: typing.Optional[float]
+    bbox_right: typing.Optional[float]
+    bbox_bottom: typing.Optional[float]
+    bbox_left: typing.Optional[float]
+    target_pages: typing.Optional[str]
+    use_vendor_multimodal_model: typing.Optional[bool]
+    vendor_multimodal_model_name: typing.Optional[str]
+    model: typing.Optional[str]
+    vendor_multimodal_api_key: typing.Optional[str]
+    page_prefix: typing.Optional[str]
+    page_suffix: typing.Optional[str]
+    webhook_url: typing.Optional[str]
+    preset: typing.Optional[str]
+    take_screenshot: typing.Optional[bool]
+    is_formatting_instruction: typing.Optional[bool]
+    premium_mode: typing.Optional[bool]
+    continuous_mode: typing.Optional[bool]
+    input_s_3_path: typing.Optional[str] = pydantic.Field(alias="input_s3_path")
+    input_s_3_region: typing.Optional[str] = pydantic.Field(alias="input_s3_region")
+    output_s_3_path_prefix: typing.Optional[str] = pydantic.Field(alias="output_s3_path_prefix")
+    output_s_3_region: typing.Optional[str] = pydantic.Field(alias="output_s3_region")
+    project_id: typing.Optional[str]
+    azure_openai_deployment_name: typing.Optional[str]
+    azure_openai_endpoint: typing.Optional[str]
+    azure_openai_api_version: typing.Optional[str]
+    azure_openai_key: typing.Optional[str]
+    input_url: typing.Optional[str]
+    http_proxy: typing.Optional[str]
+    auto_mode: typing.Optional[bool]
+    auto_mode_trigger_on_regexp_in_page: typing.Optional[str]
+    auto_mode_trigger_on_text_in_page: typing.Optional[str]
+    auto_mode_trigger_on_table_in_page: typing.Optional[bool]
+    auto_mode_trigger_on_image_in_page: typing.Optional[bool]
+    auto_mode_configuration_json: typing.Optional[str]
+    structured_output: typing.Optional[bool]
+    structured_output_json_schema: typing.Optional[str]
+    structured_output_json_schema_name: typing.Optional[str]
+    max_pages: typing.Optional[int]
+    max_pages_enforced: typing.Optional[int]
+    extract_charts: typing.Optional[bool]
+    formatting_instruction: typing.Optional[str]
+    complemental_formatting_instruction: typing.Optional[str]
+    content_guideline_instruction: typing.Optional[str]
+    spreadsheet_extract_sub_tables: typing.Optional[bool]
+    job_timeout_in_seconds: typing.Optional[float]
+    job_timeout_extra_time_per_page_in_seconds: typing.Optional[float]
+    strict_mode_image_extraction: typing.Optional[bool]
+    strict_mode_image_ocr: typing.Optional[bool]
+    strict_mode_reconstruction: typing.Optional[bool]
+    strict_mode_buggy_font: typing.Optional[bool]
+    save_images: typing.Optional[bool]
+    ignore_document_elements_for_layout_detection: typing.Optional[bool]
+    output_tables_as_html: typing.Optional[bool] = pydantic.Field(alias="output_tables_as_HTML")
+    internal_is_screenshot_job: typing.Optional[bool]
+    parse_mode: typing.Optional[ParsingMode]
+    system_prompt: typing.Optional[str]
+    system_prompt_append: typing.Optional[str]
+    user_prompt: typing.Optional[str]
+    page_error_tolerance: typing.Optional[float]
+    replace_failed_page_mode: typing.Optional[FailPageMode]
+    replace_failed_page_with_error_message_prefix: typing.Optional[str]
+    replace_failed_page_with_error_message_suffix: typing.Optional[str]
+    markdown_table_multiline_header_separator: typing.Optional[str]
+    file_name: str = pydantic.Field(description="The file name.")
+    original_file_name: str = pydantic.Field(description="The original file name.")
+    file_key: str = pydantic.Field(description="The file key.")
+    lang: str = pydantic.Field(description="The language.")
+    output_bucket: typing.Optional[str] = pydantic.Field(alias="outputBucket")
+    file_id: typing.Optional[str]
+    pipeline_id: typing.Optional[str]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        json_encoders = {dt.datetime: serialize_datetime}
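Note: a construction sketch with placeholder values. Unlike the legacy model above, most fields here keep their snake_case names on the wire; the exceptions are the explicit aliases visible in the diff (gpt4o_mode, input_s3_path, output_tables_as_HTML, outputBucket):

from llama_cloud.types.parse_job_config import ParseJobConfig

cfg = ParseJobConfig(
    file_name="invoice.pdf",
    original_file_name="invoice.pdf",
    file_key="files/invoice.pdf",
    lang="en",
    input_s_3_path="s3://my-bucket/in/invoice.pdf",  # serialized as "input_s3_path"
    output_tables_as_html=True,                      # serialized as "output_tables_as_HTML"
)

# dict() injects by_alias=True / exclude_unset=True, so unset optionals are omitted.
payload = cfg.dict()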
llama_cloud/types/pipeline.py
CHANGED
@@ -4,12 +4,13 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .configured_transformation_item import ConfiguredTransformationItem
 from .data_sink import DataSink
 from .eval_execution_params import EvalExecutionParams
 from .llama_parse_parameters import LlamaParseParameters
 from .pipeline_configuration_hashes import PipelineConfigurationHashes
 from .pipeline_embedding_config import PipelineEmbeddingConfig
+from .pipeline_metadata_config import PipelineMetadataConfig
+from .pipeline_status import PipelineStatus
 from .pipeline_transform_config import PipelineTransformConfig
 from .pipeline_type import PipelineType
 from .preset_retrieval_params import PresetRetrievalParams
@@ -39,9 +40,6 @@ class Pipeline(pydantic.BaseModel):
     )
     managed_pipeline_id: typing.Optional[str]
     embedding_config: PipelineEmbeddingConfig
-    configured_transformations: typing.Optional[typing.List[ConfiguredTransformationItem]] = pydantic.Field(
-        description="Deprecated don't use it, List of configured transformations."
-    )
     config_hash: typing.Optional[PipelineConfigurationHashes]
     transform_config: typing.Optional[PipelineTransformConfig] = pydantic.Field(
         description="Configuration for the transformation."
@@ -54,6 +52,8 @@
     )
     llama_parse_parameters: typing.Optional[LlamaParseParameters]
    data_sink: typing.Optional[DataSink]
+    status: typing.Optional[PipelineStatus]
+    metadata_config: typing.Optional[PipelineMetadataConfig]
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/pipeline_create.py
CHANGED

@@ -4,12 +4,12 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .configured_transformation_item import ConfiguredTransformationItem
 from .data_sink_create import DataSinkCreate
 from .eval_execution_params import EvalExecutionParams
 from .llama_parse_parameters import LlamaParseParameters
 from .pipeline_create_embedding_config import PipelineCreateEmbeddingConfig
 from .pipeline_create_transform_config import PipelineCreateTransformConfig
+from .pipeline_metadata_config import PipelineMetadataConfig
 from .pipeline_type import PipelineType
 from .preset_retrieval_params import PresetRetrievalParams
 
@@ -31,7 +31,6 @@ class PipelineCreate(pydantic.BaseModel):
     transform_config: typing.Optional[PipelineCreateTransformConfig] = pydantic.Field(
         description="Configuration for the transformation."
     )
-    configured_transformations: typing.Optional[typing.List[ConfiguredTransformationItem]]
     data_sink_id: typing.Optional[str]
     embedding_model_config_id: typing.Optional[str]
     data_sink: typing.Optional[DataSinkCreate]
@@ -45,6 +44,7 @@
         description="Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline."
     )
     status: typing.Optional[str]
+    metadata_config: typing.Optional[PipelineMetadataConfig]
     name: str
     pipeline_type: typing.Optional[PipelineType] = pydantic.Field(
         description="Type of pipeline. Either PLAYGROUND or MANAGED."
llama_cloud/types/pipeline_file_update_dispatcher_config.py
ADDED

@@ -0,0 +1,38 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .delete_params import DeleteParams
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PipelineFileUpdateDispatcherConfig(pydantic.BaseModel):
+    """
+    Schema for the parameters of a load files job.
+    """
+
+    pipeline_file_ids: typing.Optional[typing.List[str]]
+    should_delete: typing.Optional[bool]
+    delete_info: typing.Optional[DeleteParams]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
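Note: a sketch of building this config. It assumes DeleteParams (the renamed node_parser.py → delete_params.py model from the file list, whose body is not shown in this diff) can be constructed with no arguments:

from llama_cloud.types.delete_params import DeleteParams
from llama_cloud.types.pipeline_file_update_dispatcher_config import PipelineFileUpdateDispatcherConfig

config = PipelineFileUpdateDispatcherConfig(
    pipeline_file_ids=["pipeline-file-1", "pipeline-file-2"],
    should_delete=True,
    delete_info=DeleteParams(),  # assumption: all DeleteParams fields are optional
)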
llama_cloud/types/{configured_transformation_item.py → pipeline_file_updater_config.py}
RENAMED

@@ -4,8 +4,7 @@ import datetime as dt
 import typing
 
 from ..core.datetime_utils import serialize_datetime
-from .
-from .configured_transformation_item_component import ConfiguredTransformationItemComponent
+from .delete_params import DeleteParams
 
 try:
     import pydantic
@@ -16,20 +15,22 @@ except ImportError:
     import pydantic  # type: ignore
 
 
-class ConfiguredTransformationItem(pydantic.BaseModel):
+class PipelineFileUpdaterConfig(pydantic.BaseModel):
     """
-
-
-    Similar to ConfigurableTransformation but includes a few
-    more fields that are useful to the platform.
+    Schema for the parameters of a load files job.
     """
 
-
-
-
+    custom_metadata: typing.Optional[typing.Dict[str, typing.Any]]
+    resource_info: typing.Optional[typing.Dict[str, typing.Any]]
+    should_delete: typing.Optional[bool]
+    should_parse: typing.Optional[bool]
+    delete_info: typing.Optional[DeleteParams]
+    is_new_file: typing.Optional[bool] = pydantic.Field(description="Whether the file is new")
+    data_source_project_file_changed: typing.Optional[bool] = pydantic.Field(
+        description="Whether the data source project file has changed"
     )
-
-        description="
+    should_migrate_pipeline_file_to_external_file_id: typing.Optional[bool] = pydantic.Field(
+        description="Whether to migrate the pipeline file to the external file id"
     )
 
     def json(self, **kwargs: typing.Any) -> str:
llama_cloud/types/pipeline_managed_ingestion_job_params.py
ADDED

@@ -0,0 +1,37 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+from .delete_params import DeleteParams
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PipelineManagedIngestionJobParams(pydantic.BaseModel):
+    """
+    Schema for the parameters of a managed pipeline ingestion job.
+    """
+
+    should_delete: typing.Optional[bool]
+    delete_info: typing.Optional[DeleteParams]
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
llama_cloud/types/pipeline_metadata_config.py
ADDED

@@ -0,0 +1,36 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ..core.datetime_utils import serialize_datetime
+
+try:
+    import pydantic
+    if pydantic.__version__.startswith("1."):
+        raise ImportError
+    import pydantic.v1 as pydantic  # type: ignore
+except ImportError:
+    import pydantic  # type: ignore
+
+
+class PipelineMetadataConfig(pydantic.BaseModel):
+    excluded_embed_metadata_keys: typing.Optional[typing.List[str]] = pydantic.Field(
+        description="List of metadata keys to exclude from embeddings"
+    )
+    excluded_llm_metadata_keys: typing.Optional[typing.List[str]] = pydantic.Field(
+        description="List of metadata keys to exclude from LLM during retrieval"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
+        return super().dict(**kwargs_with_defaults)
+
+    class Config:
+        frozen = True
+        smart_union = True
+        json_encoders = {dt.datetime: serialize_datetime}
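Note: this model backs the new metadata_config field added to both Pipeline and PipelineCreate above. A sketch with placeholder keys:

from llama_cloud.types.pipeline_metadata_config import PipelineMetadataConfig

# Keep bookkeeping keys out of the embedding input and out of the LLM's
# retrieval context.
metadata_config = PipelineMetadataConfig(
    excluded_embed_metadata_keys=["file_path", "page_label"],
    excluded_llm_metadata_keys=["file_path"],
)
# e.g. PipelineCreate(name="my-pipeline", metadata_config=metadata_config, ...)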
llama_cloud/types/pipeline_status.py
ADDED

@@ -0,0 +1,17 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import enum
+import typing
+
+T_Result = typing.TypeVar("T_Result")
+
+
+class PipelineStatus(str, enum.Enum):
+    CREATED = "CREATED"
+    DELETING = "DELETING"
+
+    def visit(self, created: typing.Callable[[], T_Result], deleting: typing.Callable[[], T_Result]) -> T_Result:
+        if self is PipelineStatus.CREATED:
+            return created()
+        if self is PipelineStatus.DELETING:
+            return deleting()
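Note: the generated visit method is a small exhaustive-matching helper, one callback per enum member:

from llama_cloud.types.pipeline_status import PipelineStatus

label = PipelineStatus.DELETING.visit(
    created=lambda: "pipeline is live",
    deleting=lambda: "pipeline is being torn down",
)
# label == "pipeline is being torn down"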
llama_cloud/types/prompt_conf.py
CHANGED
@@ -22,6 +22,7 @@ class PromptConf(pydantic.BaseModel):
     cite_sources_prompt: typing.Optional[typing.Dict[str, str]] = pydantic.Field(
         description="The prompt to use for citing sources."
     )
+    scratchpad_prompt: typing.Optional[str] = pydantic.Field(description="The prompt to use for scratchpad.")
 
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
llama_cloud/types/supported_llm_model.py
CHANGED

@@ -5,7 +5,6 @@ import typing
 
 from ..core.datetime_utils import serialize_datetime
 from .llm_model_data import LlmModelData
-from .supported_llm_model_names import SupportedLlmModelNames
 
 try:
     import pydantic
@@ -21,7 +20,7 @@ class SupportedLlmModel(pydantic.BaseModel):
     Response Schema for a supported eval LLM model.
     """
 
-    name:
+    name: str = pydantic.Field(description="The name of the supported LLM model.")
     enabled: typing.Optional[bool] = pydantic.Field(
         description="Whether the LLM model is enabled for use in LlamaCloud."
     )
{llama_cloud-0.1.19.dist-info → llama_cloud-0.1.21.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: llama-cloud
-Version: 0.1.19
+Version: 0.1.21
 Summary:
 License: MIT
 Author: Logan Markewich
@@ -27,3 +27,7 @@ To publish:
 - update the version in `pyproject.toml`
 - run `poetry publish --build`
 
+Setup credentials:
+- run `poetry config pypi-token.pypi <my-token>`
+- Get token form PyPi once logged in with credentials in [1Password](https://start.1password.com/open/i?a=32SA66TZ3JCRXOCMASLSDCT5TI&v=lhv7hvb5o46cwo257c3hviqkle&i=yvslwei7jtf6tgqamzcdantqi4&h=llamaindex.1password.com)
+