llama-cloud 0.0.15__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of llama-cloud might be problematic. Click here for more details.
- llama_cloud/__init__.py +20 -0
- llama_cloud/resources/__init__.py +2 -0
- llama_cloud/resources/files/client.py +159 -0
- llama_cloud/resources/parsing/client.py +40 -0
- llama_cloud/resources/pipelines/__init__.py +2 -0
- llama_cloud/resources/pipelines/client.py +18 -2
- llama_cloud/resources/pipelines/types/__init__.py +2 -0
- llama_cloud/resources/pipelines/types/pipeline_update_embedding_config.py +11 -0
- llama_cloud/types/__init__.py +18 -0
- llama_cloud/types/cloud_az_storage_blob_data_source.py +1 -2
- llama_cloud/types/cloud_postgres_vector_store.py +6 -8
- llama_cloud/types/configurable_transformation_names.py +4 -0
- llama_cloud/types/configured_transformation_item_component_one.py +2 -0
- llama_cloud/types/extend_vertex_text_embedding.py +58 -0
- llama_cloud/types/llama_parse_parameters.py +3 -1
- llama_cloud/types/llm_model_data.py +1 -0
- llama_cloud/types/llm_parameters.py +4 -1
- llama_cloud/types/page_screenshot_metadata.py +33 -0
- llama_cloud/types/page_screenshot_node_with_score.py +38 -0
- llama_cloud/types/pipeline.py +4 -0
- llama_cloud/types/pipeline_configuration_hashes.py +37 -0
- llama_cloud/types/pipeline_create_embedding_config.py +11 -0
- llama_cloud/types/pipeline_embedding_config.py +11 -0
- llama_cloud/types/pipeline_file.py +4 -0
- llama_cloud/types/pipeline_file_config_hash_value.py +5 -0
- llama_cloud/types/preset_retrieval_params.py +1 -0
- llama_cloud/types/retrieve_results.py +4 -0
- llama_cloud/types/vertex_ai_embedding_config.py +34 -0
- llama_cloud/types/vertex_embedding_mode.py +45 -0
- {llama_cloud-0.0.15.dist-info → llama_cloud-0.0.16.dist-info}/METADATA +1 -1
- {llama_cloud-0.0.15.dist-info → llama_cloud-0.0.16.dist-info}/RECORD +33 -26
- {llama_cloud-0.0.15.dist-info → llama_cloud-0.0.16.dist-info}/LICENSE +0 -0
- {llama_cloud-0.0.15.dist-info → llama_cloud-0.0.16.dist-info}/WHEEL +0 -0
llama_cloud/__init__.py
CHANGED
|
@@ -79,6 +79,7 @@ from .types import (
|
|
|
79
79
|
EvalQuestion,
|
|
80
80
|
EvalQuestionCreate,
|
|
81
81
|
EvalQuestionResult,
|
|
82
|
+
ExtendVertexTextEmbedding,
|
|
82
83
|
ExtractionJob,
|
|
83
84
|
ExtractionResult,
|
|
84
85
|
ExtractionResultDataValue,
|
|
@@ -124,6 +125,8 @@ from .types import (
|
|
|
124
125
|
OpenAiEmbeddingConfig,
|
|
125
126
|
Organization,
|
|
126
127
|
OrganizationCreate,
|
|
128
|
+
PageScreenshotMetadata,
|
|
129
|
+
PageScreenshotNodeWithScore,
|
|
127
130
|
PageSegmentationConfig,
|
|
128
131
|
PageSplitterNodeParser,
|
|
129
132
|
ParserLanguages,
|
|
@@ -134,6 +137,7 @@ from .types import (
|
|
|
134
137
|
ParsingJobTextResult,
|
|
135
138
|
ParsingUsage,
|
|
136
139
|
Pipeline,
|
|
140
|
+
PipelineConfigurationHashes,
|
|
137
141
|
PipelineCreate,
|
|
138
142
|
PipelineCreateEmbeddingConfig,
|
|
139
143
|
PipelineCreateEmbeddingConfig_AzureEmbedding,
|
|
@@ -142,6 +146,7 @@ from .types import (
|
|
|
142
146
|
PipelineCreateEmbeddingConfig_GeminiEmbedding,
|
|
143
147
|
PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
144
148
|
PipelineCreateEmbeddingConfig_OpenaiEmbedding,
|
|
149
|
+
PipelineCreateEmbeddingConfig_VertexaiEmbedding,
|
|
145
150
|
PipelineCreateTransformConfig,
|
|
146
151
|
PipelineCreateTransformConfig_Advanced,
|
|
147
152
|
PipelineCreateTransformConfig_Auto,
|
|
@@ -158,7 +163,9 @@ from .types import (
|
|
|
158
163
|
PipelineEmbeddingConfig_GeminiEmbedding,
|
|
159
164
|
PipelineEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
160
165
|
PipelineEmbeddingConfig_OpenaiEmbedding,
|
|
166
|
+
PipelineEmbeddingConfig_VertexaiEmbedding,
|
|
161
167
|
PipelineFile,
|
|
168
|
+
PipelineFileConfigHashValue,
|
|
162
169
|
PipelineFileCreate,
|
|
163
170
|
PipelineFileCreateCustomMetadataValue,
|
|
164
171
|
PipelineFileCustomMetadataValue,
|
|
@@ -197,6 +204,8 @@ from .types import (
|
|
|
197
204
|
UserOrganizationDelete,
|
|
198
205
|
ValidationError,
|
|
199
206
|
ValidationErrorLocItem,
|
|
207
|
+
VertexAiEmbeddingConfig,
|
|
208
|
+
VertexEmbeddingMode,
|
|
200
209
|
)
|
|
201
210
|
from .errors import UnprocessableEntityError
|
|
202
211
|
from .resources import (
|
|
@@ -216,6 +225,7 @@ from .resources import (
|
|
|
216
225
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
217
226
|
PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
218
227
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
228
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
219
229
|
PipelineUpdateTransformConfig,
|
|
220
230
|
PipelineUpdateTransformConfig_Advanced,
|
|
221
231
|
PipelineUpdateTransformConfig_Auto,
|
|
@@ -317,6 +327,7 @@ __all__ = [
|
|
|
317
327
|
"EvalQuestion",
|
|
318
328
|
"EvalQuestionCreate",
|
|
319
329
|
"EvalQuestionResult",
|
|
330
|
+
"ExtendVertexTextEmbedding",
|
|
320
331
|
"ExtractionJob",
|
|
321
332
|
"ExtractionResult",
|
|
322
333
|
"ExtractionResultDataValue",
|
|
@@ -366,6 +377,8 @@ __all__ = [
|
|
|
366
377
|
"OpenAiEmbeddingConfig",
|
|
367
378
|
"Organization",
|
|
368
379
|
"OrganizationCreate",
|
|
380
|
+
"PageScreenshotMetadata",
|
|
381
|
+
"PageScreenshotNodeWithScore",
|
|
369
382
|
"PageSegmentationConfig",
|
|
370
383
|
"PageSplitterNodeParser",
|
|
371
384
|
"ParserLanguages",
|
|
@@ -376,6 +389,7 @@ __all__ = [
|
|
|
376
389
|
"ParsingJobTextResult",
|
|
377
390
|
"ParsingUsage",
|
|
378
391
|
"Pipeline",
|
|
392
|
+
"PipelineConfigurationHashes",
|
|
379
393
|
"PipelineCreate",
|
|
380
394
|
"PipelineCreateEmbeddingConfig",
|
|
381
395
|
"PipelineCreateEmbeddingConfig_AzureEmbedding",
|
|
@@ -384,6 +398,7 @@ __all__ = [
|
|
|
384
398
|
"PipelineCreateEmbeddingConfig_GeminiEmbedding",
|
|
385
399
|
"PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
386
400
|
"PipelineCreateEmbeddingConfig_OpenaiEmbedding",
|
|
401
|
+
"PipelineCreateEmbeddingConfig_VertexaiEmbedding",
|
|
387
402
|
"PipelineCreateTransformConfig",
|
|
388
403
|
"PipelineCreateTransformConfig_Advanced",
|
|
389
404
|
"PipelineCreateTransformConfig_Auto",
|
|
@@ -400,7 +415,9 @@ __all__ = [
|
|
|
400
415
|
"PipelineEmbeddingConfig_GeminiEmbedding",
|
|
401
416
|
"PipelineEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
402
417
|
"PipelineEmbeddingConfig_OpenaiEmbedding",
|
|
418
|
+
"PipelineEmbeddingConfig_VertexaiEmbedding",
|
|
403
419
|
"PipelineFile",
|
|
420
|
+
"PipelineFileConfigHashValue",
|
|
404
421
|
"PipelineFileCreate",
|
|
405
422
|
"PipelineFileCreateCustomMetadataValue",
|
|
406
423
|
"PipelineFileCustomMetadataValue",
|
|
@@ -417,6 +434,7 @@ __all__ = [
|
|
|
417
434
|
"PipelineUpdateEmbeddingConfig_GeminiEmbedding",
|
|
418
435
|
"PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
419
436
|
"PipelineUpdateEmbeddingConfig_OpenaiEmbedding",
|
|
437
|
+
"PipelineUpdateEmbeddingConfig_VertexaiEmbedding",
|
|
420
438
|
"PipelineUpdateTransformConfig",
|
|
421
439
|
"PipelineUpdateTransformConfig_Advanced",
|
|
422
440
|
"PipelineUpdateTransformConfig_Auto",
|
|
@@ -451,6 +469,8 @@ __all__ = [
|
|
|
451
469
|
"UserOrganizationDelete",
|
|
452
470
|
"ValidationError",
|
|
453
471
|
"ValidationErrorLocItem",
|
|
472
|
+
"VertexAiEmbeddingConfig",
|
|
473
|
+
"VertexEmbeddingMode",
|
|
454
474
|
"auth",
|
|
455
475
|
"component_definitions",
|
|
456
476
|
"data_sinks",
|
|
@@ -26,6 +26,7 @@ from .pipelines import (
|
|
|
26
26
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
27
27
|
PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
28
28
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
29
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
29
30
|
PipelineUpdateTransformConfig,
|
|
30
31
|
PipelineUpdateTransformConfig_Advanced,
|
|
31
32
|
PipelineUpdateTransformConfig_Auto,
|
|
@@ -48,6 +49,7 @@ __all__ = [
|
|
|
48
49
|
"PipelineUpdateEmbeddingConfig_GeminiEmbedding",
|
|
49
50
|
"PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
50
51
|
"PipelineUpdateEmbeddingConfig_OpenaiEmbedding",
|
|
52
|
+
"PipelineUpdateEmbeddingConfig_VertexaiEmbedding",
|
|
51
53
|
"PipelineUpdateTransformConfig",
|
|
52
54
|
"PipelineUpdateTransformConfig_Advanced",
|
|
53
55
|
"PipelineUpdateTransformConfig_Auto",
|
|
@@ -12,6 +12,7 @@ from ...core.remove_none_from_dict import remove_none_from_dict
|
|
|
12
12
|
from ...errors.unprocessable_entity_error import UnprocessableEntityError
|
|
13
13
|
from ...types.file import File
|
|
14
14
|
from ...types.http_validation_error import HttpValidationError
|
|
15
|
+
from ...types.page_screenshot_metadata import PageScreenshotMetadata
|
|
15
16
|
from ...types.presigned_url import PresignedUrl
|
|
16
17
|
from .types.file_create_resource_info_value import FileCreateResourceInfoValue
|
|
17
18
|
|
|
@@ -288,6 +289,85 @@ class FilesClient:
|
|
|
288
289
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
289
290
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
290
291
|
|
|
292
|
+
def list_file_page_screenshots(
|
|
293
|
+
self, id: str, *, project_id: typing.Optional[str] = None
|
|
294
|
+
) -> typing.List[PageScreenshotMetadata]:
|
|
295
|
+
"""
|
|
296
|
+
List metadata for all screenshots of pages from a file.
|
|
297
|
+
|
|
298
|
+
Parameters:
|
|
299
|
+
- id: str.
|
|
300
|
+
|
|
301
|
+
- project_id: typing.Optional[str].
|
|
302
|
+
---
|
|
303
|
+
from llama_cloud.client import LlamaCloud
|
|
304
|
+
|
|
305
|
+
client = LlamaCloud(
|
|
306
|
+
token="YOUR_TOKEN",
|
|
307
|
+
)
|
|
308
|
+
client.files.list_file_page_screenshots(
|
|
309
|
+
id="string",
|
|
310
|
+
)
|
|
311
|
+
"""
|
|
312
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
313
|
+
"GET",
|
|
314
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/files/{id}/page_screenshots"),
|
|
315
|
+
params=remove_none_from_dict({"project_id": project_id}),
|
|
316
|
+
headers=self._client_wrapper.get_headers(),
|
|
317
|
+
timeout=60,
|
|
318
|
+
)
|
|
319
|
+
if 200 <= _response.status_code < 300:
|
|
320
|
+
return pydantic.parse_obj_as(typing.List[PageScreenshotMetadata], _response.json()) # type: ignore
|
|
321
|
+
if _response.status_code == 422:
|
|
322
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
323
|
+
try:
|
|
324
|
+
_response_json = _response.json()
|
|
325
|
+
except JSONDecodeError:
|
|
326
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
327
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
328
|
+
|
|
329
|
+
def get_file_page_screenshot(
|
|
330
|
+
self, id: str, page_index: int, *, project_id: typing.Optional[str] = None
|
|
331
|
+
) -> typing.Any:
|
|
332
|
+
"""
|
|
333
|
+
Get screenshot of a page from a file.
|
|
334
|
+
|
|
335
|
+
Parameters:
|
|
336
|
+
- id: str.
|
|
337
|
+
|
|
338
|
+
- page_index: int.
|
|
339
|
+
|
|
340
|
+
- project_id: typing.Optional[str].
|
|
341
|
+
---
|
|
342
|
+
from llama_cloud.client import LlamaCloud
|
|
343
|
+
|
|
344
|
+
client = LlamaCloud(
|
|
345
|
+
token="YOUR_TOKEN",
|
|
346
|
+
)
|
|
347
|
+
client.files.get_file_page_screenshot(
|
|
348
|
+
id="string",
|
|
349
|
+
page_index=1,
|
|
350
|
+
)
|
|
351
|
+
"""
|
|
352
|
+
_response = self._client_wrapper.httpx_client.request(
|
|
353
|
+
"GET",
|
|
354
|
+
urllib.parse.urljoin(
|
|
355
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/files/{id}/page_screenshots/{page_index}"
|
|
356
|
+
),
|
|
357
|
+
params=remove_none_from_dict({"project_id": project_id}),
|
|
358
|
+
headers=self._client_wrapper.get_headers(),
|
|
359
|
+
timeout=60,
|
|
360
|
+
)
|
|
361
|
+
if 200 <= _response.status_code < 300:
|
|
362
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
363
|
+
if _response.status_code == 422:
|
|
364
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
365
|
+
try:
|
|
366
|
+
_response_json = _response.json()
|
|
367
|
+
except JSONDecodeError:
|
|
368
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
369
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
370
|
+
|
|
291
371
|
|
|
292
372
|
class AsyncFilesClient:
|
|
293
373
|
def __init__(self, *, client_wrapper: AsyncClientWrapper):
|
|
@@ -549,3 +629,82 @@ class AsyncFilesClient:
|
|
|
549
629
|
except JSONDecodeError:
|
|
550
630
|
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
551
631
|
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
632
|
+
|
|
633
|
+
async def list_file_page_screenshots(
|
|
634
|
+
self, id: str, *, project_id: typing.Optional[str] = None
|
|
635
|
+
) -> typing.List[PageScreenshotMetadata]:
|
|
636
|
+
"""
|
|
637
|
+
List metadata for all screenshots of pages from a file.
|
|
638
|
+
|
|
639
|
+
Parameters:
|
|
640
|
+
- id: str.
|
|
641
|
+
|
|
642
|
+
- project_id: typing.Optional[str].
|
|
643
|
+
---
|
|
644
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
645
|
+
|
|
646
|
+
client = AsyncLlamaCloud(
|
|
647
|
+
token="YOUR_TOKEN",
|
|
648
|
+
)
|
|
649
|
+
await client.files.list_file_page_screenshots(
|
|
650
|
+
id="string",
|
|
651
|
+
)
|
|
652
|
+
"""
|
|
653
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
654
|
+
"GET",
|
|
655
|
+
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/files/{id}/page_screenshots"),
|
|
656
|
+
params=remove_none_from_dict({"project_id": project_id}),
|
|
657
|
+
headers=self._client_wrapper.get_headers(),
|
|
658
|
+
timeout=60,
|
|
659
|
+
)
|
|
660
|
+
if 200 <= _response.status_code < 300:
|
|
661
|
+
return pydantic.parse_obj_as(typing.List[PageScreenshotMetadata], _response.json()) # type: ignore
|
|
662
|
+
if _response.status_code == 422:
|
|
663
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
664
|
+
try:
|
|
665
|
+
_response_json = _response.json()
|
|
666
|
+
except JSONDecodeError:
|
|
667
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
668
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
669
|
+
|
|
670
|
+
async def get_file_page_screenshot(
|
|
671
|
+
self, id: str, page_index: int, *, project_id: typing.Optional[str] = None
|
|
672
|
+
) -> typing.Any:
|
|
673
|
+
"""
|
|
674
|
+
Get screenshot of a page from a file.
|
|
675
|
+
|
|
676
|
+
Parameters:
|
|
677
|
+
- id: str.
|
|
678
|
+
|
|
679
|
+
- page_index: int.
|
|
680
|
+
|
|
681
|
+
- project_id: typing.Optional[str].
|
|
682
|
+
---
|
|
683
|
+
from llama_cloud.client import AsyncLlamaCloud
|
|
684
|
+
|
|
685
|
+
client = AsyncLlamaCloud(
|
|
686
|
+
token="YOUR_TOKEN",
|
|
687
|
+
)
|
|
688
|
+
await client.files.get_file_page_screenshot(
|
|
689
|
+
id="string",
|
|
690
|
+
page_index=1,
|
|
691
|
+
)
|
|
692
|
+
"""
|
|
693
|
+
_response = await self._client_wrapper.httpx_client.request(
|
|
694
|
+
"GET",
|
|
695
|
+
urllib.parse.urljoin(
|
|
696
|
+
f"{self._client_wrapper.get_base_url()}/", f"api/v1/files/{id}/page_screenshots/{page_index}"
|
|
697
|
+
),
|
|
698
|
+
params=remove_none_from_dict({"project_id": project_id}),
|
|
699
|
+
headers=self._client_wrapper.get_headers(),
|
|
700
|
+
timeout=60,
|
|
701
|
+
)
|
|
702
|
+
if 200 <= _response.status_code < 300:
|
|
703
|
+
return pydantic.parse_obj_as(typing.Any, _response.json()) # type: ignore
|
|
704
|
+
if _response.status_code == 422:
|
|
705
|
+
raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json())) # type: ignore
|
|
706
|
+
try:
|
|
707
|
+
_response_json = _response.json()
|
|
708
|
+
except JSONDecodeError:
|
|
709
|
+
raise ApiError(status_code=_response.status_code, body=_response.text)
|
|
710
|
+
raise ApiError(status_code=_response.status_code, body=_response_json)
|
|
@@ -118,7 +118,12 @@ class ParsingClient:
|
|
|
118
118
|
vendor_multimodal_api_key: str,
|
|
119
119
|
page_prefix: str,
|
|
120
120
|
page_suffix: str,
|
|
121
|
+
webhook_url: str,
|
|
121
122
|
take_screenshot: bool,
|
|
123
|
+
disable_ocr: bool,
|
|
124
|
+
disable_reconstruction: bool,
|
|
125
|
+
input_s_3_path: str,
|
|
126
|
+
output_s_3_path_prefix: str,
|
|
122
127
|
file: typing.IO,
|
|
123
128
|
) -> ParsingJob:
|
|
124
129
|
"""
|
|
@@ -159,8 +164,18 @@ class ParsingClient:
|
|
|
159
164
|
|
|
160
165
|
- page_suffix: str.
|
|
161
166
|
|
|
167
|
+
- webhook_url: str.
|
|
168
|
+
|
|
162
169
|
- take_screenshot: bool.
|
|
163
170
|
|
|
171
|
+
- disable_ocr: bool.
|
|
172
|
+
|
|
173
|
+
- disable_reconstruction: bool.
|
|
174
|
+
|
|
175
|
+
- input_s_3_path: str.
|
|
176
|
+
|
|
177
|
+
- output_s_3_path_prefix: str.
|
|
178
|
+
|
|
164
179
|
- file: typing.IO.
|
|
165
180
|
"""
|
|
166
181
|
_response = self._client_wrapper.httpx_client.request(
|
|
@@ -185,7 +200,12 @@ class ParsingClient:
|
|
|
185
200
|
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
186
201
|
"page_prefix": page_prefix,
|
|
187
202
|
"page_suffix": page_suffix,
|
|
203
|
+
"webhook_url": webhook_url,
|
|
188
204
|
"take_screenshot": take_screenshot,
|
|
205
|
+
"disable_ocr": disable_ocr,
|
|
206
|
+
"disable_reconstruction": disable_reconstruction,
|
|
207
|
+
"input_s3_path": input_s_3_path,
|
|
208
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
189
209
|
}
|
|
190
210
|
),
|
|
191
211
|
files={"file": file},
|
|
@@ -647,7 +667,12 @@ class AsyncParsingClient:
|
|
|
647
667
|
vendor_multimodal_api_key: str,
|
|
648
668
|
page_prefix: str,
|
|
649
669
|
page_suffix: str,
|
|
670
|
+
webhook_url: str,
|
|
650
671
|
take_screenshot: bool,
|
|
672
|
+
disable_ocr: bool,
|
|
673
|
+
disable_reconstruction: bool,
|
|
674
|
+
input_s_3_path: str,
|
|
675
|
+
output_s_3_path_prefix: str,
|
|
651
676
|
file: typing.IO,
|
|
652
677
|
) -> ParsingJob:
|
|
653
678
|
"""
|
|
@@ -688,8 +713,18 @@ class AsyncParsingClient:
|
|
|
688
713
|
|
|
689
714
|
- page_suffix: str.
|
|
690
715
|
|
|
716
|
+
- webhook_url: str.
|
|
717
|
+
|
|
691
718
|
- take_screenshot: bool.
|
|
692
719
|
|
|
720
|
+
- disable_ocr: bool.
|
|
721
|
+
|
|
722
|
+
- disable_reconstruction: bool.
|
|
723
|
+
|
|
724
|
+
- input_s_3_path: str.
|
|
725
|
+
|
|
726
|
+
- output_s_3_path_prefix: str.
|
|
727
|
+
|
|
693
728
|
- file: typing.IO.
|
|
694
729
|
"""
|
|
695
730
|
_response = await self._client_wrapper.httpx_client.request(
|
|
@@ -714,7 +749,12 @@ class AsyncParsingClient:
|
|
|
714
749
|
"vendor_multimodal_api_key": vendor_multimodal_api_key,
|
|
715
750
|
"page_prefix": page_prefix,
|
|
716
751
|
"page_suffix": page_suffix,
|
|
752
|
+
"webhook_url": webhook_url,
|
|
717
753
|
"take_screenshot": take_screenshot,
|
|
754
|
+
"disable_ocr": disable_ocr,
|
|
755
|
+
"disable_reconstruction": disable_reconstruction,
|
|
756
|
+
"input_s3_path": input_s_3_path,
|
|
757
|
+
"output_s3_path_prefix": output_s_3_path_prefix,
|
|
718
758
|
}
|
|
719
759
|
),
|
|
720
760
|
files={"file": file},
|
|
@@ -9,6 +9,7 @@ from .types import (
|
|
|
9
9
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
10
10
|
PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
11
11
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
12
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
12
13
|
PipelineUpdateTransformConfig,
|
|
13
14
|
PipelineUpdateTransformConfig_Advanced,
|
|
14
15
|
PipelineUpdateTransformConfig_Auto,
|
|
@@ -23,6 +24,7 @@ __all__ = [
|
|
|
23
24
|
"PipelineUpdateEmbeddingConfig_GeminiEmbedding",
|
|
24
25
|
"PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
25
26
|
"PipelineUpdateEmbeddingConfig_OpenaiEmbedding",
|
|
27
|
+
"PipelineUpdateEmbeddingConfig_VertexaiEmbedding",
|
|
26
28
|
"PipelineUpdateTransformConfig",
|
|
27
29
|
"PipelineUpdateTransformConfig_Advanced",
|
|
28
30
|
"PipelineUpdateTransformConfig_Auto",
|
|
@@ -1212,6 +1212,7 @@ class PipelinesClient:
|
|
|
1212
1212
|
search_filters: typing.Optional[MetadataFilters] = OMIT,
|
|
1213
1213
|
files_top_k: typing.Optional[int] = OMIT,
|
|
1214
1214
|
retrieval_mode: typing.Optional[RetrievalMode] = OMIT,
|
|
1215
|
+
retrieve_image_nodes: typing.Optional[bool] = OMIT,
|
|
1215
1216
|
query: str,
|
|
1216
1217
|
) -> RetrieveResults:
|
|
1217
1218
|
"""
|
|
@@ -1236,6 +1237,8 @@ class PipelinesClient:
|
|
|
1236
1237
|
|
|
1237
1238
|
- retrieval_mode: typing.Optional[RetrievalMode]. The retrieval mode for the query.
|
|
1238
1239
|
|
|
1240
|
+
- retrieve_image_nodes: typing.Optional[bool]. Whether to retrieve image nodes.
|
|
1241
|
+
|
|
1239
1242
|
- query: str. The query to retrieve against.
|
|
1240
1243
|
---
|
|
1241
1244
|
from llama_cloud import FilterCondition, MetadataFilters, RetrievalMode
|
|
@@ -1271,6 +1274,8 @@ class PipelinesClient:
|
|
|
1271
1274
|
_request["files_top_k"] = files_top_k
|
|
1272
1275
|
if retrieval_mode is not OMIT:
|
|
1273
1276
|
_request["retrieval_mode"] = retrieval_mode
|
|
1277
|
+
if retrieve_image_nodes is not OMIT:
|
|
1278
|
+
_request["retrieve_image_nodes"] = retrieve_image_nodes
|
|
1274
1279
|
_response = self._client_wrapper.httpx_client.request(
|
|
1275
1280
|
"POST",
|
|
1276
1281
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/retrieve"),
|
|
@@ -1418,6 +1423,7 @@ class PipelinesClient:
|
|
|
1418
1423
|
MetadataFilters,
|
|
1419
1424
|
PresetRetrievalParams,
|
|
1420
1425
|
RetrievalMode,
|
|
1426
|
+
SupportedLlmModelNames,
|
|
1421
1427
|
)
|
|
1422
1428
|
from llama_cloud.client import LlamaCloud
|
|
1423
1429
|
|
|
@@ -1434,7 +1440,9 @@ class PipelinesClient:
|
|
|
1434
1440
|
),
|
|
1435
1441
|
retrieval_mode=RetrievalMode.CHUNKS,
|
|
1436
1442
|
),
|
|
1437
|
-
llm_parameters=LlmParameters(
|
|
1443
|
+
llm_parameters=LlmParameters(
|
|
1444
|
+
model_name=SupportedLlmModelNames.GPT_3_5_TURBO,
|
|
1445
|
+
),
|
|
1438
1446
|
),
|
|
1439
1447
|
)
|
|
1440
1448
|
"""
|
|
@@ -2910,6 +2918,7 @@ class AsyncPipelinesClient:
|
|
|
2910
2918
|
search_filters: typing.Optional[MetadataFilters] = OMIT,
|
|
2911
2919
|
files_top_k: typing.Optional[int] = OMIT,
|
|
2912
2920
|
retrieval_mode: typing.Optional[RetrievalMode] = OMIT,
|
|
2921
|
+
retrieve_image_nodes: typing.Optional[bool] = OMIT,
|
|
2913
2922
|
query: str,
|
|
2914
2923
|
) -> RetrieveResults:
|
|
2915
2924
|
"""
|
|
@@ -2934,6 +2943,8 @@ class AsyncPipelinesClient:
|
|
|
2934
2943
|
|
|
2935
2944
|
- retrieval_mode: typing.Optional[RetrievalMode]. The retrieval mode for the query.
|
|
2936
2945
|
|
|
2946
|
+
- retrieve_image_nodes: typing.Optional[bool]. Whether to retrieve image nodes.
|
|
2947
|
+
|
|
2937
2948
|
- query: str. The query to retrieve against.
|
|
2938
2949
|
---
|
|
2939
2950
|
from llama_cloud import FilterCondition, MetadataFilters, RetrievalMode
|
|
@@ -2969,6 +2980,8 @@ class AsyncPipelinesClient:
|
|
|
2969
2980
|
_request["files_top_k"] = files_top_k
|
|
2970
2981
|
if retrieval_mode is not OMIT:
|
|
2971
2982
|
_request["retrieval_mode"] = retrieval_mode
|
|
2983
|
+
if retrieve_image_nodes is not OMIT:
|
|
2984
|
+
_request["retrieve_image_nodes"] = retrieve_image_nodes
|
|
2972
2985
|
_response = await self._client_wrapper.httpx_client.request(
|
|
2973
2986
|
"POST",
|
|
2974
2987
|
urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/pipelines/{pipeline_id}/retrieve"),
|
|
@@ -3116,6 +3129,7 @@ class AsyncPipelinesClient:
|
|
|
3116
3129
|
MetadataFilters,
|
|
3117
3130
|
PresetRetrievalParams,
|
|
3118
3131
|
RetrievalMode,
|
|
3132
|
+
SupportedLlmModelNames,
|
|
3119
3133
|
)
|
|
3120
3134
|
from llama_cloud.client import AsyncLlamaCloud
|
|
3121
3135
|
|
|
@@ -3132,7 +3146,9 @@ class AsyncPipelinesClient:
|
|
|
3132
3146
|
),
|
|
3133
3147
|
retrieval_mode=RetrievalMode.CHUNKS,
|
|
3134
3148
|
),
|
|
3135
|
-
llm_parameters=LlmParameters(
|
|
3149
|
+
llm_parameters=LlmParameters(
|
|
3150
|
+
model_name=SupportedLlmModelNames.GPT_3_5_TURBO,
|
|
3151
|
+
),
|
|
3136
3152
|
),
|
|
3137
3153
|
)
|
|
3138
3154
|
"""
|
|
@@ -9,6 +9,7 @@ from .pipeline_update_embedding_config import (
|
|
|
9
9
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
10
10
|
PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
11
11
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
12
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
12
13
|
)
|
|
13
14
|
from .pipeline_update_transform_config import (
|
|
14
15
|
PipelineUpdateTransformConfig,
|
|
@@ -25,6 +26,7 @@ __all__ = [
|
|
|
25
26
|
"PipelineUpdateEmbeddingConfig_GeminiEmbedding",
|
|
26
27
|
"PipelineUpdateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
27
28
|
"PipelineUpdateEmbeddingConfig_OpenaiEmbedding",
|
|
29
|
+
"PipelineUpdateEmbeddingConfig_VertexaiEmbedding",
|
|
28
30
|
"PipelineUpdateTransformConfig",
|
|
29
31
|
"PipelineUpdateTransformConfig_Advanced",
|
|
30
32
|
"PipelineUpdateTransformConfig_Auto",
|
|
@@ -12,6 +12,7 @@ from ....types.cohere_embedding_config import CohereEmbeddingConfig
|
|
|
12
12
|
from ....types.gemini_embedding_config import GeminiEmbeddingConfig
|
|
13
13
|
from ....types.hugging_face_inference_api_embedding_config import HuggingFaceInferenceApiEmbeddingConfig
|
|
14
14
|
from ....types.open_ai_embedding_config import OpenAiEmbeddingConfig
|
|
15
|
+
from ....types.vertex_ai_embedding_config import VertexAiEmbeddingConfig
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
class PipelineUpdateEmbeddingConfig_OpenaiEmbedding(OpenAiEmbeddingConfig):
|
|
@@ -68,6 +69,15 @@ class PipelineUpdateEmbeddingConfig_CohereEmbedding(CohereEmbeddingConfig):
|
|
|
68
69
|
allow_population_by_field_name = True
|
|
69
70
|
|
|
70
71
|
|
|
72
|
+
class PipelineUpdateEmbeddingConfig_VertexaiEmbedding(VertexAiEmbeddingConfig):
|
|
73
|
+
type: typing_extensions.Literal["VERTEXAI_EMBEDDING"]
|
|
74
|
+
|
|
75
|
+
class Config:
|
|
76
|
+
frozen = True
|
|
77
|
+
smart_union = True
|
|
78
|
+
allow_population_by_field_name = True
|
|
79
|
+
|
|
80
|
+
|
|
71
81
|
PipelineUpdateEmbeddingConfig = typing.Union[
|
|
72
82
|
PipelineUpdateEmbeddingConfig_OpenaiEmbedding,
|
|
73
83
|
PipelineUpdateEmbeddingConfig_AzureEmbedding,
|
|
@@ -75,4 +85,5 @@ PipelineUpdateEmbeddingConfig = typing.Union[
|
|
|
75
85
|
PipelineUpdateEmbeddingConfig_BedrockEmbedding,
|
|
76
86
|
PipelineUpdateEmbeddingConfig_GeminiEmbedding,
|
|
77
87
|
PipelineUpdateEmbeddingConfig_CohereEmbedding,
|
|
88
|
+
PipelineUpdateEmbeddingConfig_VertexaiEmbedding,
|
|
78
89
|
]
|
llama_cloud/types/__init__.py
CHANGED
|
@@ -82,6 +82,7 @@ from .eval_execution_params_override import EvalExecutionParamsOverride
|
|
|
82
82
|
from .eval_question import EvalQuestion
|
|
83
83
|
from .eval_question_create import EvalQuestionCreate
|
|
84
84
|
from .eval_question_result import EvalQuestionResult
|
|
85
|
+
from .extend_vertex_text_embedding import ExtendVertexTextEmbedding
|
|
85
86
|
from .extraction_job import ExtractionJob
|
|
86
87
|
from .extraction_result import ExtractionResult
|
|
87
88
|
from .extraction_result_data_value import ExtractionResultDataValue
|
|
@@ -127,6 +128,8 @@ from .open_ai_embedding import OpenAiEmbedding
|
|
|
127
128
|
from .open_ai_embedding_config import OpenAiEmbeddingConfig
|
|
128
129
|
from .organization import Organization
|
|
129
130
|
from .organization_create import OrganizationCreate
|
|
131
|
+
from .page_screenshot_metadata import PageScreenshotMetadata
|
|
132
|
+
from .page_screenshot_node_with_score import PageScreenshotNodeWithScore
|
|
130
133
|
from .page_segmentation_config import PageSegmentationConfig
|
|
131
134
|
from .page_splitter_node_parser import PageSplitterNodeParser
|
|
132
135
|
from .parser_languages import ParserLanguages
|
|
@@ -137,6 +140,7 @@ from .parsing_job_markdown_result import ParsingJobMarkdownResult
|
|
|
137
140
|
from .parsing_job_text_result import ParsingJobTextResult
|
|
138
141
|
from .parsing_usage import ParsingUsage
|
|
139
142
|
from .pipeline import Pipeline
|
|
143
|
+
from .pipeline_configuration_hashes import PipelineConfigurationHashes
|
|
140
144
|
from .pipeline_create import PipelineCreate
|
|
141
145
|
from .pipeline_create_embedding_config import (
|
|
142
146
|
PipelineCreateEmbeddingConfig,
|
|
@@ -146,6 +150,7 @@ from .pipeline_create_embedding_config import (
|
|
|
146
150
|
PipelineCreateEmbeddingConfig_GeminiEmbedding,
|
|
147
151
|
PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
148
152
|
PipelineCreateEmbeddingConfig_OpenaiEmbedding,
|
|
153
|
+
PipelineCreateEmbeddingConfig_VertexaiEmbedding,
|
|
149
154
|
)
|
|
150
155
|
from .pipeline_create_transform_config import (
|
|
151
156
|
PipelineCreateTransformConfig,
|
|
@@ -166,8 +171,10 @@ from .pipeline_embedding_config import (
|
|
|
166
171
|
PipelineEmbeddingConfig_GeminiEmbedding,
|
|
167
172
|
PipelineEmbeddingConfig_HuggingfaceApiEmbedding,
|
|
168
173
|
PipelineEmbeddingConfig_OpenaiEmbedding,
|
|
174
|
+
PipelineEmbeddingConfig_VertexaiEmbedding,
|
|
169
175
|
)
|
|
170
176
|
from .pipeline_file import PipelineFile
|
|
177
|
+
from .pipeline_file_config_hash_value import PipelineFileConfigHashValue
|
|
171
178
|
from .pipeline_file_create import PipelineFileCreate
|
|
172
179
|
from .pipeline_file_create_custom_metadata_value import PipelineFileCreateCustomMetadataValue
|
|
173
180
|
from .pipeline_file_custom_metadata_value import PipelineFileCustomMetadataValue
|
|
@@ -208,6 +215,8 @@ from .user_organization_create import UserOrganizationCreate
|
|
|
208
215
|
from .user_organization_delete import UserOrganizationDelete
|
|
209
216
|
from .validation_error import ValidationError
|
|
210
217
|
from .validation_error_loc_item import ValidationErrorLocItem
|
|
218
|
+
from .vertex_ai_embedding_config import VertexAiEmbeddingConfig
|
|
219
|
+
from .vertex_embedding_mode import VertexEmbeddingMode
|
|
211
220
|
|
|
212
221
|
__all__ = [
|
|
213
222
|
"AdvancedModeTransformConfig",
|
|
@@ -288,6 +297,7 @@ __all__ = [
|
|
|
288
297
|
"EvalQuestion",
|
|
289
298
|
"EvalQuestionCreate",
|
|
290
299
|
"EvalQuestionResult",
|
|
300
|
+
"ExtendVertexTextEmbedding",
|
|
291
301
|
"ExtractionJob",
|
|
292
302
|
"ExtractionResult",
|
|
293
303
|
"ExtractionResultDataValue",
|
|
@@ -333,6 +343,8 @@ __all__ = [
|
|
|
333
343
|
"OpenAiEmbeddingConfig",
|
|
334
344
|
"Organization",
|
|
335
345
|
"OrganizationCreate",
|
|
346
|
+
"PageScreenshotMetadata",
|
|
347
|
+
"PageScreenshotNodeWithScore",
|
|
336
348
|
"PageSegmentationConfig",
|
|
337
349
|
"PageSplitterNodeParser",
|
|
338
350
|
"ParserLanguages",
|
|
@@ -343,6 +355,7 @@ __all__ = [
|
|
|
343
355
|
"ParsingJobTextResult",
|
|
344
356
|
"ParsingUsage",
|
|
345
357
|
"Pipeline",
|
|
358
|
+
"PipelineConfigurationHashes",
|
|
346
359
|
"PipelineCreate",
|
|
347
360
|
"PipelineCreateEmbeddingConfig",
|
|
348
361
|
"PipelineCreateEmbeddingConfig_AzureEmbedding",
|
|
@@ -351,6 +364,7 @@ __all__ = [
|
|
|
351
364
|
"PipelineCreateEmbeddingConfig_GeminiEmbedding",
|
|
352
365
|
"PipelineCreateEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
353
366
|
"PipelineCreateEmbeddingConfig_OpenaiEmbedding",
|
|
367
|
+
"PipelineCreateEmbeddingConfig_VertexaiEmbedding",
|
|
354
368
|
"PipelineCreateTransformConfig",
|
|
355
369
|
"PipelineCreateTransformConfig_Advanced",
|
|
356
370
|
"PipelineCreateTransformConfig_Auto",
|
|
@@ -367,7 +381,9 @@ __all__ = [
|
|
|
367
381
|
"PipelineEmbeddingConfig_GeminiEmbedding",
|
|
368
382
|
"PipelineEmbeddingConfig_HuggingfaceApiEmbedding",
|
|
369
383
|
"PipelineEmbeddingConfig_OpenaiEmbedding",
|
|
384
|
+
"PipelineEmbeddingConfig_VertexaiEmbedding",
|
|
370
385
|
"PipelineFile",
|
|
386
|
+
"PipelineFileConfigHashValue",
|
|
371
387
|
"PipelineFileCreate",
|
|
372
388
|
"PipelineFileCreateCustomMetadataValue",
|
|
373
389
|
"PipelineFileCustomMetadataValue",
|
|
@@ -406,4 +422,6 @@ __all__ = [
|
|
|
406
422
|
"UserOrganizationDelete",
|
|
407
423
|
"ValidationError",
|
|
408
424
|
"ValidationErrorLocItem",
|
|
425
|
+
"VertexAiEmbeddingConfig",
|
|
426
|
+
"VertexEmbeddingMode",
|
|
409
427
|
]
|
|
@@ -21,9 +21,8 @@ class CloudAzStorageBlobDataSource(pydantic.BaseModel):
|
|
|
21
21
|
|
|
22
22
|
container_name: str = pydantic.Field(description="The name of the Azure Storage Blob container to read from.")
|
|
23
23
|
account_url: str = pydantic.Field(description="The Azure Storage Blob account URL to use for authentication.")
|
|
24
|
-
blob: typing.Optional[str] = pydantic.Field(description="The blob name to read from.")
|
|
25
24
|
prefix: typing.Optional[str] = pydantic.Field(
|
|
26
|
-
description="The prefix of the Azure Storage Blob objects to read from."
|
|
25
|
+
description="The prefix of the Azure Storage Blob objects to read from. Use this to filter files at the subdirectory level"
|
|
27
26
|
)
|
|
28
27
|
account_name: typing.Optional[str] = pydantic.Field(
|
|
29
28
|
description="The Azure Storage Blob account name to use for authentication."
|
|
@@ -20,17 +20,15 @@ class CloudPostgresVectorStore(pydantic.BaseModel):
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
supports_nested_metadata_filters: typing.Optional[bool]
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
database: str
|
|
24
|
+
host: str
|
|
25
|
+
password: str
|
|
26
|
+
port: str
|
|
27
|
+
user: str
|
|
25
28
|
table_name: str
|
|
26
29
|
schema_name: str
|
|
27
30
|
embed_dim: int
|
|
28
|
-
hybrid_search: bool
|
|
29
|
-
text_search_config: str
|
|
30
|
-
cache_ok: bool
|
|
31
|
-
perform_setup: bool
|
|
32
|
-
debug: bool
|
|
33
|
-
use_jsonb: bool
|
|
31
|
+
hybrid_search: typing.Optional[bool]
|
|
34
32
|
class_name: typing.Optional[str]
|
|
35
33
|
|
|
36
34
|
def json(self, **kwargs: typing.Any) -> str:
|