llama-cloud 0.1.41__py3-none-any.whl → 1.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_cloud/__init__.py +101 -816
- llama_cloud/_base_client.py +2124 -0
- llama_cloud/_client.py +795 -0
- llama_cloud/_compat.py +219 -0
- llama_cloud/_constants.py +14 -0
- llama_cloud/_exceptions.py +108 -0
- llama_cloud/_files.py +127 -0
- llama_cloud/_models.py +872 -0
- llama_cloud/_polling.py +182 -0
- llama_cloud/_qs.py +150 -0
- llama_cloud/_resource.py +43 -0
- llama_cloud/_response.py +832 -0
- llama_cloud/_streaming.py +333 -0
- llama_cloud/_types.py +270 -0
- llama_cloud/_utils/__init__.py +64 -0
- llama_cloud/_utils/_compat.py +45 -0
- llama_cloud/_utils/_datetime_parse.py +136 -0
- llama_cloud/_utils/_logs.py +25 -0
- llama_cloud/_utils/_proxy.py +65 -0
- llama_cloud/_utils/_reflection.py +42 -0
- llama_cloud/_utils/_resources_proxy.py +24 -0
- llama_cloud/_utils/_streams.py +12 -0
- llama_cloud/_utils/_sync.py +58 -0
- llama_cloud/_utils/_transform.py +457 -0
- llama_cloud/_utils/_typing.py +156 -0
- llama_cloud/_utils/_utils.py +421 -0
- llama_cloud/_version.py +4 -0
- llama_cloud/lib/__init__.py +0 -0
- llama_cloud/lib/index/__init__.py +13 -0
- llama_cloud/lib/index/api_utils.py +300 -0
- llama_cloud/lib/index/base.py +1041 -0
- llama_cloud/lib/index/composite_retriever.py +272 -0
- llama_cloud/lib/index/retriever.py +233 -0
- llama_cloud/pagination.py +465 -0
- llama_cloud/py.typed +0 -0
- llama_cloud/resources/__init__.py +136 -107
- llama_cloud/resources/beta/__init__.py +102 -1
- llama_cloud/resources/beta/agent_data.py +1041 -0
- llama_cloud/resources/beta/batch/__init__.py +33 -0
- llama_cloud/resources/beta/batch/batch.py +664 -0
- llama_cloud/resources/beta/batch/job_items.py +348 -0
- llama_cloud/resources/beta/beta.py +262 -0
- llama_cloud/resources/beta/directories/__init__.py +33 -0
- llama_cloud/resources/beta/directories/directories.py +719 -0
- llama_cloud/resources/beta/directories/files.py +913 -0
- llama_cloud/resources/beta/parse_configurations.py +743 -0
- llama_cloud/resources/beta/sheets.py +1130 -0
- llama_cloud/resources/beta/split.py +917 -0
- llama_cloud/resources/classifier/__init__.py +32 -1
- llama_cloud/resources/classifier/classifier.py +588 -0
- llama_cloud/resources/classifier/jobs.py +563 -0
- llama_cloud/resources/data_sinks.py +579 -0
- llama_cloud/resources/data_sources.py +651 -0
- llama_cloud/resources/extraction/__init__.py +61 -0
- llama_cloud/resources/extraction/extraction.py +609 -0
- llama_cloud/resources/extraction/extraction_agents/__init__.py +33 -0
- llama_cloud/resources/extraction/extraction_agents/extraction_agents.py +633 -0
- llama_cloud/resources/extraction/extraction_agents/schema.py +308 -0
- llama_cloud/resources/extraction/jobs.py +1106 -0
- llama_cloud/resources/extraction/runs.py +498 -0
- llama_cloud/resources/files.py +784 -0
- llama_cloud/resources/parsing.py +1296 -0
- llama_cloud/resources/pipelines/__init__.py +98 -24
- llama_cloud/resources/pipelines/data_sources.py +529 -0
- llama_cloud/resources/pipelines/documents.py +810 -0
- llama_cloud/resources/pipelines/files.py +682 -0
- llama_cloud/resources/pipelines/images.py +513 -0
- llama_cloud/resources/pipelines/metadata.py +265 -0
- llama_cloud/resources/pipelines/pipelines.py +1525 -0
- llama_cloud/resources/pipelines/sync.py +243 -0
- llama_cloud/resources/projects.py +276 -0
- llama_cloud/resources/retrievers/__init__.py +32 -1
- llama_cloud/resources/retrievers/retriever.py +238 -0
- llama_cloud/resources/retrievers/retrievers.py +920 -0
- llama_cloud/types/__init__.py +171 -721
- llama_cloud/types/advanced_mode_transform_config.py +102 -38
- llama_cloud/types/advanced_mode_transform_config_param.py +102 -0
- llama_cloud/types/auto_transform_config.py +11 -25
- llama_cloud/types/auto_transform_config_param.py +17 -0
- llama_cloud/types/azure_openai_embedding.py +62 -0
- llama_cloud/types/azure_openai_embedding_config.py +17 -0
- llama_cloud/types/azure_openai_embedding_config_param.py +17 -0
- llama_cloud/types/azure_openai_embedding_param.py +61 -0
- llama_cloud/types/b_box.py +37 -0
- llama_cloud/types/bedrock_embedding.py +49 -46
- llama_cloud/types/bedrock_embedding_config.py +10 -27
- llama_cloud/types/bedrock_embedding_config_param.py +17 -0
- llama_cloud/types/bedrock_embedding_param.py +48 -0
- llama_cloud/types/beta/__init__.py +59 -0
- llama_cloud/types/beta/agent_data.py +26 -0
- llama_cloud/types/beta/agent_data_agent_data_params.py +20 -0
- llama_cloud/types/beta/agent_data_aggregate_params.py +79 -0
- llama_cloud/types/beta/agent_data_aggregate_response.py +17 -0
- llama_cloud/types/beta/agent_data_delete_by_query_params.py +43 -0
- llama_cloud/types/beta/agent_data_delete_by_query_response.py +11 -0
- llama_cloud/types/beta/agent_data_delete_params.py +14 -0
- llama_cloud/types/beta/agent_data_delete_response.py +8 -0
- llama_cloud/types/beta/agent_data_get_params.py +14 -0
- llama_cloud/types/beta/agent_data_search_params.py +69 -0
- llama_cloud/types/beta/agent_data_update_params.py +16 -0
- llama_cloud/types/beta/batch/__init__.py +12 -0
- llama_cloud/types/beta/batch/job_item_get_processing_results_params.py +17 -0
- llama_cloud/types/beta/batch/job_item_get_processing_results_response.py +409 -0
- llama_cloud/types/beta/batch/job_item_list_params.py +23 -0
- llama_cloud/types/beta/batch/job_item_list_response.py +42 -0
- llama_cloud/types/beta/batch_cancel_params.py +21 -0
- llama_cloud/types/beta/batch_cancel_response.py +23 -0
- llama_cloud/types/beta/batch_create_params.py +399 -0
- llama_cloud/types/beta/batch_create_response.py +63 -0
- llama_cloud/types/beta/batch_get_status_params.py +14 -0
- llama_cloud/types/beta/batch_get_status_response.py +73 -0
- llama_cloud/types/beta/batch_list_params.py +29 -0
- llama_cloud/types/beta/batch_list_response.py +63 -0
- llama_cloud/types/beta/directories/__init__.py +15 -0
- llama_cloud/types/beta/directories/file_add_params.py +26 -0
- llama_cloud/types/beta/directories/file_add_response.py +42 -0
- llama_cloud/types/beta/directories/file_delete_params.py +16 -0
- llama_cloud/types/beta/directories/file_get_params.py +16 -0
- llama_cloud/types/beta/directories/file_get_response.py +42 -0
- llama_cloud/types/beta/directories/file_list_params.py +28 -0
- llama_cloud/types/beta/directories/file_list_response.py +42 -0
- llama_cloud/types/beta/directories/file_update_params.py +27 -0
- llama_cloud/types/beta/directories/file_update_response.py +42 -0
- llama_cloud/types/beta/directories/file_upload_params.py +24 -0
- llama_cloud/types/beta/directories/file_upload_response.py +42 -0
- llama_cloud/types/beta/directory_create_params.py +23 -0
- llama_cloud/types/beta/directory_create_response.py +36 -0
- llama_cloud/types/beta/directory_delete_params.py +14 -0
- llama_cloud/types/beta/directory_get_params.py +14 -0
- llama_cloud/types/beta/directory_get_response.py +36 -0
- llama_cloud/types/beta/directory_list_params.py +24 -0
- llama_cloud/types/beta/directory_list_response.py +36 -0
- llama_cloud/types/beta/directory_update_params.py +20 -0
- llama_cloud/types/beta/directory_update_response.py +36 -0
- llama_cloud/types/beta/parse_configuration.py +40 -0
- llama_cloud/types/beta/parse_configuration_create_params.py +34 -0
- llama_cloud/types/beta/parse_configuration_delete_params.py +14 -0
- llama_cloud/types/beta/parse_configuration_get_params.py +14 -0
- llama_cloud/types/beta/parse_configuration_list_params.py +24 -0
- llama_cloud/types/beta/parse_configuration_query_response.py +28 -0
- llama_cloud/types/beta/parse_configuration_update_params.py +22 -0
- llama_cloud/types/beta/sheet_create_params.py +22 -0
- llama_cloud/types/beta/sheet_delete_job_params.py +14 -0
- llama_cloud/types/beta/sheet_get_params.py +16 -0
- llama_cloud/types/beta/sheet_get_result_table_params.py +20 -0
- llama_cloud/types/beta/sheet_list_params.py +20 -0
- llama_cloud/types/beta/sheets_job.py +88 -0
- llama_cloud/types/beta/sheets_parsing_config.py +49 -0
- llama_cloud/types/beta/sheets_parsing_config_param.py +51 -0
- llama_cloud/types/beta/split_category.py +17 -0
- llama_cloud/types/beta/split_category_param.py +18 -0
- llama_cloud/types/beta/split_create_params.py +36 -0
- llama_cloud/types/beta/split_create_response.py +48 -0
- llama_cloud/types/beta/split_document_input.py +15 -0
- llama_cloud/types/beta/split_document_input_param.py +17 -0
- llama_cloud/types/beta/split_get_params.py +14 -0
- llama_cloud/types/beta/split_get_response.py +48 -0
- llama_cloud/types/beta/split_list_params.py +18 -0
- llama_cloud/types/beta/split_list_response.py +48 -0
- llama_cloud/types/beta/split_result_response.py +15 -0
- llama_cloud/types/beta/split_segment_response.py +20 -0
- llama_cloud/types/classifier/__init__.py +15 -0
- llama_cloud/types/classifier/classifier_rule.py +25 -0
- llama_cloud/types/classifier/classifier_rule_param.py +27 -0
- llama_cloud/types/classifier/classify_job.py +51 -0
- llama_cloud/types/classifier/classify_job_param.py +53 -0
- llama_cloud/types/classifier/classify_parsing_configuration.py +21 -0
- llama_cloud/types/classifier/classify_parsing_configuration_param.py +23 -0
- llama_cloud/types/classifier/job_create_params.py +30 -0
- llama_cloud/types/classifier/job_get_params.py +14 -0
- llama_cloud/types/classifier/job_get_results_params.py +14 -0
- llama_cloud/types/classifier/job_get_results_response.py +66 -0
- llama_cloud/types/classifier/job_list_params.py +18 -0
- llama_cloud/types/cohere_embedding.py +37 -40
- llama_cloud/types/cohere_embedding_config.py +10 -27
- llama_cloud/types/cohere_embedding_config_param.py +17 -0
- llama_cloud/types/cohere_embedding_param.py +36 -0
- llama_cloud/types/composite_retrieval_mode.py +4 -18
- llama_cloud/types/composite_retrieval_result.py +52 -37
- llama_cloud/types/data_sink.py +46 -39
- llama_cloud/types/data_sink_create_param.py +41 -0
- llama_cloud/types/data_sink_create_params.py +44 -0
- llama_cloud/types/data_sink_list_params.py +14 -0
- llama_cloud/types/data_sink_list_response.py +10 -0
- llama_cloud/types/data_sink_update_params.py +40 -0
- llama_cloud/types/data_source.py +67 -39
- llama_cloud/types/data_source_create_params.py +65 -0
- llama_cloud/types/data_source_list_params.py +14 -0
- llama_cloud/types/data_source_list_response.py +10 -0
- llama_cloud/types/data_source_reader_version_metadata.py +8 -27
- llama_cloud/types/data_source_update_params.py +61 -0
- llama_cloud/types/extraction/__init__.py +25 -0
- llama_cloud/types/extraction/extract_agent.py +41 -0
- llama_cloud/types/extraction/extract_config.py +118 -0
- llama_cloud/types/extraction/extract_config_param.py +118 -0
- llama_cloud/types/extraction/extract_job.py +32 -0
- llama_cloud/types/extraction/extract_run.py +64 -0
- llama_cloud/types/extraction/extraction_agent_create_params.py +25 -0
- llama_cloud/types/extraction/extraction_agent_list_params.py +17 -0
- llama_cloud/types/extraction/extraction_agent_list_response.py +10 -0
- llama_cloud/types/extraction/extraction_agent_update_params.py +18 -0
- llama_cloud/types/extraction/extraction_agents/__init__.py +8 -0
- llama_cloud/types/extraction/extraction_agents/schema_generate_schema_params.py +23 -0
- llama_cloud/types/extraction/extraction_agents/schema_generate_schema_response.py +14 -0
- llama_cloud/types/extraction/extraction_agents/schema_validate_schema_params.py +12 -0
- llama_cloud/types/extraction/extraction_agents/schema_validate_schema_response.py +13 -0
- llama_cloud/types/extraction/job_create_params.py +38 -0
- llama_cloud/types/extraction/job_file_params.py +29 -0
- llama_cloud/types/extraction/job_get_result_params.py +14 -0
- llama_cloud/types/extraction/job_get_result_response.py +27 -0
- llama_cloud/types/extraction/job_list_params.py +11 -0
- llama_cloud/types/extraction/job_list_response.py +10 -0
- llama_cloud/types/extraction/run_delete_params.py +14 -0
- llama_cloud/types/extraction/run_get_by_job_params.py +14 -0
- llama_cloud/types/extraction/run_get_params.py +14 -0
- llama_cloud/types/extraction/run_list_params.py +15 -0
- llama_cloud/types/extraction/webhook_configuration.py +43 -0
- llama_cloud/types/extraction/webhook_configuration_param.py +43 -0
- llama_cloud/types/extraction_run_params.py +45 -0
- llama_cloud/types/fail_page_mode.py +4 -26
- llama_cloud/types/file.py +48 -40
- llama_cloud/types/file_create_params.py +28 -0
- llama_cloud/types/file_create_response.py +38 -0
- llama_cloud/types/file_delete_params.py +14 -0
- llama_cloud/types/file_get_params.py +16 -0
- llama_cloud/types/file_list_params.py +40 -0
- llama_cloud/types/file_list_response.py +38 -0
- llama_cloud/types/file_query_params.py +61 -0
- llama_cloud/types/file_query_response.py +47 -27
- llama_cloud/types/gemini_embedding.py +40 -39
- llama_cloud/types/gemini_embedding_config.py +10 -27
- llama_cloud/types/gemini_embedding_config_param.py +17 -0
- llama_cloud/types/gemini_embedding_param.py +39 -0
- llama_cloud/types/hugging_face_inference_api_embedding.py +62 -46
- llama_cloud/types/hugging_face_inference_api_embedding_config.py +11 -28
- llama_cloud/types/hugging_face_inference_api_embedding_config_param.py +17 -0
- llama_cloud/types/hugging_face_inference_api_embedding_param.py +60 -0
- llama_cloud/types/list_item.py +48 -0
- llama_cloud/types/llama_parse_parameters.py +251 -130
- llama_cloud/types/llama_parse_parameters_param.py +261 -0
- llama_cloud/types/llama_parse_supported_file_extensions.py +84 -310
- llama_cloud/types/managed_ingestion_status_response.py +39 -37
- llama_cloud/types/message_role.py +4 -46
- llama_cloud/types/metadata_filters.py +45 -29
- llama_cloud/types/metadata_filters_param.py +58 -0
- llama_cloud/types/openai_embedding.py +56 -0
- llama_cloud/types/openai_embedding_config.py +17 -0
- llama_cloud/types/openai_embedding_config_param.py +17 -0
- llama_cloud/types/openai_embedding_param.py +55 -0
- llama_cloud/types/page_figure_node_with_score.py +32 -29
- llama_cloud/types/page_screenshot_node_with_score.py +23 -29
- llama_cloud/types/parsing_create_params.py +586 -0
- llama_cloud/types/parsing_create_response.py +33 -0
- llama_cloud/types/parsing_get_params.py +27 -0
- llama_cloud/types/parsing_get_response.py +364 -0
- llama_cloud/types/parsing_languages.py +94 -0
- llama_cloud/types/parsing_list_params.py +23 -0
- llama_cloud/types/parsing_list_response.py +33 -0
- llama_cloud/types/parsing_mode.py +13 -46
- llama_cloud/types/parsing_upload_file_params.py +14 -0
- llama_cloud/types/parsing_upload_file_response.py +33 -0
- llama_cloud/types/pipeline.py +180 -62
- llama_cloud/types/pipeline_create_params.py +95 -0
- llama_cloud/types/pipeline_get_status_params.py +12 -0
- llama_cloud/types/pipeline_list_params.py +23 -0
- llama_cloud/types/pipeline_list_response.py +12 -0
- llama_cloud/types/pipeline_metadata_config.py +9 -30
- llama_cloud/types/pipeline_metadata_config_param.py +17 -0
- llama_cloud/types/pipeline_retrieve_params.py +74 -0
- llama_cloud/types/pipeline_retrieve_response.py +63 -0
- llama_cloud/types/pipeline_type.py +4 -18
- llama_cloud/types/pipeline_update_params.py +90 -0
- llama_cloud/types/pipeline_upsert_params.py +95 -0
- llama_cloud/types/pipelines/__init__.py +38 -0
- llama_cloud/types/pipelines/cloud_document.py +29 -0
- llama_cloud/types/pipelines/cloud_document_create_param.py +30 -0
- llama_cloud/types/pipelines/data_source_get_data_sources_response.py +10 -0
- llama_cloud/types/pipelines/data_source_sync_params.py +16 -0
- llama_cloud/types/pipelines/data_source_update_data_sources_params.py +25 -0
- llama_cloud/types/pipelines/data_source_update_data_sources_response.py +10 -0
- llama_cloud/types/pipelines/data_source_update_params.py +15 -0
- llama_cloud/types/pipelines/document_create_params.py +14 -0
- llama_cloud/types/pipelines/document_create_response.py +10 -0
- llama_cloud/types/pipelines/document_get_chunks_response.py +10 -0
- llama_cloud/types/pipelines/document_list_params.py +22 -0
- llama_cloud/types/pipelines/document_upsert_params.py +14 -0
- llama_cloud/types/pipelines/document_upsert_response.py +10 -0
- llama_cloud/types/pipelines/file_create_params.py +22 -0
- llama_cloud/types/pipelines/file_create_response.py +10 -0
- llama_cloud/types/pipelines/file_get_status_counts_params.py +14 -0
- llama_cloud/types/pipelines/file_get_status_counts_response.py +24 -0
- llama_cloud/types/pipelines/file_list_params.py +22 -0
- llama_cloud/types/pipelines/file_update_params.py +15 -0
- llama_cloud/types/pipelines/image_get_page_figure_params.py +18 -0
- llama_cloud/types/pipelines/image_get_page_screenshot_params.py +16 -0
- llama_cloud/types/pipelines/image_list_page_figures_params.py +14 -0
- llama_cloud/types/pipelines/image_list_page_figures_response.py +34 -0
- llama_cloud/types/pipelines/image_list_page_screenshots_params.py +14 -0
- llama_cloud/types/pipelines/image_list_page_screenshots_response.py +25 -0
- llama_cloud/types/pipelines/metadata_create_params.py +13 -0
- llama_cloud/types/pipelines/metadata_create_response.py +8 -0
- llama_cloud/types/pipelines/pipeline_data_source.py +96 -0
- llama_cloud/types/pipelines/pipeline_file.py +70 -0
- llama_cloud/types/pipelines/text_node.py +89 -0
- llama_cloud/types/preset_retrieval_params.py +61 -49
- llama_cloud/types/preset_retrieval_params_param.py +71 -0
- llama_cloud/types/presigned_url.py +13 -29
- llama_cloud/types/project.py +24 -36
- llama_cloud/types/project_get_params.py +12 -0
- llama_cloud/types/project_list_params.py +14 -0
- llama_cloud/types/project_list_response.py +10 -0
- llama_cloud/types/re_rank_config_param.py +18 -0
- llama_cloud/types/retrieval_mode.py +4 -26
- llama_cloud/types/retriever.py +31 -38
- llama_cloud/types/retriever_create_params.py +26 -0
- llama_cloud/types/retriever_get_params.py +14 -0
- llama_cloud/types/retriever_list_params.py +16 -0
- llama_cloud/types/retriever_list_response.py +12 -0
- llama_cloud/types/retriever_pipeline.py +26 -34
- llama_cloud/types/retriever_pipeline_param.py +28 -0
- llama_cloud/types/retriever_search_params.py +38 -0
- llama_cloud/types/retriever_update_params.py +19 -0
- llama_cloud/types/retriever_upsert_params.py +26 -0
- llama_cloud/types/retrievers/__init__.py +5 -0
- llama_cloud/types/retrievers/retriever_search_params.py +32 -0
- llama_cloud/types/shared/__init__.py +21 -0
- llama_cloud/types/shared/cloud_astra_db_vector_store.py +39 -0
- llama_cloud/types/shared/cloud_az_storage_blob_data_source.py +34 -0
- llama_cloud/types/shared/cloud_azure_ai_search_vector_store.py +30 -0
- llama_cloud/types/shared/cloud_box_data_source.py +31 -0
- llama_cloud/types/shared/cloud_confluence_data_source.py +53 -0
- llama_cloud/types/shared/cloud_jira_data_source.py +30 -0
- llama_cloud/types/shared/cloud_jira_data_source_v2.py +49 -0
- llama_cloud/types/shared/cloud_milvus_vector_store.py +21 -0
- llama_cloud/types/shared/cloud_mongodb_atlas_vector_search.py +36 -0
- llama_cloud/types/shared/cloud_notion_page_data_source.py +19 -0
- llama_cloud/types/shared/cloud_one_drive_data_source.py +32 -0
- llama_cloud/types/shared/cloud_pinecone_vector_store.py +32 -0
- llama_cloud/types/shared/cloud_postgres_vector_store.py +35 -0
- llama_cloud/types/shared/cloud_qdrant_vector_store.py +35 -0
- llama_cloud/types/shared/cloud_s3_data_source.py +28 -0
- llama_cloud/types/shared/cloud_sharepoint_data_source.py +55 -0
- llama_cloud/types/shared/cloud_slack_data_source.py +31 -0
- llama_cloud/types/shared/failure_handling_config.py +16 -0
- llama_cloud/types/shared/pg_vector_hnsw_settings.py +27 -0
- llama_cloud/types/shared_params/__init__.py +21 -0
- llama_cloud/types/shared_params/cloud_astra_db_vector_store.py +42 -0
- llama_cloud/types/shared_params/cloud_az_storage_blob_data_source.py +41 -0
- llama_cloud/types/shared_params/cloud_azure_ai_search_vector_store.py +34 -0
- llama_cloud/types/shared_params/cloud_box_data_source.py +40 -0
- llama_cloud/types/shared_params/cloud_confluence_data_source.py +58 -0
- llama_cloud/types/shared_params/cloud_jira_data_source.py +34 -0
- llama_cloud/types/shared_params/cloud_jira_data_source_v2.py +54 -0
- llama_cloud/types/shared_params/cloud_milvus_vector_store.py +24 -0
- llama_cloud/types/shared_params/cloud_mongodb_atlas_vector_search.py +39 -0
- llama_cloud/types/shared_params/cloud_notion_page_data_source.py +23 -0
- llama_cloud/types/shared_params/cloud_one_drive_data_source.py +37 -0
- llama_cloud/types/shared_params/cloud_pinecone_vector_store.py +35 -0
- llama_cloud/types/shared_params/cloud_postgres_vector_store.py +39 -0
- llama_cloud/types/shared_params/cloud_qdrant_vector_store.py +37 -0
- llama_cloud/types/shared_params/cloud_s3_data_source.py +32 -0
- llama_cloud/types/shared_params/cloud_sharepoint_data_source.py +60 -0
- llama_cloud/types/shared_params/cloud_slack_data_source.py +35 -0
- llama_cloud/types/shared_params/failure_handling_config.py +16 -0
- llama_cloud/types/shared_params/pg_vector_hnsw_settings.py +26 -0
- llama_cloud/types/sparse_model_config.py +16 -30
- llama_cloud/types/sparse_model_config_param.py +25 -0
- llama_cloud/types/status_enum.py +4 -34
- llama_cloud/types/vertex_ai_embedding_config.py +10 -27
- llama_cloud/types/vertex_ai_embedding_config_param.py +17 -0
- llama_cloud/types/vertex_text_embedding.py +47 -45
- llama_cloud/types/vertex_text_embedding_param.py +45 -0
- llama_cloud-1.0.0b4.dist-info/METADATA +546 -0
- llama_cloud-1.0.0b4.dist-info/RECORD +376 -0
- {llama_cloud-0.1.41.dist-info → llama_cloud-1.0.0b4.dist-info}/WHEEL +1 -1
- llama_cloud-1.0.0b4.dist-info/licenses/LICENSE +7 -0
- llama_cloud/client.py +0 -108
- llama_cloud/core/__init__.py +0 -17
- llama_cloud/core/api_error.py +0 -15
- llama_cloud/core/client_wrapper.py +0 -51
- llama_cloud/core/datetime_utils.py +0 -28
- llama_cloud/core/jsonable_encoder.py +0 -106
- llama_cloud/core/remove_none_from_dict.py +0 -11
- llama_cloud/environment.py +0 -7
- llama_cloud/errors/__init__.py +0 -5
- llama_cloud/errors/unprocessable_entity_error.py +0 -9
- llama_cloud/resources/admin/__init__.py +0 -2
- llama_cloud/resources/admin/client.py +0 -196
- llama_cloud/resources/agent_deployments/__init__.py +0 -2
- llama_cloud/resources/agent_deployments/client.py +0 -160
- llama_cloud/resources/alpha/__init__.py +0 -2
- llama_cloud/resources/alpha/client.py +0 -112
- llama_cloud/resources/beta/client.py +0 -2664
- llama_cloud/resources/chat_apps/__init__.py +0 -2
- llama_cloud/resources/chat_apps/client.py +0 -616
- llama_cloud/resources/classifier/client.py +0 -444
- llama_cloud/resources/data_sinks/__init__.py +0 -5
- llama_cloud/resources/data_sinks/client.py +0 -535
- llama_cloud/resources/data_sinks/types/__init__.py +0 -5
- llama_cloud/resources/data_sinks/types/data_sink_update_component.py +0 -22
- llama_cloud/resources/data_sources/__init__.py +0 -5
- llama_cloud/resources/data_sources/client.py +0 -548
- llama_cloud/resources/data_sources/types/__init__.py +0 -6
- llama_cloud/resources/data_sources/types/data_source_update_component.py +0 -28
- llama_cloud/resources/data_sources/types/data_source_update_custom_metadata_value.py +0 -7
- llama_cloud/resources/embedding_model_configs/__init__.py +0 -23
- llama_cloud/resources/embedding_model_configs/client.py +0 -420
- llama_cloud/resources/embedding_model_configs/types/__init__.py +0 -23
- llama_cloud/resources/embedding_model_configs/types/embedding_model_config_create_embedding_config.py +0 -89
- llama_cloud/resources/evals/__init__.py +0 -2
- llama_cloud/resources/evals/client.py +0 -85
- llama_cloud/resources/files/__init__.py +0 -5
- llama_cloud/resources/files/client.py +0 -1454
- llama_cloud/resources/files/types/__init__.py +0 -5
- llama_cloud/resources/files/types/file_create_from_url_resource_info_value.py +0 -7
- llama_cloud/resources/jobs/__init__.py +0 -2
- llama_cloud/resources/jobs/client.py +0 -164
- llama_cloud/resources/llama_extract/__init__.py +0 -27
- llama_cloud/resources/llama_extract/client.py +0 -2082
- llama_cloud/resources/llama_extract/types/__init__.py +0 -25
- llama_cloud/resources/llama_extract/types/extract_agent_create_data_schema.py +0 -9
- llama_cloud/resources/llama_extract/types/extract_agent_create_data_schema_zero_value.py +0 -7
- llama_cloud/resources/llama_extract/types/extract_agent_update_data_schema.py +0 -9
- llama_cloud/resources/llama_extract/types/extract_agent_update_data_schema_zero_value.py +0 -7
- llama_cloud/resources/llama_extract/types/extract_job_create_batch_data_schema_override.py +0 -9
- llama_cloud/resources/llama_extract/types/extract_job_create_batch_data_schema_override_zero_value.py +0 -7
- llama_cloud/resources/llama_extract/types/extract_schema_validate_request_data_schema.py +0 -9
- llama_cloud/resources/llama_extract/types/extract_schema_validate_request_data_schema_zero_value.py +0 -7
- llama_cloud/resources/llama_extract/types/extract_stateless_request_data_schema.py +0 -9
- llama_cloud/resources/llama_extract/types/extract_stateless_request_data_schema_zero_value.py +0 -7
- llama_cloud/resources/organizations/__init__.py +0 -2
- llama_cloud/resources/organizations/client.py +0 -1448
- llama_cloud/resources/parsing/__init__.py +0 -2
- llama_cloud/resources/parsing/client.py +0 -2392
- llama_cloud/resources/pipelines/client.py +0 -3436
- llama_cloud/resources/pipelines/types/__init__.py +0 -29
- llama_cloud/resources/pipelines/types/pipeline_file_update_custom_metadata_value.py +0 -7
- llama_cloud/resources/pipelines/types/pipeline_update_embedding_config.py +0 -89
- llama_cloud/resources/pipelines/types/pipeline_update_transform_config.py +0 -8
- llama_cloud/resources/pipelines/types/retrieval_params_search_filters_inference_schema_value.py +0 -7
- llama_cloud/resources/projects/__init__.py +0 -2
- llama_cloud/resources/projects/client.py +0 -636
- llama_cloud/resources/retrievers/client.py +0 -837
- llama_cloud/resources/users/__init__.py +0 -2
- llama_cloud/resources/users/client.py +0 -155
- llama_cloud/types/advanced_mode_transform_config_chunking_config.py +0 -67
- llama_cloud/types/advanced_mode_transform_config_segmentation_config.py +0 -45
- llama_cloud/types/agent_data.py +0 -40
- llama_cloud/types/agent_deployment_list.py +0 -32
- llama_cloud/types/agent_deployment_summary.py +0 -39
- llama_cloud/types/aggregate_group.py +0 -37
- llama_cloud/types/azure_open_ai_embedding.py +0 -49
- llama_cloud/types/azure_open_ai_embedding_config.py +0 -34
- llama_cloud/types/base_plan.py +0 -53
- llama_cloud/types/base_plan_metronome_plan_type.py +0 -17
- llama_cloud/types/base_plan_name.py +0 -57
- llama_cloud/types/base_plan_plan_frequency.py +0 -25
- llama_cloud/types/batch.py +0 -47
- llama_cloud/types/batch_item.py +0 -40
- llama_cloud/types/batch_paginated_list.py +0 -35
- llama_cloud/types/batch_public_output.py +0 -36
- llama_cloud/types/billing_period.py +0 -32
- llama_cloud/types/box_auth_mechanism.py +0 -17
- llama_cloud/types/character_chunking_config.py +0 -32
- llama_cloud/types/chat_app.py +0 -46
- llama_cloud/types/chat_app_response.py +0 -43
- llama_cloud/types/chat_data.py +0 -35
- llama_cloud/types/chat_message.py +0 -43
- llama_cloud/types/chunk_mode.py +0 -29
- llama_cloud/types/classification_result.py +0 -39
- llama_cloud/types/classifier_rule.py +0 -43
- llama_cloud/types/classify_job.py +0 -47
- llama_cloud/types/classify_job_results.py +0 -38
- llama_cloud/types/classify_parsing_configuration.py +0 -38
- llama_cloud/types/cloud_astra_db_vector_store.py +0 -51
- llama_cloud/types/cloud_az_storage_blob_data_source.py +0 -41
- llama_cloud/types/cloud_azure_ai_search_vector_store.py +0 -45
- llama_cloud/types/cloud_box_data_source.py +0 -42
- llama_cloud/types/cloud_confluence_data_source.py +0 -59
- llama_cloud/types/cloud_document.py +0 -40
- llama_cloud/types/cloud_document_create.py +0 -40
- llama_cloud/types/cloud_jira_data_source.py +0 -42
- llama_cloud/types/cloud_jira_data_source_v_2.py +0 -52
- llama_cloud/types/cloud_jira_data_source_v_2_api_version.py +0 -21
- llama_cloud/types/cloud_milvus_vector_store.py +0 -40
- llama_cloud/types/cloud_mongo_db_atlas_vector_search.py +0 -52
- llama_cloud/types/cloud_notion_page_data_source.py +0 -35
- llama_cloud/types/cloud_one_drive_data_source.py +0 -39
- llama_cloud/types/cloud_pinecone_vector_store.py +0 -49
- llama_cloud/types/cloud_postgres_vector_store.py +0 -44
- llama_cloud/types/cloud_qdrant_vector_store.py +0 -51
- llama_cloud/types/cloud_s_3_data_source.py +0 -39
- llama_cloud/types/cloud_sharepoint_data_source.py +0 -42
- llama_cloud/types/cloud_slack_data_source.py +0 -39
- llama_cloud/types/composite_retrieved_text_node.py +0 -42
- llama_cloud/types/composite_retrieved_text_node_with_score.py +0 -34
- llama_cloud/types/configurable_data_sink_names.py +0 -41
- llama_cloud/types/configurable_data_source_names.py +0 -57
- llama_cloud/types/credit_type.py +0 -32
- llama_cloud/types/data_sink_component.py +0 -22
- llama_cloud/types/data_sink_create.py +0 -39
- llama_cloud/types/data_sink_create_component.py +0 -22
- llama_cloud/types/data_source_component.py +0 -28
- llama_cloud/types/data_source_create.py +0 -41
- llama_cloud/types/data_source_create_component.py +0 -28
- llama_cloud/types/data_source_create_custom_metadata_value.py +0 -7
- llama_cloud/types/data_source_custom_metadata_value.py +0 -7
- llama_cloud/types/data_source_reader_version_metadata_reader_version.py +0 -25
- llama_cloud/types/data_source_update_dispatcher_config.py +0 -38
- llama_cloud/types/delete_params.py +0 -39
- llama_cloud/types/document_chunk_mode.py +0 -17
- llama_cloud/types/document_ingestion_job_params.py +0 -43
- llama_cloud/types/element_segmentation_config.py +0 -29
- llama_cloud/types/embedding_model_config.py +0 -43
- llama_cloud/types/embedding_model_config_embedding_config.py +0 -89
- llama_cloud/types/embedding_model_config_update.py +0 -33
- llama_cloud/types/embedding_model_config_update_embedding_config.py +0 -89
- llama_cloud/types/eval_execution_params.py +0 -41
- llama_cloud/types/extract_agent.py +0 -48
- llama_cloud/types/extract_agent_data_schema_value.py +0 -5
- llama_cloud/types/extract_config.py +0 -66
- llama_cloud/types/extract_config_priority.py +0 -29
- llama_cloud/types/extract_job.py +0 -38
- llama_cloud/types/extract_job_create.py +0 -46
- llama_cloud/types/extract_job_create_data_schema_override.py +0 -9
- llama_cloud/types/extract_job_create_data_schema_override_zero_value.py +0 -7
- llama_cloud/types/extract_job_create_priority.py +0 -29
- llama_cloud/types/extract_mode.py +0 -29
- llama_cloud/types/extract_models.py +0 -53
- llama_cloud/types/extract_resultset.py +0 -42
- llama_cloud/types/extract_resultset_data.py +0 -11
- llama_cloud/types/extract_resultset_data_item_value.py +0 -7
- llama_cloud/types/extract_resultset_data_zero_value.py +0 -7
- llama_cloud/types/extract_resultset_extraction_metadata_value.py +0 -7
- llama_cloud/types/extract_run.py +0 -55
- llama_cloud/types/extract_run_data.py +0 -11
- llama_cloud/types/extract_run_data_item_value.py +0 -5
- llama_cloud/types/extract_run_data_schema_value.py +0 -5
- llama_cloud/types/extract_run_data_zero_value.py +0 -5
- llama_cloud/types/extract_run_extraction_metadata_value.py +0 -7
- llama_cloud/types/extract_schema_generate_response.py +0 -38
- llama_cloud/types/extract_schema_generate_response_data_schema_value.py +0 -7
- llama_cloud/types/extract_schema_validate_response.py +0 -32
- llama_cloud/types/extract_schema_validate_response_data_schema_value.py +0 -7
- llama_cloud/types/extract_state.py +0 -29
- llama_cloud/types/extract_target.py +0 -17
- llama_cloud/types/failure_handling_config.py +0 -37
- llama_cloud/types/file_classification.py +0 -41
- llama_cloud/types/file_count_by_status_response.py +0 -37
- llama_cloud/types/file_create.py +0 -41
- llama_cloud/types/file_create_permission_info_value.py +0 -7
- llama_cloud/types/file_create_resource_info_value.py +0 -5
- llama_cloud/types/file_data.py +0 -36
- llama_cloud/types/file_filter.py +0 -40
- llama_cloud/types/file_id_presigned_url.py +0 -38
- llama_cloud/types/file_parse_public.py +0 -36
- llama_cloud/types/file_permission_info_value.py +0 -5
- llama_cloud/types/file_resource_info_value.py +0 -5
- llama_cloud/types/file_store_info_response.py +0 -34
- llama_cloud/types/file_store_info_response_status.py +0 -25
- llama_cloud/types/filter_condition.py +0 -29
- llama_cloud/types/filter_operation.py +0 -46
- llama_cloud/types/filter_operation_eq.py +0 -6
- llama_cloud/types/filter_operation_gt.py +0 -6
- llama_cloud/types/filter_operation_gte.py +0 -6
- llama_cloud/types/filter_operation_includes_item.py +0 -6
- llama_cloud/types/filter_operation_lt.py +0 -6
- llama_cloud/types/filter_operation_lte.py +0 -6
- llama_cloud/types/filter_operator.py +0 -73
- llama_cloud/types/free_credits_usage.py +0 -34
- llama_cloud/types/http_validation_error.py +0 -32
- llama_cloud/types/hugging_face_inference_api_embedding_token.py +0 -5
- llama_cloud/types/ingestion_error_response.py +0 -34
- llama_cloud/types/input_message.py +0 -40
- llama_cloud/types/job_name_mapping.py +0 -49
- llama_cloud/types/job_names.py +0 -81
- llama_cloud/types/job_record.py +0 -58
- llama_cloud/types/job_record_parameters.py +0 -111
- llama_cloud/types/job_record_with_usage_metrics.py +0 -36
- llama_cloud/types/l_lama_parse_transform_config.py +0 -37
- llama_cloud/types/legacy_parse_job_config.py +0 -207
- llama_cloud/types/license_info_response.py +0 -34
- llama_cloud/types/llama_extract_feature_availability.py +0 -34
- llama_cloud/types/llama_extract_mode_availability.py +0 -38
- llama_cloud/types/llama_extract_mode_availability_status.py +0 -17
- llama_cloud/types/llama_extract_settings.py +0 -67
- llama_cloud/types/llama_parse_parameters_priority.py +0 -29
- llama_cloud/types/llm_model_data.py +0 -38
- llama_cloud/types/llm_parameters.py +0 -39
- llama_cloud/types/load_files_job_config.py +0 -35
- llama_cloud/types/managed_ingestion_status.py +0 -41
- llama_cloud/types/managed_open_ai_embedding.py +0 -36
- llama_cloud/types/managed_open_ai_embedding_config.py +0 -34
- llama_cloud/types/message_annotation.py +0 -33
- llama_cloud/types/metadata_filter.py +0 -44
- llama_cloud/types/metadata_filter_value.py +0 -5
- llama_cloud/types/metadata_filters_filters_item.py +0 -8
- llama_cloud/types/multimodal_parse_resolution.py +0 -17
- llama_cloud/types/node_relationship.py +0 -44
- llama_cloud/types/none_chunking_config.py +0 -29
- llama_cloud/types/none_segmentation_config.py +0 -29
- llama_cloud/types/object_type.py +0 -33
- llama_cloud/types/open_ai_embedding.py +0 -47
- llama_cloud/types/open_ai_embedding_config.py +0 -34
- llama_cloud/types/organization.py +0 -43
- llama_cloud/types/organization_create.py +0 -35
- llama_cloud/types/page_figure_metadata.py +0 -37
- llama_cloud/types/page_screenshot_metadata.py +0 -34
- llama_cloud/types/page_segmentation_config.py +0 -31
- llama_cloud/types/paginated_extract_runs_response.py +0 -39
- llama_cloud/types/paginated_jobs_history_with_metrics.py +0 -35
- llama_cloud/types/paginated_list_cloud_documents_response.py +0 -35
- llama_cloud/types/paginated_list_pipeline_files_response.py +0 -35
- llama_cloud/types/paginated_response_agent_data.py +0 -34
- llama_cloud/types/paginated_response_aggregate_group.py +0 -34
- llama_cloud/types/paginated_response_classify_job.py +0 -34
- llama_cloud/types/paginated_response_quota_configuration.py +0 -36
- llama_cloud/types/parse_configuration.py +0 -44
- llama_cloud/types/parse_configuration_create.py +0 -41
- llama_cloud/types/parse_configuration_filter.py +0 -40
- llama_cloud/types/parse_configuration_query_response.py +0 -38
- llama_cloud/types/parse_job_config.py +0 -149
- llama_cloud/types/parse_job_config_priority.py +0 -29
- llama_cloud/types/parse_plan_level.py +0 -21
- llama_cloud/types/parser_languages.py +0 -361
- llama_cloud/types/parsing_history_item.py +0 -39
- llama_cloud/types/parsing_job.py +0 -35
- llama_cloud/types/parsing_job_json_result.py +0 -32
- llama_cloud/types/parsing_job_markdown_result.py +0 -32
- llama_cloud/types/parsing_job_structured_result.py +0 -32
- llama_cloud/types/parsing_job_text_result.py +0 -32
- llama_cloud/types/partition_names.py +0 -45
- llama_cloud/types/permission.py +0 -40
- llama_cloud/types/pg_vector_distance_method.py +0 -43
- llama_cloud/types/pg_vector_hnsw_settings.py +0 -45
- llama_cloud/types/pg_vector_vector_type.py +0 -35
- llama_cloud/types/pipeline_configuration_hashes.py +0 -37
- llama_cloud/types/pipeline_create.py +0 -65
- llama_cloud/types/pipeline_create_embedding_config.py +0 -89
- llama_cloud/types/pipeline_create_transform_config.py +0 -8
- llama_cloud/types/pipeline_data_source.py +0 -55
- llama_cloud/types/pipeline_data_source_component.py +0 -28
- llama_cloud/types/pipeline_data_source_create.py +0 -36
- llama_cloud/types/pipeline_data_source_custom_metadata_value.py +0 -7
- llama_cloud/types/pipeline_data_source_status.py +0 -33
- llama_cloud/types/pipeline_deployment.py +0 -37
- llama_cloud/types/pipeline_embedding_config.py +0 -100
- llama_cloud/types/pipeline_file.py +0 -58
- llama_cloud/types/pipeline_file_config_hash_value.py +0 -5
- llama_cloud/types/pipeline_file_create.py +0 -37
- llama_cloud/types/pipeline_file_create_custom_metadata_value.py +0 -7
- llama_cloud/types/pipeline_file_custom_metadata_value.py +0 -7
- llama_cloud/types/pipeline_file_permission_info_value.py +0 -7
- llama_cloud/types/pipeline_file_resource_info_value.py +0 -7
- llama_cloud/types/pipeline_file_status.py +0 -33
- llama_cloud/types/pipeline_file_update_dispatcher_config.py +0 -38
- llama_cloud/types/pipeline_file_updater_config.py +0 -44
- llama_cloud/types/pipeline_managed_ingestion_job_params.py +0 -37
- llama_cloud/types/pipeline_status.py +0 -17
- llama_cloud/types/pipeline_transform_config.py +0 -31
- llama_cloud/types/plan_limits.py +0 -53
- llama_cloud/types/playground_session.py +0 -51
- llama_cloud/types/pooling.py +0 -29
- llama_cloud/types/preset_composite_retrieval_params.py +0 -37
- llama_cloud/types/preset_retrieval_params_search_filters_inference_schema_value.py +0 -7
- llama_cloud/types/project_create.py +0 -35
- llama_cloud/types/prompt_conf.py +0 -38
- llama_cloud/types/public_model_name.py +0 -97
- llama_cloud/types/quota_configuration.py +0 -53
- llama_cloud/types/quota_configuration_configuration_type.py +0 -33
- llama_cloud/types/quota_configuration_status.py +0 -21
- llama_cloud/types/quota_rate_limit_configuration_value.py +0 -38
- llama_cloud/types/quota_rate_limit_configuration_value_denominator_units.py +0 -29
- llama_cloud/types/re_rank_config.py +0 -35
- llama_cloud/types/re_ranker_type.py +0 -41
- llama_cloud/types/recurring_credit_grant.py +0 -44
- llama_cloud/types/related_node_info.py +0 -36
- llama_cloud/types/related_node_info_node_type.py +0 -7
- llama_cloud/types/retrieve_results.py +0 -56
- llama_cloud/types/retriever_create.py +0 -37
- llama_cloud/types/role.py +0 -40
- llama_cloud/types/schema_generation_availability.py +0 -33
- llama_cloud/types/schema_generation_availability_status.py +0 -17
- llama_cloud/types/schema_relax_mode.py +0 -25
- llama_cloud/types/semantic_chunking_config.py +0 -32
- llama_cloud/types/sentence_chunking_config.py +0 -34
- llama_cloud/types/sparse_model_type.py +0 -33
- llama_cloud/types/struct_mode.py +0 -33
- llama_cloud/types/struct_parse_conf.py +0 -63
- llama_cloud/types/supported_llm_model.py +0 -40
- llama_cloud/types/supported_llm_model_names.py +0 -69
- llama_cloud/types/text_node.py +0 -67
- llama_cloud/types/text_node_relationships_value.py +0 -7
- llama_cloud/types/text_node_with_score.py +0 -39
- llama_cloud/types/token_chunking_config.py +0 -33
- llama_cloud/types/update_user_response.py +0 -33
- llama_cloud/types/usage_and_plan.py +0 -34
- llama_cloud/types/usage_metric_response.py +0 -34
- llama_cloud/types/usage_response.py +0 -43
- llama_cloud/types/usage_response_active_alerts_item.py +0 -37
- llama_cloud/types/user_job_record.py +0 -32
- llama_cloud/types/user_organization.py +0 -47
- llama_cloud/types/user_organization_create.py +0 -38
- llama_cloud/types/user_organization_delete.py +0 -37
- llama_cloud/types/user_organization_role.py +0 -42
- llama_cloud/types/user_summary.py +0 -38
- llama_cloud/types/validation_error.py +0 -34
- llama_cloud/types/validation_error_loc_item.py +0 -5
- llama_cloud/types/vertex_embedding_mode.py +0 -38
- llama_cloud/types/webhook_configuration.py +0 -39
- llama_cloud/types/webhook_configuration_webhook_events_item.py +0 -57
- llama_cloud-0.1.41.dist-info/LICENSE +0 -21
- llama_cloud-0.1.41.dist-info/METADATA +0 -106
- llama_cloud-0.1.41.dist-info/RECORD +0 -385
|
@@ -0,0 +1,1041 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Managed index.
|
|
3
|
+
|
|
4
|
+
A managed Index - where the index is accessible via some API that
|
|
5
|
+
interfaces a managed service.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import io
|
|
12
|
+
import os
|
|
13
|
+
import time
|
|
14
|
+
import asyncio
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Any, Dict, List, Type, Callable, Optional, Sequence, Awaitable
|
|
17
|
+
from urllib.parse import quote_plus
|
|
18
|
+
from typing_extensions import override
|
|
19
|
+
|
|
20
|
+
import httpx
|
|
21
|
+
from llama_index.core.schema import BaseNode, Document, TransformComponent
|
|
22
|
+
from llama_index.core.settings import Settings
|
|
23
|
+
from llama_index.core.constants import DEFAULT_APP_URL, DEFAULT_PROJECT_NAME
|
|
24
|
+
from llama_index.core.llms.utils import LLMType # type: ignore
|
|
25
|
+
from llama_index.core.data_structs import IndexDict
|
|
26
|
+
from llama_index.core.callbacks.base import CallbackManager
|
|
27
|
+
from llama_index.core.base.base_retriever import BaseRetriever
|
|
28
|
+
from llama_index.core.indices.managed.base import BaseManagedIndex
|
|
29
|
+
from llama_index.core.base.base_query_engine import BaseQueryEngine
|
|
30
|
+
from llama_index.core.storage.docstore.types import RefDocInfo
|
|
31
|
+
|
|
32
|
+
from llama_cloud import LlamaCloud, AsyncLlamaCloud
|
|
33
|
+
from llama_cloud.types import LlamaParseParametersParam, ManagedIngestionStatusResponse
|
|
34
|
+
from llama_cloud._exceptions import APIStatusError
|
|
35
|
+
from llama_cloud.types.pipelines import CloudDocument, CloudDocumentCreateParam
|
|
36
|
+
from llama_cloud.types.pipeline_create_params import EmbeddingConfig, TransformConfig
|
|
37
|
+
from llama_cloud.types.pipelines.file_create_params import Body as PipelineFileCreate
|
|
38
|
+
|
|
39
|
+
from .api_utils import (
|
|
40
|
+
default_transform_config,
|
|
41
|
+
resolve_project_and_pipeline,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class LlamaCloudIndex(BaseManagedIndex):
|
|
48
|
+
"""
|
|
49
|
+
A managed index that stores documents in LlamaCloud.
|
|
50
|
+
|
|
51
|
+
There are two main ways to use this index:
|
|
52
|
+
|
|
53
|
+
1. Connect to an existing LlamaCloud index:
|
|
54
|
+
```python
|
|
55
|
+
# Connect using index ID (same as pipeline ID)
|
|
56
|
+
index = LlamaCloudIndex(id="<index_id>")
|
|
57
|
+
|
|
58
|
+
# Or connect using index name
|
|
59
|
+
index = LlamaCloudIndex(name="my_index", project_name="my_project", organization_id="my_org_id")
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
2. Create a new index with documents:
|
|
63
|
+
```python
|
|
64
|
+
documents = [Document(...), Document(...)]
|
|
65
|
+
index = LlamaCloudIndex.from_documents(
|
|
66
|
+
documents, name="my_new_index", project_name="my_project", organization_id="my_org_id"
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
The index supports standard operations like retrieval and querying
|
|
71
|
+
through the as_query_engine() and as_retriever() methods.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
def __init__(
|
|
75
|
+
self,
|
|
76
|
+
# index identifier
|
|
77
|
+
name: Optional[str] = None,
|
|
78
|
+
pipeline_id: Optional[str] = None,
|
|
79
|
+
index_id: Optional[str] = None, # alias for pipeline_id
|
|
80
|
+
id: Optional[str] = None, # alias for pipeline_id
|
|
81
|
+
# project identifier
|
|
82
|
+
project_id: Optional[str] = None,
|
|
83
|
+
project_name: str = DEFAULT_PROJECT_NAME,
|
|
84
|
+
organization_id: Optional[str] = None,
|
|
85
|
+
# connection params
|
|
86
|
+
api_key: Optional[str] = None,
|
|
87
|
+
base_url: Optional[str] = None,
|
|
88
|
+
app_url: Optional[str] = None,
|
|
89
|
+
timeout: int = 60,
|
|
90
|
+
httpx_client: Optional[httpx.Client] = None,
|
|
91
|
+
async_httpx_client: Optional[httpx.AsyncClient] = None,
|
|
92
|
+
# misc
|
|
93
|
+
show_progress: bool = False,
|
|
94
|
+
callback_manager: Optional[CallbackManager] = None,
|
|
95
|
+
# deprecated
|
|
96
|
+
nodes: Optional[List[BaseNode]] = None,
|
|
97
|
+
transformations: Optional[List[TransformComponent]] = None,
|
|
98
|
+
**kwargs: Any,
|
|
99
|
+
) -> None:
|
|
100
|
+
"""Initialize the Platform Index."""
|
|
101
|
+
if sum([bool(id), bool(index_id), bool(pipeline_id), bool(name)]) != 1:
|
|
102
|
+
raise ValueError(
|
|
103
|
+
"Exactly one of `name`, `id`, `pipeline_id` or `index_id` must be provided to identify the index."
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
if nodes is not None:
|
|
107
|
+
# TODO: How to handle uploading nodes without running transforms on them?
|
|
108
|
+
raise ValueError("LlamaCloudIndex does not support nodes on initialization")
|
|
109
|
+
|
|
110
|
+
if transformations is not None:
|
|
111
|
+
raise ValueError(
|
|
112
|
+
"Setting transformations is deprecated for LlamaCloudIndex, please use the `transform_config` and `embedding_config` parameters instead."
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# initialize clients
|
|
116
|
+
self._httpx_client = httpx_client
|
|
117
|
+
self._async_httpx_client = async_httpx_client
|
|
118
|
+
self._client = LlamaCloud(
|
|
119
|
+
api_key=api_key,
|
|
120
|
+
base_url=base_url,
|
|
121
|
+
timeout=timeout,
|
|
122
|
+
http_client=httpx_client,
|
|
123
|
+
)
|
|
124
|
+
self._aclient = AsyncLlamaCloud(
|
|
125
|
+
api_key=api_key,
|
|
126
|
+
base_url=base_url,
|
|
127
|
+
timeout=timeout,
|
|
128
|
+
http_client=async_httpx_client,
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
self.organization_id = organization_id
|
|
132
|
+
pipeline_id = id or index_id or pipeline_id
|
|
133
|
+
|
|
134
|
+
self.project, self.pipeline = resolve_project_and_pipeline(
|
|
135
|
+
self._client, name, pipeline_id, project_name, project_id, organization_id
|
|
136
|
+
)
|
|
137
|
+
self.name = self.pipeline.name
|
|
138
|
+
self.project_name = self.project.name
|
|
139
|
+
|
|
140
|
+
self._api_key = api_key
|
|
141
|
+
self._base_url = base_url
|
|
142
|
+
self._app_url = app_url
|
|
143
|
+
self._timeout = timeout
|
|
144
|
+
self._show_progress = show_progress
|
|
145
|
+
self._callback_manager = callback_manager or Settings.callback_manager
|
|
146
|
+
|
|
147
|
+
if kwargs:
|
|
148
|
+
logger.warning(f"Ignoring unrecognized kwargs: {kwargs}")
|
|
149
|
+
|
|
150
|
+
def __del__(self) -> None:
|
|
151
|
+
"""Close HTTPX clients if they were created by this instance."""
|
|
152
|
+
if self._httpx_client is None:
|
|
153
|
+
self._client.close()
|
|
154
|
+
|
|
155
|
+
if self._async_httpx_client is None:
|
|
156
|
+
event_loop = asyncio.get_event_loop()
|
|
157
|
+
event_loop.create_task(self._aclient.close())
|
|
158
|
+
|
|
159
|
+
@property
|
|
160
|
+
def id(self) -> str:
|
|
161
|
+
"""Return the pipeline (aka index) ID."""
|
|
162
|
+
return self.pipeline.id
|
|
163
|
+
|
|
164
|
+
def _wait_for_resources(
|
|
165
|
+
self,
|
|
166
|
+
resource_ids: Sequence[str],
|
|
167
|
+
get_status_fn: Callable[[str], ManagedIngestionStatusResponse],
|
|
168
|
+
resource_name: str,
|
|
169
|
+
verbose: bool,
|
|
170
|
+
raise_on_error: bool,
|
|
171
|
+
sleep_interval: float,
|
|
172
|
+
) -> None:
|
|
173
|
+
"""
|
|
174
|
+
Poll `get_status_fn` until every id in `resource_ids` is finished.
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
resource_ids: Iterable of resource ids to watch.
|
|
178
|
+
get_status_fn: Callable that maps a resource id → ManagedIngestionStatus.
|
|
179
|
+
resource_name: Text used in log / error messages: "file", "document", ….
|
|
180
|
+
verbose: Print a progress bar.
|
|
181
|
+
raise_on_error: Whether to raise on ManagedIngestionStatus.ERROR.
|
|
182
|
+
sleep_interval: Seconds between polls (min 0.5 s to avoid rate-limits).
|
|
183
|
+
|
|
184
|
+
"""
|
|
185
|
+
if not resource_ids: # nothing to do
|
|
186
|
+
return
|
|
187
|
+
|
|
188
|
+
if verbose:
|
|
189
|
+
print(
|
|
190
|
+
f"Loading {resource_name}{'s' if len(resource_ids) > 1 else ''}",
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
pending: set[str] = set(resource_ids)
|
|
194
|
+
while pending:
|
|
195
|
+
finished: set[str] = set()
|
|
196
|
+
for rid in pending:
|
|
197
|
+
try:
|
|
198
|
+
status_response = get_status_fn(rid)
|
|
199
|
+
status = status_response.status
|
|
200
|
+
if status in (
|
|
201
|
+
"NOT_STARTED",
|
|
202
|
+
"IN_PROGRESS",
|
|
203
|
+
):
|
|
204
|
+
continue # still working
|
|
205
|
+
|
|
206
|
+
if status == "ERROR":
|
|
207
|
+
if verbose:
|
|
208
|
+
print(f"{resource_name.capitalize()} ingestion failed for {rid}")
|
|
209
|
+
if raise_on_error:
|
|
210
|
+
raise ValueError(f"{resource_name.capitalize()} ingestion failed for {rid}")
|
|
211
|
+
|
|
212
|
+
finished.add(rid)
|
|
213
|
+
if verbose:
|
|
214
|
+
print(f"{resource_name.capitalize()} ingestion finished for {rid}")
|
|
215
|
+
|
|
216
|
+
except httpx.HTTPStatusError as e:
|
|
217
|
+
if e.response.status_code in (429, 500, 502, 503, 504):
|
|
218
|
+
pass
|
|
219
|
+
else:
|
|
220
|
+
raise
|
|
221
|
+
|
|
222
|
+
pending -= finished
|
|
223
|
+
|
|
224
|
+
if pending:
|
|
225
|
+
time.sleep(sleep_interval)
|
|
226
|
+
|
|
227
|
+
if verbose:
|
|
228
|
+
print("Done!")
|
|
229
|
+
|
|
230
|
+
async def _await_for_resources(
|
|
231
|
+
self,
|
|
232
|
+
resource_ids: Sequence[str],
|
|
233
|
+
get_status_fn: Callable[[str], Awaitable[ManagedIngestionStatusResponse]],
|
|
234
|
+
resource_name: str,
|
|
235
|
+
verbose: bool,
|
|
236
|
+
raise_on_error: bool,
|
|
237
|
+
sleep_interval: float,
|
|
238
|
+
) -> None:
|
|
239
|
+
"""
|
|
240
|
+
Poll `get_status_fn` until every id in `resource_ids` is finished.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
resource_ids: Iterable of resource ids to watch.
|
|
244
|
+
get_status_fn: Callable that maps a resource id → ManagedIngestionStatus.
|
|
245
|
+
resource_name: Text used in log / error messages: "file", "document", ….
|
|
246
|
+
verbose: Print a progress bar.
|
|
247
|
+
raise_on_error: Whether to raise on ManagedIngestionStatus.ERROR.
|
|
248
|
+
sleep_interval: Seconds between polls (min 0.5 s to avoid rate-limits).
|
|
249
|
+
|
|
250
|
+
"""
|
|
251
|
+
if not resource_ids: # nothing to do
|
|
252
|
+
return
|
|
253
|
+
|
|
254
|
+
if verbose:
|
|
255
|
+
print(
|
|
256
|
+
f"Loading {resource_name}{'s' if len(resource_ids) > 1 else ''}",
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
pending: set[str] = set(resource_ids)
|
|
260
|
+
while pending:
|
|
261
|
+
finished: set[str] = set()
|
|
262
|
+
for rid in pending:
|
|
263
|
+
try:
|
|
264
|
+
status_response = await get_status_fn(rid)
|
|
265
|
+
status = status_response.status
|
|
266
|
+
if status in (
|
|
267
|
+
"NOT_STARTED",
|
|
268
|
+
"IN_PROGRESS",
|
|
269
|
+
):
|
|
270
|
+
continue # still working
|
|
271
|
+
|
|
272
|
+
if status == "ERROR":
|
|
273
|
+
if verbose:
|
|
274
|
+
print(f"{resource_name.capitalize()} ingestion failed for {rid}")
|
|
275
|
+
if raise_on_error:
|
|
276
|
+
raise ValueError(f"{resource_name.capitalize()} ingestion failed for {rid}")
|
|
277
|
+
|
|
278
|
+
finished.add(rid)
|
|
279
|
+
if verbose:
|
|
280
|
+
print(f"{resource_name.capitalize()} ingestion finished for {rid}")
|
|
281
|
+
|
|
282
|
+
except httpx.HTTPStatusError as e:
|
|
283
|
+
if e.response.status_code in (429, 500, 502, 503, 504):
|
|
284
|
+
pass
|
|
285
|
+
else:
|
|
286
|
+
raise
|
|
287
|
+
|
|
288
|
+
pending -= finished
|
|
289
|
+
|
|
290
|
+
if pending:
|
|
291
|
+
await asyncio.sleep(sleep_interval)
|
|
292
|
+
|
|
293
|
+
if verbose:
|
|
294
|
+
print("Done!")
|
|
295
|
+
|
|
296
|
+
def wait_for_completion(
|
|
297
|
+
self,
|
|
298
|
+
file_ids: Optional[Sequence[str]] = None,
|
|
299
|
+
doc_ids: Optional[Sequence[str]] = None,
|
|
300
|
+
verbose: bool = False,
|
|
301
|
+
raise_on_partial_success: bool = False,
|
|
302
|
+
raise_on_error: bool = False,
|
|
303
|
+
sleep_interval: float = 1.0,
|
|
304
|
+
) -> Optional[ManagedIngestionStatusResponse]:
|
|
305
|
+
"""
|
|
306
|
+
Block until the requested ingestion work is finished.
|
|
307
|
+
|
|
308
|
+
- If `file_ids` is given → wait for those files.
|
|
309
|
+
- If `doc_ids` is given → wait for those documents.
|
|
310
|
+
- If neither is given → wait for the pipeline itself last so that retrieval works.
|
|
311
|
+
- Always waits for the pipeline itself last so that retrieval works.
|
|
312
|
+
|
|
313
|
+
Returns the final PipelineStatus response (or None if only waiting on
|
|
314
|
+
files / documents).
|
|
315
|
+
"""
|
|
316
|
+
# Batch of files (if any)
|
|
317
|
+
if file_ids:
|
|
318
|
+
self._wait_for_resources(
|
|
319
|
+
file_ids,
|
|
320
|
+
lambda fid: self._client.pipelines.files.get_status(file_id=fid, pipeline_id=self.pipeline.id),
|
|
321
|
+
resource_name="file",
|
|
322
|
+
verbose=verbose,
|
|
323
|
+
raise_on_error=raise_on_error,
|
|
324
|
+
sleep_interval=sleep_interval,
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# Batch of documents (if any)
|
|
328
|
+
if doc_ids:
|
|
329
|
+
self._wait_for_resources(
|
|
330
|
+
doc_ids,
|
|
331
|
+
lambda did: self._client.pipelines.documents.get_status(
|
|
332
|
+
document_id=quote_plus(quote_plus(did)),
|
|
333
|
+
pipeline_id=self.pipeline.id,
|
|
334
|
+
),
|
|
335
|
+
resource_name="document",
|
|
336
|
+
verbose=verbose,
|
|
337
|
+
raise_on_error=raise_on_error,
|
|
338
|
+
sleep_interval=sleep_interval,
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# Finally, wait for the pipeline
|
|
342
|
+
if verbose:
|
|
343
|
+
print(f"Syncing pipeline {self.pipeline.id}")
|
|
344
|
+
|
|
345
|
+
status_response: Optional[ManagedIngestionStatusResponse] = None
|
|
346
|
+
while True:
|
|
347
|
+
try:
|
|
348
|
+
status_response = self._client.pipelines.get_status(pipeline_id=self.pipeline.id)
|
|
349
|
+
status = status_response.status
|
|
350
|
+
except httpx.HTTPStatusError as e:
|
|
351
|
+
if e.response.status_code in (429, 500, 502, 503, 504):
|
|
352
|
+
time.sleep(sleep_interval)
|
|
353
|
+
continue
|
|
354
|
+
else:
|
|
355
|
+
raise
|
|
356
|
+
|
|
357
|
+
if status == "ERROR" or (raise_on_partial_success and status == "PARTIAL_SUCCESS"):
|
|
358
|
+
raise ValueError(
|
|
359
|
+
f"Pipeline ingestion failed for {self.pipeline.id}. Details: {status_response.model_dump_json()}"
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
if status in (
|
|
363
|
+
"NOT_STARTED",
|
|
364
|
+
"IN_PROGRESS",
|
|
365
|
+
):
|
|
366
|
+
if verbose:
|
|
367
|
+
print(".", end="")
|
|
368
|
+
time.sleep(sleep_interval)
|
|
369
|
+
else:
|
|
370
|
+
if verbose:
|
|
371
|
+
print("Done!")
|
|
372
|
+
|
|
373
|
+
return status_response
|
|
374
|
+
|
|
375
|
+
async def await_for_completion(
|
|
376
|
+
self,
|
|
377
|
+
file_ids: Optional[Sequence[str]] = None,
|
|
378
|
+
doc_ids: Optional[Sequence[str]] = None,
|
|
379
|
+
verbose: bool = False,
|
|
380
|
+
raise_on_partial_success: bool = False,
|
|
381
|
+
raise_on_error: bool = False,
|
|
382
|
+
sleep_interval: float = 1.0,
|
|
383
|
+
) -> Optional[ManagedIngestionStatusResponse]:
|
|
384
|
+
"""
|
|
385
|
+
Block until the requested ingestion work is finished.
|
|
386
|
+
|
|
387
|
+
- If `file_ids` is given → wait for those files.
|
|
388
|
+
- If `doc_ids` is given → wait for those documents.
|
|
389
|
+
- If neither is given → wait for the pipeline itself last so that retrieval works.
|
|
390
|
+
- Always waits for the pipeline itself last so that retrieval works.
|
|
391
|
+
|
|
392
|
+
Returns the final PipelineStatus response (or None if only waiting on
|
|
393
|
+
files / documents).
|
|
394
|
+
"""
|
|
395
|
+
# Batch of files (if any)
|
|
396
|
+
if file_ids:
|
|
397
|
+
await self._await_for_resources(
|
|
398
|
+
file_ids,
|
|
399
|
+
lambda fid: self._aclient.pipelines.files.get_status(file_id=fid, pipeline_id=self.pipeline.id),
|
|
400
|
+
resource_name="file",
|
|
401
|
+
verbose=verbose,
|
|
402
|
+
raise_on_error=raise_on_error,
|
|
403
|
+
sleep_interval=sleep_interval,
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
# Batch of documents (if any)
|
|
407
|
+
if doc_ids:
|
|
408
|
+
await self._await_for_resources(
|
|
409
|
+
doc_ids,
|
|
410
|
+
lambda did: self._aclient.pipelines.documents.get_status(
|
|
411
|
+
document_id=quote_plus(quote_plus(did)),
|
|
412
|
+
pipeline_id=self.pipeline.id,
|
|
413
|
+
),
|
|
414
|
+
resource_name="document",
|
|
415
|
+
verbose=verbose,
|
|
416
|
+
raise_on_error=raise_on_error,
|
|
417
|
+
sleep_interval=sleep_interval,
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
# Finally, wait for the pipeline
|
|
421
|
+
if verbose:
|
|
422
|
+
print(f"Syncing pipeline {self.pipeline.id}")
|
|
423
|
+
|
|
424
|
+
await self._aclient.pipelines.sync.create(pipeline_id=self.pipeline.id)
|
|
425
|
+
|
|
426
|
+
status_response: Optional[ManagedIngestionStatusResponse] = None
|
|
427
|
+
while True:
|
|
428
|
+
try:
|
|
429
|
+
status_response = await self._aclient.pipelines.get_status(pipeline_id=self.pipeline.id)
|
|
430
|
+
status = status_response.status
|
|
431
|
+
except httpx.HTTPStatusError as e:
|
|
432
|
+
if e.response.status_code in (429, 500, 502, 503, 504):
|
|
433
|
+
await asyncio.sleep(sleep_interval)
|
|
434
|
+
continue
|
|
435
|
+
else:
|
|
436
|
+
raise
|
|
437
|
+
|
|
438
|
+
if status == "ERROR" or (raise_on_partial_success and status == "PARTIAL_SUCCESS"):
|
|
439
|
+
raise ValueError(
|
|
440
|
+
f"Pipeline ingestion failed for {self.pipeline.id}. Details: {status_response.model_dump_json()}"
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
if status in (
|
|
444
|
+
"NOT_STARTED",
|
|
445
|
+
"IN_PROGRESS",
|
|
446
|
+
):
|
|
447
|
+
if verbose:
|
|
448
|
+
print(".", end="")
|
|
449
|
+
await asyncio.sleep(sleep_interval)
|
|
450
|
+
else:
|
|
451
|
+
if verbose:
|
|
452
|
+
print("Done!")
|
|
453
|
+
|
|
454
|
+
return status_response
|
|
455
|
+
|
|
456
|
+
@classmethod
|
|
457
|
+
def create_index(
|
|
458
|
+
cls: Type["LlamaCloudIndex"],
|
|
459
|
+
name: str,
|
|
460
|
+
project_name: Optional[str] = None,
|
|
461
|
+
project_id: Optional[str] = None,
|
|
462
|
+
api_key: Optional[str] = None,
|
|
463
|
+
base_url: Optional[str] = None,
|
|
464
|
+
app_url: Optional[str] = None,
|
|
465
|
+
timeout: int = 60,
|
|
466
|
+
verbose: bool = False,
|
|
467
|
+
# ingestion configs
|
|
468
|
+
embedding_config: Optional[EmbeddingConfig] = None,
|
|
469
|
+
transform_config: Optional[TransformConfig] = None,
|
|
470
|
+
llama_parse_parameters: Optional[LlamaParseParametersParam] = None,
|
|
471
|
+
**kwargs: Any,
|
|
472
|
+
) -> "LlamaCloudIndex":
|
|
473
|
+
"""Create a new LlamaCloud managed index."""
|
|
474
|
+
app_url = app_url or os.environ.get("LLAMA_CLOUD_APP_URL", DEFAULT_APP_URL)
|
|
475
|
+
client = LlamaCloud(
|
|
476
|
+
api_key=api_key,
|
|
477
|
+
base_url=base_url,
|
|
478
|
+
timeout=timeout,
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
if project_id is None and project_name is not None:
|
|
482
|
+
projects = client.projects.list(project_name=project_name)
|
|
483
|
+
for project in projects:
|
|
484
|
+
if project.name == project_name:
|
|
485
|
+
project_id = project.id
|
|
486
|
+
break
|
|
487
|
+
|
|
488
|
+
if project_id is None:
|
|
489
|
+
# create project if it doesn't exist
|
|
490
|
+
# Note: projects.upsert() is not available in new API, would need to be handled differently
|
|
491
|
+
# For now, assume project exists or needs manual creation
|
|
492
|
+
raise ValueError("project_id is required. Please provide a project_id or create the project manually.")
|
|
493
|
+
|
|
494
|
+
# create pipeline
|
|
495
|
+
pipeline = client.pipelines.upsert(
|
|
496
|
+
project_id=project_id,
|
|
497
|
+
name=name,
|
|
498
|
+
pipeline_type="MANAGED",
|
|
499
|
+
embedding_config=embedding_config, # If it's None, the default embedding config will be used
|
|
500
|
+
transform_config=transform_config or default_transform_config(),
|
|
501
|
+
llama_parse_parameters=llama_parse_parameters or LlamaParseParametersParam(),
|
|
502
|
+
)
|
|
503
|
+
if verbose:
|
|
504
|
+
print(f"Created pipeline {pipeline.id} with name {pipeline.name}")
|
|
505
|
+
|
|
506
|
+
return cls(
|
|
507
|
+
name,
|
|
508
|
+
project_id=project_id,
|
|
509
|
+
api_key=api_key,
|
|
510
|
+
base_url=base_url,
|
|
511
|
+
app_url=app_url,
|
|
512
|
+
timeout=timeout,
|
|
513
|
+
**kwargs,
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
@classmethod
|
|
517
|
+
async def acreate_index(
|
|
518
|
+
cls: Type["LlamaCloudIndex"],
|
|
519
|
+
name: str,
|
|
520
|
+
project_name: Optional[str] = None,
|
|
521
|
+
project_id: Optional[str] = None,
|
|
522
|
+
api_key: Optional[str] = None,
|
|
523
|
+
base_url: Optional[str] = None,
|
|
524
|
+
app_url: Optional[str] = None,
|
|
525
|
+
timeout: int = 60,
|
|
526
|
+
verbose: bool = False,
|
|
527
|
+
# ingestion configs
|
|
528
|
+
embedding_config: Optional[EmbeddingConfig] = None,
|
|
529
|
+
transform_config: Optional[TransformConfig] = None,
|
|
530
|
+
llama_parse_parameters: Optional[LlamaParseParametersParam] = None,
|
|
531
|
+
**kwargs: Any,
|
|
532
|
+
) -> "LlamaCloudIndex":
|
|
533
|
+
"""Create a new LlamaCloud managed index."""
|
|
534
|
+
app_url = app_url or os.environ.get("LLAMA_CLOUD_APP_URL", DEFAULT_APP_URL)
|
|
535
|
+
client = AsyncLlamaCloud(
|
|
536
|
+
api_key=api_key,
|
|
537
|
+
base_url=base_url,
|
|
538
|
+
timeout=timeout,
|
|
539
|
+
)
|
|
540
|
+
|
|
541
|
+
if project_id is None and project_name is not None:
|
|
542
|
+
projects = await client.projects.list(project_name=project_name)
|
|
543
|
+
for project in projects:
|
|
544
|
+
if project.name == project_name:
|
|
545
|
+
project_id = project.id
|
|
546
|
+
break
|
|
547
|
+
|
|
548
|
+
if project_id is None:
|
|
549
|
+
# create project if it doesn't exist
|
|
550
|
+
# Note: projects.upsert() is not available in new API, would need to be handled differently
|
|
551
|
+
# For now, assume project exists or needs manual creation
|
|
552
|
+
raise ValueError("project_id is required. Please provide a project_id or create the project manually.")
|
|
553
|
+
|
|
554
|
+
# create pipeline
|
|
555
|
+
pipeline = await client.pipelines.upsert(
|
|
556
|
+
project_id=project_id,
|
|
557
|
+
name=name,
|
|
558
|
+
pipeline_type="MANAGED",
|
|
559
|
+
embedding_config=embedding_config, # If it's None, the default embedding config will be used
|
|
560
|
+
transform_config=transform_config or default_transform_config(),
|
|
561
|
+
llama_parse_parameters=llama_parse_parameters or LlamaParseParametersParam(),
|
|
562
|
+
)
|
|
563
|
+
if verbose:
|
|
564
|
+
print(f"Created pipeline {pipeline.id} with name {pipeline.name}")
|
|
565
|
+
|
|
566
|
+
return cls(
|
|
567
|
+
name,
|
|
568
|
+
project_id=project_id,
|
|
569
|
+
api_key=api_key,
|
|
570
|
+
base_url=base_url,
|
|
571
|
+
app_url=app_url,
|
|
572
|
+
timeout=timeout,
|
|
573
|
+
**kwargs,
|
|
574
|
+
)
|
|
575
|
+
|
|
576
|
+
@classmethod
def from_documents(  # type: ignore
    cls: Type["LlamaCloudIndex"],
    documents: List[Document],
    name: str,
    project_name: str = DEFAULT_PROJECT_NAME,
    organization_id: Optional[str] = None,
    project_id: Optional[str] = None,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    app_url: Optional[str] = None,
    timeout: int = 60,
    verbose: bool = False,
    raise_on_error: bool = False,
    # ingestion configs
    embedding_config: Optional[EmbeddingConfig] = None,
    transform_config: Optional[TransformConfig] = None,
) -> "LlamaCloudIndex":
    """Build a LlamaCloud managed index from a sequence of documents.

    Creates (or reuses) the managed pipeline via ``create_index``, uploads
    *documents* to it (which starts ingestion), triggers a sync, and blocks
    until ingestion of those documents completes.
    """
    index = cls.create_index(
        name=name,
        project_name=project_name,
        organization_id=organization_id,
        api_key=api_key,
        base_url=base_url,
        app_url=app_url,
        timeout=timeout,
        verbose=verbose,
        embedding_config=embedding_config,
        transform_config=transform_config,
        project_id=project_id,
    )

    app_url = app_url or os.environ.get("LLAMA_CLOUD_APP_URL", DEFAULT_APP_URL)
    sync_client = LlamaCloud(
        api_key=api_key,
        base_url=base_url,
        timeout=timeout,
    )

    # Build the upload payload up front; creating the documents kicks off
    # ingestion on the pipeline.
    payload = [
        CloudDocumentCreateParam(
            text=doc.text,
            metadata=doc.metadata,
            excluded_embed_metadata_keys=doc.excluded_embed_metadata_keys,
            excluded_llm_metadata_keys=doc.excluded_llm_metadata_keys,
            id=doc.id_,
        )
        for doc in documents
    ]
    created_docs = sync_client.pipelines.documents.create(
        pipeline_id=index.pipeline.id,
        body=payload,
    )

    # Trigger a sync so the freshly created documents get processed.
    sync_client.pipelines.sync.create(pipeline_id=index.pipeline.id)

    created_ids = [d.id for d in created_docs]
    index.wait_for_completion(doc_ids=created_ids, verbose=verbose, raise_on_error=raise_on_error)

    print(f"Find your index at {app_url}/project/{index.project.id}/deploy/{index.pipeline.id}")

    return index
@override
def as_retriever(self, **kwargs: Any) -> BaseRetriever:
    """Return a Retriever for this managed index."""
    from .retriever import (
        LlamaCloudRetriever,
    )

    # `similarity_top_k` acts as an alias: when provided, it overrides any
    # explicit `dense_similarity_top_k`.
    top_k_alias = kwargs.pop("similarity_top_k", None)
    dense_top_k = kwargs.pop("dense_similarity_top_k", None)
    if top_k_alias is not None:
        dense_top_k = top_k_alias

    return LlamaCloudRetriever(
        project_id=self.project.id,
        pipeline_id=self.pipeline.id,
        api_key=self._api_key,
        base_url=self._base_url,
        app_url=self._app_url,
        timeout=self._timeout,
        organization_id=self.organization_id,
        dense_similarity_top_k=dense_top_k,
        httpx_client=self._httpx_client,
        async_httpx_client=self._async_httpx_client,
        **kwargs,
    )
@override
def as_query_engine(self, llm: Optional[LLMType] = None, **kwargs: Any) -> BaseQueryEngine:  # type: ignore
    """Build a RetrieverQueryEngine backed by this index's retriever.

    NOTE(review): *kwargs* is forwarded both to ``as_retriever`` and to
    ``RetrieverQueryEngine.from_args`` — retriever-only options reach the
    query engine too; confirm this is intended.
    """
    from llama_index.core.query_engine.retriever_query_engine import (
        RetrieverQueryEngine,
    )

    retriever = self.as_retriever(**kwargs)
    kwargs["retriever"] = retriever
    return RetrieverQueryEngine.from_args(llm=llm, **kwargs)  # type: ignore
@property
@override
def ref_doc_info(self, batch_size: int = 100) -> Dict[str, RefDocInfo]:
    """Retrieve a dict mapping of ingested documents and their metadata. The nodes list is empty."""
    # NOTE(review): because this is a @property, callers can never actually
    # pass `batch_size`; the default is always used.
    collected: List[CloudDocument] = []
    page_offset = 0
    page_size = batch_size

    # assumes documents.list(...) returns an auto-paginating iterator over
    # all documents (skip/limit only seed the first page) — TODO confirm
    for cloud_doc in self._client.pipelines.documents.list(
        pipeline_id=self.pipeline.id,
        skip=page_offset,
        limit=page_size,
    ):
        collected.append(cloud_doc)

    return {d.id: RefDocInfo(metadata=d.metadata, node_ids=[]) for d in collected}
@override
def insert(self, document: Document, verbose: bool = False, **insert_kwargs: Any) -> None:
    """Insert a document and block until it has been ingested."""
    with self._callback_manager.as_trace("insert"):
        created = self._client.pipelines.documents.create(
            pipeline_id=self.pipeline.id,
            body=[
                CloudDocumentCreateParam(
                    text=document.text,
                    metadata=document.metadata,
                    excluded_embed_metadata_keys=document.excluded_embed_metadata_keys,
                    excluded_llm_metadata_keys=document.excluded_llm_metadata_keys,
                    id=document.id_,
                )
            ],
        )

        # Trigger a sync so the new document gets processed.
        self._client.pipelines.sync.create(pipeline_id=self.pipeline.id)

        # Only one document was sent, so only one comes back.
        self.wait_for_completion(doc_ids=[created[0].id], verbose=verbose, raise_on_error=True)
@override
async def ainsert(self, document: Document, verbose: bool = False, **insert_kwargs: Any) -> None:
    """Insert a document and block until it has been ingested (async)."""
    with self._callback_manager.as_trace("insert"):
        created = await self._aclient.pipelines.documents.create(
            pipeline_id=self.pipeline.id,
            body=[
                CloudDocumentCreateParam(
                    text=document.text,
                    metadata=document.metadata,
                    excluded_embed_metadata_keys=document.excluded_embed_metadata_keys,
                    excluded_llm_metadata_keys=document.excluded_llm_metadata_keys,
                    id=document.id_,
                )
            ],
        )

        # Trigger a sync so the new document gets processed.
        await self._aclient.pipelines.sync.create(pipeline_id=self.pipeline.id)

        # Only one document was sent, so only one comes back.
        await self.await_for_completion(doc_ids=[created[0].id], verbose=verbose, raise_on_error=True)
@override
def update_ref_doc(self, document: Document, verbose: bool = False, **update_kwargs: Any) -> None:
    """Upserts a document and its corresponding nodes."""
    with self._callback_manager.as_trace("update"):
        upserted = self._client.pipelines.documents.upsert(
            pipeline_id=self.pipeline.id,
            body=[
                CloudDocumentCreateParam(
                    text=document.text,
                    metadata=document.metadata,
                    excluded_embed_metadata_keys=document.excluded_embed_metadata_keys,
                    excluded_llm_metadata_keys=document.excluded_llm_metadata_keys,
                    id=document.id_,
                )
            ],
        )

        # Trigger a sync so the upserted document gets re-processed.
        self._client.pipelines.sync.create(pipeline_id=self.pipeline.id)

        self.wait_for_completion(doc_ids=[upserted[0].id], verbose=verbose, raise_on_error=True)
@override
async def aupdate_ref_doc(self, document: Document, verbose: bool = False, **update_kwargs: Any) -> None:
    """Upserts a document and its corresponding nodes (async)."""
    with self._callback_manager.as_trace("update"):
        upserted = await self._aclient.pipelines.documents.upsert(
            pipeline_id=self.pipeline.id,
            body=[
                CloudDocumentCreateParam(
                    text=document.text,
                    metadata=document.metadata,
                    excluded_embed_metadata_keys=document.excluded_embed_metadata_keys,
                    excluded_llm_metadata_keys=document.excluded_llm_metadata_keys,
                    id=document.id_,
                )
            ],
        )

        # Trigger a sync so the upserted document gets re-processed.
        await self._aclient.pipelines.sync.create(pipeline_id=self.pipeline.id)

        await self.await_for_completion(doc_ids=[upserted[0].id], verbose=verbose, raise_on_error=True)
@override
def refresh_ref_docs(self, documents: Sequence[Document], **update_kwargs: Any) -> List[bool]:
    """Refresh an index with documents that have changed."""
    with self._callback_manager.as_trace("refresh"):
        payload = [
            CloudDocumentCreateParam(
                text=doc.text,
                metadata=doc.metadata,
                excluded_embed_metadata_keys=doc.excluded_embed_metadata_keys,
                excluded_llm_metadata_keys=doc.excluded_llm_metadata_keys,
                id=doc.id_,
            )
            for doc in documents
        ]
        upserted = self._client.pipelines.documents.upsert(
            pipeline_id=self.pipeline.id,
            body=payload,
        )

        # Trigger a sync so the refreshed documents get re-processed.
        self._client.pipelines.sync.create(pipeline_id=self.pipeline.id)

        refreshed_ids = [d.id for d in upserted]
        # NOTE(review): verbose is hard-coded True here, unlike insert().
        self.wait_for_completion(doc_ids=refreshed_ids, verbose=True, raise_on_error=True)
        return [True] * len(refreshed_ids)
@override
async def arefresh_ref_docs(self, documents: Sequence[Document], **update_kwargs: Any) -> List[bool]:
    """Refresh an index with documents that have changed (async)."""
    with self._callback_manager.as_trace("refresh"):
        payload = [
            CloudDocumentCreateParam(
                text=doc.text,
                metadata=doc.metadata,
                excluded_embed_metadata_keys=doc.excluded_embed_metadata_keys,
                excluded_llm_metadata_keys=doc.excluded_llm_metadata_keys,
                id=doc.id_,
            )
            for doc in documents
        ]
        upserted = await self._aclient.pipelines.documents.upsert(
            pipeline_id=self.pipeline.id,
            body=payload,
        )

        # Trigger a sync so the refreshed documents get re-processed.
        await self._aclient.pipelines.sync.create(pipeline_id=self.pipeline.id)

        refreshed_ids = [d.id for d in upserted]
        # NOTE(review): verbose is hard-coded True here, unlike ainsert().
        await self.await_for_completion(doc_ids=refreshed_ids, verbose=True, raise_on_error=True)
        return [True] * len(refreshed_ids)
@override
def delete_ref_doc(
    self,
    ref_doc_id: str,
    delete_from_docstore: bool = False,
    verbose: bool = False,
    raise_if_not_found: bool = False,
    **delete_kwargs: Any,
) -> None:
    """Delete a document and its nodes by using ref_doc_id."""
    try:
        # The id travels as a URL path parameter, hence the double quoting.
        encoded_id = quote_plus(quote_plus(ref_doc_id))
        self._client.pipelines.documents.delete(
            document_id=encoded_id,
            pipeline_id=self.pipeline.id,
        )
    except APIStatusError as e:
        if e.status_code == 404 and not raise_if_not_found:
            logger.warning(f"ref_doc_id {ref_doc_id} not found, nothing deleted.")
        else:
            raise

    # The document record is already gone at this point, so we wait on the
    # pipeline itself rather than on the document.
    self.wait_for_completion(verbose=verbose, raise_on_partial_success=False)
@override
async def adelete_ref_doc(
    self,
    ref_doc_id: str,
    delete_from_docstore: bool = False,
    verbose: bool = False,
    raise_if_not_found: bool = False,
    **delete_kwargs: Any,
) -> None:
    """Delete a document and its nodes by using ref_doc_id (async)."""
    try:
        # The id travels as a URL path parameter, hence the double quoting.
        encoded_id = quote_plus(quote_plus(ref_doc_id))
        await self._aclient.pipelines.documents.delete(
            document_id=encoded_id,
            pipeline_id=self.pipeline.id,
        )
    except APIStatusError as e:
        if e.status_code == 404 and not raise_if_not_found:
            logger.warning(f"ref_doc_id {ref_doc_id} not found, nothing deleted.")
        else:
            raise

    # The document record is already gone at this point, so we wait on the
    # pipeline itself rather than on the document.
    await self.await_for_completion(verbose=verbose, raise_on_partial_success=False)
def upload_file(
    self,
    file_path: str,
    custom_metadata: Optional[dict[str, Any]] = None,
    verbose: bool = False,
    wait_for_ingestion: bool = True,
    raise_on_error: bool = False,
) -> str:
    """Upload a local file to the index and return its file id."""
    with open(file_path, "rb") as fh:
        uploaded = self._client.files.create(project_id=self.project.id, file=fh, purpose="user_data")
    if verbose:
        print(f"Uploaded file {uploaded.id} with name {uploaded.name}")

    # Attach the uploaded file to the pipeline.
    _ = self._client.pipelines.files.create(
        pipeline_id=self.pipeline.id, body=[PipelineFileCreate(file_id=uploaded.id, custom_metadata=custom_metadata)]
    )

    if wait_for_ingestion:
        self.wait_for_completion(file_ids=[uploaded.id], verbose=verbose, raise_on_error=raise_on_error)
    return uploaded.id
async def aupload_file(
    self,
    file_path: str,
    custom_metadata: Optional[dict[str, Any]] = None,
    verbose: bool = False,
    wait_for_ingestion: bool = True,
    raise_on_error: bool = False,
) -> str:
    """Upload a local file to the index and return its file id (async)."""
    with open(file_path, "rb") as fh:
        uploaded = await self._aclient.files.create(project_id=self.project.id, file=fh, purpose="user_data")
    if verbose:
        print(f"Uploaded file {uploaded.id} with name {uploaded.name}")

    # Attach the uploaded file to the pipeline.
    _ = await self._aclient.pipelines.files.create(
        pipeline_id=self.pipeline.id, body=[PipelineFileCreate(file_id=uploaded.id, custom_metadata=custom_metadata)]
    )

    if wait_for_ingestion:
        await self.await_for_completion(file_ids=[uploaded.id], verbose=verbose, raise_on_error=raise_on_error)

    return uploaded.id
def upload_file_from_url(
    self,
    file_name: str,
    url: str,
    custom_metadata: Optional[dict[str, Any]] = None,
    proxy_url: Optional[str] = None,
    request_headers: Optional[Dict[str, str]] = None,
    verify_ssl: bool = True,
    follow_redirects: bool = True,
    verbose: bool = False,
    wait_for_ingestion: bool = True,
    raise_on_error: bool = False,
) -> str:
    """Upload a file from a URL to the index.

    Downloads *url* (optionally via *proxy_url* with *request_headers*),
    stores the content as *file_name* in the project, attaches it to the
    pipeline, and optionally blocks until ingestion completes.

    Returns:
        The id of the uploaded file.
    """
    with httpx.Client(verify=verify_ssl, proxy=proxy_url, headers=request_headers) as client:
        response = client.get(
            url, headers=request_headers, timeout=self._timeout, follow_redirects=follow_redirects
        )
        response.raise_for_status()
        file_content = response.content

    file = self._client.files.create(
        file=io.BytesIO(file_content),
        project_id=self.project.id,
        external_file_id=file_name,
        purpose="user_data",
    )
    if verbose:
        # Bug fix: previously printed the id twice ("with ID {file.id}");
        # report id + name as upload_file() does.
        print(f"Uploaded file {file.id} with name {file.name}")

    # Add file to pipeline
    _ = self._client.pipelines.files.create(
        pipeline_id=self.pipeline.id, body=[PipelineFileCreate(file_id=file.id, custom_metadata=custom_metadata)]
    )

    if wait_for_ingestion:
        self.wait_for_completion(file_ids=[file.id], verbose=verbose, raise_on_error=raise_on_error)
    return file.id
async def aupload_file_from_url(
    self,
    file_name: str,
    url: str,
    custom_metadata: Optional[dict[str, Any]] = None,
    proxy_url: Optional[str] = None,
    request_headers: Optional[Dict[str, str]] = None,
    verify_ssl: bool = True,
    follow_redirects: bool = True,
    verbose: bool = False,
    wait_for_ingestion: bool = True,
    raise_on_error: bool = False,
) -> str:
    """Upload a file from a URL to the index (async).

    Downloads *url* (optionally via *proxy_url* with *request_headers*),
    stores the content as *file_name* in the project, attaches it to the
    pipeline, and optionally blocks until ingestion completes.

    Returns:
        The id of the uploaded file.
    """
    async with httpx.AsyncClient(verify=verify_ssl, proxy=proxy_url, headers=request_headers) as client:
        response = await client.get(
            url, headers=request_headers, timeout=self._timeout, follow_redirects=follow_redirects
        )
        response.raise_for_status()
        file_content = response.content

    file = await self._aclient.files.create(
        file=io.BytesIO(file_content),
        project_id=self.project.id,
        external_file_id=file_name,
        purpose="user_data",
    )
    if verbose:
        # Bug fix: previously printed the id twice ("with ID {file.id}");
        # report id + name as upload_file() does.
        print(f"Uploaded file {file.id} with name {file.name}")

    # Add file to pipeline
    _ = await self._aclient.pipelines.files.create(
        pipeline_id=self.pipeline.id, body=[PipelineFileCreate(file_id=file.id, custom_metadata=custom_metadata)]
    )

    if wait_for_ingestion:
        await self.await_for_completion(file_ids=[file.id], verbose=verbose, raise_on_error=raise_on_error)

    return file.id
# Nodes related methods (not implemented for LlamaCloudIndex)

@override
def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
    """Index-specific logic for inserting nodes to the index struct.

    Raises:
        NotImplementedError: always — node-level insertion is not supported
        on the managed index; use document-level APIs instead.
    """
    raise NotImplementedError("_insert not implemented for LlamaCloudIndex.")
@override
def build_index_from_nodes(self, nodes: Sequence[BaseNode], **build_kwargs: Any) -> IndexDict:
    """Build the index from nodes.

    Raises:
        NotImplementedError: always — the managed service owns index
        construction; local node-based building is not supported.
    """
    raise NotImplementedError("build_index_from_nodes not implemented for LlamaCloudIndex.")
@override
def insert_nodes(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
    """Insert a set of nodes.

    Raises:
        NotImplementedError: always — node-level insertion is not supported
        on the managed index; use document-level APIs instead.
    """
    raise NotImplementedError("insert_nodes not implemented for LlamaCloudIndex.")
@override
def delete_nodes(
    self,
    node_ids: List[str],
    delete_from_docstore: bool = False,
    **delete_kwargs: Any,
) -> None:
    """Delete a set of nodes.

    Raises:
        NotImplementedError: always — node-level deletion is not supported
        on the managed index; use ``delete_ref_doc`` instead.
    """
    raise NotImplementedError("delete_nodes not implemented for LlamaCloudIndex.")