lfx-nightly 0.1.11.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/__init__.py +0 -0
- lfx/__main__.py +25 -0
- lfx/base/__init__.py +0 -0
- lfx/base/agents/__init__.py +0 -0
- lfx/base/agents/agent.py +268 -0
- lfx/base/agents/callback.py +130 -0
- lfx/base/agents/context.py +109 -0
- lfx/base/agents/crewai/__init__.py +0 -0
- lfx/base/agents/crewai/crew.py +231 -0
- lfx/base/agents/crewai/tasks.py +12 -0
- lfx/base/agents/default_prompts.py +23 -0
- lfx/base/agents/errors.py +15 -0
- lfx/base/agents/events.py +346 -0
- lfx/base/agents/utils.py +205 -0
- lfx/base/astra_assistants/__init__.py +0 -0
- lfx/base/astra_assistants/util.py +171 -0
- lfx/base/chains/__init__.py +0 -0
- lfx/base/chains/model.py +19 -0
- lfx/base/composio/__init__.py +0 -0
- lfx/base/composio/composio_base.py +1291 -0
- lfx/base/compressors/__init__.py +0 -0
- lfx/base/compressors/model.py +60 -0
- lfx/base/constants.py +46 -0
- lfx/base/curl/__init__.py +0 -0
- lfx/base/curl/parse.py +188 -0
- lfx/base/data/__init__.py +5 -0
- lfx/base/data/base_file.py +685 -0
- lfx/base/data/docling_utils.py +245 -0
- lfx/base/data/utils.py +198 -0
- lfx/base/document_transformers/__init__.py +0 -0
- lfx/base/document_transformers/model.py +43 -0
- lfx/base/embeddings/__init__.py +0 -0
- lfx/base/embeddings/aiml_embeddings.py +62 -0
- lfx/base/embeddings/model.py +26 -0
- lfx/base/flow_processing/__init__.py +0 -0
- lfx/base/flow_processing/utils.py +86 -0
- lfx/base/huggingface/__init__.py +0 -0
- lfx/base/huggingface/model_bridge.py +133 -0
- lfx/base/io/__init__.py +0 -0
- lfx/base/io/chat.py +20 -0
- lfx/base/io/text.py +22 -0
- lfx/base/langchain_utilities/__init__.py +0 -0
- lfx/base/langchain_utilities/model.py +35 -0
- lfx/base/langchain_utilities/spider_constants.py +1 -0
- lfx/base/langwatch/__init__.py +0 -0
- lfx/base/langwatch/utils.py +18 -0
- lfx/base/mcp/__init__.py +0 -0
- lfx/base/mcp/constants.py +2 -0
- lfx/base/mcp/util.py +1398 -0
- lfx/base/memory/__init__.py +0 -0
- lfx/base/memory/memory.py +49 -0
- lfx/base/memory/model.py +38 -0
- lfx/base/models/__init__.py +3 -0
- lfx/base/models/aiml_constants.py +51 -0
- lfx/base/models/anthropic_constants.py +47 -0
- lfx/base/models/aws_constants.py +151 -0
- lfx/base/models/chat_result.py +76 -0
- lfx/base/models/google_generative_ai_constants.py +70 -0
- lfx/base/models/groq_constants.py +134 -0
- lfx/base/models/model.py +375 -0
- lfx/base/models/model_input_constants.py +307 -0
- lfx/base/models/model_metadata.py +41 -0
- lfx/base/models/model_utils.py +8 -0
- lfx/base/models/novita_constants.py +35 -0
- lfx/base/models/ollama_constants.py +49 -0
- lfx/base/models/openai_constants.py +122 -0
- lfx/base/models/sambanova_constants.py +18 -0
- lfx/base/processing/__init__.py +0 -0
- lfx/base/prompts/__init__.py +0 -0
- lfx/base/prompts/api_utils.py +224 -0
- lfx/base/prompts/utils.py +61 -0
- lfx/base/textsplitters/__init__.py +0 -0
- lfx/base/textsplitters/model.py +28 -0
- lfx/base/tools/__init__.py +0 -0
- lfx/base/tools/base.py +26 -0
- lfx/base/tools/component_tool.py +325 -0
- lfx/base/tools/constants.py +49 -0
- lfx/base/tools/flow_tool.py +132 -0
- lfx/base/tools/run_flow.py +224 -0
- lfx/base/vectorstores/__init__.py +0 -0
- lfx/base/vectorstores/model.py +193 -0
- lfx/base/vectorstores/utils.py +22 -0
- lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
- lfx/cli/__init__.py +5 -0
- lfx/cli/commands.py +319 -0
- lfx/cli/common.py +650 -0
- lfx/cli/run.py +441 -0
- lfx/cli/script_loader.py +247 -0
- lfx/cli/serve_app.py +546 -0
- lfx/cli/validation.py +69 -0
- lfx/components/FAISS/__init__.py +34 -0
- lfx/components/FAISS/faiss.py +111 -0
- lfx/components/Notion/__init__.py +19 -0
- lfx/components/Notion/add_content_to_page.py +269 -0
- lfx/components/Notion/create_page.py +94 -0
- lfx/components/Notion/list_database_properties.py +68 -0
- lfx/components/Notion/list_pages.py +122 -0
- lfx/components/Notion/list_users.py +77 -0
- lfx/components/Notion/page_content_viewer.py +93 -0
- lfx/components/Notion/search.py +111 -0
- lfx/components/Notion/update_page_property.py +114 -0
- lfx/components/__init__.py +411 -0
- lfx/components/_importing.py +42 -0
- lfx/components/agentql/__init__.py +3 -0
- lfx/components/agentql/agentql_api.py +151 -0
- lfx/components/agents/__init__.py +34 -0
- lfx/components/agents/agent.py +558 -0
- lfx/components/agents/mcp_component.py +501 -0
- lfx/components/aiml/__init__.py +37 -0
- lfx/components/aiml/aiml.py +112 -0
- lfx/components/aiml/aiml_embeddings.py +37 -0
- lfx/components/amazon/__init__.py +36 -0
- lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
- lfx/components/amazon/amazon_bedrock_model.py +124 -0
- lfx/components/amazon/s3_bucket_uploader.py +211 -0
- lfx/components/anthropic/__init__.py +34 -0
- lfx/components/anthropic/anthropic.py +187 -0
- lfx/components/apify/__init__.py +5 -0
- lfx/components/apify/apify_actor.py +325 -0
- lfx/components/arxiv/__init__.py +3 -0
- lfx/components/arxiv/arxiv.py +163 -0
- lfx/components/assemblyai/__init__.py +46 -0
- lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
- lfx/components/assemblyai/assemblyai_lemur.py +183 -0
- lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
- lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
- lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
- lfx/components/azure/__init__.py +37 -0
- lfx/components/azure/azure_openai.py +95 -0
- lfx/components/azure/azure_openai_embeddings.py +83 -0
- lfx/components/baidu/__init__.py +32 -0
- lfx/components/baidu/baidu_qianfan_chat.py +113 -0
- lfx/components/bing/__init__.py +3 -0
- lfx/components/bing/bing_search_api.py +61 -0
- lfx/components/cassandra/__init__.py +40 -0
- lfx/components/cassandra/cassandra.py +264 -0
- lfx/components/cassandra/cassandra_chat.py +92 -0
- lfx/components/cassandra/cassandra_graph.py +238 -0
- lfx/components/chains/__init__.py +3 -0
- lfx/components/chroma/__init__.py +34 -0
- lfx/components/chroma/chroma.py +167 -0
- lfx/components/cleanlab/__init__.py +40 -0
- lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
- lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
- lfx/components/cleanlab/cleanlab_remediator.py +131 -0
- lfx/components/clickhouse/__init__.py +34 -0
- lfx/components/clickhouse/clickhouse.py +135 -0
- lfx/components/cloudflare/__init__.py +32 -0
- lfx/components/cloudflare/cloudflare.py +81 -0
- lfx/components/cohere/__init__.py +40 -0
- lfx/components/cohere/cohere_embeddings.py +81 -0
- lfx/components/cohere/cohere_models.py +46 -0
- lfx/components/cohere/cohere_rerank.py +51 -0
- lfx/components/composio/__init__.py +74 -0
- lfx/components/composio/composio_api.py +268 -0
- lfx/components/composio/dropbox_compnent.py +11 -0
- lfx/components/composio/github_composio.py +11 -0
- lfx/components/composio/gmail_composio.py +38 -0
- lfx/components/composio/googlecalendar_composio.py +11 -0
- lfx/components/composio/googlemeet_composio.py +11 -0
- lfx/components/composio/googletasks_composio.py +8 -0
- lfx/components/composio/linear_composio.py +11 -0
- lfx/components/composio/outlook_composio.py +11 -0
- lfx/components/composio/reddit_composio.py +11 -0
- lfx/components/composio/slack_composio.py +582 -0
- lfx/components/composio/slackbot_composio.py +11 -0
- lfx/components/composio/supabase_composio.py +11 -0
- lfx/components/composio/todoist_composio.py +11 -0
- lfx/components/composio/youtube_composio.py +11 -0
- lfx/components/confluence/__init__.py +3 -0
- lfx/components/confluence/confluence.py +84 -0
- lfx/components/couchbase/__init__.py +34 -0
- lfx/components/couchbase/couchbase.py +102 -0
- lfx/components/crewai/__init__.py +49 -0
- lfx/components/crewai/crewai.py +107 -0
- lfx/components/crewai/hierarchical_crew.py +46 -0
- lfx/components/crewai/hierarchical_task.py +44 -0
- lfx/components/crewai/sequential_crew.py +52 -0
- lfx/components/crewai/sequential_task.py +73 -0
- lfx/components/crewai/sequential_task_agent.py +143 -0
- lfx/components/custom_component/__init__.py +34 -0
- lfx/components/custom_component/custom_component.py +31 -0
- lfx/components/data/__init__.py +64 -0
- lfx/components/data/api_request.py +544 -0
- lfx/components/data/csv_to_data.py +95 -0
- lfx/components/data/directory.py +113 -0
- lfx/components/data/file.py +577 -0
- lfx/components/data/json_to_data.py +98 -0
- lfx/components/data/news_search.py +164 -0
- lfx/components/data/rss.py +69 -0
- lfx/components/data/sql_executor.py +101 -0
- lfx/components/data/url.py +311 -0
- lfx/components/data/web_search.py +112 -0
- lfx/components/data/webhook.py +56 -0
- lfx/components/datastax/__init__.py +70 -0
- lfx/components/datastax/astra_assistant_manager.py +306 -0
- lfx/components/datastax/astra_db.py +75 -0
- lfx/components/datastax/astra_vectorize.py +124 -0
- lfx/components/datastax/astradb.py +1285 -0
- lfx/components/datastax/astradb_cql.py +314 -0
- lfx/components/datastax/astradb_graph.py +330 -0
- lfx/components/datastax/astradb_tool.py +414 -0
- lfx/components/datastax/astradb_vectorstore.py +1285 -0
- lfx/components/datastax/cassandra.py +92 -0
- lfx/components/datastax/create_assistant.py +58 -0
- lfx/components/datastax/create_thread.py +32 -0
- lfx/components/datastax/dotenv.py +35 -0
- lfx/components/datastax/get_assistant.py +37 -0
- lfx/components/datastax/getenvvar.py +30 -0
- lfx/components/datastax/graph_rag.py +141 -0
- lfx/components/datastax/hcd.py +314 -0
- lfx/components/datastax/list_assistants.py +25 -0
- lfx/components/datastax/run.py +89 -0
- lfx/components/deactivated/__init__.py +15 -0
- lfx/components/deactivated/amazon_kendra.py +66 -0
- lfx/components/deactivated/chat_litellm_model.py +158 -0
- lfx/components/deactivated/code_block_extractor.py +26 -0
- lfx/components/deactivated/documents_to_data.py +22 -0
- lfx/components/deactivated/embed.py +16 -0
- lfx/components/deactivated/extract_key_from_data.py +46 -0
- lfx/components/deactivated/json_document_builder.py +57 -0
- lfx/components/deactivated/list_flows.py +20 -0
- lfx/components/deactivated/mcp_sse.py +61 -0
- lfx/components/deactivated/mcp_stdio.py +62 -0
- lfx/components/deactivated/merge_data.py +93 -0
- lfx/components/deactivated/message.py +37 -0
- lfx/components/deactivated/metal.py +54 -0
- lfx/components/deactivated/multi_query.py +59 -0
- lfx/components/deactivated/retriever.py +43 -0
- lfx/components/deactivated/selective_passthrough.py +77 -0
- lfx/components/deactivated/should_run_next.py +40 -0
- lfx/components/deactivated/split_text.py +63 -0
- lfx/components/deactivated/store_message.py +24 -0
- lfx/components/deactivated/sub_flow.py +124 -0
- lfx/components/deactivated/vectara_self_query.py +76 -0
- lfx/components/deactivated/vector_store.py +24 -0
- lfx/components/deepseek/__init__.py +34 -0
- lfx/components/deepseek/deepseek.py +136 -0
- lfx/components/docling/__init__.py +43 -0
- lfx/components/docling/chunk_docling_document.py +186 -0
- lfx/components/docling/docling_inline.py +231 -0
- lfx/components/docling/docling_remote.py +193 -0
- lfx/components/docling/export_docling_document.py +117 -0
- lfx/components/documentloaders/__init__.py +3 -0
- lfx/components/duckduckgo/__init__.py +3 -0
- lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
- lfx/components/elastic/__init__.py +37 -0
- lfx/components/elastic/elasticsearch.py +267 -0
- lfx/components/elastic/opensearch.py +243 -0
- lfx/components/embeddings/__init__.py +37 -0
- lfx/components/embeddings/similarity.py +76 -0
- lfx/components/embeddings/text_embedder.py +64 -0
- lfx/components/exa/__init__.py +3 -0
- lfx/components/exa/exa_search.py +68 -0
- lfx/components/firecrawl/__init__.py +43 -0
- lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
- lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
- lfx/components/firecrawl/firecrawl_map_api.py +89 -0
- lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
- lfx/components/git/__init__.py +4 -0
- lfx/components/git/git.py +262 -0
- lfx/components/git/gitextractor.py +196 -0
- lfx/components/glean/__init__.py +3 -0
- lfx/components/glean/glean_search_api.py +173 -0
- lfx/components/google/__init__.py +17 -0
- lfx/components/google/gmail.py +192 -0
- lfx/components/google/google_bq_sql_executor.py +157 -0
- lfx/components/google/google_drive.py +92 -0
- lfx/components/google/google_drive_search.py +152 -0
- lfx/components/google/google_generative_ai.py +147 -0
- lfx/components/google/google_generative_ai_embeddings.py +141 -0
- lfx/components/google/google_oauth_token.py +89 -0
- lfx/components/google/google_search_api_core.py +68 -0
- lfx/components/google/google_serper_api_core.py +74 -0
- lfx/components/groq/__init__.py +34 -0
- lfx/components/groq/groq.py +136 -0
- lfx/components/helpers/__init__.py +52 -0
- lfx/components/helpers/calculator_core.py +89 -0
- lfx/components/helpers/create_list.py +40 -0
- lfx/components/helpers/current_date.py +42 -0
- lfx/components/helpers/id_generator.py +42 -0
- lfx/components/helpers/memory.py +251 -0
- lfx/components/helpers/output_parser.py +45 -0
- lfx/components/helpers/store_message.py +90 -0
- lfx/components/homeassistant/__init__.py +7 -0
- lfx/components/homeassistant/home_assistant_control.py +152 -0
- lfx/components/homeassistant/list_home_assistant_states.py +137 -0
- lfx/components/huggingface/__init__.py +37 -0
- lfx/components/huggingface/huggingface.py +197 -0
- lfx/components/huggingface/huggingface_inference_api.py +106 -0
- lfx/components/ibm/__init__.py +34 -0
- lfx/components/ibm/watsonx.py +203 -0
- lfx/components/ibm/watsonx_embeddings.py +135 -0
- lfx/components/icosacomputing/__init__.py +5 -0
- lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
- lfx/components/input_output/__init__.py +38 -0
- lfx/components/input_output/chat.py +120 -0
- lfx/components/input_output/chat_output.py +200 -0
- lfx/components/input_output/text.py +27 -0
- lfx/components/input_output/text_output.py +29 -0
- lfx/components/jigsawstack/__init__.py +23 -0
- lfx/components/jigsawstack/ai_scrape.py +126 -0
- lfx/components/jigsawstack/ai_web_search.py +136 -0
- lfx/components/jigsawstack/file_read.py +115 -0
- lfx/components/jigsawstack/file_upload.py +94 -0
- lfx/components/jigsawstack/image_generation.py +205 -0
- lfx/components/jigsawstack/nsfw.py +60 -0
- lfx/components/jigsawstack/object_detection.py +124 -0
- lfx/components/jigsawstack/sentiment.py +112 -0
- lfx/components/jigsawstack/text_to_sql.py +90 -0
- lfx/components/jigsawstack/text_translate.py +77 -0
- lfx/components/jigsawstack/vocr.py +107 -0
- lfx/components/langchain_utilities/__init__.py +109 -0
- lfx/components/langchain_utilities/character.py +53 -0
- lfx/components/langchain_utilities/conversation.py +59 -0
- lfx/components/langchain_utilities/csv_agent.py +107 -0
- lfx/components/langchain_utilities/fake_embeddings.py +26 -0
- lfx/components/langchain_utilities/html_link_extractor.py +35 -0
- lfx/components/langchain_utilities/json_agent.py +45 -0
- lfx/components/langchain_utilities/langchain_hub.py +126 -0
- lfx/components/langchain_utilities/language_recursive.py +49 -0
- lfx/components/langchain_utilities/language_semantic.py +138 -0
- lfx/components/langchain_utilities/llm_checker.py +39 -0
- lfx/components/langchain_utilities/llm_math.py +42 -0
- lfx/components/langchain_utilities/natural_language.py +61 -0
- lfx/components/langchain_utilities/openai_tools.py +53 -0
- lfx/components/langchain_utilities/openapi.py +48 -0
- lfx/components/langchain_utilities/recursive_character.py +60 -0
- lfx/components/langchain_utilities/retrieval_qa.py +83 -0
- lfx/components/langchain_utilities/runnable_executor.py +137 -0
- lfx/components/langchain_utilities/self_query.py +80 -0
- lfx/components/langchain_utilities/spider.py +142 -0
- lfx/components/langchain_utilities/sql.py +40 -0
- lfx/components/langchain_utilities/sql_database.py +35 -0
- lfx/components/langchain_utilities/sql_generator.py +78 -0
- lfx/components/langchain_utilities/tool_calling.py +59 -0
- lfx/components/langchain_utilities/vector_store_info.py +49 -0
- lfx/components/langchain_utilities/vector_store_router.py +33 -0
- lfx/components/langchain_utilities/xml_agent.py +71 -0
- lfx/components/langwatch/__init__.py +3 -0
- lfx/components/langwatch/langwatch.py +278 -0
- lfx/components/link_extractors/__init__.py +3 -0
- lfx/components/lmstudio/__init__.py +34 -0
- lfx/components/lmstudio/lmstudioembeddings.py +89 -0
- lfx/components/lmstudio/lmstudiomodel.py +129 -0
- lfx/components/logic/__init__.py +52 -0
- lfx/components/logic/conditional_router.py +171 -0
- lfx/components/logic/data_conditional_router.py +125 -0
- lfx/components/logic/flow_tool.py +110 -0
- lfx/components/logic/listen.py +29 -0
- lfx/components/logic/loop.py +125 -0
- lfx/components/logic/notify.py +88 -0
- lfx/components/logic/pass_message.py +35 -0
- lfx/components/logic/run_flow.py +71 -0
- lfx/components/logic/sub_flow.py +114 -0
- lfx/components/maritalk/__init__.py +32 -0
- lfx/components/maritalk/maritalk.py +52 -0
- lfx/components/mem0/__init__.py +3 -0
- lfx/components/mem0/mem0_chat_memory.py +136 -0
- lfx/components/milvus/__init__.py +34 -0
- lfx/components/milvus/milvus.py +115 -0
- lfx/components/mistral/__init__.py +37 -0
- lfx/components/mistral/mistral.py +114 -0
- lfx/components/mistral/mistral_embeddings.py +58 -0
- lfx/components/models/__init__.py +34 -0
- lfx/components/models/embedding_model.py +114 -0
- lfx/components/models/language_model.py +144 -0
- lfx/components/mongodb/__init__.py +34 -0
- lfx/components/mongodb/mongodb_atlas.py +213 -0
- lfx/components/needle/__init__.py +3 -0
- lfx/components/needle/needle.py +104 -0
- lfx/components/notdiamond/__init__.py +34 -0
- lfx/components/notdiamond/notdiamond.py +228 -0
- lfx/components/novita/__init__.py +32 -0
- lfx/components/novita/novita.py +130 -0
- lfx/components/nvidia/__init__.py +57 -0
- lfx/components/nvidia/nvidia.py +157 -0
- lfx/components/nvidia/nvidia_embedding.py +77 -0
- lfx/components/nvidia/nvidia_ingest.py +317 -0
- lfx/components/nvidia/nvidia_rerank.py +63 -0
- lfx/components/nvidia/system_assist.py +65 -0
- lfx/components/olivya/__init__.py +3 -0
- lfx/components/olivya/olivya.py +116 -0
- lfx/components/ollama/__init__.py +37 -0
- lfx/components/ollama/ollama.py +330 -0
- lfx/components/ollama/ollama_embeddings.py +106 -0
- lfx/components/openai/__init__.py +37 -0
- lfx/components/openai/openai.py +100 -0
- lfx/components/openai/openai_chat_model.py +176 -0
- lfx/components/openrouter/__init__.py +32 -0
- lfx/components/openrouter/openrouter.py +202 -0
- lfx/components/output_parsers/__init__.py +3 -0
- lfx/components/perplexity/__init__.py +34 -0
- lfx/components/perplexity/perplexity.py +75 -0
- lfx/components/pgvector/__init__.py +34 -0
- lfx/components/pgvector/pgvector.py +72 -0
- lfx/components/pinecone/__init__.py +34 -0
- lfx/components/pinecone/pinecone.py +134 -0
- lfx/components/processing/__init__.py +117 -0
- lfx/components/processing/alter_metadata.py +108 -0
- lfx/components/processing/batch_run.py +205 -0
- lfx/components/processing/combine_text.py +39 -0
- lfx/components/processing/converter.py +159 -0
- lfx/components/processing/create_data.py +110 -0
- lfx/components/processing/data_operations.py +438 -0
- lfx/components/processing/data_to_dataframe.py +70 -0
- lfx/components/processing/dataframe_operations.py +313 -0
- lfx/components/processing/extract_key.py +53 -0
- lfx/components/processing/filter_data.py +42 -0
- lfx/components/processing/filter_data_values.py +88 -0
- lfx/components/processing/json_cleaner.py +103 -0
- lfx/components/processing/lambda_filter.py +154 -0
- lfx/components/processing/llm_router.py +499 -0
- lfx/components/processing/merge_data.py +90 -0
- lfx/components/processing/message_to_data.py +36 -0
- lfx/components/processing/parse_data.py +70 -0
- lfx/components/processing/parse_dataframe.py +68 -0
- lfx/components/processing/parse_json_data.py +90 -0
- lfx/components/processing/parser.py +143 -0
- lfx/components/processing/prompt.py +67 -0
- lfx/components/processing/python_repl_core.py +98 -0
- lfx/components/processing/regex.py +82 -0
- lfx/components/processing/save_file.py +225 -0
- lfx/components/processing/select_data.py +48 -0
- lfx/components/processing/split_text.py +141 -0
- lfx/components/processing/structured_output.py +202 -0
- lfx/components/processing/update_data.py +160 -0
- lfx/components/prototypes/__init__.py +34 -0
- lfx/components/prototypes/python_function.py +73 -0
- lfx/components/qdrant/__init__.py +34 -0
- lfx/components/qdrant/qdrant.py +109 -0
- lfx/components/redis/__init__.py +37 -0
- lfx/components/redis/redis.py +89 -0
- lfx/components/redis/redis_chat.py +43 -0
- lfx/components/sambanova/__init__.py +32 -0
- lfx/components/sambanova/sambanova.py +84 -0
- lfx/components/scrapegraph/__init__.py +40 -0
- lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
- lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
- lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
- lfx/components/searchapi/__init__.py +34 -0
- lfx/components/searchapi/search.py +79 -0
- lfx/components/serpapi/__init__.py +3 -0
- lfx/components/serpapi/serp.py +115 -0
- lfx/components/supabase/__init__.py +34 -0
- lfx/components/supabase/supabase.py +76 -0
- lfx/components/tavily/__init__.py +4 -0
- lfx/components/tavily/tavily_extract.py +117 -0
- lfx/components/tavily/tavily_search.py +212 -0
- lfx/components/textsplitters/__init__.py +3 -0
- lfx/components/toolkits/__init__.py +3 -0
- lfx/components/tools/__init__.py +72 -0
- lfx/components/tools/calculator.py +108 -0
- lfx/components/tools/google_search_api.py +45 -0
- lfx/components/tools/google_serper_api.py +115 -0
- lfx/components/tools/python_code_structured_tool.py +327 -0
- lfx/components/tools/python_repl.py +97 -0
- lfx/components/tools/search_api.py +87 -0
- lfx/components/tools/searxng.py +145 -0
- lfx/components/tools/serp_api.py +119 -0
- lfx/components/tools/tavily_search_tool.py +344 -0
- lfx/components/tools/wikidata_api.py +102 -0
- lfx/components/tools/wikipedia_api.py +49 -0
- lfx/components/tools/yahoo_finance.py +129 -0
- lfx/components/twelvelabs/__init__.py +52 -0
- lfx/components/twelvelabs/convert_astra_results.py +84 -0
- lfx/components/twelvelabs/pegasus_index.py +311 -0
- lfx/components/twelvelabs/split_video.py +291 -0
- lfx/components/twelvelabs/text_embeddings.py +57 -0
- lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
- lfx/components/twelvelabs/video_embeddings.py +100 -0
- lfx/components/twelvelabs/video_file.py +179 -0
- lfx/components/unstructured/__init__.py +3 -0
- lfx/components/unstructured/unstructured.py +121 -0
- lfx/components/upstash/__init__.py +34 -0
- lfx/components/upstash/upstash.py +124 -0
- lfx/components/vectara/__init__.py +37 -0
- lfx/components/vectara/vectara.py +97 -0
- lfx/components/vectara/vectara_rag.py +164 -0
- lfx/components/vectorstores/__init__.py +40 -0
- lfx/components/vectorstores/astradb.py +1285 -0
- lfx/components/vectorstores/astradb_graph.py +319 -0
- lfx/components/vectorstores/cassandra.py +264 -0
- lfx/components/vectorstores/cassandra_graph.py +238 -0
- lfx/components/vectorstores/chroma.py +167 -0
- lfx/components/vectorstores/clickhouse.py +135 -0
- lfx/components/vectorstores/couchbase.py +102 -0
- lfx/components/vectorstores/elasticsearch.py +267 -0
- lfx/components/vectorstores/faiss.py +111 -0
- lfx/components/vectorstores/graph_rag.py +141 -0
- lfx/components/vectorstores/hcd.py +314 -0
- lfx/components/vectorstores/local_db.py +261 -0
- lfx/components/vectorstores/milvus.py +115 -0
- lfx/components/vectorstores/mongodb_atlas.py +213 -0
- lfx/components/vectorstores/opensearch.py +243 -0
- lfx/components/vectorstores/pgvector.py +72 -0
- lfx/components/vectorstores/pinecone.py +134 -0
- lfx/components/vectorstores/qdrant.py +109 -0
- lfx/components/vectorstores/supabase.py +76 -0
- lfx/components/vectorstores/upstash.py +124 -0
- lfx/components/vectorstores/vectara.py +97 -0
- lfx/components/vectorstores/vectara_rag.py +164 -0
- lfx/components/vectorstores/weaviate.py +89 -0
- lfx/components/vertexai/__init__.py +37 -0
- lfx/components/vertexai/vertexai.py +71 -0
- lfx/components/vertexai/vertexai_embeddings.py +67 -0
- lfx/components/weaviate/__init__.py +34 -0
- lfx/components/weaviate/weaviate.py +89 -0
- lfx/components/wikipedia/__init__.py +4 -0
- lfx/components/wikipedia/wikidata.py +86 -0
- lfx/components/wikipedia/wikipedia.py +53 -0
- lfx/components/wolframalpha/__init__.py +3 -0
- lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
- lfx/components/xai/__init__.py +32 -0
- lfx/components/xai/xai.py +167 -0
- lfx/components/yahoosearch/__init__.py +3 -0
- lfx/components/yahoosearch/yahoo.py +137 -0
- lfx/components/youtube/__init__.py +52 -0
- lfx/components/youtube/channel.py +227 -0
- lfx/components/youtube/comments.py +231 -0
- lfx/components/youtube/playlist.py +33 -0
- lfx/components/youtube/search.py +120 -0
- lfx/components/youtube/trending.py +285 -0
- lfx/components/youtube/video_details.py +263 -0
- lfx/components/youtube/youtube_transcripts.py +118 -0
- lfx/components/zep/__init__.py +3 -0
- lfx/components/zep/zep.py +44 -0
- lfx/constants.py +6 -0
- lfx/custom/__init__.py +7 -0
- lfx/custom/attributes.py +86 -0
- lfx/custom/code_parser/__init__.py +3 -0
- lfx/custom/code_parser/code_parser.py +361 -0
- lfx/custom/custom_component/__init__.py +0 -0
- lfx/custom/custom_component/base_component.py +128 -0
- lfx/custom/custom_component/component.py +1808 -0
- lfx/custom/custom_component/component_with_cache.py +8 -0
- lfx/custom/custom_component/custom_component.py +588 -0
- lfx/custom/dependency_analyzer.py +165 -0
- lfx/custom/directory_reader/__init__.py +3 -0
- lfx/custom/directory_reader/directory_reader.py +359 -0
- lfx/custom/directory_reader/utils.py +171 -0
- lfx/custom/eval.py +12 -0
- lfx/custom/schema.py +32 -0
- lfx/custom/tree_visitor.py +21 -0
- lfx/custom/utils.py +877 -0
- lfx/custom/validate.py +488 -0
- lfx/events/__init__.py +1 -0
- lfx/events/event_manager.py +110 -0
- lfx/exceptions/__init__.py +0 -0
- lfx/exceptions/component.py +15 -0
- lfx/field_typing/__init__.py +91 -0
- lfx/field_typing/constants.py +215 -0
- lfx/field_typing/range_spec.py +35 -0
- lfx/graph/__init__.py +6 -0
- lfx/graph/edge/__init__.py +0 -0
- lfx/graph/edge/base.py +277 -0
- lfx/graph/edge/schema.py +119 -0
- lfx/graph/edge/utils.py +0 -0
- lfx/graph/graph/__init__.py +0 -0
- lfx/graph/graph/ascii.py +202 -0
- lfx/graph/graph/base.py +2238 -0
- lfx/graph/graph/constants.py +63 -0
- lfx/graph/graph/runnable_vertices_manager.py +133 -0
- lfx/graph/graph/schema.py +52 -0
- lfx/graph/graph/state_model.py +66 -0
- lfx/graph/graph/utils.py +1024 -0
- lfx/graph/schema.py +75 -0
- lfx/graph/state/__init__.py +0 -0
- lfx/graph/state/model.py +237 -0
- lfx/graph/utils.py +200 -0
- lfx/graph/vertex/__init__.py +0 -0
- lfx/graph/vertex/base.py +823 -0
- lfx/graph/vertex/constants.py +0 -0
- lfx/graph/vertex/exceptions.py +4 -0
- lfx/graph/vertex/param_handler.py +264 -0
- lfx/graph/vertex/schema.py +26 -0
- lfx/graph/vertex/utils.py +19 -0
- lfx/graph/vertex/vertex_types.py +489 -0
- lfx/helpers/__init__.py +1 -0
- lfx/helpers/base_model.py +71 -0
- lfx/helpers/custom.py +13 -0
- lfx/helpers/data.py +167 -0
- lfx/helpers/flow.py +194 -0
- lfx/inputs/__init__.py +68 -0
- lfx/inputs/constants.py +2 -0
- lfx/inputs/input_mixin.py +328 -0
- lfx/inputs/inputs.py +714 -0
- lfx/inputs/validators.py +19 -0
- lfx/interface/__init__.py +6 -0
- lfx/interface/components.py +489 -0
- lfx/interface/importing/__init__.py +5 -0
- lfx/interface/importing/utils.py +39 -0
- lfx/interface/initialize/__init__.py +3 -0
- lfx/interface/initialize/loading.py +224 -0
- lfx/interface/listing.py +26 -0
- lfx/interface/run.py +16 -0
- lfx/interface/utils.py +111 -0
- lfx/io/__init__.py +63 -0
- lfx/io/schema.py +289 -0
- lfx/load/__init__.py +8 -0
- lfx/load/load.py +256 -0
- lfx/load/utils.py +99 -0
- lfx/log/__init__.py +5 -0
- lfx/log/logger.py +385 -0
- lfx/memory/__init__.py +90 -0
- lfx/memory/stubs.py +283 -0
- lfx/processing/__init__.py +1 -0
- lfx/processing/process.py +238 -0
- lfx/processing/utils.py +25 -0
- lfx/py.typed +0 -0
- lfx/schema/__init__.py +66 -0
- lfx/schema/artifact.py +83 -0
- lfx/schema/content_block.py +62 -0
- lfx/schema/content_types.py +91 -0
- lfx/schema/data.py +308 -0
- lfx/schema/dataframe.py +210 -0
- lfx/schema/dotdict.py +74 -0
- lfx/schema/encoders.py +13 -0
- lfx/schema/graph.py +47 -0
- lfx/schema/image.py +131 -0
- lfx/schema/json_schema.py +141 -0
- lfx/schema/log.py +61 -0
- lfx/schema/message.py +473 -0
- lfx/schema/openai_responses_schemas.py +74 -0
- lfx/schema/properties.py +41 -0
- lfx/schema/schema.py +171 -0
- lfx/schema/serialize.py +13 -0
- lfx/schema/table.py +140 -0
- lfx/schema/validators.py +114 -0
- lfx/serialization/__init__.py +5 -0
- lfx/serialization/constants.py +2 -0
- lfx/serialization/serialization.py +314 -0
- lfx/services/__init__.py +23 -0
- lfx/services/base.py +28 -0
- lfx/services/cache/__init__.py +6 -0
- lfx/services/cache/base.py +183 -0
- lfx/services/cache/service.py +166 -0
- lfx/services/cache/utils.py +169 -0
- lfx/services/chat/__init__.py +1 -0
- lfx/services/chat/config.py +2 -0
- lfx/services/chat/schema.py +10 -0
- lfx/services/deps.py +129 -0
- lfx/services/factory.py +19 -0
- lfx/services/initialize.py +19 -0
- lfx/services/interfaces.py +103 -0
- lfx/services/manager.py +172 -0
- lfx/services/schema.py +20 -0
- lfx/services/session.py +82 -0
- lfx/services/settings/__init__.py +3 -0
- lfx/services/settings/auth.py +130 -0
- lfx/services/settings/base.py +539 -0
- lfx/services/settings/constants.py +31 -0
- lfx/services/settings/factory.py +23 -0
- lfx/services/settings/feature_flags.py +12 -0
- lfx/services/settings/service.py +35 -0
- lfx/services/settings/utils.py +40 -0
- lfx/services/shared_component_cache/__init__.py +1 -0
- lfx/services/shared_component_cache/factory.py +30 -0
- lfx/services/shared_component_cache/service.py +9 -0
- lfx/services/storage/__init__.py +5 -0
- lfx/services/storage/local.py +155 -0
- lfx/services/storage/service.py +54 -0
- lfx/services/tracing/__init__.py +1 -0
- lfx/services/tracing/service.py +21 -0
- lfx/settings.py +6 -0
- lfx/template/__init__.py +6 -0
- lfx/template/field/__init__.py +0 -0
- lfx/template/field/base.py +257 -0
- lfx/template/field/prompt.py +15 -0
- lfx/template/frontend_node/__init__.py +6 -0
- lfx/template/frontend_node/base.py +212 -0
- lfx/template/frontend_node/constants.py +65 -0
- lfx/template/frontend_node/custom_components.py +79 -0
- lfx/template/template/__init__.py +0 -0
- lfx/template/template/base.py +100 -0
- lfx/template/utils.py +217 -0
- lfx/type_extraction/__init__.py +19 -0
- lfx/type_extraction/type_extraction.py +75 -0
- lfx/type_extraction.py +80 -0
- lfx/utils/__init__.py +1 -0
- lfx/utils/async_helpers.py +42 -0
- lfx/utils/component_utils.py +154 -0
- lfx/utils/concurrency.py +60 -0
- lfx/utils/connection_string_parser.py +11 -0
- lfx/utils/constants.py +205 -0
- lfx/utils/data_structure.py +212 -0
- lfx/utils/exceptions.py +22 -0
- lfx/utils/helpers.py +28 -0
- lfx/utils/image.py +73 -0
- lfx/utils/lazy_load.py +15 -0
- lfx/utils/request_utils.py +18 -0
- lfx/utils/schemas.py +139 -0
- lfx/utils/util.py +481 -0
- lfx/utils/util_strings.py +56 -0
- lfx/utils/version.py +24 -0
- lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
- lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
- lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
- lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,685 @@
|
|
1
|
+
import ast
|
2
|
+
import json
|
3
|
+
import shutil
|
4
|
+
import tarfile
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
from pathlib import Path
|
7
|
+
from tempfile import TemporaryDirectory
|
8
|
+
from typing import TYPE_CHECKING
|
9
|
+
from zipfile import ZipFile, is_zipfile
|
10
|
+
|
11
|
+
import pandas as pd
|
12
|
+
|
13
|
+
from lfx.custom.custom_component.component import Component
|
14
|
+
from lfx.io import BoolInput, FileInput, HandleInput, Output, StrInput
|
15
|
+
from lfx.schema.data import Data
|
16
|
+
from lfx.schema.dataframe import DataFrame
|
17
|
+
from lfx.schema.message import Message
|
18
|
+
|
19
|
+
if TYPE_CHECKING:
|
20
|
+
from collections.abc import Callable
|
21
|
+
|
22
|
+
|
23
|
+
class BaseFileComponent(Component, ABC):
|
24
|
+
"""Base class for handling file processing components.
|
25
|
+
|
26
|
+
This class provides common functionality for resolving, validating, and
|
27
|
+
processing file paths. Child classes must define valid file extensions
|
28
|
+
and implement the `process_files` method.
|
29
|
+
"""
|
30
|
+
|
31
|
+
class BaseFile:
    """Internal record pairing a file path with the Data parsed from it.

    Instances carry the file `path`, a `delete_after_processing` flag and a
    list of Data objects exposed through the `data` property.
    """

    def __init__(
        self,
        data: Data | list[Data],
        path: Path,
        *,
        delete_after_processing: bool = False,
        silent_errors: bool = False,
    ):
        """Store the file's data and metadata.

        Args:
            data: One Data object or a list of them; a single object is wrapped in a list.
            path: Location of the file this record describes.
            delete_after_processing: Flag stored on the record for callers that clean up files.
            silent_errors: If True, invalid values passed to the `data` setter or to
                `merge_data` are ignored instead of raising.
        """
        self._data = data if isinstance(data, list) else [data]
        self.path = path
        self.delete_after_processing = delete_after_processing
        self._silent_errors = silent_errors

    @property
    def data(self) -> list[Data]:
        """Return the stored Data objects, or an empty list when none are stored."""
        return self._data or []

    @data.setter
    def data(self, value: Data | list[Data]):
        """Replace the stored data.

        Raises:
            ValueError: If `value` is neither a Data object nor a list of Data
                objects and silent errors are disabled. With silent errors
                enabled the stored data is left unchanged.
        """
        if isinstance(value, Data):
            self._data = [value]
        elif isinstance(value, list) and all(isinstance(item, Data) for item in value):
            self._data = value
        else:
            msg = f"data must be a Data object or a list of Data objects. Got: {type(value)}"
            if not self._silent_errors:
                raise ValueError(msg)

    def merge_data(self, new_data: Data | list[Data] | None) -> list[Data]:
        r"""Generate a new list of Data objects by merging `new_data` into the current `data`.

        Args:
            new_data (Data | list[Data] | None): The new Data object(s) to merge into each existing Data object.
                If None, the current `data` is returned unchanged.

        Returns:
            list[Data]: One merged Data object per (existing, new) pair. Keys from the new
                item win on conflict. An empty `new_data` list therefore yields an empty list.

        Raises:
            ValueError: If `new_data` has an unsupported type and silent errors are disabled.
        """
        if new_data is None:
            return self.data

        if isinstance(new_data, Data):
            new_data_list = [new_data]
        elif isinstance(new_data, list) and all(isinstance(item, Data) for item in new_data):
            new_data_list = new_data
        else:
            msg = "new_data must be a Data object, a list of Data objects, or None."
            if not self._silent_errors:
                raise ValueError(msg)
            return self.data

        return [
            Data(data={**data.data, **new_data_item.data}) for data in self.data for new_data_item in new_data_list
        ]

    def __str__(self):
        """Return a short description: path, deletion flag and a preview of the data."""
        items = self.data
        if not items:
            text_preview = ""
        elif len(items) == 1:
            max_text_length = 50
            # `data` is always a list, so the text must be read from its single element.
            full_text = items[0].get_text() or ""
            text_preview = full_text[:max_text_length]
            if len(full_text) > max_text_length:
                text_preview += "..."
            text_preview = f"text_preview='{text_preview}'"
        else:
            text_preview = f"{len(items)} data objects"
        return f"BaseFile(path={self.path}, delete_after_processing={self.delete_after_processing}, {text_preview})"
|
101
|
+
|
102
|
+
# Class-level defaults; subclasses can override these class variables.
VALID_EXTENSIONS: list[str] = []  # To be overridden by child classes
# Prefixes exposed through the `ignore_starts_with` property.
IGNORE_STARTS_WITH = [".", "__MACOSX"]

# Key under which a Data object carries the path of a server-side file.
SERVER_FILE_PATH_FIELDNAME = "file_path"
# File suffixes (without the dot) that are treated as bundles to unpack.
SUPPORTED_BUNDLE_EXTENSIONS = ["zip", "tar", "tgz", "bz2", "gz"]
|
108
|
+
|
109
|
+
def __init__(self, *args, **kwargs):
    """Initialise the component and fill in the file input's dynamic fields.

    After the parent initialiser runs, the first base input (presumably the
    FileInput declared first in `_base_inputs` - verify against
    `get_base_inputs`) receives its accepted file types and help text, both
    derived from `valid_extensions` and `SUPPORTED_BUNDLE_EXTENSIONS`.
    """
    super().__init__(*args, **kwargs)

    # Accepted uploads: the subclass's own extensions plus every bundle format.
    self.get_base_inputs()[0].file_types = [*self.valid_extensions, *self.SUPPORTED_BUNDLE_EXTENSIONS]

    supported = ", ".join(self.valid_extensions)
    bundle_kinds = ", ".join(self.SUPPORTED_BUNDLE_EXTENSIONS)
    info_text = f"Supported file extensions: {supported}; optionally bundled in file extensions: {bundle_kinds}"
    self.get_base_inputs()[0].info = info_text
|
122
|
+
|
123
|
+
# Inputs shared by every file component. The first entry's `file_types` and
# `info` are filled in by `__init__`.
_base_inputs = [
    # Files selected directly on the component.
    FileInput(
        name="path",
        display_name="Files",
        fileTypes=[],  # Dynamically set in __init__
        info="",  # Dynamically set in __init__
        required=False,
        list=True,
        value=[],
    ),
    # Paths supplied by an upstream Data or Message object.
    HandleInput(
        name="file_path",
        display_name="Server File Path",
        info=(
            f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file"
            " or a Message object with a path to the file. Supercedes 'Path' but supports same file types."
        ),
        required=False,
        input_types=["Data", "Message"],
        is_list=True,
        advanced=True,
    ),
    # Joiner used when several outputs are combined into one Message.
    StrInput(
        name="separator",
        display_name="Separator",
        value="\n\n",
        show=True,
        info="Specify the separator to use between multiple outputs in Message format.",
        advanced=True,
    ),
    # Error-handling and clean-up switches.
    BoolInput(
        name="silent_errors",
        display_name="Silent Errors",
        advanced=True,
        info="If true, errors will not raise an exception.",
    ),
    BoolInput(
        name="delete_server_file_after_processing",
        display_name="Delete Server File After Processing",
        advanced=True,
        value=True,
        info="If true, the Server File Path will be deleted after processing.",
    ),
    BoolInput(
        name="ignore_unsupported_extensions",
        display_name="Ignore Unsupported Extensions",
        advanced=True,
        value=True,
        info="If true, files with unsupported extensions will not be processed.",
    ),
    BoolInput(
        name="ignore_unspecified_files",
        display_name="Ignore Unspecified Files",
        advanced=True,
        value=False,
        info=f"If true, Data with no '{SERVER_FILE_PATH_FIELDNAME}' property will be ignored.",
    ),
]

# Single default output, produced by `load_files`.
_base_outputs = [
    Output(display_name="Files", name="dataframe", method="load_files"),
]
|
185
|
+
|
186
|
+
@abstractmethod
def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]:
    """Parse the given files; every concrete subclass must implement this.

    Args:
        file_list (list[BaseFile]): The file records to process.

    Returns:
        list[BaseFile]: BaseFile records whose `data` has been updated.
    """
|
196
|
+
|
197
|
+
def load_files_base(self) -> list[Data]:
    """Run the full load pipeline and return the parsed data.

    The pipeline resolves the input paths, unpacks directories and bundles,
    filters by file type, and hands the remaining files to `process_files`.
    Temporary directories and files flagged for deletion are removed
    afterwards, whether or not processing succeeded.

    Returns:
        list[Data]: The Data objects of all processed files, flattened in order.
    """
    self._temp_dirs: list[TemporaryDirectory] = []
    final_files = []  # bound before `try` so the `finally` clause can always iterate it
    try:
        resolved = self._validate_and_resolve_paths()
        expanded = self._unpack_and_collect_files(resolved)
        final_files = self._filter_and_mark_files(expanded)
        processed = self.process_files(final_files)

        flattened = []
        for processed_file in processed:
            flattened.extend(processed_file.data)
        return flattened

    finally:
        # Unpacked bundle contents live in these directories.
        for temp_dir in self._temp_dirs:
            temp_dir.cleanup()

        # Remove whatever was marked for deletion and still exists.
        for candidate in final_files:
            if not (candidate.delete_after_processing and candidate.path.exists()):
                continue
            if candidate.path.is_dir():
                shutil.rmtree(candidate.path)
            else:
                candidate.path.unlink()
|
233
|
+
|
234
|
+
def load_files_core(self) -> list[Data]:
    """Load the files and return their Data objects.

    Returns:
        list[Data]: The result of `load_files_base`, or a list holding one
            default-constructed `Data()` when that result is empty.
    """
    loaded = self.load_files_base()
    # Callers always receive at least one element.
    return loaded if loaded else [Data()]
|
244
|
+
|
245
|
+
def load_files_message(self) -> Message:
    """Load the files and join their text into one Message.

    Returns:
        Message: The text of every loaded Data object, joined with the
            component's `separator` (two newlines when unset or empty).
    """
    records = self.load_files_core()
    if not records:
        return Message()  # No data -> empty message

    joiner: str = getattr(self, "separator", "\n\n") or "\n\n"

    def _as_text(record):
        # Prefer explicit text, then the "text" key, and finally str() of the record.
        if isinstance(record.data, dict):
            reader = getattr(record, "get_text", lambda: None)
            extracted = reader() or record.data.get("text")
        else:
            extracted = None
        return extracted if extracted is not None else str(record)

    pieces: list[str] = [_as_text(record) for record in records]
    return Message(text=joiner.join(pieces))
|
264
|
+
|
265
|
+
def load_files_path(self) -> Message:
    """Return the resolved input paths as a newline-separated Message.

    Returns:
        Message: POSIX-style paths of the resolved files that exist on disk,
            one per line; empty text when there are none.
    """
    existing = []
    for entry in self._validate_and_resolve_paths():
        if entry.path.exists():
            existing.append(entry.path.as_posix())

    return Message(text="\n".join(existing) if existing else "")
|
275
|
+
|
276
|
+
def load_files_structured_helper(self, file_path: str) -> list[dict] | None:
    """Read a tabular file with pandas and return its rows.

    Args:
        file_path: Path of the file to read. The reader is chosen by the
            lower-cased suffix; `.csv`, `.xlsx` and `.parquet` are supported.

    Returns:
        One dict per row, or None when `file_path` is empty or the suffix
        has no registered reader.
    """
    if not file_path:
        return None

    # Suffix -> pandas reader.
    # TODO: sqlite and json support?
    readers: dict[str, Callable[[str], pd.DataFrame]] = {
        ".csv": pd.read_csv,
        ".xlsx": pd.read_excel,
        ".parquet": pd.read_parquet,
    }

    suffix = Path(file_path).suffix.lower()
    try:
        read = readers[suffix]
    except KeyError:
        return None

    return read(file_path).to_dict("records")
|
299
|
+
|
300
|
+
def load_files_structured(self) -> DataFrame:
    """Load the files and return the first one's content as a DataFrame.

    Only the first loaded Data object is used. When it points at a `.csv`,
    `.xlsx` or `.parquet` file, that file is read through
    `load_files_structured_helper`; otherwise the Data object's own dict
    becomes the single row.

    Returns:
        DataFrame: The structured content; also stored in `self.status`.
    """
    records = self.load_files_core()
    if not records:
        return DataFrame()

    first = records[0]
    source_path = first.data.get(self.SERVER_FILE_PATH_FIELDNAME, None)

    tabular_suffixes = (".csv", ".xlsx", ".parquet")
    is_tabular = bool(source_path) and str(source_path).lower().endswith(tabular_suffixes)

    # TODO: Parse according to docling standards
    rows = self.load_files_structured_helper(source_path) if is_tabular else [first.data]

    self.status = DataFrame(rows)

    return DataFrame(rows)
|
324
|
+
|
325
|
+
def parse_string_to_dict(self, s: str) -> dict:
    """Parse text into a dict, trying JSON first and a Python literal second.

    Args:
        s: Text expected to hold a JSON object or a Python dict literal.
            A dict is returned unchanged. Any other non-string value ends
            up in the fallback instead of raising.

    Returns:
        The parsed dict, or `{"value": s}` when `s` cannot be parsed or
        parses to something other than a dict.
    """
    if isinstance(s, dict):
        return s

    # JSON first (handles true/false/null). TypeError covers non-string input;
    # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
    try:
        parsed = json.loads(s)
    except (TypeError, ValueError, RecursionError):
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # Fall back to Python literal syntax (single quotes, True/False/None).
    # literal_eval only evaluates literals, but malformed input can raise any of these.
    try:
        parsed = ast.literal_eval(s)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    # If all parsing fails, return the fallback
    return {"value": s}
|
344
|
+
|
345
|
+
def load_files_json(self) -> Data:
    """Load the files and return the first one's content as a JSON Data object.

    Only the first loaded Data object is used: the value under its
    `text_key` is parsed with `parse_string_to_dict`.

    Returns:
        Data: The parsed content (also stored in `self.status`), or an empty
            `Data()` when nothing was loaded or the first object has no text.
    """
    data_list = self.load_files_core()
    if not data_list:
        return Data()

    first = data_list[0]
    # `load_files_core` substitutes a default `Data()` when no file was loaded;
    # such an object may have no entry under `text_key`, so look it up without
    # risking a KeyError.
    raw = first.data.get(first.text_key) if isinstance(first.data, dict) else None
    if raw is None:
        return Data()

    # Already-structured content needs no parsing.
    json_data = raw if isinstance(raw, dict) else self.parse_string_to_dict(raw)

    self.status = Data(data=json_data)

    return Data(data=json_data)
|
362
|
+
|
363
|
+
def load_files(self) -> DataFrame:
    """Load the files and return one DataFrame row per Data object.

    Returns:
        DataFrame: A row for each loaded Data object, built from a copy of
            its dict; also stored in `self.status`.
    """
    records = self.load_files_core()
    if not records:
        return DataFrame()

    path_key = self.SERVER_FILE_PATH_FIELDNAME
    table_rows = []
    for record in records:
        payload = record.data
        source_path = payload.get(path_key)
        row = dict(payload) if payload else {}

        # Carry the text over and expose the source path under "file_path".
        if "text" in payload:
            row["text"] = payload["text"]
        if source_path:
            row["file_path"] = source_path
        table_rows.append(row)

    self.status = DataFrame(table_rows)

    return DataFrame(table_rows)
|
389
|
+
|
390
|
+
@property
def valid_extensions(self) -> list[str]:
    """File extensions this component accepts.

    Child classes may override this property to supply their own list;
    by default it exposes the `VALID_EXTENSIONS` class variable.

    Returns:
        list[str]: Accepted extensions, written without the leading dot.
    """
    return self.VALID_EXTENSIONS
|
401
|
+
|
402
|
+
@property
def ignore_starts_with(self) -> list[str]:
    """Name prefixes to skip when unpacking file bundles.

    Exposes the `IGNORE_STARTS_WITH` class variable.

    Returns:
        list[str]: The prefixes to ignore.
    """
    return self.IGNORE_STARTS_WITH
|
410
|
+
|
411
|
+
def rollup_data(
    self,
    base_files: list[BaseFile],
    data_list: list[Data | None],
    path_field: str = SERVER_FILE_PATH_FIELDNAME,
) -> list[BaseFile]:
    r"""Rolls up Data objects into corresponding BaseFile objects in order given by `base_files`.

    Data objects are grouped by the value of `path_field`; each group is then
    merged into the BaseFile whose `str(path)` equals that value.

    Args:
        base_files (list[BaseFile]): The original BaseFile objects.
        data_list (list[Data | None]): The list of data to be aggregated into the BaseFile objects.
            `None` entries are skipped.
        path_field (str): The field name on the data_list objects that holds the file path as a string.

    Returns:
        list[BaseFile]: A new list of BaseFile objects with merged `data` attributes.

    Raises:
        ValueError: If a Data object lacks `path_field` and `silent_errors` is off.
            With `silent_errors` on, such objects are logged and skipped.
    """
    # Group the incoming Data objects by the path they refer to.
    grouped: dict[str, list[Data]] = {}
    for data in data_list:
        if data is None:
            continue
        key = data.data.get(path_field)
        if key is None:
            msg = f"Data object missing required field '{path_field}': {data}"
            self.log(msg)  # logged once, whether or not it is raised
            if not self.silent_errors:
                raise ValueError(msg)
            continue
        grouped.setdefault(key, []).append(data)

    # Build fresh BaseFile records in the order of `base_files`.
    return [
        BaseFileComponent.BaseFile(
            data=base_file.merge_data(grouped.get(str(base_file.path), [])),
            path=base_file.path,
            delete_after_processing=base_file.delete_after_processing,
        )
        for base_file in base_files
    ]
|
463
|
+
|
464
|
+
def _file_path_as_list(self) -> list[Data]:
|
465
|
+
file_path = self.file_path
|
466
|
+
if not file_path:
|
467
|
+
return []
|
468
|
+
|
469
|
+
def _message_to_data(message: Message) -> Data:
|
470
|
+
return Data(**{self.SERVER_FILE_PATH_FIELDNAME: message.text})
|
471
|
+
|
472
|
+
if isinstance(file_path, Data):
|
473
|
+
file_path = [file_path]
|
474
|
+
elif isinstance(file_path, Message):
|
475
|
+
file_path = [_message_to_data(file_path)]
|
476
|
+
elif not isinstance(file_path, list):
|
477
|
+
msg = f"Expected list of Data objects in file_path but got {type(file_path)}."
|
478
|
+
self.log(msg)
|
479
|
+
if not self.silent_errors:
|
480
|
+
raise ValueError(msg)
|
481
|
+
return []
|
482
|
+
|
483
|
+
file_paths = []
|
484
|
+
for obj in file_path:
|
485
|
+
data_obj = _message_to_data(obj) if isinstance(obj, Message) else obj
|
486
|
+
|
487
|
+
if not isinstance(data_obj, Data):
|
488
|
+
msg = f"Expected Data object in file_path but got {type(data_obj)}."
|
489
|
+
self.log(msg)
|
490
|
+
if not self.silent_errors:
|
491
|
+
raise ValueError(msg)
|
492
|
+
continue
|
493
|
+
file_paths.append(data_obj)
|
494
|
+
|
495
|
+
return file_paths
|
496
|
+
|
497
|
+
def _validate_and_resolve_paths(self) -> list[BaseFile]:
|
498
|
+
"""Validate that all input paths exist and are valid, and create BaseFile instances.
|
499
|
+
|
500
|
+
Returns:
|
501
|
+
list[BaseFile]: A list of valid BaseFile instances.
|
502
|
+
|
503
|
+
Raises:
|
504
|
+
ValueError: If any path does not exist.
|
505
|
+
"""
|
506
|
+
resolved_files = []
|
507
|
+
|
508
|
+
def add_file(data: Data, path: str | Path, *, delete_after_processing: bool):
|
509
|
+
resolved_path = Path(self.resolve_path(str(path)))
|
510
|
+
|
511
|
+
if not resolved_path.exists():
|
512
|
+
msg = f"File or directory not found: {path}"
|
513
|
+
self.log(msg)
|
514
|
+
if not self.silent_errors:
|
515
|
+
raise ValueError(msg)
|
516
|
+
resolved_files.append(
|
517
|
+
BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing)
|
518
|
+
)
|
519
|
+
|
520
|
+
file_path = self._file_path_as_list()
|
521
|
+
|
522
|
+
if self.path and not file_path:
|
523
|
+
# Wrap self.path into a Data object
|
524
|
+
if isinstance(self.path, list):
|
525
|
+
for path in self.path:
|
526
|
+
data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: path})
|
527
|
+
add_file(data=data_obj, path=path, delete_after_processing=False)
|
528
|
+
else:
|
529
|
+
data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: self.path})
|
530
|
+
add_file(data=data_obj, path=self.path, delete_after_processing=False)
|
531
|
+
elif file_path:
|
532
|
+
for obj in file_path:
|
533
|
+
server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)
|
534
|
+
if server_file_path:
|
535
|
+
add_file(
|
536
|
+
data=obj,
|
537
|
+
path=server_file_path,
|
538
|
+
delete_after_processing=self.delete_server_file_after_processing,
|
539
|
+
)
|
540
|
+
elif not self.ignore_unspecified_files:
|
541
|
+
msg = f"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property."
|
542
|
+
self.log(msg)
|
543
|
+
if not self.silent_errors:
|
544
|
+
raise ValueError(msg)
|
545
|
+
else:
|
546
|
+
msg = f"Ignoring Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property:\n{obj}"
|
547
|
+
self.log(msg)
|
548
|
+
|
549
|
+
return resolved_files
|
550
|
+
|
551
|
+
def _unpack_and_collect_files(self, files: list[BaseFile]) -> list[BaseFile]:
|
552
|
+
"""Recursively unpack bundles and collect files into BaseFile instances.
|
553
|
+
|
554
|
+
Args:
|
555
|
+
files (list[BaseFile]): List of BaseFile instances to process.
|
556
|
+
|
557
|
+
Returns:
|
558
|
+
list[BaseFile]: Updated list of BaseFile instances.
|
559
|
+
"""
|
560
|
+
collected_files = []
|
561
|
+
|
562
|
+
for file in files:
|
563
|
+
path = file.path
|
564
|
+
delete_after_processing = file.delete_after_processing
|
565
|
+
data = file.data
|
566
|
+
|
567
|
+
if path.is_dir():
|
568
|
+
# Recurse into directories
|
569
|
+
collected_files.extend(
|
570
|
+
[
|
571
|
+
BaseFileComponent.BaseFile(
|
572
|
+
data,
|
573
|
+
sub_path,
|
574
|
+
delete_after_processing=delete_after_processing,
|
575
|
+
)
|
576
|
+
for sub_path in path.rglob("*")
|
577
|
+
if sub_path.is_file()
|
578
|
+
]
|
579
|
+
)
|
580
|
+
elif path.suffix[1:] in self.SUPPORTED_BUNDLE_EXTENSIONS:
|
581
|
+
# Unpack supported bundles
|
582
|
+
temp_dir = TemporaryDirectory()
|
583
|
+
self._temp_dirs.append(temp_dir)
|
584
|
+
temp_dir_path = Path(temp_dir.name)
|
585
|
+
self._unpack_bundle(path, temp_dir_path)
|
586
|
+
subpaths = list(temp_dir_path.iterdir())
|
587
|
+
self.log(f"Unpacked bundle {path.name} into {subpaths}")
|
588
|
+
collected_files.extend(
|
589
|
+
[
|
590
|
+
BaseFileComponent.BaseFile(
|
591
|
+
data,
|
592
|
+
sub_path,
|
593
|
+
delete_after_processing=delete_after_processing,
|
594
|
+
)
|
595
|
+
for sub_path in subpaths
|
596
|
+
]
|
597
|
+
)
|
598
|
+
else:
|
599
|
+
collected_files.append(file)
|
600
|
+
|
601
|
+
# Recurse again if any directories or bundles are left in the list
|
602
|
+
if any(
|
603
|
+
file.path.is_dir() or file.path.suffix[1:] in self.SUPPORTED_BUNDLE_EXTENSIONS for file in collected_files
|
604
|
+
):
|
605
|
+
return self._unpack_and_collect_files(collected_files)
|
606
|
+
|
607
|
+
return collected_files
|
608
|
+
|
609
|
+
def _unpack_bundle(self, bundle_path: Path, output_dir: Path):
|
610
|
+
"""Unpack a bundle into a temporary directory.
|
611
|
+
|
612
|
+
Args:
|
613
|
+
bundle_path (Path): Path to the bundle.
|
614
|
+
output_dir (Path): Directory where files will be extracted.
|
615
|
+
|
616
|
+
Raises:
|
617
|
+
ValueError: If the bundle format is unsupported or cannot be read.
|
618
|
+
"""
|
619
|
+
|
620
|
+
def _safe_extract_zip(bundle: ZipFile, output_dir: Path):
|
621
|
+
"""Safely extract ZIP files."""
|
622
|
+
for member in bundle.namelist():
|
623
|
+
member_path = output_dir / member
|
624
|
+
# Ensure no path traversal outside `output_dir`
|
625
|
+
if not member_path.resolve().is_relative_to(output_dir.resolve()):
|
626
|
+
msg = f"Attempted Path Traversal in ZIP File: {member}"
|
627
|
+
raise ValueError(msg)
|
628
|
+
bundle.extract(member, path=output_dir)
|
629
|
+
|
630
|
+
def _safe_extract_tar(bundle: tarfile.TarFile, output_dir: Path):
|
631
|
+
"""Safely extract TAR files."""
|
632
|
+
for member in bundle.getmembers():
|
633
|
+
member_path = output_dir / member.name
|
634
|
+
# Ensure no path traversal outside `output_dir`
|
635
|
+
if not member_path.resolve().is_relative_to(output_dir.resolve()):
|
636
|
+
msg = f"Attempted Path Traversal in TAR File: {member.name}"
|
637
|
+
raise ValueError(msg)
|
638
|
+
bundle.extract(member, path=output_dir)
|
639
|
+
|
640
|
+
# Check and extract based on file type
|
641
|
+
if is_zipfile(bundle_path):
|
642
|
+
with ZipFile(bundle_path, "r") as zip_bundle:
|
643
|
+
_safe_extract_zip(zip_bundle, output_dir)
|
644
|
+
elif tarfile.is_tarfile(bundle_path):
|
645
|
+
with tarfile.open(bundle_path, "r:*") as tar_bundle:
|
646
|
+
_safe_extract_tar(tar_bundle, output_dir)
|
647
|
+
else:
|
648
|
+
msg = f"Unsupported bundle format: {bundle_path.suffix}"
|
649
|
+
raise ValueError(msg)
|
650
|
+
|
651
|
+
def _filter_and_mark_files(self, files: list[BaseFile]) -> list[BaseFile]:
|
652
|
+
"""Validate file types and mark files for removal.
|
653
|
+
|
654
|
+
Args:
|
655
|
+
files (list[BaseFile]): List of BaseFile instances.
|
656
|
+
|
657
|
+
Returns:
|
658
|
+
list[BaseFile]: Validated BaseFile instances.
|
659
|
+
|
660
|
+
Raises:
|
661
|
+
ValueError: If unsupported files are encountered and `ignore_unsupported_extensions` is False.
|
662
|
+
"""
|
663
|
+
final_files = []
|
664
|
+
ignored_files = []
|
665
|
+
|
666
|
+
for file in files:
|
667
|
+
if not file.path.is_file():
|
668
|
+
self.log(f"Not a file: {file.path.name}")
|
669
|
+
continue
|
670
|
+
|
671
|
+
if file.path.suffix[1:].lower() not in self.valid_extensions:
|
672
|
+
if self.ignore_unsupported_extensions:
|
673
|
+
ignored_files.append(file.path.name)
|
674
|
+
continue
|
675
|
+
msg = f"Unsupported file extension: {file.path.suffix}"
|
676
|
+
self.log(msg)
|
677
|
+
if not self.silent_errors:
|
678
|
+
raise ValueError(msg)
|
679
|
+
|
680
|
+
final_files.append(file)
|
681
|
+
|
682
|
+
if ignored_files:
|
683
|
+
self.log(f"Ignored files: {ignored_files}")
|
684
|
+
|
685
|
+
return final_files
|