lfx-nightly 0.1.11.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/__init__.py +0 -0
- lfx/__main__.py +25 -0
- lfx/base/__init__.py +0 -0
- lfx/base/agents/__init__.py +0 -0
- lfx/base/agents/agent.py +268 -0
- lfx/base/agents/callback.py +130 -0
- lfx/base/agents/context.py +109 -0
- lfx/base/agents/crewai/__init__.py +0 -0
- lfx/base/agents/crewai/crew.py +231 -0
- lfx/base/agents/crewai/tasks.py +12 -0
- lfx/base/agents/default_prompts.py +23 -0
- lfx/base/agents/errors.py +15 -0
- lfx/base/agents/events.py +346 -0
- lfx/base/agents/utils.py +205 -0
- lfx/base/astra_assistants/__init__.py +0 -0
- lfx/base/astra_assistants/util.py +171 -0
- lfx/base/chains/__init__.py +0 -0
- lfx/base/chains/model.py +19 -0
- lfx/base/composio/__init__.py +0 -0
- lfx/base/composio/composio_base.py +1291 -0
- lfx/base/compressors/__init__.py +0 -0
- lfx/base/compressors/model.py +60 -0
- lfx/base/constants.py +46 -0
- lfx/base/curl/__init__.py +0 -0
- lfx/base/curl/parse.py +188 -0
- lfx/base/data/__init__.py +5 -0
- lfx/base/data/base_file.py +685 -0
- lfx/base/data/docling_utils.py +245 -0
- lfx/base/data/utils.py +198 -0
- lfx/base/document_transformers/__init__.py +0 -0
- lfx/base/document_transformers/model.py +43 -0
- lfx/base/embeddings/__init__.py +0 -0
- lfx/base/embeddings/aiml_embeddings.py +62 -0
- lfx/base/embeddings/model.py +26 -0
- lfx/base/flow_processing/__init__.py +0 -0
- lfx/base/flow_processing/utils.py +86 -0
- lfx/base/huggingface/__init__.py +0 -0
- lfx/base/huggingface/model_bridge.py +133 -0
- lfx/base/io/__init__.py +0 -0
- lfx/base/io/chat.py +20 -0
- lfx/base/io/text.py +22 -0
- lfx/base/langchain_utilities/__init__.py +0 -0
- lfx/base/langchain_utilities/model.py +35 -0
- lfx/base/langchain_utilities/spider_constants.py +1 -0
- lfx/base/langwatch/__init__.py +0 -0
- lfx/base/langwatch/utils.py +18 -0
- lfx/base/mcp/__init__.py +0 -0
- lfx/base/mcp/constants.py +2 -0
- lfx/base/mcp/util.py +1398 -0
- lfx/base/memory/__init__.py +0 -0
- lfx/base/memory/memory.py +49 -0
- lfx/base/memory/model.py +38 -0
- lfx/base/models/__init__.py +3 -0
- lfx/base/models/aiml_constants.py +51 -0
- lfx/base/models/anthropic_constants.py +47 -0
- lfx/base/models/aws_constants.py +151 -0
- lfx/base/models/chat_result.py +76 -0
- lfx/base/models/google_generative_ai_constants.py +70 -0
- lfx/base/models/groq_constants.py +134 -0
- lfx/base/models/model.py +375 -0
- lfx/base/models/model_input_constants.py +307 -0
- lfx/base/models/model_metadata.py +41 -0
- lfx/base/models/model_utils.py +8 -0
- lfx/base/models/novita_constants.py +35 -0
- lfx/base/models/ollama_constants.py +49 -0
- lfx/base/models/openai_constants.py +122 -0
- lfx/base/models/sambanova_constants.py +18 -0
- lfx/base/processing/__init__.py +0 -0
- lfx/base/prompts/__init__.py +0 -0
- lfx/base/prompts/api_utils.py +224 -0
- lfx/base/prompts/utils.py +61 -0
- lfx/base/textsplitters/__init__.py +0 -0
- lfx/base/textsplitters/model.py +28 -0
- lfx/base/tools/__init__.py +0 -0
- lfx/base/tools/base.py +26 -0
- lfx/base/tools/component_tool.py +325 -0
- lfx/base/tools/constants.py +49 -0
- lfx/base/tools/flow_tool.py +132 -0
- lfx/base/tools/run_flow.py +224 -0
- lfx/base/vectorstores/__init__.py +0 -0
- lfx/base/vectorstores/model.py +193 -0
- lfx/base/vectorstores/utils.py +22 -0
- lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
- lfx/cli/__init__.py +5 -0
- lfx/cli/commands.py +319 -0
- lfx/cli/common.py +650 -0
- lfx/cli/run.py +441 -0
- lfx/cli/script_loader.py +247 -0
- lfx/cli/serve_app.py +546 -0
- lfx/cli/validation.py +69 -0
- lfx/components/FAISS/__init__.py +34 -0
- lfx/components/FAISS/faiss.py +111 -0
- lfx/components/Notion/__init__.py +19 -0
- lfx/components/Notion/add_content_to_page.py +269 -0
- lfx/components/Notion/create_page.py +94 -0
- lfx/components/Notion/list_database_properties.py +68 -0
- lfx/components/Notion/list_pages.py +122 -0
- lfx/components/Notion/list_users.py +77 -0
- lfx/components/Notion/page_content_viewer.py +93 -0
- lfx/components/Notion/search.py +111 -0
- lfx/components/Notion/update_page_property.py +114 -0
- lfx/components/__init__.py +411 -0
- lfx/components/_importing.py +42 -0
- lfx/components/agentql/__init__.py +3 -0
- lfx/components/agentql/agentql_api.py +151 -0
- lfx/components/agents/__init__.py +34 -0
- lfx/components/agents/agent.py +558 -0
- lfx/components/agents/mcp_component.py +501 -0
- lfx/components/aiml/__init__.py +37 -0
- lfx/components/aiml/aiml.py +112 -0
- lfx/components/aiml/aiml_embeddings.py +37 -0
- lfx/components/amazon/__init__.py +36 -0
- lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
- lfx/components/amazon/amazon_bedrock_model.py +124 -0
- lfx/components/amazon/s3_bucket_uploader.py +211 -0
- lfx/components/anthropic/__init__.py +34 -0
- lfx/components/anthropic/anthropic.py +187 -0
- lfx/components/apify/__init__.py +5 -0
- lfx/components/apify/apify_actor.py +325 -0
- lfx/components/arxiv/__init__.py +3 -0
- lfx/components/arxiv/arxiv.py +163 -0
- lfx/components/assemblyai/__init__.py +46 -0
- lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
- lfx/components/assemblyai/assemblyai_lemur.py +183 -0
- lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
- lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
- lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
- lfx/components/azure/__init__.py +37 -0
- lfx/components/azure/azure_openai.py +95 -0
- lfx/components/azure/azure_openai_embeddings.py +83 -0
- lfx/components/baidu/__init__.py +32 -0
- lfx/components/baidu/baidu_qianfan_chat.py +113 -0
- lfx/components/bing/__init__.py +3 -0
- lfx/components/bing/bing_search_api.py +61 -0
- lfx/components/cassandra/__init__.py +40 -0
- lfx/components/cassandra/cassandra.py +264 -0
- lfx/components/cassandra/cassandra_chat.py +92 -0
- lfx/components/cassandra/cassandra_graph.py +238 -0
- lfx/components/chains/__init__.py +3 -0
- lfx/components/chroma/__init__.py +34 -0
- lfx/components/chroma/chroma.py +167 -0
- lfx/components/cleanlab/__init__.py +40 -0
- lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
- lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
- lfx/components/cleanlab/cleanlab_remediator.py +131 -0
- lfx/components/clickhouse/__init__.py +34 -0
- lfx/components/clickhouse/clickhouse.py +135 -0
- lfx/components/cloudflare/__init__.py +32 -0
- lfx/components/cloudflare/cloudflare.py +81 -0
- lfx/components/cohere/__init__.py +40 -0
- lfx/components/cohere/cohere_embeddings.py +81 -0
- lfx/components/cohere/cohere_models.py +46 -0
- lfx/components/cohere/cohere_rerank.py +51 -0
- lfx/components/composio/__init__.py +74 -0
- lfx/components/composio/composio_api.py +268 -0
- lfx/components/composio/dropbox_compnent.py +11 -0
- lfx/components/composio/github_composio.py +11 -0
- lfx/components/composio/gmail_composio.py +38 -0
- lfx/components/composio/googlecalendar_composio.py +11 -0
- lfx/components/composio/googlemeet_composio.py +11 -0
- lfx/components/composio/googletasks_composio.py +8 -0
- lfx/components/composio/linear_composio.py +11 -0
- lfx/components/composio/outlook_composio.py +11 -0
- lfx/components/composio/reddit_composio.py +11 -0
- lfx/components/composio/slack_composio.py +582 -0
- lfx/components/composio/slackbot_composio.py +11 -0
- lfx/components/composio/supabase_composio.py +11 -0
- lfx/components/composio/todoist_composio.py +11 -0
- lfx/components/composio/youtube_composio.py +11 -0
- lfx/components/confluence/__init__.py +3 -0
- lfx/components/confluence/confluence.py +84 -0
- lfx/components/couchbase/__init__.py +34 -0
- lfx/components/couchbase/couchbase.py +102 -0
- lfx/components/crewai/__init__.py +49 -0
- lfx/components/crewai/crewai.py +107 -0
- lfx/components/crewai/hierarchical_crew.py +46 -0
- lfx/components/crewai/hierarchical_task.py +44 -0
- lfx/components/crewai/sequential_crew.py +52 -0
- lfx/components/crewai/sequential_task.py +73 -0
- lfx/components/crewai/sequential_task_agent.py +143 -0
- lfx/components/custom_component/__init__.py +34 -0
- lfx/components/custom_component/custom_component.py +31 -0
- lfx/components/data/__init__.py +64 -0
- lfx/components/data/api_request.py +544 -0
- lfx/components/data/csv_to_data.py +95 -0
- lfx/components/data/directory.py +113 -0
- lfx/components/data/file.py +577 -0
- lfx/components/data/json_to_data.py +98 -0
- lfx/components/data/news_search.py +164 -0
- lfx/components/data/rss.py +69 -0
- lfx/components/data/sql_executor.py +101 -0
- lfx/components/data/url.py +311 -0
- lfx/components/data/web_search.py +112 -0
- lfx/components/data/webhook.py +56 -0
- lfx/components/datastax/__init__.py +70 -0
- lfx/components/datastax/astra_assistant_manager.py +306 -0
- lfx/components/datastax/astra_db.py +75 -0
- lfx/components/datastax/astra_vectorize.py +124 -0
- lfx/components/datastax/astradb.py +1285 -0
- lfx/components/datastax/astradb_cql.py +314 -0
- lfx/components/datastax/astradb_graph.py +330 -0
- lfx/components/datastax/astradb_tool.py +414 -0
- lfx/components/datastax/astradb_vectorstore.py +1285 -0
- lfx/components/datastax/cassandra.py +92 -0
- lfx/components/datastax/create_assistant.py +58 -0
- lfx/components/datastax/create_thread.py +32 -0
- lfx/components/datastax/dotenv.py +35 -0
- lfx/components/datastax/get_assistant.py +37 -0
- lfx/components/datastax/getenvvar.py +30 -0
- lfx/components/datastax/graph_rag.py +141 -0
- lfx/components/datastax/hcd.py +314 -0
- lfx/components/datastax/list_assistants.py +25 -0
- lfx/components/datastax/run.py +89 -0
- lfx/components/deactivated/__init__.py +15 -0
- lfx/components/deactivated/amazon_kendra.py +66 -0
- lfx/components/deactivated/chat_litellm_model.py +158 -0
- lfx/components/deactivated/code_block_extractor.py +26 -0
- lfx/components/deactivated/documents_to_data.py +22 -0
- lfx/components/deactivated/embed.py +16 -0
- lfx/components/deactivated/extract_key_from_data.py +46 -0
- lfx/components/deactivated/json_document_builder.py +57 -0
- lfx/components/deactivated/list_flows.py +20 -0
- lfx/components/deactivated/mcp_sse.py +61 -0
- lfx/components/deactivated/mcp_stdio.py +62 -0
- lfx/components/deactivated/merge_data.py +93 -0
- lfx/components/deactivated/message.py +37 -0
- lfx/components/deactivated/metal.py +54 -0
- lfx/components/deactivated/multi_query.py +59 -0
- lfx/components/deactivated/retriever.py +43 -0
- lfx/components/deactivated/selective_passthrough.py +77 -0
- lfx/components/deactivated/should_run_next.py +40 -0
- lfx/components/deactivated/split_text.py +63 -0
- lfx/components/deactivated/store_message.py +24 -0
- lfx/components/deactivated/sub_flow.py +124 -0
- lfx/components/deactivated/vectara_self_query.py +76 -0
- lfx/components/deactivated/vector_store.py +24 -0
- lfx/components/deepseek/__init__.py +34 -0
- lfx/components/deepseek/deepseek.py +136 -0
- lfx/components/docling/__init__.py +43 -0
- lfx/components/docling/chunk_docling_document.py +186 -0
- lfx/components/docling/docling_inline.py +231 -0
- lfx/components/docling/docling_remote.py +193 -0
- lfx/components/docling/export_docling_document.py +117 -0
- lfx/components/documentloaders/__init__.py +3 -0
- lfx/components/duckduckgo/__init__.py +3 -0
- lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
- lfx/components/elastic/__init__.py +37 -0
- lfx/components/elastic/elasticsearch.py +267 -0
- lfx/components/elastic/opensearch.py +243 -0
- lfx/components/embeddings/__init__.py +37 -0
- lfx/components/embeddings/similarity.py +76 -0
- lfx/components/embeddings/text_embedder.py +64 -0
- lfx/components/exa/__init__.py +3 -0
- lfx/components/exa/exa_search.py +68 -0
- lfx/components/firecrawl/__init__.py +43 -0
- lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
- lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
- lfx/components/firecrawl/firecrawl_map_api.py +89 -0
- lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
- lfx/components/git/__init__.py +4 -0
- lfx/components/git/git.py +262 -0
- lfx/components/git/gitextractor.py +196 -0
- lfx/components/glean/__init__.py +3 -0
- lfx/components/glean/glean_search_api.py +173 -0
- lfx/components/google/__init__.py +17 -0
- lfx/components/google/gmail.py +192 -0
- lfx/components/google/google_bq_sql_executor.py +157 -0
- lfx/components/google/google_drive.py +92 -0
- lfx/components/google/google_drive_search.py +152 -0
- lfx/components/google/google_generative_ai.py +147 -0
- lfx/components/google/google_generative_ai_embeddings.py +141 -0
- lfx/components/google/google_oauth_token.py +89 -0
- lfx/components/google/google_search_api_core.py +68 -0
- lfx/components/google/google_serper_api_core.py +74 -0
- lfx/components/groq/__init__.py +34 -0
- lfx/components/groq/groq.py +136 -0
- lfx/components/helpers/__init__.py +52 -0
- lfx/components/helpers/calculator_core.py +89 -0
- lfx/components/helpers/create_list.py +40 -0
- lfx/components/helpers/current_date.py +42 -0
- lfx/components/helpers/id_generator.py +42 -0
- lfx/components/helpers/memory.py +251 -0
- lfx/components/helpers/output_parser.py +45 -0
- lfx/components/helpers/store_message.py +90 -0
- lfx/components/homeassistant/__init__.py +7 -0
- lfx/components/homeassistant/home_assistant_control.py +152 -0
- lfx/components/homeassistant/list_home_assistant_states.py +137 -0
- lfx/components/huggingface/__init__.py +37 -0
- lfx/components/huggingface/huggingface.py +197 -0
- lfx/components/huggingface/huggingface_inference_api.py +106 -0
- lfx/components/ibm/__init__.py +34 -0
- lfx/components/ibm/watsonx.py +203 -0
- lfx/components/ibm/watsonx_embeddings.py +135 -0
- lfx/components/icosacomputing/__init__.py +5 -0
- lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
- lfx/components/input_output/__init__.py +38 -0
- lfx/components/input_output/chat.py +120 -0
- lfx/components/input_output/chat_output.py +200 -0
- lfx/components/input_output/text.py +27 -0
- lfx/components/input_output/text_output.py +29 -0
- lfx/components/jigsawstack/__init__.py +23 -0
- lfx/components/jigsawstack/ai_scrape.py +126 -0
- lfx/components/jigsawstack/ai_web_search.py +136 -0
- lfx/components/jigsawstack/file_read.py +115 -0
- lfx/components/jigsawstack/file_upload.py +94 -0
- lfx/components/jigsawstack/image_generation.py +205 -0
- lfx/components/jigsawstack/nsfw.py +60 -0
- lfx/components/jigsawstack/object_detection.py +124 -0
- lfx/components/jigsawstack/sentiment.py +112 -0
- lfx/components/jigsawstack/text_to_sql.py +90 -0
- lfx/components/jigsawstack/text_translate.py +77 -0
- lfx/components/jigsawstack/vocr.py +107 -0
- lfx/components/langchain_utilities/__init__.py +109 -0
- lfx/components/langchain_utilities/character.py +53 -0
- lfx/components/langchain_utilities/conversation.py +59 -0
- lfx/components/langchain_utilities/csv_agent.py +107 -0
- lfx/components/langchain_utilities/fake_embeddings.py +26 -0
- lfx/components/langchain_utilities/html_link_extractor.py +35 -0
- lfx/components/langchain_utilities/json_agent.py +45 -0
- lfx/components/langchain_utilities/langchain_hub.py +126 -0
- lfx/components/langchain_utilities/language_recursive.py +49 -0
- lfx/components/langchain_utilities/language_semantic.py +138 -0
- lfx/components/langchain_utilities/llm_checker.py +39 -0
- lfx/components/langchain_utilities/llm_math.py +42 -0
- lfx/components/langchain_utilities/natural_language.py +61 -0
- lfx/components/langchain_utilities/openai_tools.py +53 -0
- lfx/components/langchain_utilities/openapi.py +48 -0
- lfx/components/langchain_utilities/recursive_character.py +60 -0
- lfx/components/langchain_utilities/retrieval_qa.py +83 -0
- lfx/components/langchain_utilities/runnable_executor.py +137 -0
- lfx/components/langchain_utilities/self_query.py +80 -0
- lfx/components/langchain_utilities/spider.py +142 -0
- lfx/components/langchain_utilities/sql.py +40 -0
- lfx/components/langchain_utilities/sql_database.py +35 -0
- lfx/components/langchain_utilities/sql_generator.py +78 -0
- lfx/components/langchain_utilities/tool_calling.py +59 -0
- lfx/components/langchain_utilities/vector_store_info.py +49 -0
- lfx/components/langchain_utilities/vector_store_router.py +33 -0
- lfx/components/langchain_utilities/xml_agent.py +71 -0
- lfx/components/langwatch/__init__.py +3 -0
- lfx/components/langwatch/langwatch.py +278 -0
- lfx/components/link_extractors/__init__.py +3 -0
- lfx/components/lmstudio/__init__.py +34 -0
- lfx/components/lmstudio/lmstudioembeddings.py +89 -0
- lfx/components/lmstudio/lmstudiomodel.py +129 -0
- lfx/components/logic/__init__.py +52 -0
- lfx/components/logic/conditional_router.py +171 -0
- lfx/components/logic/data_conditional_router.py +125 -0
- lfx/components/logic/flow_tool.py +110 -0
- lfx/components/logic/listen.py +29 -0
- lfx/components/logic/loop.py +125 -0
- lfx/components/logic/notify.py +88 -0
- lfx/components/logic/pass_message.py +35 -0
- lfx/components/logic/run_flow.py +71 -0
- lfx/components/logic/sub_flow.py +114 -0
- lfx/components/maritalk/__init__.py +32 -0
- lfx/components/maritalk/maritalk.py +52 -0
- lfx/components/mem0/__init__.py +3 -0
- lfx/components/mem0/mem0_chat_memory.py +136 -0
- lfx/components/milvus/__init__.py +34 -0
- lfx/components/milvus/milvus.py +115 -0
- lfx/components/mistral/__init__.py +37 -0
- lfx/components/mistral/mistral.py +114 -0
- lfx/components/mistral/mistral_embeddings.py +58 -0
- lfx/components/models/__init__.py +34 -0
- lfx/components/models/embedding_model.py +114 -0
- lfx/components/models/language_model.py +144 -0
- lfx/components/mongodb/__init__.py +34 -0
- lfx/components/mongodb/mongodb_atlas.py +213 -0
- lfx/components/needle/__init__.py +3 -0
- lfx/components/needle/needle.py +104 -0
- lfx/components/notdiamond/__init__.py +34 -0
- lfx/components/notdiamond/notdiamond.py +228 -0
- lfx/components/novita/__init__.py +32 -0
- lfx/components/novita/novita.py +130 -0
- lfx/components/nvidia/__init__.py +57 -0
- lfx/components/nvidia/nvidia.py +157 -0
- lfx/components/nvidia/nvidia_embedding.py +77 -0
- lfx/components/nvidia/nvidia_ingest.py +317 -0
- lfx/components/nvidia/nvidia_rerank.py +63 -0
- lfx/components/nvidia/system_assist.py +65 -0
- lfx/components/olivya/__init__.py +3 -0
- lfx/components/olivya/olivya.py +116 -0
- lfx/components/ollama/__init__.py +37 -0
- lfx/components/ollama/ollama.py +330 -0
- lfx/components/ollama/ollama_embeddings.py +106 -0
- lfx/components/openai/__init__.py +37 -0
- lfx/components/openai/openai.py +100 -0
- lfx/components/openai/openai_chat_model.py +176 -0
- lfx/components/openrouter/__init__.py +32 -0
- lfx/components/openrouter/openrouter.py +202 -0
- lfx/components/output_parsers/__init__.py +3 -0
- lfx/components/perplexity/__init__.py +34 -0
- lfx/components/perplexity/perplexity.py +75 -0
- lfx/components/pgvector/__init__.py +34 -0
- lfx/components/pgvector/pgvector.py +72 -0
- lfx/components/pinecone/__init__.py +34 -0
- lfx/components/pinecone/pinecone.py +134 -0
- lfx/components/processing/__init__.py +117 -0
- lfx/components/processing/alter_metadata.py +108 -0
- lfx/components/processing/batch_run.py +205 -0
- lfx/components/processing/combine_text.py +39 -0
- lfx/components/processing/converter.py +159 -0
- lfx/components/processing/create_data.py +110 -0
- lfx/components/processing/data_operations.py +438 -0
- lfx/components/processing/data_to_dataframe.py +70 -0
- lfx/components/processing/dataframe_operations.py +313 -0
- lfx/components/processing/extract_key.py +53 -0
- lfx/components/processing/filter_data.py +42 -0
- lfx/components/processing/filter_data_values.py +88 -0
- lfx/components/processing/json_cleaner.py +103 -0
- lfx/components/processing/lambda_filter.py +154 -0
- lfx/components/processing/llm_router.py +499 -0
- lfx/components/processing/merge_data.py +90 -0
- lfx/components/processing/message_to_data.py +36 -0
- lfx/components/processing/parse_data.py +70 -0
- lfx/components/processing/parse_dataframe.py +68 -0
- lfx/components/processing/parse_json_data.py +90 -0
- lfx/components/processing/parser.py +143 -0
- lfx/components/processing/prompt.py +67 -0
- lfx/components/processing/python_repl_core.py +98 -0
- lfx/components/processing/regex.py +82 -0
- lfx/components/processing/save_file.py +225 -0
- lfx/components/processing/select_data.py +48 -0
- lfx/components/processing/split_text.py +141 -0
- lfx/components/processing/structured_output.py +202 -0
- lfx/components/processing/update_data.py +160 -0
- lfx/components/prototypes/__init__.py +34 -0
- lfx/components/prototypes/python_function.py +73 -0
- lfx/components/qdrant/__init__.py +34 -0
- lfx/components/qdrant/qdrant.py +109 -0
- lfx/components/redis/__init__.py +37 -0
- lfx/components/redis/redis.py +89 -0
- lfx/components/redis/redis_chat.py +43 -0
- lfx/components/sambanova/__init__.py +32 -0
- lfx/components/sambanova/sambanova.py +84 -0
- lfx/components/scrapegraph/__init__.py +40 -0
- lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
- lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
- lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
- lfx/components/searchapi/__init__.py +34 -0
- lfx/components/searchapi/search.py +79 -0
- lfx/components/serpapi/__init__.py +3 -0
- lfx/components/serpapi/serp.py +115 -0
- lfx/components/supabase/__init__.py +34 -0
- lfx/components/supabase/supabase.py +76 -0
- lfx/components/tavily/__init__.py +4 -0
- lfx/components/tavily/tavily_extract.py +117 -0
- lfx/components/tavily/tavily_search.py +212 -0
- lfx/components/textsplitters/__init__.py +3 -0
- lfx/components/toolkits/__init__.py +3 -0
- lfx/components/tools/__init__.py +72 -0
- lfx/components/tools/calculator.py +108 -0
- lfx/components/tools/google_search_api.py +45 -0
- lfx/components/tools/google_serper_api.py +115 -0
- lfx/components/tools/python_code_structured_tool.py +327 -0
- lfx/components/tools/python_repl.py +97 -0
- lfx/components/tools/search_api.py +87 -0
- lfx/components/tools/searxng.py +145 -0
- lfx/components/tools/serp_api.py +119 -0
- lfx/components/tools/tavily_search_tool.py +344 -0
- lfx/components/tools/wikidata_api.py +102 -0
- lfx/components/tools/wikipedia_api.py +49 -0
- lfx/components/tools/yahoo_finance.py +129 -0
- lfx/components/twelvelabs/__init__.py +52 -0
- lfx/components/twelvelabs/convert_astra_results.py +84 -0
- lfx/components/twelvelabs/pegasus_index.py +311 -0
- lfx/components/twelvelabs/split_video.py +291 -0
- lfx/components/twelvelabs/text_embeddings.py +57 -0
- lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
- lfx/components/twelvelabs/video_embeddings.py +100 -0
- lfx/components/twelvelabs/video_file.py +179 -0
- lfx/components/unstructured/__init__.py +3 -0
- lfx/components/unstructured/unstructured.py +121 -0
- lfx/components/upstash/__init__.py +34 -0
- lfx/components/upstash/upstash.py +124 -0
- lfx/components/vectara/__init__.py +37 -0
- lfx/components/vectara/vectara.py +97 -0
- lfx/components/vectara/vectara_rag.py +164 -0
- lfx/components/vectorstores/__init__.py +40 -0
- lfx/components/vectorstores/astradb.py +1285 -0
- lfx/components/vectorstores/astradb_graph.py +319 -0
- lfx/components/vectorstores/cassandra.py +264 -0
- lfx/components/vectorstores/cassandra_graph.py +238 -0
- lfx/components/vectorstores/chroma.py +167 -0
- lfx/components/vectorstores/clickhouse.py +135 -0
- lfx/components/vectorstores/couchbase.py +102 -0
- lfx/components/vectorstores/elasticsearch.py +267 -0
- lfx/components/vectorstores/faiss.py +111 -0
- lfx/components/vectorstores/graph_rag.py +141 -0
- lfx/components/vectorstores/hcd.py +314 -0
- lfx/components/vectorstores/local_db.py +261 -0
- lfx/components/vectorstores/milvus.py +115 -0
- lfx/components/vectorstores/mongodb_atlas.py +213 -0
- lfx/components/vectorstores/opensearch.py +243 -0
- lfx/components/vectorstores/pgvector.py +72 -0
- lfx/components/vectorstores/pinecone.py +134 -0
- lfx/components/vectorstores/qdrant.py +109 -0
- lfx/components/vectorstores/supabase.py +76 -0
- lfx/components/vectorstores/upstash.py +124 -0
- lfx/components/vectorstores/vectara.py +97 -0
- lfx/components/vectorstores/vectara_rag.py +164 -0
- lfx/components/vectorstores/weaviate.py +89 -0
- lfx/components/vertexai/__init__.py +37 -0
- lfx/components/vertexai/vertexai.py +71 -0
- lfx/components/vertexai/vertexai_embeddings.py +67 -0
- lfx/components/weaviate/__init__.py +34 -0
- lfx/components/weaviate/weaviate.py +89 -0
- lfx/components/wikipedia/__init__.py +4 -0
- lfx/components/wikipedia/wikidata.py +86 -0
- lfx/components/wikipedia/wikipedia.py +53 -0
- lfx/components/wolframalpha/__init__.py +3 -0
- lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
- lfx/components/xai/__init__.py +32 -0
- lfx/components/xai/xai.py +167 -0
- lfx/components/yahoosearch/__init__.py +3 -0
- lfx/components/yahoosearch/yahoo.py +137 -0
- lfx/components/youtube/__init__.py +52 -0
- lfx/components/youtube/channel.py +227 -0
- lfx/components/youtube/comments.py +231 -0
- lfx/components/youtube/playlist.py +33 -0
- lfx/components/youtube/search.py +120 -0
- lfx/components/youtube/trending.py +285 -0
- lfx/components/youtube/video_details.py +263 -0
- lfx/components/youtube/youtube_transcripts.py +118 -0
- lfx/components/zep/__init__.py +3 -0
- lfx/components/zep/zep.py +44 -0
- lfx/constants.py +6 -0
- lfx/custom/__init__.py +7 -0
- lfx/custom/attributes.py +86 -0
- lfx/custom/code_parser/__init__.py +3 -0
- lfx/custom/code_parser/code_parser.py +361 -0
- lfx/custom/custom_component/__init__.py +0 -0
- lfx/custom/custom_component/base_component.py +128 -0
- lfx/custom/custom_component/component.py +1808 -0
- lfx/custom/custom_component/component_with_cache.py +8 -0
- lfx/custom/custom_component/custom_component.py +588 -0
- lfx/custom/dependency_analyzer.py +165 -0
- lfx/custom/directory_reader/__init__.py +3 -0
- lfx/custom/directory_reader/directory_reader.py +359 -0
- lfx/custom/directory_reader/utils.py +171 -0
- lfx/custom/eval.py +12 -0
- lfx/custom/schema.py +32 -0
- lfx/custom/tree_visitor.py +21 -0
- lfx/custom/utils.py +877 -0
- lfx/custom/validate.py +488 -0
- lfx/events/__init__.py +1 -0
- lfx/events/event_manager.py +110 -0
- lfx/exceptions/__init__.py +0 -0
- lfx/exceptions/component.py +15 -0
- lfx/field_typing/__init__.py +91 -0
- lfx/field_typing/constants.py +215 -0
- lfx/field_typing/range_spec.py +35 -0
- lfx/graph/__init__.py +6 -0
- lfx/graph/edge/__init__.py +0 -0
- lfx/graph/edge/base.py +277 -0
- lfx/graph/edge/schema.py +119 -0
- lfx/graph/edge/utils.py +0 -0
- lfx/graph/graph/__init__.py +0 -0
- lfx/graph/graph/ascii.py +202 -0
- lfx/graph/graph/base.py +2238 -0
- lfx/graph/graph/constants.py +63 -0
- lfx/graph/graph/runnable_vertices_manager.py +133 -0
- lfx/graph/graph/schema.py +52 -0
- lfx/graph/graph/state_model.py +66 -0
- lfx/graph/graph/utils.py +1024 -0
- lfx/graph/schema.py +75 -0
- lfx/graph/state/__init__.py +0 -0
- lfx/graph/state/model.py +237 -0
- lfx/graph/utils.py +200 -0
- lfx/graph/vertex/__init__.py +0 -0
- lfx/graph/vertex/base.py +823 -0
- lfx/graph/vertex/constants.py +0 -0
- lfx/graph/vertex/exceptions.py +4 -0
- lfx/graph/vertex/param_handler.py +264 -0
- lfx/graph/vertex/schema.py +26 -0
- lfx/graph/vertex/utils.py +19 -0
- lfx/graph/vertex/vertex_types.py +489 -0
- lfx/helpers/__init__.py +1 -0
- lfx/helpers/base_model.py +71 -0
- lfx/helpers/custom.py +13 -0
- lfx/helpers/data.py +167 -0
- lfx/helpers/flow.py +194 -0
- lfx/inputs/__init__.py +68 -0
- lfx/inputs/constants.py +2 -0
- lfx/inputs/input_mixin.py +328 -0
- lfx/inputs/inputs.py +714 -0
- lfx/inputs/validators.py +19 -0
- lfx/interface/__init__.py +6 -0
- lfx/interface/components.py +489 -0
- lfx/interface/importing/__init__.py +5 -0
- lfx/interface/importing/utils.py +39 -0
- lfx/interface/initialize/__init__.py +3 -0
- lfx/interface/initialize/loading.py +224 -0
- lfx/interface/listing.py +26 -0
- lfx/interface/run.py +16 -0
- lfx/interface/utils.py +111 -0
- lfx/io/__init__.py +63 -0
- lfx/io/schema.py +289 -0
- lfx/load/__init__.py +8 -0
- lfx/load/load.py +256 -0
- lfx/load/utils.py +99 -0
- lfx/log/__init__.py +5 -0
- lfx/log/logger.py +385 -0
- lfx/memory/__init__.py +90 -0
- lfx/memory/stubs.py +283 -0
- lfx/processing/__init__.py +1 -0
- lfx/processing/process.py +238 -0
- lfx/processing/utils.py +25 -0
- lfx/py.typed +0 -0
- lfx/schema/__init__.py +66 -0
- lfx/schema/artifact.py +83 -0
- lfx/schema/content_block.py +62 -0
- lfx/schema/content_types.py +91 -0
- lfx/schema/data.py +308 -0
- lfx/schema/dataframe.py +210 -0
- lfx/schema/dotdict.py +74 -0
- lfx/schema/encoders.py +13 -0
- lfx/schema/graph.py +47 -0
- lfx/schema/image.py +131 -0
- lfx/schema/json_schema.py +141 -0
- lfx/schema/log.py +61 -0
- lfx/schema/message.py +473 -0
- lfx/schema/openai_responses_schemas.py +74 -0
- lfx/schema/properties.py +41 -0
- lfx/schema/schema.py +171 -0
- lfx/schema/serialize.py +13 -0
- lfx/schema/table.py +140 -0
- lfx/schema/validators.py +114 -0
- lfx/serialization/__init__.py +5 -0
- lfx/serialization/constants.py +2 -0
- lfx/serialization/serialization.py +314 -0
- lfx/services/__init__.py +23 -0
- lfx/services/base.py +28 -0
- lfx/services/cache/__init__.py +6 -0
- lfx/services/cache/base.py +183 -0
- lfx/services/cache/service.py +166 -0
- lfx/services/cache/utils.py +169 -0
- lfx/services/chat/__init__.py +1 -0
- lfx/services/chat/config.py +2 -0
- lfx/services/chat/schema.py +10 -0
- lfx/services/deps.py +129 -0
- lfx/services/factory.py +19 -0
- lfx/services/initialize.py +19 -0
- lfx/services/interfaces.py +103 -0
- lfx/services/manager.py +172 -0
- lfx/services/schema.py +20 -0
- lfx/services/session.py +82 -0
- lfx/services/settings/__init__.py +3 -0
- lfx/services/settings/auth.py +130 -0
- lfx/services/settings/base.py +539 -0
- lfx/services/settings/constants.py +31 -0
- lfx/services/settings/factory.py +23 -0
- lfx/services/settings/feature_flags.py +12 -0
- lfx/services/settings/service.py +35 -0
- lfx/services/settings/utils.py +40 -0
- lfx/services/shared_component_cache/__init__.py +1 -0
- lfx/services/shared_component_cache/factory.py +30 -0
- lfx/services/shared_component_cache/service.py +9 -0
- lfx/services/storage/__init__.py +5 -0
- lfx/services/storage/local.py +155 -0
- lfx/services/storage/service.py +54 -0
- lfx/services/tracing/__init__.py +1 -0
- lfx/services/tracing/service.py +21 -0
- lfx/settings.py +6 -0
- lfx/template/__init__.py +6 -0
- lfx/template/field/__init__.py +0 -0
- lfx/template/field/base.py +257 -0
- lfx/template/field/prompt.py +15 -0
- lfx/template/frontend_node/__init__.py +6 -0
- lfx/template/frontend_node/base.py +212 -0
- lfx/template/frontend_node/constants.py +65 -0
- lfx/template/frontend_node/custom_components.py +79 -0
- lfx/template/template/__init__.py +0 -0
- lfx/template/template/base.py +100 -0
- lfx/template/utils.py +217 -0
- lfx/type_extraction/__init__.py +19 -0
- lfx/type_extraction/type_extraction.py +75 -0
- lfx/type_extraction.py +80 -0
- lfx/utils/__init__.py +1 -0
- lfx/utils/async_helpers.py +42 -0
- lfx/utils/component_utils.py +154 -0
- lfx/utils/concurrency.py +60 -0
- lfx/utils/connection_string_parser.py +11 -0
- lfx/utils/constants.py +205 -0
- lfx/utils/data_structure.py +212 -0
- lfx/utils/exceptions.py +22 -0
- lfx/utils/helpers.py +28 -0
- lfx/utils/image.py +73 -0
- lfx/utils/lazy_load.py +15 -0
- lfx/utils/request_utils.py +18 -0
- lfx/utils/schemas.py +139 -0
- lfx/utils/util.py +481 -0
- lfx/utils/util_strings.py +56 -0
- lfx/utils/version.py +24 -0
- lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
- lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
- lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
- lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
lfx/base/data/docling_utils.py
ADDED
@@ -0,0 +1,245 @@
+import signal
+import sys
+import traceback
+from contextlib import suppress
+
+from docling_core.types.doc import DoclingDocument
+
+from lfx.log.logger import logger
+from lfx.schema.data import Data
+from lfx.schema.dataframe import DataFrame
+
+
+def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
+    documents: list[DoclingDocument] = []
+    if isinstance(data_inputs, DataFrame):
+        if not len(data_inputs):
+            msg = "DataFrame is empty"
+            raise TypeError(msg)
+
+        if doc_key not in data_inputs.columns:
+            msg = f"Column '{doc_key}' not found in DataFrame"
+            raise TypeError(msg)
+        try:
+            documents = data_inputs[doc_key].tolist()
+        except Exception as e:
+            msg = f"Error extracting DoclingDocument from DataFrame: {e}"
+            raise TypeError(msg) from e
+    else:
+        if not data_inputs:
+            msg = "No data inputs provided"
+            raise TypeError(msg)
+
+        if isinstance(data_inputs, Data):
+            if doc_key not in data_inputs.data:
+                msg = (
+                    f"'{doc_key}' field not available in the input Data. "
+                    "Check that your input is a DoclingDocument. "
+                    "You can use the Docling component to convert your input to a DoclingDocument."
+                )
+                raise TypeError(msg)
+            documents = [data_inputs.data[doc_key]]
+        else:
+            try:
+                documents = [
+                    input_.data[doc_key]
+                    for input_ in data_inputs
+                    if isinstance(input_, Data)
+                    and doc_key in input_.data
+                    and isinstance(input_.data[doc_key], DoclingDocument)
+                ]
+                if not documents:
+                    msg = f"No valid Data inputs found in {type(data_inputs)}"
+                    raise TypeError(msg)
+            except AttributeError as e:
+                msg = f"Invalid input type in collection: {e}"
+                raise TypeError(msg) from e
+    return documents
+
+
+def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
+    """Worker function for processing files with Docling in a separate process."""
+    # Signal handling for graceful shutdown
+    shutdown_requested = False
+
+    def signal_handler(signum: int, frame) -> None:  # noqa: ARG001
+        """Handle shutdown signals gracefully."""
+        nonlocal shutdown_requested
+        signal_names: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
+        signal_name = signal_names.get(signum, f"signal {signum}")
+
+        logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
+        shutdown_requested = True
+
+        # Send shutdown notification to parent process
+        with suppress(Exception):
+            queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})
+
+        # Exit gracefully
+        sys.exit(0)
+
+    def check_shutdown() -> None:
+        """Check if shutdown was requested and exit if so."""
+        if shutdown_requested:
+            logger.info("Shutdown requested, exiting worker...")
+
+            with suppress(Exception):
+                queue.put({"error": "Worker shutdown requested", "shutdown": True})
+
+            sys.exit(0)
+
+    # Register signal handlers early
+    try:
+        signal.signal(signal.SIGTERM, signal_handler)
+        signal.signal(signal.SIGINT, signal_handler)
+        logger.debug("Signal handlers registered for graceful shutdown")
+    except (OSError, ValueError) as e:
+        # Some signals might not be available on all platforms
+        logger.warning(f"Warning: Could not register signal handlers: {e}")
+
+    # Check for shutdown before heavy imports
+    check_shutdown()
+
+    try:
+        from docling.datamodel.base_models import ConversionStatus, InputFormat
+        from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
+        from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
+        from docling.models.factories import get_ocr_factory
+        from docling.pipeline.vlm_pipeline import VlmPipeline
+
+        # Check for shutdown after imports
+        check_shutdown()
+        logger.debug("Docling dependencies loaded successfully")
+
+    except ModuleNotFoundError:
+        msg = (
+            "Docling is an optional dependency of Langflow. "
+            "Install with `uv pip install 'langflow[docling]'` "
+            "or refer to the documentation"
+        )
+        queue.put({"error": msg})
+        return
+    except ImportError as e:
+        # A different import failed (e.g., a transitive dependency); preserve details.
+        queue.put({"error": f"Failed to import a Docling dependency: {e}"})
+        return
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during imports, exiting...")
+        queue.put({"error": "Worker interrupted during imports", "shutdown": True})
+        return
+
+    # Configure the standard PDF pipeline
+    def _get_standard_opts() -> PdfPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+
+        pipeline_options = PdfPipelineOptions()
+        pipeline_options.do_ocr = ocr_engine != ""
+        if pipeline_options.do_ocr:
+            ocr_factory = get_ocr_factory(
+                allow_external_plugins=False,
+            )
+
+            ocr_options: OcrOptions = ocr_factory.create_options(
+                kind=ocr_engine,
+            )
+            pipeline_options.ocr_options = ocr_options
+        return pipeline_options
+
+    # Configure the VLM pipeline
+    def _get_vlm_opts() -> VlmPipelineOptions:
+        check_shutdown()  # Check before heavy operations
+        return VlmPipelineOptions()
+
+    # Configure the main format options and create the DocumentConverter()
+    def _get_converter() -> DocumentConverter:
+        check_shutdown()  # Check before heavy operations
+
+        if pipeline == "standard":
+            pdf_format_option = PdfFormatOption(
+                pipeline_options=_get_standard_opts(),
+            )
+        elif pipeline == "vlm":
+            pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
+        else:
+            msg = f"Unknown pipeline: {pipeline!r}"
+            raise ValueError(msg)
+
+        format_options: dict[InputFormat, FormatOption] = {
+            InputFormat.PDF: pdf_format_option,
+            InputFormat.IMAGE: pdf_format_option,
+        }
+
+        return DocumentConverter(format_options=format_options)
+
+    try:
+        # Check for shutdown before creating converter (can be slow)
+        check_shutdown()
+        logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")
+
+        converter = _get_converter()
+
+        # Check for shutdown before processing files
+        check_shutdown()
+        logger.info(f"Starting to process {len(file_paths)} files...")
+
+        # Process files with periodic shutdown checks
+        results = []
+        for i, file_path in enumerate(file_paths):
+            # Check for shutdown before processing each file
+            check_shutdown()
+
+            logger.debug(f"Processing file {i + 1}/{len(file_paths)}: {file_path}")
+
+            try:
+                # Process single file (we can't easily interrupt convert_all)
+                single_result = converter.convert_all([file_path])
+                results.extend(single_result)
+
+                # Check for shutdown after each file
+                check_shutdown()
+
+            except (OSError, ValueError, RuntimeError, ImportError) as file_error:
+                # Handle specific file processing errors
+                logger.error(f"Error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+            except Exception as file_error:  # noqa: BLE001
+                # Catch any other unexpected errors to prevent worker crash
+                logger.error(f"Unexpected error processing file {file_path}: {file_error}")
+                # Continue with other files, but check for shutdown
+                check_shutdown()
+
+        # Final shutdown check before sending results
+        check_shutdown()
+
+        # Process the results while maintaining the original structure
+        processed_data = [
+            {"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
+            if res.status == ConversionStatus.SUCCESS
+            else None
+            for res in results
+        ]
+
+        logger.info(f"Successfully processed {len([d for d in processed_data if d])} files")
+        queue.put(processed_data)
+
+    except KeyboardInterrupt:
+        logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
+        queue.put({"error": "Worker interrupted during processing", "shutdown": True})
+        return
+    except Exception as e:  # noqa: BLE001
+        if shutdown_requested:
+            logger.exception("Exception occurred during shutdown, exiting...")
+            return
+
+        # Send any processing error to the main process with traceback
+        error_info = {"error": str(e), "traceback": traceback.format_exc()}
+        logger.error(f"Error in worker: {error_info}")
+        queue.put(error_info)
+    finally:
+        logger.info("Docling worker finishing...")
+        # Ensure we don't leave any hanging processes
+        if shutdown_requested:
+            logger.debug("Worker shutdown completed")
+        else:
+            logger.debug("Worker completed normally")
lfx/base/data/utils.py
ADDED
@@ -0,0 +1,198 @@
+import unicodedata
+from collections.abc import Callable
+from concurrent import futures
+from pathlib import Path
+
+import chardet
+import orjson
+import yaml
+from defusedxml import ElementTree
+
+from lfx.schema.data import Data
+
+# Types of files that can be read simply by file.read()
+# and have 100% to be completely readable
+TEXT_FILE_TYPES = [
+    "txt",
+    "md",
+    "mdx",
+    "csv",
+    "json",
+    "yaml",
+    "yml",
+    "xml",
+    "html",
+    "htm",
+    "pdf",
+    "docx",
+    "py",
+    "sh",
+    "sql",
+    "js",
+    "ts",
+    "tsx",
+]
+
+IMG_FILE_TYPES = ["jpg", "jpeg", "png", "bmp", "image"]
+
+
+def normalize_text(text):
+    return unicodedata.normalize("NFKD", text)
+
+
+def is_hidden(path: Path) -> bool:
+    return path.name.startswith(".")
+
+
+def format_directory_path(path: str) -> str:
+    """Format a directory path to ensure it's properly escaped and valid.
+
+    Args:
+        path (str): The input path string.
+
+    Returns:
+        str: A properly formatted path string.
+    """
+    return path.replace("\n", "\\n")
+
+
+# Ignoring FBT001 because the DirectoryComponent in 1.0.19
+# calls this function without keyword arguments
+def retrieve_file_paths(
+    path: str,
+    load_hidden: bool,  # noqa: FBT001
+    recursive: bool,  # noqa: FBT001
+    depth: int,
+    types: list[str] = TEXT_FILE_TYPES,
+) -> list[str]:
+    path = format_directory_path(path)
+    path_obj = Path(path)
+    if not path_obj.exists() or not path_obj.is_dir():
+        msg = f"Path {path} must exist and be a directory."
+        raise ValueError(msg)
+
+    def match_types(p: Path) -> bool:
+        return any(p.suffix == f".{t}" for t in types) if types else True
+
+    def is_not_hidden(p: Path) -> bool:
+        return not is_hidden(p) or load_hidden
+
+    def walk_level(directory: Path, max_depth: int):
+        directory = directory.resolve()
+        prefix_length = len(directory.parts)
+        for p in directory.rglob("*" if recursive else "[!.]*"):
+            if len(p.parts) - prefix_length <= max_depth:
+                yield p
+
+    glob = "**/*" if recursive else "*"
+    paths = walk_level(path_obj, depth) if depth else path_obj.glob(glob)
+    return [str(p) for p in paths if p.is_file() and match_types(p) and is_not_hidden(p)]
+
+
+def partition_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
+    # Use the partition function to load the file
+    from unstructured.partition.auto import partition
+
+    try:
+        elements = partition(file_path)
+    except Exception as e:
+        if not silent_errors:
+            msg = f"Error loading file {file_path}: {e}"
+            raise ValueError(msg) from e
+        return None
+
+    # Create a Data
+    text = "\n\n".join([str(el) for el in elements])
+    metadata = elements.metadata if hasattr(elements, "metadata") else {}
+    metadata["file_path"] = file_path
+    return Data(text=text, data=metadata)
+
+
+def read_text_file(file_path: str) -> str:
+    file_path_ = Path(file_path)
+    raw_data = file_path_.read_bytes()
+    result = chardet.detect(raw_data)
+    encoding = result["encoding"]
+
+    if encoding in {"Windows-1252", "Windows-1254", "MacRoman"}:
+        encoding = "utf-8"
+
+    return file_path_.read_text(encoding=encoding)
+
+
+def read_docx_file(file_path: str) -> str:
+    from docx import Document
+
+    doc = Document(file_path)
+    return "\n\n".join([p.text for p in doc.paragraphs])
+
+
+def parse_pdf_to_text(file_path: str) -> str:
+    from pypdf import PdfReader
+
+    with Path(file_path).open("rb") as f, PdfReader(f) as reader:
+        return "\n\n".join([page.extract_text() for page in reader.pages])
+
+
+def parse_text_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
+    try:
+        if file_path.endswith(".pdf"):
+            text = parse_pdf_to_text(file_path)
+        elif file_path.endswith(".docx"):
+            text = read_docx_file(file_path)
+        else:
+            text = read_text_file(file_path)
+
+        # if file is json, yaml, or xml, we can parse it
+        if file_path.endswith(".json"):
+            loaded_json = orjson.loads(text)
+            if isinstance(loaded_json, dict):
+                loaded_json = {k: normalize_text(v) if isinstance(v, str) else v for k, v in loaded_json.items()}
+            elif isinstance(loaded_json, list):
+                loaded_json = [normalize_text(item) if isinstance(item, str) else item for item in loaded_json]
+            text = orjson.dumps(loaded_json).decode("utf-8")
+
+        elif file_path.endswith((".yaml", ".yml")):
+            text = yaml.safe_load(text)
+        elif file_path.endswith(".xml"):
+            xml_element = ElementTree.fromstring(text)
+            text = ElementTree.tostring(xml_element, encoding="unicode")
+    except Exception as e:
+        if not silent_errors:
+            msg = f"Error loading file {file_path}: {e}"
+            raise ValueError(msg) from e
+        return None
+
+    return Data(data={"file_path": file_path, "text": text})
+
+
+# ! Removing unstructured dependency until
+# ! 3.12 is supported
+# def get_elements(
+#     file_paths: List[str],
+#     silent_errors: bool,
+#     max_concurrency: int,
+#     use_multithreading: bool,
+# ) -> List[Optional[Data]]:
+#     if use_multithreading:
+#         data = parallel_load_data(file_paths, silent_errors, max_concurrency)
+#     else:
+#         data = [partition_file_to_data(file_path, silent_errors) for file_path in file_paths]
+#     data = list(filter(None, data))
+#     return data
+
+
+def parallel_load_data(
+    file_paths: list[str],
+    *,
+    silent_errors: bool,
+    max_concurrency: int,
+    load_function: Callable = parse_text_file_to_data,
+) -> list[Data | None]:
+    with futures.ThreadPoolExecutor(max_workers=max_concurrency) as executor:
+        loaded_files = executor.map(
+            lambda file_path: load_function(file_path, silent_errors=silent_errors),
+            file_paths,
+        )
+        # loaded_files is an iterator, so we need to convert it to a list
+        return list(loaded_files)
File without changes
lfx/base/document_transformers/model.py
ADDED
@@ -0,0 +1,43 @@
+from abc import abstractmethod
+from typing import Any
+
+from langchain_core.documents import BaseDocumentTransformer
+
+from lfx.custom.custom_component.component import Component
+from lfx.io import Output
+from lfx.schema.data import Data
+from lfx.utils.util import build_loader_repr_from_data
+
+
+class LCDocumentTransformerComponent(Component):
+    trace_type = "document_transformer"
+    outputs = [
+        Output(display_name="Data", name="data", method="transform_data"),
+    ]
+
+    def transform_data(self) -> list[Data]:
+        data_input = self.get_data_input()
+        documents = []
+
+        if not isinstance(data_input, list):
+            data_input = [data_input]
+
+        for _input in data_input:
+            if isinstance(_input, Data):
+                documents.append(_input.to_lc_document())
+            else:
+                documents.append(_input)
+
+        transformer = self.build_document_transformer()
+        docs = transformer.transform_documents(documents)
+        data = self.to_data(docs)
+        self.repr_value = build_loader_repr_from_data(data)
+        return data
+
+    @abstractmethod
+    def get_data_input(self) -> Any:
+        """Get the data input."""
+
+    @abstractmethod
+    def build_document_transformer(self) -> BaseDocumentTransformer:
+        """Build the text splitter."""
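Note: `LCDocumentTransformerComponent` is an abstract base: subclasses supply `get_data_input` and `build_document_transformer` and inherit `transform_data`. A minimal hypothetical subclass is sketched below; `DataInput` and `Html2TextTransformer` are assumptions about available imports, not something this diff shows, and the shipped transformer components under `lfx/components/langchain_utilities/` may be wired differently.

# Hypothetical subclass sketch, assuming lfx.io exports DataInput.
from typing import Any

from langchain_community.document_transformers import Html2TextTransformer
from langchain_core.documents import BaseDocumentTransformer

from lfx.base.document_transformers.model import LCDocumentTransformerComponent
from lfx.io import DataInput


class Html2TextComponent(LCDocumentTransformerComponent):
    display_name = "HTML to Text"
    inputs = [DataInput(name="data_input", display_name="Data")]

    def get_data_input(self) -> Any:
        return self.data_input

    def build_document_transformer(self) -> BaseDocumentTransformer:
        return Html2TextTransformer()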
File without changes
lfx/base/embeddings/aiml_embeddings.py
ADDED
@@ -0,0 +1,62 @@
+import concurrent.futures
+import json
+
+import httpx
+from pydantic import BaseModel, SecretStr
+
+from lfx.field_typing import Embeddings
+from lfx.log.logger import logger
+
+
+class AIMLEmbeddingsImpl(BaseModel, Embeddings):
+    embeddings_completion_url: str = "https://api.aimlapi.com/v1/embeddings"
+
+    api_key: SecretStr
+    model: str
+
+    def embed_documents(self, texts: list[str]) -> list[list[float]]:
+        embeddings = [None] * len(texts)
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key.get_secret_value()}",
+        }
+
+        with httpx.Client() as client, concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = []
+            for i, text in enumerate(texts):
+                futures.append((i, executor.submit(self._embed_text, client, headers, text)))
+
+            for index, future in futures:
+                try:
+                    result_data = future.result()
+                    if len(result_data["data"]) != 1:
+                        msg = f"Expected one embedding, got {len(result_data['data'])}"
+                        raise ValueError(msg)
+                    embeddings[index] = result_data["data"][0]["embedding"]
+                except (
+                    httpx.HTTPStatusError,
+                    httpx.RequestError,
+                    json.JSONDecodeError,
+                    KeyError,
+                    ValueError,
+                ):
+                    logger.exception("Error occurred")
+                    raise
+
+        return embeddings  # type: ignore[return-value]
+
+    def _embed_text(self, client: httpx.Client, headers: dict, text: str) -> dict:
+        payload = {
+            "model": self.model,
+            "input": text,
+        }
+        response = client.post(
+            self.embeddings_completion_url,
+            headers=headers,
+            json=payload,
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def embed_query(self, text: str) -> list[float]:
+        return self.embed_documents([text])[0]
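Note: `AIMLEmbeddingsImpl` issues one HTTP request per input text through a thread pool and preserves input order by index. A short usage sketch; the API key and model name below are placeholders, not values taken from the diff.

# Hypothetical usage of the class above.
from pydantic import SecretStr

from lfx.base.embeddings.aiml_embeddings import AIMLEmbeddingsImpl

embedder = AIMLEmbeddingsImpl(api_key=SecretStr("aiml-..."), model="text-embedding-3-small")
vectors = embedder.embed_documents(["first document", "second document"])
query_vector = embedder.embed_query("a search query")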
lfx/base/embeddings/model.py
ADDED
@@ -0,0 +1,26 @@
+from lfx.custom.custom_component.component import Component
+from lfx.field_typing import Embeddings
+from lfx.io import Output
+
+
+class LCEmbeddingsModel(Component):
+    trace_type = "embedding"
+
+    outputs = [
+        Output(display_name="Embedding Model", name="embeddings", method="build_embeddings"),
+    ]
+
+    def _validate_outputs(self) -> None:
+        required_output_methods = ["build_embeddings"]
+        output_names = [output.name for output in self.outputs]
+        for method_name in required_output_methods:
+            if method_name not in output_names:
+                msg = f"Output with name '{method_name}' must be defined."
+                raise ValueError(msg)
+            if not hasattr(self, method_name):
+                msg = f"Method '{method_name}' must be defined."
+                raise ValueError(msg)
+
+    def build_embeddings(self) -> Embeddings:
+        msg = "You must implement the build_embeddings method in your class."
+        raise NotImplementedError(msg)
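Note: `LCEmbeddingsModel` is the base class for embedding-provider components; `_validate_outputs` enforces that a `build_embeddings` output exists, and subclasses override `build_embeddings`. A minimal hypothetical subclass follows, using LangChain's `FakeEmbeddings` purely for illustration; the real providers (OpenAI, Ollama, etc.) live under `lfx/components/` and differ.

# Hypothetical subclass sketch; FakeEmbeddings comes from langchain_community.
from langchain_community.embeddings import FakeEmbeddings

from lfx.base.embeddings.model import LCEmbeddingsModel
from lfx.field_typing import Embeddings


class FakeEmbeddingsComponent(LCEmbeddingsModel):
    display_name = "Fake Embeddings"

    def build_embeddings(self) -> Embeddings:
        return FakeEmbeddings(size=32)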
File without changes
lfx/base/flow_processing/utils.py
ADDED
@@ -0,0 +1,86 @@
+from lfx.graph.schema import ResultData, RunOutputs
+from lfx.log.logger import logger
+from lfx.schema.data import Data
+from lfx.schema.message import Message
+
+
+def build_data_from_run_outputs(run_outputs: RunOutputs) -> list[Data]:
+    """Build a list of data from the given RunOutputs.
+
+    Args:
+        run_outputs (RunOutputs): The RunOutputs object containing the output data.
+
+    Returns:
+        List[Data]: A list of data built from the RunOutputs.
+
+    """
+    if not run_outputs:
+        return []
+    data = []
+    for result_data in run_outputs.outputs:
+        if result_data:
+            data.extend(build_data_from_result_data(result_data))
+    return data
+
+
+def build_data_from_result_data(result_data: ResultData) -> list[Data]:
+    """Build a list of data from the given ResultData.
+
+    Args:
+        result_data (ResultData): The ResultData object containing the result data.
+
+    Returns:
+        List[Data]: A list of data built from the ResultData.
+
+    """
+    messages = result_data.messages
+
+    if not messages:
+        return []
+    data = []
+
+    # Handle results without chat messages (calling flow)
+    if not messages:
+        # Result with a single record
+        if isinstance(result_data.artifacts, dict):
+            data.append(Data(data=result_data.artifacts))
+        # List of artifacts
+        elif isinstance(result_data.artifacts, list):
+            for artifact in result_data.artifacts:
+                # If multiple records are found as artifacts, return as-is
+                if isinstance(artifact, Data):
+                    data.append(artifact)
+                else:
+                    # Warn about unknown output type
+                    logger.warning(f"Unable to build record output from unknown ResultData.artifact: {artifact}")
+        # Chat or text output
+        elif result_data.results:
+            data.append(Data(data={"result": result_data.results}, text_key="result"))
+            return data
+        else:
+            return []
+
+    if isinstance(result_data.results, dict):
+        for name, result in result_data.results.items():
+            dataobj: Data | Message | None
+            dataobj = result if isinstance(result, Message) else Data(data=result, text_key=name)
+
+            data.append(dataobj)
+    else:
+        data.append(Data(data=result_data.results))
+    return data
+
+
+def format_flow_output_data(data: list[Data]) -> str:
+    """Format the flow output data into a string.
+
+    Args:
+        data (List[Data]): The list of data to format.
+
+    Returns:
+        str: The formatted flow output data.
+
+    """
+    result = "Flow run output:\n"
+    results = "\n".join([value.get_text() if hasattr(value, "get_text") else str(value) for value in data])
+    return result + results
File without changes
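Note: the flow-processing helpers above convert `RunOutputs`/`ResultData` into `Data` records and then into a printable summary. A hedged end-to-end sketch follows; `run_flow_from_json` is assumed to be exported by `lfx.load` (see `lfx/load/load.py` in the listing above), and its exact signature may differ.

# Hypothetical usage; flow path and input value are placeholders.
from lfx.base.flow_processing.utils import build_data_from_run_outputs, format_flow_output_data
from lfx.load import run_flow_from_json

run_outputs = run_flow_from_json("my_flow.json", input_value="hello")
data = []
for run_output in run_outputs:
    data.extend(build_data_from_run_outputs(run_output))
print(format_flow_output_data(data))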