lfx-nightly 0.1.11.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/__init__.py +0 -0
- lfx/__main__.py +25 -0
- lfx/base/__init__.py +0 -0
- lfx/base/agents/__init__.py +0 -0
- lfx/base/agents/agent.py +268 -0
- lfx/base/agents/callback.py +130 -0
- lfx/base/agents/context.py +109 -0
- lfx/base/agents/crewai/__init__.py +0 -0
- lfx/base/agents/crewai/crew.py +231 -0
- lfx/base/agents/crewai/tasks.py +12 -0
- lfx/base/agents/default_prompts.py +23 -0
- lfx/base/agents/errors.py +15 -0
- lfx/base/agents/events.py +346 -0
- lfx/base/agents/utils.py +205 -0
- lfx/base/astra_assistants/__init__.py +0 -0
- lfx/base/astra_assistants/util.py +171 -0
- lfx/base/chains/__init__.py +0 -0
- lfx/base/chains/model.py +19 -0
- lfx/base/composio/__init__.py +0 -0
- lfx/base/composio/composio_base.py +1291 -0
- lfx/base/compressors/__init__.py +0 -0
- lfx/base/compressors/model.py +60 -0
- lfx/base/constants.py +46 -0
- lfx/base/curl/__init__.py +0 -0
- lfx/base/curl/parse.py +188 -0
- lfx/base/data/__init__.py +5 -0
- lfx/base/data/base_file.py +685 -0
- lfx/base/data/docling_utils.py +245 -0
- lfx/base/data/utils.py +198 -0
- lfx/base/document_transformers/__init__.py +0 -0
- lfx/base/document_transformers/model.py +43 -0
- lfx/base/embeddings/__init__.py +0 -0
- lfx/base/embeddings/aiml_embeddings.py +62 -0
- lfx/base/embeddings/model.py +26 -0
- lfx/base/flow_processing/__init__.py +0 -0
- lfx/base/flow_processing/utils.py +86 -0
- lfx/base/huggingface/__init__.py +0 -0
- lfx/base/huggingface/model_bridge.py +133 -0
- lfx/base/io/__init__.py +0 -0
- lfx/base/io/chat.py +20 -0
- lfx/base/io/text.py +22 -0
- lfx/base/langchain_utilities/__init__.py +0 -0
- lfx/base/langchain_utilities/model.py +35 -0
- lfx/base/langchain_utilities/spider_constants.py +1 -0
- lfx/base/langwatch/__init__.py +0 -0
- lfx/base/langwatch/utils.py +18 -0
- lfx/base/mcp/__init__.py +0 -0
- lfx/base/mcp/constants.py +2 -0
- lfx/base/mcp/util.py +1398 -0
- lfx/base/memory/__init__.py +0 -0
- lfx/base/memory/memory.py +49 -0
- lfx/base/memory/model.py +38 -0
- lfx/base/models/__init__.py +3 -0
- lfx/base/models/aiml_constants.py +51 -0
- lfx/base/models/anthropic_constants.py +47 -0
- lfx/base/models/aws_constants.py +151 -0
- lfx/base/models/chat_result.py +76 -0
- lfx/base/models/google_generative_ai_constants.py +70 -0
- lfx/base/models/groq_constants.py +134 -0
- lfx/base/models/model.py +375 -0
- lfx/base/models/model_input_constants.py +307 -0
- lfx/base/models/model_metadata.py +41 -0
- lfx/base/models/model_utils.py +8 -0
- lfx/base/models/novita_constants.py +35 -0
- lfx/base/models/ollama_constants.py +49 -0
- lfx/base/models/openai_constants.py +122 -0
- lfx/base/models/sambanova_constants.py +18 -0
- lfx/base/processing/__init__.py +0 -0
- lfx/base/prompts/__init__.py +0 -0
- lfx/base/prompts/api_utils.py +224 -0
- lfx/base/prompts/utils.py +61 -0
- lfx/base/textsplitters/__init__.py +0 -0
- lfx/base/textsplitters/model.py +28 -0
- lfx/base/tools/__init__.py +0 -0
- lfx/base/tools/base.py +26 -0
- lfx/base/tools/component_tool.py +325 -0
- lfx/base/tools/constants.py +49 -0
- lfx/base/tools/flow_tool.py +132 -0
- lfx/base/tools/run_flow.py +224 -0
- lfx/base/vectorstores/__init__.py +0 -0
- lfx/base/vectorstores/model.py +193 -0
- lfx/base/vectorstores/utils.py +22 -0
- lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
- lfx/cli/__init__.py +5 -0
- lfx/cli/commands.py +319 -0
- lfx/cli/common.py +650 -0
- lfx/cli/run.py +441 -0
- lfx/cli/script_loader.py +247 -0
- lfx/cli/serve_app.py +546 -0
- lfx/cli/validation.py +69 -0
- lfx/components/FAISS/__init__.py +34 -0
- lfx/components/FAISS/faiss.py +111 -0
- lfx/components/Notion/__init__.py +19 -0
- lfx/components/Notion/add_content_to_page.py +269 -0
- lfx/components/Notion/create_page.py +94 -0
- lfx/components/Notion/list_database_properties.py +68 -0
- lfx/components/Notion/list_pages.py +122 -0
- lfx/components/Notion/list_users.py +77 -0
- lfx/components/Notion/page_content_viewer.py +93 -0
- lfx/components/Notion/search.py +111 -0
- lfx/components/Notion/update_page_property.py +114 -0
- lfx/components/__init__.py +411 -0
- lfx/components/_importing.py +42 -0
- lfx/components/agentql/__init__.py +3 -0
- lfx/components/agentql/agentql_api.py +151 -0
- lfx/components/agents/__init__.py +34 -0
- lfx/components/agents/agent.py +558 -0
- lfx/components/agents/mcp_component.py +501 -0
- lfx/components/aiml/__init__.py +37 -0
- lfx/components/aiml/aiml.py +112 -0
- lfx/components/aiml/aiml_embeddings.py +37 -0
- lfx/components/amazon/__init__.py +36 -0
- lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
- lfx/components/amazon/amazon_bedrock_model.py +124 -0
- lfx/components/amazon/s3_bucket_uploader.py +211 -0
- lfx/components/anthropic/__init__.py +34 -0
- lfx/components/anthropic/anthropic.py +187 -0
- lfx/components/apify/__init__.py +5 -0
- lfx/components/apify/apify_actor.py +325 -0
- lfx/components/arxiv/__init__.py +3 -0
- lfx/components/arxiv/arxiv.py +163 -0
- lfx/components/assemblyai/__init__.py +46 -0
- lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
- lfx/components/assemblyai/assemblyai_lemur.py +183 -0
- lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
- lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
- lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
- lfx/components/azure/__init__.py +37 -0
- lfx/components/azure/azure_openai.py +95 -0
- lfx/components/azure/azure_openai_embeddings.py +83 -0
- lfx/components/baidu/__init__.py +32 -0
- lfx/components/baidu/baidu_qianfan_chat.py +113 -0
- lfx/components/bing/__init__.py +3 -0
- lfx/components/bing/bing_search_api.py +61 -0
- lfx/components/cassandra/__init__.py +40 -0
- lfx/components/cassandra/cassandra.py +264 -0
- lfx/components/cassandra/cassandra_chat.py +92 -0
- lfx/components/cassandra/cassandra_graph.py +238 -0
- lfx/components/chains/__init__.py +3 -0
- lfx/components/chroma/__init__.py +34 -0
- lfx/components/chroma/chroma.py +167 -0
- lfx/components/cleanlab/__init__.py +40 -0
- lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
- lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
- lfx/components/cleanlab/cleanlab_remediator.py +131 -0
- lfx/components/clickhouse/__init__.py +34 -0
- lfx/components/clickhouse/clickhouse.py +135 -0
- lfx/components/cloudflare/__init__.py +32 -0
- lfx/components/cloudflare/cloudflare.py +81 -0
- lfx/components/cohere/__init__.py +40 -0
- lfx/components/cohere/cohere_embeddings.py +81 -0
- lfx/components/cohere/cohere_models.py +46 -0
- lfx/components/cohere/cohere_rerank.py +51 -0
- lfx/components/composio/__init__.py +74 -0
- lfx/components/composio/composio_api.py +268 -0
- lfx/components/composio/dropbox_compnent.py +11 -0
- lfx/components/composio/github_composio.py +11 -0
- lfx/components/composio/gmail_composio.py +38 -0
- lfx/components/composio/googlecalendar_composio.py +11 -0
- lfx/components/composio/googlemeet_composio.py +11 -0
- lfx/components/composio/googletasks_composio.py +8 -0
- lfx/components/composio/linear_composio.py +11 -0
- lfx/components/composio/outlook_composio.py +11 -0
- lfx/components/composio/reddit_composio.py +11 -0
- lfx/components/composio/slack_composio.py +582 -0
- lfx/components/composio/slackbot_composio.py +11 -0
- lfx/components/composio/supabase_composio.py +11 -0
- lfx/components/composio/todoist_composio.py +11 -0
- lfx/components/composio/youtube_composio.py +11 -0
- lfx/components/confluence/__init__.py +3 -0
- lfx/components/confluence/confluence.py +84 -0
- lfx/components/couchbase/__init__.py +34 -0
- lfx/components/couchbase/couchbase.py +102 -0
- lfx/components/crewai/__init__.py +49 -0
- lfx/components/crewai/crewai.py +107 -0
- lfx/components/crewai/hierarchical_crew.py +46 -0
- lfx/components/crewai/hierarchical_task.py +44 -0
- lfx/components/crewai/sequential_crew.py +52 -0
- lfx/components/crewai/sequential_task.py +73 -0
- lfx/components/crewai/sequential_task_agent.py +143 -0
- lfx/components/custom_component/__init__.py +34 -0
- lfx/components/custom_component/custom_component.py +31 -0
- lfx/components/data/__init__.py +64 -0
- lfx/components/data/api_request.py +544 -0
- lfx/components/data/csv_to_data.py +95 -0
- lfx/components/data/directory.py +113 -0
- lfx/components/data/file.py +577 -0
- lfx/components/data/json_to_data.py +98 -0
- lfx/components/data/news_search.py +164 -0
- lfx/components/data/rss.py +69 -0
- lfx/components/data/sql_executor.py +101 -0
- lfx/components/data/url.py +311 -0
- lfx/components/data/web_search.py +112 -0
- lfx/components/data/webhook.py +56 -0
- lfx/components/datastax/__init__.py +70 -0
- lfx/components/datastax/astra_assistant_manager.py +306 -0
- lfx/components/datastax/astra_db.py +75 -0
- lfx/components/datastax/astra_vectorize.py +124 -0
- lfx/components/datastax/astradb.py +1285 -0
- lfx/components/datastax/astradb_cql.py +314 -0
- lfx/components/datastax/astradb_graph.py +330 -0
- lfx/components/datastax/astradb_tool.py +414 -0
- lfx/components/datastax/astradb_vectorstore.py +1285 -0
- lfx/components/datastax/cassandra.py +92 -0
- lfx/components/datastax/create_assistant.py +58 -0
- lfx/components/datastax/create_thread.py +32 -0
- lfx/components/datastax/dotenv.py +35 -0
- lfx/components/datastax/get_assistant.py +37 -0
- lfx/components/datastax/getenvvar.py +30 -0
- lfx/components/datastax/graph_rag.py +141 -0
- lfx/components/datastax/hcd.py +314 -0
- lfx/components/datastax/list_assistants.py +25 -0
- lfx/components/datastax/run.py +89 -0
- lfx/components/deactivated/__init__.py +15 -0
- lfx/components/deactivated/amazon_kendra.py +66 -0
- lfx/components/deactivated/chat_litellm_model.py +158 -0
- lfx/components/deactivated/code_block_extractor.py +26 -0
- lfx/components/deactivated/documents_to_data.py +22 -0
- lfx/components/deactivated/embed.py +16 -0
- lfx/components/deactivated/extract_key_from_data.py +46 -0
- lfx/components/deactivated/json_document_builder.py +57 -0
- lfx/components/deactivated/list_flows.py +20 -0
- lfx/components/deactivated/mcp_sse.py +61 -0
- lfx/components/deactivated/mcp_stdio.py +62 -0
- lfx/components/deactivated/merge_data.py +93 -0
- lfx/components/deactivated/message.py +37 -0
- lfx/components/deactivated/metal.py +54 -0
- lfx/components/deactivated/multi_query.py +59 -0
- lfx/components/deactivated/retriever.py +43 -0
- lfx/components/deactivated/selective_passthrough.py +77 -0
- lfx/components/deactivated/should_run_next.py +40 -0
- lfx/components/deactivated/split_text.py +63 -0
- lfx/components/deactivated/store_message.py +24 -0
- lfx/components/deactivated/sub_flow.py +124 -0
- lfx/components/deactivated/vectara_self_query.py +76 -0
- lfx/components/deactivated/vector_store.py +24 -0
- lfx/components/deepseek/__init__.py +34 -0
- lfx/components/deepseek/deepseek.py +136 -0
- lfx/components/docling/__init__.py +43 -0
- lfx/components/docling/chunk_docling_document.py +186 -0
- lfx/components/docling/docling_inline.py +231 -0
- lfx/components/docling/docling_remote.py +193 -0
- lfx/components/docling/export_docling_document.py +117 -0
- lfx/components/documentloaders/__init__.py +3 -0
- lfx/components/duckduckgo/__init__.py +3 -0
- lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
- lfx/components/elastic/__init__.py +37 -0
- lfx/components/elastic/elasticsearch.py +267 -0
- lfx/components/elastic/opensearch.py +243 -0
- lfx/components/embeddings/__init__.py +37 -0
- lfx/components/embeddings/similarity.py +76 -0
- lfx/components/embeddings/text_embedder.py +64 -0
- lfx/components/exa/__init__.py +3 -0
- lfx/components/exa/exa_search.py +68 -0
- lfx/components/firecrawl/__init__.py +43 -0
- lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
- lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
- lfx/components/firecrawl/firecrawl_map_api.py +89 -0
- lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
- lfx/components/git/__init__.py +4 -0
- lfx/components/git/git.py +262 -0
- lfx/components/git/gitextractor.py +196 -0
- lfx/components/glean/__init__.py +3 -0
- lfx/components/glean/glean_search_api.py +173 -0
- lfx/components/google/__init__.py +17 -0
- lfx/components/google/gmail.py +192 -0
- lfx/components/google/google_bq_sql_executor.py +157 -0
- lfx/components/google/google_drive.py +92 -0
- lfx/components/google/google_drive_search.py +152 -0
- lfx/components/google/google_generative_ai.py +147 -0
- lfx/components/google/google_generative_ai_embeddings.py +141 -0
- lfx/components/google/google_oauth_token.py +89 -0
- lfx/components/google/google_search_api_core.py +68 -0
- lfx/components/google/google_serper_api_core.py +74 -0
- lfx/components/groq/__init__.py +34 -0
- lfx/components/groq/groq.py +136 -0
- lfx/components/helpers/__init__.py +52 -0
- lfx/components/helpers/calculator_core.py +89 -0
- lfx/components/helpers/create_list.py +40 -0
- lfx/components/helpers/current_date.py +42 -0
- lfx/components/helpers/id_generator.py +42 -0
- lfx/components/helpers/memory.py +251 -0
- lfx/components/helpers/output_parser.py +45 -0
- lfx/components/helpers/store_message.py +90 -0
- lfx/components/homeassistant/__init__.py +7 -0
- lfx/components/homeassistant/home_assistant_control.py +152 -0
- lfx/components/homeassistant/list_home_assistant_states.py +137 -0
- lfx/components/huggingface/__init__.py +37 -0
- lfx/components/huggingface/huggingface.py +197 -0
- lfx/components/huggingface/huggingface_inference_api.py +106 -0
- lfx/components/ibm/__init__.py +34 -0
- lfx/components/ibm/watsonx.py +203 -0
- lfx/components/ibm/watsonx_embeddings.py +135 -0
- lfx/components/icosacomputing/__init__.py +5 -0
- lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
- lfx/components/input_output/__init__.py +38 -0
- lfx/components/input_output/chat.py +120 -0
- lfx/components/input_output/chat_output.py +200 -0
- lfx/components/input_output/text.py +27 -0
- lfx/components/input_output/text_output.py +29 -0
- lfx/components/jigsawstack/__init__.py +23 -0
- lfx/components/jigsawstack/ai_scrape.py +126 -0
- lfx/components/jigsawstack/ai_web_search.py +136 -0
- lfx/components/jigsawstack/file_read.py +115 -0
- lfx/components/jigsawstack/file_upload.py +94 -0
- lfx/components/jigsawstack/image_generation.py +205 -0
- lfx/components/jigsawstack/nsfw.py +60 -0
- lfx/components/jigsawstack/object_detection.py +124 -0
- lfx/components/jigsawstack/sentiment.py +112 -0
- lfx/components/jigsawstack/text_to_sql.py +90 -0
- lfx/components/jigsawstack/text_translate.py +77 -0
- lfx/components/jigsawstack/vocr.py +107 -0
- lfx/components/langchain_utilities/__init__.py +109 -0
- lfx/components/langchain_utilities/character.py +53 -0
- lfx/components/langchain_utilities/conversation.py +59 -0
- lfx/components/langchain_utilities/csv_agent.py +107 -0
- lfx/components/langchain_utilities/fake_embeddings.py +26 -0
- lfx/components/langchain_utilities/html_link_extractor.py +35 -0
- lfx/components/langchain_utilities/json_agent.py +45 -0
- lfx/components/langchain_utilities/langchain_hub.py +126 -0
- lfx/components/langchain_utilities/language_recursive.py +49 -0
- lfx/components/langchain_utilities/language_semantic.py +138 -0
- lfx/components/langchain_utilities/llm_checker.py +39 -0
- lfx/components/langchain_utilities/llm_math.py +42 -0
- lfx/components/langchain_utilities/natural_language.py +61 -0
- lfx/components/langchain_utilities/openai_tools.py +53 -0
- lfx/components/langchain_utilities/openapi.py +48 -0
- lfx/components/langchain_utilities/recursive_character.py +60 -0
- lfx/components/langchain_utilities/retrieval_qa.py +83 -0
- lfx/components/langchain_utilities/runnable_executor.py +137 -0
- lfx/components/langchain_utilities/self_query.py +80 -0
- lfx/components/langchain_utilities/spider.py +142 -0
- lfx/components/langchain_utilities/sql.py +40 -0
- lfx/components/langchain_utilities/sql_database.py +35 -0
- lfx/components/langchain_utilities/sql_generator.py +78 -0
- lfx/components/langchain_utilities/tool_calling.py +59 -0
- lfx/components/langchain_utilities/vector_store_info.py +49 -0
- lfx/components/langchain_utilities/vector_store_router.py +33 -0
- lfx/components/langchain_utilities/xml_agent.py +71 -0
- lfx/components/langwatch/__init__.py +3 -0
- lfx/components/langwatch/langwatch.py +278 -0
- lfx/components/link_extractors/__init__.py +3 -0
- lfx/components/lmstudio/__init__.py +34 -0
- lfx/components/lmstudio/lmstudioembeddings.py +89 -0
- lfx/components/lmstudio/lmstudiomodel.py +129 -0
- lfx/components/logic/__init__.py +52 -0
- lfx/components/logic/conditional_router.py +171 -0
- lfx/components/logic/data_conditional_router.py +125 -0
- lfx/components/logic/flow_tool.py +110 -0
- lfx/components/logic/listen.py +29 -0
- lfx/components/logic/loop.py +125 -0
- lfx/components/logic/notify.py +88 -0
- lfx/components/logic/pass_message.py +35 -0
- lfx/components/logic/run_flow.py +71 -0
- lfx/components/logic/sub_flow.py +114 -0
- lfx/components/maritalk/__init__.py +32 -0
- lfx/components/maritalk/maritalk.py +52 -0
- lfx/components/mem0/__init__.py +3 -0
- lfx/components/mem0/mem0_chat_memory.py +136 -0
- lfx/components/milvus/__init__.py +34 -0
- lfx/components/milvus/milvus.py +115 -0
- lfx/components/mistral/__init__.py +37 -0
- lfx/components/mistral/mistral.py +114 -0
- lfx/components/mistral/mistral_embeddings.py +58 -0
- lfx/components/models/__init__.py +34 -0
- lfx/components/models/embedding_model.py +114 -0
- lfx/components/models/language_model.py +144 -0
- lfx/components/mongodb/__init__.py +34 -0
- lfx/components/mongodb/mongodb_atlas.py +213 -0
- lfx/components/needle/__init__.py +3 -0
- lfx/components/needle/needle.py +104 -0
- lfx/components/notdiamond/__init__.py +34 -0
- lfx/components/notdiamond/notdiamond.py +228 -0
- lfx/components/novita/__init__.py +32 -0
- lfx/components/novita/novita.py +130 -0
- lfx/components/nvidia/__init__.py +57 -0
- lfx/components/nvidia/nvidia.py +157 -0
- lfx/components/nvidia/nvidia_embedding.py +77 -0
- lfx/components/nvidia/nvidia_ingest.py +317 -0
- lfx/components/nvidia/nvidia_rerank.py +63 -0
- lfx/components/nvidia/system_assist.py +65 -0
- lfx/components/olivya/__init__.py +3 -0
- lfx/components/olivya/olivya.py +116 -0
- lfx/components/ollama/__init__.py +37 -0
- lfx/components/ollama/ollama.py +330 -0
- lfx/components/ollama/ollama_embeddings.py +106 -0
- lfx/components/openai/__init__.py +37 -0
- lfx/components/openai/openai.py +100 -0
- lfx/components/openai/openai_chat_model.py +176 -0
- lfx/components/openrouter/__init__.py +32 -0
- lfx/components/openrouter/openrouter.py +202 -0
- lfx/components/output_parsers/__init__.py +3 -0
- lfx/components/perplexity/__init__.py +34 -0
- lfx/components/perplexity/perplexity.py +75 -0
- lfx/components/pgvector/__init__.py +34 -0
- lfx/components/pgvector/pgvector.py +72 -0
- lfx/components/pinecone/__init__.py +34 -0
- lfx/components/pinecone/pinecone.py +134 -0
- lfx/components/processing/__init__.py +117 -0
- lfx/components/processing/alter_metadata.py +108 -0
- lfx/components/processing/batch_run.py +205 -0
- lfx/components/processing/combine_text.py +39 -0
- lfx/components/processing/converter.py +159 -0
- lfx/components/processing/create_data.py +110 -0
- lfx/components/processing/data_operations.py +438 -0
- lfx/components/processing/data_to_dataframe.py +70 -0
- lfx/components/processing/dataframe_operations.py +313 -0
- lfx/components/processing/extract_key.py +53 -0
- lfx/components/processing/filter_data.py +42 -0
- lfx/components/processing/filter_data_values.py +88 -0
- lfx/components/processing/json_cleaner.py +103 -0
- lfx/components/processing/lambda_filter.py +154 -0
- lfx/components/processing/llm_router.py +499 -0
- lfx/components/processing/merge_data.py +90 -0
- lfx/components/processing/message_to_data.py +36 -0
- lfx/components/processing/parse_data.py +70 -0
- lfx/components/processing/parse_dataframe.py +68 -0
- lfx/components/processing/parse_json_data.py +90 -0
- lfx/components/processing/parser.py +143 -0
- lfx/components/processing/prompt.py +67 -0
- lfx/components/processing/python_repl_core.py +98 -0
- lfx/components/processing/regex.py +82 -0
- lfx/components/processing/save_file.py +225 -0
- lfx/components/processing/select_data.py +48 -0
- lfx/components/processing/split_text.py +141 -0
- lfx/components/processing/structured_output.py +202 -0
- lfx/components/processing/update_data.py +160 -0
- lfx/components/prototypes/__init__.py +34 -0
- lfx/components/prototypes/python_function.py +73 -0
- lfx/components/qdrant/__init__.py +34 -0
- lfx/components/qdrant/qdrant.py +109 -0
- lfx/components/redis/__init__.py +37 -0
- lfx/components/redis/redis.py +89 -0
- lfx/components/redis/redis_chat.py +43 -0
- lfx/components/sambanova/__init__.py +32 -0
- lfx/components/sambanova/sambanova.py +84 -0
- lfx/components/scrapegraph/__init__.py +40 -0
- lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
- lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
- lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
- lfx/components/searchapi/__init__.py +34 -0
- lfx/components/searchapi/search.py +79 -0
- lfx/components/serpapi/__init__.py +3 -0
- lfx/components/serpapi/serp.py +115 -0
- lfx/components/supabase/__init__.py +34 -0
- lfx/components/supabase/supabase.py +76 -0
- lfx/components/tavily/__init__.py +4 -0
- lfx/components/tavily/tavily_extract.py +117 -0
- lfx/components/tavily/tavily_search.py +212 -0
- lfx/components/textsplitters/__init__.py +3 -0
- lfx/components/toolkits/__init__.py +3 -0
- lfx/components/tools/__init__.py +72 -0
- lfx/components/tools/calculator.py +108 -0
- lfx/components/tools/google_search_api.py +45 -0
- lfx/components/tools/google_serper_api.py +115 -0
- lfx/components/tools/python_code_structured_tool.py +327 -0
- lfx/components/tools/python_repl.py +97 -0
- lfx/components/tools/search_api.py +87 -0
- lfx/components/tools/searxng.py +145 -0
- lfx/components/tools/serp_api.py +119 -0
- lfx/components/tools/tavily_search_tool.py +344 -0
- lfx/components/tools/wikidata_api.py +102 -0
- lfx/components/tools/wikipedia_api.py +49 -0
- lfx/components/tools/yahoo_finance.py +129 -0
- lfx/components/twelvelabs/__init__.py +52 -0
- lfx/components/twelvelabs/convert_astra_results.py +84 -0
- lfx/components/twelvelabs/pegasus_index.py +311 -0
- lfx/components/twelvelabs/split_video.py +291 -0
- lfx/components/twelvelabs/text_embeddings.py +57 -0
- lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
- lfx/components/twelvelabs/video_embeddings.py +100 -0
- lfx/components/twelvelabs/video_file.py +179 -0
- lfx/components/unstructured/__init__.py +3 -0
- lfx/components/unstructured/unstructured.py +121 -0
- lfx/components/upstash/__init__.py +34 -0
- lfx/components/upstash/upstash.py +124 -0
- lfx/components/vectara/__init__.py +37 -0
- lfx/components/vectara/vectara.py +97 -0
- lfx/components/vectara/vectara_rag.py +164 -0
- lfx/components/vectorstores/__init__.py +40 -0
- lfx/components/vectorstores/astradb.py +1285 -0
- lfx/components/vectorstores/astradb_graph.py +319 -0
- lfx/components/vectorstores/cassandra.py +264 -0
- lfx/components/vectorstores/cassandra_graph.py +238 -0
- lfx/components/vectorstores/chroma.py +167 -0
- lfx/components/vectorstores/clickhouse.py +135 -0
- lfx/components/vectorstores/couchbase.py +102 -0
- lfx/components/vectorstores/elasticsearch.py +267 -0
- lfx/components/vectorstores/faiss.py +111 -0
- lfx/components/vectorstores/graph_rag.py +141 -0
- lfx/components/vectorstores/hcd.py +314 -0
- lfx/components/vectorstores/local_db.py +261 -0
- lfx/components/vectorstores/milvus.py +115 -0
- lfx/components/vectorstores/mongodb_atlas.py +213 -0
- lfx/components/vectorstores/opensearch.py +243 -0
- lfx/components/vectorstores/pgvector.py +72 -0
- lfx/components/vectorstores/pinecone.py +134 -0
- lfx/components/vectorstores/qdrant.py +109 -0
- lfx/components/vectorstores/supabase.py +76 -0
- lfx/components/vectorstores/upstash.py +124 -0
- lfx/components/vectorstores/vectara.py +97 -0
- lfx/components/vectorstores/vectara_rag.py +164 -0
- lfx/components/vectorstores/weaviate.py +89 -0
- lfx/components/vertexai/__init__.py +37 -0
- lfx/components/vertexai/vertexai.py +71 -0
- lfx/components/vertexai/vertexai_embeddings.py +67 -0
- lfx/components/weaviate/__init__.py +34 -0
- lfx/components/weaviate/weaviate.py +89 -0
- lfx/components/wikipedia/__init__.py +4 -0
- lfx/components/wikipedia/wikidata.py +86 -0
- lfx/components/wikipedia/wikipedia.py +53 -0
- lfx/components/wolframalpha/__init__.py +3 -0
- lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
- lfx/components/xai/__init__.py +32 -0
- lfx/components/xai/xai.py +167 -0
- lfx/components/yahoosearch/__init__.py +3 -0
- lfx/components/yahoosearch/yahoo.py +137 -0
- lfx/components/youtube/__init__.py +52 -0
- lfx/components/youtube/channel.py +227 -0
- lfx/components/youtube/comments.py +231 -0
- lfx/components/youtube/playlist.py +33 -0
- lfx/components/youtube/search.py +120 -0
- lfx/components/youtube/trending.py +285 -0
- lfx/components/youtube/video_details.py +263 -0
- lfx/components/youtube/youtube_transcripts.py +118 -0
- lfx/components/zep/__init__.py +3 -0
- lfx/components/zep/zep.py +44 -0
- lfx/constants.py +6 -0
- lfx/custom/__init__.py +7 -0
- lfx/custom/attributes.py +86 -0
- lfx/custom/code_parser/__init__.py +3 -0
- lfx/custom/code_parser/code_parser.py +361 -0
- lfx/custom/custom_component/__init__.py +0 -0
- lfx/custom/custom_component/base_component.py +128 -0
- lfx/custom/custom_component/component.py +1808 -0
- lfx/custom/custom_component/component_with_cache.py +8 -0
- lfx/custom/custom_component/custom_component.py +588 -0
- lfx/custom/dependency_analyzer.py +165 -0
- lfx/custom/directory_reader/__init__.py +3 -0
- lfx/custom/directory_reader/directory_reader.py +359 -0
- lfx/custom/directory_reader/utils.py +171 -0
- lfx/custom/eval.py +12 -0
- lfx/custom/schema.py +32 -0
- lfx/custom/tree_visitor.py +21 -0
- lfx/custom/utils.py +877 -0
- lfx/custom/validate.py +488 -0
- lfx/events/__init__.py +1 -0
- lfx/events/event_manager.py +110 -0
- lfx/exceptions/__init__.py +0 -0
- lfx/exceptions/component.py +15 -0
- lfx/field_typing/__init__.py +91 -0
- lfx/field_typing/constants.py +215 -0
- lfx/field_typing/range_spec.py +35 -0
- lfx/graph/__init__.py +6 -0
- lfx/graph/edge/__init__.py +0 -0
- lfx/graph/edge/base.py +277 -0
- lfx/graph/edge/schema.py +119 -0
- lfx/graph/edge/utils.py +0 -0
- lfx/graph/graph/__init__.py +0 -0
- lfx/graph/graph/ascii.py +202 -0
- lfx/graph/graph/base.py +2238 -0
- lfx/graph/graph/constants.py +63 -0
- lfx/graph/graph/runnable_vertices_manager.py +133 -0
- lfx/graph/graph/schema.py +52 -0
- lfx/graph/graph/state_model.py +66 -0
- lfx/graph/graph/utils.py +1024 -0
- lfx/graph/schema.py +75 -0
- lfx/graph/state/__init__.py +0 -0
- lfx/graph/state/model.py +237 -0
- lfx/graph/utils.py +200 -0
- lfx/graph/vertex/__init__.py +0 -0
- lfx/graph/vertex/base.py +823 -0
- lfx/graph/vertex/constants.py +0 -0
- lfx/graph/vertex/exceptions.py +4 -0
- lfx/graph/vertex/param_handler.py +264 -0
- lfx/graph/vertex/schema.py +26 -0
- lfx/graph/vertex/utils.py +19 -0
- lfx/graph/vertex/vertex_types.py +489 -0
- lfx/helpers/__init__.py +1 -0
- lfx/helpers/base_model.py +71 -0
- lfx/helpers/custom.py +13 -0
- lfx/helpers/data.py +167 -0
- lfx/helpers/flow.py +194 -0
- lfx/inputs/__init__.py +68 -0
- lfx/inputs/constants.py +2 -0
- lfx/inputs/input_mixin.py +328 -0
- lfx/inputs/inputs.py +714 -0
- lfx/inputs/validators.py +19 -0
- lfx/interface/__init__.py +6 -0
- lfx/interface/components.py +489 -0
- lfx/interface/importing/__init__.py +5 -0
- lfx/interface/importing/utils.py +39 -0
- lfx/interface/initialize/__init__.py +3 -0
- lfx/interface/initialize/loading.py +224 -0
- lfx/interface/listing.py +26 -0
- lfx/interface/run.py +16 -0
- lfx/interface/utils.py +111 -0
- lfx/io/__init__.py +63 -0
- lfx/io/schema.py +289 -0
- lfx/load/__init__.py +8 -0
- lfx/load/load.py +256 -0
- lfx/load/utils.py +99 -0
- lfx/log/__init__.py +5 -0
- lfx/log/logger.py +385 -0
- lfx/memory/__init__.py +90 -0
- lfx/memory/stubs.py +283 -0
- lfx/processing/__init__.py +1 -0
- lfx/processing/process.py +238 -0
- lfx/processing/utils.py +25 -0
- lfx/py.typed +0 -0
- lfx/schema/__init__.py +66 -0
- lfx/schema/artifact.py +83 -0
- lfx/schema/content_block.py +62 -0
- lfx/schema/content_types.py +91 -0
- lfx/schema/data.py +308 -0
- lfx/schema/dataframe.py +210 -0
- lfx/schema/dotdict.py +74 -0
- lfx/schema/encoders.py +13 -0
- lfx/schema/graph.py +47 -0
- lfx/schema/image.py +131 -0
- lfx/schema/json_schema.py +141 -0
- lfx/schema/log.py +61 -0
- lfx/schema/message.py +473 -0
- lfx/schema/openai_responses_schemas.py +74 -0
- lfx/schema/properties.py +41 -0
- lfx/schema/schema.py +171 -0
- lfx/schema/serialize.py +13 -0
- lfx/schema/table.py +140 -0
- lfx/schema/validators.py +114 -0
- lfx/serialization/__init__.py +5 -0
- lfx/serialization/constants.py +2 -0
- lfx/serialization/serialization.py +314 -0
- lfx/services/__init__.py +23 -0
- lfx/services/base.py +28 -0
- lfx/services/cache/__init__.py +6 -0
- lfx/services/cache/base.py +183 -0
- lfx/services/cache/service.py +166 -0
- lfx/services/cache/utils.py +169 -0
- lfx/services/chat/__init__.py +1 -0
- lfx/services/chat/config.py +2 -0
- lfx/services/chat/schema.py +10 -0
- lfx/services/deps.py +129 -0
- lfx/services/factory.py +19 -0
- lfx/services/initialize.py +19 -0
- lfx/services/interfaces.py +103 -0
- lfx/services/manager.py +172 -0
- lfx/services/schema.py +20 -0
- lfx/services/session.py +82 -0
- lfx/services/settings/__init__.py +3 -0
- lfx/services/settings/auth.py +130 -0
- lfx/services/settings/base.py +539 -0
- lfx/services/settings/constants.py +31 -0
- lfx/services/settings/factory.py +23 -0
- lfx/services/settings/feature_flags.py +12 -0
- lfx/services/settings/service.py +35 -0
- lfx/services/settings/utils.py +40 -0
- lfx/services/shared_component_cache/__init__.py +1 -0
- lfx/services/shared_component_cache/factory.py +30 -0
- lfx/services/shared_component_cache/service.py +9 -0
- lfx/services/storage/__init__.py +5 -0
- lfx/services/storage/local.py +155 -0
- lfx/services/storage/service.py +54 -0
- lfx/services/tracing/__init__.py +1 -0
- lfx/services/tracing/service.py +21 -0
- lfx/settings.py +6 -0
- lfx/template/__init__.py +6 -0
- lfx/template/field/__init__.py +0 -0
- lfx/template/field/base.py +257 -0
- lfx/template/field/prompt.py +15 -0
- lfx/template/frontend_node/__init__.py +6 -0
- lfx/template/frontend_node/base.py +212 -0
- lfx/template/frontend_node/constants.py +65 -0
- lfx/template/frontend_node/custom_components.py +79 -0
- lfx/template/template/__init__.py +0 -0
- lfx/template/template/base.py +100 -0
- lfx/template/utils.py +217 -0
- lfx/type_extraction/__init__.py +19 -0
- lfx/type_extraction/type_extraction.py +75 -0
- lfx/type_extraction.py +80 -0
- lfx/utils/__init__.py +1 -0
- lfx/utils/async_helpers.py +42 -0
- lfx/utils/component_utils.py +154 -0
- lfx/utils/concurrency.py +60 -0
- lfx/utils/connection_string_parser.py +11 -0
- lfx/utils/constants.py +205 -0
- lfx/utils/data_structure.py +212 -0
- lfx/utils/exceptions.py +22 -0
- lfx/utils/helpers.py +28 -0
- lfx/utils/image.py +73 -0
- lfx/utils/lazy_load.py +15 -0
- lfx/utils/request_utils.py +18 -0
- lfx/utils/schemas.py +139 -0
- lfx/utils/util.py +481 -0
- lfx/utils/util_strings.py +56 -0
- lfx/utils/version.py +24 -0
- lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
- lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
- lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
- lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,186 @@
|
|
1
|
+
import json
|
2
|
+
|
3
|
+
import tiktoken
|
4
|
+
from docling_core.transforms.chunker import BaseChunker, DocMeta
|
5
|
+
from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
|
6
|
+
|
7
|
+
from lfx.base.data.docling_utils import extract_docling_documents
|
8
|
+
from lfx.custom import Component
|
9
|
+
from lfx.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output, StrInput
|
10
|
+
from lfx.schema import Data, DataFrame
|
11
|
+
|
12
|
+
|
13
|
+
class ChunkDoclingDocumentComponent(Component):
    """Split DoclingDocument objects into chunks with one of Docling's chunkers.

    Documents are pulled out of the incoming Data/DataFrame via
    ``extract_docling_documents`` using ``doc_key``; each one is chunked with
    either the ``HybridChunker`` (driven by a Hugging Face or OpenAI tokenizer)
    or the ``HierarchicalChunker``, and one row per chunk is returned.
    """

    display_name: str = "Chunk DoclingDocument"
    description: str = "Use the DoclingDocument chunkers to split the document into chunks."
    documentation = "https://docling-project.github.io/docling/concepts/chunking/"
    icon = "Docling"
    name = "ChunkDoclingDocument"

    inputs = [
        HandleInput(
            name="data_inputs",
            display_name="Data or DataFrame",
            info="The data with documents to split in chunks.",
            input_types=["Data", "DataFrame"],
            required=True,
        ),
        DropdownInput(
            name="chunker",
            display_name="Chunker",
            options=["HybridChunker", "HierarchicalChunker"],
            info=("Which chunker to use."),
            value="HybridChunker",
            real_time_refresh=True,
        ),
        DropdownInput(
            name="provider",
            display_name="Provider",
            options=["Hugging Face", "OpenAI"],
            info=("Which tokenizer provider."),
            value="Hugging Face",
            show=True,
            real_time_refresh=True,
            advanced=True,
            dynamic=True,
        ),
        StrInput(
            name="hf_model_name",
            display_name="HF model name",
            info=(
                "Model name of the tokenizer to use with the HybridChunker when Hugging Face is chosen as a tokenizer."
            ),
            value="sentence-transformers/all-MiniLM-L6-v2",
            show=True,
            advanced=True,
            dynamic=True,
        ),
        StrInput(
            name="openai_model_name",
            display_name="OpenAI model name",
            info=("Model name of the tokenizer to use with the HybridChunker when OpenAI is chosen as a tokenizer."),
            value="gpt-4o",
            show=False,
            advanced=True,
            dynamic=True,
        ),
        IntInput(
            name="max_tokens",
            display_name="Maximum tokens",
            info=("Maximum number of tokens for the HybridChunker."),
            show=True,
            required=False,
            advanced=True,
            dynamic=True,
        ),
        MessageTextInput(
            name="doc_key",
            display_name="Doc Key",
            info="The key to use for the DoclingDocument column.",
            value="doc",
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="DataFrame", name="dataframe", method="chunk_documents"),
    ]

    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
        """Toggle visibility of the tokenizer-related fields.

        Args:
            build_config: Mapping of field name to its config dict; the ``show``
                flags are mutated in place.
            field_value: New value of the field that changed.
            field_name: Name of the field that changed.

        Returns:
            The same ``build_config`` object, after mutation.
        """
        if field_name == "chunker":
            # Tokenizer settings only make sense for the HybridChunker; within it,
            # only the model-name field of the selected provider is shown.
            provider_type = build_config["provider"]["value"]
            is_hybrid = field_value == "HybridChunker"
            build_config["provider"]["show"] = is_hybrid
            build_config["hf_model_name"]["show"] = is_hybrid and provider_type == "Hugging Face"
            build_config["openai_model_name"]["show"] = is_hybrid and provider_type == "OpenAI"
            build_config["max_tokens"]["show"] = is_hybrid
        elif (
            field_name == "provider"
            and build_config["chunker"]["value"] == "HybridChunker"
            and field_value in ("Hugging Face", "OpenAI")
        ):
            build_config["hf_model_name"]["show"] = field_value == "Hugging Face"
            build_config["openai_model_name"]["show"] = field_value == "OpenAI"

        return build_config

    def _docs_to_data(self, docs) -> list[Data]:
        """Convert objects exposing ``page_content`` and ``metadata`` into ``Data``."""
        return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]

    def _build_hybrid_chunker(self) -> BaseChunker:
        """Create a ``HybridChunker`` with the tokenizer selected by ``provider``.

        Raises:
            ImportError: If the chunker or the selected tokenizer backend cannot
                be imported.
            ValueError: If ``provider`` is not one of the supported options.
        """
        try:
            from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
        except ImportError as e:
            msg = (
                "HybridChunker is not installed. Please install it with `uv pip install docling-core[chunking]` "
                "or `uv pip install transformers`"
            )
            raise ImportError(msg) from e

        # An unset or zero max_tokens is passed on as None.
        max_tokens: int | None = self.max_tokens or None

        if self.provider == "Hugging Face":
            try:
                from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
            except ImportError as e:
                msg = (
                    "HuggingFaceTokenizer is not installed."
                    " Please install it with `uv pip install docling-core[chunking]`"
                )
                raise ImportError(msg) from e
            tokenizer = HuggingFaceTokenizer.from_pretrained(
                model_name=self.hf_model_name,
                max_tokens=max_tokens,
            )
        elif self.provider == "OpenAI":
            try:
                from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
            except ImportError as e:
                msg = (
                    "OpenAITokenizer is not installed."
                    " Please install it with `uv pip install docling-core[chunking]`"
                    " or `uv pip install transformers`"
                )
                raise ImportError(msg) from e
            if max_tokens is None:
                max_tokens = 128 * 1024  # context window length required for OpenAI tokenizers
            tokenizer = OpenAITokenizer(
                tokenizer=tiktoken.encoding_for_model(self.openai_model_name),
                max_tokens=max_tokens,
            )
        else:
            # Previously this fell through and failed later with an UnboundLocalError.
            msg = f"Unsupported tokenizer provider: {self.provider!r}. Expected 'Hugging Face' or 'OpenAI'."
            raise ValueError(msg)

        return HybridChunker(tokenizer=tokenizer)

    def chunk_documents(self) -> DataFrame:
        """Chunk every input document and return one row per chunk.

        Each row carries the contextualized chunk text (``text``), the source
        document's ``origin.binary_hash`` (``document_id``) and a JSON list of
        the chunk's doc-item references (``doc_items``).

        Raises:
            ImportError: If a required optional dependency is missing.
            ValueError: If ``chunker`` or ``provider`` is not a supported option.
            TypeError: If anything fails while chunking; the original exception
                is chained as the cause.
        """
        documents = extract_docling_documents(self.data_inputs, self.doc_key)

        chunker: BaseChunker
        if self.chunker == "HybridChunker":
            chunker = self._build_hybrid_chunker()
        elif self.chunker == "HierarchicalChunker":
            chunker = HierarchicalChunker()
        else:
            # Fail with a clear message instead of an unbound-variable error
            # disguised as a chunking failure.
            msg = f"Unsupported chunker: {self.chunker!r}. Expected 'HybridChunker' or 'HierarchicalChunker'."
            raise ValueError(msg)

        results: list[Data] = []
        try:
            for doc in documents:
                for chunk in chunker.chunk(dl_doc=doc):
                    enriched_text = chunker.contextualize(chunk=chunk)
                    meta = DocMeta.model_validate(chunk.meta)

                    results.append(
                        Data(
                            data={
                                "text": enriched_text,
                                "document_id": f"{doc.origin.binary_hash}",
                                "doc_items": json.dumps([item.self_ref for item in meta.doc_items]),
                            }
                        )
                    )

        except Exception as e:
            msg = f"Error splitting text: {e}"
            raise TypeError(msg) from e

        return DataFrame(results)
|
@@ -0,0 +1,231 @@
|
|
1
|
+
import time
|
2
|
+
from multiprocessing import Queue, get_context
|
3
|
+
from queue import Empty
|
4
|
+
|
5
|
+
from lfx.base.data import BaseFileComponent
|
6
|
+
from lfx.base.data.docling_utils import docling_worker
|
7
|
+
from lfx.inputs import DropdownInput
|
8
|
+
from lfx.schema import Data
|
9
|
+
|
10
|
+
|
11
|
+
class DoclingInlineComponent(BaseFileComponent):
    """Convert input files with Docling models running locally.

    The conversion itself runs in a separate spawned process (``docling_worker``);
    this component starts that process, waits for its result on a queue while
    monitoring that the process is still alive, and cleans up afterwards.
    """

    display_name = "Docling"
    description = "Uses Docling to process input documents running the Docling models locally."
    documentation = "https://docling-project.github.io/docling/"
    trace_type = "tool"
    icon = "Docling"
    name = "DoclingInline"

    # Seconds to wait for the worker process before giving up.
    WORKER_TIMEOUT_SECONDS = 300

    # https://docling-project.github.io/docling/usage/supported_formats/
    VALID_EXTENSIONS = [
        "adoc",
        "asciidoc",
        "asc",
        "bmp",
        "csv",
        "dotx",
        "dotm",
        "docm",
        "docx",
        "htm",
        "html",
        "jpeg",
        "json",
        "md",
        "pdf",
        "png",
        "potx",
        "ppsx",
        "pptm",
        "potm",
        "ppsm",
        "pptx",
        "tiff",
        "txt",
        "xls",
        "xlsx",
        "xhtml",
        "xml",
        "webp",
    ]

    inputs = [
        *BaseFileComponent.get_base_inputs(),
        DropdownInput(
            name="pipeline",
            display_name="Pipeline",
            info="Docling pipeline to use",
            options=["standard", "vlm"],
            real_time_refresh=False,
            value="standard",
        ),
        DropdownInput(
            name="ocr_engine",
            display_name="OCR Engine",
            info="OCR engine to use. None will disable OCR.",
            options=["None", "easyocr", "tesserocr", "rapidocr", "ocrmac"],
            real_time_refresh=False,
            value="None",
        ),
        # TODO: expose more Docling options
    ]

    outputs = [
        *BaseFileComponent.get_base_outputs(),
    ]

    def _wait_for_result_with_process_monitoring(self, queue: Queue, proc, timeout: int = 300):
        """Wait for a result on ``queue`` while checking that ``proc`` is alive.

        Args:
            queue: Queue the worker puts its result on.
            proc: Worker process object; only ``is_alive()`` and ``exitcode``
                are used here.
            timeout: Overall number of seconds to wait.

        Returns:
            The first object retrieved from ``queue``.

        Raises:
            RuntimeError: If the process has exited and the queue is empty.
            TimeoutError: If no result arrives within ``timeout`` seconds.
        """
        start_time = time.time()

        while time.time() - start_time < timeout:
            if not proc.is_alive():
                # The process is gone; pick up a result it may have sent before exiting.
                try:
                    result = queue.get_nowait()
                except Empty:
                    msg = f"Worker process crashed unexpectedly without producing result. Exit code: {proc.exitcode}"
                    raise RuntimeError(msg) from None
                else:
                    self.log("Process completed and result retrieved")
                    return result

            # Poll with a short timeout instead of blocking, so the liveness
            # check above runs again about once per second.
            try:
                result = queue.get(timeout=1)
            except Empty:
                continue
            else:
                self.log("Result received from worker process")
                return result

        msg = f"Process timed out after {timeout} seconds"
        raise TimeoutError(msg)

    def _terminate_process_gracefully(self, proc, timeout_terminate: int = 10, timeout_kill: int = 5):
        """Stop ``proc`` if it is still running, escalating from terminate to kill.

        Args:
            proc: Process object to stop.
            timeout_terminate: Seconds to wait after ``terminate()``.
            timeout_kill: Seconds to wait after ``kill()``.
        """
        if not proc.is_alive():
            return

        self.log("Attempting graceful process termination with SIGTERM")
        proc.terminate()  # Send SIGTERM
        proc.join(timeout=timeout_terminate)

        if proc.is_alive():
            self.log("Process didn't respond to SIGTERM, using SIGKILL")
            proc.kill()  # Send SIGKILL
            proc.join(timeout=timeout_kill)

            if proc.is_alive():
                self.log("Warning: Process still alive after SIGKILL")

    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
        """Convert the given files in a worker process and roll the results up.

        Args:
            file_list: Files to convert; entries without a ``path`` are not sent
                to the worker.

        Returns:
            ``file_list`` unchanged when there is nothing to process, otherwise
            the value returned by ``rollup_data``.

        Raises:
            ImportError: If Docling is not importable here, or the worker
                reports that it is not installed.
            RuntimeError: If the worker crashes or reports any other error.
            TimeoutError: If the worker does not answer in time.
        """
        try:
            # Imported only to fail fast with a helpful message when Docling is
            # missing; the converter is not built here, the work is handed to
            # docling_worker together with the pipeline and OCR engine names.
            from docling.datamodel.base_models import InputFormat  # noqa: F401
            from docling.datamodel.pipeline_options import (  # noqa: F401
                OcrOptions,
                PdfPipelineOptions,
                VlmPipelineOptions,
            )
            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption  # noqa: F401
            from docling.models.factories import get_ocr_factory  # noqa: F401
            from docling.pipeline.vlm_pipeline import VlmPipeline  # noqa: F401
        except ImportError as e:
            msg = (
                "Docling is an optional dependency. Install with `uv pip install 'langflow[docling]'` or refer to the "
                "documentation on how to install optional dependencies."
            )
            raise ImportError(msg) from e

        file_paths = [file.path for file in file_list if file.path]

        if not file_paths:
            self.log("No files to process.")
            return file_list

        ctx = get_context("spawn")
        queue: Queue = ctx.Queue()
        proc = ctx.Process(
            target=docling_worker,
            args=(file_paths, queue, self.pipeline, self.ocr_engine),
        )

        result = None
        proc.start()

        try:
            result = self._wait_for_result_with_process_monitoring(
                queue, proc, timeout=self.WORKER_TIMEOUT_SECONDS
            )
        except KeyboardInterrupt:
            self.log("Docling process cancelled by user")
            result = []
        except Exception as e:
            self.log(f"Error during processing: {e}")
            raise
        finally:
            # Always stop the worker, then release the queue's resources even
            # if stopping the worker raised.
            try:
                self._terminate_process_gracefully(proc)
            finally:
                try:
                    queue.close()
                    queue.join_thread()
                except Exception as e:  # noqa: BLE001
                    # Cleanup is best-effort: log and carry on.
                    self.log(f"Warning: Error during queue cleanup - {e}")

        # The worker reports failures as a dict carrying an "error" message.
        if isinstance(result, dict) and "error" in result:
            msg = result["error"]
            if msg.startswith("Docling is not installed"):
                raise ImportError(msg)
            # Handle interrupt gracefully - return empty result instead of raising error
            if "Worker interrupted by SIGINT" in msg or "shutdown" in result:
                self.log("Docling process cancelled by user")
                result = []
            else:
                raise RuntimeError(msg)

        # Falsy entries are kept as None placeholders.
        processed_data = [Data(data={"doc": r["document"], "file_path": r["file_path"]}) if r else None for r in result]
        return self.rollup_data(file_list, processed_data)
|
@@ -0,0 +1,193 @@
|
|
1
|
+
import base64
|
2
|
+
import time
|
3
|
+
from concurrent.futures import Future, ThreadPoolExecutor
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any
|
6
|
+
|
7
|
+
import httpx
|
8
|
+
from docling_core.types.doc import DoclingDocument
|
9
|
+
from pydantic import ValidationError
|
10
|
+
|
11
|
+
from lfx.base.data import BaseFileComponent
|
12
|
+
from lfx.inputs import IntInput, NestedDictInput, StrInput
|
13
|
+
from lfx.inputs.inputs import FloatInput
|
14
|
+
from lfx.schema import Data
|
15
|
+
|
16
|
+
|
17
|
+
class DoclingRemoteComponent(BaseFileComponent):
    """Convert input files by sending them to a Docling Serve instance.

    Each file is submitted as a base64 payload to the asynchronous conversion
    endpoint, the task status is polled until it reports success or failure,
    and the resulting JSON document is validated into a ``DoclingDocument``.
    """

    display_name = "Docling Serve"
    description = "Uses Docling to process input documents connecting to your instance of Docling Serve."
    documentation = "https://docling-project.github.io/docling/"
    trace_type = "tool"
    icon = "Docling"
    name = "DoclingRemote"

    # Number of 5xx responses tolerated while polling a task before giving up on it.
    MAX_500_RETRIES = 5

    # https://docling-project.github.io/docling/usage/supported_formats/
    VALID_EXTENSIONS = [
        "adoc",
        "asciidoc",
        "asc",
        "bmp",
        "csv",
        "dotx",
        "dotm",
        "docm",
        "docx",
        "htm",
        "html",
        "jpeg",
        "json",
        "md",
        "pdf",
        "png",
        "potx",
        "ppsx",
        "pptm",
        "potm",
        "ppsm",
        "pptx",
        "tiff",
        "txt",
        "xls",
        "xlsx",
        "xhtml",
        "xml",
        "webp",
    ]

    inputs = [
        *BaseFileComponent.get_base_inputs(),
        StrInput(
            name="api_url",
            display_name="Server address",
            info="URL of the Docling Serve instance.",
            required=True,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Concurrency",
            info="Maximum number of concurrent requests for the server.",
            advanced=True,
            value=2,
        ),
        FloatInput(
            name="max_poll_timeout",
            display_name="Maximum poll time",
            info="Maximum waiting time for the document conversion to complete.",
            advanced=True,
            value=3600,
        ),
        NestedDictInput(
            name="api_headers",
            display_name="HTTP headers",
            advanced=True,
            required=False,
            info=("Optional dictionary of additional headers required for connecting to Docling Serve."),
        ),
        NestedDictInput(
            name="docling_serve_opts",
            display_name="Docling options",
            advanced=True,
            required=False,
            info=(
                "Optional dictionary of additional options. "
                "See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information."
            ),
        ),
    ]

    outputs = [
        *BaseFileComponent.get_base_outputs(),
    ]

    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
        """Convert the given files through Docling Serve and roll the results up.

        Args:
            file_list: Files to convert; entries whose ``path`` is ``None`` are
                not submitted and yield a ``None`` result.

        Returns:
            The value returned by ``rollup_data`` for ``file_list`` and the
            per-file results.

        Raises:
            httpx.HTTPStatusError: If the server answers a request with an error
                status (5xx responses to status polls are retried first).
            httpx.RequestError: If a request cannot be completed.
            RuntimeError: If a conversion exceeds ``max_poll_timeout``.
        """
        # Strip a trailing slash so a configured "http://host/" does not yield "//v1alpha".
        base_url = f"{self.api_url.rstrip('/')}/v1alpha"

        def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:
            """Submit one file, poll until the task finishes and return its document.

            Returns ``None`` when polling hits too many 5xx responses, when the
            result has no JSON document, or when that document fails validation.
            """
            encoded_doc = base64.b64encode(file_path.read_bytes()).decode()
            payload = {
                "options": options,
                "file_sources": [{"base64_string": encoded_doc, "filename": file_path.name}],
            }

            response = client.post(f"{base_url}/convert/source/async", json=payload)
            response.raise_for_status()
            task = response.json()

            http_failures = 0
            retry_status_start = 500
            retry_status_end = 600
            start_wait_time = time.monotonic()
            while task["task_status"] not in ("success", "failure"):
                # Check if processing exceeds the maximum poll timeout
                processing_time = time.monotonic() - start_wait_time
                if processing_time >= self.max_poll_timeout:
                    msg = (
                        f"Processing time {processing_time=} exceeds the maximum poll timeout "
                        f"{self.max_poll_timeout=}. "
                        "Please increase the max_poll_timeout parameter or review why the processing "
                        "takes long on the server."
                    )
                    self.log(msg)
                    raise RuntimeError(msg)

                # Call for a new status update
                time.sleep(2)
                response = client.get(f"{base_url}/status/poll/{task['task_id']}")

                # 5xx responses are retried up to MAX_500_RETRIES times.
                if retry_status_start <= response.status_code < retry_status_end:
                    http_failures += 1
                    if http_failures > self.MAX_500_RETRIES:
                        self.log(f"The status requests got a http response {response.status_code} too many times.")
                        return None
                    continue

                # Any other error status is raised here instead of surfacing as a
                # KeyError when the error body lacks "task_status".
                response.raise_for_status()

                # Update task status
                task = response.json()

            result_resp = client.get(f"{base_url}/result/{task['task_id']}")
            result_resp.raise_for_status()
            result = result_resp.json()

            if "json_content" not in result["document"] or result["document"]["json_content"] is None:
                self.log("No JSON DoclingDocument found in the result.")
                return None

            try:
                doc = DoclingDocument.model_validate(result["document"]["json_content"])
                return Data(data={"doc": doc, "file_path": str(file_path)})
            except ValidationError as e:
                self.log(f"Error validating the document. {e}")
                return None

        # User-supplied options come last so they override the defaults.
        docling_options = {
            "to_formats": ["json"],
            "image_export_mode": "placeholder",
            "return_as_file": False,
            **(self.docling_serve_opts or {}),
        }

        # One slot per input file, so results stay in file order and files
        # without a path keep a None placeholder at their own position.
        processed_data: list[Data | None] = [None] * len(file_list)
        with (
            httpx.Client(headers=self.api_headers) as client,
            ThreadPoolExecutor(max_workers=self.max_concurrency) as executor,
        ):
            futures: list[tuple[int, Future]] = [
                (i, executor.submit(_convert_document, client, file.path, docling_options))
                for i, file in enumerate(file_list)
                if file.path is not None
            ]

            for index, future in futures:
                try:
                    processed_data[index] = future.result()
                except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:
                    self.log(f"Docling remote processing failed: {exc}")
                    raise

        return self.rollup_data(file_list, processed_data)
|