lfx-nightly 0.1.11.dev0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (699)
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/base/__init__.py +0 -0
  4. lfx/base/agents/__init__.py +0 -0
  5. lfx/base/agents/agent.py +268 -0
  6. lfx/base/agents/callback.py +130 -0
  7. lfx/base/agents/context.py +109 -0
  8. lfx/base/agents/crewai/__init__.py +0 -0
  9. lfx/base/agents/crewai/crew.py +231 -0
  10. lfx/base/agents/crewai/tasks.py +12 -0
  11. lfx/base/agents/default_prompts.py +23 -0
  12. lfx/base/agents/errors.py +15 -0
  13. lfx/base/agents/events.py +346 -0
  14. lfx/base/agents/utils.py +205 -0
  15. lfx/base/astra_assistants/__init__.py +0 -0
  16. lfx/base/astra_assistants/util.py +171 -0
  17. lfx/base/chains/__init__.py +0 -0
  18. lfx/base/chains/model.py +19 -0
  19. lfx/base/composio/__init__.py +0 -0
  20. lfx/base/composio/composio_base.py +1291 -0
  21. lfx/base/compressors/__init__.py +0 -0
  22. lfx/base/compressors/model.py +60 -0
  23. lfx/base/constants.py +46 -0
  24. lfx/base/curl/__init__.py +0 -0
  25. lfx/base/curl/parse.py +188 -0
  26. lfx/base/data/__init__.py +5 -0
  27. lfx/base/data/base_file.py +685 -0
  28. lfx/base/data/docling_utils.py +245 -0
  29. lfx/base/data/utils.py +198 -0
  30. lfx/base/document_transformers/__init__.py +0 -0
  31. lfx/base/document_transformers/model.py +43 -0
  32. lfx/base/embeddings/__init__.py +0 -0
  33. lfx/base/embeddings/aiml_embeddings.py +62 -0
  34. lfx/base/embeddings/model.py +26 -0
  35. lfx/base/flow_processing/__init__.py +0 -0
  36. lfx/base/flow_processing/utils.py +86 -0
  37. lfx/base/huggingface/__init__.py +0 -0
  38. lfx/base/huggingface/model_bridge.py +133 -0
  39. lfx/base/io/__init__.py +0 -0
  40. lfx/base/io/chat.py +20 -0
  41. lfx/base/io/text.py +22 -0
  42. lfx/base/langchain_utilities/__init__.py +0 -0
  43. lfx/base/langchain_utilities/model.py +35 -0
  44. lfx/base/langchain_utilities/spider_constants.py +1 -0
  45. lfx/base/langwatch/__init__.py +0 -0
  46. lfx/base/langwatch/utils.py +18 -0
  47. lfx/base/mcp/__init__.py +0 -0
  48. lfx/base/mcp/constants.py +2 -0
  49. lfx/base/mcp/util.py +1398 -0
  50. lfx/base/memory/__init__.py +0 -0
  51. lfx/base/memory/memory.py +49 -0
  52. lfx/base/memory/model.py +38 -0
  53. lfx/base/models/__init__.py +3 -0
  54. lfx/base/models/aiml_constants.py +51 -0
  55. lfx/base/models/anthropic_constants.py +47 -0
  56. lfx/base/models/aws_constants.py +151 -0
  57. lfx/base/models/chat_result.py +76 -0
  58. lfx/base/models/google_generative_ai_constants.py +70 -0
  59. lfx/base/models/groq_constants.py +134 -0
  60. lfx/base/models/model.py +375 -0
  61. lfx/base/models/model_input_constants.py +307 -0
  62. lfx/base/models/model_metadata.py +41 -0
  63. lfx/base/models/model_utils.py +8 -0
  64. lfx/base/models/novita_constants.py +35 -0
  65. lfx/base/models/ollama_constants.py +49 -0
  66. lfx/base/models/openai_constants.py +122 -0
  67. lfx/base/models/sambanova_constants.py +18 -0
  68. lfx/base/processing/__init__.py +0 -0
  69. lfx/base/prompts/__init__.py +0 -0
  70. lfx/base/prompts/api_utils.py +224 -0
  71. lfx/base/prompts/utils.py +61 -0
  72. lfx/base/textsplitters/__init__.py +0 -0
  73. lfx/base/textsplitters/model.py +28 -0
  74. lfx/base/tools/__init__.py +0 -0
  75. lfx/base/tools/base.py +26 -0
  76. lfx/base/tools/component_tool.py +325 -0
  77. lfx/base/tools/constants.py +49 -0
  78. lfx/base/tools/flow_tool.py +132 -0
  79. lfx/base/tools/run_flow.py +224 -0
  80. lfx/base/vectorstores/__init__.py +0 -0
  81. lfx/base/vectorstores/model.py +193 -0
  82. lfx/base/vectorstores/utils.py +22 -0
  83. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  84. lfx/cli/__init__.py +5 -0
  85. lfx/cli/commands.py +319 -0
  86. lfx/cli/common.py +650 -0
  87. lfx/cli/run.py +441 -0
  88. lfx/cli/script_loader.py +247 -0
  89. lfx/cli/serve_app.py +546 -0
  90. lfx/cli/validation.py +69 -0
  91. lfx/components/FAISS/__init__.py +34 -0
  92. lfx/components/FAISS/faiss.py +111 -0
  93. lfx/components/Notion/__init__.py +19 -0
  94. lfx/components/Notion/add_content_to_page.py +269 -0
  95. lfx/components/Notion/create_page.py +94 -0
  96. lfx/components/Notion/list_database_properties.py +68 -0
  97. lfx/components/Notion/list_pages.py +122 -0
  98. lfx/components/Notion/list_users.py +77 -0
  99. lfx/components/Notion/page_content_viewer.py +93 -0
  100. lfx/components/Notion/search.py +111 -0
  101. lfx/components/Notion/update_page_property.py +114 -0
  102. lfx/components/__init__.py +411 -0
  103. lfx/components/_importing.py +42 -0
  104. lfx/components/agentql/__init__.py +3 -0
  105. lfx/components/agentql/agentql_api.py +151 -0
  106. lfx/components/agents/__init__.py +34 -0
  107. lfx/components/agents/agent.py +558 -0
  108. lfx/components/agents/mcp_component.py +501 -0
  109. lfx/components/aiml/__init__.py +37 -0
  110. lfx/components/aiml/aiml.py +112 -0
  111. lfx/components/aiml/aiml_embeddings.py +37 -0
  112. lfx/components/amazon/__init__.py +36 -0
  113. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  114. lfx/components/amazon/amazon_bedrock_model.py +124 -0
  115. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  116. lfx/components/anthropic/__init__.py +34 -0
  117. lfx/components/anthropic/anthropic.py +187 -0
  118. lfx/components/apify/__init__.py +5 -0
  119. lfx/components/apify/apify_actor.py +325 -0
  120. lfx/components/arxiv/__init__.py +3 -0
  121. lfx/components/arxiv/arxiv.py +163 -0
  122. lfx/components/assemblyai/__init__.py +46 -0
  123. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  124. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  125. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  126. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  127. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  128. lfx/components/azure/__init__.py +37 -0
  129. lfx/components/azure/azure_openai.py +95 -0
  130. lfx/components/azure/azure_openai_embeddings.py +83 -0
  131. lfx/components/baidu/__init__.py +32 -0
  132. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  133. lfx/components/bing/__init__.py +3 -0
  134. lfx/components/bing/bing_search_api.py +61 -0
  135. lfx/components/cassandra/__init__.py +40 -0
  136. lfx/components/cassandra/cassandra.py +264 -0
  137. lfx/components/cassandra/cassandra_chat.py +92 -0
  138. lfx/components/cassandra/cassandra_graph.py +238 -0
  139. lfx/components/chains/__init__.py +3 -0
  140. lfx/components/chroma/__init__.py +34 -0
  141. lfx/components/chroma/chroma.py +167 -0
  142. lfx/components/cleanlab/__init__.py +40 -0
  143. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  144. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  145. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  146. lfx/components/clickhouse/__init__.py +34 -0
  147. lfx/components/clickhouse/clickhouse.py +135 -0
  148. lfx/components/cloudflare/__init__.py +32 -0
  149. lfx/components/cloudflare/cloudflare.py +81 -0
  150. lfx/components/cohere/__init__.py +40 -0
  151. lfx/components/cohere/cohere_embeddings.py +81 -0
  152. lfx/components/cohere/cohere_models.py +46 -0
  153. lfx/components/cohere/cohere_rerank.py +51 -0
  154. lfx/components/composio/__init__.py +74 -0
  155. lfx/components/composio/composio_api.py +268 -0
  156. lfx/components/composio/dropbox_compnent.py +11 -0
  157. lfx/components/composio/github_composio.py +11 -0
  158. lfx/components/composio/gmail_composio.py +38 -0
  159. lfx/components/composio/googlecalendar_composio.py +11 -0
  160. lfx/components/composio/googlemeet_composio.py +11 -0
  161. lfx/components/composio/googletasks_composio.py +8 -0
  162. lfx/components/composio/linear_composio.py +11 -0
  163. lfx/components/composio/outlook_composio.py +11 -0
  164. lfx/components/composio/reddit_composio.py +11 -0
  165. lfx/components/composio/slack_composio.py +582 -0
  166. lfx/components/composio/slackbot_composio.py +11 -0
  167. lfx/components/composio/supabase_composio.py +11 -0
  168. lfx/components/composio/todoist_composio.py +11 -0
  169. lfx/components/composio/youtube_composio.py +11 -0
  170. lfx/components/confluence/__init__.py +3 -0
  171. lfx/components/confluence/confluence.py +84 -0
  172. lfx/components/couchbase/__init__.py +34 -0
  173. lfx/components/couchbase/couchbase.py +102 -0
  174. lfx/components/crewai/__init__.py +49 -0
  175. lfx/components/crewai/crewai.py +107 -0
  176. lfx/components/crewai/hierarchical_crew.py +46 -0
  177. lfx/components/crewai/hierarchical_task.py +44 -0
  178. lfx/components/crewai/sequential_crew.py +52 -0
  179. lfx/components/crewai/sequential_task.py +73 -0
  180. lfx/components/crewai/sequential_task_agent.py +143 -0
  181. lfx/components/custom_component/__init__.py +34 -0
  182. lfx/components/custom_component/custom_component.py +31 -0
  183. lfx/components/data/__init__.py +64 -0
  184. lfx/components/data/api_request.py +544 -0
  185. lfx/components/data/csv_to_data.py +95 -0
  186. lfx/components/data/directory.py +113 -0
  187. lfx/components/data/file.py +577 -0
  188. lfx/components/data/json_to_data.py +98 -0
  189. lfx/components/data/news_search.py +164 -0
  190. lfx/components/data/rss.py +69 -0
  191. lfx/components/data/sql_executor.py +101 -0
  192. lfx/components/data/url.py +311 -0
  193. lfx/components/data/web_search.py +112 -0
  194. lfx/components/data/webhook.py +56 -0
  195. lfx/components/datastax/__init__.py +70 -0
  196. lfx/components/datastax/astra_assistant_manager.py +306 -0
  197. lfx/components/datastax/astra_db.py +75 -0
  198. lfx/components/datastax/astra_vectorize.py +124 -0
  199. lfx/components/datastax/astradb.py +1285 -0
  200. lfx/components/datastax/astradb_cql.py +314 -0
  201. lfx/components/datastax/astradb_graph.py +330 -0
  202. lfx/components/datastax/astradb_tool.py +414 -0
  203. lfx/components/datastax/astradb_vectorstore.py +1285 -0
  204. lfx/components/datastax/cassandra.py +92 -0
  205. lfx/components/datastax/create_assistant.py +58 -0
  206. lfx/components/datastax/create_thread.py +32 -0
  207. lfx/components/datastax/dotenv.py +35 -0
  208. lfx/components/datastax/get_assistant.py +37 -0
  209. lfx/components/datastax/getenvvar.py +30 -0
  210. lfx/components/datastax/graph_rag.py +141 -0
  211. lfx/components/datastax/hcd.py +314 -0
  212. lfx/components/datastax/list_assistants.py +25 -0
  213. lfx/components/datastax/run.py +89 -0
  214. lfx/components/deactivated/__init__.py +15 -0
  215. lfx/components/deactivated/amazon_kendra.py +66 -0
  216. lfx/components/deactivated/chat_litellm_model.py +158 -0
  217. lfx/components/deactivated/code_block_extractor.py +26 -0
  218. lfx/components/deactivated/documents_to_data.py +22 -0
  219. lfx/components/deactivated/embed.py +16 -0
  220. lfx/components/deactivated/extract_key_from_data.py +46 -0
  221. lfx/components/deactivated/json_document_builder.py +57 -0
  222. lfx/components/deactivated/list_flows.py +20 -0
  223. lfx/components/deactivated/mcp_sse.py +61 -0
  224. lfx/components/deactivated/mcp_stdio.py +62 -0
  225. lfx/components/deactivated/merge_data.py +93 -0
  226. lfx/components/deactivated/message.py +37 -0
  227. lfx/components/deactivated/metal.py +54 -0
  228. lfx/components/deactivated/multi_query.py +59 -0
  229. lfx/components/deactivated/retriever.py +43 -0
  230. lfx/components/deactivated/selective_passthrough.py +77 -0
  231. lfx/components/deactivated/should_run_next.py +40 -0
  232. lfx/components/deactivated/split_text.py +63 -0
  233. lfx/components/deactivated/store_message.py +24 -0
  234. lfx/components/deactivated/sub_flow.py +124 -0
  235. lfx/components/deactivated/vectara_self_query.py +76 -0
  236. lfx/components/deactivated/vector_store.py +24 -0
  237. lfx/components/deepseek/__init__.py +34 -0
  238. lfx/components/deepseek/deepseek.py +136 -0
  239. lfx/components/docling/__init__.py +43 -0
  240. lfx/components/docling/chunk_docling_document.py +186 -0
  241. lfx/components/docling/docling_inline.py +231 -0
  242. lfx/components/docling/docling_remote.py +193 -0
  243. lfx/components/docling/export_docling_document.py +117 -0
  244. lfx/components/documentloaders/__init__.py +3 -0
  245. lfx/components/duckduckgo/__init__.py +3 -0
  246. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  247. lfx/components/elastic/__init__.py +37 -0
  248. lfx/components/elastic/elasticsearch.py +267 -0
  249. lfx/components/elastic/opensearch.py +243 -0
  250. lfx/components/embeddings/__init__.py +37 -0
  251. lfx/components/embeddings/similarity.py +76 -0
  252. lfx/components/embeddings/text_embedder.py +64 -0
  253. lfx/components/exa/__init__.py +3 -0
  254. lfx/components/exa/exa_search.py +68 -0
  255. lfx/components/firecrawl/__init__.py +43 -0
  256. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  257. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  258. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  259. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  260. lfx/components/git/__init__.py +4 -0
  261. lfx/components/git/git.py +262 -0
  262. lfx/components/git/gitextractor.py +196 -0
  263. lfx/components/glean/__init__.py +3 -0
  264. lfx/components/glean/glean_search_api.py +173 -0
  265. lfx/components/google/__init__.py +17 -0
  266. lfx/components/google/gmail.py +192 -0
  267. lfx/components/google/google_bq_sql_executor.py +157 -0
  268. lfx/components/google/google_drive.py +92 -0
  269. lfx/components/google/google_drive_search.py +152 -0
  270. lfx/components/google/google_generative_ai.py +147 -0
  271. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  272. lfx/components/google/google_oauth_token.py +89 -0
  273. lfx/components/google/google_search_api_core.py +68 -0
  274. lfx/components/google/google_serper_api_core.py +74 -0
  275. lfx/components/groq/__init__.py +34 -0
  276. lfx/components/groq/groq.py +136 -0
  277. lfx/components/helpers/__init__.py +52 -0
  278. lfx/components/helpers/calculator_core.py +89 -0
  279. lfx/components/helpers/create_list.py +40 -0
  280. lfx/components/helpers/current_date.py +42 -0
  281. lfx/components/helpers/id_generator.py +42 -0
  282. lfx/components/helpers/memory.py +251 -0
  283. lfx/components/helpers/output_parser.py +45 -0
  284. lfx/components/helpers/store_message.py +90 -0
  285. lfx/components/homeassistant/__init__.py +7 -0
  286. lfx/components/homeassistant/home_assistant_control.py +152 -0
  287. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  288. lfx/components/huggingface/__init__.py +37 -0
  289. lfx/components/huggingface/huggingface.py +197 -0
  290. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  291. lfx/components/ibm/__init__.py +34 -0
  292. lfx/components/ibm/watsonx.py +203 -0
  293. lfx/components/ibm/watsonx_embeddings.py +135 -0
  294. lfx/components/icosacomputing/__init__.py +5 -0
  295. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  296. lfx/components/input_output/__init__.py +38 -0
  297. lfx/components/input_output/chat.py +120 -0
  298. lfx/components/input_output/chat_output.py +200 -0
  299. lfx/components/input_output/text.py +27 -0
  300. lfx/components/input_output/text_output.py +29 -0
  301. lfx/components/jigsawstack/__init__.py +23 -0
  302. lfx/components/jigsawstack/ai_scrape.py +126 -0
  303. lfx/components/jigsawstack/ai_web_search.py +136 -0
  304. lfx/components/jigsawstack/file_read.py +115 -0
  305. lfx/components/jigsawstack/file_upload.py +94 -0
  306. lfx/components/jigsawstack/image_generation.py +205 -0
  307. lfx/components/jigsawstack/nsfw.py +60 -0
  308. lfx/components/jigsawstack/object_detection.py +124 -0
  309. lfx/components/jigsawstack/sentiment.py +112 -0
  310. lfx/components/jigsawstack/text_to_sql.py +90 -0
  311. lfx/components/jigsawstack/text_translate.py +77 -0
  312. lfx/components/jigsawstack/vocr.py +107 -0
  313. lfx/components/langchain_utilities/__init__.py +109 -0
  314. lfx/components/langchain_utilities/character.py +53 -0
  315. lfx/components/langchain_utilities/conversation.py +59 -0
  316. lfx/components/langchain_utilities/csv_agent.py +107 -0
  317. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  318. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  319. lfx/components/langchain_utilities/json_agent.py +45 -0
  320. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  321. lfx/components/langchain_utilities/language_recursive.py +49 -0
  322. lfx/components/langchain_utilities/language_semantic.py +138 -0
  323. lfx/components/langchain_utilities/llm_checker.py +39 -0
  324. lfx/components/langchain_utilities/llm_math.py +42 -0
  325. lfx/components/langchain_utilities/natural_language.py +61 -0
  326. lfx/components/langchain_utilities/openai_tools.py +53 -0
  327. lfx/components/langchain_utilities/openapi.py +48 -0
  328. lfx/components/langchain_utilities/recursive_character.py +60 -0
  329. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  330. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  331. lfx/components/langchain_utilities/self_query.py +80 -0
  332. lfx/components/langchain_utilities/spider.py +142 -0
  333. lfx/components/langchain_utilities/sql.py +40 -0
  334. lfx/components/langchain_utilities/sql_database.py +35 -0
  335. lfx/components/langchain_utilities/sql_generator.py +78 -0
  336. lfx/components/langchain_utilities/tool_calling.py +59 -0
  337. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  338. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  339. lfx/components/langchain_utilities/xml_agent.py +71 -0
  340. lfx/components/langwatch/__init__.py +3 -0
  341. lfx/components/langwatch/langwatch.py +278 -0
  342. lfx/components/link_extractors/__init__.py +3 -0
  343. lfx/components/lmstudio/__init__.py +34 -0
  344. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  345. lfx/components/lmstudio/lmstudiomodel.py +129 -0
  346. lfx/components/logic/__init__.py +52 -0
  347. lfx/components/logic/conditional_router.py +171 -0
  348. lfx/components/logic/data_conditional_router.py +125 -0
  349. lfx/components/logic/flow_tool.py +110 -0
  350. lfx/components/logic/listen.py +29 -0
  351. lfx/components/logic/loop.py +125 -0
  352. lfx/components/logic/notify.py +88 -0
  353. lfx/components/logic/pass_message.py +35 -0
  354. lfx/components/logic/run_flow.py +71 -0
  355. lfx/components/logic/sub_flow.py +114 -0
  356. lfx/components/maritalk/__init__.py +32 -0
  357. lfx/components/maritalk/maritalk.py +52 -0
  358. lfx/components/mem0/__init__.py +3 -0
  359. lfx/components/mem0/mem0_chat_memory.py +136 -0
  360. lfx/components/milvus/__init__.py +34 -0
  361. lfx/components/milvus/milvus.py +115 -0
  362. lfx/components/mistral/__init__.py +37 -0
  363. lfx/components/mistral/mistral.py +114 -0
  364. lfx/components/mistral/mistral_embeddings.py +58 -0
  365. lfx/components/models/__init__.py +34 -0
  366. lfx/components/models/embedding_model.py +114 -0
  367. lfx/components/models/language_model.py +144 -0
  368. lfx/components/mongodb/__init__.py +34 -0
  369. lfx/components/mongodb/mongodb_atlas.py +213 -0
  370. lfx/components/needle/__init__.py +3 -0
  371. lfx/components/needle/needle.py +104 -0
  372. lfx/components/notdiamond/__init__.py +34 -0
  373. lfx/components/notdiamond/notdiamond.py +228 -0
  374. lfx/components/novita/__init__.py +32 -0
  375. lfx/components/novita/novita.py +130 -0
  376. lfx/components/nvidia/__init__.py +57 -0
  377. lfx/components/nvidia/nvidia.py +157 -0
  378. lfx/components/nvidia/nvidia_embedding.py +77 -0
  379. lfx/components/nvidia/nvidia_ingest.py +317 -0
  380. lfx/components/nvidia/nvidia_rerank.py +63 -0
  381. lfx/components/nvidia/system_assist.py +65 -0
  382. lfx/components/olivya/__init__.py +3 -0
  383. lfx/components/olivya/olivya.py +116 -0
  384. lfx/components/ollama/__init__.py +37 -0
  385. lfx/components/ollama/ollama.py +330 -0
  386. lfx/components/ollama/ollama_embeddings.py +106 -0
  387. lfx/components/openai/__init__.py +37 -0
  388. lfx/components/openai/openai.py +100 -0
  389. lfx/components/openai/openai_chat_model.py +176 -0
  390. lfx/components/openrouter/__init__.py +32 -0
  391. lfx/components/openrouter/openrouter.py +202 -0
  392. lfx/components/output_parsers/__init__.py +3 -0
  393. lfx/components/perplexity/__init__.py +34 -0
  394. lfx/components/perplexity/perplexity.py +75 -0
  395. lfx/components/pgvector/__init__.py +34 -0
  396. lfx/components/pgvector/pgvector.py +72 -0
  397. lfx/components/pinecone/__init__.py +34 -0
  398. lfx/components/pinecone/pinecone.py +134 -0
  399. lfx/components/processing/__init__.py +117 -0
  400. lfx/components/processing/alter_metadata.py +108 -0
  401. lfx/components/processing/batch_run.py +205 -0
  402. lfx/components/processing/combine_text.py +39 -0
  403. lfx/components/processing/converter.py +159 -0
  404. lfx/components/processing/create_data.py +110 -0
  405. lfx/components/processing/data_operations.py +438 -0
  406. lfx/components/processing/data_to_dataframe.py +70 -0
  407. lfx/components/processing/dataframe_operations.py +313 -0
  408. lfx/components/processing/extract_key.py +53 -0
  409. lfx/components/processing/filter_data.py +42 -0
  410. lfx/components/processing/filter_data_values.py +88 -0
  411. lfx/components/processing/json_cleaner.py +103 -0
  412. lfx/components/processing/lambda_filter.py +154 -0
  413. lfx/components/processing/llm_router.py +499 -0
  414. lfx/components/processing/merge_data.py +90 -0
  415. lfx/components/processing/message_to_data.py +36 -0
  416. lfx/components/processing/parse_data.py +70 -0
  417. lfx/components/processing/parse_dataframe.py +68 -0
  418. lfx/components/processing/parse_json_data.py +90 -0
  419. lfx/components/processing/parser.py +143 -0
  420. lfx/components/processing/prompt.py +67 -0
  421. lfx/components/processing/python_repl_core.py +98 -0
  422. lfx/components/processing/regex.py +82 -0
  423. lfx/components/processing/save_file.py +225 -0
  424. lfx/components/processing/select_data.py +48 -0
  425. lfx/components/processing/split_text.py +141 -0
  426. lfx/components/processing/structured_output.py +202 -0
  427. lfx/components/processing/update_data.py +160 -0
  428. lfx/components/prototypes/__init__.py +34 -0
  429. lfx/components/prototypes/python_function.py +73 -0
  430. lfx/components/qdrant/__init__.py +34 -0
  431. lfx/components/qdrant/qdrant.py +109 -0
  432. lfx/components/redis/__init__.py +37 -0
  433. lfx/components/redis/redis.py +89 -0
  434. lfx/components/redis/redis_chat.py +43 -0
  435. lfx/components/sambanova/__init__.py +32 -0
  436. lfx/components/sambanova/sambanova.py +84 -0
  437. lfx/components/scrapegraph/__init__.py +40 -0
  438. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  439. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  440. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  441. lfx/components/searchapi/__init__.py +34 -0
  442. lfx/components/searchapi/search.py +79 -0
  443. lfx/components/serpapi/__init__.py +3 -0
  444. lfx/components/serpapi/serp.py +115 -0
  445. lfx/components/supabase/__init__.py +34 -0
  446. lfx/components/supabase/supabase.py +76 -0
  447. lfx/components/tavily/__init__.py +4 -0
  448. lfx/components/tavily/tavily_extract.py +117 -0
  449. lfx/components/tavily/tavily_search.py +212 -0
  450. lfx/components/textsplitters/__init__.py +3 -0
  451. lfx/components/toolkits/__init__.py +3 -0
  452. lfx/components/tools/__init__.py +72 -0
  453. lfx/components/tools/calculator.py +108 -0
  454. lfx/components/tools/google_search_api.py +45 -0
  455. lfx/components/tools/google_serper_api.py +115 -0
  456. lfx/components/tools/python_code_structured_tool.py +327 -0
  457. lfx/components/tools/python_repl.py +97 -0
  458. lfx/components/tools/search_api.py +87 -0
  459. lfx/components/tools/searxng.py +145 -0
  460. lfx/components/tools/serp_api.py +119 -0
  461. lfx/components/tools/tavily_search_tool.py +344 -0
  462. lfx/components/tools/wikidata_api.py +102 -0
  463. lfx/components/tools/wikipedia_api.py +49 -0
  464. lfx/components/tools/yahoo_finance.py +129 -0
  465. lfx/components/twelvelabs/__init__.py +52 -0
  466. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  467. lfx/components/twelvelabs/pegasus_index.py +311 -0
  468. lfx/components/twelvelabs/split_video.py +291 -0
  469. lfx/components/twelvelabs/text_embeddings.py +57 -0
  470. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  471. lfx/components/twelvelabs/video_embeddings.py +100 -0
  472. lfx/components/twelvelabs/video_file.py +179 -0
  473. lfx/components/unstructured/__init__.py +3 -0
  474. lfx/components/unstructured/unstructured.py +121 -0
  475. lfx/components/upstash/__init__.py +34 -0
  476. lfx/components/upstash/upstash.py +124 -0
  477. lfx/components/vectara/__init__.py +37 -0
  478. lfx/components/vectara/vectara.py +97 -0
  479. lfx/components/vectara/vectara_rag.py +164 -0
  480. lfx/components/vectorstores/__init__.py +40 -0
  481. lfx/components/vectorstores/astradb.py +1285 -0
  482. lfx/components/vectorstores/astradb_graph.py +319 -0
  483. lfx/components/vectorstores/cassandra.py +264 -0
  484. lfx/components/vectorstores/cassandra_graph.py +238 -0
  485. lfx/components/vectorstores/chroma.py +167 -0
  486. lfx/components/vectorstores/clickhouse.py +135 -0
  487. lfx/components/vectorstores/couchbase.py +102 -0
  488. lfx/components/vectorstores/elasticsearch.py +267 -0
  489. lfx/components/vectorstores/faiss.py +111 -0
  490. lfx/components/vectorstores/graph_rag.py +141 -0
  491. lfx/components/vectorstores/hcd.py +314 -0
  492. lfx/components/vectorstores/local_db.py +261 -0
  493. lfx/components/vectorstores/milvus.py +115 -0
  494. lfx/components/vectorstores/mongodb_atlas.py +213 -0
  495. lfx/components/vectorstores/opensearch.py +243 -0
  496. lfx/components/vectorstores/pgvector.py +72 -0
  497. lfx/components/vectorstores/pinecone.py +134 -0
  498. lfx/components/vectorstores/qdrant.py +109 -0
  499. lfx/components/vectorstores/supabase.py +76 -0
  500. lfx/components/vectorstores/upstash.py +124 -0
  501. lfx/components/vectorstores/vectara.py +97 -0
  502. lfx/components/vectorstores/vectara_rag.py +164 -0
  503. lfx/components/vectorstores/weaviate.py +89 -0
  504. lfx/components/vertexai/__init__.py +37 -0
  505. lfx/components/vertexai/vertexai.py +71 -0
  506. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  507. lfx/components/weaviate/__init__.py +34 -0
  508. lfx/components/weaviate/weaviate.py +89 -0
  509. lfx/components/wikipedia/__init__.py +4 -0
  510. lfx/components/wikipedia/wikidata.py +86 -0
  511. lfx/components/wikipedia/wikipedia.py +53 -0
  512. lfx/components/wolframalpha/__init__.py +3 -0
  513. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  514. lfx/components/xai/__init__.py +32 -0
  515. lfx/components/xai/xai.py +167 -0
  516. lfx/components/yahoosearch/__init__.py +3 -0
  517. lfx/components/yahoosearch/yahoo.py +137 -0
  518. lfx/components/youtube/__init__.py +52 -0
  519. lfx/components/youtube/channel.py +227 -0
  520. lfx/components/youtube/comments.py +231 -0
  521. lfx/components/youtube/playlist.py +33 -0
  522. lfx/components/youtube/search.py +120 -0
  523. lfx/components/youtube/trending.py +285 -0
  524. lfx/components/youtube/video_details.py +263 -0
  525. lfx/components/youtube/youtube_transcripts.py +118 -0
  526. lfx/components/zep/__init__.py +3 -0
  527. lfx/components/zep/zep.py +44 -0
  528. lfx/constants.py +6 -0
  529. lfx/custom/__init__.py +7 -0
  530. lfx/custom/attributes.py +86 -0
  531. lfx/custom/code_parser/__init__.py +3 -0
  532. lfx/custom/code_parser/code_parser.py +361 -0
  533. lfx/custom/custom_component/__init__.py +0 -0
  534. lfx/custom/custom_component/base_component.py +128 -0
  535. lfx/custom/custom_component/component.py +1808 -0
  536. lfx/custom/custom_component/component_with_cache.py +8 -0
  537. lfx/custom/custom_component/custom_component.py +588 -0
  538. lfx/custom/dependency_analyzer.py +165 -0
  539. lfx/custom/directory_reader/__init__.py +3 -0
  540. lfx/custom/directory_reader/directory_reader.py +359 -0
  541. lfx/custom/directory_reader/utils.py +171 -0
  542. lfx/custom/eval.py +12 -0
  543. lfx/custom/schema.py +32 -0
  544. lfx/custom/tree_visitor.py +21 -0
  545. lfx/custom/utils.py +877 -0
  546. lfx/custom/validate.py +488 -0
  547. lfx/events/__init__.py +1 -0
  548. lfx/events/event_manager.py +110 -0
  549. lfx/exceptions/__init__.py +0 -0
  550. lfx/exceptions/component.py +15 -0
  551. lfx/field_typing/__init__.py +91 -0
  552. lfx/field_typing/constants.py +215 -0
  553. lfx/field_typing/range_spec.py +35 -0
  554. lfx/graph/__init__.py +6 -0
  555. lfx/graph/edge/__init__.py +0 -0
  556. lfx/graph/edge/base.py +277 -0
  557. lfx/graph/edge/schema.py +119 -0
  558. lfx/graph/edge/utils.py +0 -0
  559. lfx/graph/graph/__init__.py +0 -0
  560. lfx/graph/graph/ascii.py +202 -0
  561. lfx/graph/graph/base.py +2238 -0
  562. lfx/graph/graph/constants.py +63 -0
  563. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  564. lfx/graph/graph/schema.py +52 -0
  565. lfx/graph/graph/state_model.py +66 -0
  566. lfx/graph/graph/utils.py +1024 -0
  567. lfx/graph/schema.py +75 -0
  568. lfx/graph/state/__init__.py +0 -0
  569. lfx/graph/state/model.py +237 -0
  570. lfx/graph/utils.py +200 -0
  571. lfx/graph/vertex/__init__.py +0 -0
  572. lfx/graph/vertex/base.py +823 -0
  573. lfx/graph/vertex/constants.py +0 -0
  574. lfx/graph/vertex/exceptions.py +4 -0
  575. lfx/graph/vertex/param_handler.py +264 -0
  576. lfx/graph/vertex/schema.py +26 -0
  577. lfx/graph/vertex/utils.py +19 -0
  578. lfx/graph/vertex/vertex_types.py +489 -0
  579. lfx/helpers/__init__.py +1 -0
  580. lfx/helpers/base_model.py +71 -0
  581. lfx/helpers/custom.py +13 -0
  582. lfx/helpers/data.py +167 -0
  583. lfx/helpers/flow.py +194 -0
  584. lfx/inputs/__init__.py +68 -0
  585. lfx/inputs/constants.py +2 -0
  586. lfx/inputs/input_mixin.py +328 -0
  587. lfx/inputs/inputs.py +714 -0
  588. lfx/inputs/validators.py +19 -0
  589. lfx/interface/__init__.py +6 -0
  590. lfx/interface/components.py +489 -0
  591. lfx/interface/importing/__init__.py +5 -0
  592. lfx/interface/importing/utils.py +39 -0
  593. lfx/interface/initialize/__init__.py +3 -0
  594. lfx/interface/initialize/loading.py +224 -0
  595. lfx/interface/listing.py +26 -0
  596. lfx/interface/run.py +16 -0
  597. lfx/interface/utils.py +111 -0
  598. lfx/io/__init__.py +63 -0
  599. lfx/io/schema.py +289 -0
  600. lfx/load/__init__.py +8 -0
  601. lfx/load/load.py +256 -0
  602. lfx/load/utils.py +99 -0
  603. lfx/log/__init__.py +5 -0
  604. lfx/log/logger.py +385 -0
  605. lfx/memory/__init__.py +90 -0
  606. lfx/memory/stubs.py +283 -0
  607. lfx/processing/__init__.py +1 -0
  608. lfx/processing/process.py +238 -0
  609. lfx/processing/utils.py +25 -0
  610. lfx/py.typed +0 -0
  611. lfx/schema/__init__.py +66 -0
  612. lfx/schema/artifact.py +83 -0
  613. lfx/schema/content_block.py +62 -0
  614. lfx/schema/content_types.py +91 -0
  615. lfx/schema/data.py +308 -0
  616. lfx/schema/dataframe.py +210 -0
  617. lfx/schema/dotdict.py +74 -0
  618. lfx/schema/encoders.py +13 -0
  619. lfx/schema/graph.py +47 -0
  620. lfx/schema/image.py +131 -0
  621. lfx/schema/json_schema.py +141 -0
  622. lfx/schema/log.py +61 -0
  623. lfx/schema/message.py +473 -0
  624. lfx/schema/openai_responses_schemas.py +74 -0
  625. lfx/schema/properties.py +41 -0
  626. lfx/schema/schema.py +171 -0
  627. lfx/schema/serialize.py +13 -0
  628. lfx/schema/table.py +140 -0
  629. lfx/schema/validators.py +114 -0
  630. lfx/serialization/__init__.py +5 -0
  631. lfx/serialization/constants.py +2 -0
  632. lfx/serialization/serialization.py +314 -0
  633. lfx/services/__init__.py +23 -0
  634. lfx/services/base.py +28 -0
  635. lfx/services/cache/__init__.py +6 -0
  636. lfx/services/cache/base.py +183 -0
  637. lfx/services/cache/service.py +166 -0
  638. lfx/services/cache/utils.py +169 -0
  639. lfx/services/chat/__init__.py +1 -0
  640. lfx/services/chat/config.py +2 -0
  641. lfx/services/chat/schema.py +10 -0
  642. lfx/services/deps.py +129 -0
  643. lfx/services/factory.py +19 -0
  644. lfx/services/initialize.py +19 -0
  645. lfx/services/interfaces.py +103 -0
  646. lfx/services/manager.py +172 -0
  647. lfx/services/schema.py +20 -0
  648. lfx/services/session.py +82 -0
  649. lfx/services/settings/__init__.py +3 -0
  650. lfx/services/settings/auth.py +130 -0
  651. lfx/services/settings/base.py +539 -0
  652. lfx/services/settings/constants.py +31 -0
  653. lfx/services/settings/factory.py +23 -0
  654. lfx/services/settings/feature_flags.py +12 -0
  655. lfx/services/settings/service.py +35 -0
  656. lfx/services/settings/utils.py +40 -0
  657. lfx/services/shared_component_cache/__init__.py +1 -0
  658. lfx/services/shared_component_cache/factory.py +30 -0
  659. lfx/services/shared_component_cache/service.py +9 -0
  660. lfx/services/storage/__init__.py +5 -0
  661. lfx/services/storage/local.py +155 -0
  662. lfx/services/storage/service.py +54 -0
  663. lfx/services/tracing/__init__.py +1 -0
  664. lfx/services/tracing/service.py +21 -0
  665. lfx/settings.py +6 -0
  666. lfx/template/__init__.py +6 -0
  667. lfx/template/field/__init__.py +0 -0
  668. lfx/template/field/base.py +257 -0
  669. lfx/template/field/prompt.py +15 -0
  670. lfx/template/frontend_node/__init__.py +6 -0
  671. lfx/template/frontend_node/base.py +212 -0
  672. lfx/template/frontend_node/constants.py +65 -0
  673. lfx/template/frontend_node/custom_components.py +79 -0
  674. lfx/template/template/__init__.py +0 -0
  675. lfx/template/template/base.py +100 -0
  676. lfx/template/utils.py +217 -0
  677. lfx/type_extraction/__init__.py +19 -0
  678. lfx/type_extraction/type_extraction.py +75 -0
  679. lfx/type_extraction.py +80 -0
  680. lfx/utils/__init__.py +1 -0
  681. lfx/utils/async_helpers.py +42 -0
  682. lfx/utils/component_utils.py +154 -0
  683. lfx/utils/concurrency.py +60 -0
  684. lfx/utils/connection_string_parser.py +11 -0
  685. lfx/utils/constants.py +205 -0
  686. lfx/utils/data_structure.py +212 -0
  687. lfx/utils/exceptions.py +22 -0
  688. lfx/utils/helpers.py +28 -0
  689. lfx/utils/image.py +73 -0
  690. lfx/utils/lazy_load.py +15 -0
  691. lfx/utils/request_utils.py +18 -0
  692. lfx/utils/schemas.py +139 -0
  693. lfx/utils/util.py +481 -0
  694. lfx/utils/util_strings.py +56 -0
  695. lfx/utils/version.py +24 -0
  696. lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
  697. lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
  698. lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
  699. lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,113 @@
1
+ from lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data, retrieve_file_paths
2
+ from lfx.custom.custom_component.component import Component
3
+ from lfx.io import BoolInput, IntInput, MessageTextInput, MultiselectInput
4
+ from lfx.schema.data import Data
5
+ from lfx.schema.dataframe import DataFrame
6
+ from lfx.template.field.base import Output
7
+
8
+
9
# Component that collects supported text files from a directory and exposes
# them as a DataFrame output.
class DirectoryComponent(Component):
    display_name = "Directory"
    description = "Recursively load files from a directory."
    documentation: str = "https://docs.langflow.org/components-data#directory"
    icon = "folder"
    name = "Directory"

    inputs = [
        MessageTextInput(
            name="path",
            display_name="Path",
            info="Path to the directory to load files from. Defaults to current directory ('.')",
            value=".",
            tool_mode=True,
        ),
        MultiselectInput(
            name="types",
            display_name="File Types",
            info="File types to load. Select one or more types or leave empty to load all supported types.",
            options=TEXT_FILE_TYPES,
            value=[],
        ),
        IntInput(
            name="depth",
            display_name="Depth",
            info="Depth to search for files.",
            value=0,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Max Concurrency",
            advanced=True,
            info="Maximum concurrency for loading files.",
            value=2,
        ),
        BoolInput(
            name="load_hidden",
            display_name="Load Hidden",
            advanced=True,
            info="If true, hidden files will be loaded.",
        ),
        BoolInput(
            name="recursive",
            display_name="Recursive",
            advanced=True,
            info="If true, the search will be recursive.",
        ),
        BoolInput(
            name="silent_errors",
            display_name="Silent Errors",
            advanced=True,
            info="If true, errors will not raise an exception.",
        ),
        BoolInput(
            name="use_multithreading",
            display_name="Use Multithreading",
            advanced=True,
            info="If true, multithreading will be used.",
        ),
    ]

    outputs = [
        Output(display_name="Loaded Files", name="dataframe", method="as_dataframe"),
    ]

    def load_directory(self) -> list[Data]:
        """Load every matching file under the configured directory.

        Returns:
            The successfully parsed files as ``Data`` objects; entries that
            are not ``Data`` instances (e.g. ``None``) are dropped.

        Raises:
            ValueError: If a selected file type is not in ``TEXT_FILE_TYPES``.
        """
        # Snapshot the component inputs before doing any work.
        directory_input = self.path
        selected_types = self.types
        search_depth = self.depth
        worker_limit = self.max_concurrency
        include_hidden = self.load_hidden
        walk_recursively = self.recursive
        swallow_errors = self.silent_errors
        threaded = self.use_multithreading

        directory = self.resolve_path(directory_input)

        # An empty selection means "every supported type".
        selected_types = selected_types or TEXT_FILE_TYPES

        unsupported = [file_type for file_type in selected_types if file_type not in TEXT_FILE_TYPES]
        if unsupported:
            msg = f"Invalid file types specified: {unsupported}. Valid types are: {TEXT_FILE_TYPES}"
            raise ValueError(msg)

        discovered = retrieve_file_paths(
            directory,
            load_hidden=include_hidden,
            recursive=walk_recursively,
            depth=search_depth,
            types=selected_types,
        )

        if not threaded:
            parsed = [parse_text_file_to_data(candidate, silent_errors=swallow_errors) for candidate in discovered]
        else:
            parsed = parallel_load_data(discovered, silent_errors=swallow_errors, max_concurrency=worker_limit)

        # isinstance() is already False for None, so one check covers both cases.
        kept = [entry for entry in parsed if isinstance(entry, Data)]
        self.status = kept
        return kept

    def as_dataframe(self) -> DataFrame:
        """Return the loaded files wrapped in a ``DataFrame``."""
        loaded = self.load_directory()
        return DataFrame(loaded)
@@ -0,0 +1,577 @@
1
+ """Enhanced file component with clearer structure and Docling isolation.
2
+
3
+ Notes:
4
+ -----
5
+ - Functionality is preserved with minimal behavioral changes.
6
+ - ALL Docling parsing/export runs in a separate OS process to prevent memory
7
+ growth and native library state from impacting the main Langflow process.
8
+ - Standard text/structured parsing continues to use existing BaseFileComponent
9
+ utilities (and optional threading via `parallel_load_data`).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import subprocess
16
+ import sys
17
+ import textwrap
18
+ from copy import deepcopy
19
+ from typing import Any
20
+
21
+ from lfx.base.data.base_file import BaseFileComponent
22
+ from lfx.base.data.utils import TEXT_FILE_TYPES, parallel_load_data, parse_text_file_to_data
23
+ from lfx.inputs.inputs import DropdownInput, MessageTextInput, StrInput
24
+ from lfx.io import BoolInput, FileInput, IntInput, Output
25
+ from lfx.schema import DataFrame # noqa: TC001
26
+ from lfx.schema.data import Data
27
+ from lfx.schema.message import Message
28
+
29
+
30
+ class FileComponent(BaseFileComponent):
31
+ """File component with optional Docling processing (isolated in a subprocess)."""
32
+
33
+ display_name = "File"
34
+ description = "Loads content from files with optional advanced document processing and export using Docling."
35
+ documentation: str = "https://docs.langflow.org/components-data#file"
36
+ icon = "file-text"
37
+ name = "File"
38
+
39
+ # Docling-supported/compatible extensions; TEXT_FILE_TYPES are supported by the base loader.
40
+ VALID_EXTENSIONS = [
41
+ "adoc",
42
+ "asciidoc",
43
+ "asc",
44
+ "bmp",
45
+ "csv",
46
+ "dotx",
47
+ "dotm",
48
+ "docm",
49
+ "docx",
50
+ "htm",
51
+ "html",
52
+ "jpeg",
53
+ "json",
54
+ "md",
55
+ "pdf",
56
+ "png",
57
+ "potx",
58
+ "ppsx",
59
+ "pptm",
60
+ "potm",
61
+ "ppsm",
62
+ "pptx",
63
+ "tiff",
64
+ "txt",
65
+ "xls",
66
+ "xlsx",
67
+ "xhtml",
68
+ "xml",
69
+ "webp",
70
+ *TEXT_FILE_TYPES,
71
+ ]
72
+
73
+ # Fixed export settings used when markdown export is requested.
74
+ EXPORT_FORMAT = "Markdown"
75
+ IMAGE_MODE = "placeholder"
76
+
77
+ _base_inputs = deepcopy(BaseFileComponent.get_base_inputs())
78
+
79
+ for input_item in _base_inputs:
80
+ if isinstance(input_item, FileInput) and input_item.name == "path":
81
+ input_item.real_time_refresh = True
82
+ break
83
+
84
+ inputs = [
85
+ *_base_inputs,
86
+ BoolInput(
87
+ name="advanced_mode",
88
+ display_name="Advanced Parser",
89
+ value=False,
90
+ real_time_refresh=True,
91
+ info=(
92
+ "Enable advanced document processing and export with Docling for PDFs, images, and office documents. "
93
+ "Available only for single file processing."
94
+ ),
95
+ show=False,
96
+ ),
97
+ DropdownInput(
98
+ name="pipeline",
99
+ display_name="Pipeline",
100
+ info="Docling pipeline to use",
101
+ options=["standard", "vlm"],
102
+ value="standard",
103
+ advanced=True,
104
+ ),
105
+ DropdownInput(
106
+ name="ocr_engine",
107
+ display_name="OCR Engine",
108
+ info="OCR engine to use. Only available when pipeline is set to 'standard'.",
109
+ options=["", "easyocr"],
110
+ value="",
111
+ show=False,
112
+ advanced=True,
113
+ ),
114
+ StrInput(
115
+ name="md_image_placeholder",
116
+ display_name="Image placeholder",
117
+ info="Specify the image placeholder for markdown exports.",
118
+ value="<!-- image -->",
119
+ advanced=True,
120
+ show=False,
121
+ ),
122
+ StrInput(
123
+ name="md_page_break_placeholder",
124
+ display_name="Page break placeholder",
125
+ info="Add this placeholder between pages in the markdown output.",
126
+ value="",
127
+ advanced=True,
128
+ show=False,
129
+ ),
130
+ MessageTextInput(
131
+ name="doc_key",
132
+ display_name="Doc Key",
133
+ info="The key to use for the DoclingDocument column.",
134
+ value="doc",
135
+ advanced=True,
136
+ show=False,
137
+ ),
138
+ # Deprecated input retained for backward-compatibility.
139
+ BoolInput(
140
+ name="use_multithreading",
141
+ display_name="[Deprecated] Use Multithreading",
142
+ advanced=True,
143
+ value=True,
144
+ info="Set 'Processing Concurrency' greater than 1 to enable multithreading.",
145
+ ),
146
+ IntInput(
147
+ name="concurrency_multithreading",
148
+ display_name="Processing Concurrency",
149
+ advanced=True,
150
+ info="When multiple files are being processed, the number of files to process concurrently.",
151
+ value=1,
152
+ ),
153
+ BoolInput(
154
+ name="markdown",
155
+ display_name="Markdown Export",
156
+ info="Export processed documents to Markdown format. Only available when advanced mode is enabled.",
157
+ value=False,
158
+ show=False,
159
+ ),
160
+ ]
161
+
162
+ outputs = [
163
+ Output(display_name="Raw Content", name="message", method="load_files_message"),
164
+ ]
165
+
166
+ # ------------------------------ UI helpers --------------------------------------
167
+
168
+ def _path_value(self, template: dict) -> list[str]:
169
+ """Return the list of currently selected file paths from the template."""
170
+ return template.get("path", {}).get("file_path", [])
171
+
172
def update_build_config(
    self,
    build_config: dict[str, Any],
    field_value: Any,
    field_name: str | None = None,
) -> dict[str, Any]:
    """Show/hide the Advanced Parser toggle and its dependent fields.

    Args:
        build_config: The component's build configuration; mutated in place.
        field_value: New value of the field that changed.
        field_name: Name of the field that changed. Only ``"path"`` and
            ``"advanced_mode"`` trigger any update.

    Returns:
        The same ``build_config`` object, after any visibility updates.
    """
    docling_fields = ("pipeline", "ocr_engine", "doc_key", "md_image_placeholder", "md_page_break_placeholder")
    tabular_suffixes = (".csv", ".xlsx", ".parquet")

    def set_docling_fields_visible(*, visible: bool) -> None:
        # Only touch fields that are present in this build config.
        for key in docling_fields:
            if key in build_config:
                build_config[key]["show"] = visible

    if field_name == "path":
        paths = self._path_value(build_config)
        first_path = paths[0] if paths else ""
        file_count = len(field_value) if field_value else 0

        # Compare the extension case-insensitively (as _is_docling_compatible
        # does) so that e.g. "REPORT.CSV" is still treated as tabular.
        is_tabular = first_path.lower().endswith(tabular_suffixes)

        # Advanced mode is offered only for a single, non-tabular file.
        allow_advanced = file_count == 1 and not is_tabular
        build_config["advanced_mode"]["show"] = allow_advanced
        if not allow_advanced:
            build_config["advanced_mode"]["value"] = False
            set_docling_fields_visible(visible=False)

    elif field_name == "advanced_mode":
        set_docling_fields_visible(visible=bool(field_value))

    return build_config
199
+
200
def update_outputs(self, frontend_node: dict[str, Any], field_name: str, field_value: Any) -> dict[str, Any]:  # noqa: ARG002
    """Rebuild the node's outputs from the selected files and advanced mode.

    Nothing changes unless ``field_name`` is ``"path"`` or ``"advanced_mode"``
    and at least one file path is selected. Otherwise ``frontend_node["outputs"]``
    is replaced and the same ``frontend_node`` object is returned.
    """
    if field_name != "path" and field_name != "advanced_mode":
        return frontend_node

    selected = self._path_value(frontend_node.get("template", {}))
    if not selected:
        return frontend_node

    rebuilt: list = []
    frontend_node["outputs"] = rebuilt

    if len(selected) != 1:
        # Multiple files => DataFrame output; advanced parser disabled
        rebuilt.append(Output(display_name="Files", name="dataframe", method="load_files"))
        return frontend_node

    # Single file: optional structured output first, chosen by extension.
    only_path = selected[0]
    if only_path.endswith((".csv", ".xlsx", ".parquet")):
        rebuilt.append(
            Output(display_name="Structured Content", name="dataframe", method="load_files_structured"),
        )
    elif only_path.endswith(".json"):
        rebuilt.append(
            Output(display_name="Structured Content", name="json", method="load_files_json"),
        )

    advanced_on = frontend_node.get("template", {}).get("advanced_mode", {}).get("value", False)
    if advanced_on:
        rebuilt.append(
            Output(display_name="Structured Output", name="advanced", method="load_files_advanced"),
        )
        rebuilt.append(
            Output(display_name="Markdown", name="markdown", method="load_files_markdown"),
        )
    else:
        rebuilt.append(
            Output(display_name="Raw Content", name="message", method="load_files_message"),
        )

    # The file-path output is offered in both modes.
    rebuilt.append(
        Output(display_name="File Path", name="path", method="load_files_path"),
    )
    return frontend_node
245
+
246
+ # ------------------------------ Core processing ----------------------------------
247
+
248
+ def _is_docling_compatible(self, file_path: str) -> bool:
249
+ """Lightweight extension gate for Docling-compatible types."""
250
+ docling_exts = (
251
+ ".adoc",
252
+ ".asciidoc",
253
+ ".asc",
254
+ ".bmp",
255
+ ".csv",
256
+ ".dotx",
257
+ ".dotm",
258
+ ".docm",
259
+ ".docx",
260
+ ".htm",
261
+ ".html",
262
+ ".jpeg",
263
+ ".json",
264
+ ".md",
265
+ ".pdf",
266
+ ".png",
267
+ ".potx",
268
+ ".ppsx",
269
+ ".pptm",
270
+ ".potm",
271
+ ".ppsm",
272
+ ".pptx",
273
+ ".tiff",
274
+ ".txt",
275
+ ".xls",
276
+ ".xlsx",
277
+ ".xhtml",
278
+ ".xml",
279
+ ".webp",
280
+ )
281
+ return file_path.lower().endswith(docling_exts)
282
+
283
+ def _process_docling_in_subprocess(self, file_path: str) -> Data | None:
284
+ """Run Docling in a separate OS process and map the result to a Data object.
285
+
286
+ We avoid multiprocessing pickling by launching `python -c "<script>"` and
287
+ passing JSON config via stdin. The child prints a JSON result to stdout.
288
+ """
289
+ if not file_path:
290
+ return None
291
+
292
+ args: dict[str, Any] = {
293
+ "file_path": file_path,
294
+ "markdown": bool(self.markdown),
295
+ "image_mode": str(self.IMAGE_MODE),
296
+ "md_image_placeholder": str(self.md_image_placeholder),
297
+ "md_page_break_placeholder": str(self.md_page_break_placeholder),
298
+ "pipeline": str(self.pipeline),
299
+ "ocr_engine": str(self.ocr_engine) if getattr(self, "ocr_engine", "") else None,
300
+ }
301
+
302
+ # The child is a tiny, self-contained script to keep memory/state isolated.
303
+ child_script = textwrap.dedent(
304
+ r"""
305
+ import json, sys
306
+
307
+ def try_imports():
308
+ # Strategy 1: latest layout
309
+ try:
310
+ from docling.datamodel.base_models import ConversionStatus, InputFormat # type: ignore
311
+ from docling.document_converter import DocumentConverter # type: ignore
312
+ from docling_core.types.doc import ImageRefMode # type: ignore
313
+ return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, "latest"
314
+ except Exception:
315
+ pass
316
+ # Strategy 2: alternative layout
317
+ try:
318
+ from docling.document_converter import DocumentConverter # type: ignore
319
+ try:
320
+ from docling_core.types import ConversionStatus, InputFormat # type: ignore
321
+ except Exception:
322
+ try:
323
+ from docling.datamodel import ConversionStatus, InputFormat # type: ignore
324
+ except Exception:
325
+ class ConversionStatus: SUCCESS = "success"
326
+ class InputFormat:
327
+ PDF="pdf"; IMAGE="image"
328
+ try:
329
+ from docling_core.types.doc import ImageRefMode # type: ignore
330
+ except Exception:
331
+ class ImageRefMode:
332
+ PLACEHOLDER="placeholder"; EMBEDDED="embedded"
333
+ return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, "alternative"
334
+ except Exception:
335
+ pass
336
+ # Strategy 3: basic converter only
337
+ try:
338
+ from docling.document_converter import DocumentConverter # type: ignore
339
+ class ConversionStatus: SUCCESS = "success"
340
+ class InputFormat:
341
+ PDF="pdf"; IMAGE="image"
342
+ class ImageRefMode:
343
+ PLACEHOLDER="placeholder"; EMBEDDED="embedded"
344
+ return ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, "basic"
345
+ except Exception as e:
346
+ raise ImportError(f"Docling imports failed: {e}") from e
347
+
348
+ def create_converter(strategy, input_format, DocumentConverter, pipeline, ocr_engine):
349
+ if strategy == "latest" and pipeline == "standard":
350
+ try:
351
+ from docling.datamodel.pipeline_options import PdfPipelineOptions # type: ignore
352
+ from docling.document_converter import PdfFormatOption # type: ignore
353
+ pipe = PdfPipelineOptions()
354
+ if ocr_engine:
355
+ try:
356
+ from docling.models.factories import get_ocr_factory # type: ignore
357
+ pipe.do_ocr = True
358
+ fac = get_ocr_factory(allow_external_plugins=False)
359
+ pipe.ocr_options = fac.create_options(kind=ocr_engine)
360
+ except Exception:
361
+ pipe.do_ocr = False
362
+ fmt = {}
363
+ if hasattr(input_format, "PDF"):
364
+ fmt[getattr(input_format, "PDF")] = PdfFormatOption(pipeline_options=pipe)
365
+ if hasattr(input_format, "IMAGE"):
366
+ fmt[getattr(input_format, "IMAGE")] = PdfFormatOption(pipeline_options=pipe)
367
+ return DocumentConverter(format_options=fmt)
368
+ except Exception:
369
+ return DocumentConverter()
370
+ return DocumentConverter()
371
+
372
+ def export_markdown(document, ImageRefMode, image_mode, img_ph, pg_ph):
373
+ try:
374
+ mode = getattr(ImageRefMode, image_mode.upper(), image_mode)
375
+ return document.export_to_markdown(
376
+ image_mode=mode,
377
+ image_placeholder=img_ph,
378
+ page_break_placeholder=pg_ph,
379
+ )
380
+ except Exception:
381
+ try:
382
+ return document.export_to_text()
383
+ except Exception:
384
+ return str(document)
385
+
386
+ def to_rows(doc_dict):
387
+ rows = []
388
+ for t in doc_dict.get("texts", []):
389
+ prov = t.get("prov") or []
390
+ page_no = None
391
+ if prov and isinstance(prov, list) and isinstance(prov[0], dict):
392
+ page_no = prov[0].get("page_no")
393
+ rows.append({
394
+ "page_no": page_no,
395
+ "label": t.get("label"),
396
+ "text": t.get("text"),
397
+ "level": t.get("level"),
398
+ })
399
+ return rows
400
+
401
+ def main():
402
+ cfg = json.loads(sys.stdin.read())
403
+ file_path = cfg["file_path"]
404
+ markdown = cfg["markdown"]
405
+ image_mode = cfg["image_mode"]
406
+ img_ph = cfg["md_image_placeholder"]
407
+ pg_ph = cfg["md_page_break_placeholder"]
408
+ pipeline = cfg["pipeline"]
409
+ ocr_engine = cfg.get("ocr_engine")
410
+ meta = {"file_path": file_path}
411
+
412
+ try:
413
+ ConversionStatus, InputFormat, DocumentConverter, ImageRefMode, strategy = try_imports()
414
+ converter = create_converter(strategy, InputFormat, DocumentConverter, pipeline, ocr_engine)
415
+ try:
416
+ res = converter.convert(file_path)
417
+ except Exception as e:
418
+ print(json.dumps({"ok": False, "error": f"Docling conversion error: {e}", "meta": meta}))
419
+ return
420
+
421
+ ok = False
422
+ if hasattr(res, "status"):
423
+ try:
424
+ ok = (res.status == ConversionStatus.SUCCESS) or (str(res.status).lower() == "success")
425
+ except Exception:
426
+ ok = (str(res.status).lower() == "success")
427
+ if not ok and hasattr(res, "document"):
428
+ ok = getattr(res, "document", None) is not None
429
+ if not ok:
430
+ print(json.dumps({"ok": False, "error": "Docling conversion failed", "meta": meta}))
431
+ return
432
+
433
+ doc = getattr(res, "document", None)
434
+ if doc is None:
435
+ print(json.dumps({"ok": False, "error": "Docling produced no document", "meta": meta}))
436
+ return
437
+
438
+ if markdown:
439
+ text = export_markdown(doc, ImageRefMode, image_mode, img_ph, pg_ph)
440
+ print(json.dumps({"ok": True, "mode": "markdown", "text": text, "meta": meta}))
441
+ return
442
+
443
+ # structured
444
+ try:
445
+ doc_dict = doc.export_to_dict()
446
+ except Exception as e:
447
+ print(json.dumps({"ok": False, "error": f"Docling export_to_dict failed: {e}", "meta": meta}))
448
+ return
449
+
450
+ rows = to_rows(doc_dict)
451
+ print(json.dumps({"ok": True, "mode": "structured", "doc": rows, "meta": meta}))
452
+ except Exception as e:
453
+ print(
454
+ json.dumps({
455
+ "ok": False,
456
+ "error": f"Docling processing error: {e}",
457
+ "meta": {"file_path": file_path},
458
+ })
459
+ )
460
+
461
+ if __name__ == "__main__":
462
+ main()
463
+ """
464
+ )
465
+
466
+ # Validate file_path to avoid command injection or unsafe input
467
+ if not isinstance(args["file_path"], str) or any(c in args["file_path"] for c in [";", "|", "&", "$", "`"]):
468
+ return Data(data={"error": "Unsafe file path detected.", "file_path": args["file_path"]})
469
+
470
+ proc = subprocess.run( # noqa: S603
471
+ [sys.executable, "-u", "-c", child_script],
472
+ input=json.dumps(args).encode("utf-8"),
473
+ capture_output=True,
474
+ check=False,
475
+ )
476
+
477
+ if not proc.stdout:
478
+ err_msg = proc.stderr.decode("utf-8", errors="replace") or "no output from child process"
479
+ return Data(data={"error": f"Docling subprocess error: {err_msg}", "file_path": file_path})
480
+
481
+ try:
482
+ result = json.loads(proc.stdout.decode("utf-8"))
483
+ except Exception as e: # noqa: BLE001
484
+ err_msg = proc.stderr.decode("utf-8", errors="replace")
485
+ return Data(
486
+ data={"error": f"Invalid JSON from Docling subprocess: {e}. stderr={err_msg}", "file_path": file_path},
487
+ )
488
+
489
+ if not result.get("ok"):
490
+ return Data(data={"error": result.get("error", "Unknown Docling error"), **result.get("meta", {})})
491
+
492
+ meta = result.get("meta", {})
493
+ if result.get("mode") == "markdown":
494
+ exported_content = str(result.get("text", ""))
495
+ return Data(
496
+ text=exported_content,
497
+ data={"exported_content": exported_content, "export_format": self.EXPORT_FORMAT, **meta},
498
+ )
499
+
500
+ rows = list(result.get("doc", []))
501
+ return Data(data={"doc": rows, "export_format": self.EXPORT_FORMAT, **meta})
502
+
503
def process_files(
    self,
    file_list: list[BaseFileComponent.BaseFile],
) -> list[BaseFileComponent.BaseFile]:
    """Parse the given files and roll the results up onto ``file_list``.

    - Exactly one file, ``advanced_mode`` on, and a Docling-compatible
      extension => Docling runs in a separate process.
    - Anything else => standard parsing via ``parallel_load_data``.

    Raises:
        ValueError: If ``file_list`` is empty.
    """
    if not file_list:
        msg = "No files to process."
        raise ValueError(msg)

    def load_one(file_path: str, *, silent_errors: bool = False) -> Data | None:
        # Log every failure; re-raise unless the caller asked for silence.
        try:
            return parse_text_file_to_data(file_path, silent_errors=silent_errors)
        except FileNotFoundError as e:
            self.log(f"File not found: {file_path}. Error: {e}")
            if silent_errors:
                return None
            raise
        except Exception as e:
            self.log(f"Unexpected error processing {file_path}: {e}")
            if silent_errors:
                return None
            raise

    # Advanced path: only for a single Docling-compatible file
    single_path = str(file_list[0].path) if len(file_list) == 1 else None
    if single_path is not None and self.advanced_mode and self._is_docling_compatible(single_path):
        docling_result: Data | None = self._process_docling_in_subprocess(single_path)

        # Structured results carry a list under "doc"; give each element its
        # own Data row so it becomes a separate record.
        doc_rows = (getattr(docling_result, "data", {}) or {}).get("doc")
        if not isinstance(doc_rows, list):
            # Markdown export or error payload: pass through unchanged.
            return self.rollup_data(file_list, [docling_result])

        expanded: list[Data | None] = []
        for entry in doc_rows:
            fields = entry if isinstance(entry, dict) else {"value": entry}
            expanded.append(Data(data={"file_path": single_path, **fields}))
        return self.rollup_data(file_list, expanded)

    # Standard multi-file (or single non-advanced) path
    concurrency = max(1, self.concurrency_multithreading) if self.use_multithreading else 1
    file_paths = [str(entry.path) for entry in file_list]
    self.log(f"Starting parallel processing of {len(file_paths)} files with concurrency: {concurrency}.")
    loaded = parallel_load_data(
        file_paths,
        silent_errors=self.silent_errors,
        load_function=load_one,
        max_concurrency=concurrency,
    )
    return self.rollup_data(file_list, loaded)
565
+
566
+ # ------------------------------ Output helpers -----------------------------------
567
+
568
def load_files_advanced(self) -> DataFrame:
    """Return the result of ``load_files()`` with markdown export switched off.

    ``self.markdown`` is set to ``False`` before loading and is left that way.
    """
    self.markdown = False
    frame = self.load_files()
    return frame
572
+
573
def load_files_markdown(self) -> Message:
    """Return the first ``text`` entry of ``load_files()`` as a ``Message``.

    ``self.markdown`` is set to ``True`` before loading and is left that way.
    """
    self.markdown = True
    frame = self.load_files()
    # Only the first entry of the "text" field is used.
    first_text = frame.text[0]
    return Message(text=str(first_text))