lfx-nightly 0.1.11.dev0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (699)
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/base/__init__.py +0 -0
  4. lfx/base/agents/__init__.py +0 -0
  5. lfx/base/agents/agent.py +268 -0
  6. lfx/base/agents/callback.py +130 -0
  7. lfx/base/agents/context.py +109 -0
  8. lfx/base/agents/crewai/__init__.py +0 -0
  9. lfx/base/agents/crewai/crew.py +231 -0
  10. lfx/base/agents/crewai/tasks.py +12 -0
  11. lfx/base/agents/default_prompts.py +23 -0
  12. lfx/base/agents/errors.py +15 -0
  13. lfx/base/agents/events.py +346 -0
  14. lfx/base/agents/utils.py +205 -0
  15. lfx/base/astra_assistants/__init__.py +0 -0
  16. lfx/base/astra_assistants/util.py +171 -0
  17. lfx/base/chains/__init__.py +0 -0
  18. lfx/base/chains/model.py +19 -0
  19. lfx/base/composio/__init__.py +0 -0
  20. lfx/base/composio/composio_base.py +1291 -0
  21. lfx/base/compressors/__init__.py +0 -0
  22. lfx/base/compressors/model.py +60 -0
  23. lfx/base/constants.py +46 -0
  24. lfx/base/curl/__init__.py +0 -0
  25. lfx/base/curl/parse.py +188 -0
  26. lfx/base/data/__init__.py +5 -0
  27. lfx/base/data/base_file.py +685 -0
  28. lfx/base/data/docling_utils.py +245 -0
  29. lfx/base/data/utils.py +198 -0
  30. lfx/base/document_transformers/__init__.py +0 -0
  31. lfx/base/document_transformers/model.py +43 -0
  32. lfx/base/embeddings/__init__.py +0 -0
  33. lfx/base/embeddings/aiml_embeddings.py +62 -0
  34. lfx/base/embeddings/model.py +26 -0
  35. lfx/base/flow_processing/__init__.py +0 -0
  36. lfx/base/flow_processing/utils.py +86 -0
  37. lfx/base/huggingface/__init__.py +0 -0
  38. lfx/base/huggingface/model_bridge.py +133 -0
  39. lfx/base/io/__init__.py +0 -0
  40. lfx/base/io/chat.py +20 -0
  41. lfx/base/io/text.py +22 -0
  42. lfx/base/langchain_utilities/__init__.py +0 -0
  43. lfx/base/langchain_utilities/model.py +35 -0
  44. lfx/base/langchain_utilities/spider_constants.py +1 -0
  45. lfx/base/langwatch/__init__.py +0 -0
  46. lfx/base/langwatch/utils.py +18 -0
  47. lfx/base/mcp/__init__.py +0 -0
  48. lfx/base/mcp/constants.py +2 -0
  49. lfx/base/mcp/util.py +1398 -0
  50. lfx/base/memory/__init__.py +0 -0
  51. lfx/base/memory/memory.py +49 -0
  52. lfx/base/memory/model.py +38 -0
  53. lfx/base/models/__init__.py +3 -0
  54. lfx/base/models/aiml_constants.py +51 -0
  55. lfx/base/models/anthropic_constants.py +47 -0
  56. lfx/base/models/aws_constants.py +151 -0
  57. lfx/base/models/chat_result.py +76 -0
  58. lfx/base/models/google_generative_ai_constants.py +70 -0
  59. lfx/base/models/groq_constants.py +134 -0
  60. lfx/base/models/model.py +375 -0
  61. lfx/base/models/model_input_constants.py +307 -0
  62. lfx/base/models/model_metadata.py +41 -0
  63. lfx/base/models/model_utils.py +8 -0
  64. lfx/base/models/novita_constants.py +35 -0
  65. lfx/base/models/ollama_constants.py +49 -0
  66. lfx/base/models/openai_constants.py +122 -0
  67. lfx/base/models/sambanova_constants.py +18 -0
  68. lfx/base/processing/__init__.py +0 -0
  69. lfx/base/prompts/__init__.py +0 -0
  70. lfx/base/prompts/api_utils.py +224 -0
  71. lfx/base/prompts/utils.py +61 -0
  72. lfx/base/textsplitters/__init__.py +0 -0
  73. lfx/base/textsplitters/model.py +28 -0
  74. lfx/base/tools/__init__.py +0 -0
  75. lfx/base/tools/base.py +26 -0
  76. lfx/base/tools/component_tool.py +325 -0
  77. lfx/base/tools/constants.py +49 -0
  78. lfx/base/tools/flow_tool.py +132 -0
  79. lfx/base/tools/run_flow.py +224 -0
  80. lfx/base/vectorstores/__init__.py +0 -0
  81. lfx/base/vectorstores/model.py +193 -0
  82. lfx/base/vectorstores/utils.py +22 -0
  83. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  84. lfx/cli/__init__.py +5 -0
  85. lfx/cli/commands.py +319 -0
  86. lfx/cli/common.py +650 -0
  87. lfx/cli/run.py +441 -0
  88. lfx/cli/script_loader.py +247 -0
  89. lfx/cli/serve_app.py +546 -0
  90. lfx/cli/validation.py +69 -0
  91. lfx/components/FAISS/__init__.py +34 -0
  92. lfx/components/FAISS/faiss.py +111 -0
  93. lfx/components/Notion/__init__.py +19 -0
  94. lfx/components/Notion/add_content_to_page.py +269 -0
  95. lfx/components/Notion/create_page.py +94 -0
  96. lfx/components/Notion/list_database_properties.py +68 -0
  97. lfx/components/Notion/list_pages.py +122 -0
  98. lfx/components/Notion/list_users.py +77 -0
  99. lfx/components/Notion/page_content_viewer.py +93 -0
  100. lfx/components/Notion/search.py +111 -0
  101. lfx/components/Notion/update_page_property.py +114 -0
  102. lfx/components/__init__.py +411 -0
  103. lfx/components/_importing.py +42 -0
  104. lfx/components/agentql/__init__.py +3 -0
  105. lfx/components/agentql/agentql_api.py +151 -0
  106. lfx/components/agents/__init__.py +34 -0
  107. lfx/components/agents/agent.py +558 -0
  108. lfx/components/agents/mcp_component.py +501 -0
  109. lfx/components/aiml/__init__.py +37 -0
  110. lfx/components/aiml/aiml.py +112 -0
  111. lfx/components/aiml/aiml_embeddings.py +37 -0
  112. lfx/components/amazon/__init__.py +36 -0
  113. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  114. lfx/components/amazon/amazon_bedrock_model.py +124 -0
  115. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  116. lfx/components/anthropic/__init__.py +34 -0
  117. lfx/components/anthropic/anthropic.py +187 -0
  118. lfx/components/apify/__init__.py +5 -0
  119. lfx/components/apify/apify_actor.py +325 -0
  120. lfx/components/arxiv/__init__.py +3 -0
  121. lfx/components/arxiv/arxiv.py +163 -0
  122. lfx/components/assemblyai/__init__.py +46 -0
  123. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  124. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  125. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  126. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  127. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  128. lfx/components/azure/__init__.py +37 -0
  129. lfx/components/azure/azure_openai.py +95 -0
  130. lfx/components/azure/azure_openai_embeddings.py +83 -0
  131. lfx/components/baidu/__init__.py +32 -0
  132. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  133. lfx/components/bing/__init__.py +3 -0
  134. lfx/components/bing/bing_search_api.py +61 -0
  135. lfx/components/cassandra/__init__.py +40 -0
  136. lfx/components/cassandra/cassandra.py +264 -0
  137. lfx/components/cassandra/cassandra_chat.py +92 -0
  138. lfx/components/cassandra/cassandra_graph.py +238 -0
  139. lfx/components/chains/__init__.py +3 -0
  140. lfx/components/chroma/__init__.py +34 -0
  141. lfx/components/chroma/chroma.py +167 -0
  142. lfx/components/cleanlab/__init__.py +40 -0
  143. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  144. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  145. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  146. lfx/components/clickhouse/__init__.py +34 -0
  147. lfx/components/clickhouse/clickhouse.py +135 -0
  148. lfx/components/cloudflare/__init__.py +32 -0
  149. lfx/components/cloudflare/cloudflare.py +81 -0
  150. lfx/components/cohere/__init__.py +40 -0
  151. lfx/components/cohere/cohere_embeddings.py +81 -0
  152. lfx/components/cohere/cohere_models.py +46 -0
  153. lfx/components/cohere/cohere_rerank.py +51 -0
  154. lfx/components/composio/__init__.py +74 -0
  155. lfx/components/composio/composio_api.py +268 -0
  156. lfx/components/composio/dropbox_compnent.py +11 -0
  157. lfx/components/composio/github_composio.py +11 -0
  158. lfx/components/composio/gmail_composio.py +38 -0
  159. lfx/components/composio/googlecalendar_composio.py +11 -0
  160. lfx/components/composio/googlemeet_composio.py +11 -0
  161. lfx/components/composio/googletasks_composio.py +8 -0
  162. lfx/components/composio/linear_composio.py +11 -0
  163. lfx/components/composio/outlook_composio.py +11 -0
  164. lfx/components/composio/reddit_composio.py +11 -0
  165. lfx/components/composio/slack_composio.py +582 -0
  166. lfx/components/composio/slackbot_composio.py +11 -0
  167. lfx/components/composio/supabase_composio.py +11 -0
  168. lfx/components/composio/todoist_composio.py +11 -0
  169. lfx/components/composio/youtube_composio.py +11 -0
  170. lfx/components/confluence/__init__.py +3 -0
  171. lfx/components/confluence/confluence.py +84 -0
  172. lfx/components/couchbase/__init__.py +34 -0
  173. lfx/components/couchbase/couchbase.py +102 -0
  174. lfx/components/crewai/__init__.py +49 -0
  175. lfx/components/crewai/crewai.py +107 -0
  176. lfx/components/crewai/hierarchical_crew.py +46 -0
  177. lfx/components/crewai/hierarchical_task.py +44 -0
  178. lfx/components/crewai/sequential_crew.py +52 -0
  179. lfx/components/crewai/sequential_task.py +73 -0
  180. lfx/components/crewai/sequential_task_agent.py +143 -0
  181. lfx/components/custom_component/__init__.py +34 -0
  182. lfx/components/custom_component/custom_component.py +31 -0
  183. lfx/components/data/__init__.py +64 -0
  184. lfx/components/data/api_request.py +544 -0
  185. lfx/components/data/csv_to_data.py +95 -0
  186. lfx/components/data/directory.py +113 -0
  187. lfx/components/data/file.py +577 -0
  188. lfx/components/data/json_to_data.py +98 -0
  189. lfx/components/data/news_search.py +164 -0
  190. lfx/components/data/rss.py +69 -0
  191. lfx/components/data/sql_executor.py +101 -0
  192. lfx/components/data/url.py +311 -0
  193. lfx/components/data/web_search.py +112 -0
  194. lfx/components/data/webhook.py +56 -0
  195. lfx/components/datastax/__init__.py +70 -0
  196. lfx/components/datastax/astra_assistant_manager.py +306 -0
  197. lfx/components/datastax/astra_db.py +75 -0
  198. lfx/components/datastax/astra_vectorize.py +124 -0
  199. lfx/components/datastax/astradb.py +1285 -0
  200. lfx/components/datastax/astradb_cql.py +314 -0
  201. lfx/components/datastax/astradb_graph.py +330 -0
  202. lfx/components/datastax/astradb_tool.py +414 -0
  203. lfx/components/datastax/astradb_vectorstore.py +1285 -0
  204. lfx/components/datastax/cassandra.py +92 -0
  205. lfx/components/datastax/create_assistant.py +58 -0
  206. lfx/components/datastax/create_thread.py +32 -0
  207. lfx/components/datastax/dotenv.py +35 -0
  208. lfx/components/datastax/get_assistant.py +37 -0
  209. lfx/components/datastax/getenvvar.py +30 -0
  210. lfx/components/datastax/graph_rag.py +141 -0
  211. lfx/components/datastax/hcd.py +314 -0
  212. lfx/components/datastax/list_assistants.py +25 -0
  213. lfx/components/datastax/run.py +89 -0
  214. lfx/components/deactivated/__init__.py +15 -0
  215. lfx/components/deactivated/amazon_kendra.py +66 -0
  216. lfx/components/deactivated/chat_litellm_model.py +158 -0
  217. lfx/components/deactivated/code_block_extractor.py +26 -0
  218. lfx/components/deactivated/documents_to_data.py +22 -0
  219. lfx/components/deactivated/embed.py +16 -0
  220. lfx/components/deactivated/extract_key_from_data.py +46 -0
  221. lfx/components/deactivated/json_document_builder.py +57 -0
  222. lfx/components/deactivated/list_flows.py +20 -0
  223. lfx/components/deactivated/mcp_sse.py +61 -0
  224. lfx/components/deactivated/mcp_stdio.py +62 -0
  225. lfx/components/deactivated/merge_data.py +93 -0
  226. lfx/components/deactivated/message.py +37 -0
  227. lfx/components/deactivated/metal.py +54 -0
  228. lfx/components/deactivated/multi_query.py +59 -0
  229. lfx/components/deactivated/retriever.py +43 -0
  230. lfx/components/deactivated/selective_passthrough.py +77 -0
  231. lfx/components/deactivated/should_run_next.py +40 -0
  232. lfx/components/deactivated/split_text.py +63 -0
  233. lfx/components/deactivated/store_message.py +24 -0
  234. lfx/components/deactivated/sub_flow.py +124 -0
  235. lfx/components/deactivated/vectara_self_query.py +76 -0
  236. lfx/components/deactivated/vector_store.py +24 -0
  237. lfx/components/deepseek/__init__.py +34 -0
  238. lfx/components/deepseek/deepseek.py +136 -0
  239. lfx/components/docling/__init__.py +43 -0
  240. lfx/components/docling/chunk_docling_document.py +186 -0
  241. lfx/components/docling/docling_inline.py +231 -0
  242. lfx/components/docling/docling_remote.py +193 -0
  243. lfx/components/docling/export_docling_document.py +117 -0
  244. lfx/components/documentloaders/__init__.py +3 -0
  245. lfx/components/duckduckgo/__init__.py +3 -0
  246. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  247. lfx/components/elastic/__init__.py +37 -0
  248. lfx/components/elastic/elasticsearch.py +267 -0
  249. lfx/components/elastic/opensearch.py +243 -0
  250. lfx/components/embeddings/__init__.py +37 -0
  251. lfx/components/embeddings/similarity.py +76 -0
  252. lfx/components/embeddings/text_embedder.py +64 -0
  253. lfx/components/exa/__init__.py +3 -0
  254. lfx/components/exa/exa_search.py +68 -0
  255. lfx/components/firecrawl/__init__.py +43 -0
  256. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  257. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  258. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  259. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  260. lfx/components/git/__init__.py +4 -0
  261. lfx/components/git/git.py +262 -0
  262. lfx/components/git/gitextractor.py +196 -0
  263. lfx/components/glean/__init__.py +3 -0
  264. lfx/components/glean/glean_search_api.py +173 -0
  265. lfx/components/google/__init__.py +17 -0
  266. lfx/components/google/gmail.py +192 -0
  267. lfx/components/google/google_bq_sql_executor.py +157 -0
  268. lfx/components/google/google_drive.py +92 -0
  269. lfx/components/google/google_drive_search.py +152 -0
  270. lfx/components/google/google_generative_ai.py +147 -0
  271. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  272. lfx/components/google/google_oauth_token.py +89 -0
  273. lfx/components/google/google_search_api_core.py +68 -0
  274. lfx/components/google/google_serper_api_core.py +74 -0
  275. lfx/components/groq/__init__.py +34 -0
  276. lfx/components/groq/groq.py +136 -0
  277. lfx/components/helpers/__init__.py +52 -0
  278. lfx/components/helpers/calculator_core.py +89 -0
  279. lfx/components/helpers/create_list.py +40 -0
  280. lfx/components/helpers/current_date.py +42 -0
  281. lfx/components/helpers/id_generator.py +42 -0
  282. lfx/components/helpers/memory.py +251 -0
  283. lfx/components/helpers/output_parser.py +45 -0
  284. lfx/components/helpers/store_message.py +90 -0
  285. lfx/components/homeassistant/__init__.py +7 -0
  286. lfx/components/homeassistant/home_assistant_control.py +152 -0
  287. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  288. lfx/components/huggingface/__init__.py +37 -0
  289. lfx/components/huggingface/huggingface.py +197 -0
  290. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  291. lfx/components/ibm/__init__.py +34 -0
  292. lfx/components/ibm/watsonx.py +203 -0
  293. lfx/components/ibm/watsonx_embeddings.py +135 -0
  294. lfx/components/icosacomputing/__init__.py +5 -0
  295. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  296. lfx/components/input_output/__init__.py +38 -0
  297. lfx/components/input_output/chat.py +120 -0
  298. lfx/components/input_output/chat_output.py +200 -0
  299. lfx/components/input_output/text.py +27 -0
  300. lfx/components/input_output/text_output.py +29 -0
  301. lfx/components/jigsawstack/__init__.py +23 -0
  302. lfx/components/jigsawstack/ai_scrape.py +126 -0
  303. lfx/components/jigsawstack/ai_web_search.py +136 -0
  304. lfx/components/jigsawstack/file_read.py +115 -0
  305. lfx/components/jigsawstack/file_upload.py +94 -0
  306. lfx/components/jigsawstack/image_generation.py +205 -0
  307. lfx/components/jigsawstack/nsfw.py +60 -0
  308. lfx/components/jigsawstack/object_detection.py +124 -0
  309. lfx/components/jigsawstack/sentiment.py +112 -0
  310. lfx/components/jigsawstack/text_to_sql.py +90 -0
  311. lfx/components/jigsawstack/text_translate.py +77 -0
  312. lfx/components/jigsawstack/vocr.py +107 -0
  313. lfx/components/langchain_utilities/__init__.py +109 -0
  314. lfx/components/langchain_utilities/character.py +53 -0
  315. lfx/components/langchain_utilities/conversation.py +59 -0
  316. lfx/components/langchain_utilities/csv_agent.py +107 -0
  317. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  318. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  319. lfx/components/langchain_utilities/json_agent.py +45 -0
  320. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  321. lfx/components/langchain_utilities/language_recursive.py +49 -0
  322. lfx/components/langchain_utilities/language_semantic.py +138 -0
  323. lfx/components/langchain_utilities/llm_checker.py +39 -0
  324. lfx/components/langchain_utilities/llm_math.py +42 -0
  325. lfx/components/langchain_utilities/natural_language.py +61 -0
  326. lfx/components/langchain_utilities/openai_tools.py +53 -0
  327. lfx/components/langchain_utilities/openapi.py +48 -0
  328. lfx/components/langchain_utilities/recursive_character.py +60 -0
  329. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  330. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  331. lfx/components/langchain_utilities/self_query.py +80 -0
  332. lfx/components/langchain_utilities/spider.py +142 -0
  333. lfx/components/langchain_utilities/sql.py +40 -0
  334. lfx/components/langchain_utilities/sql_database.py +35 -0
  335. lfx/components/langchain_utilities/sql_generator.py +78 -0
  336. lfx/components/langchain_utilities/tool_calling.py +59 -0
  337. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  338. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  339. lfx/components/langchain_utilities/xml_agent.py +71 -0
  340. lfx/components/langwatch/__init__.py +3 -0
  341. lfx/components/langwatch/langwatch.py +278 -0
  342. lfx/components/link_extractors/__init__.py +3 -0
  343. lfx/components/lmstudio/__init__.py +34 -0
  344. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  345. lfx/components/lmstudio/lmstudiomodel.py +129 -0
  346. lfx/components/logic/__init__.py +52 -0
  347. lfx/components/logic/conditional_router.py +171 -0
  348. lfx/components/logic/data_conditional_router.py +125 -0
  349. lfx/components/logic/flow_tool.py +110 -0
  350. lfx/components/logic/listen.py +29 -0
  351. lfx/components/logic/loop.py +125 -0
  352. lfx/components/logic/notify.py +88 -0
  353. lfx/components/logic/pass_message.py +35 -0
  354. lfx/components/logic/run_flow.py +71 -0
  355. lfx/components/logic/sub_flow.py +114 -0
  356. lfx/components/maritalk/__init__.py +32 -0
  357. lfx/components/maritalk/maritalk.py +52 -0
  358. lfx/components/mem0/__init__.py +3 -0
  359. lfx/components/mem0/mem0_chat_memory.py +136 -0
  360. lfx/components/milvus/__init__.py +34 -0
  361. lfx/components/milvus/milvus.py +115 -0
  362. lfx/components/mistral/__init__.py +37 -0
  363. lfx/components/mistral/mistral.py +114 -0
  364. lfx/components/mistral/mistral_embeddings.py +58 -0
  365. lfx/components/models/__init__.py +34 -0
  366. lfx/components/models/embedding_model.py +114 -0
  367. lfx/components/models/language_model.py +144 -0
  368. lfx/components/mongodb/__init__.py +34 -0
  369. lfx/components/mongodb/mongodb_atlas.py +213 -0
  370. lfx/components/needle/__init__.py +3 -0
  371. lfx/components/needle/needle.py +104 -0
  372. lfx/components/notdiamond/__init__.py +34 -0
  373. lfx/components/notdiamond/notdiamond.py +228 -0
  374. lfx/components/novita/__init__.py +32 -0
  375. lfx/components/novita/novita.py +130 -0
  376. lfx/components/nvidia/__init__.py +57 -0
  377. lfx/components/nvidia/nvidia.py +157 -0
  378. lfx/components/nvidia/nvidia_embedding.py +77 -0
  379. lfx/components/nvidia/nvidia_ingest.py +317 -0
  380. lfx/components/nvidia/nvidia_rerank.py +63 -0
  381. lfx/components/nvidia/system_assist.py +65 -0
  382. lfx/components/olivya/__init__.py +3 -0
  383. lfx/components/olivya/olivya.py +116 -0
  384. lfx/components/ollama/__init__.py +37 -0
  385. lfx/components/ollama/ollama.py +330 -0
  386. lfx/components/ollama/ollama_embeddings.py +106 -0
  387. lfx/components/openai/__init__.py +37 -0
  388. lfx/components/openai/openai.py +100 -0
  389. lfx/components/openai/openai_chat_model.py +176 -0
  390. lfx/components/openrouter/__init__.py +32 -0
  391. lfx/components/openrouter/openrouter.py +202 -0
  392. lfx/components/output_parsers/__init__.py +3 -0
  393. lfx/components/perplexity/__init__.py +34 -0
  394. lfx/components/perplexity/perplexity.py +75 -0
  395. lfx/components/pgvector/__init__.py +34 -0
  396. lfx/components/pgvector/pgvector.py +72 -0
  397. lfx/components/pinecone/__init__.py +34 -0
  398. lfx/components/pinecone/pinecone.py +134 -0
  399. lfx/components/processing/__init__.py +117 -0
  400. lfx/components/processing/alter_metadata.py +108 -0
  401. lfx/components/processing/batch_run.py +205 -0
  402. lfx/components/processing/combine_text.py +39 -0
  403. lfx/components/processing/converter.py +159 -0
  404. lfx/components/processing/create_data.py +110 -0
  405. lfx/components/processing/data_operations.py +438 -0
  406. lfx/components/processing/data_to_dataframe.py +70 -0
  407. lfx/components/processing/dataframe_operations.py +313 -0
  408. lfx/components/processing/extract_key.py +53 -0
  409. lfx/components/processing/filter_data.py +42 -0
  410. lfx/components/processing/filter_data_values.py +88 -0
  411. lfx/components/processing/json_cleaner.py +103 -0
  412. lfx/components/processing/lambda_filter.py +154 -0
  413. lfx/components/processing/llm_router.py +499 -0
  414. lfx/components/processing/merge_data.py +90 -0
  415. lfx/components/processing/message_to_data.py +36 -0
  416. lfx/components/processing/parse_data.py +70 -0
  417. lfx/components/processing/parse_dataframe.py +68 -0
  418. lfx/components/processing/parse_json_data.py +90 -0
  419. lfx/components/processing/parser.py +143 -0
  420. lfx/components/processing/prompt.py +67 -0
  421. lfx/components/processing/python_repl_core.py +98 -0
  422. lfx/components/processing/regex.py +82 -0
  423. lfx/components/processing/save_file.py +225 -0
  424. lfx/components/processing/select_data.py +48 -0
  425. lfx/components/processing/split_text.py +141 -0
  426. lfx/components/processing/structured_output.py +202 -0
  427. lfx/components/processing/update_data.py +160 -0
  428. lfx/components/prototypes/__init__.py +34 -0
  429. lfx/components/prototypes/python_function.py +73 -0
  430. lfx/components/qdrant/__init__.py +34 -0
  431. lfx/components/qdrant/qdrant.py +109 -0
  432. lfx/components/redis/__init__.py +37 -0
  433. lfx/components/redis/redis.py +89 -0
  434. lfx/components/redis/redis_chat.py +43 -0
  435. lfx/components/sambanova/__init__.py +32 -0
  436. lfx/components/sambanova/sambanova.py +84 -0
  437. lfx/components/scrapegraph/__init__.py +40 -0
  438. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  439. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  440. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  441. lfx/components/searchapi/__init__.py +34 -0
  442. lfx/components/searchapi/search.py +79 -0
  443. lfx/components/serpapi/__init__.py +3 -0
  444. lfx/components/serpapi/serp.py +115 -0
  445. lfx/components/supabase/__init__.py +34 -0
  446. lfx/components/supabase/supabase.py +76 -0
  447. lfx/components/tavily/__init__.py +4 -0
  448. lfx/components/tavily/tavily_extract.py +117 -0
  449. lfx/components/tavily/tavily_search.py +212 -0
  450. lfx/components/textsplitters/__init__.py +3 -0
  451. lfx/components/toolkits/__init__.py +3 -0
  452. lfx/components/tools/__init__.py +72 -0
  453. lfx/components/tools/calculator.py +108 -0
  454. lfx/components/tools/google_search_api.py +45 -0
  455. lfx/components/tools/google_serper_api.py +115 -0
  456. lfx/components/tools/python_code_structured_tool.py +327 -0
  457. lfx/components/tools/python_repl.py +97 -0
  458. lfx/components/tools/search_api.py +87 -0
  459. lfx/components/tools/searxng.py +145 -0
  460. lfx/components/tools/serp_api.py +119 -0
  461. lfx/components/tools/tavily_search_tool.py +344 -0
  462. lfx/components/tools/wikidata_api.py +102 -0
  463. lfx/components/tools/wikipedia_api.py +49 -0
  464. lfx/components/tools/yahoo_finance.py +129 -0
  465. lfx/components/twelvelabs/__init__.py +52 -0
  466. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  467. lfx/components/twelvelabs/pegasus_index.py +311 -0
  468. lfx/components/twelvelabs/split_video.py +291 -0
  469. lfx/components/twelvelabs/text_embeddings.py +57 -0
  470. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  471. lfx/components/twelvelabs/video_embeddings.py +100 -0
  472. lfx/components/twelvelabs/video_file.py +179 -0
  473. lfx/components/unstructured/__init__.py +3 -0
  474. lfx/components/unstructured/unstructured.py +121 -0
  475. lfx/components/upstash/__init__.py +34 -0
  476. lfx/components/upstash/upstash.py +124 -0
  477. lfx/components/vectara/__init__.py +37 -0
  478. lfx/components/vectara/vectara.py +97 -0
  479. lfx/components/vectara/vectara_rag.py +164 -0
  480. lfx/components/vectorstores/__init__.py +40 -0
  481. lfx/components/vectorstores/astradb.py +1285 -0
  482. lfx/components/vectorstores/astradb_graph.py +319 -0
  483. lfx/components/vectorstores/cassandra.py +264 -0
  484. lfx/components/vectorstores/cassandra_graph.py +238 -0
  485. lfx/components/vectorstores/chroma.py +167 -0
  486. lfx/components/vectorstores/clickhouse.py +135 -0
  487. lfx/components/vectorstores/couchbase.py +102 -0
  488. lfx/components/vectorstores/elasticsearch.py +267 -0
  489. lfx/components/vectorstores/faiss.py +111 -0
  490. lfx/components/vectorstores/graph_rag.py +141 -0
  491. lfx/components/vectorstores/hcd.py +314 -0
  492. lfx/components/vectorstores/local_db.py +261 -0
  493. lfx/components/vectorstores/milvus.py +115 -0
  494. lfx/components/vectorstores/mongodb_atlas.py +213 -0
  495. lfx/components/vectorstores/opensearch.py +243 -0
  496. lfx/components/vectorstores/pgvector.py +72 -0
  497. lfx/components/vectorstores/pinecone.py +134 -0
  498. lfx/components/vectorstores/qdrant.py +109 -0
  499. lfx/components/vectorstores/supabase.py +76 -0
  500. lfx/components/vectorstores/upstash.py +124 -0
  501. lfx/components/vectorstores/vectara.py +97 -0
  502. lfx/components/vectorstores/vectara_rag.py +164 -0
  503. lfx/components/vectorstores/weaviate.py +89 -0
  504. lfx/components/vertexai/__init__.py +37 -0
  505. lfx/components/vertexai/vertexai.py +71 -0
  506. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  507. lfx/components/weaviate/__init__.py +34 -0
  508. lfx/components/weaviate/weaviate.py +89 -0
  509. lfx/components/wikipedia/__init__.py +4 -0
  510. lfx/components/wikipedia/wikidata.py +86 -0
  511. lfx/components/wikipedia/wikipedia.py +53 -0
  512. lfx/components/wolframalpha/__init__.py +3 -0
  513. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  514. lfx/components/xai/__init__.py +32 -0
  515. lfx/components/xai/xai.py +167 -0
  516. lfx/components/yahoosearch/__init__.py +3 -0
  517. lfx/components/yahoosearch/yahoo.py +137 -0
  518. lfx/components/youtube/__init__.py +52 -0
  519. lfx/components/youtube/channel.py +227 -0
  520. lfx/components/youtube/comments.py +231 -0
  521. lfx/components/youtube/playlist.py +33 -0
  522. lfx/components/youtube/search.py +120 -0
  523. lfx/components/youtube/trending.py +285 -0
  524. lfx/components/youtube/video_details.py +263 -0
  525. lfx/components/youtube/youtube_transcripts.py +118 -0
  526. lfx/components/zep/__init__.py +3 -0
  527. lfx/components/zep/zep.py +44 -0
  528. lfx/constants.py +6 -0
  529. lfx/custom/__init__.py +7 -0
  530. lfx/custom/attributes.py +86 -0
  531. lfx/custom/code_parser/__init__.py +3 -0
  532. lfx/custom/code_parser/code_parser.py +361 -0
  533. lfx/custom/custom_component/__init__.py +0 -0
  534. lfx/custom/custom_component/base_component.py +128 -0
  535. lfx/custom/custom_component/component.py +1808 -0
  536. lfx/custom/custom_component/component_with_cache.py +8 -0
  537. lfx/custom/custom_component/custom_component.py +588 -0
  538. lfx/custom/dependency_analyzer.py +165 -0
  539. lfx/custom/directory_reader/__init__.py +3 -0
  540. lfx/custom/directory_reader/directory_reader.py +359 -0
  541. lfx/custom/directory_reader/utils.py +171 -0
  542. lfx/custom/eval.py +12 -0
  543. lfx/custom/schema.py +32 -0
  544. lfx/custom/tree_visitor.py +21 -0
  545. lfx/custom/utils.py +877 -0
  546. lfx/custom/validate.py +488 -0
  547. lfx/events/__init__.py +1 -0
  548. lfx/events/event_manager.py +110 -0
  549. lfx/exceptions/__init__.py +0 -0
  550. lfx/exceptions/component.py +15 -0
  551. lfx/field_typing/__init__.py +91 -0
  552. lfx/field_typing/constants.py +215 -0
  553. lfx/field_typing/range_spec.py +35 -0
  554. lfx/graph/__init__.py +6 -0
  555. lfx/graph/edge/__init__.py +0 -0
  556. lfx/graph/edge/base.py +277 -0
  557. lfx/graph/edge/schema.py +119 -0
  558. lfx/graph/edge/utils.py +0 -0
  559. lfx/graph/graph/__init__.py +0 -0
  560. lfx/graph/graph/ascii.py +202 -0
  561. lfx/graph/graph/base.py +2238 -0
  562. lfx/graph/graph/constants.py +63 -0
  563. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  564. lfx/graph/graph/schema.py +52 -0
  565. lfx/graph/graph/state_model.py +66 -0
  566. lfx/graph/graph/utils.py +1024 -0
  567. lfx/graph/schema.py +75 -0
  568. lfx/graph/state/__init__.py +0 -0
  569. lfx/graph/state/model.py +237 -0
  570. lfx/graph/utils.py +200 -0
  571. lfx/graph/vertex/__init__.py +0 -0
  572. lfx/graph/vertex/base.py +823 -0
  573. lfx/graph/vertex/constants.py +0 -0
  574. lfx/graph/vertex/exceptions.py +4 -0
  575. lfx/graph/vertex/param_handler.py +264 -0
  576. lfx/graph/vertex/schema.py +26 -0
  577. lfx/graph/vertex/utils.py +19 -0
  578. lfx/graph/vertex/vertex_types.py +489 -0
  579. lfx/helpers/__init__.py +1 -0
  580. lfx/helpers/base_model.py +71 -0
  581. lfx/helpers/custom.py +13 -0
  582. lfx/helpers/data.py +167 -0
  583. lfx/helpers/flow.py +194 -0
  584. lfx/inputs/__init__.py +68 -0
  585. lfx/inputs/constants.py +2 -0
  586. lfx/inputs/input_mixin.py +328 -0
  587. lfx/inputs/inputs.py +714 -0
  588. lfx/inputs/validators.py +19 -0
  589. lfx/interface/__init__.py +6 -0
  590. lfx/interface/components.py +489 -0
  591. lfx/interface/importing/__init__.py +5 -0
  592. lfx/interface/importing/utils.py +39 -0
  593. lfx/interface/initialize/__init__.py +3 -0
  594. lfx/interface/initialize/loading.py +224 -0
  595. lfx/interface/listing.py +26 -0
  596. lfx/interface/run.py +16 -0
  597. lfx/interface/utils.py +111 -0
  598. lfx/io/__init__.py +63 -0
  599. lfx/io/schema.py +289 -0
  600. lfx/load/__init__.py +8 -0
  601. lfx/load/load.py +256 -0
  602. lfx/load/utils.py +99 -0
  603. lfx/log/__init__.py +5 -0
  604. lfx/log/logger.py +385 -0
  605. lfx/memory/__init__.py +90 -0
  606. lfx/memory/stubs.py +283 -0
  607. lfx/processing/__init__.py +1 -0
  608. lfx/processing/process.py +238 -0
  609. lfx/processing/utils.py +25 -0
  610. lfx/py.typed +0 -0
  611. lfx/schema/__init__.py +66 -0
  612. lfx/schema/artifact.py +83 -0
  613. lfx/schema/content_block.py +62 -0
  614. lfx/schema/content_types.py +91 -0
  615. lfx/schema/data.py +308 -0
  616. lfx/schema/dataframe.py +210 -0
  617. lfx/schema/dotdict.py +74 -0
  618. lfx/schema/encoders.py +13 -0
  619. lfx/schema/graph.py +47 -0
  620. lfx/schema/image.py +131 -0
  621. lfx/schema/json_schema.py +141 -0
  622. lfx/schema/log.py +61 -0
  623. lfx/schema/message.py +473 -0
  624. lfx/schema/openai_responses_schemas.py +74 -0
  625. lfx/schema/properties.py +41 -0
  626. lfx/schema/schema.py +171 -0
  627. lfx/schema/serialize.py +13 -0
  628. lfx/schema/table.py +140 -0
  629. lfx/schema/validators.py +114 -0
  630. lfx/serialization/__init__.py +5 -0
  631. lfx/serialization/constants.py +2 -0
  632. lfx/serialization/serialization.py +314 -0
  633. lfx/services/__init__.py +23 -0
  634. lfx/services/base.py +28 -0
  635. lfx/services/cache/__init__.py +6 -0
  636. lfx/services/cache/base.py +183 -0
  637. lfx/services/cache/service.py +166 -0
  638. lfx/services/cache/utils.py +169 -0
  639. lfx/services/chat/__init__.py +1 -0
  640. lfx/services/chat/config.py +2 -0
  641. lfx/services/chat/schema.py +10 -0
  642. lfx/services/deps.py +129 -0
  643. lfx/services/factory.py +19 -0
  644. lfx/services/initialize.py +19 -0
  645. lfx/services/interfaces.py +103 -0
  646. lfx/services/manager.py +172 -0
  647. lfx/services/schema.py +20 -0
  648. lfx/services/session.py +82 -0
  649. lfx/services/settings/__init__.py +3 -0
  650. lfx/services/settings/auth.py +130 -0
  651. lfx/services/settings/base.py +539 -0
  652. lfx/services/settings/constants.py +31 -0
  653. lfx/services/settings/factory.py +23 -0
  654. lfx/services/settings/feature_flags.py +12 -0
  655. lfx/services/settings/service.py +35 -0
  656. lfx/services/settings/utils.py +40 -0
  657. lfx/services/shared_component_cache/__init__.py +1 -0
  658. lfx/services/shared_component_cache/factory.py +30 -0
  659. lfx/services/shared_component_cache/service.py +9 -0
  660. lfx/services/storage/__init__.py +5 -0
  661. lfx/services/storage/local.py +155 -0
  662. lfx/services/storage/service.py +54 -0
  663. lfx/services/tracing/__init__.py +1 -0
  664. lfx/services/tracing/service.py +21 -0
  665. lfx/settings.py +6 -0
  666. lfx/template/__init__.py +6 -0
  667. lfx/template/field/__init__.py +0 -0
  668. lfx/template/field/base.py +257 -0
  669. lfx/template/field/prompt.py +15 -0
  670. lfx/template/frontend_node/__init__.py +6 -0
  671. lfx/template/frontend_node/base.py +212 -0
  672. lfx/template/frontend_node/constants.py +65 -0
  673. lfx/template/frontend_node/custom_components.py +79 -0
  674. lfx/template/template/__init__.py +0 -0
  675. lfx/template/template/base.py +100 -0
  676. lfx/template/utils.py +217 -0
  677. lfx/type_extraction/__init__.py +19 -0
  678. lfx/type_extraction/type_extraction.py +75 -0
  679. lfx/type_extraction.py +80 -0
  680. lfx/utils/__init__.py +1 -0
  681. lfx/utils/async_helpers.py +42 -0
  682. lfx/utils/component_utils.py +154 -0
  683. lfx/utils/concurrency.py +60 -0
  684. lfx/utils/connection_string_parser.py +11 -0
  685. lfx/utils/constants.py +205 -0
  686. lfx/utils/data_structure.py +212 -0
  687. lfx/utils/exceptions.py +22 -0
  688. lfx/utils/helpers.py +28 -0
  689. lfx/utils/image.py +73 -0
  690. lfx/utils/lazy_load.py +15 -0
  691. lfx/utils/request_utils.py +18 -0
  692. lfx/utils/schemas.py +139 -0
  693. lfx/utils/util.py +481 -0
  694. lfx/utils/util_strings.py +56 -0
  695. lfx/utils/version.py +24 -0
  696. lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
  697. lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
  698. lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
  699. lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,186 @@
1
+ import json
2
+
3
+ import tiktoken
4
+ from docling_core.transforms.chunker import BaseChunker, DocMeta
5
+ from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
6
+
7
+ from lfx.base.data.docling_utils import extract_docling_documents
8
+ from lfx.custom import Component
9
+ from lfx.io import DropdownInput, HandleInput, IntInput, MessageTextInput, Output, StrInput
10
+ from lfx.schema import Data, DataFrame
11
+
12
+
13
class ChunkDoclingDocumentComponent(Component):
    """Split DoclingDocument objects into chunks using a Docling chunker.

    The component reads documents from a ``Data``/``DataFrame`` input, runs
    either the ``HybridChunker`` (tokenizer-aware) or the
    ``HierarchicalChunker`` over each of them and returns one row per chunk.
    """

    display_name: str = "Chunk DoclingDocument"
    description: str = "Use the DoclingDocument chunkers to split the document into chunks."
    documentation = "https://docling-project.github.io/docling/concepts/chunking/"
    icon = "Docling"
    name = "ChunkDoclingDocument"

    inputs = [
        HandleInput(
            name="data_inputs",
            display_name="Data or DataFrame",
            info="The data with documents to split in chunks.",
            input_types=["Data", "DataFrame"],
            required=True,
        ),
        DropdownInput(
            name="chunker",
            display_name="Chunker",
            options=["HybridChunker", "HierarchicalChunker"],
            info="Which chunker to use.",
            value="HybridChunker",
            real_time_refresh=True,
        ),
        DropdownInput(
            name="provider",
            display_name="Provider",
            options=["Hugging Face", "OpenAI"],
            info="Which tokenizer provider.",
            value="Hugging Face",
            show=True,
            real_time_refresh=True,
            advanced=True,
            dynamic=True,
        ),
        StrInput(
            name="hf_model_name",
            display_name="HF model name",
            info=(
                "Model name of the tokenizer to use with the HybridChunker when Hugging Face is chosen as a tokenizer."
            ),
            value="sentence-transformers/all-MiniLM-L6-v2",
            show=True,
            advanced=True,
            dynamic=True,
        ),
        StrInput(
            name="openai_model_name",
            display_name="OpenAI model name",
            info="Model name of the tokenizer to use with the HybridChunker when OpenAI is chosen as a tokenizer.",
            value="gpt-4o",
            show=False,
            advanced=True,
            dynamic=True,
        ),
        IntInput(
            name="max_tokens",
            display_name="Maximum tokens",
            info="Maximum number of tokens for the HybridChunker.",
            show=True,
            required=False,
            advanced=True,
            dynamic=True,
        ),
        MessageTextInput(
            name="doc_key",
            display_name="Doc Key",
            info="The key to use for the DoclingDocument column.",
            value="doc",
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="DataFrame", name="dataframe", method="chunk_documents"),
    ]

    def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
        """Show or hide the tokenizer-related fields after a dropdown change.

        Args:
            build_config: The component build configuration; mutated in place.
            field_value: The new value of the field that changed.
            field_name: The name of the field that changed.

        Returns:
            The same ``build_config`` object with updated ``show`` flags.
        """
        if field_name == "chunker":
            # Tokenizer settings only apply to the HybridChunker.
            is_hybrid = field_value == "HybridChunker"
            provider_type = build_config["provider"]["value"]
            build_config["provider"]["show"] = is_hybrid
            build_config["hf_model_name"]["show"] = is_hybrid and provider_type == "Hugging Face"
            build_config["openai_model_name"]["show"] = is_hybrid and provider_type == "OpenAI"
            build_config["max_tokens"]["show"] = is_hybrid
        elif (
            field_name == "provider"
            and build_config["chunker"]["value"] == "HybridChunker"
            and field_value in ("Hugging Face", "OpenAI")
        ):
            # Only the model-name field of the selected provider stays visible.
            build_config["hf_model_name"]["show"] = field_value == "Hugging Face"
            build_config["openai_model_name"]["show"] = field_value == "OpenAI"

        return build_config

    def _docs_to_data(self, docs) -> list[Data]:
        """Convert objects exposing ``page_content`` and ``metadata`` into ``Data`` items."""
        return [Data(text=doc.page_content, data=doc.metadata) for doc in docs]

    def _build_tokenizer(self):
        """Create the tokenizer for the HybridChunker from the selected provider.

        Raises:
            ImportError: If the tokenizer class of the selected provider cannot be imported.
            ValueError: If ``self.provider`` is not one of the supported providers.
        """
        # A falsy value (unset field or 0) means "no explicit limit".
        max_tokens: int | None = self.max_tokens or None

        if self.provider == "Hugging Face":
            try:
                from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
            except ImportError as e:
                msg = (
                    "HuggingFaceTokenizer is not installed."
                    " Please install it with `uv pip install docling-core[chunking]`"
                )
                raise ImportError(msg) from e
            return HuggingFaceTokenizer.from_pretrained(
                model_name=self.hf_model_name,
                max_tokens=max_tokens,
            )

        if self.provider == "OpenAI":
            try:
                from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
            except ImportError as e:
                msg = (
                    "OpenAITokenizer is not installed."
                    " Please install it with `uv pip install docling-core[chunking]`"
                    " or `uv pip install transformers`"
                )
                raise ImportError(msg) from e
            if max_tokens is None:
                max_tokens = 128 * 1024  # context window length required for OpenAI tokenizers
            return OpenAITokenizer(
                tokenizer=tiktoken.encoding_for_model(self.openai_model_name),
                max_tokens=max_tokens,
            )

        # Previously an unknown provider surfaced as an UnboundLocalError.
        msg = f"Unsupported tokenizer provider: {self.provider!r}. Expected 'Hugging Face' or 'OpenAI'."
        raise ValueError(msg)

    def _build_chunker(self) -> BaseChunker:
        """Instantiate the chunker selected in the ``chunker`` input.

        Raises:
            ImportError: If the HybridChunker or its tokenizer cannot be imported.
            ValueError: If ``self.chunker`` or ``self.provider`` has an unsupported value.
        """
        if self.chunker == "HybridChunker":
            try:
                from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
            except ImportError as e:
                msg = (
                    "HybridChunker is not installed. Please install it with `uv pip install docling-core[chunking]` "
                    "or `uv pip install transformers`"
                )
                raise ImportError(msg) from e
            return HybridChunker(tokenizer=self._build_tokenizer())

        if self.chunker == "HierarchicalChunker":
            return HierarchicalChunker()

        # Previously an unknown chunker surfaced as an UnboundLocalError.
        msg = f"Unsupported chunker: {self.chunker!r}. Expected 'HybridChunker' or 'HierarchicalChunker'."
        raise ValueError(msg)

    def chunk_documents(self) -> DataFrame:
        """Chunk every input document and return the chunks as a DataFrame.

        Each row holds the contextualized chunk text (``text``), the binary
        hash of the originating document (``document_id``) and a JSON list of
        the references of the document items covered by the chunk
        (``doc_items``).

        Raises:
            ImportError: If a required chunker or tokenizer cannot be imported.
            ValueError: If the chunker or provider selection is not supported.
            TypeError: If anything fails while the documents are being chunked.
        """
        documents = extract_docling_documents(self.data_inputs, self.doc_key)
        chunker = self._build_chunker()

        results: list[Data] = []
        try:
            for doc in documents:
                for chunk in chunker.chunk(dl_doc=doc):
                    enriched_text = chunker.contextualize(chunk=chunk)
                    meta = DocMeta.model_validate(chunk.meta)

                    results.append(
                        Data(
                            data={
                                "text": enriched_text,
                                "document_id": f"{doc.origin.binary_hash}",
                                "doc_items": json.dumps([item.self_ref for item in meta.doc_items]),
                            }
                        )
                    )

        except Exception as e:
            # Callers see every chunking failure as a TypeError; the cause is chained.
            msg = f"Error splitting text: {e}"
            raise TypeError(msg) from e

        return DataFrame(results)
@@ -0,0 +1,231 @@
1
+ import time
2
+ from multiprocessing import Queue, get_context
3
+ from queue import Empty
4
+
5
+ from lfx.base.data import BaseFileComponent
6
+ from lfx.base.data.docling_utils import docling_worker
7
+ from lfx.inputs import DropdownInput
8
+ from lfx.schema import Data
9
+
10
+
11
class DoclingInlineComponent(BaseFileComponent):
    """Process input files with Docling running locally in a worker process.

    The files are handed to ``docling_worker`` in a separate process started
    with the ``spawn`` context; the result is read back through a
    multiprocessing queue while the health of the worker is monitored.
    """

    display_name = "Docling"
    description = "Uses Docling to process input documents running the Docling models locally."
    documentation = "https://docling-project.github.io/docling/"
    trace_type = "tool"
    icon = "Docling"
    name = "DoclingInline"

    # https://docling-project.github.io/docling/usage/supported_formats/
    VALID_EXTENSIONS = [
        "adoc",
        "asciidoc",
        "asc",
        "bmp",
        "csv",
        "dotx",
        "dotm",
        "docm",
        "docx",
        "htm",
        "html",
        "jpeg",
        "json",
        "md",
        "pdf",
        "png",
        "potx",
        "ppsx",
        "pptm",
        "potm",
        "ppsm",
        "pptx",
        "tiff",
        "txt",
        "xls",
        "xlsx",
        "xhtml",
        "xml",
        "webp",
    ]

    inputs = [
        *BaseFileComponent.get_base_inputs(),
        DropdownInput(
            name="pipeline",
            display_name="Pipeline",
            info="Docling pipeline to use",
            options=["standard", "vlm"],
            real_time_refresh=False,
            value="standard",
        ),
        DropdownInput(
            name="ocr_engine",
            display_name="OCR Engine",
            info="OCR engine to use. None will disable OCR.",
            options=["None", "easyocr", "tesserocr", "rapidocr", "ocrmac"],
            real_time_refresh=False,
            value="None",
        ),
        # TODO: expose more Docling options
    ]

    outputs = [
        *BaseFileComponent.get_base_outputs(),
    ]

    def _wait_for_result_with_process_monitoring(self, queue: Queue, proc, timeout: int = 300):
        """Wait for a result from the queue while monitoring process health.

        Args:
            queue: Queue on which the worker publishes its result.
            proc: The worker process being monitored.
            timeout: Maximum number of seconds to wait overall.

        Returns:
            The object the worker put on the queue.

        Raises:
            RuntimeError: If the worker died without producing a result.
            TimeoutError: If no result arrived within ``timeout`` seconds.
        """
        # A monotonic clock keeps the deadline immune to wall-clock adjustments.
        deadline = time.monotonic() + timeout

        while time.monotonic() < deadline:
            if not proc.is_alive():
                # The process is gone; pick up a result it may have sent before exiting.
                try:
                    result = queue.get_nowait()
                except Empty:
                    msg = f"Worker process crashed unexpectedly without producing result. Exit code: {proc.exitcode}"
                    raise RuntimeError(msg) from None
                else:
                    self.log("Process completed and result retrieved")
                    return result

            # Poll with a short timeout so the liveness check above runs regularly.
            try:
                result = queue.get(timeout=1)
            except Empty:
                continue
            else:
                self.log("Result received from worker process")
                return result

        msg = f"Process timed out after {timeout} seconds"
        raise TimeoutError(msg)

    def _terminate_process_gracefully(self, proc, timeout_terminate: int = 10, timeout_kill: int = 5):
        """Terminate the process with escalating signals.

        First tries SIGTERM and, if the process is still alive afterwards, SIGKILL.

        Args:
            proc: The process to stop.
            timeout_terminate: Seconds to wait after ``terminate()``.
            timeout_kill: Seconds to wait after ``kill()``.
        """
        if not proc.is_alive():
            return

        self.log("Attempting graceful process termination with SIGTERM")
        proc.terminate()  # Send SIGTERM
        proc.join(timeout=timeout_terminate)

        if proc.is_alive():
            self.log("Process didn't respond to SIGTERM, using SIGKILL")
            proc.kill()  # Send SIGKILL
            proc.join(timeout=timeout_kill)

            if proc.is_alive():
                self.log("Warning: Process still alive after SIGKILL")

    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
        """Convert the given files with Docling in a separate worker process.

        Args:
            file_list: Files to process; entries without a path are not sent to the worker.

        Returns:
            The result of ``rollup_data`` for ``file_list`` and the processed data.

        Raises:
            ImportError: If Docling is not installed (detected here or reported by the worker).
            RuntimeError: If the worker crashed or reported an error.
            TimeoutError: If the worker did not produce a result in time.
        """
        try:
            # Imported only to fail fast with an actionable message when the optional
            # dependency is missing; the names themselves are not used in this process.
            from docling.datamodel.base_models import InputFormat  # noqa: F401
            from docling.datamodel.pipeline_options import (  # noqa: F401
                OcrOptions,
                PdfPipelineOptions,
                VlmPipelineOptions,
            )
            from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption  # noqa: F401
            from docling.models.factories import get_ocr_factory  # noqa: F401
            from docling.pipeline.vlm_pipeline import VlmPipeline  # noqa: F401
        except ImportError as e:
            msg = (
                "Docling is an optional dependency. Install with `uv pip install 'langflow[docling]'` or refer to the "
                "documentation on how to install optional dependencies."
            )
            raise ImportError(msg) from e

        file_paths = [file.path for file in file_list if file.path]

        if not file_paths:
            self.log("No files to process.")
            return file_list

        ctx = get_context("spawn")
        queue: Queue = ctx.Queue()
        proc = ctx.Process(
            target=docling_worker,
            args=(file_paths, queue, self.pipeline, self.ocr_engine),
        )

        result = None
        try:
            # Started inside the try block so the queue is cleaned up even if start() fails.
            proc.start()
            result = self._wait_for_result_with_process_monitoring(queue, proc, timeout=300)
        except KeyboardInterrupt:
            self.log("Docling process cancelled by user")
            result = []
        except Exception as e:
            self.log(f"Error during processing: {e}")
            raise
        finally:
            try:
                self._terminate_process_gracefully(proc)
            finally:
                # Always close and clean up the queue resources.
                try:
                    queue.close()
                    queue.join_thread()
                except Exception as e:  # noqa: BLE001
                    # Cleanup errors are logged but must not mask the real outcome.
                    self.log(f"Warning: Error during queue cleanup - {e}")

        # The worker reports failures as a dict carrying an "error" entry.
        if isinstance(result, dict) and "error" in result:
            msg = result["error"]
            if msg.startswith("Docling is not installed"):
                raise ImportError(msg)
            # An interrupt is handled gracefully: return an empty result instead of raising.
            if "Worker interrupted by SIGINT" in msg or "shutdown" in result:
                self.log("Docling process cancelled by user")
                result = []
            else:
                raise RuntimeError(msg)

        processed_data = [Data(data={"doc": r["document"], "file_path": r["file_path"]}) if r else None for r in result]
        return self.rollup_data(file_list, processed_data)
@@ -0,0 +1,193 @@
1
+ import base64
2
+ import time
3
+ from concurrent.futures import Future, ThreadPoolExecutor
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import httpx
8
+ from docling_core.types.doc import DoclingDocument
9
+ from pydantic import ValidationError
10
+
11
+ from lfx.base.data import BaseFileComponent
12
+ from lfx.inputs import IntInput, NestedDictInput, StrInput
13
+ from lfx.inputs.inputs import FloatInput
14
+ from lfx.schema import Data
15
+
16
+
17
class DoclingRemoteComponent(BaseFileComponent):
    """Process input files by sending them to a Docling Serve instance.

    Every file is submitted as an asynchronous conversion task, the task
    status is polled until it finishes, and the resulting JSON document is
    validated into a ``DoclingDocument``.
    """

    display_name = "Docling Serve"
    description = "Uses Docling to process input documents connecting to your instance of Docling Serve."
    documentation = "https://docling-project.github.io/docling/"
    trace_type = "tool"
    icon = "Docling"
    name = "DoclingRemote"

    # Number of 5xx responses tolerated while polling the status of one task.
    MAX_500_RETRIES = 5

    # https://docling-project.github.io/docling/usage/supported_formats/
    VALID_EXTENSIONS = [
        "adoc",
        "asciidoc",
        "asc",
        "bmp",
        "csv",
        "dotx",
        "dotm",
        "docm",
        "docx",
        "htm",
        "html",
        "jpeg",
        "json",
        "md",
        "pdf",
        "png",
        "potx",
        "ppsx",
        "pptm",
        "potm",
        "ppsm",
        "pptx",
        "tiff",
        "txt",
        "xls",
        "xlsx",
        "xhtml",
        "xml",
        "webp",
    ]

    inputs = [
        *BaseFileComponent.get_base_inputs(),
        StrInput(
            name="api_url",
            display_name="Server address",
            info="URL of the Docling Serve instance.",
            required=True,
        ),
        IntInput(
            name="max_concurrency",
            display_name="Concurrency",
            info="Maximum number of concurrent requests for the server.",
            advanced=True,
            value=2,
        ),
        FloatInput(
            name="max_poll_timeout",
            display_name="Maximum poll time",
            info="Maximum waiting time for the document conversion to complete.",
            advanced=True,
            value=3600,
        ),
        NestedDictInput(
            name="api_headers",
            display_name="HTTP headers",
            advanced=True,
            required=False,
            info="Optional dictionary of additional headers required for connecting to Docling Serve.",
        ),
        NestedDictInput(
            name="docling_serve_opts",
            display_name="Docling options",
            advanced=True,
            required=False,
            info=(
                "Optional dictionary of additional options. "
                "See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information."
            ),
        ),
    ]

    outputs = [
        *BaseFileComponent.get_base_outputs(),
    ]

    def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:
        """Convert the given files through the Docling Serve HTTP API.

        Args:
            file_list: Files to convert. Entries without a path are not
                submitted and get ``None`` as their processed data.

        Returns:
            The result of ``rollup_data`` for ``file_list`` and the processed
            data, where the processed data is aligned index-by-index with
            ``file_list``.

        Raises:
            httpx.HTTPStatusError: If the server answers a request with an error status.
            httpx.RequestError: If a request could not be performed.
            RuntimeError: If a conversion exceeds ``max_poll_timeout``.
            KeyError, ValueError: If a server response lacks an expected field or is malformed.
        """
        # Tolerate a trailing slash in the configured address to avoid "//v1alpha".
        base_url = f"{str(self.api_url).rstrip('/')}/v1alpha"

        def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:
            """Submit one file, poll until the task finishes and return the parsed document."""
            encoded_doc = base64.b64encode(file_path.read_bytes()).decode()
            payload = {
                "options": options,
                "file_sources": [{"base64_string": encoded_doc, "filename": file_path.name}],
            }

            response = client.post(f"{base_url}/convert/source/async", json=payload)
            response.raise_for_status()
            task = response.json()

            http_failures = 0
            retry_status_start = 500
            retry_status_end = 600
            start_wait_time = time.monotonic()
            while task["task_status"] not in ("success", "failure"):
                # Give up once polling has exceeded the configured maximum.
                processing_time = time.monotonic() - start_wait_time
                if processing_time >= self.max_poll_timeout:
                    msg = (
                        f"Processing time {processing_time=} exceeds the maximum poll timeout "
                        f"{self.max_poll_timeout=}. "
                        "Please increase the max_poll_timeout parameter or review why the processing "
                        "takes long on the server."
                    )
                    self.log(msg)
                    raise RuntimeError(msg)

                # Wait before asking for a new status update.
                time.sleep(2)
                response = client.get(f"{base_url}/status/poll/{task['task_id']}")

                # 5xx answers on the status call are retried a limited number of times.
                if retry_status_start <= response.status_code < retry_status_end:
                    http_failures += 1
                    if http_failures > self.MAX_500_RETRIES:
                        self.log(f"The status requests got a http response {response.status_code} too many times.")
                        return None
                    continue

                # Any other error status is a hard failure; do not parse it as a task.
                response.raise_for_status()
                task = response.json()

            result_resp = client.get(f"{base_url}/result/{task['task_id']}")
            result_resp.raise_for_status()
            result = result_resp.json()

            if "json_content" not in result["document"] or result["document"]["json_content"] is None:
                self.log("No JSON DoclingDocument found in the result.")
                return None

            try:
                doc = DoclingDocument.model_validate(result["document"]["json_content"])
                return Data(data={"doc": doc, "file_path": str(file_path)})
            except ValidationError as e:
                self.log(f"Error validating the document. {e}")
                return None

        # User-supplied options take precedence over the defaults.
        docling_options = {
            "to_formats": ["json"],
            "image_export_mode": "placeholder",
            "return_as_file": False,
            **(self.docling_serve_opts or {}),
        }

        # Pre-sized so each result lands at the position of its file; files
        # without a path simply keep their None placeholder.
        processed_data: list[Data | None] = [None] * len(file_list)
        with (
            httpx.Client(headers=self.api_headers) as client,
            ThreadPoolExecutor(max_workers=self.max_concurrency) as executor,
        ):
            futures: list[tuple[int, Future]] = [
                (index, executor.submit(_convert_document, client, file.path, docling_options))
                for index, file in enumerate(file_list)
                if file.path is not None
            ]

            for index, future in futures:
                try:
                    processed_data[index] = future.result()
                except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:
                    self.log(f"Docling remote processing failed: {exc}")
                    raise

        return self.rollup_data(file_list, processed_data)