lfx-nightly 0.1.11.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (699) hide show
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/base/__init__.py +0 -0
  4. lfx/base/agents/__init__.py +0 -0
  5. lfx/base/agents/agent.py +268 -0
  6. lfx/base/agents/callback.py +130 -0
  7. lfx/base/agents/context.py +109 -0
  8. lfx/base/agents/crewai/__init__.py +0 -0
  9. lfx/base/agents/crewai/crew.py +231 -0
  10. lfx/base/agents/crewai/tasks.py +12 -0
  11. lfx/base/agents/default_prompts.py +23 -0
  12. lfx/base/agents/errors.py +15 -0
  13. lfx/base/agents/events.py +346 -0
  14. lfx/base/agents/utils.py +205 -0
  15. lfx/base/astra_assistants/__init__.py +0 -0
  16. lfx/base/astra_assistants/util.py +171 -0
  17. lfx/base/chains/__init__.py +0 -0
  18. lfx/base/chains/model.py +19 -0
  19. lfx/base/composio/__init__.py +0 -0
  20. lfx/base/composio/composio_base.py +1291 -0
  21. lfx/base/compressors/__init__.py +0 -0
  22. lfx/base/compressors/model.py +60 -0
  23. lfx/base/constants.py +46 -0
  24. lfx/base/curl/__init__.py +0 -0
  25. lfx/base/curl/parse.py +188 -0
  26. lfx/base/data/__init__.py +5 -0
  27. lfx/base/data/base_file.py +685 -0
  28. lfx/base/data/docling_utils.py +245 -0
  29. lfx/base/data/utils.py +198 -0
  30. lfx/base/document_transformers/__init__.py +0 -0
  31. lfx/base/document_transformers/model.py +43 -0
  32. lfx/base/embeddings/__init__.py +0 -0
  33. lfx/base/embeddings/aiml_embeddings.py +62 -0
  34. lfx/base/embeddings/model.py +26 -0
  35. lfx/base/flow_processing/__init__.py +0 -0
  36. lfx/base/flow_processing/utils.py +86 -0
  37. lfx/base/huggingface/__init__.py +0 -0
  38. lfx/base/huggingface/model_bridge.py +133 -0
  39. lfx/base/io/__init__.py +0 -0
  40. lfx/base/io/chat.py +20 -0
  41. lfx/base/io/text.py +22 -0
  42. lfx/base/langchain_utilities/__init__.py +0 -0
  43. lfx/base/langchain_utilities/model.py +35 -0
  44. lfx/base/langchain_utilities/spider_constants.py +1 -0
  45. lfx/base/langwatch/__init__.py +0 -0
  46. lfx/base/langwatch/utils.py +18 -0
  47. lfx/base/mcp/__init__.py +0 -0
  48. lfx/base/mcp/constants.py +2 -0
  49. lfx/base/mcp/util.py +1398 -0
  50. lfx/base/memory/__init__.py +0 -0
  51. lfx/base/memory/memory.py +49 -0
  52. lfx/base/memory/model.py +38 -0
  53. lfx/base/models/__init__.py +3 -0
  54. lfx/base/models/aiml_constants.py +51 -0
  55. lfx/base/models/anthropic_constants.py +47 -0
  56. lfx/base/models/aws_constants.py +151 -0
  57. lfx/base/models/chat_result.py +76 -0
  58. lfx/base/models/google_generative_ai_constants.py +70 -0
  59. lfx/base/models/groq_constants.py +134 -0
  60. lfx/base/models/model.py +375 -0
  61. lfx/base/models/model_input_constants.py +307 -0
  62. lfx/base/models/model_metadata.py +41 -0
  63. lfx/base/models/model_utils.py +8 -0
  64. lfx/base/models/novita_constants.py +35 -0
  65. lfx/base/models/ollama_constants.py +49 -0
  66. lfx/base/models/openai_constants.py +122 -0
  67. lfx/base/models/sambanova_constants.py +18 -0
  68. lfx/base/processing/__init__.py +0 -0
  69. lfx/base/prompts/__init__.py +0 -0
  70. lfx/base/prompts/api_utils.py +224 -0
  71. lfx/base/prompts/utils.py +61 -0
  72. lfx/base/textsplitters/__init__.py +0 -0
  73. lfx/base/textsplitters/model.py +28 -0
  74. lfx/base/tools/__init__.py +0 -0
  75. lfx/base/tools/base.py +26 -0
  76. lfx/base/tools/component_tool.py +325 -0
  77. lfx/base/tools/constants.py +49 -0
  78. lfx/base/tools/flow_tool.py +132 -0
  79. lfx/base/tools/run_flow.py +224 -0
  80. lfx/base/vectorstores/__init__.py +0 -0
  81. lfx/base/vectorstores/model.py +193 -0
  82. lfx/base/vectorstores/utils.py +22 -0
  83. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  84. lfx/cli/__init__.py +5 -0
  85. lfx/cli/commands.py +319 -0
  86. lfx/cli/common.py +650 -0
  87. lfx/cli/run.py +441 -0
  88. lfx/cli/script_loader.py +247 -0
  89. lfx/cli/serve_app.py +546 -0
  90. lfx/cli/validation.py +69 -0
  91. lfx/components/FAISS/__init__.py +34 -0
  92. lfx/components/FAISS/faiss.py +111 -0
  93. lfx/components/Notion/__init__.py +19 -0
  94. lfx/components/Notion/add_content_to_page.py +269 -0
  95. lfx/components/Notion/create_page.py +94 -0
  96. lfx/components/Notion/list_database_properties.py +68 -0
  97. lfx/components/Notion/list_pages.py +122 -0
  98. lfx/components/Notion/list_users.py +77 -0
  99. lfx/components/Notion/page_content_viewer.py +93 -0
  100. lfx/components/Notion/search.py +111 -0
  101. lfx/components/Notion/update_page_property.py +114 -0
  102. lfx/components/__init__.py +411 -0
  103. lfx/components/_importing.py +42 -0
  104. lfx/components/agentql/__init__.py +3 -0
  105. lfx/components/agentql/agentql_api.py +151 -0
  106. lfx/components/agents/__init__.py +34 -0
  107. lfx/components/agents/agent.py +558 -0
  108. lfx/components/agents/mcp_component.py +501 -0
  109. lfx/components/aiml/__init__.py +37 -0
  110. lfx/components/aiml/aiml.py +112 -0
  111. lfx/components/aiml/aiml_embeddings.py +37 -0
  112. lfx/components/amazon/__init__.py +36 -0
  113. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  114. lfx/components/amazon/amazon_bedrock_model.py +124 -0
  115. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  116. lfx/components/anthropic/__init__.py +34 -0
  117. lfx/components/anthropic/anthropic.py +187 -0
  118. lfx/components/apify/__init__.py +5 -0
  119. lfx/components/apify/apify_actor.py +325 -0
  120. lfx/components/arxiv/__init__.py +3 -0
  121. lfx/components/arxiv/arxiv.py +163 -0
  122. lfx/components/assemblyai/__init__.py +46 -0
  123. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  124. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  125. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  126. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  127. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  128. lfx/components/azure/__init__.py +37 -0
  129. lfx/components/azure/azure_openai.py +95 -0
  130. lfx/components/azure/azure_openai_embeddings.py +83 -0
  131. lfx/components/baidu/__init__.py +32 -0
  132. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  133. lfx/components/bing/__init__.py +3 -0
  134. lfx/components/bing/bing_search_api.py +61 -0
  135. lfx/components/cassandra/__init__.py +40 -0
  136. lfx/components/cassandra/cassandra.py +264 -0
  137. lfx/components/cassandra/cassandra_chat.py +92 -0
  138. lfx/components/cassandra/cassandra_graph.py +238 -0
  139. lfx/components/chains/__init__.py +3 -0
  140. lfx/components/chroma/__init__.py +34 -0
  141. lfx/components/chroma/chroma.py +167 -0
  142. lfx/components/cleanlab/__init__.py +40 -0
  143. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  144. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  145. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  146. lfx/components/clickhouse/__init__.py +34 -0
  147. lfx/components/clickhouse/clickhouse.py +135 -0
  148. lfx/components/cloudflare/__init__.py +32 -0
  149. lfx/components/cloudflare/cloudflare.py +81 -0
  150. lfx/components/cohere/__init__.py +40 -0
  151. lfx/components/cohere/cohere_embeddings.py +81 -0
  152. lfx/components/cohere/cohere_models.py +46 -0
  153. lfx/components/cohere/cohere_rerank.py +51 -0
  154. lfx/components/composio/__init__.py +74 -0
  155. lfx/components/composio/composio_api.py +268 -0
  156. lfx/components/composio/dropbox_compnent.py +11 -0
  157. lfx/components/composio/github_composio.py +11 -0
  158. lfx/components/composio/gmail_composio.py +38 -0
  159. lfx/components/composio/googlecalendar_composio.py +11 -0
  160. lfx/components/composio/googlemeet_composio.py +11 -0
  161. lfx/components/composio/googletasks_composio.py +8 -0
  162. lfx/components/composio/linear_composio.py +11 -0
  163. lfx/components/composio/outlook_composio.py +11 -0
  164. lfx/components/composio/reddit_composio.py +11 -0
  165. lfx/components/composio/slack_composio.py +582 -0
  166. lfx/components/composio/slackbot_composio.py +11 -0
  167. lfx/components/composio/supabase_composio.py +11 -0
  168. lfx/components/composio/todoist_composio.py +11 -0
  169. lfx/components/composio/youtube_composio.py +11 -0
  170. lfx/components/confluence/__init__.py +3 -0
  171. lfx/components/confluence/confluence.py +84 -0
  172. lfx/components/couchbase/__init__.py +34 -0
  173. lfx/components/couchbase/couchbase.py +102 -0
  174. lfx/components/crewai/__init__.py +49 -0
  175. lfx/components/crewai/crewai.py +107 -0
  176. lfx/components/crewai/hierarchical_crew.py +46 -0
  177. lfx/components/crewai/hierarchical_task.py +44 -0
  178. lfx/components/crewai/sequential_crew.py +52 -0
  179. lfx/components/crewai/sequential_task.py +73 -0
  180. lfx/components/crewai/sequential_task_agent.py +143 -0
  181. lfx/components/custom_component/__init__.py +34 -0
  182. lfx/components/custom_component/custom_component.py +31 -0
  183. lfx/components/data/__init__.py +64 -0
  184. lfx/components/data/api_request.py +544 -0
  185. lfx/components/data/csv_to_data.py +95 -0
  186. lfx/components/data/directory.py +113 -0
  187. lfx/components/data/file.py +577 -0
  188. lfx/components/data/json_to_data.py +98 -0
  189. lfx/components/data/news_search.py +164 -0
  190. lfx/components/data/rss.py +69 -0
  191. lfx/components/data/sql_executor.py +101 -0
  192. lfx/components/data/url.py +311 -0
  193. lfx/components/data/web_search.py +112 -0
  194. lfx/components/data/webhook.py +56 -0
  195. lfx/components/datastax/__init__.py +70 -0
  196. lfx/components/datastax/astra_assistant_manager.py +306 -0
  197. lfx/components/datastax/astra_db.py +75 -0
  198. lfx/components/datastax/astra_vectorize.py +124 -0
  199. lfx/components/datastax/astradb.py +1285 -0
  200. lfx/components/datastax/astradb_cql.py +314 -0
  201. lfx/components/datastax/astradb_graph.py +330 -0
  202. lfx/components/datastax/astradb_tool.py +414 -0
  203. lfx/components/datastax/astradb_vectorstore.py +1285 -0
  204. lfx/components/datastax/cassandra.py +92 -0
  205. lfx/components/datastax/create_assistant.py +58 -0
  206. lfx/components/datastax/create_thread.py +32 -0
  207. lfx/components/datastax/dotenv.py +35 -0
  208. lfx/components/datastax/get_assistant.py +37 -0
  209. lfx/components/datastax/getenvvar.py +30 -0
  210. lfx/components/datastax/graph_rag.py +141 -0
  211. lfx/components/datastax/hcd.py +314 -0
  212. lfx/components/datastax/list_assistants.py +25 -0
  213. lfx/components/datastax/run.py +89 -0
  214. lfx/components/deactivated/__init__.py +15 -0
  215. lfx/components/deactivated/amazon_kendra.py +66 -0
  216. lfx/components/deactivated/chat_litellm_model.py +158 -0
  217. lfx/components/deactivated/code_block_extractor.py +26 -0
  218. lfx/components/deactivated/documents_to_data.py +22 -0
  219. lfx/components/deactivated/embed.py +16 -0
  220. lfx/components/deactivated/extract_key_from_data.py +46 -0
  221. lfx/components/deactivated/json_document_builder.py +57 -0
  222. lfx/components/deactivated/list_flows.py +20 -0
  223. lfx/components/deactivated/mcp_sse.py +61 -0
  224. lfx/components/deactivated/mcp_stdio.py +62 -0
  225. lfx/components/deactivated/merge_data.py +93 -0
  226. lfx/components/deactivated/message.py +37 -0
  227. lfx/components/deactivated/metal.py +54 -0
  228. lfx/components/deactivated/multi_query.py +59 -0
  229. lfx/components/deactivated/retriever.py +43 -0
  230. lfx/components/deactivated/selective_passthrough.py +77 -0
  231. lfx/components/deactivated/should_run_next.py +40 -0
  232. lfx/components/deactivated/split_text.py +63 -0
  233. lfx/components/deactivated/store_message.py +24 -0
  234. lfx/components/deactivated/sub_flow.py +124 -0
  235. lfx/components/deactivated/vectara_self_query.py +76 -0
  236. lfx/components/deactivated/vector_store.py +24 -0
  237. lfx/components/deepseek/__init__.py +34 -0
  238. lfx/components/deepseek/deepseek.py +136 -0
  239. lfx/components/docling/__init__.py +43 -0
  240. lfx/components/docling/chunk_docling_document.py +186 -0
  241. lfx/components/docling/docling_inline.py +231 -0
  242. lfx/components/docling/docling_remote.py +193 -0
  243. lfx/components/docling/export_docling_document.py +117 -0
  244. lfx/components/documentloaders/__init__.py +3 -0
  245. lfx/components/duckduckgo/__init__.py +3 -0
  246. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  247. lfx/components/elastic/__init__.py +37 -0
  248. lfx/components/elastic/elasticsearch.py +267 -0
  249. lfx/components/elastic/opensearch.py +243 -0
  250. lfx/components/embeddings/__init__.py +37 -0
  251. lfx/components/embeddings/similarity.py +76 -0
  252. lfx/components/embeddings/text_embedder.py +64 -0
  253. lfx/components/exa/__init__.py +3 -0
  254. lfx/components/exa/exa_search.py +68 -0
  255. lfx/components/firecrawl/__init__.py +43 -0
  256. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  257. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  258. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  259. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  260. lfx/components/git/__init__.py +4 -0
  261. lfx/components/git/git.py +262 -0
  262. lfx/components/git/gitextractor.py +196 -0
  263. lfx/components/glean/__init__.py +3 -0
  264. lfx/components/glean/glean_search_api.py +173 -0
  265. lfx/components/google/__init__.py +17 -0
  266. lfx/components/google/gmail.py +192 -0
  267. lfx/components/google/google_bq_sql_executor.py +157 -0
  268. lfx/components/google/google_drive.py +92 -0
  269. lfx/components/google/google_drive_search.py +152 -0
  270. lfx/components/google/google_generative_ai.py +147 -0
  271. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  272. lfx/components/google/google_oauth_token.py +89 -0
  273. lfx/components/google/google_search_api_core.py +68 -0
  274. lfx/components/google/google_serper_api_core.py +74 -0
  275. lfx/components/groq/__init__.py +34 -0
  276. lfx/components/groq/groq.py +136 -0
  277. lfx/components/helpers/__init__.py +52 -0
  278. lfx/components/helpers/calculator_core.py +89 -0
  279. lfx/components/helpers/create_list.py +40 -0
  280. lfx/components/helpers/current_date.py +42 -0
  281. lfx/components/helpers/id_generator.py +42 -0
  282. lfx/components/helpers/memory.py +251 -0
  283. lfx/components/helpers/output_parser.py +45 -0
  284. lfx/components/helpers/store_message.py +90 -0
  285. lfx/components/homeassistant/__init__.py +7 -0
  286. lfx/components/homeassistant/home_assistant_control.py +152 -0
  287. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  288. lfx/components/huggingface/__init__.py +37 -0
  289. lfx/components/huggingface/huggingface.py +197 -0
  290. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  291. lfx/components/ibm/__init__.py +34 -0
  292. lfx/components/ibm/watsonx.py +203 -0
  293. lfx/components/ibm/watsonx_embeddings.py +135 -0
  294. lfx/components/icosacomputing/__init__.py +5 -0
  295. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  296. lfx/components/input_output/__init__.py +38 -0
  297. lfx/components/input_output/chat.py +120 -0
  298. lfx/components/input_output/chat_output.py +200 -0
  299. lfx/components/input_output/text.py +27 -0
  300. lfx/components/input_output/text_output.py +29 -0
  301. lfx/components/jigsawstack/__init__.py +23 -0
  302. lfx/components/jigsawstack/ai_scrape.py +126 -0
  303. lfx/components/jigsawstack/ai_web_search.py +136 -0
  304. lfx/components/jigsawstack/file_read.py +115 -0
  305. lfx/components/jigsawstack/file_upload.py +94 -0
  306. lfx/components/jigsawstack/image_generation.py +205 -0
  307. lfx/components/jigsawstack/nsfw.py +60 -0
  308. lfx/components/jigsawstack/object_detection.py +124 -0
  309. lfx/components/jigsawstack/sentiment.py +112 -0
  310. lfx/components/jigsawstack/text_to_sql.py +90 -0
  311. lfx/components/jigsawstack/text_translate.py +77 -0
  312. lfx/components/jigsawstack/vocr.py +107 -0
  313. lfx/components/langchain_utilities/__init__.py +109 -0
  314. lfx/components/langchain_utilities/character.py +53 -0
  315. lfx/components/langchain_utilities/conversation.py +59 -0
  316. lfx/components/langchain_utilities/csv_agent.py +107 -0
  317. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  318. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  319. lfx/components/langchain_utilities/json_agent.py +45 -0
  320. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  321. lfx/components/langchain_utilities/language_recursive.py +49 -0
  322. lfx/components/langchain_utilities/language_semantic.py +138 -0
  323. lfx/components/langchain_utilities/llm_checker.py +39 -0
  324. lfx/components/langchain_utilities/llm_math.py +42 -0
  325. lfx/components/langchain_utilities/natural_language.py +61 -0
  326. lfx/components/langchain_utilities/openai_tools.py +53 -0
  327. lfx/components/langchain_utilities/openapi.py +48 -0
  328. lfx/components/langchain_utilities/recursive_character.py +60 -0
  329. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  330. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  331. lfx/components/langchain_utilities/self_query.py +80 -0
  332. lfx/components/langchain_utilities/spider.py +142 -0
  333. lfx/components/langchain_utilities/sql.py +40 -0
  334. lfx/components/langchain_utilities/sql_database.py +35 -0
  335. lfx/components/langchain_utilities/sql_generator.py +78 -0
  336. lfx/components/langchain_utilities/tool_calling.py +59 -0
  337. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  338. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  339. lfx/components/langchain_utilities/xml_agent.py +71 -0
  340. lfx/components/langwatch/__init__.py +3 -0
  341. lfx/components/langwatch/langwatch.py +278 -0
  342. lfx/components/link_extractors/__init__.py +3 -0
  343. lfx/components/lmstudio/__init__.py +34 -0
  344. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  345. lfx/components/lmstudio/lmstudiomodel.py +129 -0
  346. lfx/components/logic/__init__.py +52 -0
  347. lfx/components/logic/conditional_router.py +171 -0
  348. lfx/components/logic/data_conditional_router.py +125 -0
  349. lfx/components/logic/flow_tool.py +110 -0
  350. lfx/components/logic/listen.py +29 -0
  351. lfx/components/logic/loop.py +125 -0
  352. lfx/components/logic/notify.py +88 -0
  353. lfx/components/logic/pass_message.py +35 -0
  354. lfx/components/logic/run_flow.py +71 -0
  355. lfx/components/logic/sub_flow.py +114 -0
  356. lfx/components/maritalk/__init__.py +32 -0
  357. lfx/components/maritalk/maritalk.py +52 -0
  358. lfx/components/mem0/__init__.py +3 -0
  359. lfx/components/mem0/mem0_chat_memory.py +136 -0
  360. lfx/components/milvus/__init__.py +34 -0
  361. lfx/components/milvus/milvus.py +115 -0
  362. lfx/components/mistral/__init__.py +37 -0
  363. lfx/components/mistral/mistral.py +114 -0
  364. lfx/components/mistral/mistral_embeddings.py +58 -0
  365. lfx/components/models/__init__.py +34 -0
  366. lfx/components/models/embedding_model.py +114 -0
  367. lfx/components/models/language_model.py +144 -0
  368. lfx/components/mongodb/__init__.py +34 -0
  369. lfx/components/mongodb/mongodb_atlas.py +213 -0
  370. lfx/components/needle/__init__.py +3 -0
  371. lfx/components/needle/needle.py +104 -0
  372. lfx/components/notdiamond/__init__.py +34 -0
  373. lfx/components/notdiamond/notdiamond.py +228 -0
  374. lfx/components/novita/__init__.py +32 -0
  375. lfx/components/novita/novita.py +130 -0
  376. lfx/components/nvidia/__init__.py +57 -0
  377. lfx/components/nvidia/nvidia.py +157 -0
  378. lfx/components/nvidia/nvidia_embedding.py +77 -0
  379. lfx/components/nvidia/nvidia_ingest.py +317 -0
  380. lfx/components/nvidia/nvidia_rerank.py +63 -0
  381. lfx/components/nvidia/system_assist.py +65 -0
  382. lfx/components/olivya/__init__.py +3 -0
  383. lfx/components/olivya/olivya.py +116 -0
  384. lfx/components/ollama/__init__.py +37 -0
  385. lfx/components/ollama/ollama.py +330 -0
  386. lfx/components/ollama/ollama_embeddings.py +106 -0
  387. lfx/components/openai/__init__.py +37 -0
  388. lfx/components/openai/openai.py +100 -0
  389. lfx/components/openai/openai_chat_model.py +176 -0
  390. lfx/components/openrouter/__init__.py +32 -0
  391. lfx/components/openrouter/openrouter.py +202 -0
  392. lfx/components/output_parsers/__init__.py +3 -0
  393. lfx/components/perplexity/__init__.py +34 -0
  394. lfx/components/perplexity/perplexity.py +75 -0
  395. lfx/components/pgvector/__init__.py +34 -0
  396. lfx/components/pgvector/pgvector.py +72 -0
  397. lfx/components/pinecone/__init__.py +34 -0
  398. lfx/components/pinecone/pinecone.py +134 -0
  399. lfx/components/processing/__init__.py +117 -0
  400. lfx/components/processing/alter_metadata.py +108 -0
  401. lfx/components/processing/batch_run.py +205 -0
  402. lfx/components/processing/combine_text.py +39 -0
  403. lfx/components/processing/converter.py +159 -0
  404. lfx/components/processing/create_data.py +110 -0
  405. lfx/components/processing/data_operations.py +438 -0
  406. lfx/components/processing/data_to_dataframe.py +70 -0
  407. lfx/components/processing/dataframe_operations.py +313 -0
  408. lfx/components/processing/extract_key.py +53 -0
  409. lfx/components/processing/filter_data.py +42 -0
  410. lfx/components/processing/filter_data_values.py +88 -0
  411. lfx/components/processing/json_cleaner.py +103 -0
  412. lfx/components/processing/lambda_filter.py +154 -0
  413. lfx/components/processing/llm_router.py +499 -0
  414. lfx/components/processing/merge_data.py +90 -0
  415. lfx/components/processing/message_to_data.py +36 -0
  416. lfx/components/processing/parse_data.py +70 -0
  417. lfx/components/processing/parse_dataframe.py +68 -0
  418. lfx/components/processing/parse_json_data.py +90 -0
  419. lfx/components/processing/parser.py +143 -0
  420. lfx/components/processing/prompt.py +67 -0
  421. lfx/components/processing/python_repl_core.py +98 -0
  422. lfx/components/processing/regex.py +82 -0
  423. lfx/components/processing/save_file.py +225 -0
  424. lfx/components/processing/select_data.py +48 -0
  425. lfx/components/processing/split_text.py +141 -0
  426. lfx/components/processing/structured_output.py +202 -0
  427. lfx/components/processing/update_data.py +160 -0
  428. lfx/components/prototypes/__init__.py +34 -0
  429. lfx/components/prototypes/python_function.py +73 -0
  430. lfx/components/qdrant/__init__.py +34 -0
  431. lfx/components/qdrant/qdrant.py +109 -0
  432. lfx/components/redis/__init__.py +37 -0
  433. lfx/components/redis/redis.py +89 -0
  434. lfx/components/redis/redis_chat.py +43 -0
  435. lfx/components/sambanova/__init__.py +32 -0
  436. lfx/components/sambanova/sambanova.py +84 -0
  437. lfx/components/scrapegraph/__init__.py +40 -0
  438. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  439. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  440. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  441. lfx/components/searchapi/__init__.py +34 -0
  442. lfx/components/searchapi/search.py +79 -0
  443. lfx/components/serpapi/__init__.py +3 -0
  444. lfx/components/serpapi/serp.py +115 -0
  445. lfx/components/supabase/__init__.py +34 -0
  446. lfx/components/supabase/supabase.py +76 -0
  447. lfx/components/tavily/__init__.py +4 -0
  448. lfx/components/tavily/tavily_extract.py +117 -0
  449. lfx/components/tavily/tavily_search.py +212 -0
  450. lfx/components/textsplitters/__init__.py +3 -0
  451. lfx/components/toolkits/__init__.py +3 -0
  452. lfx/components/tools/__init__.py +72 -0
  453. lfx/components/tools/calculator.py +108 -0
  454. lfx/components/tools/google_search_api.py +45 -0
  455. lfx/components/tools/google_serper_api.py +115 -0
  456. lfx/components/tools/python_code_structured_tool.py +327 -0
  457. lfx/components/tools/python_repl.py +97 -0
  458. lfx/components/tools/search_api.py +87 -0
  459. lfx/components/tools/searxng.py +145 -0
  460. lfx/components/tools/serp_api.py +119 -0
  461. lfx/components/tools/tavily_search_tool.py +344 -0
  462. lfx/components/tools/wikidata_api.py +102 -0
  463. lfx/components/tools/wikipedia_api.py +49 -0
  464. lfx/components/tools/yahoo_finance.py +129 -0
  465. lfx/components/twelvelabs/__init__.py +52 -0
  466. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  467. lfx/components/twelvelabs/pegasus_index.py +311 -0
  468. lfx/components/twelvelabs/split_video.py +291 -0
  469. lfx/components/twelvelabs/text_embeddings.py +57 -0
  470. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  471. lfx/components/twelvelabs/video_embeddings.py +100 -0
  472. lfx/components/twelvelabs/video_file.py +179 -0
  473. lfx/components/unstructured/__init__.py +3 -0
  474. lfx/components/unstructured/unstructured.py +121 -0
  475. lfx/components/upstash/__init__.py +34 -0
  476. lfx/components/upstash/upstash.py +124 -0
  477. lfx/components/vectara/__init__.py +37 -0
  478. lfx/components/vectara/vectara.py +97 -0
  479. lfx/components/vectara/vectara_rag.py +164 -0
  480. lfx/components/vectorstores/__init__.py +40 -0
  481. lfx/components/vectorstores/astradb.py +1285 -0
  482. lfx/components/vectorstores/astradb_graph.py +319 -0
  483. lfx/components/vectorstores/cassandra.py +264 -0
  484. lfx/components/vectorstores/cassandra_graph.py +238 -0
  485. lfx/components/vectorstores/chroma.py +167 -0
  486. lfx/components/vectorstores/clickhouse.py +135 -0
  487. lfx/components/vectorstores/couchbase.py +102 -0
  488. lfx/components/vectorstores/elasticsearch.py +267 -0
  489. lfx/components/vectorstores/faiss.py +111 -0
  490. lfx/components/vectorstores/graph_rag.py +141 -0
  491. lfx/components/vectorstores/hcd.py +314 -0
  492. lfx/components/vectorstores/local_db.py +261 -0
  493. lfx/components/vectorstores/milvus.py +115 -0
  494. lfx/components/vectorstores/mongodb_atlas.py +213 -0
  495. lfx/components/vectorstores/opensearch.py +243 -0
  496. lfx/components/vectorstores/pgvector.py +72 -0
  497. lfx/components/vectorstores/pinecone.py +134 -0
  498. lfx/components/vectorstores/qdrant.py +109 -0
  499. lfx/components/vectorstores/supabase.py +76 -0
  500. lfx/components/vectorstores/upstash.py +124 -0
  501. lfx/components/vectorstores/vectara.py +97 -0
  502. lfx/components/vectorstores/vectara_rag.py +164 -0
  503. lfx/components/vectorstores/weaviate.py +89 -0
  504. lfx/components/vertexai/__init__.py +37 -0
  505. lfx/components/vertexai/vertexai.py +71 -0
  506. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  507. lfx/components/weaviate/__init__.py +34 -0
  508. lfx/components/weaviate/weaviate.py +89 -0
  509. lfx/components/wikipedia/__init__.py +4 -0
  510. lfx/components/wikipedia/wikidata.py +86 -0
  511. lfx/components/wikipedia/wikipedia.py +53 -0
  512. lfx/components/wolframalpha/__init__.py +3 -0
  513. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  514. lfx/components/xai/__init__.py +32 -0
  515. lfx/components/xai/xai.py +167 -0
  516. lfx/components/yahoosearch/__init__.py +3 -0
  517. lfx/components/yahoosearch/yahoo.py +137 -0
  518. lfx/components/youtube/__init__.py +52 -0
  519. lfx/components/youtube/channel.py +227 -0
  520. lfx/components/youtube/comments.py +231 -0
  521. lfx/components/youtube/playlist.py +33 -0
  522. lfx/components/youtube/search.py +120 -0
  523. lfx/components/youtube/trending.py +285 -0
  524. lfx/components/youtube/video_details.py +263 -0
  525. lfx/components/youtube/youtube_transcripts.py +118 -0
  526. lfx/components/zep/__init__.py +3 -0
  527. lfx/components/zep/zep.py +44 -0
  528. lfx/constants.py +6 -0
  529. lfx/custom/__init__.py +7 -0
  530. lfx/custom/attributes.py +86 -0
  531. lfx/custom/code_parser/__init__.py +3 -0
  532. lfx/custom/code_parser/code_parser.py +361 -0
  533. lfx/custom/custom_component/__init__.py +0 -0
  534. lfx/custom/custom_component/base_component.py +128 -0
  535. lfx/custom/custom_component/component.py +1808 -0
  536. lfx/custom/custom_component/component_with_cache.py +8 -0
  537. lfx/custom/custom_component/custom_component.py +588 -0
  538. lfx/custom/dependency_analyzer.py +165 -0
  539. lfx/custom/directory_reader/__init__.py +3 -0
  540. lfx/custom/directory_reader/directory_reader.py +359 -0
  541. lfx/custom/directory_reader/utils.py +171 -0
  542. lfx/custom/eval.py +12 -0
  543. lfx/custom/schema.py +32 -0
  544. lfx/custom/tree_visitor.py +21 -0
  545. lfx/custom/utils.py +877 -0
  546. lfx/custom/validate.py +488 -0
  547. lfx/events/__init__.py +1 -0
  548. lfx/events/event_manager.py +110 -0
  549. lfx/exceptions/__init__.py +0 -0
  550. lfx/exceptions/component.py +15 -0
  551. lfx/field_typing/__init__.py +91 -0
  552. lfx/field_typing/constants.py +215 -0
  553. lfx/field_typing/range_spec.py +35 -0
  554. lfx/graph/__init__.py +6 -0
  555. lfx/graph/edge/__init__.py +0 -0
  556. lfx/graph/edge/base.py +277 -0
  557. lfx/graph/edge/schema.py +119 -0
  558. lfx/graph/edge/utils.py +0 -0
  559. lfx/graph/graph/__init__.py +0 -0
  560. lfx/graph/graph/ascii.py +202 -0
  561. lfx/graph/graph/base.py +2238 -0
  562. lfx/graph/graph/constants.py +63 -0
  563. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  564. lfx/graph/graph/schema.py +52 -0
  565. lfx/graph/graph/state_model.py +66 -0
  566. lfx/graph/graph/utils.py +1024 -0
  567. lfx/graph/schema.py +75 -0
  568. lfx/graph/state/__init__.py +0 -0
  569. lfx/graph/state/model.py +237 -0
  570. lfx/graph/utils.py +200 -0
  571. lfx/graph/vertex/__init__.py +0 -0
  572. lfx/graph/vertex/base.py +823 -0
  573. lfx/graph/vertex/constants.py +0 -0
  574. lfx/graph/vertex/exceptions.py +4 -0
  575. lfx/graph/vertex/param_handler.py +264 -0
  576. lfx/graph/vertex/schema.py +26 -0
  577. lfx/graph/vertex/utils.py +19 -0
  578. lfx/graph/vertex/vertex_types.py +489 -0
  579. lfx/helpers/__init__.py +1 -0
  580. lfx/helpers/base_model.py +71 -0
  581. lfx/helpers/custom.py +13 -0
  582. lfx/helpers/data.py +167 -0
  583. lfx/helpers/flow.py +194 -0
  584. lfx/inputs/__init__.py +68 -0
  585. lfx/inputs/constants.py +2 -0
  586. lfx/inputs/input_mixin.py +328 -0
  587. lfx/inputs/inputs.py +714 -0
  588. lfx/inputs/validators.py +19 -0
  589. lfx/interface/__init__.py +6 -0
  590. lfx/interface/components.py +489 -0
  591. lfx/interface/importing/__init__.py +5 -0
  592. lfx/interface/importing/utils.py +39 -0
  593. lfx/interface/initialize/__init__.py +3 -0
  594. lfx/interface/initialize/loading.py +224 -0
  595. lfx/interface/listing.py +26 -0
  596. lfx/interface/run.py +16 -0
  597. lfx/interface/utils.py +111 -0
  598. lfx/io/__init__.py +63 -0
  599. lfx/io/schema.py +289 -0
  600. lfx/load/__init__.py +8 -0
  601. lfx/load/load.py +256 -0
  602. lfx/load/utils.py +99 -0
  603. lfx/log/__init__.py +5 -0
  604. lfx/log/logger.py +385 -0
  605. lfx/memory/__init__.py +90 -0
  606. lfx/memory/stubs.py +283 -0
  607. lfx/processing/__init__.py +1 -0
  608. lfx/processing/process.py +238 -0
  609. lfx/processing/utils.py +25 -0
  610. lfx/py.typed +0 -0
  611. lfx/schema/__init__.py +66 -0
  612. lfx/schema/artifact.py +83 -0
  613. lfx/schema/content_block.py +62 -0
  614. lfx/schema/content_types.py +91 -0
  615. lfx/schema/data.py +308 -0
  616. lfx/schema/dataframe.py +210 -0
  617. lfx/schema/dotdict.py +74 -0
  618. lfx/schema/encoders.py +13 -0
  619. lfx/schema/graph.py +47 -0
  620. lfx/schema/image.py +131 -0
  621. lfx/schema/json_schema.py +141 -0
  622. lfx/schema/log.py +61 -0
  623. lfx/schema/message.py +473 -0
  624. lfx/schema/openai_responses_schemas.py +74 -0
  625. lfx/schema/properties.py +41 -0
  626. lfx/schema/schema.py +171 -0
  627. lfx/schema/serialize.py +13 -0
  628. lfx/schema/table.py +140 -0
  629. lfx/schema/validators.py +114 -0
  630. lfx/serialization/__init__.py +5 -0
  631. lfx/serialization/constants.py +2 -0
  632. lfx/serialization/serialization.py +314 -0
  633. lfx/services/__init__.py +23 -0
  634. lfx/services/base.py +28 -0
  635. lfx/services/cache/__init__.py +6 -0
  636. lfx/services/cache/base.py +183 -0
  637. lfx/services/cache/service.py +166 -0
  638. lfx/services/cache/utils.py +169 -0
  639. lfx/services/chat/__init__.py +1 -0
  640. lfx/services/chat/config.py +2 -0
  641. lfx/services/chat/schema.py +10 -0
  642. lfx/services/deps.py +129 -0
  643. lfx/services/factory.py +19 -0
  644. lfx/services/initialize.py +19 -0
  645. lfx/services/interfaces.py +103 -0
  646. lfx/services/manager.py +172 -0
  647. lfx/services/schema.py +20 -0
  648. lfx/services/session.py +82 -0
  649. lfx/services/settings/__init__.py +3 -0
  650. lfx/services/settings/auth.py +130 -0
  651. lfx/services/settings/base.py +539 -0
  652. lfx/services/settings/constants.py +31 -0
  653. lfx/services/settings/factory.py +23 -0
  654. lfx/services/settings/feature_flags.py +12 -0
  655. lfx/services/settings/service.py +35 -0
  656. lfx/services/settings/utils.py +40 -0
  657. lfx/services/shared_component_cache/__init__.py +1 -0
  658. lfx/services/shared_component_cache/factory.py +30 -0
  659. lfx/services/shared_component_cache/service.py +9 -0
  660. lfx/services/storage/__init__.py +5 -0
  661. lfx/services/storage/local.py +155 -0
  662. lfx/services/storage/service.py +54 -0
  663. lfx/services/tracing/__init__.py +1 -0
  664. lfx/services/tracing/service.py +21 -0
  665. lfx/settings.py +6 -0
  666. lfx/template/__init__.py +6 -0
  667. lfx/template/field/__init__.py +0 -0
  668. lfx/template/field/base.py +257 -0
  669. lfx/template/field/prompt.py +15 -0
  670. lfx/template/frontend_node/__init__.py +6 -0
  671. lfx/template/frontend_node/base.py +212 -0
  672. lfx/template/frontend_node/constants.py +65 -0
  673. lfx/template/frontend_node/custom_components.py +79 -0
  674. lfx/template/template/__init__.py +0 -0
  675. lfx/template/template/base.py +100 -0
  676. lfx/template/utils.py +217 -0
  677. lfx/type_extraction/__init__.py +19 -0
  678. lfx/type_extraction/type_extraction.py +75 -0
  679. lfx/type_extraction.py +80 -0
  680. lfx/utils/__init__.py +1 -0
  681. lfx/utils/async_helpers.py +42 -0
  682. lfx/utils/component_utils.py +154 -0
  683. lfx/utils/concurrency.py +60 -0
  684. lfx/utils/connection_string_parser.py +11 -0
  685. lfx/utils/constants.py +205 -0
  686. lfx/utils/data_structure.py +212 -0
  687. lfx/utils/exceptions.py +22 -0
  688. lfx/utils/helpers.py +28 -0
  689. lfx/utils/image.py +73 -0
  690. lfx/utils/lazy_load.py +15 -0
  691. lfx/utils/request_utils.py +18 -0
  692. lfx/utils/schemas.py +139 -0
  693. lfx/utils/util.py +481 -0
  694. lfx/utils/util_strings.py +56 -0
  695. lfx/utils/version.py +24 -0
  696. lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
  697. lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
  698. lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
  699. lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,245 @@
1
+ import signal
2
+ import sys
3
+ import traceback
4
+ from contextlib import suppress
5
+
6
+ from docling_core.types.doc import DoclingDocument
7
+
8
+ from lfx.log.logger import logger
9
+ from lfx.schema.data import Data
10
+ from lfx.schema.dataframe import DataFrame
11
+
12
+
13
def extract_docling_documents(data_inputs: Data | list[Data] | DataFrame, doc_key: str) -> list[DoclingDocument]:
    """Collect the documents stored under ``doc_key`` in the given inputs.

    Args:
        data_inputs: A DataFrame, a single Data object, or a collection of Data objects.
        doc_key: Column name (DataFrame) or key inside ``Data.data`` holding the document.

    Returns:
        The extracted documents. For a collection, only items that are Data objects
        whose ``doc_key`` value is a DoclingDocument are kept.

    Raises:
        TypeError: If the input is empty, ``doc_key`` is missing, extraction fails,
            or a collection contains no valid document.
    """
    # --- DataFrame input: the documents live in a column -----------------
    if isinstance(data_inputs, DataFrame):
        if not len(data_inputs):
            msg = "DataFrame is empty"
            raise TypeError(msg)

        if doc_key not in data_inputs.columns:
            msg = f"Column '{doc_key}' not found in DataFrame"
            raise TypeError(msg)

        try:
            return data_inputs[doc_key].tolist()
        except Exception as e:
            msg = f"Error extracting DoclingDocument from DataFrame: {e}"
            raise TypeError(msg) from e

    if not data_inputs:
        msg = "No data inputs provided"
        raise TypeError(msg)

    # --- Single Data input ------------------------------------------------
    if isinstance(data_inputs, Data):
        if doc_key not in data_inputs.data:
            msg = (
                f"'{doc_key}' field not available in the input Data. "
                "Check that your input is a DoclingDocument. "
                "You can use the Docling component to convert your input to a DoclingDocument."
            )
            raise TypeError(msg)
        return [data_inputs.data[doc_key]]

    # --- Collection of Data objects: silently skip anything invalid --------
    found: list[DoclingDocument] = []
    try:
        for candidate in data_inputs:
            if not isinstance(candidate, Data):
                continue
            if doc_key not in candidate.data:
                continue
            if isinstance(candidate.data[doc_key], DoclingDocument):
                found.append(candidate.data[doc_key])
        if not found:
            msg = f"No valid Data inputs found in {type(data_inputs)}"
            raise TypeError(msg)
    except AttributeError as e:
        msg = f"Invalid input type in collection: {e}"
        raise TypeError(msg) from e
    return found
58
+
59
+
60
def docling_worker(file_paths: list[str], queue, pipeline: str, ocr_engine: str):
    """Convert files with Docling; intended to run in a separate process.

    Everything is reported back through ``queue``:

    * on success, a list with one entry per conversion result: a dict with
      ``document``, ``file_path`` and ``status`` for successful conversions and
      ``None`` for the others;
    * on failure, a dict with an ``error`` key, plus ``shutdown`` (interruption)
      or ``traceback`` (processing error) where applicable.

    Args:
        file_paths: Paths of the files to convert.
        queue: Object exposing ``put``; used to send results/errors to the parent.
        pipeline: ``"standard"`` or ``"vlm"``. Any other value is reported as an error.
        ocr_engine: OCR engine kind for the standard pipeline; ``""`` disables OCR.
    """
    # Flag flipped by the signal handler; polled between expensive steps.
    stop_requested = False

    def signal_handler(signum: int, frame) -> None:  # noqa: ARG001
        """Record the shutdown request, notify the parent and exit."""
        nonlocal stop_requested
        known_signals: dict[int, str] = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}
        signal_name = known_signals.get(signum, f"signal {signum}")

        logger.debug(f"Docling worker received {signal_name}, initiating graceful shutdown...")
        stop_requested = True

        # Best effort: the queue may already be unusable at this point.
        with suppress(Exception):
            queue.put({"error": f"Worker interrupted by {signal_name}", "shutdown": True})

        sys.exit(0)

    def check_shutdown() -> None:
        """Exit the worker if a shutdown has been requested."""
        if not stop_requested:
            return

        logger.info("Shutdown requested, exiting worker...")

        with suppress(Exception):
            queue.put({"error": "Worker shutdown requested", "shutdown": True})

        sys.exit(0)

    # Install the handlers before anything slow happens.
    try:
        for handled_signal in (signal.SIGTERM, signal.SIGINT):
            signal.signal(handled_signal, signal_handler)
        logger.debug("Signal handlers registered for graceful shutdown")
    except (OSError, ValueError) as e:
        # Some signals might not be available on all platforms
        logger.warning(f"Warning: Could not register signal handlers: {e}")

    check_shutdown()

    # Docling is optional and heavy, so it is imported lazily inside the worker.
    try:
        from docling.datamodel.base_models import ConversionStatus, InputFormat
        from docling.datamodel.pipeline_options import OcrOptions, PdfPipelineOptions, VlmPipelineOptions
        from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
        from docling.models.factories import get_ocr_factory
        from docling.pipeline.vlm_pipeline import VlmPipeline

        check_shutdown()
        logger.debug("Docling dependencies loaded successfully")

    except ModuleNotFoundError:
        msg = (
            "Docling is an optional dependency of Langflow. "
            "Install with `uv pip install 'langflow[docling]'` "
            "or refer to the documentation"
        )
        queue.put({"error": msg})
        return
    except ImportError as e:
        # A different import failed (e.g., a transitive dependency); preserve details.
        queue.put({"error": f"Failed to import a Docling dependency: {e}"})
        return
    except KeyboardInterrupt:
        logger.warning("KeyboardInterrupt during imports, exiting...")
        queue.put({"error": "Worker interrupted during imports", "shutdown": True})
        return

    def _get_standard_opts() -> PdfPipelineOptions:
        """Build options for the standard PDF pipeline, enabling OCR when an engine is given."""
        check_shutdown()

        opts = PdfPipelineOptions()
        opts.do_ocr = ocr_engine != ""
        if opts.do_ocr:
            factory = get_ocr_factory(
                allow_external_plugins=False,
            )
            engine_options: OcrOptions = factory.create_options(
                kind=ocr_engine,
            )
            opts.ocr_options = engine_options
        return opts

    def _get_vlm_opts() -> VlmPipelineOptions:
        """Build (default) options for the VLM pipeline."""
        check_shutdown()
        return VlmPipelineOptions()

    def _get_converter() -> DocumentConverter:
        """Create the DocumentConverter for the requested pipeline."""
        check_shutdown()

        if pipeline == "standard":
            pdf_format_option = PdfFormatOption(
                pipeline_options=_get_standard_opts(),
            )
        elif pipeline == "vlm":
            pdf_format_option = PdfFormatOption(pipeline_cls=VlmPipeline, pipeline_options=_get_vlm_opts())
        else:
            msg = f"Unknown pipeline: {pipeline!r}"
            raise ValueError(msg)

        # PDFs and images share the same format option.
        format_options: dict[InputFormat, FormatOption] = {
            InputFormat.PDF: pdf_format_option,
            InputFormat.IMAGE: pdf_format_option,
        }

        return DocumentConverter(format_options=format_options)

    def _to_payload(res):
        """Turn a conversion result into the dict sent to the parent (``None`` unless successful)."""
        if res.status == ConversionStatus.SUCCESS:
            return {"document": res.document, "file_path": str(res.input.file), "status": res.status.name}
        return None

    try:
        # Creating the converter can be slow, so poll for shutdown around it.
        check_shutdown()
        logger.info(f"Initializing {pipeline} pipeline with OCR: {ocr_engine or 'disabled'}")

        converter = _get_converter()

        check_shutdown()
        logger.info(f"Starting to process {len(file_paths)} files...")

        conversion_results = []
        for position, file_path in enumerate(file_paths, start=1):
            check_shutdown()

            logger.debug(f"Processing file {position}/{len(file_paths)}: {file_path}")

            try:
                # One file per call so a shutdown can be honoured between files.
                conversion_results.extend(converter.convert_all([file_path]))
                check_shutdown()

            except (OSError, ValueError, RuntimeError, ImportError) as file_error:
                logger.error(f"Error processing file {file_path}: {file_error}")
                # Keep going with the remaining files unless a shutdown was requested.
                check_shutdown()
            except Exception as file_error:  # noqa: BLE001
                # Anything else is logged too, so one bad file cannot crash the worker.
                logger.error(f"Unexpected error processing file {file_path}: {file_error}")
                check_shutdown()

        check_shutdown()

        # One entry per conversion result, in order.
        processed_data = [_to_payload(res) for res in conversion_results]

        logger.info(f"Successfully processed {sum(1 for entry in processed_data if entry)} files")
        queue.put(processed_data)

    except KeyboardInterrupt:
        logger.warning("KeyboardInterrupt during processing, exiting gracefully...")
        queue.put({"error": "Worker interrupted during processing", "shutdown": True})
        return
    except Exception as e:  # noqa: BLE001
        if stop_requested:
            logger.exception("Exception occurred during shutdown, exiting...")
            return

        # Forward the failure, with its traceback, to the parent process.
        error_info = {"error": str(e), "traceback": traceback.format_exc()}
        logger.error(f"Error in worker: {error_info}")
        queue.put(error_info)
    finally:
        logger.info("Docling worker finishing...")
        if stop_requested:
            logger.debug("Worker shutdown completed")
        else:
            logger.debug("Worker completed normally")
lfx/base/data/utils.py ADDED
@@ -0,0 +1,198 @@
1
+ import unicodedata
2
+ from collections.abc import Callable
3
+ from concurrent import futures
4
+ from pathlib import Path
5
+
6
+ import chardet
7
+ import orjson
8
+ import yaml
9
+ from defusedxml import ElementTree
10
+
11
+ from lfx.schema.data import Data
12
+
13
# File extensions (without the leading dot) that the loaders in this module
# treat as text sources.
# NOTE(review): "pdf" and "docx" are listed even though they are not read with a
# plain file.read(); confirm this list is meant to include them.
TEXT_FILE_TYPES = [
    "txt", "md", "mdx", "csv", "json", "yaml",
    "yml", "xml", "html", "htm", "pdf", "docx",
    "py", "sh", "sql", "js", "ts", "tsx",
]  # fmt: skip

# File extensions (without the leading dot) recognised as images.
IMG_FILE_TYPES = [
    "jpg",
    "jpeg",
    "png",
    "bmp",
    "image",
]
37
+
38
+
39
def normalize_text(text):
    """Return ``text`` converted to Unicode normalization form NFKD."""
    decomposed = unicodedata.normalize("NFKD", text)
    return decomposed
41
+
42
+
43
def is_hidden(path: Path) -> bool:
    """Tell whether the last component of ``path`` begins with a dot."""
    first_char = path.name[:1]
    return first_char == "."
45
+
46
+
47
def format_directory_path(path: str) -> str:
    r"""Escape newline characters in a directory path.

    Every newline character in ``path`` is replaced by the two-character
    sequence backslash + ``n``; nothing else is altered.

    Args:
        path (str): The input path string.

    Returns:
        str: The path with newlines escaped.
    """
    segments = path.split("\n")
    return "\\n".join(segments)
57
+
58
+
59
+ # Ignoring FBT001 because the DirectoryComponent in 1.0.19
60
+ # calls this function without keyword arguments
61
def retrieve_file_paths(
    path: str,
    load_hidden: bool,  # noqa: FBT001
    recursive: bool,  # noqa: FBT001
    depth: int,
    types: list[str] = TEXT_FILE_TYPES,
) -> list[str]:
    """List the files under a directory that pass the extension and hidden-file filters.

    Args:
        path: Directory to scan.
        load_hidden: When false, files whose name starts with a dot are skipped.
        recursive: Whether sub-directories are scanned (see note on ``depth``).
        depth: When non-zero, limits how many levels below ``path`` are returned.
            In that mode the directory is resolved first and walked with ``rglob``.
        types: Allowed extensions without the leading dot; an empty list allows everything.

    Returns:
        The matching file paths as strings.

    Raises:
        ValueError: If ``path`` does not exist or is not a directory.
    """
    path = format_directory_path(path)
    root = Path(path)
    if not (root.exists() and root.is_dir()):
        msg = f"Path {path} must exist and be a directory."
        raise ValueError(msg)

    def _walk_limited(directory: Path, max_depth: int):
        """Yield entries at most ``max_depth`` levels below the resolved ``directory``."""
        base = directory.resolve()
        base_len = len(base.parts)
        pattern = "*" if recursive else "[!.]*"
        for entry in base.rglob(pattern):
            if len(entry.parts) - base_len <= max_depth:
                yield entry

    if depth:
        candidates = _walk_limited(root, depth)
    else:
        candidates = root.glob("**/*" if recursive else "*")

    selected: list[str] = []
    for candidate in candidates:
        if not candidate.is_file():
            continue
        # An empty ``types`` list disables the extension filter.
        if types and not any(candidate.suffix == f".{ext}" for ext in types):
            continue
        if is_hidden(candidate) and not load_hidden:
            continue
        selected.append(str(candidate))
    return selected
90
+
91
+
92
def partition_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
    """Load a file with ``unstructured`` and wrap its text in a Data object.

    Args:
        file_path: Path of the file to partition.
        silent_errors: When true, a partitioning failure yields ``None`` instead of raising.

    Returns:
        A Data whose text is the partitioned elements joined by blank lines and
        whose data holds the ``file_path``, or ``None`` on a silenced error.

    Raises:
        ValueError: If partitioning fails and ``silent_errors`` is false.
    """
    # Imported lazily so the dependency is only needed when this loader is used.
    from unstructured.partition.auto import partition

    try:
        elements = partition(file_path)
    except Exception as e:
        if silent_errors:
            return None
        msg = f"Error loading file {file_path}: {e}"
        raise ValueError(msg) from e

    joined_text = "\n\n".join(str(element) for element in elements)
    # Use the container's own metadata when it exposes one, else start empty.
    metadata = getattr(elements, "metadata", {})
    metadata["file_path"] = file_path
    return Data(text=joined_text, data=metadata)
109
+
110
+
111
def read_text_file(file_path: str) -> str:
    """Read a text file using the encoding detected by ``chardet``.

    Detections of Windows-1252, Windows-1254 and MacRoman are overridden with UTF-8.
    """
    source = Path(file_path)
    detected = chardet.detect(source.read_bytes())["encoding"]
    overridden = {"Windows-1252", "Windows-1254", "MacRoman"}
    encoding = "utf-8" if detected in overridden else detected
    return source.read_text(encoding=encoding)
121
+
122
+
123
def read_docx_file(file_path: str) -> str:
    """Return the paragraphs of a .docx file joined by blank lines."""
    # Imported lazily so python-docx is only required when a .docx is read.
    from docx import Document

    paragraphs = Document(file_path).paragraphs
    return "\n\n".join(paragraph.text for paragraph in paragraphs)
128
+
129
+
130
def parse_pdf_to_text(file_path: str) -> str:
    """Return the extracted text of every page of a PDF, joined by blank lines."""
    # Imported lazily so pypdf is only required when a PDF is parsed.
    from pypdf import PdfReader

    with Path(file_path).open("rb") as stream, PdfReader(stream) as reader:
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n\n".join(page_texts)
135
+
136
+
137
def parse_text_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
    """Load a file and wrap its content in a Data object.

    PDF and DOCX files go through their dedicated readers; everything else is
    read as text. JSON content is re-serialised after normalising top-level
    string values, YAML content is replaced by the parsed object, and XML
    content is re-serialised after parsing.

    Args:
        file_path: Path of the file to load.
        silent_errors: When true, any failure yields ``None`` instead of raising.

    Returns:
        A Data with ``file_path`` and ``text`` entries, or ``None`` on a silenced error.

    Raises:
        ValueError: If loading or parsing fails and ``silent_errors`` is false.
    """

    def _normalize_top_level(payload):
        """Normalise the string values of a dict / string items of a list (top level only)."""
        if isinstance(payload, dict):
            return {key: normalize_text(val) if isinstance(val, str) else val for key, val in payload.items()}
        if isinstance(payload, list):
            return [normalize_text(item) if isinstance(item, str) else item for item in payload]
        return payload

    try:
        # Pick the reader from the file extension; plain text is the fallback.
        readers = ((".pdf", parse_pdf_to_text), (".docx", read_docx_file))
        reader = next((fn for suffix, fn in readers if file_path.endswith(suffix)), read_text_file)
        text = reader(file_path)

        # Structured formats are parsed so malformed content is reported as an error.
        if file_path.endswith(".json"):
            text = orjson.dumps(_normalize_top_level(orjson.loads(text))).decode("utf-8")
        elif file_path.endswith((".yaml", ".yml")):
            text = yaml.safe_load(text)
        elif file_path.endswith(".xml"):
            text = ElementTree.tostring(ElementTree.fromstring(text), encoding="unicode")
    except Exception as e:
        if silent_errors:
            return None
        msg = f"Error loading file {file_path}: {e}"
        raise ValueError(msg) from e

    return Data(data={"file_path": file_path, "text": text})
167
+
168
+
169
+ # ! Removing unstructured dependency until
170
+ # ! 3.12 is supported
171
+ # def get_elements(
172
+ # file_paths: List[str],
173
+ # silent_errors: bool,
174
+ # max_concurrency: int,
175
+ # use_multithreading: bool,
176
+ # ) -> List[Optional[Data]]:
177
+ # if use_multithreading:
178
+ # data = parallel_load_data(file_paths, silent_errors, max_concurrency)
179
+ # else:
180
+ # data = [partition_file_to_data(file_path, silent_errors) for file_path in file_paths]
181
+ # data = list(filter(None, data))
182
+ # return data
183
+
184
+
185
def parallel_load_data(
    file_paths: list[str],
    *,
    silent_errors: bool,
    max_concurrency: int,
    load_function: Callable = parse_text_file_to_data,
) -> list[Data | None]:
    """Load several files concurrently with a thread pool.

    Args:
        file_paths: Paths of the files to load.
        silent_errors: Forwarded to ``load_function`` as a keyword argument.
        max_concurrency: Maximum number of worker threads.
        load_function: Callable invoked as ``load_function(path, silent_errors=...)``.

    Returns:
        The results of ``load_function``, in the same order as ``file_paths``.
    """

    def _load_one(file_path: str):
        return load_function(file_path, silent_errors=silent_errors)

    with futures.ThreadPoolExecutor(max_workers=max_concurrency) as pool:
        # ``map`` is lazy; materialise it before the pool is shut down.
        return list(pool.map(_load_one, file_paths))
File without changes
@@ -0,0 +1,43 @@
1
+ from abc import abstractmethod
2
+ from typing import Any
3
+
4
+ from langchain_core.documents import BaseDocumentTransformer
5
+
6
+ from lfx.custom.custom_component.component import Component
7
+ from lfx.io import Output
8
+ from lfx.schema.data import Data
9
+ from lfx.utils.util import build_loader_repr_from_data
10
+
11
+
12
class LCDocumentTransformerComponent(Component):
    # Base component for LangChain document transformers: subclasses supply the
    # input data and the transformer, this class wires them together.
    trace_type = "document_transformer"
    outputs = [
        Output(display_name="Data", name="data", method="transform_data"),
    ]

    def transform_data(self) -> list[Data]:
        """Run the transformer over the component input and return the result as Data.

        Data inputs are converted to LangChain documents first; any other input
        item is handed to the transformer unchanged.
        """
        raw_input = self.get_data_input()
        items = raw_input if isinstance(raw_input, list) else [raw_input]
        documents = [item.to_lc_document() if isinstance(item, Data) else item for item in items]

        transformed = self.build_document_transformer().transform_documents(documents)
        data = self.to_data(transformed)
        self.repr_value = build_loader_repr_from_data(data)
        return data

    @abstractmethod
    def get_data_input(self) -> Any:
        """Return the input (a single item or a list of items) to transform."""

    @abstractmethod
    def build_document_transformer(self) -> BaseDocumentTransformer:
        """Return the document transformer used by ``transform_data``."""
File without changes
@@ -0,0 +1,62 @@
1
+ import concurrent.futures
2
+ import json
3
+
4
+ import httpx
5
+ from pydantic import BaseModel, SecretStr
6
+
7
+ from lfx.field_typing import Embeddings
8
+ from lfx.log.logger import logger
9
+
10
+
11
class AIMLEmbeddingsImpl(BaseModel, Embeddings):
    # Embeddings client that posts one request per text to the configured endpoint.
    embeddings_completion_url: str = "https://api.aimlapi.com/v1/embeddings"

    api_key: SecretStr
    model: str

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed every text, one request per text, issued through a thread pool.

        Returns the embeddings in the same order as ``texts``. Request, decoding
        and response-shape errors are logged and re-raised.
        """
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key.get_secret_value()}",
        }
        # Pre-sized so each result lands at the position of its text.
        vectors = [None] * len(texts)

        with httpx.Client() as client, concurrent.futures.ThreadPoolExecutor() as executor:
            # Submit everything first so the requests overlap, then collect in order.
            pending = [executor.submit(self._embed_text, client, request_headers, text) for text in texts]

            for position, future in enumerate(pending):
                try:
                    payload = future.result()
                    received = len(payload["data"])
                    if received != 1:
                        msg = f"Expected one embedding, got {received}"
                        raise ValueError(msg)
                    vectors[position] = payload["data"][0]["embedding"]
                except (
                    httpx.HTTPStatusError,
                    httpx.RequestError,
                    json.JSONDecodeError,
                    KeyError,
                    ValueError,
                ):
                    logger.exception("Error occurred")
                    raise

        return vectors  # type: ignore[return-value]

    def _embed_text(self, client: httpx.Client, headers: dict, text: str) -> dict:
        """POST a single text to the embeddings endpoint and return the decoded JSON body."""
        response = client.post(
            self.embeddings_completion_url,
            headers=headers,
            json={"model": self.model, "input": text},
        )
        response.raise_for_status()
        return response.json()

    def embed_query(self, text: str) -> list[float]:
        """Embed a single query text."""
        (vector,) = self.embed_documents([text])[:1]
        return vector
@@ -0,0 +1,26 @@
1
+ from lfx.custom.custom_component.component import Component
2
+ from lfx.field_typing import Embeddings
3
+ from lfx.io import Output
4
+
5
+
6
class LCEmbeddingsModel(Component):
    # Base component for embedding models; subclasses implement ``build_embeddings``.
    trace_type = "embedding"

    outputs = [
        Output(display_name="Embedding Model", name="embeddings", method="build_embeddings"),
    ]

    def _validate_outputs(self) -> None:
        """Check that every required output method is declared and implemented.

        Raises:
            ValueError: If no output is bound to a required method, or the
                component has no attribute with that method's name.
        """
        required_output_methods = ["build_embeddings"]
        # A requirement is met by an output whose ``method`` is the required one.
        # The previous check compared against output *names* only, which rejects
        # this class's own output (name "embeddings", method "build_embeddings").
        # A matching name is still accepted so existing subclasses keep passing.
        declared = {getattr(output, "method", None) for output in self.outputs}
        declared.update(output.name for output in self.outputs)
        for method_name in required_output_methods:
            if method_name not in declared:
                msg = f"Output with method '{method_name}' must be defined."
                raise ValueError(msg)
            if not hasattr(self, method_name):
                msg = f"Method '{method_name}' must be defined."
                raise ValueError(msg)

    def build_embeddings(self) -> Embeddings:
        """Return the embeddings model; must be overridden by subclasses."""
        msg = "You must implement the build_embeddings method in your class."
        raise NotImplementedError(msg)
File without changes
@@ -0,0 +1,86 @@
1
+ from lfx.graph.schema import ResultData, RunOutputs
2
+ from lfx.log.logger import logger
3
+ from lfx.schema.data import Data
4
+ from lfx.schema.message import Message
5
+
6
+
7
def build_data_from_run_outputs(run_outputs: RunOutputs) -> list[Data]:
    """Flatten the outputs of a run into a single list of data.

    Args:
        run_outputs (RunOutputs): The run outputs; a falsy value yields an empty list.

    Returns:
        List[Data]: The data built from every truthy entry of ``run_outputs.outputs``.

    """
    collected: list[Data] = []
    if not run_outputs:
        return collected

    for entry in run_outputs.outputs:
        # Falsy entries carry nothing to convert.
        if not entry:
            continue
        collected += build_data_from_result_data(entry)
    return collected
24
+
25
+
26
def build_data_from_result_data(result_data: ResultData) -> list[Data]:
    """Build a list of data from the given ResultData.

    Args:
        result_data (ResultData): The ResultData object containing the result data.

    Returns:
        List[Data]: An empty list when ``result_data`` has no messages. Otherwise
        one item per entry of ``result_data.results`` when it is a dict (Message
        values are passed through, other values are wrapped in a Data keyed by
        the result name), or a single Data wrapping ``results`` when it is not.

    """
    # Results without chat messages produce no data.
    # NOTE(review): an artifacts-based fallback for this case used to follow this
    # guard but could never run (it was nested under the same condition after the
    # return). It is removed here without changing behaviour; if artifacts-only
    # results should be converted, that needs to be designed deliberately.
    if not result_data.messages:
        return []

    results = result_data.results
    if not isinstance(results, dict):
        return [Data(data=results)]

    return [
        result if isinstance(result, Message) else Data(data=result, text_key=name)
        for name, result in results.items()
    ]
72
+
73
+
74
def format_flow_output_data(data: list[Data]) -> str:
    """Render flow output data as a header line followed by one line per item.

    Args:
        data (List[Data]): The items to render. Items exposing ``get_text`` are
            rendered with it; anything else is rendered with ``str``.

    Returns:
        str: The formatted flow output data.

    """
    rendered = []
    for item in data:
        if hasattr(item, "get_text"):
            rendered.append(item.get_text())
        else:
            rendered.append(str(item))
    return "Flow run output:\n" + "\n".join(rendered)
File without changes