lfx-nightly 0.2.0.dev25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lfx-nightly might be problematic. Click here for more details.

Files changed (769) hide show
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/_assets/component_index.json +1 -0
  4. lfx/base/__init__.py +0 -0
  5. lfx/base/agents/__init__.py +0 -0
  6. lfx/base/agents/agent.py +375 -0
  7. lfx/base/agents/altk_base_agent.py +380 -0
  8. lfx/base/agents/altk_tool_wrappers.py +565 -0
  9. lfx/base/agents/callback.py +130 -0
  10. lfx/base/agents/context.py +109 -0
  11. lfx/base/agents/crewai/__init__.py +0 -0
  12. lfx/base/agents/crewai/crew.py +231 -0
  13. lfx/base/agents/crewai/tasks.py +12 -0
  14. lfx/base/agents/default_prompts.py +23 -0
  15. lfx/base/agents/errors.py +15 -0
  16. lfx/base/agents/events.py +430 -0
  17. lfx/base/agents/utils.py +237 -0
  18. lfx/base/astra_assistants/__init__.py +0 -0
  19. lfx/base/astra_assistants/util.py +171 -0
  20. lfx/base/chains/__init__.py +0 -0
  21. lfx/base/chains/model.py +19 -0
  22. lfx/base/composio/__init__.py +0 -0
  23. lfx/base/composio/composio_base.py +2584 -0
  24. lfx/base/compressors/__init__.py +0 -0
  25. lfx/base/compressors/model.py +60 -0
  26. lfx/base/constants.py +46 -0
  27. lfx/base/curl/__init__.py +0 -0
  28. lfx/base/curl/parse.py +188 -0
  29. lfx/base/data/__init__.py +5 -0
  30. lfx/base/data/base_file.py +810 -0
  31. lfx/base/data/docling_utils.py +338 -0
  32. lfx/base/data/storage_utils.py +192 -0
  33. lfx/base/data/utils.py +362 -0
  34. lfx/base/datastax/__init__.py +5 -0
  35. lfx/base/datastax/astradb_base.py +896 -0
  36. lfx/base/document_transformers/__init__.py +0 -0
  37. lfx/base/document_transformers/model.py +43 -0
  38. lfx/base/embeddings/__init__.py +0 -0
  39. lfx/base/embeddings/aiml_embeddings.py +62 -0
  40. lfx/base/embeddings/embeddings_class.py +113 -0
  41. lfx/base/embeddings/model.py +26 -0
  42. lfx/base/flow_processing/__init__.py +0 -0
  43. lfx/base/flow_processing/utils.py +86 -0
  44. lfx/base/huggingface/__init__.py +0 -0
  45. lfx/base/huggingface/model_bridge.py +133 -0
  46. lfx/base/io/__init__.py +0 -0
  47. lfx/base/io/chat.py +21 -0
  48. lfx/base/io/text.py +22 -0
  49. lfx/base/knowledge_bases/__init__.py +3 -0
  50. lfx/base/knowledge_bases/knowledge_base_utils.py +137 -0
  51. lfx/base/langchain_utilities/__init__.py +0 -0
  52. lfx/base/langchain_utilities/model.py +35 -0
  53. lfx/base/langchain_utilities/spider_constants.py +1 -0
  54. lfx/base/langwatch/__init__.py +0 -0
  55. lfx/base/langwatch/utils.py +18 -0
  56. lfx/base/mcp/__init__.py +0 -0
  57. lfx/base/mcp/constants.py +2 -0
  58. lfx/base/mcp/util.py +1659 -0
  59. lfx/base/memory/__init__.py +0 -0
  60. lfx/base/memory/memory.py +49 -0
  61. lfx/base/memory/model.py +38 -0
  62. lfx/base/models/__init__.py +3 -0
  63. lfx/base/models/aiml_constants.py +51 -0
  64. lfx/base/models/anthropic_constants.py +51 -0
  65. lfx/base/models/aws_constants.py +151 -0
  66. lfx/base/models/chat_result.py +76 -0
  67. lfx/base/models/cometapi_constants.py +54 -0
  68. lfx/base/models/google_generative_ai_constants.py +70 -0
  69. lfx/base/models/google_generative_ai_model.py +38 -0
  70. lfx/base/models/groq_constants.py +150 -0
  71. lfx/base/models/groq_model_discovery.py +265 -0
  72. lfx/base/models/model.py +375 -0
  73. lfx/base/models/model_input_constants.py +378 -0
  74. lfx/base/models/model_metadata.py +41 -0
  75. lfx/base/models/model_utils.py +108 -0
  76. lfx/base/models/novita_constants.py +35 -0
  77. lfx/base/models/ollama_constants.py +52 -0
  78. lfx/base/models/openai_constants.py +129 -0
  79. lfx/base/models/sambanova_constants.py +18 -0
  80. lfx/base/models/watsonx_constants.py +36 -0
  81. lfx/base/processing/__init__.py +0 -0
  82. lfx/base/prompts/__init__.py +0 -0
  83. lfx/base/prompts/api_utils.py +224 -0
  84. lfx/base/prompts/utils.py +61 -0
  85. lfx/base/textsplitters/__init__.py +0 -0
  86. lfx/base/textsplitters/model.py +28 -0
  87. lfx/base/tools/__init__.py +0 -0
  88. lfx/base/tools/base.py +26 -0
  89. lfx/base/tools/component_tool.py +325 -0
  90. lfx/base/tools/constants.py +49 -0
  91. lfx/base/tools/flow_tool.py +132 -0
  92. lfx/base/tools/run_flow.py +698 -0
  93. lfx/base/vectorstores/__init__.py +0 -0
  94. lfx/base/vectorstores/model.py +193 -0
  95. lfx/base/vectorstores/utils.py +22 -0
  96. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  97. lfx/cli/__init__.py +5 -0
  98. lfx/cli/commands.py +327 -0
  99. lfx/cli/common.py +650 -0
  100. lfx/cli/run.py +506 -0
  101. lfx/cli/script_loader.py +289 -0
  102. lfx/cli/serve_app.py +546 -0
  103. lfx/cli/validation.py +69 -0
  104. lfx/components/FAISS/__init__.py +34 -0
  105. lfx/components/FAISS/faiss.py +111 -0
  106. lfx/components/Notion/__init__.py +19 -0
  107. lfx/components/Notion/add_content_to_page.py +269 -0
  108. lfx/components/Notion/create_page.py +94 -0
  109. lfx/components/Notion/list_database_properties.py +68 -0
  110. lfx/components/Notion/list_pages.py +122 -0
  111. lfx/components/Notion/list_users.py +77 -0
  112. lfx/components/Notion/page_content_viewer.py +93 -0
  113. lfx/components/Notion/search.py +111 -0
  114. lfx/components/Notion/update_page_property.py +114 -0
  115. lfx/components/__init__.py +428 -0
  116. lfx/components/_importing.py +42 -0
  117. lfx/components/agentql/__init__.py +3 -0
  118. lfx/components/agentql/agentql_api.py +151 -0
  119. lfx/components/aiml/__init__.py +37 -0
  120. lfx/components/aiml/aiml.py +115 -0
  121. lfx/components/aiml/aiml_embeddings.py +37 -0
  122. lfx/components/altk/__init__.py +34 -0
  123. lfx/components/altk/altk_agent.py +193 -0
  124. lfx/components/amazon/__init__.py +36 -0
  125. lfx/components/amazon/amazon_bedrock_converse.py +195 -0
  126. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  127. lfx/components/amazon/amazon_bedrock_model.py +130 -0
  128. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  129. lfx/components/anthropic/__init__.py +34 -0
  130. lfx/components/anthropic/anthropic.py +187 -0
  131. lfx/components/apify/__init__.py +5 -0
  132. lfx/components/apify/apify_actor.py +325 -0
  133. lfx/components/arxiv/__init__.py +3 -0
  134. lfx/components/arxiv/arxiv.py +169 -0
  135. lfx/components/assemblyai/__init__.py +46 -0
  136. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  137. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  138. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  139. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  140. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  141. lfx/components/azure/__init__.py +37 -0
  142. lfx/components/azure/azure_openai.py +95 -0
  143. lfx/components/azure/azure_openai_embeddings.py +83 -0
  144. lfx/components/baidu/__init__.py +32 -0
  145. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  146. lfx/components/bing/__init__.py +3 -0
  147. lfx/components/bing/bing_search_api.py +61 -0
  148. lfx/components/cassandra/__init__.py +40 -0
  149. lfx/components/cassandra/cassandra.py +264 -0
  150. lfx/components/cassandra/cassandra_chat.py +92 -0
  151. lfx/components/cassandra/cassandra_graph.py +238 -0
  152. lfx/components/chains/__init__.py +3 -0
  153. lfx/components/chroma/__init__.py +34 -0
  154. lfx/components/chroma/chroma.py +169 -0
  155. lfx/components/cleanlab/__init__.py +40 -0
  156. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  157. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  158. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  159. lfx/components/clickhouse/__init__.py +34 -0
  160. lfx/components/clickhouse/clickhouse.py +135 -0
  161. lfx/components/cloudflare/__init__.py +32 -0
  162. lfx/components/cloudflare/cloudflare.py +81 -0
  163. lfx/components/cohere/__init__.py +40 -0
  164. lfx/components/cohere/cohere_embeddings.py +81 -0
  165. lfx/components/cohere/cohere_models.py +46 -0
  166. lfx/components/cohere/cohere_rerank.py +51 -0
  167. lfx/components/cometapi/__init__.py +32 -0
  168. lfx/components/cometapi/cometapi.py +166 -0
  169. lfx/components/composio/__init__.py +222 -0
  170. lfx/components/composio/agentql_composio.py +11 -0
  171. lfx/components/composio/agiled_composio.py +11 -0
  172. lfx/components/composio/airtable_composio.py +11 -0
  173. lfx/components/composio/apollo_composio.py +11 -0
  174. lfx/components/composio/asana_composio.py +11 -0
  175. lfx/components/composio/attio_composio.py +11 -0
  176. lfx/components/composio/bitbucket_composio.py +11 -0
  177. lfx/components/composio/bolna_composio.py +11 -0
  178. lfx/components/composio/brightdata_composio.py +11 -0
  179. lfx/components/composio/calendly_composio.py +11 -0
  180. lfx/components/composio/canva_composio.py +11 -0
  181. lfx/components/composio/canvas_composio.py +11 -0
  182. lfx/components/composio/coda_composio.py +11 -0
  183. lfx/components/composio/composio_api.py +278 -0
  184. lfx/components/composio/contentful_composio.py +11 -0
  185. lfx/components/composio/digicert_composio.py +11 -0
  186. lfx/components/composio/discord_composio.py +11 -0
  187. lfx/components/composio/dropbox_compnent.py +11 -0
  188. lfx/components/composio/elevenlabs_composio.py +11 -0
  189. lfx/components/composio/exa_composio.py +11 -0
  190. lfx/components/composio/figma_composio.py +11 -0
  191. lfx/components/composio/finage_composio.py +11 -0
  192. lfx/components/composio/firecrawl_composio.py +11 -0
  193. lfx/components/composio/fireflies_composio.py +11 -0
  194. lfx/components/composio/fixer_composio.py +11 -0
  195. lfx/components/composio/flexisign_composio.py +11 -0
  196. lfx/components/composio/freshdesk_composio.py +11 -0
  197. lfx/components/composio/github_composio.py +11 -0
  198. lfx/components/composio/gmail_composio.py +38 -0
  199. lfx/components/composio/googlebigquery_composio.py +11 -0
  200. lfx/components/composio/googlecalendar_composio.py +11 -0
  201. lfx/components/composio/googleclassroom_composio.py +11 -0
  202. lfx/components/composio/googledocs_composio.py +11 -0
  203. lfx/components/composio/googlemeet_composio.py +11 -0
  204. lfx/components/composio/googlesheets_composio.py +11 -0
  205. lfx/components/composio/googletasks_composio.py +8 -0
  206. lfx/components/composio/heygen_composio.py +11 -0
  207. lfx/components/composio/instagram_composio.py +11 -0
  208. lfx/components/composio/jira_composio.py +11 -0
  209. lfx/components/composio/jotform_composio.py +11 -0
  210. lfx/components/composio/klaviyo_composio.py +11 -0
  211. lfx/components/composio/linear_composio.py +11 -0
  212. lfx/components/composio/listennotes_composio.py +11 -0
  213. lfx/components/composio/mem0_composio.py +11 -0
  214. lfx/components/composio/miro_composio.py +11 -0
  215. lfx/components/composio/missive_composio.py +11 -0
  216. lfx/components/composio/notion_composio.py +11 -0
  217. lfx/components/composio/onedrive_composio.py +11 -0
  218. lfx/components/composio/outlook_composio.py +11 -0
  219. lfx/components/composio/pandadoc_composio.py +11 -0
  220. lfx/components/composio/peopledatalabs_composio.py +11 -0
  221. lfx/components/composio/perplexityai_composio.py +11 -0
  222. lfx/components/composio/reddit_composio.py +11 -0
  223. lfx/components/composio/serpapi_composio.py +11 -0
  224. lfx/components/composio/slack_composio.py +11 -0
  225. lfx/components/composio/slackbot_composio.py +11 -0
  226. lfx/components/composio/snowflake_composio.py +11 -0
  227. lfx/components/composio/supabase_composio.py +11 -0
  228. lfx/components/composio/tavily_composio.py +11 -0
  229. lfx/components/composio/timelinesai_composio.py +11 -0
  230. lfx/components/composio/todoist_composio.py +11 -0
  231. lfx/components/composio/wrike_composio.py +11 -0
  232. lfx/components/composio/youtube_composio.py +11 -0
  233. lfx/components/confluence/__init__.py +3 -0
  234. lfx/components/confluence/confluence.py +84 -0
  235. lfx/components/couchbase/__init__.py +34 -0
  236. lfx/components/couchbase/couchbase.py +102 -0
  237. lfx/components/crewai/__init__.py +49 -0
  238. lfx/components/crewai/crewai.py +108 -0
  239. lfx/components/crewai/hierarchical_crew.py +47 -0
  240. lfx/components/crewai/hierarchical_task.py +45 -0
  241. lfx/components/crewai/sequential_crew.py +53 -0
  242. lfx/components/crewai/sequential_task.py +74 -0
  243. lfx/components/crewai/sequential_task_agent.py +144 -0
  244. lfx/components/cuga/__init__.py +34 -0
  245. lfx/components/cuga/cuga_agent.py +730 -0
  246. lfx/components/custom_component/__init__.py +34 -0
  247. lfx/components/custom_component/custom_component.py +31 -0
  248. lfx/components/data/__init__.py +114 -0
  249. lfx/components/data_source/__init__.py +58 -0
  250. lfx/components/data_source/api_request.py +577 -0
  251. lfx/components/data_source/csv_to_data.py +101 -0
  252. lfx/components/data_source/json_to_data.py +106 -0
  253. lfx/components/data_source/mock_data.py +398 -0
  254. lfx/components/data_source/news_search.py +166 -0
  255. lfx/components/data_source/rss.py +71 -0
  256. lfx/components/data_source/sql_executor.py +101 -0
  257. lfx/components/data_source/url.py +311 -0
  258. lfx/components/data_source/web_search.py +326 -0
  259. lfx/components/datastax/__init__.py +76 -0
  260. lfx/components/datastax/astradb_assistant_manager.py +307 -0
  261. lfx/components/datastax/astradb_chatmemory.py +40 -0
  262. lfx/components/datastax/astradb_cql.py +288 -0
  263. lfx/components/datastax/astradb_graph.py +217 -0
  264. lfx/components/datastax/astradb_tool.py +378 -0
  265. lfx/components/datastax/astradb_vectorize.py +122 -0
  266. lfx/components/datastax/astradb_vectorstore.py +449 -0
  267. lfx/components/datastax/create_assistant.py +59 -0
  268. lfx/components/datastax/create_thread.py +33 -0
  269. lfx/components/datastax/dotenv.py +36 -0
  270. lfx/components/datastax/get_assistant.py +38 -0
  271. lfx/components/datastax/getenvvar.py +31 -0
  272. lfx/components/datastax/graph_rag.py +141 -0
  273. lfx/components/datastax/hcd.py +315 -0
  274. lfx/components/datastax/list_assistants.py +26 -0
  275. lfx/components/datastax/run.py +90 -0
  276. lfx/components/deactivated/__init__.py +15 -0
  277. lfx/components/deactivated/amazon_kendra.py +66 -0
  278. lfx/components/deactivated/chat_litellm_model.py +158 -0
  279. lfx/components/deactivated/code_block_extractor.py +26 -0
  280. lfx/components/deactivated/documents_to_data.py +22 -0
  281. lfx/components/deactivated/embed.py +16 -0
  282. lfx/components/deactivated/extract_key_from_data.py +46 -0
  283. lfx/components/deactivated/json_document_builder.py +57 -0
  284. lfx/components/deactivated/list_flows.py +20 -0
  285. lfx/components/deactivated/mcp_sse.py +61 -0
  286. lfx/components/deactivated/mcp_stdio.py +62 -0
  287. lfx/components/deactivated/merge_data.py +93 -0
  288. lfx/components/deactivated/message.py +37 -0
  289. lfx/components/deactivated/metal.py +54 -0
  290. lfx/components/deactivated/multi_query.py +59 -0
  291. lfx/components/deactivated/retriever.py +43 -0
  292. lfx/components/deactivated/selective_passthrough.py +77 -0
  293. lfx/components/deactivated/should_run_next.py +40 -0
  294. lfx/components/deactivated/split_text.py +63 -0
  295. lfx/components/deactivated/store_message.py +24 -0
  296. lfx/components/deactivated/sub_flow.py +124 -0
  297. lfx/components/deactivated/vectara_self_query.py +76 -0
  298. lfx/components/deactivated/vector_store.py +24 -0
  299. lfx/components/deepseek/__init__.py +34 -0
  300. lfx/components/deepseek/deepseek.py +136 -0
  301. lfx/components/docling/__init__.py +43 -0
  302. lfx/components/docling/chunk_docling_document.py +186 -0
  303. lfx/components/docling/docling_inline.py +238 -0
  304. lfx/components/docling/docling_remote.py +195 -0
  305. lfx/components/docling/export_docling_document.py +117 -0
  306. lfx/components/documentloaders/__init__.py +3 -0
  307. lfx/components/duckduckgo/__init__.py +3 -0
  308. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  309. lfx/components/elastic/__init__.py +37 -0
  310. lfx/components/elastic/elasticsearch.py +267 -0
  311. lfx/components/elastic/opensearch.py +789 -0
  312. lfx/components/elastic/opensearch_multimodal.py +1575 -0
  313. lfx/components/embeddings/__init__.py +37 -0
  314. lfx/components/embeddings/similarity.py +77 -0
  315. lfx/components/embeddings/text_embedder.py +65 -0
  316. lfx/components/exa/__init__.py +3 -0
  317. lfx/components/exa/exa_search.py +68 -0
  318. lfx/components/files_and_knowledge/__init__.py +47 -0
  319. lfx/components/files_and_knowledge/directory.py +113 -0
  320. lfx/components/files_and_knowledge/file.py +841 -0
  321. lfx/components/files_and_knowledge/ingestion.py +694 -0
  322. lfx/components/files_and_knowledge/retrieval.py +264 -0
  323. lfx/components/files_and_knowledge/save_file.py +746 -0
  324. lfx/components/firecrawl/__init__.py +43 -0
  325. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  326. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  327. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  328. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  329. lfx/components/flow_controls/__init__.py +58 -0
  330. lfx/components/flow_controls/conditional_router.py +208 -0
  331. lfx/components/flow_controls/data_conditional_router.py +126 -0
  332. lfx/components/flow_controls/flow_tool.py +111 -0
  333. lfx/components/flow_controls/listen.py +29 -0
  334. lfx/components/flow_controls/loop.py +163 -0
  335. lfx/components/flow_controls/notify.py +88 -0
  336. lfx/components/flow_controls/pass_message.py +36 -0
  337. lfx/components/flow_controls/run_flow.py +108 -0
  338. lfx/components/flow_controls/sub_flow.py +115 -0
  339. lfx/components/git/__init__.py +4 -0
  340. lfx/components/git/git.py +262 -0
  341. lfx/components/git/gitextractor.py +196 -0
  342. lfx/components/glean/__init__.py +3 -0
  343. lfx/components/glean/glean_search_api.py +173 -0
  344. lfx/components/google/__init__.py +17 -0
  345. lfx/components/google/gmail.py +193 -0
  346. lfx/components/google/google_bq_sql_executor.py +157 -0
  347. lfx/components/google/google_drive.py +92 -0
  348. lfx/components/google/google_drive_search.py +152 -0
  349. lfx/components/google/google_generative_ai.py +144 -0
  350. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  351. lfx/components/google/google_oauth_token.py +89 -0
  352. lfx/components/google/google_search_api_core.py +68 -0
  353. lfx/components/google/google_serper_api_core.py +74 -0
  354. lfx/components/groq/__init__.py +34 -0
  355. lfx/components/groq/groq.py +143 -0
  356. lfx/components/helpers/__init__.py +154 -0
  357. lfx/components/homeassistant/__init__.py +7 -0
  358. lfx/components/homeassistant/home_assistant_control.py +152 -0
  359. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  360. lfx/components/huggingface/__init__.py +37 -0
  361. lfx/components/huggingface/huggingface.py +199 -0
  362. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  363. lfx/components/ibm/__init__.py +34 -0
  364. lfx/components/ibm/watsonx.py +207 -0
  365. lfx/components/ibm/watsonx_embeddings.py +135 -0
  366. lfx/components/icosacomputing/__init__.py +5 -0
  367. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  368. lfx/components/input_output/__init__.py +40 -0
  369. lfx/components/input_output/chat.py +109 -0
  370. lfx/components/input_output/chat_output.py +184 -0
  371. lfx/components/input_output/text.py +27 -0
  372. lfx/components/input_output/text_output.py +29 -0
  373. lfx/components/input_output/webhook.py +56 -0
  374. lfx/components/jigsawstack/__init__.py +23 -0
  375. lfx/components/jigsawstack/ai_scrape.py +126 -0
  376. lfx/components/jigsawstack/ai_web_search.py +136 -0
  377. lfx/components/jigsawstack/file_read.py +115 -0
  378. lfx/components/jigsawstack/file_upload.py +94 -0
  379. lfx/components/jigsawstack/image_generation.py +205 -0
  380. lfx/components/jigsawstack/nsfw.py +60 -0
  381. lfx/components/jigsawstack/object_detection.py +124 -0
  382. lfx/components/jigsawstack/sentiment.py +112 -0
  383. lfx/components/jigsawstack/text_to_sql.py +90 -0
  384. lfx/components/jigsawstack/text_translate.py +77 -0
  385. lfx/components/jigsawstack/vocr.py +107 -0
  386. lfx/components/knowledge_bases/__init__.py +89 -0
  387. lfx/components/langchain_utilities/__init__.py +109 -0
  388. lfx/components/langchain_utilities/character.py +53 -0
  389. lfx/components/langchain_utilities/conversation.py +59 -0
  390. lfx/components/langchain_utilities/csv_agent.py +175 -0
  391. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  392. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  393. lfx/components/langchain_utilities/json_agent.py +100 -0
  394. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  395. lfx/components/langchain_utilities/language_recursive.py +49 -0
  396. lfx/components/langchain_utilities/language_semantic.py +138 -0
  397. lfx/components/langchain_utilities/llm_checker.py +39 -0
  398. lfx/components/langchain_utilities/llm_math.py +42 -0
  399. lfx/components/langchain_utilities/natural_language.py +61 -0
  400. lfx/components/langchain_utilities/openai_tools.py +53 -0
  401. lfx/components/langchain_utilities/openapi.py +48 -0
  402. lfx/components/langchain_utilities/recursive_character.py +60 -0
  403. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  404. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  405. lfx/components/langchain_utilities/self_query.py +80 -0
  406. lfx/components/langchain_utilities/spider.py +142 -0
  407. lfx/components/langchain_utilities/sql.py +40 -0
  408. lfx/components/langchain_utilities/sql_database.py +35 -0
  409. lfx/components/langchain_utilities/sql_generator.py +78 -0
  410. lfx/components/langchain_utilities/tool_calling.py +59 -0
  411. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  412. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  413. lfx/components/langchain_utilities/xml_agent.py +71 -0
  414. lfx/components/langwatch/__init__.py +3 -0
  415. lfx/components/langwatch/langwatch.py +278 -0
  416. lfx/components/link_extractors/__init__.py +3 -0
  417. lfx/components/llm_operations/__init__.py +46 -0
  418. lfx/components/llm_operations/batch_run.py +205 -0
  419. lfx/components/llm_operations/lambda_filter.py +218 -0
  420. lfx/components/llm_operations/llm_conditional_router.py +421 -0
  421. lfx/components/llm_operations/llm_selector.py +499 -0
  422. lfx/components/llm_operations/structured_output.py +244 -0
  423. lfx/components/lmstudio/__init__.py +34 -0
  424. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  425. lfx/components/lmstudio/lmstudiomodel.py +133 -0
  426. lfx/components/logic/__init__.py +181 -0
  427. lfx/components/maritalk/__init__.py +32 -0
  428. lfx/components/maritalk/maritalk.py +52 -0
  429. lfx/components/mem0/__init__.py +3 -0
  430. lfx/components/mem0/mem0_chat_memory.py +147 -0
  431. lfx/components/milvus/__init__.py +34 -0
  432. lfx/components/milvus/milvus.py +115 -0
  433. lfx/components/mistral/__init__.py +37 -0
  434. lfx/components/mistral/mistral.py +114 -0
  435. lfx/components/mistral/mistral_embeddings.py +58 -0
  436. lfx/components/models/__init__.py +89 -0
  437. lfx/components/models_and_agents/__init__.py +49 -0
  438. lfx/components/models_and_agents/agent.py +644 -0
  439. lfx/components/models_and_agents/embedding_model.py +423 -0
  440. lfx/components/models_and_agents/language_model.py +398 -0
  441. lfx/components/models_and_agents/mcp_component.py +594 -0
  442. lfx/components/models_and_agents/memory.py +268 -0
  443. lfx/components/models_and_agents/prompt.py +67 -0
  444. lfx/components/mongodb/__init__.py +34 -0
  445. lfx/components/mongodb/mongodb_atlas.py +213 -0
  446. lfx/components/needle/__init__.py +3 -0
  447. lfx/components/needle/needle.py +104 -0
  448. lfx/components/notdiamond/__init__.py +34 -0
  449. lfx/components/notdiamond/notdiamond.py +228 -0
  450. lfx/components/novita/__init__.py +32 -0
  451. lfx/components/novita/novita.py +130 -0
  452. lfx/components/nvidia/__init__.py +57 -0
  453. lfx/components/nvidia/nvidia.py +151 -0
  454. lfx/components/nvidia/nvidia_embedding.py +77 -0
  455. lfx/components/nvidia/nvidia_ingest.py +317 -0
  456. lfx/components/nvidia/nvidia_rerank.py +63 -0
  457. lfx/components/nvidia/system_assist.py +65 -0
  458. lfx/components/olivya/__init__.py +3 -0
  459. lfx/components/olivya/olivya.py +116 -0
  460. lfx/components/ollama/__init__.py +37 -0
  461. lfx/components/ollama/ollama.py +548 -0
  462. lfx/components/ollama/ollama_embeddings.py +103 -0
  463. lfx/components/openai/__init__.py +37 -0
  464. lfx/components/openai/openai.py +100 -0
  465. lfx/components/openai/openai_chat_model.py +176 -0
  466. lfx/components/openrouter/__init__.py +32 -0
  467. lfx/components/openrouter/openrouter.py +104 -0
  468. lfx/components/output_parsers/__init__.py +3 -0
  469. lfx/components/perplexity/__init__.py +34 -0
  470. lfx/components/perplexity/perplexity.py +75 -0
  471. lfx/components/pgvector/__init__.py +34 -0
  472. lfx/components/pgvector/pgvector.py +72 -0
  473. lfx/components/pinecone/__init__.py +34 -0
  474. lfx/components/pinecone/pinecone.py +134 -0
  475. lfx/components/processing/__init__.py +72 -0
  476. lfx/components/processing/alter_metadata.py +109 -0
  477. lfx/components/processing/combine_text.py +40 -0
  478. lfx/components/processing/converter.py +248 -0
  479. lfx/components/processing/create_data.py +111 -0
  480. lfx/components/processing/create_list.py +40 -0
  481. lfx/components/processing/data_operations.py +528 -0
  482. lfx/components/processing/data_to_dataframe.py +71 -0
  483. lfx/components/processing/dataframe_operations.py +313 -0
  484. lfx/components/processing/dataframe_to_toolset.py +259 -0
  485. lfx/components/processing/dynamic_create_data.py +357 -0
  486. lfx/components/processing/extract_key.py +54 -0
  487. lfx/components/processing/filter_data.py +43 -0
  488. lfx/components/processing/filter_data_values.py +89 -0
  489. lfx/components/processing/json_cleaner.py +104 -0
  490. lfx/components/processing/merge_data.py +91 -0
  491. lfx/components/processing/message_to_data.py +37 -0
  492. lfx/components/processing/output_parser.py +46 -0
  493. lfx/components/processing/parse_data.py +71 -0
  494. lfx/components/processing/parse_dataframe.py +69 -0
  495. lfx/components/processing/parse_json_data.py +91 -0
  496. lfx/components/processing/parser.py +148 -0
  497. lfx/components/processing/regex.py +83 -0
  498. lfx/components/processing/select_data.py +49 -0
  499. lfx/components/processing/split_text.py +141 -0
  500. lfx/components/processing/store_message.py +91 -0
  501. lfx/components/processing/update_data.py +161 -0
  502. lfx/components/prototypes/__init__.py +35 -0
  503. lfx/components/prototypes/python_function.py +73 -0
  504. lfx/components/qdrant/__init__.py +34 -0
  505. lfx/components/qdrant/qdrant.py +109 -0
  506. lfx/components/redis/__init__.py +37 -0
  507. lfx/components/redis/redis.py +89 -0
  508. lfx/components/redis/redis_chat.py +43 -0
  509. lfx/components/sambanova/__init__.py +32 -0
  510. lfx/components/sambanova/sambanova.py +84 -0
  511. lfx/components/scrapegraph/__init__.py +40 -0
  512. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  513. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  514. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  515. lfx/components/searchapi/__init__.py +34 -0
  516. lfx/components/searchapi/search.py +79 -0
  517. lfx/components/serpapi/__init__.py +3 -0
  518. lfx/components/serpapi/serp.py +115 -0
  519. lfx/components/supabase/__init__.py +34 -0
  520. lfx/components/supabase/supabase.py +76 -0
  521. lfx/components/tavily/__init__.py +4 -0
  522. lfx/components/tavily/tavily_extract.py +117 -0
  523. lfx/components/tavily/tavily_search.py +212 -0
  524. lfx/components/textsplitters/__init__.py +3 -0
  525. lfx/components/toolkits/__init__.py +3 -0
  526. lfx/components/tools/__init__.py +66 -0
  527. lfx/components/tools/calculator.py +109 -0
  528. lfx/components/tools/google_search_api.py +45 -0
  529. lfx/components/tools/google_serper_api.py +115 -0
  530. lfx/components/tools/python_code_structured_tool.py +328 -0
  531. lfx/components/tools/python_repl.py +98 -0
  532. lfx/components/tools/search_api.py +88 -0
  533. lfx/components/tools/searxng.py +145 -0
  534. lfx/components/tools/serp_api.py +120 -0
  535. lfx/components/tools/tavily_search_tool.py +345 -0
  536. lfx/components/tools/wikidata_api.py +103 -0
  537. lfx/components/tools/wikipedia_api.py +50 -0
  538. lfx/components/tools/yahoo_finance.py +130 -0
  539. lfx/components/twelvelabs/__init__.py +52 -0
  540. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  541. lfx/components/twelvelabs/pegasus_index.py +311 -0
  542. lfx/components/twelvelabs/split_video.py +301 -0
  543. lfx/components/twelvelabs/text_embeddings.py +57 -0
  544. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  545. lfx/components/twelvelabs/video_embeddings.py +100 -0
  546. lfx/components/twelvelabs/video_file.py +191 -0
  547. lfx/components/unstructured/__init__.py +3 -0
  548. lfx/components/unstructured/unstructured.py +121 -0
  549. lfx/components/upstash/__init__.py +34 -0
  550. lfx/components/upstash/upstash.py +124 -0
  551. lfx/components/utilities/__init__.py +43 -0
  552. lfx/components/utilities/calculator_core.py +89 -0
  553. lfx/components/utilities/current_date.py +42 -0
  554. lfx/components/utilities/id_generator.py +42 -0
  555. lfx/components/utilities/python_repl_core.py +98 -0
  556. lfx/components/vectara/__init__.py +37 -0
  557. lfx/components/vectara/vectara.py +97 -0
  558. lfx/components/vectara/vectara_rag.py +164 -0
  559. lfx/components/vectorstores/__init__.py +34 -0
  560. lfx/components/vectorstores/local_db.py +270 -0
  561. lfx/components/vertexai/__init__.py +37 -0
  562. lfx/components/vertexai/vertexai.py +71 -0
  563. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  564. lfx/components/vlmrun/__init__.py +34 -0
  565. lfx/components/vlmrun/vlmrun_transcription.py +224 -0
  566. lfx/components/weaviate/__init__.py +34 -0
  567. lfx/components/weaviate/weaviate.py +89 -0
  568. lfx/components/wikipedia/__init__.py +4 -0
  569. lfx/components/wikipedia/wikidata.py +86 -0
  570. lfx/components/wikipedia/wikipedia.py +53 -0
  571. lfx/components/wolframalpha/__init__.py +3 -0
  572. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  573. lfx/components/xai/__init__.py +32 -0
  574. lfx/components/xai/xai.py +167 -0
  575. lfx/components/yahoosearch/__init__.py +3 -0
  576. lfx/components/yahoosearch/yahoo.py +137 -0
  577. lfx/components/youtube/__init__.py +52 -0
  578. lfx/components/youtube/channel.py +227 -0
  579. lfx/components/youtube/comments.py +231 -0
  580. lfx/components/youtube/playlist.py +33 -0
  581. lfx/components/youtube/search.py +120 -0
  582. lfx/components/youtube/trending.py +285 -0
  583. lfx/components/youtube/video_details.py +263 -0
  584. lfx/components/youtube/youtube_transcripts.py +206 -0
  585. lfx/components/zep/__init__.py +3 -0
  586. lfx/components/zep/zep.py +45 -0
  587. lfx/constants.py +6 -0
  588. lfx/custom/__init__.py +7 -0
  589. lfx/custom/attributes.py +87 -0
  590. lfx/custom/code_parser/__init__.py +3 -0
  591. lfx/custom/code_parser/code_parser.py +361 -0
  592. lfx/custom/custom_component/__init__.py +0 -0
  593. lfx/custom/custom_component/base_component.py +128 -0
  594. lfx/custom/custom_component/component.py +1890 -0
  595. lfx/custom/custom_component/component_with_cache.py +8 -0
  596. lfx/custom/custom_component/custom_component.py +650 -0
  597. lfx/custom/dependency_analyzer.py +165 -0
  598. lfx/custom/directory_reader/__init__.py +3 -0
  599. lfx/custom/directory_reader/directory_reader.py +359 -0
  600. lfx/custom/directory_reader/utils.py +171 -0
  601. lfx/custom/eval.py +12 -0
  602. lfx/custom/schema.py +32 -0
  603. lfx/custom/tree_visitor.py +21 -0
  604. lfx/custom/utils.py +877 -0
  605. lfx/custom/validate.py +523 -0
  606. lfx/events/__init__.py +1 -0
  607. lfx/events/event_manager.py +110 -0
  608. lfx/exceptions/__init__.py +0 -0
  609. lfx/exceptions/component.py +15 -0
  610. lfx/field_typing/__init__.py +91 -0
  611. lfx/field_typing/constants.py +216 -0
  612. lfx/field_typing/range_spec.py +35 -0
  613. lfx/graph/__init__.py +6 -0
  614. lfx/graph/edge/__init__.py +0 -0
  615. lfx/graph/edge/base.py +300 -0
  616. lfx/graph/edge/schema.py +119 -0
  617. lfx/graph/edge/utils.py +0 -0
  618. lfx/graph/graph/__init__.py +0 -0
  619. lfx/graph/graph/ascii.py +202 -0
  620. lfx/graph/graph/base.py +2298 -0
  621. lfx/graph/graph/constants.py +63 -0
  622. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  623. lfx/graph/graph/schema.py +53 -0
  624. lfx/graph/graph/state_model.py +66 -0
  625. lfx/graph/graph/utils.py +1024 -0
  626. lfx/graph/schema.py +75 -0
  627. lfx/graph/state/__init__.py +0 -0
  628. lfx/graph/state/model.py +250 -0
  629. lfx/graph/utils.py +206 -0
  630. lfx/graph/vertex/__init__.py +0 -0
  631. lfx/graph/vertex/base.py +826 -0
  632. lfx/graph/vertex/constants.py +0 -0
  633. lfx/graph/vertex/exceptions.py +4 -0
  634. lfx/graph/vertex/param_handler.py +316 -0
  635. lfx/graph/vertex/schema.py +26 -0
  636. lfx/graph/vertex/utils.py +19 -0
  637. lfx/graph/vertex/vertex_types.py +489 -0
  638. lfx/helpers/__init__.py +141 -0
  639. lfx/helpers/base_model.py +71 -0
  640. lfx/helpers/custom.py +13 -0
  641. lfx/helpers/data.py +167 -0
  642. lfx/helpers/flow.py +308 -0
  643. lfx/inputs/__init__.py +68 -0
  644. lfx/inputs/constants.py +2 -0
  645. lfx/inputs/input_mixin.py +352 -0
  646. lfx/inputs/inputs.py +718 -0
  647. lfx/inputs/validators.py +19 -0
  648. lfx/interface/__init__.py +6 -0
  649. lfx/interface/components.py +897 -0
  650. lfx/interface/importing/__init__.py +5 -0
  651. lfx/interface/importing/utils.py +39 -0
  652. lfx/interface/initialize/__init__.py +3 -0
  653. lfx/interface/initialize/loading.py +317 -0
  654. lfx/interface/listing.py +26 -0
  655. lfx/interface/run.py +16 -0
  656. lfx/interface/utils.py +111 -0
  657. lfx/io/__init__.py +63 -0
  658. lfx/io/schema.py +295 -0
  659. lfx/load/__init__.py +8 -0
  660. lfx/load/load.py +256 -0
  661. lfx/load/utils.py +99 -0
  662. lfx/log/__init__.py +5 -0
  663. lfx/log/logger.py +411 -0
  664. lfx/logging/__init__.py +11 -0
  665. lfx/logging/logger.py +24 -0
  666. lfx/memory/__init__.py +70 -0
  667. lfx/memory/stubs.py +302 -0
  668. lfx/processing/__init__.py +1 -0
  669. lfx/processing/process.py +238 -0
  670. lfx/processing/utils.py +25 -0
  671. lfx/py.typed +0 -0
  672. lfx/schema/__init__.py +66 -0
  673. lfx/schema/artifact.py +83 -0
  674. lfx/schema/content_block.py +62 -0
  675. lfx/schema/content_types.py +91 -0
  676. lfx/schema/cross_module.py +80 -0
  677. lfx/schema/data.py +309 -0
  678. lfx/schema/dataframe.py +210 -0
  679. lfx/schema/dotdict.py +74 -0
  680. lfx/schema/encoders.py +13 -0
  681. lfx/schema/graph.py +47 -0
  682. lfx/schema/image.py +184 -0
  683. lfx/schema/json_schema.py +186 -0
  684. lfx/schema/log.py +62 -0
  685. lfx/schema/message.py +493 -0
  686. lfx/schema/openai_responses_schemas.py +74 -0
  687. lfx/schema/properties.py +41 -0
  688. lfx/schema/schema.py +180 -0
  689. lfx/schema/serialize.py +13 -0
  690. lfx/schema/table.py +142 -0
  691. lfx/schema/validators.py +114 -0
  692. lfx/serialization/__init__.py +5 -0
  693. lfx/serialization/constants.py +2 -0
  694. lfx/serialization/serialization.py +314 -0
  695. lfx/services/__init__.py +26 -0
  696. lfx/services/base.py +28 -0
  697. lfx/services/cache/__init__.py +6 -0
  698. lfx/services/cache/base.py +183 -0
  699. lfx/services/cache/service.py +166 -0
  700. lfx/services/cache/utils.py +169 -0
  701. lfx/services/chat/__init__.py +1 -0
  702. lfx/services/chat/config.py +2 -0
  703. lfx/services/chat/schema.py +10 -0
  704. lfx/services/database/__init__.py +5 -0
  705. lfx/services/database/service.py +25 -0
  706. lfx/services/deps.py +194 -0
  707. lfx/services/factory.py +19 -0
  708. lfx/services/initialize.py +19 -0
  709. lfx/services/interfaces.py +103 -0
  710. lfx/services/manager.py +185 -0
  711. lfx/services/mcp_composer/__init__.py +6 -0
  712. lfx/services/mcp_composer/factory.py +16 -0
  713. lfx/services/mcp_composer/service.py +1441 -0
  714. lfx/services/schema.py +21 -0
  715. lfx/services/session.py +87 -0
  716. lfx/services/settings/__init__.py +3 -0
  717. lfx/services/settings/auth.py +133 -0
  718. lfx/services/settings/base.py +668 -0
  719. lfx/services/settings/constants.py +43 -0
  720. lfx/services/settings/factory.py +23 -0
  721. lfx/services/settings/feature_flags.py +11 -0
  722. lfx/services/settings/service.py +35 -0
  723. lfx/services/settings/utils.py +40 -0
  724. lfx/services/shared_component_cache/__init__.py +1 -0
  725. lfx/services/shared_component_cache/factory.py +30 -0
  726. lfx/services/shared_component_cache/service.py +9 -0
  727. lfx/services/storage/__init__.py +5 -0
  728. lfx/services/storage/local.py +185 -0
  729. lfx/services/storage/service.py +177 -0
  730. lfx/services/tracing/__init__.py +1 -0
  731. lfx/services/tracing/service.py +21 -0
  732. lfx/settings.py +6 -0
  733. lfx/template/__init__.py +6 -0
  734. lfx/template/field/__init__.py +0 -0
  735. lfx/template/field/base.py +260 -0
  736. lfx/template/field/prompt.py +15 -0
  737. lfx/template/frontend_node/__init__.py +6 -0
  738. lfx/template/frontend_node/base.py +214 -0
  739. lfx/template/frontend_node/constants.py +65 -0
  740. lfx/template/frontend_node/custom_components.py +79 -0
  741. lfx/template/template/__init__.py +0 -0
  742. lfx/template/template/base.py +100 -0
  743. lfx/template/utils.py +217 -0
  744. lfx/type_extraction/__init__.py +19 -0
  745. lfx/type_extraction/type_extraction.py +75 -0
  746. lfx/type_extraction.py +80 -0
  747. lfx/utils/__init__.py +1 -0
  748. lfx/utils/async_helpers.py +42 -0
  749. lfx/utils/component_utils.py +154 -0
  750. lfx/utils/concurrency.py +60 -0
  751. lfx/utils/connection_string_parser.py +11 -0
  752. lfx/utils/constants.py +233 -0
  753. lfx/utils/data_structure.py +212 -0
  754. lfx/utils/exceptions.py +22 -0
  755. lfx/utils/helpers.py +34 -0
  756. lfx/utils/image.py +79 -0
  757. lfx/utils/langflow_utils.py +52 -0
  758. lfx/utils/lazy_load.py +15 -0
  759. lfx/utils/request_utils.py +18 -0
  760. lfx/utils/schemas.py +139 -0
  761. lfx/utils/ssrf_protection.py +384 -0
  762. lfx/utils/util.py +626 -0
  763. lfx/utils/util_strings.py +56 -0
  764. lfx/utils/validate_cloud.py +26 -0
  765. lfx/utils/version.py +24 -0
  766. lfx_nightly-0.2.0.dev25.dist-info/METADATA +312 -0
  767. lfx_nightly-0.2.0.dev25.dist-info/RECORD +769 -0
  768. lfx_nightly-0.2.0.dev25.dist-info/WHEEL +4 -0
  769. lfx_nightly-0.2.0.dev25.dist-info/entry_points.txt +2 -0
lfx/base/data/utils.py ADDED
@@ -0,0 +1,362 @@
1
+ import contextlib
2
+ import tempfile
3
+ import unicodedata
4
+ from collections.abc import Callable
5
+ from concurrent import futures
6
+ from io import BytesIO
7
+ from pathlib import Path
8
+
9
+ import chardet
10
+ import orjson
11
+ import yaml
12
+ from defusedxml import ElementTree
13
+ from pypdf import PdfReader
14
+
15
+ from lfx.base.data.storage_utils import read_file_bytes
16
+ from lfx.schema.data import Data
17
+ from lfx.services.deps import get_settings_service
18
+ from lfx.utils.async_helpers import run_until_complete
19
+
20
+ # Types of files that can be read simply by file.read()
21
+ # and have 100% to be completely readable
22
+ TEXT_FILE_TYPES = [
23
+ "csv",
24
+ "json",
25
+ "pdf",
26
+ "txt",
27
+ "md",
28
+ "mdx",
29
+ "yaml",
30
+ "yml",
31
+ "xml",
32
+ "html",
33
+ "htm",
34
+ "docx",
35
+ "py",
36
+ "sh",
37
+ "sql",
38
+ "js",
39
+ "ts",
40
+ "tsx",
41
+ ]
42
+
43
+ IMG_FILE_TYPES = ["jpg", "jpeg", "png", "bmp", "image"]
44
+
45
+
46
+ def parse_structured_text(text: str, file_path: str) -> str | dict | list:
47
+ """Parse structured text formats (JSON, YAML, XML) and normalize text.
48
+
49
+ Args:
50
+ text: The text content to parse
51
+ file_path: The file path (used to determine format)
52
+
53
+ Returns:
54
+ Parsed content (dict/list for JSON, dict for YAML, str for XML)
55
+ """
56
+ if file_path.endswith(".json"):
57
+ loaded_json = orjson.loads(text)
58
+ if isinstance(loaded_json, dict):
59
+ loaded_json = {k: normalize_text(v) if isinstance(v, str) else v for k, v in loaded_json.items()}
60
+ elif isinstance(loaded_json, list):
61
+ loaded_json = [normalize_text(item) if isinstance(item, str) else item for item in loaded_json]
62
+ return orjson.dumps(loaded_json).decode("utf-8")
63
+
64
+ if file_path.endswith((".yaml", ".yml")):
65
+ return yaml.safe_load(text)
66
+
67
+ if file_path.endswith(".xml"):
68
+ xml_element = ElementTree.fromstring(text)
69
+ return ElementTree.tostring(xml_element, encoding="unicode")
70
+
71
+ return text
72
+
73
+
74
+ def normalize_text(text):
75
+ return unicodedata.normalize("NFKD", text)
76
+
77
+
78
+ def is_hidden(path: Path) -> bool:
79
+ return path.name.startswith(".")
80
+
81
+
82
+ def format_directory_path(path: str) -> str:
83
+ """Format a directory path to ensure it's properly escaped and valid.
84
+
85
+ Args:
86
+ path (str): The input path string.
87
+
88
+ Returns:
89
+ str: A properly formatted path string.
90
+ """
91
+ return path.replace("\n", "\\n")
92
+
93
+
94
+ # Ignoring FBT001 because the DirectoryComponent in 1.0.19
95
+ # calls this function without keyword arguments
96
+ def retrieve_file_paths(
97
+ path: str,
98
+ load_hidden: bool, # noqa: FBT001
99
+ recursive: bool, # noqa: FBT001
100
+ depth: int,
101
+ types: list[str] = TEXT_FILE_TYPES,
102
+ ) -> list[str]:
103
+ path = format_directory_path(path)
104
+ path_obj = Path(path)
105
+ if not path_obj.exists() or not path_obj.is_dir():
106
+ msg = f"Path {path} must exist and be a directory."
107
+ raise ValueError(msg)
108
+
109
+ def match_types(p: Path) -> bool:
110
+ return any(p.suffix == f".{t}" for t in types) if types else True
111
+
112
+ def is_not_hidden(p: Path) -> bool:
113
+ return not is_hidden(p) or load_hidden
114
+
115
+ def walk_level(directory: Path, max_depth: int):
116
+ directory = directory.resolve()
117
+ prefix_length = len(directory.parts)
118
+ for p in directory.rglob("*" if recursive else "[!.]*"):
119
+ if len(p.parts) - prefix_length <= max_depth:
120
+ yield p
121
+
122
+ glob = "**/*" if recursive else "*"
123
+ paths = walk_level(path_obj, depth) if depth else path_obj.glob(glob)
124
+ return [str(p) for p in paths if p.is_file() and match_types(p) and is_not_hidden(p)]
125
+
126
+
127
+ def partition_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
128
+ # Use the partition function to load the file
129
+ from unstructured.partition.auto import partition
130
+
131
+ try:
132
+ elements = partition(file_path)
133
+ except Exception as e:
134
+ if not silent_errors:
135
+ msg = f"Error loading file {file_path}: {e}"
136
+ raise ValueError(msg) from e
137
+ return None
138
+
139
+ # Create a Data
140
+ text = "\n\n".join([str(el) for el in elements])
141
+ metadata = elements.metadata if hasattr(elements, "metadata") else {}
142
+ metadata["file_path"] = file_path
143
+ return Data(text=text, data=metadata)
144
+
145
+
146
+ def read_text_file(file_path: str) -> str:
147
+ """Read a text file with automatic encoding detection.
148
+
149
+ Args:
150
+ file_path: Path to the file (local path only, not storage service path)
151
+
152
+ Returns:
153
+ str: The file content as text
154
+ """
155
+ file_path_ = Path(file_path)
156
+ raw_data = file_path_.read_bytes()
157
+ result = chardet.detect(raw_data)
158
+ encoding = result["encoding"]
159
+
160
+ if encoding in {"Windows-1252", "Windows-1254", "MacRoman"}:
161
+ encoding = "utf-8"
162
+
163
+ return file_path_.read_text(encoding=encoding)
164
+
165
+
166
+ async def read_text_file_async(file_path: str) -> str:
167
+ """Read a text file with automatic encoding detection (async, storage-aware).
168
+
169
+ Args:
170
+ file_path: Path to the file (S3 key format "flow_id/filename" or local path)
171
+
172
+ Returns:
173
+ str: The file content as text
174
+ """
175
+ from .storage_utils import read_file_bytes
176
+
177
+ # Use storage-aware read to get bytes
178
+ raw_data = await read_file_bytes(file_path)
179
+
180
+ # Auto-detect encoding
181
+ result = chardet.detect(raw_data)
182
+ encoding = result.get("encoding")
183
+
184
+ # If encoding detection fails (e.g., binary file), default to utf-8
185
+ if not encoding or encoding in {"Windows-1252", "Windows-1254", "MacRoman"}:
186
+ encoding = "utf-8"
187
+
188
+ return raw_data.decode(encoding, errors="replace")
189
+
190
+
191
+ def read_docx_file(file_path: str) -> str:
192
+ """Read a DOCX file and extract text.
193
+
194
+ ote: python-docx requires a file path, so this only works with local files.
195
+ For storage service files, use read_docx_file_async which downloads to temp.
196
+
197
+ Args:
198
+ file_path: Path to the DOCX file (local path only)
199
+
200
+ Returns:
201
+ str: Extracted text from the document
202
+ """
203
+ from docx import Document
204
+
205
+ doc = Document(file_path)
206
+ return "\n\n".join([p.text for p in doc.paragraphs])
207
+
208
+
209
+ async def read_docx_file_async(file_path: str) -> str:
210
+ """Read a DOCX file and extract text (async, storage-aware).
211
+
212
+ For S3 storage, downloads to temp file (python-docx requires file path).
213
+ For local storage, reads directly.
214
+
215
+ Args:
216
+ file_path: Path to the DOCX file (S3 key format "flow_id/filename" or local path)
217
+
218
+ Returns:
219
+ str: Extracted text from the document
220
+ """
221
+ from docx import Document
222
+
223
+ from .storage_utils import read_file_bytes
224
+
225
+ settings = get_settings_service().settings
226
+
227
+ if settings.storage_type == "local":
228
+ # Local storage - read directly
229
+ doc = Document(file_path)
230
+ return "\n\n".join([p.text for p in doc.paragraphs])
231
+
232
+ # S3 storage - need temp file for python-docx (doesn't support BytesIO)
233
+ content = await read_file_bytes(file_path)
234
+
235
+ # Create temp file with .docx extension
236
+ # Extract filename from path for suffix
237
+ suffix = Path(file_path.split("/")[-1]).suffix
238
+ with tempfile.NamedTemporaryFile(mode="wb", suffix=suffix, delete=False) as tmp_file:
239
+ tmp_file.write(content)
240
+ temp_path = tmp_file.name
241
+
242
+ try:
243
+ doc = Document(temp_path)
244
+ return "\n\n".join([p.text for p in doc.paragraphs])
245
+ finally:
246
+ with contextlib.suppress(Exception):
247
+ Path(temp_path).unlink()
248
+
249
+
250
+ def parse_pdf_to_text(file_path: str) -> str:
251
+ from pypdf import PdfReader
252
+
253
+ with Path(file_path).open("rb") as f, PdfReader(f) as reader:
254
+ return "\n\n".join([page.extract_text() for page in reader.pages])
255
+
256
+
257
+ async def parse_pdf_to_text_async(file_path: str) -> str:
258
+ """Parse a PDF file to extract text (async, storage-aware).
259
+
260
+ Uses storage-aware file reading to support both local and S3 storage.
261
+
262
+ Args:
263
+ file_path: Path to the PDF file (S3 key format "flow_id/filename" or local path)
264
+
265
+ Returns:
266
+ str: Extracted text from all pages
267
+ """
268
+ content = await read_file_bytes(file_path)
269
+ with BytesIO(content) as f, PdfReader(f) as reader:
270
+ return "\n\n".join([page.extract_text() for page in reader.pages])
271
+
272
+
273
+ def parse_text_file_to_data(file_path: str, *, silent_errors: bool) -> Data | None:
274
+ """Parse a text file to Data (sync version).
275
+
276
+ For S3 storage, this will use async operations to fetch the file.
277
+ For local storage, reads directly from filesystem.
278
+ """
279
+ settings = get_settings_service().settings
280
+
281
+ # If using S3 storage, we need to use async operations
282
+ if settings.storage_type == "s3":
283
+ # Run the async version safely (handles existing event loops)
284
+ return run_until_complete(parse_text_file_to_data_async(file_path, silent_errors=silent_errors))
285
+
286
+ try:
287
+ if file_path.endswith(".pdf"):
288
+ text = parse_pdf_to_text(file_path)
289
+ elif file_path.endswith(".docx"):
290
+ text = read_docx_file(file_path)
291
+ else:
292
+ text = read_text_file(file_path)
293
+
294
+ text = parse_structured_text(text, file_path)
295
+ except Exception as e:
296
+ if not silent_errors:
297
+ msg = f"Error loading file {file_path}: {e}"
298
+ raise ValueError(msg) from e
299
+ return None
300
+
301
+ return Data(data={"file_path": file_path, "text": text})
302
+
303
+
304
+ async def parse_text_file_to_data_async(file_path: str, *, silent_errors: bool) -> Data | None:
305
+ """Parse a text file to Data (async version, supports storage service).
306
+
307
+ This version properly handles storage service files:
308
+ - For text/JSON/YAML/XML: reads bytes directly (no temp file)
309
+ - For PDF: reads bytes directly via BytesIO (no temp file)
310
+ - For DOCX: downloads to temp file (python-docx requires file path)
311
+ """
312
+ try:
313
+ if file_path.endswith(".pdf"):
314
+ text = await parse_pdf_to_text_async(file_path)
315
+ elif file_path.endswith(".docx"):
316
+ text = await read_docx_file_async(file_path)
317
+ else:
318
+ # Text files - read directly, no temp file needed
319
+ text = await read_text_file_async(file_path)
320
+
321
+ # Parse structured formats (JSON, YAML, XML)
322
+ text = parse_structured_text(text, file_path)
323
+
324
+ return Data(data={"file_path": file_path, "text": text})
325
+
326
+ except Exception as e:
327
+ if not silent_errors:
328
+ msg = f"Error loading file {file_path}: {e}"
329
+ raise ValueError(msg) from e
330
+ return None
331
+
332
+
333
+ # ! Removing unstructured dependency until
334
+ # ! 3.12 is supported
335
+ # def get_elements(
336
+ # file_paths: List[str],
337
+ # silent_errors: bool,
338
+ # max_concurrency: int,
339
+ # use_multithreading: bool,
340
+ # ) -> List[Optional[Data]]:
341
+ # if use_multithreading:
342
+ # data = parallel_load_data(file_paths, silent_errors, max_concurrency)
343
+ # else:
344
+ # data = [partition_file_to_data(file_path, silent_errors) for file_path in file_paths]
345
+ # data = list(filter(None, data))
346
+ # return data
347
+
348
+
349
+ def parallel_load_data(
350
+ file_paths: list[str],
351
+ *,
352
+ silent_errors: bool,
353
+ max_concurrency: int,
354
+ load_function: Callable = parse_text_file_to_data,
355
+ ) -> list[Data | None]:
356
+ with futures.ThreadPoolExecutor(max_workers=max_concurrency) as executor:
357
+ loaded_files = executor.map(
358
+ lambda file_path: load_function(file_path, silent_errors=silent_errors),
359
+ file_paths,
360
+ )
361
+ # loaded_files is an iterator, so we need to convert it to a list
362
+ return list(loaded_files)
@@ -0,0 +1,5 @@
1
+ from .astradb_base import AstraDBBaseComponent
2
+
3
+ __all__ = [
4
+ "AstraDBBaseComponent",
5
+ ]