lfx-nightly 0.2.0.dev25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lfx-nightly might be problematic. Click here for more details.

Files changed (769):
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/_assets/component_index.json +1 -0
  4. lfx/base/__init__.py +0 -0
  5. lfx/base/agents/__init__.py +0 -0
  6. lfx/base/agents/agent.py +375 -0
  7. lfx/base/agents/altk_base_agent.py +380 -0
  8. lfx/base/agents/altk_tool_wrappers.py +565 -0
  9. lfx/base/agents/callback.py +130 -0
  10. lfx/base/agents/context.py +109 -0
  11. lfx/base/agents/crewai/__init__.py +0 -0
  12. lfx/base/agents/crewai/crew.py +231 -0
  13. lfx/base/agents/crewai/tasks.py +12 -0
  14. lfx/base/agents/default_prompts.py +23 -0
  15. lfx/base/agents/errors.py +15 -0
  16. lfx/base/agents/events.py +430 -0
  17. lfx/base/agents/utils.py +237 -0
  18. lfx/base/astra_assistants/__init__.py +0 -0
  19. lfx/base/astra_assistants/util.py +171 -0
  20. lfx/base/chains/__init__.py +0 -0
  21. lfx/base/chains/model.py +19 -0
  22. lfx/base/composio/__init__.py +0 -0
  23. lfx/base/composio/composio_base.py +2584 -0
  24. lfx/base/compressors/__init__.py +0 -0
  25. lfx/base/compressors/model.py +60 -0
  26. lfx/base/constants.py +46 -0
  27. lfx/base/curl/__init__.py +0 -0
  28. lfx/base/curl/parse.py +188 -0
  29. lfx/base/data/__init__.py +5 -0
  30. lfx/base/data/base_file.py +810 -0
  31. lfx/base/data/docling_utils.py +338 -0
  32. lfx/base/data/storage_utils.py +192 -0
  33. lfx/base/data/utils.py +362 -0
  34. lfx/base/datastax/__init__.py +5 -0
  35. lfx/base/datastax/astradb_base.py +896 -0
  36. lfx/base/document_transformers/__init__.py +0 -0
  37. lfx/base/document_transformers/model.py +43 -0
  38. lfx/base/embeddings/__init__.py +0 -0
  39. lfx/base/embeddings/aiml_embeddings.py +62 -0
  40. lfx/base/embeddings/embeddings_class.py +113 -0
  41. lfx/base/embeddings/model.py +26 -0
  42. lfx/base/flow_processing/__init__.py +0 -0
  43. lfx/base/flow_processing/utils.py +86 -0
  44. lfx/base/huggingface/__init__.py +0 -0
  45. lfx/base/huggingface/model_bridge.py +133 -0
  46. lfx/base/io/__init__.py +0 -0
  47. lfx/base/io/chat.py +21 -0
  48. lfx/base/io/text.py +22 -0
  49. lfx/base/knowledge_bases/__init__.py +3 -0
  50. lfx/base/knowledge_bases/knowledge_base_utils.py +137 -0
  51. lfx/base/langchain_utilities/__init__.py +0 -0
  52. lfx/base/langchain_utilities/model.py +35 -0
  53. lfx/base/langchain_utilities/spider_constants.py +1 -0
  54. lfx/base/langwatch/__init__.py +0 -0
  55. lfx/base/langwatch/utils.py +18 -0
  56. lfx/base/mcp/__init__.py +0 -0
  57. lfx/base/mcp/constants.py +2 -0
  58. lfx/base/mcp/util.py +1659 -0
  59. lfx/base/memory/__init__.py +0 -0
  60. lfx/base/memory/memory.py +49 -0
  61. lfx/base/memory/model.py +38 -0
  62. lfx/base/models/__init__.py +3 -0
  63. lfx/base/models/aiml_constants.py +51 -0
  64. lfx/base/models/anthropic_constants.py +51 -0
  65. lfx/base/models/aws_constants.py +151 -0
  66. lfx/base/models/chat_result.py +76 -0
  67. lfx/base/models/cometapi_constants.py +54 -0
  68. lfx/base/models/google_generative_ai_constants.py +70 -0
  69. lfx/base/models/google_generative_ai_model.py +38 -0
  70. lfx/base/models/groq_constants.py +150 -0
  71. lfx/base/models/groq_model_discovery.py +265 -0
  72. lfx/base/models/model.py +375 -0
  73. lfx/base/models/model_input_constants.py +378 -0
  74. lfx/base/models/model_metadata.py +41 -0
  75. lfx/base/models/model_utils.py +108 -0
  76. lfx/base/models/novita_constants.py +35 -0
  77. lfx/base/models/ollama_constants.py +52 -0
  78. lfx/base/models/openai_constants.py +129 -0
  79. lfx/base/models/sambanova_constants.py +18 -0
  80. lfx/base/models/watsonx_constants.py +36 -0
  81. lfx/base/processing/__init__.py +0 -0
  82. lfx/base/prompts/__init__.py +0 -0
  83. lfx/base/prompts/api_utils.py +224 -0
  84. lfx/base/prompts/utils.py +61 -0
  85. lfx/base/textsplitters/__init__.py +0 -0
  86. lfx/base/textsplitters/model.py +28 -0
  87. lfx/base/tools/__init__.py +0 -0
  88. lfx/base/tools/base.py +26 -0
  89. lfx/base/tools/component_tool.py +325 -0
  90. lfx/base/tools/constants.py +49 -0
  91. lfx/base/tools/flow_tool.py +132 -0
  92. lfx/base/tools/run_flow.py +698 -0
  93. lfx/base/vectorstores/__init__.py +0 -0
  94. lfx/base/vectorstores/model.py +193 -0
  95. lfx/base/vectorstores/utils.py +22 -0
  96. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  97. lfx/cli/__init__.py +5 -0
  98. lfx/cli/commands.py +327 -0
  99. lfx/cli/common.py +650 -0
  100. lfx/cli/run.py +506 -0
  101. lfx/cli/script_loader.py +289 -0
  102. lfx/cli/serve_app.py +546 -0
  103. lfx/cli/validation.py +69 -0
  104. lfx/components/FAISS/__init__.py +34 -0
  105. lfx/components/FAISS/faiss.py +111 -0
  106. lfx/components/Notion/__init__.py +19 -0
  107. lfx/components/Notion/add_content_to_page.py +269 -0
  108. lfx/components/Notion/create_page.py +94 -0
  109. lfx/components/Notion/list_database_properties.py +68 -0
  110. lfx/components/Notion/list_pages.py +122 -0
  111. lfx/components/Notion/list_users.py +77 -0
  112. lfx/components/Notion/page_content_viewer.py +93 -0
  113. lfx/components/Notion/search.py +111 -0
  114. lfx/components/Notion/update_page_property.py +114 -0
  115. lfx/components/__init__.py +428 -0
  116. lfx/components/_importing.py +42 -0
  117. lfx/components/agentql/__init__.py +3 -0
  118. lfx/components/agentql/agentql_api.py +151 -0
  119. lfx/components/aiml/__init__.py +37 -0
  120. lfx/components/aiml/aiml.py +115 -0
  121. lfx/components/aiml/aiml_embeddings.py +37 -0
  122. lfx/components/altk/__init__.py +34 -0
  123. lfx/components/altk/altk_agent.py +193 -0
  124. lfx/components/amazon/__init__.py +36 -0
  125. lfx/components/amazon/amazon_bedrock_converse.py +195 -0
  126. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  127. lfx/components/amazon/amazon_bedrock_model.py +130 -0
  128. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  129. lfx/components/anthropic/__init__.py +34 -0
  130. lfx/components/anthropic/anthropic.py +187 -0
  131. lfx/components/apify/__init__.py +5 -0
  132. lfx/components/apify/apify_actor.py +325 -0
  133. lfx/components/arxiv/__init__.py +3 -0
  134. lfx/components/arxiv/arxiv.py +169 -0
  135. lfx/components/assemblyai/__init__.py +46 -0
  136. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  137. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  138. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  139. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  140. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  141. lfx/components/azure/__init__.py +37 -0
  142. lfx/components/azure/azure_openai.py +95 -0
  143. lfx/components/azure/azure_openai_embeddings.py +83 -0
  144. lfx/components/baidu/__init__.py +32 -0
  145. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  146. lfx/components/bing/__init__.py +3 -0
  147. lfx/components/bing/bing_search_api.py +61 -0
  148. lfx/components/cassandra/__init__.py +40 -0
  149. lfx/components/cassandra/cassandra.py +264 -0
  150. lfx/components/cassandra/cassandra_chat.py +92 -0
  151. lfx/components/cassandra/cassandra_graph.py +238 -0
  152. lfx/components/chains/__init__.py +3 -0
  153. lfx/components/chroma/__init__.py +34 -0
  154. lfx/components/chroma/chroma.py +169 -0
  155. lfx/components/cleanlab/__init__.py +40 -0
  156. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  157. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  158. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  159. lfx/components/clickhouse/__init__.py +34 -0
  160. lfx/components/clickhouse/clickhouse.py +135 -0
  161. lfx/components/cloudflare/__init__.py +32 -0
  162. lfx/components/cloudflare/cloudflare.py +81 -0
  163. lfx/components/cohere/__init__.py +40 -0
  164. lfx/components/cohere/cohere_embeddings.py +81 -0
  165. lfx/components/cohere/cohere_models.py +46 -0
  166. lfx/components/cohere/cohere_rerank.py +51 -0
  167. lfx/components/cometapi/__init__.py +32 -0
  168. lfx/components/cometapi/cometapi.py +166 -0
  169. lfx/components/composio/__init__.py +222 -0
  170. lfx/components/composio/agentql_composio.py +11 -0
  171. lfx/components/composio/agiled_composio.py +11 -0
  172. lfx/components/composio/airtable_composio.py +11 -0
  173. lfx/components/composio/apollo_composio.py +11 -0
  174. lfx/components/composio/asana_composio.py +11 -0
  175. lfx/components/composio/attio_composio.py +11 -0
  176. lfx/components/composio/bitbucket_composio.py +11 -0
  177. lfx/components/composio/bolna_composio.py +11 -0
  178. lfx/components/composio/brightdata_composio.py +11 -0
  179. lfx/components/composio/calendly_composio.py +11 -0
  180. lfx/components/composio/canva_composio.py +11 -0
  181. lfx/components/composio/canvas_composio.py +11 -0
  182. lfx/components/composio/coda_composio.py +11 -0
  183. lfx/components/composio/composio_api.py +278 -0
  184. lfx/components/composio/contentful_composio.py +11 -0
  185. lfx/components/composio/digicert_composio.py +11 -0
  186. lfx/components/composio/discord_composio.py +11 -0
  187. lfx/components/composio/dropbox_compnent.py +11 -0
  188. lfx/components/composio/elevenlabs_composio.py +11 -0
  189. lfx/components/composio/exa_composio.py +11 -0
  190. lfx/components/composio/figma_composio.py +11 -0
  191. lfx/components/composio/finage_composio.py +11 -0
  192. lfx/components/composio/firecrawl_composio.py +11 -0
  193. lfx/components/composio/fireflies_composio.py +11 -0
  194. lfx/components/composio/fixer_composio.py +11 -0
  195. lfx/components/composio/flexisign_composio.py +11 -0
  196. lfx/components/composio/freshdesk_composio.py +11 -0
  197. lfx/components/composio/github_composio.py +11 -0
  198. lfx/components/composio/gmail_composio.py +38 -0
  199. lfx/components/composio/googlebigquery_composio.py +11 -0
  200. lfx/components/composio/googlecalendar_composio.py +11 -0
  201. lfx/components/composio/googleclassroom_composio.py +11 -0
  202. lfx/components/composio/googledocs_composio.py +11 -0
  203. lfx/components/composio/googlemeet_composio.py +11 -0
  204. lfx/components/composio/googlesheets_composio.py +11 -0
  205. lfx/components/composio/googletasks_composio.py +8 -0
  206. lfx/components/composio/heygen_composio.py +11 -0
  207. lfx/components/composio/instagram_composio.py +11 -0
  208. lfx/components/composio/jira_composio.py +11 -0
  209. lfx/components/composio/jotform_composio.py +11 -0
  210. lfx/components/composio/klaviyo_composio.py +11 -0
  211. lfx/components/composio/linear_composio.py +11 -0
  212. lfx/components/composio/listennotes_composio.py +11 -0
  213. lfx/components/composio/mem0_composio.py +11 -0
  214. lfx/components/composio/miro_composio.py +11 -0
  215. lfx/components/composio/missive_composio.py +11 -0
  216. lfx/components/composio/notion_composio.py +11 -0
  217. lfx/components/composio/onedrive_composio.py +11 -0
  218. lfx/components/composio/outlook_composio.py +11 -0
  219. lfx/components/composio/pandadoc_composio.py +11 -0
  220. lfx/components/composio/peopledatalabs_composio.py +11 -0
  221. lfx/components/composio/perplexityai_composio.py +11 -0
  222. lfx/components/composio/reddit_composio.py +11 -0
  223. lfx/components/composio/serpapi_composio.py +11 -0
  224. lfx/components/composio/slack_composio.py +11 -0
  225. lfx/components/composio/slackbot_composio.py +11 -0
  226. lfx/components/composio/snowflake_composio.py +11 -0
  227. lfx/components/composio/supabase_composio.py +11 -0
  228. lfx/components/composio/tavily_composio.py +11 -0
  229. lfx/components/composio/timelinesai_composio.py +11 -0
  230. lfx/components/composio/todoist_composio.py +11 -0
  231. lfx/components/composio/wrike_composio.py +11 -0
  232. lfx/components/composio/youtube_composio.py +11 -0
  233. lfx/components/confluence/__init__.py +3 -0
  234. lfx/components/confluence/confluence.py +84 -0
  235. lfx/components/couchbase/__init__.py +34 -0
  236. lfx/components/couchbase/couchbase.py +102 -0
  237. lfx/components/crewai/__init__.py +49 -0
  238. lfx/components/crewai/crewai.py +108 -0
  239. lfx/components/crewai/hierarchical_crew.py +47 -0
  240. lfx/components/crewai/hierarchical_task.py +45 -0
  241. lfx/components/crewai/sequential_crew.py +53 -0
  242. lfx/components/crewai/sequential_task.py +74 -0
  243. lfx/components/crewai/sequential_task_agent.py +144 -0
  244. lfx/components/cuga/__init__.py +34 -0
  245. lfx/components/cuga/cuga_agent.py +730 -0
  246. lfx/components/custom_component/__init__.py +34 -0
  247. lfx/components/custom_component/custom_component.py +31 -0
  248. lfx/components/data/__init__.py +114 -0
  249. lfx/components/data_source/__init__.py +58 -0
  250. lfx/components/data_source/api_request.py +577 -0
  251. lfx/components/data_source/csv_to_data.py +101 -0
  252. lfx/components/data_source/json_to_data.py +106 -0
  253. lfx/components/data_source/mock_data.py +398 -0
  254. lfx/components/data_source/news_search.py +166 -0
  255. lfx/components/data_source/rss.py +71 -0
  256. lfx/components/data_source/sql_executor.py +101 -0
  257. lfx/components/data_source/url.py +311 -0
  258. lfx/components/data_source/web_search.py +326 -0
  259. lfx/components/datastax/__init__.py +76 -0
  260. lfx/components/datastax/astradb_assistant_manager.py +307 -0
  261. lfx/components/datastax/astradb_chatmemory.py +40 -0
  262. lfx/components/datastax/astradb_cql.py +288 -0
  263. lfx/components/datastax/astradb_graph.py +217 -0
  264. lfx/components/datastax/astradb_tool.py +378 -0
  265. lfx/components/datastax/astradb_vectorize.py +122 -0
  266. lfx/components/datastax/astradb_vectorstore.py +449 -0
  267. lfx/components/datastax/create_assistant.py +59 -0
  268. lfx/components/datastax/create_thread.py +33 -0
  269. lfx/components/datastax/dotenv.py +36 -0
  270. lfx/components/datastax/get_assistant.py +38 -0
  271. lfx/components/datastax/getenvvar.py +31 -0
  272. lfx/components/datastax/graph_rag.py +141 -0
  273. lfx/components/datastax/hcd.py +315 -0
  274. lfx/components/datastax/list_assistants.py +26 -0
  275. lfx/components/datastax/run.py +90 -0
  276. lfx/components/deactivated/__init__.py +15 -0
  277. lfx/components/deactivated/amazon_kendra.py +66 -0
  278. lfx/components/deactivated/chat_litellm_model.py +158 -0
  279. lfx/components/deactivated/code_block_extractor.py +26 -0
  280. lfx/components/deactivated/documents_to_data.py +22 -0
  281. lfx/components/deactivated/embed.py +16 -0
  282. lfx/components/deactivated/extract_key_from_data.py +46 -0
  283. lfx/components/deactivated/json_document_builder.py +57 -0
  284. lfx/components/deactivated/list_flows.py +20 -0
  285. lfx/components/deactivated/mcp_sse.py +61 -0
  286. lfx/components/deactivated/mcp_stdio.py +62 -0
  287. lfx/components/deactivated/merge_data.py +93 -0
  288. lfx/components/deactivated/message.py +37 -0
  289. lfx/components/deactivated/metal.py +54 -0
  290. lfx/components/deactivated/multi_query.py +59 -0
  291. lfx/components/deactivated/retriever.py +43 -0
  292. lfx/components/deactivated/selective_passthrough.py +77 -0
  293. lfx/components/deactivated/should_run_next.py +40 -0
  294. lfx/components/deactivated/split_text.py +63 -0
  295. lfx/components/deactivated/store_message.py +24 -0
  296. lfx/components/deactivated/sub_flow.py +124 -0
  297. lfx/components/deactivated/vectara_self_query.py +76 -0
  298. lfx/components/deactivated/vector_store.py +24 -0
  299. lfx/components/deepseek/__init__.py +34 -0
  300. lfx/components/deepseek/deepseek.py +136 -0
  301. lfx/components/docling/__init__.py +43 -0
  302. lfx/components/docling/chunk_docling_document.py +186 -0
  303. lfx/components/docling/docling_inline.py +238 -0
  304. lfx/components/docling/docling_remote.py +195 -0
  305. lfx/components/docling/export_docling_document.py +117 -0
  306. lfx/components/documentloaders/__init__.py +3 -0
  307. lfx/components/duckduckgo/__init__.py +3 -0
  308. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  309. lfx/components/elastic/__init__.py +37 -0
  310. lfx/components/elastic/elasticsearch.py +267 -0
  311. lfx/components/elastic/opensearch.py +789 -0
  312. lfx/components/elastic/opensearch_multimodal.py +1575 -0
  313. lfx/components/embeddings/__init__.py +37 -0
  314. lfx/components/embeddings/similarity.py +77 -0
  315. lfx/components/embeddings/text_embedder.py +65 -0
  316. lfx/components/exa/__init__.py +3 -0
  317. lfx/components/exa/exa_search.py +68 -0
  318. lfx/components/files_and_knowledge/__init__.py +47 -0
  319. lfx/components/files_and_knowledge/directory.py +113 -0
  320. lfx/components/files_and_knowledge/file.py +841 -0
  321. lfx/components/files_and_knowledge/ingestion.py +694 -0
  322. lfx/components/files_and_knowledge/retrieval.py +264 -0
  323. lfx/components/files_and_knowledge/save_file.py +746 -0
  324. lfx/components/firecrawl/__init__.py +43 -0
  325. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  326. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  327. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  328. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  329. lfx/components/flow_controls/__init__.py +58 -0
  330. lfx/components/flow_controls/conditional_router.py +208 -0
  331. lfx/components/flow_controls/data_conditional_router.py +126 -0
  332. lfx/components/flow_controls/flow_tool.py +111 -0
  333. lfx/components/flow_controls/listen.py +29 -0
  334. lfx/components/flow_controls/loop.py +163 -0
  335. lfx/components/flow_controls/notify.py +88 -0
  336. lfx/components/flow_controls/pass_message.py +36 -0
  337. lfx/components/flow_controls/run_flow.py +108 -0
  338. lfx/components/flow_controls/sub_flow.py +115 -0
  339. lfx/components/git/__init__.py +4 -0
  340. lfx/components/git/git.py +262 -0
  341. lfx/components/git/gitextractor.py +196 -0
  342. lfx/components/glean/__init__.py +3 -0
  343. lfx/components/glean/glean_search_api.py +173 -0
  344. lfx/components/google/__init__.py +17 -0
  345. lfx/components/google/gmail.py +193 -0
  346. lfx/components/google/google_bq_sql_executor.py +157 -0
  347. lfx/components/google/google_drive.py +92 -0
  348. lfx/components/google/google_drive_search.py +152 -0
  349. lfx/components/google/google_generative_ai.py +144 -0
  350. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  351. lfx/components/google/google_oauth_token.py +89 -0
  352. lfx/components/google/google_search_api_core.py +68 -0
  353. lfx/components/google/google_serper_api_core.py +74 -0
  354. lfx/components/groq/__init__.py +34 -0
  355. lfx/components/groq/groq.py +143 -0
  356. lfx/components/helpers/__init__.py +154 -0
  357. lfx/components/homeassistant/__init__.py +7 -0
  358. lfx/components/homeassistant/home_assistant_control.py +152 -0
  359. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  360. lfx/components/huggingface/__init__.py +37 -0
  361. lfx/components/huggingface/huggingface.py +199 -0
  362. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  363. lfx/components/ibm/__init__.py +34 -0
  364. lfx/components/ibm/watsonx.py +207 -0
  365. lfx/components/ibm/watsonx_embeddings.py +135 -0
  366. lfx/components/icosacomputing/__init__.py +5 -0
  367. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  368. lfx/components/input_output/__init__.py +40 -0
  369. lfx/components/input_output/chat.py +109 -0
  370. lfx/components/input_output/chat_output.py +184 -0
  371. lfx/components/input_output/text.py +27 -0
  372. lfx/components/input_output/text_output.py +29 -0
  373. lfx/components/input_output/webhook.py +56 -0
  374. lfx/components/jigsawstack/__init__.py +23 -0
  375. lfx/components/jigsawstack/ai_scrape.py +126 -0
  376. lfx/components/jigsawstack/ai_web_search.py +136 -0
  377. lfx/components/jigsawstack/file_read.py +115 -0
  378. lfx/components/jigsawstack/file_upload.py +94 -0
  379. lfx/components/jigsawstack/image_generation.py +205 -0
  380. lfx/components/jigsawstack/nsfw.py +60 -0
  381. lfx/components/jigsawstack/object_detection.py +124 -0
  382. lfx/components/jigsawstack/sentiment.py +112 -0
  383. lfx/components/jigsawstack/text_to_sql.py +90 -0
  384. lfx/components/jigsawstack/text_translate.py +77 -0
  385. lfx/components/jigsawstack/vocr.py +107 -0
  386. lfx/components/knowledge_bases/__init__.py +89 -0
  387. lfx/components/langchain_utilities/__init__.py +109 -0
  388. lfx/components/langchain_utilities/character.py +53 -0
  389. lfx/components/langchain_utilities/conversation.py +59 -0
  390. lfx/components/langchain_utilities/csv_agent.py +175 -0
  391. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  392. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  393. lfx/components/langchain_utilities/json_agent.py +100 -0
  394. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  395. lfx/components/langchain_utilities/language_recursive.py +49 -0
  396. lfx/components/langchain_utilities/language_semantic.py +138 -0
  397. lfx/components/langchain_utilities/llm_checker.py +39 -0
  398. lfx/components/langchain_utilities/llm_math.py +42 -0
  399. lfx/components/langchain_utilities/natural_language.py +61 -0
  400. lfx/components/langchain_utilities/openai_tools.py +53 -0
  401. lfx/components/langchain_utilities/openapi.py +48 -0
  402. lfx/components/langchain_utilities/recursive_character.py +60 -0
  403. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  404. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  405. lfx/components/langchain_utilities/self_query.py +80 -0
  406. lfx/components/langchain_utilities/spider.py +142 -0
  407. lfx/components/langchain_utilities/sql.py +40 -0
  408. lfx/components/langchain_utilities/sql_database.py +35 -0
  409. lfx/components/langchain_utilities/sql_generator.py +78 -0
  410. lfx/components/langchain_utilities/tool_calling.py +59 -0
  411. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  412. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  413. lfx/components/langchain_utilities/xml_agent.py +71 -0
  414. lfx/components/langwatch/__init__.py +3 -0
  415. lfx/components/langwatch/langwatch.py +278 -0
  416. lfx/components/link_extractors/__init__.py +3 -0
  417. lfx/components/llm_operations/__init__.py +46 -0
  418. lfx/components/llm_operations/batch_run.py +205 -0
  419. lfx/components/llm_operations/lambda_filter.py +218 -0
  420. lfx/components/llm_operations/llm_conditional_router.py +421 -0
  421. lfx/components/llm_operations/llm_selector.py +499 -0
  422. lfx/components/llm_operations/structured_output.py +244 -0
  423. lfx/components/lmstudio/__init__.py +34 -0
  424. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  425. lfx/components/lmstudio/lmstudiomodel.py +133 -0
  426. lfx/components/logic/__init__.py +181 -0
  427. lfx/components/maritalk/__init__.py +32 -0
  428. lfx/components/maritalk/maritalk.py +52 -0
  429. lfx/components/mem0/__init__.py +3 -0
  430. lfx/components/mem0/mem0_chat_memory.py +147 -0
  431. lfx/components/milvus/__init__.py +34 -0
  432. lfx/components/milvus/milvus.py +115 -0
  433. lfx/components/mistral/__init__.py +37 -0
  434. lfx/components/mistral/mistral.py +114 -0
  435. lfx/components/mistral/mistral_embeddings.py +58 -0
  436. lfx/components/models/__init__.py +89 -0
  437. lfx/components/models_and_agents/__init__.py +49 -0
  438. lfx/components/models_and_agents/agent.py +644 -0
  439. lfx/components/models_and_agents/embedding_model.py +423 -0
  440. lfx/components/models_and_agents/language_model.py +398 -0
  441. lfx/components/models_and_agents/mcp_component.py +594 -0
  442. lfx/components/models_and_agents/memory.py +268 -0
  443. lfx/components/models_and_agents/prompt.py +67 -0
  444. lfx/components/mongodb/__init__.py +34 -0
  445. lfx/components/mongodb/mongodb_atlas.py +213 -0
  446. lfx/components/needle/__init__.py +3 -0
  447. lfx/components/needle/needle.py +104 -0
  448. lfx/components/notdiamond/__init__.py +34 -0
  449. lfx/components/notdiamond/notdiamond.py +228 -0
  450. lfx/components/novita/__init__.py +32 -0
  451. lfx/components/novita/novita.py +130 -0
  452. lfx/components/nvidia/__init__.py +57 -0
  453. lfx/components/nvidia/nvidia.py +151 -0
  454. lfx/components/nvidia/nvidia_embedding.py +77 -0
  455. lfx/components/nvidia/nvidia_ingest.py +317 -0
  456. lfx/components/nvidia/nvidia_rerank.py +63 -0
  457. lfx/components/nvidia/system_assist.py +65 -0
  458. lfx/components/olivya/__init__.py +3 -0
  459. lfx/components/olivya/olivya.py +116 -0
  460. lfx/components/ollama/__init__.py +37 -0
  461. lfx/components/ollama/ollama.py +548 -0
  462. lfx/components/ollama/ollama_embeddings.py +103 -0
  463. lfx/components/openai/__init__.py +37 -0
  464. lfx/components/openai/openai.py +100 -0
  465. lfx/components/openai/openai_chat_model.py +176 -0
  466. lfx/components/openrouter/__init__.py +32 -0
  467. lfx/components/openrouter/openrouter.py +104 -0
  468. lfx/components/output_parsers/__init__.py +3 -0
  469. lfx/components/perplexity/__init__.py +34 -0
  470. lfx/components/perplexity/perplexity.py +75 -0
  471. lfx/components/pgvector/__init__.py +34 -0
  472. lfx/components/pgvector/pgvector.py +72 -0
  473. lfx/components/pinecone/__init__.py +34 -0
  474. lfx/components/pinecone/pinecone.py +134 -0
  475. lfx/components/processing/__init__.py +72 -0
  476. lfx/components/processing/alter_metadata.py +109 -0
  477. lfx/components/processing/combine_text.py +40 -0
  478. lfx/components/processing/converter.py +248 -0
  479. lfx/components/processing/create_data.py +111 -0
  480. lfx/components/processing/create_list.py +40 -0
  481. lfx/components/processing/data_operations.py +528 -0
  482. lfx/components/processing/data_to_dataframe.py +71 -0
  483. lfx/components/processing/dataframe_operations.py +313 -0
  484. lfx/components/processing/dataframe_to_toolset.py +259 -0
  485. lfx/components/processing/dynamic_create_data.py +357 -0
  486. lfx/components/processing/extract_key.py +54 -0
  487. lfx/components/processing/filter_data.py +43 -0
  488. lfx/components/processing/filter_data_values.py +89 -0
  489. lfx/components/processing/json_cleaner.py +104 -0
  490. lfx/components/processing/merge_data.py +91 -0
  491. lfx/components/processing/message_to_data.py +37 -0
  492. lfx/components/processing/output_parser.py +46 -0
  493. lfx/components/processing/parse_data.py +71 -0
  494. lfx/components/processing/parse_dataframe.py +69 -0
  495. lfx/components/processing/parse_json_data.py +91 -0
  496. lfx/components/processing/parser.py +148 -0
  497. lfx/components/processing/regex.py +83 -0
  498. lfx/components/processing/select_data.py +49 -0
  499. lfx/components/processing/split_text.py +141 -0
  500. lfx/components/processing/store_message.py +91 -0
  501. lfx/components/processing/update_data.py +161 -0
  502. lfx/components/prototypes/__init__.py +35 -0
  503. lfx/components/prototypes/python_function.py +73 -0
  504. lfx/components/qdrant/__init__.py +34 -0
  505. lfx/components/qdrant/qdrant.py +109 -0
  506. lfx/components/redis/__init__.py +37 -0
  507. lfx/components/redis/redis.py +89 -0
  508. lfx/components/redis/redis_chat.py +43 -0
  509. lfx/components/sambanova/__init__.py +32 -0
  510. lfx/components/sambanova/sambanova.py +84 -0
  511. lfx/components/scrapegraph/__init__.py +40 -0
  512. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  513. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  514. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  515. lfx/components/searchapi/__init__.py +34 -0
  516. lfx/components/searchapi/search.py +79 -0
  517. lfx/components/serpapi/__init__.py +3 -0
  518. lfx/components/serpapi/serp.py +115 -0
  519. lfx/components/supabase/__init__.py +34 -0
  520. lfx/components/supabase/supabase.py +76 -0
  521. lfx/components/tavily/__init__.py +4 -0
  522. lfx/components/tavily/tavily_extract.py +117 -0
  523. lfx/components/tavily/tavily_search.py +212 -0
  524. lfx/components/textsplitters/__init__.py +3 -0
  525. lfx/components/toolkits/__init__.py +3 -0
  526. lfx/components/tools/__init__.py +66 -0
  527. lfx/components/tools/calculator.py +109 -0
  528. lfx/components/tools/google_search_api.py +45 -0
  529. lfx/components/tools/google_serper_api.py +115 -0
  530. lfx/components/tools/python_code_structured_tool.py +328 -0
  531. lfx/components/tools/python_repl.py +98 -0
  532. lfx/components/tools/search_api.py +88 -0
  533. lfx/components/tools/searxng.py +145 -0
  534. lfx/components/tools/serp_api.py +120 -0
  535. lfx/components/tools/tavily_search_tool.py +345 -0
  536. lfx/components/tools/wikidata_api.py +103 -0
  537. lfx/components/tools/wikipedia_api.py +50 -0
  538. lfx/components/tools/yahoo_finance.py +130 -0
  539. lfx/components/twelvelabs/__init__.py +52 -0
  540. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  541. lfx/components/twelvelabs/pegasus_index.py +311 -0
  542. lfx/components/twelvelabs/split_video.py +301 -0
  543. lfx/components/twelvelabs/text_embeddings.py +57 -0
  544. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  545. lfx/components/twelvelabs/video_embeddings.py +100 -0
  546. lfx/components/twelvelabs/video_file.py +191 -0
  547. lfx/components/unstructured/__init__.py +3 -0
  548. lfx/components/unstructured/unstructured.py +121 -0
  549. lfx/components/upstash/__init__.py +34 -0
  550. lfx/components/upstash/upstash.py +124 -0
  551. lfx/components/utilities/__init__.py +43 -0
  552. lfx/components/utilities/calculator_core.py +89 -0
  553. lfx/components/utilities/current_date.py +42 -0
  554. lfx/components/utilities/id_generator.py +42 -0
  555. lfx/components/utilities/python_repl_core.py +98 -0
  556. lfx/components/vectara/__init__.py +37 -0
  557. lfx/components/vectara/vectara.py +97 -0
  558. lfx/components/vectara/vectara_rag.py +164 -0
  559. lfx/components/vectorstores/__init__.py +34 -0
  560. lfx/components/vectorstores/local_db.py +270 -0
  561. lfx/components/vertexai/__init__.py +37 -0
  562. lfx/components/vertexai/vertexai.py +71 -0
  563. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  564. lfx/components/vlmrun/__init__.py +34 -0
  565. lfx/components/vlmrun/vlmrun_transcription.py +224 -0
  566. lfx/components/weaviate/__init__.py +34 -0
  567. lfx/components/weaviate/weaviate.py +89 -0
  568. lfx/components/wikipedia/__init__.py +4 -0
  569. lfx/components/wikipedia/wikidata.py +86 -0
  570. lfx/components/wikipedia/wikipedia.py +53 -0
  571. lfx/components/wolframalpha/__init__.py +3 -0
  572. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  573. lfx/components/xai/__init__.py +32 -0
  574. lfx/components/xai/xai.py +167 -0
  575. lfx/components/yahoosearch/__init__.py +3 -0
  576. lfx/components/yahoosearch/yahoo.py +137 -0
  577. lfx/components/youtube/__init__.py +52 -0
  578. lfx/components/youtube/channel.py +227 -0
  579. lfx/components/youtube/comments.py +231 -0
  580. lfx/components/youtube/playlist.py +33 -0
  581. lfx/components/youtube/search.py +120 -0
  582. lfx/components/youtube/trending.py +285 -0
  583. lfx/components/youtube/video_details.py +263 -0
  584. lfx/components/youtube/youtube_transcripts.py +206 -0
  585. lfx/components/zep/__init__.py +3 -0
  586. lfx/components/zep/zep.py +45 -0
  587. lfx/constants.py +6 -0
  588. lfx/custom/__init__.py +7 -0
  589. lfx/custom/attributes.py +87 -0
  590. lfx/custom/code_parser/__init__.py +3 -0
  591. lfx/custom/code_parser/code_parser.py +361 -0
  592. lfx/custom/custom_component/__init__.py +0 -0
  593. lfx/custom/custom_component/base_component.py +128 -0
  594. lfx/custom/custom_component/component.py +1890 -0
  595. lfx/custom/custom_component/component_with_cache.py +8 -0
  596. lfx/custom/custom_component/custom_component.py +650 -0
  597. lfx/custom/dependency_analyzer.py +165 -0
  598. lfx/custom/directory_reader/__init__.py +3 -0
  599. lfx/custom/directory_reader/directory_reader.py +359 -0
  600. lfx/custom/directory_reader/utils.py +171 -0
  601. lfx/custom/eval.py +12 -0
  602. lfx/custom/schema.py +32 -0
  603. lfx/custom/tree_visitor.py +21 -0
  604. lfx/custom/utils.py +877 -0
  605. lfx/custom/validate.py +523 -0
  606. lfx/events/__init__.py +1 -0
  607. lfx/events/event_manager.py +110 -0
  608. lfx/exceptions/__init__.py +0 -0
  609. lfx/exceptions/component.py +15 -0
  610. lfx/field_typing/__init__.py +91 -0
  611. lfx/field_typing/constants.py +216 -0
  612. lfx/field_typing/range_spec.py +35 -0
  613. lfx/graph/__init__.py +6 -0
  614. lfx/graph/edge/__init__.py +0 -0
  615. lfx/graph/edge/base.py +300 -0
  616. lfx/graph/edge/schema.py +119 -0
  617. lfx/graph/edge/utils.py +0 -0
  618. lfx/graph/graph/__init__.py +0 -0
  619. lfx/graph/graph/ascii.py +202 -0
  620. lfx/graph/graph/base.py +2298 -0
  621. lfx/graph/graph/constants.py +63 -0
  622. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  623. lfx/graph/graph/schema.py +53 -0
  624. lfx/graph/graph/state_model.py +66 -0
  625. lfx/graph/graph/utils.py +1024 -0
  626. lfx/graph/schema.py +75 -0
  627. lfx/graph/state/__init__.py +0 -0
  628. lfx/graph/state/model.py +250 -0
  629. lfx/graph/utils.py +206 -0
  630. lfx/graph/vertex/__init__.py +0 -0
  631. lfx/graph/vertex/base.py +826 -0
  632. lfx/graph/vertex/constants.py +0 -0
  633. lfx/graph/vertex/exceptions.py +4 -0
  634. lfx/graph/vertex/param_handler.py +316 -0
  635. lfx/graph/vertex/schema.py +26 -0
  636. lfx/graph/vertex/utils.py +19 -0
  637. lfx/graph/vertex/vertex_types.py +489 -0
  638. lfx/helpers/__init__.py +141 -0
  639. lfx/helpers/base_model.py +71 -0
  640. lfx/helpers/custom.py +13 -0
  641. lfx/helpers/data.py +167 -0
  642. lfx/helpers/flow.py +308 -0
  643. lfx/inputs/__init__.py +68 -0
  644. lfx/inputs/constants.py +2 -0
  645. lfx/inputs/input_mixin.py +352 -0
  646. lfx/inputs/inputs.py +718 -0
  647. lfx/inputs/validators.py +19 -0
  648. lfx/interface/__init__.py +6 -0
  649. lfx/interface/components.py +897 -0
  650. lfx/interface/importing/__init__.py +5 -0
  651. lfx/interface/importing/utils.py +39 -0
  652. lfx/interface/initialize/__init__.py +3 -0
  653. lfx/interface/initialize/loading.py +317 -0
  654. lfx/interface/listing.py +26 -0
  655. lfx/interface/run.py +16 -0
  656. lfx/interface/utils.py +111 -0
  657. lfx/io/__init__.py +63 -0
  658. lfx/io/schema.py +295 -0
  659. lfx/load/__init__.py +8 -0
  660. lfx/load/load.py +256 -0
  661. lfx/load/utils.py +99 -0
  662. lfx/log/__init__.py +5 -0
  663. lfx/log/logger.py +411 -0
  664. lfx/logging/__init__.py +11 -0
  665. lfx/logging/logger.py +24 -0
  666. lfx/memory/__init__.py +70 -0
  667. lfx/memory/stubs.py +302 -0
  668. lfx/processing/__init__.py +1 -0
  669. lfx/processing/process.py +238 -0
  670. lfx/processing/utils.py +25 -0
  671. lfx/py.typed +0 -0
  672. lfx/schema/__init__.py +66 -0
  673. lfx/schema/artifact.py +83 -0
  674. lfx/schema/content_block.py +62 -0
  675. lfx/schema/content_types.py +91 -0
  676. lfx/schema/cross_module.py +80 -0
  677. lfx/schema/data.py +309 -0
  678. lfx/schema/dataframe.py +210 -0
  679. lfx/schema/dotdict.py +74 -0
  680. lfx/schema/encoders.py +13 -0
  681. lfx/schema/graph.py +47 -0
  682. lfx/schema/image.py +184 -0
  683. lfx/schema/json_schema.py +186 -0
  684. lfx/schema/log.py +62 -0
  685. lfx/schema/message.py +493 -0
  686. lfx/schema/openai_responses_schemas.py +74 -0
  687. lfx/schema/properties.py +41 -0
  688. lfx/schema/schema.py +180 -0
  689. lfx/schema/serialize.py +13 -0
  690. lfx/schema/table.py +142 -0
  691. lfx/schema/validators.py +114 -0
  692. lfx/serialization/__init__.py +5 -0
  693. lfx/serialization/constants.py +2 -0
  694. lfx/serialization/serialization.py +314 -0
  695. lfx/services/__init__.py +26 -0
  696. lfx/services/base.py +28 -0
  697. lfx/services/cache/__init__.py +6 -0
  698. lfx/services/cache/base.py +183 -0
  699. lfx/services/cache/service.py +166 -0
  700. lfx/services/cache/utils.py +169 -0
  701. lfx/services/chat/__init__.py +1 -0
  702. lfx/services/chat/config.py +2 -0
  703. lfx/services/chat/schema.py +10 -0
  704. lfx/services/database/__init__.py +5 -0
  705. lfx/services/database/service.py +25 -0
  706. lfx/services/deps.py +194 -0
  707. lfx/services/factory.py +19 -0
  708. lfx/services/initialize.py +19 -0
  709. lfx/services/interfaces.py +103 -0
  710. lfx/services/manager.py +185 -0
  711. lfx/services/mcp_composer/__init__.py +6 -0
  712. lfx/services/mcp_composer/factory.py +16 -0
  713. lfx/services/mcp_composer/service.py +1441 -0
  714. lfx/services/schema.py +21 -0
  715. lfx/services/session.py +87 -0
  716. lfx/services/settings/__init__.py +3 -0
  717. lfx/services/settings/auth.py +133 -0
  718. lfx/services/settings/base.py +668 -0
  719. lfx/services/settings/constants.py +43 -0
  720. lfx/services/settings/factory.py +23 -0
  721. lfx/services/settings/feature_flags.py +11 -0
  722. lfx/services/settings/service.py +35 -0
  723. lfx/services/settings/utils.py +40 -0
  724. lfx/services/shared_component_cache/__init__.py +1 -0
  725. lfx/services/shared_component_cache/factory.py +30 -0
  726. lfx/services/shared_component_cache/service.py +9 -0
  727. lfx/services/storage/__init__.py +5 -0
  728. lfx/services/storage/local.py +185 -0
  729. lfx/services/storage/service.py +177 -0
  730. lfx/services/tracing/__init__.py +1 -0
  731. lfx/services/tracing/service.py +21 -0
  732. lfx/settings.py +6 -0
  733. lfx/template/__init__.py +6 -0
  734. lfx/template/field/__init__.py +0 -0
  735. lfx/template/field/base.py +260 -0
  736. lfx/template/field/prompt.py +15 -0
  737. lfx/template/frontend_node/__init__.py +6 -0
  738. lfx/template/frontend_node/base.py +214 -0
  739. lfx/template/frontend_node/constants.py +65 -0
  740. lfx/template/frontend_node/custom_components.py +79 -0
  741. lfx/template/template/__init__.py +0 -0
  742. lfx/template/template/base.py +100 -0
  743. lfx/template/utils.py +217 -0
  744. lfx/type_extraction/__init__.py +19 -0
  745. lfx/type_extraction/type_extraction.py +75 -0
  746. lfx/type_extraction.py +80 -0
  747. lfx/utils/__init__.py +1 -0
  748. lfx/utils/async_helpers.py +42 -0
  749. lfx/utils/component_utils.py +154 -0
  750. lfx/utils/concurrency.py +60 -0
  751. lfx/utils/connection_string_parser.py +11 -0
  752. lfx/utils/constants.py +233 -0
  753. lfx/utils/data_structure.py +212 -0
  754. lfx/utils/exceptions.py +22 -0
  755. lfx/utils/helpers.py +34 -0
  756. lfx/utils/image.py +79 -0
  757. lfx/utils/langflow_utils.py +52 -0
  758. lfx/utils/lazy_load.py +15 -0
  759. lfx/utils/request_utils.py +18 -0
  760. lfx/utils/schemas.py +139 -0
  761. lfx/utils/ssrf_protection.py +384 -0
  762. lfx/utils/util.py +626 -0
  763. lfx/utils/util_strings.py +56 -0
  764. lfx/utils/validate_cloud.py +26 -0
  765. lfx/utils/version.py +24 -0
  766. lfx_nightly-0.2.0.dev25.dist-info/METADATA +312 -0
  767. lfx_nightly-0.2.0.dev25.dist-info/RECORD +769 -0
  768. lfx_nightly-0.2.0.dev25.dist-info/WHEEL +4 -0
  769. lfx_nightly-0.2.0.dev25.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,810 @@
1
+ import ast
2
+ import shutil
3
+ import tarfile
4
+ from abc import ABC, abstractmethod
5
+ from io import BytesIO
6
+ from pathlib import Path
7
+ from tempfile import TemporaryDirectory
8
+ from typing import TYPE_CHECKING, Any
9
+ from zipfile import ZipFile, is_zipfile
10
+
11
+ import orjson
12
+ import pandas as pd
13
+
14
+ from lfx.base.data.storage_utils import get_file_size, read_file_bytes
15
+ from lfx.custom.custom_component.component import Component
16
+ from lfx.io import BoolInput, FileInput, HandleInput, Output, StrInput
17
+ from lfx.schema.data import Data
18
+ from lfx.schema.dataframe import DataFrame
19
+ from lfx.schema.message import Message
20
+ from lfx.services.deps import get_settings_service
21
+ from lfx.utils.async_helpers import run_until_complete
22
+ from lfx.utils.helpers import build_content_type_from_extension
23
+
24
+ if TYPE_CHECKING:
25
+ from collections.abc import Callable
26
+
27
+
28
+ class BaseFileComponent(Component, ABC):
29
+ """Base class for handling file processing components.
30
+
31
+ This class provides common functionality for resolving, validating, and
32
+ processing file paths. Child classes must define valid file extensions
33
+ and implement the `process_files` method.
34
+
35
+ # TODO: May want to subclass for local and remote files
36
+ """
37
+
38
class BaseFile:
    """Internal class to represent a file with additional metadata.

    Attributes:
        path (Path): Location of the file this object describes.
        delete_after_processing (bool): Whether the file should be removed once processing has finished.
    """

    def __init__(
        self,
        data: Data | list[Data],
        path: Path,
        *,
        delete_after_processing: bool = False,
        silent_errors: bool = False,
    ):
        # Store a list internally so one or many Data objects are handled uniformly.
        self._data = data if isinstance(data, list) else [data]
        self.path = path
        self.delete_after_processing = delete_after_processing
        self._silent_errors = silent_errors

    @property
    def data(self) -> list[Data]:
        """Return the attached Data objects (an empty list when none are stored)."""
        return self._data or []

    @data.setter
    def data(self, value: Data | list[Data]):
        """Replace the attached Data.

        Raises:
            ValueError: If `value` is neither a Data object nor a list of Data objects
                and silent errors are disabled. With silent errors enabled the invalid
                value is ignored and the stored data is left untouched.
        """
        if isinstance(value, Data):
            self._data = [value]
        elif isinstance(value, list) and all(isinstance(item, Data) for item in value):
            self._data = value
        else:
            msg = f"data must be a Data object or a list of Data objects. Got: {type(value)}"
            if not self._silent_errors:
                raise ValueError(msg)

    def merge_data(self, new_data: Data | list[Data] | None) -> list[Data]:
        r"""Generate a new list of Data objects by merging `new_data` into the current `data`.

        Every existing Data object is combined with every new Data object, with keys
        from the new object overriding keys of the existing one.

        Args:
            new_data (Data | list[Data] | None): The new Data object(s) to merge into each existing Data object.
                If None, the current `data` is returned unchanged.

        Returns:
            list[Data]: A new list of Data objects with `new_data` merged.

        Raises:
            ValueError: If `new_data` has an unsupported type and silent errors are disabled.
        """
        if new_data is None:
            return self.data

        if isinstance(new_data, Data):
            new_data_list = [new_data]
        elif isinstance(new_data, list) and all(isinstance(item, Data) for item in new_data):
            new_data_list = new_data
        else:
            msg = "new_data must be a Data object, a list of Data objects, or None."
            if not self._silent_errors:
                raise ValueError(msg)
            return self.data

        return [
            Data(data={**data.data, **new_data_item.data}) for data in self.data for new_data_item in new_data_list
        ]

    def __str__(self):
        """Return a short description with a text preview of the attached data."""
        items = self.data
        if not items:
            text_preview = ""
        elif len(items) == 1:
            max_text_length = 50
            # `data` is always a list, so the single item must be indexed before its text is read.
            text = str(items[0].get_text() or "")
            text_preview = text[:max_text_length]
            if len(text) > max_text_length:
                text_preview += "..."
            text_preview = f"text_preview='{text_preview}'"
        else:
            text_preview = f"{len(items)} data objects"
        return f"BaseFile(path={self.path}, delete_after_processing={self.delete_after_processing}, {text_preview})"
108
+
109
# Subclasses can override these class variables
# Accepted file extensions; empty in the base class and exposed through `valid_extensions`.
VALID_EXTENSIONS: list[str] = []  # To be overridden by child classes
# Name prefixes exposed through `ignore_starts_with` (hidden files and macOS archive metadata folders).
IGNORE_STARTS_WITH = [".", "__MACOSX"]

# Key under which a Data object carries the path of a server-side file.
SERVER_FILE_PATH_FIELDNAME = "file_path"
# Extensions (without the leading dot) that are treated as bundles and unpacked before processing.
SUPPORTED_BUNDLE_EXTENSIONS = ["zip", "tar", "tgz", "bz2", "gz"]
115
+
116
def __init__(self, *args, **kwargs):
    """Initialise the component and publish the accepted file types on the file input.

    The first base input is updated in place so that it lists both the
    component's own extensions and the supported bundle extensions, and its
    help text is rewritten to describe them.
    """
    super().__init__(*args, **kwargs)

    # Dynamically update FileInput to include valid extensions and bundles
    accepted_types = [*self.valid_extensions, *self.SUPPORTED_BUNDLE_EXTENSIONS]
    self.get_base_inputs()[0].file_types = accepted_types

    file_types = ", ".join(self.valid_extensions)
    bundles = ", ".join(self.SUPPORTED_BUNDLE_EXTENSIONS)
    info_text = f"Supported file extensions: {file_types}; optionally bundled in file extensions: {bundles}"
    self.get_base_inputs()[0].info = info_text
129
+
130
# Inputs shared by every file component. `__init__` rewrites `file_types` and
# `info` of the first entry, so the FileInput must stay at index 0.
_base_inputs = [
    # Files chosen by the user; used when no server file path is supplied.
    FileInput(
        name="path",
        display_name="Files",
        fileTypes=[],  # Dynamically set in __init__
        info="",  # Dynamically set in __init__
        required=False,
        list=True,
        value=[],
        tool_mode=True,
    ),
    # Data/Message handle pointing at files on the server; takes precedence over `path`.
    HandleInput(
        name="file_path",
        display_name="Server File Path",
        info=(
            f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file"
            " or a Message object with a path to the file. Supercedes 'Path' but supports same file types."
        ),
        required=False,
        input_types=["Data", "Message"],
        is_list=True,
        advanced=True,
    ),
    # Separator placed between the text of multiple outputs by `load_files_message`.
    StrInput(
        name="separator",
        display_name="Separator",
        value="\n\n",
        show=True,
        info="Specify the separator to use between multiple outputs in Message format.",
        advanced=True,
    ),
    # When enabled, problems are logged instead of raised.
    BoolInput(
        name="silent_errors",
        display_name="Silent Errors",
        advanced=True,
        info="If true, errors will not raise an exception.",
    ),
    # Only applies to files that arrive through the server file path handle.
    BoolInput(
        name="delete_server_file_after_processing",
        display_name="Delete Server File After Processing",
        advanced=True,
        value=True,
        info="If true, the Server File Path will be deleted after processing.",
    ),
    BoolInput(
        name="ignore_unsupported_extensions",
        display_name="Ignore Unsupported Extensions",
        advanced=True,
        value=True,
        info="If true, files with unsupported extensions will not be processed.",
    ),
    # Controls whether Data objects without a server file path are skipped or reported.
    BoolInput(
        name="ignore_unspecified_files",
        display_name="Ignore Unspecified Files",
        advanced=True,
        value=False,
        info=f"If true, Data with no '{SERVER_FILE_PATH_FIELDNAME}' property will be ignored.",
    ),
]

# Single default output, produced by `load_files`.
_base_outputs = [
    Output(display_name="Files", name="dataframe", method="load_files"),
]
193
+
194
@abstractmethod
def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]:
    """Processes a list of files.

    Abstract hook with no base implementation; concrete subclasses must
    override it. `load_files_base` calls it with the filtered files and then
    flattens the `data` of the returned objects.

    Args:
        file_list (list[BaseFile]): A list of file objects.

    Returns:
        list[BaseFile]: A list of BaseFile objects with updated `data`.
    """
204
+
205
def load_files_base(self) -> list[Data]:
    """Run the full pipeline: resolve paths, unpack bundles, filter, process.

    Temporary directories created while unpacking are always cleaned up, and
    files flagged with `delete_after_processing` are removed afterwards, even
    when one of the steps raises.

    Returns:
        list[Data]: Parsed data from the processed files, flattened into one list.
    """
    self._temp_dirs: list[TemporaryDirectory] = []
    # Stays empty if a step before filtering raises, so the cleanup below has nothing to delete.
    final_files = []
    try:
        resolved = self._validate_and_resolve_paths()
        expanded = self._unpack_and_collect_files(resolved)
        final_files = self._filter_and_mark_files(expanded)
        processed = self.process_files(final_files)

        # Flatten the per-file Data lists into a single result list.
        flattened = []
        for processed_file in processed:
            flattened.extend(processed_file.data)
        return flattened
    finally:
        # Remove the scratch directories used for bundle extraction.
        for scratch_dir in self._temp_dirs:
            scratch_dir.cleanup()
        # Remove the inputs that were marked for deletion and still exist.
        for candidate in final_files:
            if not (candidate.delete_after_processing and candidate.path.exists()):
                continue
            if candidate.path.is_dir():
                shutil.rmtree(candidate.path)
            else:
                candidate.path.unlink()
240
+
241
def load_files_core(self) -> list[Data]:
    """Load files and guarantee a non-empty result.

    Returns:
        list[Data]: The Data objects from all files, or a list holding a single
        empty Data object when nothing was loaded.
    """
    loaded = self.load_files_base()
    return loaded if loaded else [Data()]
251
+
252
+ def _extract_file_metadata(self, data_item) -> dict:
253
+ """Extract metadata from a data item with file_path."""
254
+ metadata: dict[str, Any] = {}
255
+ if not hasattr(data_item, "file_path"):
256
+ return metadata
257
+
258
+ file_path = data_item.file_path
259
+ file_path_obj = Path(file_path)
260
+ filename = file_path_obj.name
261
+
262
+ settings = get_settings_service().settings
263
+
264
+ # Get file size - use storage service for S3, filesystem for local
265
+ if settings.storage_type == "s3":
266
+ try:
267
+ file_size = get_file_size(file_path)
268
+ except (FileNotFoundError, ValueError):
269
+ # If we can't get file size, set to 0 or omit
270
+ file_size = 0
271
+ else:
272
+ try:
273
+ file_size_stat = file_path_obj.stat()
274
+ file_size = file_size_stat.st_size
275
+ except OSError:
276
+ file_size = 0
277
+
278
+ # Basic file metadata
279
+ metadata["filename"] = filename
280
+ metadata["file_size"] = file_size
281
+
282
+ # Add MIME type from extension
283
+ extension = filename.split(".")[-1]
284
+ if extension:
285
+ metadata["mimetype"] = build_content_type_from_extension(extension)
286
+
287
+ # Copy additional metadata from data if available
288
+ if hasattr(data_item, "data") and isinstance(data_item.data, dict):
289
+ metadata_fields = ["mimetype", "file_size", "created_time", "modified_time"]
290
+ for field in metadata_fields:
291
+ if field in data_item.data:
292
+ metadata[field] = data_item.data[field]
293
+
294
+ return metadata
295
+
296
+ def _extract_text(self, data_item) -> str:
297
+ """Extract text content from a data item."""
298
+ if isinstance(data_item.data, dict):
299
+ text = getattr(data_item, "get_text", lambda: None)() or data_item.data.get("text")
300
+ return text if text is not None else str(data_item)
301
+ return str(data_item)
302
+
303
def load_files_message(self) -> Message:
    """Load files and return their text joined into a single Message.

    Metadata is taken from the first loaded item only. Each item contributes
    one text chunk; the chunks are joined with the configured `separator`
    (two newlines when it is unset or empty).

    Returns:
        Message: Message containing all file data
    """
    data_list = self.load_files_core()
    if not data_list:
        return Message()

    # Extract metadata from the first data item
    metadata = self._extract_file_metadata(data_list[0])

    joiner: str = getattr(self, "separator", "\n\n") or "\n\n"
    chunks: list[str] = []
    for item in data_list:
        try:
            extracted = self._extract_text(item)
            if extracted:
                # Non-string results are converted so the join below cannot fail.
                rendered = extracted if isinstance(extracted, str) else str(extracted)
            elif isinstance(item.data, dict):
                # No text available: show the data dict as readable JSON.
                rendered = orjson.dumps(item.data, option=orjson.OPT_INDENT_2, default=str).decode()
            else:
                rendered = str(item)
        except Exception:  # noqa: BLE001
            # Final fallback - just try to convert to string
            # TODO: Consider downstream error case more. Should this raise an error?
            rendered = str(item)
        chunks.append(rendered)

    return Message(text=joiner.join(chunks), **metadata)
337
+
338
def load_files_path(self) -> Message:
    """Return the resolved file paths, one per line, as a Message.

    Returns:
        Message: Message containing file paths (empty text when there are none)
    """
    resolved = self._validate_and_resolve_paths()
    on_s3 = get_settings_service().settings.storage_type == "s3"

    # S3 paths are virtual storage keys that do not exist on the local filesystem,
    # so the existence check is applied to local storage only. A key that was
    # removed from S3 fails later, when the file is actually read.
    visible = [entry.path.as_posix() for entry in resolved if on_s3 or entry.path.exists()]

    return Message(text="\n".join(visible))
357
+
358
+ def load_files_structured_helper(self, file_path: str) -> list[dict] | None:
359
+ if not file_path:
360
+ return None
361
+
362
+ # Get file extension in lowercase
363
+ ext = Path(file_path).suffix.lower()
364
+
365
+ settings = get_settings_service().settings
366
+
367
+ # For S3 storage, download file bytes first
368
+ if settings.storage_type == "s3":
369
+ # Download file content from S3
370
+ content = run_until_complete(read_file_bytes(file_path))
371
+
372
+ # Map file extensions to pandas read functions that support BytesIO
373
+ if ext == ".csv":
374
+ result = pd.read_csv(BytesIO(content))
375
+ elif ext == ".xlsx":
376
+ result = pd.read_excel(BytesIO(content))
377
+ elif ext == ".parquet":
378
+ result = pd.read_parquet(BytesIO(content))
379
+ else:
380
+ return None
381
+
382
+ return result.to_dict("records")
383
+
384
+ # Local storage - read directly from filesystem
385
+ file_readers: dict[str, Callable[[str], pd.DataFrame]] = {
386
+ ".csv": pd.read_csv,
387
+ ".xlsx": pd.read_excel,
388
+ ".parquet": pd.read_parquet,
389
+ # TODO: sqlite and json support?
390
+ }
391
+
392
+ # Get the appropriate reader function or None
393
+ reader = file_readers.get(ext)
394
+
395
+ if reader:
396
+ result = reader(file_path) # MyPy now knows reader is callable
397
+ return result.to_dict("records")
398
+
399
+ return None
400
+
401
def load_files_structured(self) -> DataFrame:
    """Load files and return the first file's content as a DataFrame.

    Only the first loaded Data object is considered. If it references a
    `.csv`, `.xlsx` or `.parquet` file, that file is read row by row;
    otherwise the Data object's own dict becomes the single row.

    Returns:
        DataFrame: DataFrame containing the structured content
    """
    data_list = self.load_files_core()
    if not data_list:
        return DataFrame()

    first = data_list[0]
    source_path = first.data.get(self.SERVER_FILE_PATH_FIELDNAME, None)

    tabular_suffixes = (".csv", ".xlsx", ".parquet")
    is_tabular = bool(source_path) and str(source_path).lower().endswith(tabular_suffixes)

    if is_tabular:
        rows = self.load_files_structured_helper(source_path)
    else:
        # Convert the Data object to a one-row table
        # TODO: Parse according to docling standards
        rows = [first.data]

    self.status = DataFrame(rows)

    return DataFrame(rows)
425
+
426
def parse_string_to_dict(self, s: str) -> dict:
    """Parse a string into a dict, trying JSON first and Python literals second.

    Args:
        s (str): Text that is expected to hold a JSON object or a Python dict literal.

    Returns:
        dict: The parsed dict, or `{"value": s}` when `s` cannot be parsed or
        does not evaluate to a dict.
    """
    # Try JSON first (handles true/false/null)
    try:
        result = orjson.loads(s)
    except orjson.JSONDecodeError:
        pass
    else:
        if isinstance(result, dict):
            return result

    # Fall back to Python literal evaluation. Besides SyntaxError/ValueError,
    # literal_eval can raise TypeError (e.g. an unhashable dict key such as
    # "{[1]: 2}"), MemoryError and RecursionError on malformed or deeply
    # nested input; all of them mean "not a usable literal".
    try:
        result = ast.literal_eval(s)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        pass
    else:
        if isinstance(result, dict):
            return result

    # If all parsing fails, return the fallback
    return {"value": s}
445
+
446
def load_files_json(self) -> Data:
    """Load files and return the first file's JSON content as a single Data object.

    Only the first loaded Data object is used. Its text (stored under its
    `text_key`) is parsed with `parse_string_to_dict`.

    Returns:
        Data: Data object containing the parsed content, or an empty Data
        object when nothing was loaded or the first item has no text.
    """
    data_list = self.load_files_core()
    if not data_list:
        return Data()

    first = data_list[0]
    # `load_files_core` returns a single empty Data when no file was loaded, so
    # the text key may be missing; look it up without raising KeyError.
    raw_json = first.data.get(first.text_key)
    if raw_json is None:
        return Data()

    json_data = self.parse_string_to_dict(raw_json)

    self.status = Data(data=json_data)

    return Data(data=json_data)
463
+
464
def load_files(self) -> DataFrame:
    """Load files and return one DataFrame row per loaded Data object.

    Each row is a copy of the Data object's dict; the text and the server
    file path are written to the `text` and `file_path` columns when present.

    Returns:
        DataFrame: DataFrame containing all file data
    """
    data_list = self.load_files_core()
    if not data_list:
        return DataFrame()

    path_key = self.SERVER_FILE_PATH_FIELDNAME
    all_rows = []
    for item in data_list:
        payload = item.data
        source_path = payload.get(path_key)
        record = dict(payload) if payload else {}

        # Make sure the text and the file path end up in fixed columns.
        if "text" in payload:
            record["text"] = payload["text"]
        if source_path:
            record["file_path"] = source_path
        all_rows.append(record)

    self.status = DataFrame(all_rows)

    return DataFrame(all_rows)
490
+
491
@property
def valid_extensions(self) -> list[str]:
    """Returns valid file extensions for the class.

    This property can be overridden by child classes to provide specific
    extensions. The base implementation returns the `VALID_EXTENSIONS`
    class variable unchanged (not a copy).

    Returns:
        list[str]: A list of valid file extensions without the leading dot.
    """
    return self.VALID_EXTENSIONS
502
+
503
@property
def ignore_starts_with(self) -> list[str]:
    """Returns prefixes to ignore when unpacking file bundles.

    The base implementation returns the `IGNORE_STARTS_WITH` class variable
    unchanged (not a copy).

    Returns:
        list[str]: A list of prefixes to ignore when unpacking file bundles.
    """
    return self.IGNORE_STARTS_WITH
511
+
512
def rollup_data(
    self,
    base_files: list[BaseFile],
    data_list: list[Data | None],
    path_field: str = SERVER_FILE_PATH_FIELDNAME,
) -> list[BaseFile]:
    r"""Rolls up Data objects into corresponding BaseFile objects in order given by `base_files`.

    Data objects are grouped by the value of `path_field` and matched to the
    base file whose `str(path)` equals that value. `None` entries in
    `data_list` are skipped.

    Args:
        base_files (list[BaseFile]): The original BaseFile objects.
        data_list (list[Data | None]): The list of data to be aggregated into the BaseFile objects.
        path_field (str): The field name on the data_list objects that holds the file path as a string.

    Returns:
        list[BaseFile]: A new list of BaseFile objects with merged `data` attributes.

    Raises:
        ValueError: If a Data object lacks `path_field` and silent errors are disabled.
    """

    def _build_data_dict(data_list: list[Data | None], data_list_field: str) -> dict[str, list[Data]]:
        """Builds a dictionary grouping Data objects by a specified field."""
        data_dict: dict[str, list[Data]] = {}
        for data in data_list:
            if data is None:
                continue
            key = data.data.get(data_list_field)
            if key is None:
                # Log once, then either raise or skip the entry depending on silent_errors.
                msg = f"Data object missing required field '{data_list_field}': {data}"
                self.log(msg)
                if not self.silent_errors:
                    raise ValueError(msg)
                continue
            data_dict.setdefault(key, []).append(data)
        return data_dict

    # Build the data dictionary from the provided data_list
    data_dict = _build_data_dict(data_list, path_field)

    # Generate the updated list of BaseFile objects, preserving the order of base_files
    updated_base_files = []
    for base_file in base_files:
        new_data_list = data_dict.get(str(base_file.path), [])
        merged_data_list = base_file.merge_data(new_data_list)
        updated_base_files.append(
            BaseFileComponent.BaseFile(
                data=merged_data_list,
                path=base_file.path,
                delete_after_processing=base_file.delete_after_processing,
            )
        )

    return updated_base_files
564
+
565
+ def _file_path_as_list(self) -> list[Data]:
566
+ file_path = self.file_path
567
+ if not file_path:
568
+ return []
569
+
570
+ def _message_to_data(message: Message) -> Data:
571
+ return Data(**{self.SERVER_FILE_PATH_FIELDNAME: message.text})
572
+
573
+ if isinstance(file_path, Data):
574
+ file_path = [file_path]
575
+ elif isinstance(file_path, Message):
576
+ file_path = [_message_to_data(file_path)]
577
+ elif not isinstance(file_path, list):
578
+ msg = f"Expected list of Data objects in file_path but got {type(file_path)}."
579
+ self.log(msg)
580
+ if not self.silent_errors:
581
+ raise ValueError(msg)
582
+ return []
583
+
584
+ file_paths = []
585
+ for obj in file_path:
586
+ data_obj = _message_to_data(obj) if isinstance(obj, Message) else obj
587
+
588
+ if not isinstance(data_obj, Data):
589
+ msg = f"Expected Data object in file_path but got {type(data_obj)}."
590
+ self.log(msg)
591
+ if not self.silent_errors:
592
+ raise ValueError(msg)
593
+ continue
594
+ file_paths.append(data_obj)
595
+
596
+ return file_paths
597
+
598
def _validate_and_resolve_paths(self) -> list[BaseFile]:
    """Validate that all input paths exist and are valid, and create BaseFile instances.

    Files from the server file path handle take precedence; the `path` input
    is only used when that handle yields nothing.

    Returns:
        list[BaseFile]: A list of valid BaseFile instances.

    Raises:
        ValueError: If a local path does not exist, or a Data object has no
            file path and unspecified files are not ignored, while silent
            errors are disabled.
    """
    resolved_files = []

    def add_file(data: Data, path: str | Path, *, delete_after_processing: bool):
        path_str = str(path)
        settings = get_settings_service().settings

        # When using object storage (S3), file paths are storage keys (e.g., "<flow_id>/<filename>")
        # that don't exist on the local filesystem. We defer validation until file processing.
        if settings.storage_type == "s3":
            resolved_files.append(
                BaseFileComponent.BaseFile(data, Path(path_str), delete_after_processing=delete_after_processing)
            )
            return

        # For local storage, validate the file exists immediately to fail fast.
        resolved_path = Path(self.resolve_path(path_str))
        if not resolved_path.exists():
            msg = f"File or directory not found: {path}"
            self.log(msg)
            if not self.silent_errors:
                raise ValueError(msg)
            # Silent mode: skip the missing path so only existing files are returned.
            return
        resolved_files.append(
            BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing)
        )

    file_path = self._file_path_as_list()

    if self.path and not file_path:
        # Wrap self.path into a Data object
        if isinstance(self.path, list):
            for path in self.path:
                data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: path})
                add_file(data=data_obj, path=path, delete_after_processing=False)
        else:
            data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: self.path})
            add_file(data=data_obj, path=self.path, delete_after_processing=False)
    elif file_path:
        for obj in file_path:
            server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)
            if server_file_path:
                add_file(
                    data=obj,
                    path=server_file_path,
                    delete_after_processing=self.delete_server_file_after_processing,
                )
            elif not self.ignore_unspecified_files:
                msg = f"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property."
                self.log(msg)
                if not self.silent_errors:
                    raise ValueError(msg)
            else:
                msg = f"Ignoring Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property:\n{obj}"
                self.log(msg)

    return resolved_files
661
+
662
def _unpack_and_collect_files(self, files: list[BaseFile]) -> list[BaseFile]:
    """Expand directories and bundles until only plain file entries remain.

    Each pass replaces every directory entry with the regular files found
    anywhere beneath it, and every bundle entry with the top-level contents
    of its extracted archive. Passes repeat until no directory or bundle
    entries are left, which handles nested bundles and directories inside
    bundles. Expanded entries inherit ``data`` and
    ``delete_after_processing`` from the entry they were expanded from.

    Bundle extensions are matched case-insensitively, consistent with the
    extension validation done in ``_filter_and_mark_files``.

    Args:
        files (list[BaseFile]): List of BaseFile instances to process.

    Returns:
        list[BaseFile]: New list whose entries are neither directories nor
            supported bundles.

    Raises:
        ValueError: Propagated from ``_unpack_bundle`` when a bundle cannot
            be unpacked.
    """
    # Normalise once so "ARCHIVE.ZIP" is unpacked just like "archive.zip".
    bundle_extensions = {ext.lower() for ext in self.SUPPORTED_BUNDLE_EXTENSIONS}

    def _is_bundle(path: Path) -> bool:
        """Return True when the path's suffix names a supported bundle type."""
        return path.suffix[1:].lower() in bundle_extensions

    pending = files
    while True:
        collected_files = []

        for file in pending:
            path = file.path
            delete_after_processing = file.delete_after_processing
            data = file.data

            if path.is_dir():
                # rglob already walks the whole tree; keep regular files only.
                collected_files.extend(
                    BaseFileComponent.BaseFile(
                        data,
                        sub_path,
                        delete_after_processing=delete_after_processing,
                    )
                    for sub_path in path.rglob("*")
                    if sub_path.is_file()
                )
            elif _is_bundle(path):
                # Keep the TemporaryDirectory handle on the instance so the
                # extracted files outlive this call.
                temp_dir = TemporaryDirectory()
                self._temp_dirs.append(temp_dir)
                temp_dir_path = Path(temp_dir.name)
                self._unpack_bundle(path, temp_dir_path)
                subpaths = list(temp_dir_path.iterdir())
                self.log(f"Unpacked bundle {path.name} into {subpaths}")
                collected_files.extend(
                    BaseFileComponent.BaseFile(
                        data,
                        sub_path,
                        delete_after_processing=delete_after_processing,
                    )
                    for sub_path in subpaths
                )
            else:
                collected_files.append(file)

        # Another pass is needed only while directories or bundles remain
        # (e.g. a directory extracted from a bundle, or a nested bundle).
        if not any(entry.path.is_dir() or _is_bundle(entry.path) for entry in collected_files):
            return collected_files
        pending = collected_files
719
+
720
def _unpack_bundle(self, bundle_path: Path, output_dir: Path):
    """Unpack a ZIP or TAR bundle into ``output_dir``.

    The archive type is detected from the file's content (``is_zipfile`` /
    ``tarfile.is_tarfile``), not from its extension. Members are extracted
    one at a time so each can be vetted first:

    * AppleDouble resource-fork entries (names starting with ``._``) are skipped.
    * Any member whose resolved destination falls outside ``output_dir``
      aborts the extraction.
    * TAR members that are not regular files or directories (symlinks,
      hard links, devices, FIFOs) are skipped, because their link targets
      are not covered by the destination-path check and could expose files
      outside ``output_dir`` once the extracted tree is read.

    Args:
        bundle_path (Path): Path to the bundle.
        output_dir (Path): Directory where files will be extracted.

    Raises:
        ValueError: If the bundle format is unsupported, or a member would
            be written outside ``output_dir``.
    """
    # Resolve the extraction root once; every member is checked against it.
    output_root = output_dir.resolve()

    def _is_resource_fork(member_name: str) -> bool:
        """Return True for the '._*' entries macOS adds to archives."""
        return Path(member_name).name.startswith("._")

    def _ensure_inside_output_dir(member_name: str, archive_kind: str) -> None:
        """Raise ValueError if the member would land outside the extraction root."""
        if not (output_dir / member_name).resolve().is_relative_to(output_root):
            msg = f"Attempted Path Traversal in {archive_kind} File: {member_name}"
            raise ValueError(msg)

    def _safe_extract_zip(bundle: ZipFile, output_dir: Path):
        """Safely extract ZIP files."""
        for member in bundle.namelist():
            if _is_resource_fork(member):
                continue
            _ensure_inside_output_dir(member, "ZIP")
            bundle.extract(member, path=output_dir)

    def _safe_extract_tar(bundle: tarfile.TarFile, output_dir: Path):
        """Safely extract TAR files."""
        # Extraction filters exist only on newer / patched interpreters;
        # feature-detect them as the tarfile documentation recommends.
        extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
        for member in bundle.getmembers():
            if _is_resource_fork(member.name):
                continue
            _ensure_inside_output_dir(member.name, "TAR")
            if not (member.isfile() or member.isdir()):
                # Links and special files are never needed as input data and
                # their targets are not validated by the path check above.
                self.log(f"Skipping non-regular TAR member: {member.name}")
                continue
            bundle.extract(member, path=output_dir, **extract_kwargs)

    # Check and extract based on file type
    if is_zipfile(bundle_path):
        with ZipFile(bundle_path, "r") as zip_bundle:
            _safe_extract_zip(zip_bundle, output_dir)
    elif tarfile.is_tarfile(bundle_path):
        with tarfile.open(bundle_path, "r:*") as tar_bundle:
            _safe_extract_tar(tar_bundle, output_dir)
    else:
        msg = f"Unsupported bundle format: {bundle_path.suffix}"
        raise ValueError(msg)
767
+
768
def _filter_and_mark_files(self, files: list[BaseFile]) -> list[BaseFile]:
    """Validate file types and filter out invalid files.

    A file is kept only when its lower-cased extension is in
    ``self.valid_extensions``. For non-S3 storage the path must also be an
    existing regular file; entries that are not are logged and dropped.

    Files with an unsupported extension are handled as follows:

    * non-S3 storage with ``ignore_unsupported_extensions`` set: the file is
      dropped and its name reported in a single "Ignored files" log entry;
    * otherwise the problem is logged and, unless ``silent_errors`` is set,
      a ``ValueError`` is raised. With ``silent_errors`` set the file is
      dropped, so it is never returned as if it had passed validation.

    Args:
        files (list[BaseFile]): List of BaseFile instances.

    Returns:
        list[BaseFile]: Validated BaseFile instances.

    Raises:
        ValueError: If unsupported files are encountered and neither
            `ignore_unsupported_extensions` (local storage) nor
            `silent_errors` applies.
    """
    settings = get_settings_service().settings
    is_s3_storage = settings.storage_type == "s3"
    final_files = []
    ignored_files = []

    for file in files:
        # For local storage, verify the path is actually a file.
        # For S3 storage, paths are virtual keys that don't exist locally.
        if not is_s3_storage and not file.path.is_file():
            self.log(f"Not a file: {file.path.name}")
            continue

        # An empty suffix yields "", which is then checked like any other extension.
        extension = file.path.suffix[1:].lower()
        if extension in self.valid_extensions:
            final_files.append(file)
            continue

        # For local storage, optionally ignore unsupported extensions.
        if not is_s3_storage and self.ignore_unsupported_extensions:
            ignored_files.append(file.path.name)
            continue

        msg = f"Unsupported file extension: {file.path.suffix}"
        self.log(msg)
        if not self.silent_errors:
            raise ValueError(msg)
        # Silent mode: the error was logged above; the unsupported file is
        # dropped rather than passed on to processing.

    if ignored_files:
        self.log(f"Ignored files: {ignored_files}")

    return final_files