lfx-nightly 0.1.11.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (699) hide show
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/base/__init__.py +0 -0
  4. lfx/base/agents/__init__.py +0 -0
  5. lfx/base/agents/agent.py +268 -0
  6. lfx/base/agents/callback.py +130 -0
  7. lfx/base/agents/context.py +109 -0
  8. lfx/base/agents/crewai/__init__.py +0 -0
  9. lfx/base/agents/crewai/crew.py +231 -0
  10. lfx/base/agents/crewai/tasks.py +12 -0
  11. lfx/base/agents/default_prompts.py +23 -0
  12. lfx/base/agents/errors.py +15 -0
  13. lfx/base/agents/events.py +346 -0
  14. lfx/base/agents/utils.py +205 -0
  15. lfx/base/astra_assistants/__init__.py +0 -0
  16. lfx/base/astra_assistants/util.py +171 -0
  17. lfx/base/chains/__init__.py +0 -0
  18. lfx/base/chains/model.py +19 -0
  19. lfx/base/composio/__init__.py +0 -0
  20. lfx/base/composio/composio_base.py +1291 -0
  21. lfx/base/compressors/__init__.py +0 -0
  22. lfx/base/compressors/model.py +60 -0
  23. lfx/base/constants.py +46 -0
  24. lfx/base/curl/__init__.py +0 -0
  25. lfx/base/curl/parse.py +188 -0
  26. lfx/base/data/__init__.py +5 -0
  27. lfx/base/data/base_file.py +685 -0
  28. lfx/base/data/docling_utils.py +245 -0
  29. lfx/base/data/utils.py +198 -0
  30. lfx/base/document_transformers/__init__.py +0 -0
  31. lfx/base/document_transformers/model.py +43 -0
  32. lfx/base/embeddings/__init__.py +0 -0
  33. lfx/base/embeddings/aiml_embeddings.py +62 -0
  34. lfx/base/embeddings/model.py +26 -0
  35. lfx/base/flow_processing/__init__.py +0 -0
  36. lfx/base/flow_processing/utils.py +86 -0
  37. lfx/base/huggingface/__init__.py +0 -0
  38. lfx/base/huggingface/model_bridge.py +133 -0
  39. lfx/base/io/__init__.py +0 -0
  40. lfx/base/io/chat.py +20 -0
  41. lfx/base/io/text.py +22 -0
  42. lfx/base/langchain_utilities/__init__.py +0 -0
  43. lfx/base/langchain_utilities/model.py +35 -0
  44. lfx/base/langchain_utilities/spider_constants.py +1 -0
  45. lfx/base/langwatch/__init__.py +0 -0
  46. lfx/base/langwatch/utils.py +18 -0
  47. lfx/base/mcp/__init__.py +0 -0
  48. lfx/base/mcp/constants.py +2 -0
  49. lfx/base/mcp/util.py +1398 -0
  50. lfx/base/memory/__init__.py +0 -0
  51. lfx/base/memory/memory.py +49 -0
  52. lfx/base/memory/model.py +38 -0
  53. lfx/base/models/__init__.py +3 -0
  54. lfx/base/models/aiml_constants.py +51 -0
  55. lfx/base/models/anthropic_constants.py +47 -0
  56. lfx/base/models/aws_constants.py +151 -0
  57. lfx/base/models/chat_result.py +76 -0
  58. lfx/base/models/google_generative_ai_constants.py +70 -0
  59. lfx/base/models/groq_constants.py +134 -0
  60. lfx/base/models/model.py +375 -0
  61. lfx/base/models/model_input_constants.py +307 -0
  62. lfx/base/models/model_metadata.py +41 -0
  63. lfx/base/models/model_utils.py +8 -0
  64. lfx/base/models/novita_constants.py +35 -0
  65. lfx/base/models/ollama_constants.py +49 -0
  66. lfx/base/models/openai_constants.py +122 -0
  67. lfx/base/models/sambanova_constants.py +18 -0
  68. lfx/base/processing/__init__.py +0 -0
  69. lfx/base/prompts/__init__.py +0 -0
  70. lfx/base/prompts/api_utils.py +224 -0
  71. lfx/base/prompts/utils.py +61 -0
  72. lfx/base/textsplitters/__init__.py +0 -0
  73. lfx/base/textsplitters/model.py +28 -0
  74. lfx/base/tools/__init__.py +0 -0
  75. lfx/base/tools/base.py +26 -0
  76. lfx/base/tools/component_tool.py +325 -0
  77. lfx/base/tools/constants.py +49 -0
  78. lfx/base/tools/flow_tool.py +132 -0
  79. lfx/base/tools/run_flow.py +224 -0
  80. lfx/base/vectorstores/__init__.py +0 -0
  81. lfx/base/vectorstores/model.py +193 -0
  82. lfx/base/vectorstores/utils.py +22 -0
  83. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  84. lfx/cli/__init__.py +5 -0
  85. lfx/cli/commands.py +319 -0
  86. lfx/cli/common.py +650 -0
  87. lfx/cli/run.py +441 -0
  88. lfx/cli/script_loader.py +247 -0
  89. lfx/cli/serve_app.py +546 -0
  90. lfx/cli/validation.py +69 -0
  91. lfx/components/FAISS/__init__.py +34 -0
  92. lfx/components/FAISS/faiss.py +111 -0
  93. lfx/components/Notion/__init__.py +19 -0
  94. lfx/components/Notion/add_content_to_page.py +269 -0
  95. lfx/components/Notion/create_page.py +94 -0
  96. lfx/components/Notion/list_database_properties.py +68 -0
  97. lfx/components/Notion/list_pages.py +122 -0
  98. lfx/components/Notion/list_users.py +77 -0
  99. lfx/components/Notion/page_content_viewer.py +93 -0
  100. lfx/components/Notion/search.py +111 -0
  101. lfx/components/Notion/update_page_property.py +114 -0
  102. lfx/components/__init__.py +411 -0
  103. lfx/components/_importing.py +42 -0
  104. lfx/components/agentql/__init__.py +3 -0
  105. lfx/components/agentql/agentql_api.py +151 -0
  106. lfx/components/agents/__init__.py +34 -0
  107. lfx/components/agents/agent.py +558 -0
  108. lfx/components/agents/mcp_component.py +501 -0
  109. lfx/components/aiml/__init__.py +37 -0
  110. lfx/components/aiml/aiml.py +112 -0
  111. lfx/components/aiml/aiml_embeddings.py +37 -0
  112. lfx/components/amazon/__init__.py +36 -0
  113. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  114. lfx/components/amazon/amazon_bedrock_model.py +124 -0
  115. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  116. lfx/components/anthropic/__init__.py +34 -0
  117. lfx/components/anthropic/anthropic.py +187 -0
  118. lfx/components/apify/__init__.py +5 -0
  119. lfx/components/apify/apify_actor.py +325 -0
  120. lfx/components/arxiv/__init__.py +3 -0
  121. lfx/components/arxiv/arxiv.py +163 -0
  122. lfx/components/assemblyai/__init__.py +46 -0
  123. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  124. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  125. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  126. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  127. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  128. lfx/components/azure/__init__.py +37 -0
  129. lfx/components/azure/azure_openai.py +95 -0
  130. lfx/components/azure/azure_openai_embeddings.py +83 -0
  131. lfx/components/baidu/__init__.py +32 -0
  132. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  133. lfx/components/bing/__init__.py +3 -0
  134. lfx/components/bing/bing_search_api.py +61 -0
  135. lfx/components/cassandra/__init__.py +40 -0
  136. lfx/components/cassandra/cassandra.py +264 -0
  137. lfx/components/cassandra/cassandra_chat.py +92 -0
  138. lfx/components/cassandra/cassandra_graph.py +238 -0
  139. lfx/components/chains/__init__.py +3 -0
  140. lfx/components/chroma/__init__.py +34 -0
  141. lfx/components/chroma/chroma.py +167 -0
  142. lfx/components/cleanlab/__init__.py +40 -0
  143. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  144. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  145. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  146. lfx/components/clickhouse/__init__.py +34 -0
  147. lfx/components/clickhouse/clickhouse.py +135 -0
  148. lfx/components/cloudflare/__init__.py +32 -0
  149. lfx/components/cloudflare/cloudflare.py +81 -0
  150. lfx/components/cohere/__init__.py +40 -0
  151. lfx/components/cohere/cohere_embeddings.py +81 -0
  152. lfx/components/cohere/cohere_models.py +46 -0
  153. lfx/components/cohere/cohere_rerank.py +51 -0
  154. lfx/components/composio/__init__.py +74 -0
  155. lfx/components/composio/composio_api.py +268 -0
  156. lfx/components/composio/dropbox_compnent.py +11 -0
  157. lfx/components/composio/github_composio.py +11 -0
  158. lfx/components/composio/gmail_composio.py +38 -0
  159. lfx/components/composio/googlecalendar_composio.py +11 -0
  160. lfx/components/composio/googlemeet_composio.py +11 -0
  161. lfx/components/composio/googletasks_composio.py +8 -0
  162. lfx/components/composio/linear_composio.py +11 -0
  163. lfx/components/composio/outlook_composio.py +11 -0
  164. lfx/components/composio/reddit_composio.py +11 -0
  165. lfx/components/composio/slack_composio.py +582 -0
  166. lfx/components/composio/slackbot_composio.py +11 -0
  167. lfx/components/composio/supabase_composio.py +11 -0
  168. lfx/components/composio/todoist_composio.py +11 -0
  169. lfx/components/composio/youtube_composio.py +11 -0
  170. lfx/components/confluence/__init__.py +3 -0
  171. lfx/components/confluence/confluence.py +84 -0
  172. lfx/components/couchbase/__init__.py +34 -0
  173. lfx/components/couchbase/couchbase.py +102 -0
  174. lfx/components/crewai/__init__.py +49 -0
  175. lfx/components/crewai/crewai.py +107 -0
  176. lfx/components/crewai/hierarchical_crew.py +46 -0
  177. lfx/components/crewai/hierarchical_task.py +44 -0
  178. lfx/components/crewai/sequential_crew.py +52 -0
  179. lfx/components/crewai/sequential_task.py +73 -0
  180. lfx/components/crewai/sequential_task_agent.py +143 -0
  181. lfx/components/custom_component/__init__.py +34 -0
  182. lfx/components/custom_component/custom_component.py +31 -0
  183. lfx/components/data/__init__.py +64 -0
  184. lfx/components/data/api_request.py +544 -0
  185. lfx/components/data/csv_to_data.py +95 -0
  186. lfx/components/data/directory.py +113 -0
  187. lfx/components/data/file.py +577 -0
  188. lfx/components/data/json_to_data.py +98 -0
  189. lfx/components/data/news_search.py +164 -0
  190. lfx/components/data/rss.py +69 -0
  191. lfx/components/data/sql_executor.py +101 -0
  192. lfx/components/data/url.py +311 -0
  193. lfx/components/data/web_search.py +112 -0
  194. lfx/components/data/webhook.py +56 -0
  195. lfx/components/datastax/__init__.py +70 -0
  196. lfx/components/datastax/astra_assistant_manager.py +306 -0
  197. lfx/components/datastax/astra_db.py +75 -0
  198. lfx/components/datastax/astra_vectorize.py +124 -0
  199. lfx/components/datastax/astradb.py +1285 -0
  200. lfx/components/datastax/astradb_cql.py +314 -0
  201. lfx/components/datastax/astradb_graph.py +330 -0
  202. lfx/components/datastax/astradb_tool.py +414 -0
  203. lfx/components/datastax/astradb_vectorstore.py +1285 -0
  204. lfx/components/datastax/cassandra.py +92 -0
  205. lfx/components/datastax/create_assistant.py +58 -0
  206. lfx/components/datastax/create_thread.py +32 -0
  207. lfx/components/datastax/dotenv.py +35 -0
  208. lfx/components/datastax/get_assistant.py +37 -0
  209. lfx/components/datastax/getenvvar.py +30 -0
  210. lfx/components/datastax/graph_rag.py +141 -0
  211. lfx/components/datastax/hcd.py +314 -0
  212. lfx/components/datastax/list_assistants.py +25 -0
  213. lfx/components/datastax/run.py +89 -0
  214. lfx/components/deactivated/__init__.py +15 -0
  215. lfx/components/deactivated/amazon_kendra.py +66 -0
  216. lfx/components/deactivated/chat_litellm_model.py +158 -0
  217. lfx/components/deactivated/code_block_extractor.py +26 -0
  218. lfx/components/deactivated/documents_to_data.py +22 -0
  219. lfx/components/deactivated/embed.py +16 -0
  220. lfx/components/deactivated/extract_key_from_data.py +46 -0
  221. lfx/components/deactivated/json_document_builder.py +57 -0
  222. lfx/components/deactivated/list_flows.py +20 -0
  223. lfx/components/deactivated/mcp_sse.py +61 -0
  224. lfx/components/deactivated/mcp_stdio.py +62 -0
  225. lfx/components/deactivated/merge_data.py +93 -0
  226. lfx/components/deactivated/message.py +37 -0
  227. lfx/components/deactivated/metal.py +54 -0
  228. lfx/components/deactivated/multi_query.py +59 -0
  229. lfx/components/deactivated/retriever.py +43 -0
  230. lfx/components/deactivated/selective_passthrough.py +77 -0
  231. lfx/components/deactivated/should_run_next.py +40 -0
  232. lfx/components/deactivated/split_text.py +63 -0
  233. lfx/components/deactivated/store_message.py +24 -0
  234. lfx/components/deactivated/sub_flow.py +124 -0
  235. lfx/components/deactivated/vectara_self_query.py +76 -0
  236. lfx/components/deactivated/vector_store.py +24 -0
  237. lfx/components/deepseek/__init__.py +34 -0
  238. lfx/components/deepseek/deepseek.py +136 -0
  239. lfx/components/docling/__init__.py +43 -0
  240. lfx/components/docling/chunk_docling_document.py +186 -0
  241. lfx/components/docling/docling_inline.py +231 -0
  242. lfx/components/docling/docling_remote.py +193 -0
  243. lfx/components/docling/export_docling_document.py +117 -0
  244. lfx/components/documentloaders/__init__.py +3 -0
  245. lfx/components/duckduckgo/__init__.py +3 -0
  246. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  247. lfx/components/elastic/__init__.py +37 -0
  248. lfx/components/elastic/elasticsearch.py +267 -0
  249. lfx/components/elastic/opensearch.py +243 -0
  250. lfx/components/embeddings/__init__.py +37 -0
  251. lfx/components/embeddings/similarity.py +76 -0
  252. lfx/components/embeddings/text_embedder.py +64 -0
  253. lfx/components/exa/__init__.py +3 -0
  254. lfx/components/exa/exa_search.py +68 -0
  255. lfx/components/firecrawl/__init__.py +43 -0
  256. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  257. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  258. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  259. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  260. lfx/components/git/__init__.py +4 -0
  261. lfx/components/git/git.py +262 -0
  262. lfx/components/git/gitextractor.py +196 -0
  263. lfx/components/glean/__init__.py +3 -0
  264. lfx/components/glean/glean_search_api.py +173 -0
  265. lfx/components/google/__init__.py +17 -0
  266. lfx/components/google/gmail.py +192 -0
  267. lfx/components/google/google_bq_sql_executor.py +157 -0
  268. lfx/components/google/google_drive.py +92 -0
  269. lfx/components/google/google_drive_search.py +152 -0
  270. lfx/components/google/google_generative_ai.py +147 -0
  271. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  272. lfx/components/google/google_oauth_token.py +89 -0
  273. lfx/components/google/google_search_api_core.py +68 -0
  274. lfx/components/google/google_serper_api_core.py +74 -0
  275. lfx/components/groq/__init__.py +34 -0
  276. lfx/components/groq/groq.py +136 -0
  277. lfx/components/helpers/__init__.py +52 -0
  278. lfx/components/helpers/calculator_core.py +89 -0
  279. lfx/components/helpers/create_list.py +40 -0
  280. lfx/components/helpers/current_date.py +42 -0
  281. lfx/components/helpers/id_generator.py +42 -0
  282. lfx/components/helpers/memory.py +251 -0
  283. lfx/components/helpers/output_parser.py +45 -0
  284. lfx/components/helpers/store_message.py +90 -0
  285. lfx/components/homeassistant/__init__.py +7 -0
  286. lfx/components/homeassistant/home_assistant_control.py +152 -0
  287. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  288. lfx/components/huggingface/__init__.py +37 -0
  289. lfx/components/huggingface/huggingface.py +197 -0
  290. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  291. lfx/components/ibm/__init__.py +34 -0
  292. lfx/components/ibm/watsonx.py +203 -0
  293. lfx/components/ibm/watsonx_embeddings.py +135 -0
  294. lfx/components/icosacomputing/__init__.py +5 -0
  295. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  296. lfx/components/input_output/__init__.py +38 -0
  297. lfx/components/input_output/chat.py +120 -0
  298. lfx/components/input_output/chat_output.py +200 -0
  299. lfx/components/input_output/text.py +27 -0
  300. lfx/components/input_output/text_output.py +29 -0
  301. lfx/components/jigsawstack/__init__.py +23 -0
  302. lfx/components/jigsawstack/ai_scrape.py +126 -0
  303. lfx/components/jigsawstack/ai_web_search.py +136 -0
  304. lfx/components/jigsawstack/file_read.py +115 -0
  305. lfx/components/jigsawstack/file_upload.py +94 -0
  306. lfx/components/jigsawstack/image_generation.py +205 -0
  307. lfx/components/jigsawstack/nsfw.py +60 -0
  308. lfx/components/jigsawstack/object_detection.py +124 -0
  309. lfx/components/jigsawstack/sentiment.py +112 -0
  310. lfx/components/jigsawstack/text_to_sql.py +90 -0
  311. lfx/components/jigsawstack/text_translate.py +77 -0
  312. lfx/components/jigsawstack/vocr.py +107 -0
  313. lfx/components/langchain_utilities/__init__.py +109 -0
  314. lfx/components/langchain_utilities/character.py +53 -0
  315. lfx/components/langchain_utilities/conversation.py +59 -0
  316. lfx/components/langchain_utilities/csv_agent.py +107 -0
  317. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  318. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  319. lfx/components/langchain_utilities/json_agent.py +45 -0
  320. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  321. lfx/components/langchain_utilities/language_recursive.py +49 -0
  322. lfx/components/langchain_utilities/language_semantic.py +138 -0
  323. lfx/components/langchain_utilities/llm_checker.py +39 -0
  324. lfx/components/langchain_utilities/llm_math.py +42 -0
  325. lfx/components/langchain_utilities/natural_language.py +61 -0
  326. lfx/components/langchain_utilities/openai_tools.py +53 -0
  327. lfx/components/langchain_utilities/openapi.py +48 -0
  328. lfx/components/langchain_utilities/recursive_character.py +60 -0
  329. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  330. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  331. lfx/components/langchain_utilities/self_query.py +80 -0
  332. lfx/components/langchain_utilities/spider.py +142 -0
  333. lfx/components/langchain_utilities/sql.py +40 -0
  334. lfx/components/langchain_utilities/sql_database.py +35 -0
  335. lfx/components/langchain_utilities/sql_generator.py +78 -0
  336. lfx/components/langchain_utilities/tool_calling.py +59 -0
  337. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  338. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  339. lfx/components/langchain_utilities/xml_agent.py +71 -0
  340. lfx/components/langwatch/__init__.py +3 -0
  341. lfx/components/langwatch/langwatch.py +278 -0
  342. lfx/components/link_extractors/__init__.py +3 -0
  343. lfx/components/lmstudio/__init__.py +34 -0
  344. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  345. lfx/components/lmstudio/lmstudiomodel.py +129 -0
  346. lfx/components/logic/__init__.py +52 -0
  347. lfx/components/logic/conditional_router.py +171 -0
  348. lfx/components/logic/data_conditional_router.py +125 -0
  349. lfx/components/logic/flow_tool.py +110 -0
  350. lfx/components/logic/listen.py +29 -0
  351. lfx/components/logic/loop.py +125 -0
  352. lfx/components/logic/notify.py +88 -0
  353. lfx/components/logic/pass_message.py +35 -0
  354. lfx/components/logic/run_flow.py +71 -0
  355. lfx/components/logic/sub_flow.py +114 -0
  356. lfx/components/maritalk/__init__.py +32 -0
  357. lfx/components/maritalk/maritalk.py +52 -0
  358. lfx/components/mem0/__init__.py +3 -0
  359. lfx/components/mem0/mem0_chat_memory.py +136 -0
  360. lfx/components/milvus/__init__.py +34 -0
  361. lfx/components/milvus/milvus.py +115 -0
  362. lfx/components/mistral/__init__.py +37 -0
  363. lfx/components/mistral/mistral.py +114 -0
  364. lfx/components/mistral/mistral_embeddings.py +58 -0
  365. lfx/components/models/__init__.py +34 -0
  366. lfx/components/models/embedding_model.py +114 -0
  367. lfx/components/models/language_model.py +144 -0
  368. lfx/components/mongodb/__init__.py +34 -0
  369. lfx/components/mongodb/mongodb_atlas.py +213 -0
  370. lfx/components/needle/__init__.py +3 -0
  371. lfx/components/needle/needle.py +104 -0
  372. lfx/components/notdiamond/__init__.py +34 -0
  373. lfx/components/notdiamond/notdiamond.py +228 -0
  374. lfx/components/novita/__init__.py +32 -0
  375. lfx/components/novita/novita.py +130 -0
  376. lfx/components/nvidia/__init__.py +57 -0
  377. lfx/components/nvidia/nvidia.py +157 -0
  378. lfx/components/nvidia/nvidia_embedding.py +77 -0
  379. lfx/components/nvidia/nvidia_ingest.py +317 -0
  380. lfx/components/nvidia/nvidia_rerank.py +63 -0
  381. lfx/components/nvidia/system_assist.py +65 -0
  382. lfx/components/olivya/__init__.py +3 -0
  383. lfx/components/olivya/olivya.py +116 -0
  384. lfx/components/ollama/__init__.py +37 -0
  385. lfx/components/ollama/ollama.py +330 -0
  386. lfx/components/ollama/ollama_embeddings.py +106 -0
  387. lfx/components/openai/__init__.py +37 -0
  388. lfx/components/openai/openai.py +100 -0
  389. lfx/components/openai/openai_chat_model.py +176 -0
  390. lfx/components/openrouter/__init__.py +32 -0
  391. lfx/components/openrouter/openrouter.py +202 -0
  392. lfx/components/output_parsers/__init__.py +3 -0
  393. lfx/components/perplexity/__init__.py +34 -0
  394. lfx/components/perplexity/perplexity.py +75 -0
  395. lfx/components/pgvector/__init__.py +34 -0
  396. lfx/components/pgvector/pgvector.py +72 -0
  397. lfx/components/pinecone/__init__.py +34 -0
  398. lfx/components/pinecone/pinecone.py +134 -0
  399. lfx/components/processing/__init__.py +117 -0
  400. lfx/components/processing/alter_metadata.py +108 -0
  401. lfx/components/processing/batch_run.py +205 -0
  402. lfx/components/processing/combine_text.py +39 -0
  403. lfx/components/processing/converter.py +159 -0
  404. lfx/components/processing/create_data.py +110 -0
  405. lfx/components/processing/data_operations.py +438 -0
  406. lfx/components/processing/data_to_dataframe.py +70 -0
  407. lfx/components/processing/dataframe_operations.py +313 -0
  408. lfx/components/processing/extract_key.py +53 -0
  409. lfx/components/processing/filter_data.py +42 -0
  410. lfx/components/processing/filter_data_values.py +88 -0
  411. lfx/components/processing/json_cleaner.py +103 -0
  412. lfx/components/processing/lambda_filter.py +154 -0
  413. lfx/components/processing/llm_router.py +499 -0
  414. lfx/components/processing/merge_data.py +90 -0
  415. lfx/components/processing/message_to_data.py +36 -0
  416. lfx/components/processing/parse_data.py +70 -0
  417. lfx/components/processing/parse_dataframe.py +68 -0
  418. lfx/components/processing/parse_json_data.py +90 -0
  419. lfx/components/processing/parser.py +143 -0
  420. lfx/components/processing/prompt.py +67 -0
  421. lfx/components/processing/python_repl_core.py +98 -0
  422. lfx/components/processing/regex.py +82 -0
  423. lfx/components/processing/save_file.py +225 -0
  424. lfx/components/processing/select_data.py +48 -0
  425. lfx/components/processing/split_text.py +141 -0
  426. lfx/components/processing/structured_output.py +202 -0
  427. lfx/components/processing/update_data.py +160 -0
  428. lfx/components/prototypes/__init__.py +34 -0
  429. lfx/components/prototypes/python_function.py +73 -0
  430. lfx/components/qdrant/__init__.py +34 -0
  431. lfx/components/qdrant/qdrant.py +109 -0
  432. lfx/components/redis/__init__.py +37 -0
  433. lfx/components/redis/redis.py +89 -0
  434. lfx/components/redis/redis_chat.py +43 -0
  435. lfx/components/sambanova/__init__.py +32 -0
  436. lfx/components/sambanova/sambanova.py +84 -0
  437. lfx/components/scrapegraph/__init__.py +40 -0
  438. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  439. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  440. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  441. lfx/components/searchapi/__init__.py +34 -0
  442. lfx/components/searchapi/search.py +79 -0
  443. lfx/components/serpapi/__init__.py +3 -0
  444. lfx/components/serpapi/serp.py +115 -0
  445. lfx/components/supabase/__init__.py +34 -0
  446. lfx/components/supabase/supabase.py +76 -0
  447. lfx/components/tavily/__init__.py +4 -0
  448. lfx/components/tavily/tavily_extract.py +117 -0
  449. lfx/components/tavily/tavily_search.py +212 -0
  450. lfx/components/textsplitters/__init__.py +3 -0
  451. lfx/components/toolkits/__init__.py +3 -0
  452. lfx/components/tools/__init__.py +72 -0
  453. lfx/components/tools/calculator.py +108 -0
  454. lfx/components/tools/google_search_api.py +45 -0
  455. lfx/components/tools/google_serper_api.py +115 -0
  456. lfx/components/tools/python_code_structured_tool.py +327 -0
  457. lfx/components/tools/python_repl.py +97 -0
  458. lfx/components/tools/search_api.py +87 -0
  459. lfx/components/tools/searxng.py +145 -0
  460. lfx/components/tools/serp_api.py +119 -0
  461. lfx/components/tools/tavily_search_tool.py +344 -0
  462. lfx/components/tools/wikidata_api.py +102 -0
  463. lfx/components/tools/wikipedia_api.py +49 -0
  464. lfx/components/tools/yahoo_finance.py +129 -0
  465. lfx/components/twelvelabs/__init__.py +52 -0
  466. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  467. lfx/components/twelvelabs/pegasus_index.py +311 -0
  468. lfx/components/twelvelabs/split_video.py +291 -0
  469. lfx/components/twelvelabs/text_embeddings.py +57 -0
  470. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  471. lfx/components/twelvelabs/video_embeddings.py +100 -0
  472. lfx/components/twelvelabs/video_file.py +179 -0
  473. lfx/components/unstructured/__init__.py +3 -0
  474. lfx/components/unstructured/unstructured.py +121 -0
  475. lfx/components/upstash/__init__.py +34 -0
  476. lfx/components/upstash/upstash.py +124 -0
  477. lfx/components/vectara/__init__.py +37 -0
  478. lfx/components/vectara/vectara.py +97 -0
  479. lfx/components/vectara/vectara_rag.py +164 -0
  480. lfx/components/vectorstores/__init__.py +40 -0
  481. lfx/components/vectorstores/astradb.py +1285 -0
  482. lfx/components/vectorstores/astradb_graph.py +319 -0
  483. lfx/components/vectorstores/cassandra.py +264 -0
  484. lfx/components/vectorstores/cassandra_graph.py +238 -0
  485. lfx/components/vectorstores/chroma.py +167 -0
  486. lfx/components/vectorstores/clickhouse.py +135 -0
  487. lfx/components/vectorstores/couchbase.py +102 -0
  488. lfx/components/vectorstores/elasticsearch.py +267 -0
  489. lfx/components/vectorstores/faiss.py +111 -0
  490. lfx/components/vectorstores/graph_rag.py +141 -0
  491. lfx/components/vectorstores/hcd.py +314 -0
  492. lfx/components/vectorstores/local_db.py +261 -0
  493. lfx/components/vectorstores/milvus.py +115 -0
  494. lfx/components/vectorstores/mongodb_atlas.py +213 -0
  495. lfx/components/vectorstores/opensearch.py +243 -0
  496. lfx/components/vectorstores/pgvector.py +72 -0
  497. lfx/components/vectorstores/pinecone.py +134 -0
  498. lfx/components/vectorstores/qdrant.py +109 -0
  499. lfx/components/vectorstores/supabase.py +76 -0
  500. lfx/components/vectorstores/upstash.py +124 -0
  501. lfx/components/vectorstores/vectara.py +97 -0
  502. lfx/components/vectorstores/vectara_rag.py +164 -0
  503. lfx/components/vectorstores/weaviate.py +89 -0
  504. lfx/components/vertexai/__init__.py +37 -0
  505. lfx/components/vertexai/vertexai.py +71 -0
  506. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  507. lfx/components/weaviate/__init__.py +34 -0
  508. lfx/components/weaviate/weaviate.py +89 -0
  509. lfx/components/wikipedia/__init__.py +4 -0
  510. lfx/components/wikipedia/wikidata.py +86 -0
  511. lfx/components/wikipedia/wikipedia.py +53 -0
  512. lfx/components/wolframalpha/__init__.py +3 -0
  513. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  514. lfx/components/xai/__init__.py +32 -0
  515. lfx/components/xai/xai.py +167 -0
  516. lfx/components/yahoosearch/__init__.py +3 -0
  517. lfx/components/yahoosearch/yahoo.py +137 -0
  518. lfx/components/youtube/__init__.py +52 -0
  519. lfx/components/youtube/channel.py +227 -0
  520. lfx/components/youtube/comments.py +231 -0
  521. lfx/components/youtube/playlist.py +33 -0
  522. lfx/components/youtube/search.py +120 -0
  523. lfx/components/youtube/trending.py +285 -0
  524. lfx/components/youtube/video_details.py +263 -0
  525. lfx/components/youtube/youtube_transcripts.py +118 -0
  526. lfx/components/zep/__init__.py +3 -0
  527. lfx/components/zep/zep.py +44 -0
  528. lfx/constants.py +6 -0
  529. lfx/custom/__init__.py +7 -0
  530. lfx/custom/attributes.py +86 -0
  531. lfx/custom/code_parser/__init__.py +3 -0
  532. lfx/custom/code_parser/code_parser.py +361 -0
  533. lfx/custom/custom_component/__init__.py +0 -0
  534. lfx/custom/custom_component/base_component.py +128 -0
  535. lfx/custom/custom_component/component.py +1808 -0
  536. lfx/custom/custom_component/component_with_cache.py +8 -0
  537. lfx/custom/custom_component/custom_component.py +588 -0
  538. lfx/custom/dependency_analyzer.py +165 -0
  539. lfx/custom/directory_reader/__init__.py +3 -0
  540. lfx/custom/directory_reader/directory_reader.py +359 -0
  541. lfx/custom/directory_reader/utils.py +171 -0
  542. lfx/custom/eval.py +12 -0
  543. lfx/custom/schema.py +32 -0
  544. lfx/custom/tree_visitor.py +21 -0
  545. lfx/custom/utils.py +877 -0
  546. lfx/custom/validate.py +488 -0
  547. lfx/events/__init__.py +1 -0
  548. lfx/events/event_manager.py +110 -0
  549. lfx/exceptions/__init__.py +0 -0
  550. lfx/exceptions/component.py +15 -0
  551. lfx/field_typing/__init__.py +91 -0
  552. lfx/field_typing/constants.py +215 -0
  553. lfx/field_typing/range_spec.py +35 -0
  554. lfx/graph/__init__.py +6 -0
  555. lfx/graph/edge/__init__.py +0 -0
  556. lfx/graph/edge/base.py +277 -0
  557. lfx/graph/edge/schema.py +119 -0
  558. lfx/graph/edge/utils.py +0 -0
  559. lfx/graph/graph/__init__.py +0 -0
  560. lfx/graph/graph/ascii.py +202 -0
  561. lfx/graph/graph/base.py +2238 -0
  562. lfx/graph/graph/constants.py +63 -0
  563. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  564. lfx/graph/graph/schema.py +52 -0
  565. lfx/graph/graph/state_model.py +66 -0
  566. lfx/graph/graph/utils.py +1024 -0
  567. lfx/graph/schema.py +75 -0
  568. lfx/graph/state/__init__.py +0 -0
  569. lfx/graph/state/model.py +237 -0
  570. lfx/graph/utils.py +200 -0
  571. lfx/graph/vertex/__init__.py +0 -0
  572. lfx/graph/vertex/base.py +823 -0
  573. lfx/graph/vertex/constants.py +0 -0
  574. lfx/graph/vertex/exceptions.py +4 -0
  575. lfx/graph/vertex/param_handler.py +264 -0
  576. lfx/graph/vertex/schema.py +26 -0
  577. lfx/graph/vertex/utils.py +19 -0
  578. lfx/graph/vertex/vertex_types.py +489 -0
  579. lfx/helpers/__init__.py +1 -0
  580. lfx/helpers/base_model.py +71 -0
  581. lfx/helpers/custom.py +13 -0
  582. lfx/helpers/data.py +167 -0
  583. lfx/helpers/flow.py +194 -0
  584. lfx/inputs/__init__.py +68 -0
  585. lfx/inputs/constants.py +2 -0
  586. lfx/inputs/input_mixin.py +328 -0
  587. lfx/inputs/inputs.py +714 -0
  588. lfx/inputs/validators.py +19 -0
  589. lfx/interface/__init__.py +6 -0
  590. lfx/interface/components.py +489 -0
  591. lfx/interface/importing/__init__.py +5 -0
  592. lfx/interface/importing/utils.py +39 -0
  593. lfx/interface/initialize/__init__.py +3 -0
  594. lfx/interface/initialize/loading.py +224 -0
  595. lfx/interface/listing.py +26 -0
  596. lfx/interface/run.py +16 -0
  597. lfx/interface/utils.py +111 -0
  598. lfx/io/__init__.py +63 -0
  599. lfx/io/schema.py +289 -0
  600. lfx/load/__init__.py +8 -0
  601. lfx/load/load.py +256 -0
  602. lfx/load/utils.py +99 -0
  603. lfx/log/__init__.py +5 -0
  604. lfx/log/logger.py +385 -0
  605. lfx/memory/__init__.py +90 -0
  606. lfx/memory/stubs.py +283 -0
  607. lfx/processing/__init__.py +1 -0
  608. lfx/processing/process.py +238 -0
  609. lfx/processing/utils.py +25 -0
  610. lfx/py.typed +0 -0
  611. lfx/schema/__init__.py +66 -0
  612. lfx/schema/artifact.py +83 -0
  613. lfx/schema/content_block.py +62 -0
  614. lfx/schema/content_types.py +91 -0
  615. lfx/schema/data.py +308 -0
  616. lfx/schema/dataframe.py +210 -0
  617. lfx/schema/dotdict.py +74 -0
  618. lfx/schema/encoders.py +13 -0
  619. lfx/schema/graph.py +47 -0
  620. lfx/schema/image.py +131 -0
  621. lfx/schema/json_schema.py +141 -0
  622. lfx/schema/log.py +61 -0
  623. lfx/schema/message.py +473 -0
  624. lfx/schema/openai_responses_schemas.py +74 -0
  625. lfx/schema/properties.py +41 -0
  626. lfx/schema/schema.py +171 -0
  627. lfx/schema/serialize.py +13 -0
  628. lfx/schema/table.py +140 -0
  629. lfx/schema/validators.py +114 -0
  630. lfx/serialization/__init__.py +5 -0
  631. lfx/serialization/constants.py +2 -0
  632. lfx/serialization/serialization.py +314 -0
  633. lfx/services/__init__.py +23 -0
  634. lfx/services/base.py +28 -0
  635. lfx/services/cache/__init__.py +6 -0
  636. lfx/services/cache/base.py +183 -0
  637. lfx/services/cache/service.py +166 -0
  638. lfx/services/cache/utils.py +169 -0
  639. lfx/services/chat/__init__.py +1 -0
  640. lfx/services/chat/config.py +2 -0
  641. lfx/services/chat/schema.py +10 -0
  642. lfx/services/deps.py +129 -0
  643. lfx/services/factory.py +19 -0
  644. lfx/services/initialize.py +19 -0
  645. lfx/services/interfaces.py +103 -0
  646. lfx/services/manager.py +172 -0
  647. lfx/services/schema.py +20 -0
  648. lfx/services/session.py +82 -0
  649. lfx/services/settings/__init__.py +3 -0
  650. lfx/services/settings/auth.py +130 -0
  651. lfx/services/settings/base.py +539 -0
  652. lfx/services/settings/constants.py +31 -0
  653. lfx/services/settings/factory.py +23 -0
  654. lfx/services/settings/feature_flags.py +12 -0
  655. lfx/services/settings/service.py +35 -0
  656. lfx/services/settings/utils.py +40 -0
  657. lfx/services/shared_component_cache/__init__.py +1 -0
  658. lfx/services/shared_component_cache/factory.py +30 -0
  659. lfx/services/shared_component_cache/service.py +9 -0
  660. lfx/services/storage/__init__.py +5 -0
  661. lfx/services/storage/local.py +155 -0
  662. lfx/services/storage/service.py +54 -0
  663. lfx/services/tracing/__init__.py +1 -0
  664. lfx/services/tracing/service.py +21 -0
  665. lfx/settings.py +6 -0
  666. lfx/template/__init__.py +6 -0
  667. lfx/template/field/__init__.py +0 -0
  668. lfx/template/field/base.py +257 -0
  669. lfx/template/field/prompt.py +15 -0
  670. lfx/template/frontend_node/__init__.py +6 -0
  671. lfx/template/frontend_node/base.py +212 -0
  672. lfx/template/frontend_node/constants.py +65 -0
  673. lfx/template/frontend_node/custom_components.py +79 -0
  674. lfx/template/template/__init__.py +0 -0
  675. lfx/template/template/base.py +100 -0
  676. lfx/template/utils.py +217 -0
  677. lfx/type_extraction/__init__.py +19 -0
  678. lfx/type_extraction/type_extraction.py +75 -0
  679. lfx/type_extraction.py +80 -0
  680. lfx/utils/__init__.py +1 -0
  681. lfx/utils/async_helpers.py +42 -0
  682. lfx/utils/component_utils.py +154 -0
  683. lfx/utils/concurrency.py +60 -0
  684. lfx/utils/connection_string_parser.py +11 -0
  685. lfx/utils/constants.py +205 -0
  686. lfx/utils/data_structure.py +212 -0
  687. lfx/utils/exceptions.py +22 -0
  688. lfx/utils/helpers.py +28 -0
  689. lfx/utils/image.py +73 -0
  690. lfx/utils/lazy_load.py +15 -0
  691. lfx/utils/request_utils.py +18 -0
  692. lfx/utils/schemas.py +139 -0
  693. lfx/utils/util.py +481 -0
  694. lfx/utils/util_strings.py +56 -0
  695. lfx/utils/version.py +24 -0
  696. lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
  697. lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
  698. lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
  699. lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,685 @@
1
+ import ast
2
+ import json
3
+ import shutil
4
+ import tarfile
5
+ from abc import ABC, abstractmethod
6
+ from pathlib import Path
7
+ from tempfile import TemporaryDirectory
8
+ from typing import TYPE_CHECKING
9
+ from zipfile import ZipFile, is_zipfile
10
+
11
+ import pandas as pd
12
+
13
+ from lfx.custom.custom_component.component import Component
14
+ from lfx.io import BoolInput, FileInput, HandleInput, Output, StrInput
15
+ from lfx.schema.data import Data
16
+ from lfx.schema.dataframe import DataFrame
17
+ from lfx.schema.message import Message
18
+
19
+ if TYPE_CHECKING:
20
+ from collections.abc import Callable
21
+
22
+
23
class BaseFileComponent(Component, ABC):
    """Abstract base for components that load and process files.

    The shared machinery resolves and validates the configured paths,
    unpacks bundles, and filters files by extension. Concrete subclasses
    declare the extensions they accept and implement `process_files`.
    """
30
+
31
class BaseFile:
    """Internal record pairing a file path with the Data parsed from it."""

    def __init__(
        self,
        data: Data | list[Data],
        path: Path,
        *,
        delete_after_processing: bool = False,
        silent_errors: bool = False,
    ):
        """Store the file's data, path and handling flags.

        Args:
            data (Data | list[Data]): One Data object or a list of them; a single
                object is wrapped in a list.
            path (Path): Location of the file.
            delete_after_processing (bool): Whether the file should be removed once processed.
            silent_errors (bool): If true, invalid assignments/merges are ignored instead of raising.
        """
        self._data = data if isinstance(data, list) else [data]
        self.path = path
        self.delete_after_processing = delete_after_processing
        self._silent_errors = silent_errors

    @property
    def data(self) -> list[Data]:
        """The Data objects attached to this file (never None)."""
        return self._data or []

    @data.setter
    def data(self, value: Data | list[Data]):
        """Replace the attached data.

        Raises:
            ValueError: If `value` is neither a Data object nor a list of Data
                objects and silent errors are disabled. With silent errors
                enabled the previous data is kept.
        """
        if isinstance(value, Data):
            self._data = [value]
        elif isinstance(value, list) and all(isinstance(item, Data) for item in value):
            self._data = value
        else:
            msg = f"data must be a Data object or a list of Data objects. Got: {type(value)}"
            if not self._silent_errors:
                raise ValueError(msg)

    def merge_data(self, new_data: Data | list[Data] | None) -> list[Data]:
        r"""Generate a new list of Data objects by merging `new_data` into the current `data`.

        Every existing Data object is combined with every new Data object, with
        keys from the new object winning on conflict.

        Args:
            new_data (Data | list[Data] | None): The new Data object(s) to merge into each existing Data object.
                If None, the current `data` is returned unchanged.

        Returns:
            list[Data]: A new list of Data objects with `new_data` merged.

        Raises:
            ValueError: If `new_data` has an unsupported type and silent errors are disabled.
        """
        if new_data is None:
            return self.data

        if isinstance(new_data, Data):
            new_data_list = [new_data]
        elif isinstance(new_data, list) and all(isinstance(item, Data) for item in new_data):
            new_data_list = new_data
        else:
            msg = "new_data must be a Data object, a list of Data objects, or None."
            if not self._silent_errors:
                raise ValueError(msg)
            return self.data

        return [
            Data(data={**data.data, **new_data_item.data}) for data in self.data for new_data_item in new_data_list
        ]

    def __str__(self) -> str:
        """Return a short, human-readable summary of the file and its data."""
        fields = [
            f"path={self.path}",
            f"delete_after_processing={self.delete_after_processing}",
        ]

        items = self.data
        if len(items) == 1:
            max_text_length = 50
            # `data` is always a list, so the single item must be indexed
            # before asking it for its text.
            full_text = items[0].get_text() or ""
            text_preview = full_text[:max_text_length]
            if len(full_text) > max_text_length:
                text_preview += "..."
            fields.append(f"text_preview='{text_preview}'")
        elif items:
            fields.append(f"{len(items)} data objects")

        return f"BaseFile({', '.join(fields)})"
101
+
102
# Class-level defaults; subclasses may override any of these.
VALID_EXTENSIONS: list[str] = []  # Each concrete component supplies its own list
IGNORE_STARTS_WITH = [".", "__MACOSX"]

# Key under which a Data object carries the path of a server-side file.
SERVER_FILE_PATH_FIELDNAME = "file_path"
# Archive extensions that are unpacked before their contents are processed.
SUPPORTED_BUNDLE_EXTENSIONS = ["zip", "tar", "tgz", "bz2", "gz"]
108
+
109
def __init__(self, *args, **kwargs):
    """Initialise the component and publish the accepted file types.

    The first entry returned by `get_base_inputs()` has its `file_types` and
    `info` set here because both depend on the subclass's `valid_extensions`.
    """
    super().__init__(*args, **kwargs)

    # Accept both the component's own extensions and the supported bundles.
    accepted_types = [*self.valid_extensions, *self.SUPPORTED_BUNDLE_EXTENSIONS]
    self.get_base_inputs()[0].file_types = accepted_types

    file_types = ", ".join(self.valid_extensions)
    bundles = ", ".join(self.SUPPORTED_BUNDLE_EXTENSIONS)
    help_text = f"Supported file extensions: {file_types}; optionally bundled in file extensions: {bundles}"
    self.get_base_inputs()[0].info = help_text
122
+
123
# Inputs shared by every file component. The file picker is listed first;
# __init__ fills in the file types and help text of the first base input.
_base_inputs = [
    # Files uploaded or selected directly on the component.
    FileInput(
        name="path",
        display_name="Files",
        required=False,
        list=True,
        value=[],
        fileTypes=[],  # Dynamically set in __init__
        info="",  # Dynamically set in __init__
    ),
    # Files referenced by upstream Data/Message objects.
    HandleInput(
        name="file_path",
        display_name="Server File Path",
        input_types=["Data", "Message"],
        is_list=True,
        required=False,
        advanced=True,
        info=(
            f"Data object with a '{SERVER_FILE_PATH_FIELDNAME}' property pointing to server file"
            " or a Message object with a path to the file. Supercedes 'Path' but supports same file types."
        ),
    ),
    StrInput(
        name="separator",
        display_name="Separator",
        value="\n\n",
        show=True,
        advanced=True,
        info="Specify the separator to use between multiple outputs in Message format.",
    ),
    BoolInput(
        name="silent_errors",
        display_name="Silent Errors",
        advanced=True,
        info="If true, errors will not raise an exception.",
    ),
    BoolInput(
        name="delete_server_file_after_processing",
        display_name="Delete Server File After Processing",
        value=True,
        advanced=True,
        info="If true, the Server File Path will be deleted after processing.",
    ),
    BoolInput(
        name="ignore_unsupported_extensions",
        display_name="Ignore Unsupported Extensions",
        value=True,
        advanced=True,
        info="If true, files with unsupported extensions will not be processed.",
    ),
    BoolInput(
        name="ignore_unspecified_files",
        display_name="Ignore Unspecified Files",
        value=False,
        advanced=True,
        info=f"If true, Data with no '{SERVER_FILE_PATH_FIELDNAME}' property will be ignored.",
    ),
]

# Single default output: the loaded files, produced by `load_files`.
_base_outputs = [
    Output(display_name="Files", name="dataframe", method="load_files"),
]
185
+
186
@abstractmethod
def process_files(self, file_list: list[BaseFile]) -> list[BaseFile]:
    """Parse the given files; must be implemented by every subclass.

    Args:
        file_list (list[BaseFile]): The validated files to process.

    Returns:
        list[BaseFile]: BaseFile objects whose `data` has been populated.
    """
196
+
197
def load_files_base(self) -> list[Data]:
    """Loads and parses file(s), including unpacked file bundles.

    The pipeline is: resolve the configured paths, unpack directories and
    bundles, filter by extension, then hand the files to `process_files`.
    Temporary directories are removed and files flagged with
    `delete_after_processing` are deleted afterwards, whether or not
    processing succeeded.

    Returns:
        list[Data]: Parsed data from the processed files, flattened in file order.
    """
    self._temp_dirs: list[TemporaryDirectory] = []
    final_files = []  # Bound before `try` so the cleanup below can always iterate it
    try:
        # Step 1: Validate the provided paths
        files = self._validate_and_resolve_paths()

        # Step 2: Handle bundles recursively
        all_files = self._unpack_and_collect_files(files)

        # Step 3: Final validation of file types
        final_files = self._filter_and_mark_files(all_files)

        # Step 4: Process files
        processed_files = self.process_files(final_files)

        # Extract and flatten Data objects to return
        return [data for file in processed_files for data in file.data]

    finally:
        try:
            # Delete temporary directories
            for temp_dir in self._temp_dirs:
                temp_dir.cleanup()
        finally:
            # Runs even if a temp-dir cleanup failed, so files marked for
            # deletion are not left behind.
            for file in final_files:
                if not file.delete_after_processing:
                    continue
                if file.path.is_dir():
                    shutil.rmtree(file.path)
                else:
                    # Files that lived in a temp dir are already gone;
                    # missing_ok also avoids an exists()/unlink() race.
                    file.path.unlink(missing_ok=True)
233
+
234
def load_files_core(self) -> list[Data]:
    """Load files as Data objects, never returning an empty list.

    Returns:
        list[Data]: The Data objects from all files, or a single empty Data
            object when nothing was loaded.
    """
    loaded = self.load_files_base()
    return loaded if loaded else [Data()]
244
+
245
def load_files_message(self) -> Message:
    """Load files and return as Message.

    The text of each Data object is joined with the component's `separator`
    (falling back to a blank line when it is unset or empty).

    Returns:
        Message: Message containing all file data
    """
    data_list = self.load_files_core()
    if not data_list:
        return Message()  # No data -> empty message

    sep: str = getattr(self, "separator", "\n\n") or "\n\n"

    parts: list[str] = []
    for d in data_list:
        # Prefer explicit text if available, then the raw "text" entry, lastly str()
        text = None
        if isinstance(d.data, dict):
            text = getattr(d, "get_text", lambda: None)() or d.data.get("text")
        if text is None:
            text = str(d)
        # str.join only accepts strings; a non-string "text" value would
        # otherwise raise TypeError.
        parts.append(text if isinstance(text, str) else str(text))

    return Message(text=sep.join(parts))
264
+
265
def load_files_path(self) -> Message:
    """Return the resolved input paths as a newline-separated Message.

    Only paths that exist on disk are listed; the files themselves are not
    unpacked or processed.

    Returns:
        Message: Message containing file paths
    """
    existing: list[str] = []
    for file in self._validate_and_resolve_paths():
        if file.path.exists():
            existing.append(file.path.as_posix())

    return Message(text="\n".join(existing) if existing else "")
275
+
276
+ def load_files_structured_helper(self, file_path: str) -> list[dict] | None:
277
+ if not file_path:
278
+ return None
279
+
280
+ # Map file extensions to pandas read functions with type annotation
281
+ file_readers: dict[str, Callable[[str], pd.DataFrame]] = {
282
+ ".csv": pd.read_csv,
283
+ ".xlsx": pd.read_excel,
284
+ ".parquet": pd.read_parquet,
285
+ # TODO: sqlite and json support?
286
+ }
287
+
288
+ # Get file extension in lowercase
289
+ ext = Path(file_path).suffix.lower()
290
+
291
+ # Get the appropriate reader function or None
292
+ reader = file_readers.get(ext)
293
+
294
+ if reader:
295
+ result = reader(file_path) # MyPy now knows reader is callable
296
+ return result.to_dict("records")
297
+
298
+ return None
299
+
300
def load_files_structured(self) -> DataFrame:
    """Load files and return the first one as a DataFrame of structured content.

    Only the first loaded Data object is used. If it points at a `.csv`,
    `.xlsx` or `.parquet` file, that file is read directly; otherwise the
    Data object's own dictionary becomes the single row.

    Returns:
        DataFrame: DataFrame built from the first file's content
    """
    data_list = self.load_files_core()
    if not data_list:
        return DataFrame()

    first = data_list[0]
    file_path = first.data.get(self.SERVER_FILE_PATH_FIELDNAME, None)
    is_tabular = file_path and str(file_path).lower().endswith((".csv", ".xlsx", ".parquet"))

    # TODO: Parse according to docling standards
    rows = self.load_files_structured_helper(file_path) if is_tabular else [first.data]

    self.status = DataFrame(rows)

    return DataFrame(rows)
324
+
325
def parse_string_to_dict(self, s: str) -> dict:
    """Parse text into a dict, trying JSON first and Python literals second.

    Args:
        s (str): Text expected to hold a JSON object or a Python dict literal.
            A dict is returned unchanged; any other non-text value is wrapped.

    Returns:
        dict: The parsed dictionary, or `{"value": s}` when `s` does not
            parse to a dictionary.
    """
    if isinstance(s, dict):
        return s
    if not isinstance(s, (str, bytes, bytearray)):
        # json.loads would raise TypeError here; fall back like any other
        # unparseable input.
        return {"value": s}

    # Try JSON first (handles true/false/null)
    try:
        result = json.loads(s)
    except (ValueError, RecursionError):  # JSONDecodeError is a ValueError
        result = None
    if isinstance(result, dict):
        return result

    # Fall back to Python literal evaluation
    try:
        result = ast.literal_eval(s)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        result = None
    if isinstance(result, dict):
        return result

    # If all parsing fails, return the fallback
    return {"value": s}
344
+
345
def load_files_json(self) -> Data:
    """Load files and return the first one's text parsed as JSON.

    Only the first loaded Data object is used; its text is parsed with
    `parse_string_to_dict`.

    Returns:
        Data: Data object containing the parsed content, or an empty Data
            object when there is no text to parse.
    """
    data_list = self.load_files_core()
    if not data_list:
        return Data()

    # Grab the JSON data
    first = data_list[0]
    raw = first.data.get(first.text_key) if first.data else None
    if raw is None:
        # load_files_core returns an empty Data placeholder when nothing was
        # loaded; indexing its text key directly would raise KeyError.
        return Data()

    json_data = self.parse_string_to_dict(raw)

    self.status = Data(data=json_data)

    return Data(data=json_data)
362
+
363
def load_files(self) -> DataFrame:
    """Load files and return one DataFrame row per Data object.

    Each row is a copy of the Data object's dictionary; the server file path,
    when present, is also stored under the `file_path` column.

    Returns:
        DataFrame: DataFrame containing all file data
    """
    data_list = self.load_files_core()
    if not data_list:
        return DataFrame()

    all_rows = []
    for item in data_list:
        payload = item.data
        server_path = payload.get(self.SERVER_FILE_PATH_FIELDNAME)
        row = dict(payload) if payload else {}

        if "text" in payload:
            row["text"] = payload["text"]
        if server_path:
            row["file_path"] = server_path
        all_rows.append(row)

    self.status = DataFrame(all_rows)

    return DataFrame(all_rows)
389
+
390
@property
def valid_extensions(self) -> list[str]:
    """File extensions this component accepts.

    Defaults to the class-level `VALID_EXTENSIONS`; child classes may
    override the property to compute the list differently.

    Returns:
        list[str]: A list of valid file extensions without the leading dot.
    """
    return self.VALID_EXTENSIONS
401
+
402
@property
def ignore_starts_with(self) -> list[str]:
    """Name prefixes to skip when unpacking file bundles.

    Returns:
        list[str]: The class-level `IGNORE_STARTS_WITH` prefixes.
    """
    return self.IGNORE_STARTS_WITH
410
+
411
def rollup_data(
    self,
    base_files: list[BaseFile],
    data_list: list[Data | None],
    path_field: str = SERVER_FILE_PATH_FIELDNAME,
) -> list[BaseFile]:
    r"""Rolls up Data objects into corresponding BaseFile objects in order given by `base_files`.

    Data objects are grouped by the path stored under `path_field` and merged
    into the BaseFile whose `str(path)` equals that value.

    Args:
        base_files (list[BaseFile]): The original BaseFile objects.
        data_list (list[Data | None]): The list of data to be aggregated into the BaseFile objects.
            `None` entries are skipped.
        path_field (str): The field name on the data_list objects that holds the file path as a string.

    Returns:
        list[BaseFile]: A new list of BaseFile objects with merged `data` attributes.

    Raises:
        ValueError: If a Data object lacks `path_field` and silent errors are disabled.
    """
    # Group the incoming Data objects by the path they refer to.
    data_dict: dict[str, list[Data]] = {}
    for data in data_list:
        if data is None:
            continue
        key = data.data.get(path_field)
        if key is None:
            # Log once, then either raise or skip depending on silent_errors.
            msg = f"Data object missing required field '{path_field}': {data}"
            self.log(msg)
            if not self.silent_errors:
                raise ValueError(msg)
            continue
        data_dict.setdefault(key, []).append(data)

    # Rebuild the BaseFile list, preserving the order of base_files.
    updated_base_files = []
    for base_file in base_files:
        new_data_list = data_dict.get(str(base_file.path), [])
        merged_data_list = base_file.merge_data(new_data_list)
        updated_base_files.append(
            BaseFileComponent.BaseFile(
                data=merged_data_list,
                path=base_file.path,
                delete_after_processing=base_file.delete_after_processing,
                # Carry over the original file's error-handling mode.
                silent_errors=getattr(base_file, "_silent_errors", False),
            )
        )

    return updated_base_files
463
+
464
def _file_path_as_list(self) -> list[Data]:
    """Normalise the `file_path` input into a list of Data objects.

    A single Data or Message is wrapped in a list, and Message objects are
    converted to Data carrying the message text under the server-file-path
    field. Values of any other type are logged and either raise or are
    skipped, depending on `silent_errors`.

    Returns:
        list[Data]: The usable Data objects (empty when `file_path` is unset).

    Raises:
        ValueError: On an unexpected type when silent errors are disabled.
    """
    raw = self.file_path
    if not raw:
        return []

    def _message_to_data(message: Message) -> Data:
        return Data(**{self.SERVER_FILE_PATH_FIELDNAME: message.text})

    if isinstance(raw, Data):
        candidates = [raw]
    elif isinstance(raw, Message):
        candidates = [_message_to_data(raw)]
    elif isinstance(raw, list):
        candidates = raw
    else:
        msg = f"Expected list of Data objects in file_path but got {type(raw)}."
        self.log(msg)
        if not self.silent_errors:
            raise ValueError(msg)
        return []

    accepted = []
    for obj in candidates:
        data_obj = _message_to_data(obj) if isinstance(obj, Message) else obj

        if isinstance(data_obj, Data):
            accepted.append(data_obj)
            continue

        msg = f"Expected Data object in file_path but got {type(data_obj)}."
        self.log(msg)
        if not self.silent_errors:
            raise ValueError(msg)

    return accepted
496
+
497
def _validate_and_resolve_paths(self) -> list[BaseFile]:
    """Validate that all input paths exist and are valid, and create BaseFile instances.

    The `file_path` handle input takes precedence over the `path` input.
    Files taken from `path` are never marked for deletion; files taken from
    `file_path` follow `delete_server_file_after_processing`.

    Returns:
        list[BaseFile]: A list of valid BaseFile instances.

    Raises:
        ValueError: If a path does not exist, or a Data object has no file
            path and is not ignored, while silent errors are disabled.
    """
    resolved_files = []

    def add_file(data: Data, path: str | Path, *, delete_after_processing: bool):
        resolved_path = Path(self.resolve_path(str(path)))

        if not resolved_path.exists():
            msg = f"File or directory not found: {path}"
            self.log(msg)
            if not self.silent_errors:
                raise ValueError(msg)
            # Skip the missing path: only existing files belong in the
            # result, and a missing bundle would otherwise fail later when
            # it is opened for unpacking.
            return
        resolved_files.append(
            BaseFileComponent.BaseFile(data, resolved_path, delete_after_processing=delete_after_processing)
        )

    file_path = self._file_path_as_list()

    if self.path and not file_path:
        # Wrap self.path into a Data object
        if isinstance(self.path, list):
            for path in self.path:
                data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: path})
                add_file(data=data_obj, path=path, delete_after_processing=False)
        else:
            data_obj = Data(data={self.SERVER_FILE_PATH_FIELDNAME: self.path})
            add_file(data=data_obj, path=self.path, delete_after_processing=False)
    elif file_path:
        for obj in file_path:
            server_file_path = obj.data.get(self.SERVER_FILE_PATH_FIELDNAME)
            if server_file_path:
                add_file(
                    data=obj,
                    path=server_file_path,
                    delete_after_processing=self.delete_server_file_after_processing,
                )
            elif not self.ignore_unspecified_files:
                msg = f"Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property."
                self.log(msg)
                if not self.silent_errors:
                    raise ValueError(msg)
            else:
                msg = f"Ignoring Data object missing '{self.SERVER_FILE_PATH_FIELDNAME}' property:\n{obj}"
                self.log(msg)

    return resolved_files
550
+
551
+ def _unpack_and_collect_files(self, files: list[BaseFile]) -> list[BaseFile]:
552
+ """Recursively unpack bundles and collect files into BaseFile instances.
553
+
554
+ Args:
555
+ files (list[BaseFile]): List of BaseFile instances to process.
556
+
557
+ Returns:
558
+ list[BaseFile]: Updated list of BaseFile instances.
559
+ """
560
+ collected_files = []
561
+
562
+ for file in files:
563
+ path = file.path
564
+ delete_after_processing = file.delete_after_processing
565
+ data = file.data
566
+
567
+ if path.is_dir():
568
+ # Recurse into directories
569
+ collected_files.extend(
570
+ [
571
+ BaseFileComponent.BaseFile(
572
+ data,
573
+ sub_path,
574
+ delete_after_processing=delete_after_processing,
575
+ )
576
+ for sub_path in path.rglob("*")
577
+ if sub_path.is_file()
578
+ ]
579
+ )
580
+ elif path.suffix[1:] in self.SUPPORTED_BUNDLE_EXTENSIONS:
581
+ # Unpack supported bundles
582
+ temp_dir = TemporaryDirectory()
583
+ self._temp_dirs.append(temp_dir)
584
+ temp_dir_path = Path(temp_dir.name)
585
+ self._unpack_bundle(path, temp_dir_path)
586
+ subpaths = list(temp_dir_path.iterdir())
587
+ self.log(f"Unpacked bundle {path.name} into {subpaths}")
588
+ collected_files.extend(
589
+ [
590
+ BaseFileComponent.BaseFile(
591
+ data,
592
+ sub_path,
593
+ delete_after_processing=delete_after_processing,
594
+ )
595
+ for sub_path in subpaths
596
+ ]
597
+ )
598
+ else:
599
+ collected_files.append(file)
600
+
601
+ # Recurse again if any directories or bundles are left in the list
602
+ if any(
603
+ file.path.is_dir() or file.path.suffix[1:] in self.SUPPORTED_BUNDLE_EXTENSIONS for file in collected_files
604
+ ):
605
+ return self._unpack_and_collect_files(collected_files)
606
+
607
+ return collected_files
608
+
609
+ def _unpack_bundle(self, bundle_path: Path, output_dir: Path):
610
+ """Unpack a bundle into a temporary directory.
611
+
612
+ Args:
613
+ bundle_path (Path): Path to the bundle.
614
+ output_dir (Path): Directory where files will be extracted.
615
+
616
+ Raises:
617
+ ValueError: If the bundle format is unsupported or cannot be read.
618
+ """
619
+
620
+ def _safe_extract_zip(bundle: ZipFile, output_dir: Path):
621
+ """Safely extract ZIP files."""
622
+ for member in bundle.namelist():
623
+ member_path = output_dir / member
624
+ # Ensure no path traversal outside `output_dir`
625
+ if not member_path.resolve().is_relative_to(output_dir.resolve()):
626
+ msg = f"Attempted Path Traversal in ZIP File: {member}"
627
+ raise ValueError(msg)
628
+ bundle.extract(member, path=output_dir)
629
+
630
+ def _safe_extract_tar(bundle: tarfile.TarFile, output_dir: Path):
631
+ """Safely extract TAR files."""
632
+ for member in bundle.getmembers():
633
+ member_path = output_dir / member.name
634
+ # Ensure no path traversal outside `output_dir`
635
+ if not member_path.resolve().is_relative_to(output_dir.resolve()):
636
+ msg = f"Attempted Path Traversal in TAR File: {member.name}"
637
+ raise ValueError(msg)
638
+ bundle.extract(member, path=output_dir)
639
+
640
+ # Check and extract based on file type
641
+ if is_zipfile(bundle_path):
642
+ with ZipFile(bundle_path, "r") as zip_bundle:
643
+ _safe_extract_zip(zip_bundle, output_dir)
644
+ elif tarfile.is_tarfile(bundle_path):
645
+ with tarfile.open(bundle_path, "r:*") as tar_bundle:
646
+ _safe_extract_tar(tar_bundle, output_dir)
647
+ else:
648
+ msg = f"Unsupported bundle format: {bundle_path.suffix}"
649
+ raise ValueError(msg)
650
+
651
+ def _filter_and_mark_files(self, files: list[BaseFile]) -> list[BaseFile]:
652
+ """Validate file types and mark files for removal.
653
+
654
+ Args:
655
+ files (list[BaseFile]): List of BaseFile instances.
656
+
657
+ Returns:
658
+ list[BaseFile]: Validated BaseFile instances.
659
+
660
+ Raises:
661
+ ValueError: If unsupported files are encountered and `ignore_unsupported_extensions` is False.
662
+ """
663
+ final_files = []
664
+ ignored_files = []
665
+
666
+ for file in files:
667
+ if not file.path.is_file():
668
+ self.log(f"Not a file: {file.path.name}")
669
+ continue
670
+
671
+ if file.path.suffix[1:].lower() not in self.valid_extensions:
672
+ if self.ignore_unsupported_extensions:
673
+ ignored_files.append(file.path.name)
674
+ continue
675
+ msg = f"Unsupported file extension: {file.path.suffix}"
676
+ self.log(msg)
677
+ if not self.silent_errors:
678
+ raise ValueError(msg)
679
+
680
+ final_files.append(file)
681
+
682
+ if ignored_files:
683
+ self.log(f"Ignored files: {ignored_files}")
684
+
685
+ return final_files