lfx-nightly 0.1.11.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (699)
  1. lfx/__init__.py +0 -0
  2. lfx/__main__.py +25 -0
  3. lfx/base/__init__.py +0 -0
  4. lfx/base/agents/__init__.py +0 -0
  5. lfx/base/agents/agent.py +268 -0
  6. lfx/base/agents/callback.py +130 -0
  7. lfx/base/agents/context.py +109 -0
  8. lfx/base/agents/crewai/__init__.py +0 -0
  9. lfx/base/agents/crewai/crew.py +231 -0
  10. lfx/base/agents/crewai/tasks.py +12 -0
  11. lfx/base/agents/default_prompts.py +23 -0
  12. lfx/base/agents/errors.py +15 -0
  13. lfx/base/agents/events.py +346 -0
  14. lfx/base/agents/utils.py +205 -0
  15. lfx/base/astra_assistants/__init__.py +0 -0
  16. lfx/base/astra_assistants/util.py +171 -0
  17. lfx/base/chains/__init__.py +0 -0
  18. lfx/base/chains/model.py +19 -0
  19. lfx/base/composio/__init__.py +0 -0
  20. lfx/base/composio/composio_base.py +1291 -0
  21. lfx/base/compressors/__init__.py +0 -0
  22. lfx/base/compressors/model.py +60 -0
  23. lfx/base/constants.py +46 -0
  24. lfx/base/curl/__init__.py +0 -0
  25. lfx/base/curl/parse.py +188 -0
  26. lfx/base/data/__init__.py +5 -0
  27. lfx/base/data/base_file.py +685 -0
  28. lfx/base/data/docling_utils.py +245 -0
  29. lfx/base/data/utils.py +198 -0
  30. lfx/base/document_transformers/__init__.py +0 -0
  31. lfx/base/document_transformers/model.py +43 -0
  32. lfx/base/embeddings/__init__.py +0 -0
  33. lfx/base/embeddings/aiml_embeddings.py +62 -0
  34. lfx/base/embeddings/model.py +26 -0
  35. lfx/base/flow_processing/__init__.py +0 -0
  36. lfx/base/flow_processing/utils.py +86 -0
  37. lfx/base/huggingface/__init__.py +0 -0
  38. lfx/base/huggingface/model_bridge.py +133 -0
  39. lfx/base/io/__init__.py +0 -0
  40. lfx/base/io/chat.py +20 -0
  41. lfx/base/io/text.py +22 -0
  42. lfx/base/langchain_utilities/__init__.py +0 -0
  43. lfx/base/langchain_utilities/model.py +35 -0
  44. lfx/base/langchain_utilities/spider_constants.py +1 -0
  45. lfx/base/langwatch/__init__.py +0 -0
  46. lfx/base/langwatch/utils.py +18 -0
  47. lfx/base/mcp/__init__.py +0 -0
  48. lfx/base/mcp/constants.py +2 -0
  49. lfx/base/mcp/util.py +1398 -0
  50. lfx/base/memory/__init__.py +0 -0
  51. lfx/base/memory/memory.py +49 -0
  52. lfx/base/memory/model.py +38 -0
  53. lfx/base/models/__init__.py +3 -0
  54. lfx/base/models/aiml_constants.py +51 -0
  55. lfx/base/models/anthropic_constants.py +47 -0
  56. lfx/base/models/aws_constants.py +151 -0
  57. lfx/base/models/chat_result.py +76 -0
  58. lfx/base/models/google_generative_ai_constants.py +70 -0
  59. lfx/base/models/groq_constants.py +134 -0
  60. lfx/base/models/model.py +375 -0
  61. lfx/base/models/model_input_constants.py +307 -0
  62. lfx/base/models/model_metadata.py +41 -0
  63. lfx/base/models/model_utils.py +8 -0
  64. lfx/base/models/novita_constants.py +35 -0
  65. lfx/base/models/ollama_constants.py +49 -0
  66. lfx/base/models/openai_constants.py +122 -0
  67. lfx/base/models/sambanova_constants.py +18 -0
  68. lfx/base/processing/__init__.py +0 -0
  69. lfx/base/prompts/__init__.py +0 -0
  70. lfx/base/prompts/api_utils.py +224 -0
  71. lfx/base/prompts/utils.py +61 -0
  72. lfx/base/textsplitters/__init__.py +0 -0
  73. lfx/base/textsplitters/model.py +28 -0
  74. lfx/base/tools/__init__.py +0 -0
  75. lfx/base/tools/base.py +26 -0
  76. lfx/base/tools/component_tool.py +325 -0
  77. lfx/base/tools/constants.py +49 -0
  78. lfx/base/tools/flow_tool.py +132 -0
  79. lfx/base/tools/run_flow.py +224 -0
  80. lfx/base/vectorstores/__init__.py +0 -0
  81. lfx/base/vectorstores/model.py +193 -0
  82. lfx/base/vectorstores/utils.py +22 -0
  83. lfx/base/vectorstores/vector_store_connection_decorator.py +52 -0
  84. lfx/cli/__init__.py +5 -0
  85. lfx/cli/commands.py +319 -0
  86. lfx/cli/common.py +650 -0
  87. lfx/cli/run.py +441 -0
  88. lfx/cli/script_loader.py +247 -0
  89. lfx/cli/serve_app.py +546 -0
  90. lfx/cli/validation.py +69 -0
  91. lfx/components/FAISS/__init__.py +34 -0
  92. lfx/components/FAISS/faiss.py +111 -0
  93. lfx/components/Notion/__init__.py +19 -0
  94. lfx/components/Notion/add_content_to_page.py +269 -0
  95. lfx/components/Notion/create_page.py +94 -0
  96. lfx/components/Notion/list_database_properties.py +68 -0
  97. lfx/components/Notion/list_pages.py +122 -0
  98. lfx/components/Notion/list_users.py +77 -0
  99. lfx/components/Notion/page_content_viewer.py +93 -0
  100. lfx/components/Notion/search.py +111 -0
  101. lfx/components/Notion/update_page_property.py +114 -0
  102. lfx/components/__init__.py +411 -0
  103. lfx/components/_importing.py +42 -0
  104. lfx/components/agentql/__init__.py +3 -0
  105. lfx/components/agentql/agentql_api.py +151 -0
  106. lfx/components/agents/__init__.py +34 -0
  107. lfx/components/agents/agent.py +558 -0
  108. lfx/components/agents/mcp_component.py +501 -0
  109. lfx/components/aiml/__init__.py +37 -0
  110. lfx/components/aiml/aiml.py +112 -0
  111. lfx/components/aiml/aiml_embeddings.py +37 -0
  112. lfx/components/amazon/__init__.py +36 -0
  113. lfx/components/amazon/amazon_bedrock_embedding.py +109 -0
  114. lfx/components/amazon/amazon_bedrock_model.py +124 -0
  115. lfx/components/amazon/s3_bucket_uploader.py +211 -0
  116. lfx/components/anthropic/__init__.py +34 -0
  117. lfx/components/anthropic/anthropic.py +187 -0
  118. lfx/components/apify/__init__.py +5 -0
  119. lfx/components/apify/apify_actor.py +325 -0
  120. lfx/components/arxiv/__init__.py +3 -0
  121. lfx/components/arxiv/arxiv.py +163 -0
  122. lfx/components/assemblyai/__init__.py +46 -0
  123. lfx/components/assemblyai/assemblyai_get_subtitles.py +83 -0
  124. lfx/components/assemblyai/assemblyai_lemur.py +183 -0
  125. lfx/components/assemblyai/assemblyai_list_transcripts.py +95 -0
  126. lfx/components/assemblyai/assemblyai_poll_transcript.py +72 -0
  127. lfx/components/assemblyai/assemblyai_start_transcript.py +188 -0
  128. lfx/components/azure/__init__.py +37 -0
  129. lfx/components/azure/azure_openai.py +95 -0
  130. lfx/components/azure/azure_openai_embeddings.py +83 -0
  131. lfx/components/baidu/__init__.py +32 -0
  132. lfx/components/baidu/baidu_qianfan_chat.py +113 -0
  133. lfx/components/bing/__init__.py +3 -0
  134. lfx/components/bing/bing_search_api.py +61 -0
  135. lfx/components/cassandra/__init__.py +40 -0
  136. lfx/components/cassandra/cassandra.py +264 -0
  137. lfx/components/cassandra/cassandra_chat.py +92 -0
  138. lfx/components/cassandra/cassandra_graph.py +238 -0
  139. lfx/components/chains/__init__.py +3 -0
  140. lfx/components/chroma/__init__.py +34 -0
  141. lfx/components/chroma/chroma.py +167 -0
  142. lfx/components/cleanlab/__init__.py +40 -0
  143. lfx/components/cleanlab/cleanlab_evaluator.py +155 -0
  144. lfx/components/cleanlab/cleanlab_rag_evaluator.py +254 -0
  145. lfx/components/cleanlab/cleanlab_remediator.py +131 -0
  146. lfx/components/clickhouse/__init__.py +34 -0
  147. lfx/components/clickhouse/clickhouse.py +135 -0
  148. lfx/components/cloudflare/__init__.py +32 -0
  149. lfx/components/cloudflare/cloudflare.py +81 -0
  150. lfx/components/cohere/__init__.py +40 -0
  151. lfx/components/cohere/cohere_embeddings.py +81 -0
  152. lfx/components/cohere/cohere_models.py +46 -0
  153. lfx/components/cohere/cohere_rerank.py +51 -0
  154. lfx/components/composio/__init__.py +74 -0
  155. lfx/components/composio/composio_api.py +268 -0
  156. lfx/components/composio/dropbox_compnent.py +11 -0
  157. lfx/components/composio/github_composio.py +11 -0
  158. lfx/components/composio/gmail_composio.py +38 -0
  159. lfx/components/composio/googlecalendar_composio.py +11 -0
  160. lfx/components/composio/googlemeet_composio.py +11 -0
  161. lfx/components/composio/googletasks_composio.py +8 -0
  162. lfx/components/composio/linear_composio.py +11 -0
  163. lfx/components/composio/outlook_composio.py +11 -0
  164. lfx/components/composio/reddit_composio.py +11 -0
  165. lfx/components/composio/slack_composio.py +582 -0
  166. lfx/components/composio/slackbot_composio.py +11 -0
  167. lfx/components/composio/supabase_composio.py +11 -0
  168. lfx/components/composio/todoist_composio.py +11 -0
  169. lfx/components/composio/youtube_composio.py +11 -0
  170. lfx/components/confluence/__init__.py +3 -0
  171. lfx/components/confluence/confluence.py +84 -0
  172. lfx/components/couchbase/__init__.py +34 -0
  173. lfx/components/couchbase/couchbase.py +102 -0
  174. lfx/components/crewai/__init__.py +49 -0
  175. lfx/components/crewai/crewai.py +107 -0
  176. lfx/components/crewai/hierarchical_crew.py +46 -0
  177. lfx/components/crewai/hierarchical_task.py +44 -0
  178. lfx/components/crewai/sequential_crew.py +52 -0
  179. lfx/components/crewai/sequential_task.py +73 -0
  180. lfx/components/crewai/sequential_task_agent.py +143 -0
  181. lfx/components/custom_component/__init__.py +34 -0
  182. lfx/components/custom_component/custom_component.py +31 -0
  183. lfx/components/data/__init__.py +64 -0
  184. lfx/components/data/api_request.py +544 -0
  185. lfx/components/data/csv_to_data.py +95 -0
  186. lfx/components/data/directory.py +113 -0
  187. lfx/components/data/file.py +577 -0
  188. lfx/components/data/json_to_data.py +98 -0
  189. lfx/components/data/news_search.py +164 -0
  190. lfx/components/data/rss.py +69 -0
  191. lfx/components/data/sql_executor.py +101 -0
  192. lfx/components/data/url.py +311 -0
  193. lfx/components/data/web_search.py +112 -0
  194. lfx/components/data/webhook.py +56 -0
  195. lfx/components/datastax/__init__.py +70 -0
  196. lfx/components/datastax/astra_assistant_manager.py +306 -0
  197. lfx/components/datastax/astra_db.py +75 -0
  198. lfx/components/datastax/astra_vectorize.py +124 -0
  199. lfx/components/datastax/astradb.py +1285 -0
  200. lfx/components/datastax/astradb_cql.py +314 -0
  201. lfx/components/datastax/astradb_graph.py +330 -0
  202. lfx/components/datastax/astradb_tool.py +414 -0
  203. lfx/components/datastax/astradb_vectorstore.py +1285 -0
  204. lfx/components/datastax/cassandra.py +92 -0
  205. lfx/components/datastax/create_assistant.py +58 -0
  206. lfx/components/datastax/create_thread.py +32 -0
  207. lfx/components/datastax/dotenv.py +35 -0
  208. lfx/components/datastax/get_assistant.py +37 -0
  209. lfx/components/datastax/getenvvar.py +30 -0
  210. lfx/components/datastax/graph_rag.py +141 -0
  211. lfx/components/datastax/hcd.py +314 -0
  212. lfx/components/datastax/list_assistants.py +25 -0
  213. lfx/components/datastax/run.py +89 -0
  214. lfx/components/deactivated/__init__.py +15 -0
  215. lfx/components/deactivated/amazon_kendra.py +66 -0
  216. lfx/components/deactivated/chat_litellm_model.py +158 -0
  217. lfx/components/deactivated/code_block_extractor.py +26 -0
  218. lfx/components/deactivated/documents_to_data.py +22 -0
  219. lfx/components/deactivated/embed.py +16 -0
  220. lfx/components/deactivated/extract_key_from_data.py +46 -0
  221. lfx/components/deactivated/json_document_builder.py +57 -0
  222. lfx/components/deactivated/list_flows.py +20 -0
  223. lfx/components/deactivated/mcp_sse.py +61 -0
  224. lfx/components/deactivated/mcp_stdio.py +62 -0
  225. lfx/components/deactivated/merge_data.py +93 -0
  226. lfx/components/deactivated/message.py +37 -0
  227. lfx/components/deactivated/metal.py +54 -0
  228. lfx/components/deactivated/multi_query.py +59 -0
  229. lfx/components/deactivated/retriever.py +43 -0
  230. lfx/components/deactivated/selective_passthrough.py +77 -0
  231. lfx/components/deactivated/should_run_next.py +40 -0
  232. lfx/components/deactivated/split_text.py +63 -0
  233. lfx/components/deactivated/store_message.py +24 -0
  234. lfx/components/deactivated/sub_flow.py +124 -0
  235. lfx/components/deactivated/vectara_self_query.py +76 -0
  236. lfx/components/deactivated/vector_store.py +24 -0
  237. lfx/components/deepseek/__init__.py +34 -0
  238. lfx/components/deepseek/deepseek.py +136 -0
  239. lfx/components/docling/__init__.py +43 -0
  240. lfx/components/docling/chunk_docling_document.py +186 -0
  241. lfx/components/docling/docling_inline.py +231 -0
  242. lfx/components/docling/docling_remote.py +193 -0
  243. lfx/components/docling/export_docling_document.py +117 -0
  244. lfx/components/documentloaders/__init__.py +3 -0
  245. lfx/components/duckduckgo/__init__.py +3 -0
  246. lfx/components/duckduckgo/duck_duck_go_search_run.py +92 -0
  247. lfx/components/elastic/__init__.py +37 -0
  248. lfx/components/elastic/elasticsearch.py +267 -0
  249. lfx/components/elastic/opensearch.py +243 -0
  250. lfx/components/embeddings/__init__.py +37 -0
  251. lfx/components/embeddings/similarity.py +76 -0
  252. lfx/components/embeddings/text_embedder.py +64 -0
  253. lfx/components/exa/__init__.py +3 -0
  254. lfx/components/exa/exa_search.py +68 -0
  255. lfx/components/firecrawl/__init__.py +43 -0
  256. lfx/components/firecrawl/firecrawl_crawl_api.py +88 -0
  257. lfx/components/firecrawl/firecrawl_extract_api.py +136 -0
  258. lfx/components/firecrawl/firecrawl_map_api.py +89 -0
  259. lfx/components/firecrawl/firecrawl_scrape_api.py +73 -0
  260. lfx/components/git/__init__.py +4 -0
  261. lfx/components/git/git.py +262 -0
  262. lfx/components/git/gitextractor.py +196 -0
  263. lfx/components/glean/__init__.py +3 -0
  264. lfx/components/glean/glean_search_api.py +173 -0
  265. lfx/components/google/__init__.py +17 -0
  266. lfx/components/google/gmail.py +192 -0
  267. lfx/components/google/google_bq_sql_executor.py +157 -0
  268. lfx/components/google/google_drive.py +92 -0
  269. lfx/components/google/google_drive_search.py +152 -0
  270. lfx/components/google/google_generative_ai.py +147 -0
  271. lfx/components/google/google_generative_ai_embeddings.py +141 -0
  272. lfx/components/google/google_oauth_token.py +89 -0
  273. lfx/components/google/google_search_api_core.py +68 -0
  274. lfx/components/google/google_serper_api_core.py +74 -0
  275. lfx/components/groq/__init__.py +34 -0
  276. lfx/components/groq/groq.py +136 -0
  277. lfx/components/helpers/__init__.py +52 -0
  278. lfx/components/helpers/calculator_core.py +89 -0
  279. lfx/components/helpers/create_list.py +40 -0
  280. lfx/components/helpers/current_date.py +42 -0
  281. lfx/components/helpers/id_generator.py +42 -0
  282. lfx/components/helpers/memory.py +251 -0
  283. lfx/components/helpers/output_parser.py +45 -0
  284. lfx/components/helpers/store_message.py +90 -0
  285. lfx/components/homeassistant/__init__.py +7 -0
  286. lfx/components/homeassistant/home_assistant_control.py +152 -0
  287. lfx/components/homeassistant/list_home_assistant_states.py +137 -0
  288. lfx/components/huggingface/__init__.py +37 -0
  289. lfx/components/huggingface/huggingface.py +197 -0
  290. lfx/components/huggingface/huggingface_inference_api.py +106 -0
  291. lfx/components/ibm/__init__.py +34 -0
  292. lfx/components/ibm/watsonx.py +203 -0
  293. lfx/components/ibm/watsonx_embeddings.py +135 -0
  294. lfx/components/icosacomputing/__init__.py +5 -0
  295. lfx/components/icosacomputing/combinatorial_reasoner.py +84 -0
  296. lfx/components/input_output/__init__.py +38 -0
  297. lfx/components/input_output/chat.py +120 -0
  298. lfx/components/input_output/chat_output.py +200 -0
  299. lfx/components/input_output/text.py +27 -0
  300. lfx/components/input_output/text_output.py +29 -0
  301. lfx/components/jigsawstack/__init__.py +23 -0
  302. lfx/components/jigsawstack/ai_scrape.py +126 -0
  303. lfx/components/jigsawstack/ai_web_search.py +136 -0
  304. lfx/components/jigsawstack/file_read.py +115 -0
  305. lfx/components/jigsawstack/file_upload.py +94 -0
  306. lfx/components/jigsawstack/image_generation.py +205 -0
  307. lfx/components/jigsawstack/nsfw.py +60 -0
  308. lfx/components/jigsawstack/object_detection.py +124 -0
  309. lfx/components/jigsawstack/sentiment.py +112 -0
  310. lfx/components/jigsawstack/text_to_sql.py +90 -0
  311. lfx/components/jigsawstack/text_translate.py +77 -0
  312. lfx/components/jigsawstack/vocr.py +107 -0
  313. lfx/components/langchain_utilities/__init__.py +109 -0
  314. lfx/components/langchain_utilities/character.py +53 -0
  315. lfx/components/langchain_utilities/conversation.py +59 -0
  316. lfx/components/langchain_utilities/csv_agent.py +107 -0
  317. lfx/components/langchain_utilities/fake_embeddings.py +26 -0
  318. lfx/components/langchain_utilities/html_link_extractor.py +35 -0
  319. lfx/components/langchain_utilities/json_agent.py +45 -0
  320. lfx/components/langchain_utilities/langchain_hub.py +126 -0
  321. lfx/components/langchain_utilities/language_recursive.py +49 -0
  322. lfx/components/langchain_utilities/language_semantic.py +138 -0
  323. lfx/components/langchain_utilities/llm_checker.py +39 -0
  324. lfx/components/langchain_utilities/llm_math.py +42 -0
  325. lfx/components/langchain_utilities/natural_language.py +61 -0
  326. lfx/components/langchain_utilities/openai_tools.py +53 -0
  327. lfx/components/langchain_utilities/openapi.py +48 -0
  328. lfx/components/langchain_utilities/recursive_character.py +60 -0
  329. lfx/components/langchain_utilities/retrieval_qa.py +83 -0
  330. lfx/components/langchain_utilities/runnable_executor.py +137 -0
  331. lfx/components/langchain_utilities/self_query.py +80 -0
  332. lfx/components/langchain_utilities/spider.py +142 -0
  333. lfx/components/langchain_utilities/sql.py +40 -0
  334. lfx/components/langchain_utilities/sql_database.py +35 -0
  335. lfx/components/langchain_utilities/sql_generator.py +78 -0
  336. lfx/components/langchain_utilities/tool_calling.py +59 -0
  337. lfx/components/langchain_utilities/vector_store_info.py +49 -0
  338. lfx/components/langchain_utilities/vector_store_router.py +33 -0
  339. lfx/components/langchain_utilities/xml_agent.py +71 -0
  340. lfx/components/langwatch/__init__.py +3 -0
  341. lfx/components/langwatch/langwatch.py +278 -0
  342. lfx/components/link_extractors/__init__.py +3 -0
  343. lfx/components/lmstudio/__init__.py +34 -0
  344. lfx/components/lmstudio/lmstudioembeddings.py +89 -0
  345. lfx/components/lmstudio/lmstudiomodel.py +129 -0
  346. lfx/components/logic/__init__.py +52 -0
  347. lfx/components/logic/conditional_router.py +171 -0
  348. lfx/components/logic/data_conditional_router.py +125 -0
  349. lfx/components/logic/flow_tool.py +110 -0
  350. lfx/components/logic/listen.py +29 -0
  351. lfx/components/logic/loop.py +125 -0
  352. lfx/components/logic/notify.py +88 -0
  353. lfx/components/logic/pass_message.py +35 -0
  354. lfx/components/logic/run_flow.py +71 -0
  355. lfx/components/logic/sub_flow.py +114 -0
  356. lfx/components/maritalk/__init__.py +32 -0
  357. lfx/components/maritalk/maritalk.py +52 -0
  358. lfx/components/mem0/__init__.py +3 -0
  359. lfx/components/mem0/mem0_chat_memory.py +136 -0
  360. lfx/components/milvus/__init__.py +34 -0
  361. lfx/components/milvus/milvus.py +115 -0
  362. lfx/components/mistral/__init__.py +37 -0
  363. lfx/components/mistral/mistral.py +114 -0
  364. lfx/components/mistral/mistral_embeddings.py +58 -0
  365. lfx/components/models/__init__.py +34 -0
  366. lfx/components/models/embedding_model.py +114 -0
  367. lfx/components/models/language_model.py +144 -0
  368. lfx/components/mongodb/__init__.py +34 -0
  369. lfx/components/mongodb/mongodb_atlas.py +213 -0
  370. lfx/components/needle/__init__.py +3 -0
  371. lfx/components/needle/needle.py +104 -0
  372. lfx/components/notdiamond/__init__.py +34 -0
  373. lfx/components/notdiamond/notdiamond.py +228 -0
  374. lfx/components/novita/__init__.py +32 -0
  375. lfx/components/novita/novita.py +130 -0
  376. lfx/components/nvidia/__init__.py +57 -0
  377. lfx/components/nvidia/nvidia.py +157 -0
  378. lfx/components/nvidia/nvidia_embedding.py +77 -0
  379. lfx/components/nvidia/nvidia_ingest.py +317 -0
  380. lfx/components/nvidia/nvidia_rerank.py +63 -0
  381. lfx/components/nvidia/system_assist.py +65 -0
  382. lfx/components/olivya/__init__.py +3 -0
  383. lfx/components/olivya/olivya.py +116 -0
  384. lfx/components/ollama/__init__.py +37 -0
  385. lfx/components/ollama/ollama.py +330 -0
  386. lfx/components/ollama/ollama_embeddings.py +106 -0
  387. lfx/components/openai/__init__.py +37 -0
  388. lfx/components/openai/openai.py +100 -0
  389. lfx/components/openai/openai_chat_model.py +176 -0
  390. lfx/components/openrouter/__init__.py +32 -0
  391. lfx/components/openrouter/openrouter.py +202 -0
  392. lfx/components/output_parsers/__init__.py +3 -0
  393. lfx/components/perplexity/__init__.py +34 -0
  394. lfx/components/perplexity/perplexity.py +75 -0
  395. lfx/components/pgvector/__init__.py +34 -0
  396. lfx/components/pgvector/pgvector.py +72 -0
  397. lfx/components/pinecone/__init__.py +34 -0
  398. lfx/components/pinecone/pinecone.py +134 -0
  399. lfx/components/processing/__init__.py +117 -0
  400. lfx/components/processing/alter_metadata.py +108 -0
  401. lfx/components/processing/batch_run.py +205 -0
  402. lfx/components/processing/combine_text.py +39 -0
  403. lfx/components/processing/converter.py +159 -0
  404. lfx/components/processing/create_data.py +110 -0
  405. lfx/components/processing/data_operations.py +438 -0
  406. lfx/components/processing/data_to_dataframe.py +70 -0
  407. lfx/components/processing/dataframe_operations.py +313 -0
  408. lfx/components/processing/extract_key.py +53 -0
  409. lfx/components/processing/filter_data.py +42 -0
  410. lfx/components/processing/filter_data_values.py +88 -0
  411. lfx/components/processing/json_cleaner.py +103 -0
  412. lfx/components/processing/lambda_filter.py +154 -0
  413. lfx/components/processing/llm_router.py +499 -0
  414. lfx/components/processing/merge_data.py +90 -0
  415. lfx/components/processing/message_to_data.py +36 -0
  416. lfx/components/processing/parse_data.py +70 -0
  417. lfx/components/processing/parse_dataframe.py +68 -0
  418. lfx/components/processing/parse_json_data.py +90 -0
  419. lfx/components/processing/parser.py +143 -0
  420. lfx/components/processing/prompt.py +67 -0
  421. lfx/components/processing/python_repl_core.py +98 -0
  422. lfx/components/processing/regex.py +82 -0
  423. lfx/components/processing/save_file.py +225 -0
  424. lfx/components/processing/select_data.py +48 -0
  425. lfx/components/processing/split_text.py +141 -0
  426. lfx/components/processing/structured_output.py +202 -0
  427. lfx/components/processing/update_data.py +160 -0
  428. lfx/components/prototypes/__init__.py +34 -0
  429. lfx/components/prototypes/python_function.py +73 -0
  430. lfx/components/qdrant/__init__.py +34 -0
  431. lfx/components/qdrant/qdrant.py +109 -0
  432. lfx/components/redis/__init__.py +37 -0
  433. lfx/components/redis/redis.py +89 -0
  434. lfx/components/redis/redis_chat.py +43 -0
  435. lfx/components/sambanova/__init__.py +32 -0
  436. lfx/components/sambanova/sambanova.py +84 -0
  437. lfx/components/scrapegraph/__init__.py +40 -0
  438. lfx/components/scrapegraph/scrapegraph_markdownify_api.py +64 -0
  439. lfx/components/scrapegraph/scrapegraph_search_api.py +64 -0
  440. lfx/components/scrapegraph/scrapegraph_smart_scraper_api.py +71 -0
  441. lfx/components/searchapi/__init__.py +34 -0
  442. lfx/components/searchapi/search.py +79 -0
  443. lfx/components/serpapi/__init__.py +3 -0
  444. lfx/components/serpapi/serp.py +115 -0
  445. lfx/components/supabase/__init__.py +34 -0
  446. lfx/components/supabase/supabase.py +76 -0
  447. lfx/components/tavily/__init__.py +4 -0
  448. lfx/components/tavily/tavily_extract.py +117 -0
  449. lfx/components/tavily/tavily_search.py +212 -0
  450. lfx/components/textsplitters/__init__.py +3 -0
  451. lfx/components/toolkits/__init__.py +3 -0
  452. lfx/components/tools/__init__.py +72 -0
  453. lfx/components/tools/calculator.py +108 -0
  454. lfx/components/tools/google_search_api.py +45 -0
  455. lfx/components/tools/google_serper_api.py +115 -0
  456. lfx/components/tools/python_code_structured_tool.py +327 -0
  457. lfx/components/tools/python_repl.py +97 -0
  458. lfx/components/tools/search_api.py +87 -0
  459. lfx/components/tools/searxng.py +145 -0
  460. lfx/components/tools/serp_api.py +119 -0
  461. lfx/components/tools/tavily_search_tool.py +344 -0
  462. lfx/components/tools/wikidata_api.py +102 -0
  463. lfx/components/tools/wikipedia_api.py +49 -0
  464. lfx/components/tools/yahoo_finance.py +129 -0
  465. lfx/components/twelvelabs/__init__.py +52 -0
  466. lfx/components/twelvelabs/convert_astra_results.py +84 -0
  467. lfx/components/twelvelabs/pegasus_index.py +311 -0
  468. lfx/components/twelvelabs/split_video.py +291 -0
  469. lfx/components/twelvelabs/text_embeddings.py +57 -0
  470. lfx/components/twelvelabs/twelvelabs_pegasus.py +408 -0
  471. lfx/components/twelvelabs/video_embeddings.py +100 -0
  472. lfx/components/twelvelabs/video_file.py +179 -0
  473. lfx/components/unstructured/__init__.py +3 -0
  474. lfx/components/unstructured/unstructured.py +121 -0
  475. lfx/components/upstash/__init__.py +34 -0
  476. lfx/components/upstash/upstash.py +124 -0
  477. lfx/components/vectara/__init__.py +37 -0
  478. lfx/components/vectara/vectara.py +97 -0
  479. lfx/components/vectara/vectara_rag.py +164 -0
  480. lfx/components/vectorstores/__init__.py +40 -0
  481. lfx/components/vectorstores/astradb.py +1285 -0
  482. lfx/components/vectorstores/astradb_graph.py +319 -0
  483. lfx/components/vectorstores/cassandra.py +264 -0
  484. lfx/components/vectorstores/cassandra_graph.py +238 -0
  485. lfx/components/vectorstores/chroma.py +167 -0
  486. lfx/components/vectorstores/clickhouse.py +135 -0
  487. lfx/components/vectorstores/couchbase.py +102 -0
  488. lfx/components/vectorstores/elasticsearch.py +267 -0
  489. lfx/components/vectorstores/faiss.py +111 -0
  490. lfx/components/vectorstores/graph_rag.py +141 -0
  491. lfx/components/vectorstores/hcd.py +314 -0
  492. lfx/components/vectorstores/local_db.py +261 -0
  493. lfx/components/vectorstores/milvus.py +115 -0
  494. lfx/components/vectorstores/mongodb_atlas.py +213 -0
  495. lfx/components/vectorstores/opensearch.py +243 -0
  496. lfx/components/vectorstores/pgvector.py +72 -0
  497. lfx/components/vectorstores/pinecone.py +134 -0
  498. lfx/components/vectorstores/qdrant.py +109 -0
  499. lfx/components/vectorstores/supabase.py +76 -0
  500. lfx/components/vectorstores/upstash.py +124 -0
  501. lfx/components/vectorstores/vectara.py +97 -0
  502. lfx/components/vectorstores/vectara_rag.py +164 -0
  503. lfx/components/vectorstores/weaviate.py +89 -0
  504. lfx/components/vertexai/__init__.py +37 -0
  505. lfx/components/vertexai/vertexai.py +71 -0
  506. lfx/components/vertexai/vertexai_embeddings.py +67 -0
  507. lfx/components/weaviate/__init__.py +34 -0
  508. lfx/components/weaviate/weaviate.py +89 -0
  509. lfx/components/wikipedia/__init__.py +4 -0
  510. lfx/components/wikipedia/wikidata.py +86 -0
  511. lfx/components/wikipedia/wikipedia.py +53 -0
  512. lfx/components/wolframalpha/__init__.py +3 -0
  513. lfx/components/wolframalpha/wolfram_alpha_api.py +54 -0
  514. lfx/components/xai/__init__.py +32 -0
  515. lfx/components/xai/xai.py +167 -0
  516. lfx/components/yahoosearch/__init__.py +3 -0
  517. lfx/components/yahoosearch/yahoo.py +137 -0
  518. lfx/components/youtube/__init__.py +52 -0
  519. lfx/components/youtube/channel.py +227 -0
  520. lfx/components/youtube/comments.py +231 -0
  521. lfx/components/youtube/playlist.py +33 -0
  522. lfx/components/youtube/search.py +120 -0
  523. lfx/components/youtube/trending.py +285 -0
  524. lfx/components/youtube/video_details.py +263 -0
  525. lfx/components/youtube/youtube_transcripts.py +118 -0
  526. lfx/components/zep/__init__.py +3 -0
  527. lfx/components/zep/zep.py +44 -0
  528. lfx/constants.py +6 -0
  529. lfx/custom/__init__.py +7 -0
  530. lfx/custom/attributes.py +86 -0
  531. lfx/custom/code_parser/__init__.py +3 -0
  532. lfx/custom/code_parser/code_parser.py +361 -0
  533. lfx/custom/custom_component/__init__.py +0 -0
  534. lfx/custom/custom_component/base_component.py +128 -0
  535. lfx/custom/custom_component/component.py +1808 -0
  536. lfx/custom/custom_component/component_with_cache.py +8 -0
  537. lfx/custom/custom_component/custom_component.py +588 -0
  538. lfx/custom/dependency_analyzer.py +165 -0
  539. lfx/custom/directory_reader/__init__.py +3 -0
  540. lfx/custom/directory_reader/directory_reader.py +359 -0
  541. lfx/custom/directory_reader/utils.py +171 -0
  542. lfx/custom/eval.py +12 -0
  543. lfx/custom/schema.py +32 -0
  544. lfx/custom/tree_visitor.py +21 -0
  545. lfx/custom/utils.py +877 -0
  546. lfx/custom/validate.py +488 -0
  547. lfx/events/__init__.py +1 -0
  548. lfx/events/event_manager.py +110 -0
  549. lfx/exceptions/__init__.py +0 -0
  550. lfx/exceptions/component.py +15 -0
  551. lfx/field_typing/__init__.py +91 -0
  552. lfx/field_typing/constants.py +215 -0
  553. lfx/field_typing/range_spec.py +35 -0
  554. lfx/graph/__init__.py +6 -0
  555. lfx/graph/edge/__init__.py +0 -0
  556. lfx/graph/edge/base.py +277 -0
  557. lfx/graph/edge/schema.py +119 -0
  558. lfx/graph/edge/utils.py +0 -0
  559. lfx/graph/graph/__init__.py +0 -0
  560. lfx/graph/graph/ascii.py +202 -0
  561. lfx/graph/graph/base.py +2238 -0
  562. lfx/graph/graph/constants.py +63 -0
  563. lfx/graph/graph/runnable_vertices_manager.py +133 -0
  564. lfx/graph/graph/schema.py +52 -0
  565. lfx/graph/graph/state_model.py +66 -0
  566. lfx/graph/graph/utils.py +1024 -0
  567. lfx/graph/schema.py +75 -0
  568. lfx/graph/state/__init__.py +0 -0
  569. lfx/graph/state/model.py +237 -0
  570. lfx/graph/utils.py +200 -0
  571. lfx/graph/vertex/__init__.py +0 -0
  572. lfx/graph/vertex/base.py +823 -0
  573. lfx/graph/vertex/constants.py +0 -0
  574. lfx/graph/vertex/exceptions.py +4 -0
  575. lfx/graph/vertex/param_handler.py +264 -0
  576. lfx/graph/vertex/schema.py +26 -0
  577. lfx/graph/vertex/utils.py +19 -0
  578. lfx/graph/vertex/vertex_types.py +489 -0
  579. lfx/helpers/__init__.py +1 -0
  580. lfx/helpers/base_model.py +71 -0
  581. lfx/helpers/custom.py +13 -0
  582. lfx/helpers/data.py +167 -0
  583. lfx/helpers/flow.py +194 -0
  584. lfx/inputs/__init__.py +68 -0
  585. lfx/inputs/constants.py +2 -0
  586. lfx/inputs/input_mixin.py +328 -0
  587. lfx/inputs/inputs.py +714 -0
  588. lfx/inputs/validators.py +19 -0
  589. lfx/interface/__init__.py +6 -0
  590. lfx/interface/components.py +489 -0
  591. lfx/interface/importing/__init__.py +5 -0
  592. lfx/interface/importing/utils.py +39 -0
  593. lfx/interface/initialize/__init__.py +3 -0
  594. lfx/interface/initialize/loading.py +224 -0
  595. lfx/interface/listing.py +26 -0
  596. lfx/interface/run.py +16 -0
  597. lfx/interface/utils.py +111 -0
  598. lfx/io/__init__.py +63 -0
  599. lfx/io/schema.py +289 -0
  600. lfx/load/__init__.py +8 -0
  601. lfx/load/load.py +256 -0
  602. lfx/load/utils.py +99 -0
  603. lfx/log/__init__.py +5 -0
  604. lfx/log/logger.py +385 -0
  605. lfx/memory/__init__.py +90 -0
  606. lfx/memory/stubs.py +283 -0
  607. lfx/processing/__init__.py +1 -0
  608. lfx/processing/process.py +238 -0
  609. lfx/processing/utils.py +25 -0
  610. lfx/py.typed +0 -0
  611. lfx/schema/__init__.py +66 -0
  612. lfx/schema/artifact.py +83 -0
  613. lfx/schema/content_block.py +62 -0
  614. lfx/schema/content_types.py +91 -0
  615. lfx/schema/data.py +308 -0
  616. lfx/schema/dataframe.py +210 -0
  617. lfx/schema/dotdict.py +74 -0
  618. lfx/schema/encoders.py +13 -0
  619. lfx/schema/graph.py +47 -0
  620. lfx/schema/image.py +131 -0
  621. lfx/schema/json_schema.py +141 -0
  622. lfx/schema/log.py +61 -0
  623. lfx/schema/message.py +473 -0
  624. lfx/schema/openai_responses_schemas.py +74 -0
  625. lfx/schema/properties.py +41 -0
  626. lfx/schema/schema.py +171 -0
  627. lfx/schema/serialize.py +13 -0
  628. lfx/schema/table.py +140 -0
  629. lfx/schema/validators.py +114 -0
  630. lfx/serialization/__init__.py +5 -0
  631. lfx/serialization/constants.py +2 -0
  632. lfx/serialization/serialization.py +314 -0
  633. lfx/services/__init__.py +23 -0
  634. lfx/services/base.py +28 -0
  635. lfx/services/cache/__init__.py +6 -0
  636. lfx/services/cache/base.py +183 -0
  637. lfx/services/cache/service.py +166 -0
  638. lfx/services/cache/utils.py +169 -0
  639. lfx/services/chat/__init__.py +1 -0
  640. lfx/services/chat/config.py +2 -0
  641. lfx/services/chat/schema.py +10 -0
  642. lfx/services/deps.py +129 -0
  643. lfx/services/factory.py +19 -0
  644. lfx/services/initialize.py +19 -0
  645. lfx/services/interfaces.py +103 -0
  646. lfx/services/manager.py +172 -0
  647. lfx/services/schema.py +20 -0
  648. lfx/services/session.py +82 -0
  649. lfx/services/settings/__init__.py +3 -0
  650. lfx/services/settings/auth.py +130 -0
  651. lfx/services/settings/base.py +539 -0
  652. lfx/services/settings/constants.py +31 -0
  653. lfx/services/settings/factory.py +23 -0
  654. lfx/services/settings/feature_flags.py +12 -0
  655. lfx/services/settings/service.py +35 -0
  656. lfx/services/settings/utils.py +40 -0
  657. lfx/services/shared_component_cache/__init__.py +1 -0
  658. lfx/services/shared_component_cache/factory.py +30 -0
  659. lfx/services/shared_component_cache/service.py +9 -0
  660. lfx/services/storage/__init__.py +5 -0
  661. lfx/services/storage/local.py +155 -0
  662. lfx/services/storage/service.py +54 -0
  663. lfx/services/tracing/__init__.py +1 -0
  664. lfx/services/tracing/service.py +21 -0
  665. lfx/settings.py +6 -0
  666. lfx/template/__init__.py +6 -0
  667. lfx/template/field/__init__.py +0 -0
  668. lfx/template/field/base.py +257 -0
  669. lfx/template/field/prompt.py +15 -0
  670. lfx/template/frontend_node/__init__.py +6 -0
  671. lfx/template/frontend_node/base.py +212 -0
  672. lfx/template/frontend_node/constants.py +65 -0
  673. lfx/template/frontend_node/custom_components.py +79 -0
  674. lfx/template/template/__init__.py +0 -0
  675. lfx/template/template/base.py +100 -0
  676. lfx/template/utils.py +217 -0
  677. lfx/type_extraction/__init__.py +19 -0
  678. lfx/type_extraction/type_extraction.py +75 -0
  679. lfx/type_extraction.py +80 -0
  680. lfx/utils/__init__.py +1 -0
  681. lfx/utils/async_helpers.py +42 -0
  682. lfx/utils/component_utils.py +154 -0
  683. lfx/utils/concurrency.py +60 -0
  684. lfx/utils/connection_string_parser.py +11 -0
  685. lfx/utils/constants.py +205 -0
  686. lfx/utils/data_structure.py +212 -0
  687. lfx/utils/exceptions.py +22 -0
  688. lfx/utils/helpers.py +28 -0
  689. lfx/utils/image.py +73 -0
  690. lfx/utils/lazy_load.py +15 -0
  691. lfx/utils/request_utils.py +18 -0
  692. lfx/utils/schemas.py +139 -0
  693. lfx/utils/util.py +481 -0
  694. lfx/utils/util_strings.py +56 -0
  695. lfx/utils/version.py +24 -0
  696. lfx_nightly-0.1.11.dev0.dist-info/METADATA +293 -0
  697. lfx_nightly-0.1.11.dev0.dist-info/RECORD +699 -0
  698. lfx_nightly-0.1.11.dev0.dist-info/WHEEL +4 -0
  699. lfx_nightly-0.1.11.dev0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1285 @@
1
+ import re
2
+ from collections import defaultdict
3
+ from dataclasses import asdict, dataclass, field
4
+
5
+ from astrapy import DataAPIClient, Database
6
+ from astrapy.data.info.reranking import RerankServiceOptions
7
+ from astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions
8
+ from langchain_astradb import AstraDBVectorStore, VectorServiceOptions
9
+ from langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment
10
+ from langchain_core.documents import Document
11
+
12
+ from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
13
+ from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
14
+ from lfx.helpers.data import docs_to_data
15
+ from lfx.inputs.inputs import FloatInput, NestedDictInput
16
+ from lfx.io import (
17
+ BoolInput,
18
+ DropdownInput,
19
+ HandleInput,
20
+ IntInput,
21
+ QueryInput,
22
+ SecretStrInput,
23
+ StrInput,
24
+ )
25
+ from lfx.schema.data import Data
26
+ from lfx.serialization import serialize
27
+ from lfx.utils.version import get_version_info
28
+
29
+
30
+ @vector_store_connection
31
+ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
32
+ display_name: str = "Astra DB"
33
+ description: str = "Ingest and search documents in Astra DB"
34
+ documentation: str = "https://docs.datastax.com/en/langflow/astra-components.html"
35
+ name = "AstraDB"
36
+ icon: str = "AstraDB"
37
+
38
+ _cached_vector_store: AstraDBVectorStore | None = None
39
+
40
@dataclass
class NewDatabaseInput:
    # Dialog definition attached to the "database_name" dropdown: it describes
    # the form used to create a new database.
    functionality: str = "create"
    # A factory is used so that every instance gets its own nested dictionaries
    # and input objects instead of sharing mutable defaults.
    fields: dict[str, dict] = field(
        default_factory=lambda: {
            "data": {
                "node": {
                    "name": "create_database",
                    "description": "Please allow several minutes for creation to complete.",
                    "display_name": "Create new database",
                    # The numeric prefixes keep the dialog fields in a stable order.
                    "field_order": ["01_new_database_name", "02_cloud_provider", "03_region"],
                    "template": {
                        "01_new_database_name": StrInput(
                            required=True,
                            name="new_database_name",
                            display_name="Name",
                            info="Name of the new database to create in Astra DB.",
                        ),
                        # Options start empty and are filled in by reset_database_list.
                        "02_cloud_provider": DropdownInput(
                            required=True,
                            real_time_refresh=True,
                            name="cloud_provider",
                            display_name="Cloud provider",
                            info="Cloud provider for the new database.",
                            options=[],
                        ),
                        # Options start empty and are filled in by _update_cloud_regions.
                        "03_region": DropdownInput(
                            required=True,
                            name="region",
                            display_name="Region",
                            info="Region for the new database.",
                            options=[],
                        ),
                    },
                },
            }
        }
    )
78
+
79
@dataclass
class NewCollectionInput:
    # Dialog definition attached to the "collection_name" dropdown: it describes
    # the form used to create a new collection.
    functionality: str = "create"
    # A factory is used so that every instance gets its own nested dictionaries
    # and input objects instead of sharing mutable defaults.
    fields: dict[str, dict] = field(
        default_factory=lambda: {
            "data": {
                "node": {
                    "name": "create_collection",
                    "description": "Please allow several seconds for creation to complete.",
                    "display_name": "Create new collection",
                    # The numeric prefixes keep the dialog fields in a stable order.
                    "field_order": [
                        "01_new_collection_name",
                        "02_embedding_generation_provider",
                        "03_embedding_generation_model",
                        "04_dimension",
                    ],
                    "template": {
                        "01_new_collection_name": StrInput(
                            name="new_collection_name",
                            display_name="Name",
                            info="Name of the new collection to create in Astra DB.",
                            required=True,
                        ),
                        # Options start empty and are filled in by reset_provider_options.
                        "02_embedding_generation_provider": DropdownInput(
                            name="embedding_generation_provider",
                            display_name="Embedding generation method",
                            info="Provider to use for generating embeddings.",
                            # The link target must be exactly "_blank" (no leading space) so
                            # that it is treated as the new-tab keyword, not a window name.
                            helper_text=(
                                "To create collections with more embedding provider options, go to "
                                '<a class="underline" href="https://astra.datastax.com/" target="_blank" '
                                'rel="noopener noreferrer">your database in Astra DB</a>'
                            ),
                            real_time_refresh=True,
                            required=True,
                            options=[],
                        ),
                        "03_embedding_generation_model": DropdownInput(
                            name="embedding_generation_model",
                            display_name="Embedding model",
                            info="Model to use for generating embeddings.",
                            real_time_refresh=True,
                            options=[],
                        ),
                        # The value and read-only state are managed by reset_dimension_field.
                        "04_dimension": IntInput(
                            name="dimension",
                            display_name="Dimensions",
                            info="Dimensions of the embeddings to generate.",
                            value=None,
                        ),
                    },
                },
            }
        }
    )
133
+
134
+ inputs = [
135
+ SecretStrInput(
136
+ name="token",
137
+ display_name="Astra DB Application Token",
138
+ info="Authentication token for accessing Astra DB.",
139
+ value="ASTRA_DB_APPLICATION_TOKEN",
140
+ required=True,
141
+ real_time_refresh=True,
142
+ input_types=[],
143
+ ),
144
+ DropdownInput(
145
+ name="environment",
146
+ display_name="Environment",
147
+ info="The environment for the Astra DB API Endpoint.",
148
+ options=["prod", "test", "dev"],
149
+ value="prod",
150
+ advanced=True,
151
+ real_time_refresh=True,
152
+ combobox=True,
153
+ ),
154
+ DropdownInput(
155
+ name="database_name",
156
+ display_name="Database",
157
+ info="The Database name for the Astra DB instance.",
158
+ required=True,
159
+ refresh_button=True,
160
+ real_time_refresh=True,
161
+ dialog_inputs=asdict(NewDatabaseInput()),
162
+ combobox=True,
163
+ ),
164
+ DropdownInput(
165
+ name="api_endpoint",
166
+ display_name="Astra DB API Endpoint",
167
+ info="The API Endpoint for the Astra DB instance. Supercedes database selection.",
168
+ advanced=True,
169
+ ),
170
+ DropdownInput(
171
+ name="keyspace",
172
+ display_name="Keyspace",
173
+ info="Optional keyspace within Astra DB to use for the collection.",
174
+ advanced=True,
175
+ options=[],
176
+ real_time_refresh=True,
177
+ ),
178
+ DropdownInput(
179
+ name="collection_name",
180
+ display_name="Collection",
181
+ info="The name of the collection within Astra DB where the vectors will be stored.",
182
+ required=True,
183
+ refresh_button=True,
184
+ real_time_refresh=True,
185
+ dialog_inputs=asdict(NewCollectionInput()),
186
+ combobox=True,
187
+ show=False,
188
+ ),
189
+ HandleInput(
190
+ name="embedding_model",
191
+ display_name="Embedding Model",
192
+ input_types=["Embeddings"],
193
+ info="Specify the Embedding Model. Not required for Astra Vectorize collections.",
194
+ required=False,
195
+ show=False,
196
+ ),
197
+ *LCVectorStoreComponent.inputs,
198
+ DropdownInput(
199
+ name="search_method",
200
+ display_name="Search Method",
201
+ info=(
202
+ "Determine how your content is matched: Vector finds semantic similarity, "
203
+ "and Hybrid Search (suggested) combines both approaches "
204
+ "with a reranker."
205
+ ),
206
+ options=["Hybrid Search", "Vector Search"], # TODO: Restore Lexical Search?
207
+ options_metadata=[{"icon": "SearchHybrid"}, {"icon": "SearchVector"}],
208
+ value="Vector Search",
209
+ advanced=True,
210
+ real_time_refresh=True,
211
+ ),
212
+ DropdownInput(
213
+ name="reranker",
214
+ display_name="Reranker",
215
+ info="Post-retrieval model that re-scores results for optimal relevance ranking.",
216
+ show=False,
217
+ toggle=True,
218
+ ),
219
+ QueryInput(
220
+ name="lexical_terms",
221
+ display_name="Lexical Terms",
222
+ info="Add additional terms/keywords to augment search precision.",
223
+ placeholder="Enter terms to search...",
224
+ separator=" ",
225
+ show=False,
226
+ value="",
227
+ ),
228
+ IntInput(
229
+ name="number_of_results",
230
+ display_name="Number of Search Results",
231
+ info="Number of search results to return.",
232
+ advanced=True,
233
+ value=4,
234
+ ),
235
+ DropdownInput(
236
+ name="search_type",
237
+ display_name="Search Type",
238
+ info="Search type to use",
239
+ options=["Similarity", "Similarity with score threshold", "MMR (Max Marginal Relevance)"],
240
+ value="Similarity",
241
+ advanced=True,
242
+ ),
243
+ FloatInput(
244
+ name="search_score_threshold",
245
+ display_name="Search Score Threshold",
246
+ info="Minimum similarity score threshold for search results. "
247
+ "(when using 'Similarity with score threshold')",
248
+ value=0,
249
+ advanced=True,
250
+ ),
251
+ NestedDictInput(
252
+ name="advanced_search_filter",
253
+ display_name="Search Metadata Filter",
254
+ info="Optional dictionary of filters to apply to the search query.",
255
+ advanced=True,
256
+ ),
257
+ BoolInput(
258
+ name="autodetect_collection",
259
+ display_name="Autodetect Collection",
260
+ info="Boolean flag to determine whether to autodetect the collection.",
261
+ advanced=True,
262
+ value=True,
263
+ ),
264
+ StrInput(
265
+ name="content_field",
266
+ display_name="Content Field",
267
+ info="Field to use as the text content field for the vector store.",
268
+ advanced=True,
269
+ ),
270
+ StrInput(
271
+ name="deletion_field",
272
+ display_name="Deletion Based On Field",
273
+ info="When this parameter is provided, documents in the target collection with "
274
+ "metadata field values matching the input metadata field value will be deleted "
275
+ "before new data is loaded.",
276
+ advanced=True,
277
+ ),
278
+ BoolInput(
279
+ name="ignore_invalid_documents",
280
+ display_name="Ignore Invalid Documents",
281
+ info="Boolean flag to determine whether to ignore invalid documents at runtime.",
282
+ advanced=True,
283
+ ),
284
+ NestedDictInput(
285
+ name="astradb_vectorstore_kwargs",
286
+ display_name="AstraDBVectorStore Parameters",
287
+ info="Optional dictionary of additional parameters for the AstraDBVectorStore.",
288
+ advanced=True,
289
+ ),
290
+ ]
291
+
292
+ @classmethod
293
+ def map_cloud_providers(cls):
294
+ # TODO: Programmatically fetch the regions for each cloud provider
295
+ return {
296
+ "dev": {
297
+ "Amazon Web Services": {
298
+ "id": "aws",
299
+ "regions": ["us-west-2"],
300
+ },
301
+ "Google Cloud Platform": {
302
+ "id": "gcp",
303
+ "regions": ["us-central1", "europe-west4"],
304
+ },
305
+ },
306
+ "test": {
307
+ "Google Cloud Platform": {
308
+ "id": "gcp",
309
+ "regions": ["us-central1"],
310
+ },
311
+ },
312
+ "prod": {
313
+ "Amazon Web Services": {
314
+ "id": "aws",
315
+ "regions": ["us-east-2", "ap-south-1", "eu-west-1"],
316
+ },
317
+ "Google Cloud Platform": {
318
+ "id": "gcp",
319
+ "regions": ["us-east1"],
320
+ },
321
+ "Microsoft Azure": {
322
+ "id": "azure",
323
+ "regions": ["westus3"],
324
+ },
325
+ },
326
+ }
327
+
328
@classmethod
def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):
    """Look up the embedding providers available to a database.

    Returns a mapping of provider display name -> [provider key, [model names]],
    sorted by display name. Any failure while talking to the service is
    swallowed and an empty dict is returned instead (best-effort lookup).
    """
    try:
        # Walk from the client to the per-database admin object
        database_admin = (
            DataAPIClient(environment=environment).get_admin().get_database_admin(api_endpoint, token=token)
        )

        # Ask the database which embedding providers it supports
        found = database_admin.find_embedding_providers()

        # Key the result by display name; keep the provider key and model names
        by_display_name = {
            details.display_name: [key, [model.name for model in details.models]]
            for key, details in found.embedding_providers.items()
        }

        # Hand back the entries in alphabetical order
        return defaultdict(list, dict(sorted(by_display_name.items())))
    except Exception as _:  # noqa: BLE001
        return {}
353
+
354
@classmethod
async def create_database_api(
    cls,
    new_database_name: str,
    cloud_provider: str,
    region: str,
    token: str,
    environment: str | None = None,
    keyspace: str | None = None,
):
    """Request creation of a new database without waiting for it to become active.

    ``cloud_provider`` is the provider display name; it is translated to a
    provider id through ``map_cloud_providers`` for the given environment
    ("prod" when no environment is supplied).

    Raises:
        ValueError: if ``new_database_name`` is empty.
    """
    # Build the admin handle used to issue the creation request
    admin_client = DataAPIClient(environment=environment).get_admin(token=token)

    # A database cannot be created without a name
    if not new_database_name:
        msg = "Database name is required to create a new database."
        raise ValueError(msg)

    # Translate the provider display name into the id expected by the API
    effective_env = environment or "prod"
    provider_id = cls.map_cloud_providers()[effective_env][cloud_provider]["id"]

    return await admin_client.async_create_database(
        name=new_database_name,
        cloud_provider=provider_id,
        region=region,
        keyspace=keyspace,
        wait_until_active=False,
    )
385
+
386
+ @classmethod
387
+ async def create_collection_api(
388
+ cls,
389
+ new_collection_name: str,
390
+ token: str,
391
+ api_endpoint: str,
392
+ environment: str | None = None,
393
+ keyspace: str | None = None,
394
+ dimension: int | None = None,
395
+ embedding_generation_provider: str | None = None,
396
+ embedding_generation_model: str | None = None,
397
+ reranker: str | None = None,
398
+ ):
399
+ # Build vectorize options, if needed
400
+ vectorize_options = None
401
+ if not dimension:
402
+ providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)
403
+ vectorize_options = VectorServiceOptions(
404
+ provider=providers.get(embedding_generation_provider, [None, []])[0],
405
+ model_name=embedding_generation_model,
406
+ )
407
+
408
+ # Raise a value error if name isn't provided
409
+ if not new_collection_name:
410
+ msg = "Collection name is required to create a new collection."
411
+ raise ValueError(msg)
412
+
413
+ # Define the base arguments being passed to the create collection function
414
+ base_args = {
415
+ "collection_name": new_collection_name,
416
+ "token": token,
417
+ "api_endpoint": api_endpoint,
418
+ "keyspace": keyspace,
419
+ "environment": environment,
420
+ "embedding_dimension": dimension,
421
+ "collection_vector_service_options": vectorize_options,
422
+ }
423
+
424
+ # Add optional arguments if the reranker is set
425
+ if reranker:
426
+ # Split the reranker field into a provider a model name
427
+ provider, _ = reranker.split("/")
428
+ base_args["collection_rerank"] = CollectionRerankOptions(
429
+ service=RerankServiceOptions(provider=provider, model_name=reranker),
430
+ )
431
+ base_args["collection_lexical"] = CollectionLexicalOptions(analyzer="STANDARD")
432
+
433
+ _AstraDBCollectionEnvironment(**base_args)
434
+
435
@classmethod
def get_database_list_static(cls, token: str, environment: str | None = None):
    """Summarise the databases visible to ``token``.

    Returns a dict keyed by database name. Each value holds the database's
    "api_endpoints", "keyspaces", "collections" (a count), "status" (None when
    the database is ACTIVE) and "org_id". Databases whose collections cannot
    be listed are dropped unless they are still PENDING; any other error while
    processing a single database silently skips that database.
    """
    client = DataAPIClient(environment=environment)

    # List every database through the admin interface
    databases = client.get_admin(token=token).list_databases()

    summary = {}
    for database in databases:
        try:
            # One endpoint per region the database lives in
            endpoints = [region.api_endpoint for region in database.regions]

            # Count the collections reachable through the first endpoint
            try:
                first_region_db = client.get_database(endpoints[0], token=token)
                collection_count = len(first_region_db.list_collection_names())
            except Exception:  # noqa: BLE001
                # A pending database is still listed, with zero collections
                if database.status != "PENDING":
                    continue
                collection_count = 0

            summary[database.name] = {
                "api_endpoints": endpoints,
                "keyspaces": database.keyspaces,
                "collections": collection_count,
                "status": database.status if database.status != "ACTIVE" else None,
                "org_id": database.org_id or None,
            }
        except Exception:  # noqa: BLE001
            pass

    return summary
478
+
479
+ def get_database_list(self):
480
+ return self.get_database_list_static(
481
+ token=self.token,
482
+ environment=self.environment,
483
+ )
484
+
485
+ @classmethod
486
+ def get_api_endpoint_static(
487
+ cls,
488
+ token: str,
489
+ environment: str | None = None,
490
+ api_endpoint: str | None = None,
491
+ database_name: str | None = None,
492
+ ):
493
+ # If the api_endpoint is set, return it
494
+ if api_endpoint:
495
+ return api_endpoint
496
+
497
+ # Check if the database_name is like a url
498
+ if database_name and database_name.startswith("https://"):
499
+ return database_name
500
+
501
+ # If the database is not set, nothing we can do.
502
+ if not database_name:
503
+ return None
504
+
505
+ # Grab the database object
506
+ db = cls.get_database_list_static(token=token, environment=environment).get(database_name)
507
+ if not db:
508
+ return None
509
+
510
+ # Otherwise, get the URL from the database list
511
+ endpoints = db.get("api_endpoints") or []
512
+ return endpoints[0] if endpoints else None
513
+
514
+ def get_api_endpoint(self):
515
+ return self.get_api_endpoint_static(
516
+ token=self.token,
517
+ environment=self.environment,
518
+ api_endpoint=self.api_endpoint,
519
+ database_name=self.database_name,
520
+ )
521
+
522
+ @classmethod
523
+ def get_database_id_static(cls, api_endpoint: str) -> str | None:
524
+ # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters
525
+ uuid_pattern = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
526
+ match = re.search(uuid_pattern, api_endpoint)
527
+
528
+ return match.group(0) if match else None
529
+
530
+ def get_database_id(self):
531
+ return self.get_database_id_static(api_endpoint=self.get_api_endpoint())
532
+
533
+ def get_keyspace(self):
534
+ keyspace = self.keyspace
535
+
536
+ if keyspace:
537
+ return keyspace.strip()
538
+
539
+ return "default_keyspace"
540
+
541
def get_database_object(self, api_endpoint: str | None = None):
    """Return a database handle for ``api_endpoint`` (or the resolved endpoint).

    The handle is bound to this component's token and keyspace.

    Raises:
        ValueError: wrapping any error raised while building the handle.
    """
    try:
        astra_client = DataAPIClient(environment=self.environment)
        target_endpoint = api_endpoint or self.get_api_endpoint()

        return astra_client.get_database(
            target_endpoint,
            token=self.token,
            keyspace=self.get_keyspace(),
        )
    except Exception as e:
        msg = f"Error fetching database object: {e}"
        raise ValueError(msg) from e
553
+
554
+ def collection_data(self, collection_name: str, database: Database | None = None):
555
+ try:
556
+ if not database:
557
+ client = DataAPIClient(environment=self.environment)
558
+
559
+ database = client.get_database(
560
+ self.get_api_endpoint(),
561
+ token=self.token,
562
+ keyspace=self.get_keyspace(),
563
+ )
564
+
565
+ collection = database.get_collection(collection_name)
566
+
567
+ return collection.estimated_document_count()
568
+ except Exception as e: # noqa: BLE001
569
+ self.log(f"Error checking collection data: {e}")
570
+
571
+ return None
572
+
573
+ def _initialize_database_options(self):
574
+ try:
575
+ return [
576
+ {
577
+ "name": name,
578
+ "status": info["status"],
579
+ "collections": info["collections"],
580
+ "api_endpoints": info["api_endpoints"],
581
+ "keyspaces": info["keyspaces"],
582
+ "org_id": info["org_id"],
583
+ }
584
+ for name, info in self.get_database_list().items()
585
+ ]
586
+ except Exception as e:
587
+ msg = f"Error fetching database options: {e}"
588
+ raise ValueError(msg) from e
589
+
590
+ @classmethod
591
+ def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:
592
+ # Get the provider name from the collection
593
+ provider_name = provider_name or (
594
+ collection.definition.vector.service.provider
595
+ if (
596
+ collection
597
+ and collection.definition
598
+ and collection.definition.vector
599
+ and collection.definition.vector.service
600
+ )
601
+ else None
602
+ )
603
+
604
+ # If there is no provider, use the vector store icon
605
+ if not provider_name or provider_name.lower() == "bring your own":
606
+ return "vectorstores"
607
+
608
+ # Map provider casings
609
+ case_map = {
610
+ "nvidia": "NVIDIA",
611
+ "openai": "OpenAI",
612
+ "amazon bedrock": "AmazonBedrockEmbeddings",
613
+ "azure openai": "AzureOpenAiEmbeddings",
614
+ "cohere": "Cohere",
615
+ "jina ai": "JinaAI",
616
+ "mistral ai": "MistralAI",
617
+ "upstage": "Upstage",
618
+ "voyage ai": "VoyageAI",
619
+ }
620
+
621
+ # Adjust the casing on some like nvidia
622
+ return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()
623
+
624
+ def _initialize_collection_options(self, api_endpoint: str | None = None):
625
+ # Nothing to generate if we don't have an API endpoint yet
626
+ api_endpoint = api_endpoint or self.get_api_endpoint()
627
+ if not api_endpoint:
628
+ return []
629
+
630
+ # Retrieve the database object
631
+ database = self.get_database_object(api_endpoint=api_endpoint)
632
+
633
+ # Get the list of collections
634
+ collection_list = database.list_collections(keyspace=self.get_keyspace())
635
+
636
+ # Return the list of collections and metadata associated
637
+ return [
638
+ {
639
+ "name": col.name,
640
+ "records": self.collection_data(collection_name=col.name, database=database),
641
+ "provider": (
642
+ col.definition.vector.service.provider
643
+ if col.definition.vector and col.definition.vector.service
644
+ else None
645
+ ),
646
+ "icon": self.get_provider_icon(collection=col),
647
+ "model": (
648
+ col.definition.vector.service.model_name
649
+ if col.definition.vector and col.definition.vector.service
650
+ else None
651
+ ),
652
+ }
653
+ for col in collection_list
654
+ ]
655
+
656
+ def reset_provider_options(self, build_config: dict) -> dict:
657
+ """Reset provider options and related configurations in the build_config dictionary."""
658
+ # Extract template path for cleaner access
659
+ template = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
660
+
661
+ # Get vectorize providers
662
+ vectorize_providers_api = self.get_vectorize_providers(
663
+ token=self.token,
664
+ environment=self.environment,
665
+ api_endpoint=build_config["api_endpoint"]["value"],
666
+ )
667
+
668
+ # Create a new dictionary with "Bring your own" first
669
+ vectorize_providers: dict[str, list[list[str]]] = {"Bring your own": [[], []]}
670
+
671
+ # Add the remaining items (only Nvidia) from the original dictionary
672
+ vectorize_providers.update(
673
+ {
674
+ k: v
675
+ for k, v in vectorize_providers_api.items()
676
+ if k.lower() in ["nvidia"] # TODO: Eventually support more
677
+ }
678
+ )
679
+
680
+ # Set provider options
681
+ provider_field = "02_embedding_generation_provider"
682
+ template[provider_field]["options"] = list(vectorize_providers.keys())
683
+
684
+ # Add metadata for each provider option
685
+ template[provider_field]["options_metadata"] = [
686
+ {"icon": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field]["options"]
687
+ ]
688
+
689
+ # Get selected embedding provider
690
+ embedding_provider = template[provider_field]["value"]
691
+ is_bring_your_own = embedding_provider and embedding_provider == "Bring your own"
692
+
693
+ # Configure embedding model field
694
+ model_field = "03_embedding_generation_model"
695
+ template[model_field].update(
696
+ {
697
+ "options": vectorize_providers.get(embedding_provider, [[], []])[1],
698
+ "placeholder": "Bring your own" if is_bring_your_own else None,
699
+ "readonly": is_bring_your_own,
700
+ "required": not is_bring_your_own,
701
+ "value": None,
702
+ }
703
+ )
704
+
705
+ # If this is a bring your own, set dimensions to 0
706
+ return self.reset_dimension_field(build_config)
707
+
708
def reset_dimension_field(self, build_config: dict) -> dict:
    """Refresh the dimension field of the new-collection dialog.

    With the "Bring your own" provider the field becomes an editable, required
    input with no preset value. With any other selection (including none) it
    is locked to 1024. ``build_config`` is updated in place and returned.
    """
    # Extract template path for cleaner access
    template = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]

    # Get selected embedding provider. A plain comparison always yields a real
    # bool, so "required" never ends up as None or "" when nothing is selected.
    provider_field = "02_embedding_generation_provider"
    embedding_provider = template[provider_field]["value"]
    is_bring_your_own = embedding_provider == "Bring your own"

    # Configure dimension field
    dimension_field = "04_dimension"
    dimension_value = 1024 if not is_bring_your_own else None  # TODO: Dynamically figure this out
    template[dimension_field].update(
        {
            "placeholder": dimension_value,
            "value": dimension_value,
            "readonly": not is_bring_your_own,
            "required": is_bring_your_own,
        }
    )

    return build_config
731
+
732
+ def reset_collection_list(self, build_config: dict) -> dict:
733
+ """Reset collection list options based on provided configuration."""
734
+ # Get collection options
735
+ collection_options = self._initialize_collection_options(api_endpoint=build_config["api_endpoint"]["value"])
736
+ # Update collection configuration
737
+ collection_config = build_config["collection_name"]
738
+ collection_config.update(
739
+ {
740
+ "options": [col["name"] for col in collection_options],
741
+ "options_metadata": [{k: v for k, v in col.items() if k != "name"} for col in collection_options],
742
+ }
743
+ )
744
+
745
+ # Reset selected collection if not in options
746
+ if collection_config["value"] not in collection_config["options"]:
747
+ collection_config["value"] = ""
748
+
749
+ # Set advanced status based on database selection
750
+ collection_config["show"] = bool(build_config["database_name"]["value"])
751
+
752
+ return build_config
753
+
754
+ def reset_database_list(self, build_config: dict) -> dict:
755
+ """Reset database list options and related configurations."""
756
+ # Get database options
757
+ database_options = self._initialize_database_options()
758
+
759
+ # Update cloud provider options
760
+ env = self.environment
761
+ template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
762
+ template["02_cloud_provider"]["options"] = list(self.map_cloud_providers()[env].keys())
763
+
764
+ # Update database configuration
765
+ database_config = build_config["database_name"]
766
+ database_config.update(
767
+ {
768
+ "options": [db["name"] for db in database_options],
769
+ "options_metadata": [{k: v for k, v in db.items() if k != "name"} for db in database_options],
770
+ }
771
+ )
772
+
773
+ # Reset selections if value not in options
774
+ if database_config["value"] not in database_config["options"]:
775
+ database_config["value"] = ""
776
+ build_config["api_endpoint"]["options"] = []
777
+ build_config["api_endpoint"]["value"] = ""
778
+ build_config["collection_name"]["show"] = False
779
+
780
+ # Set advanced status based on token presence
781
+ database_config["show"] = bool(build_config["token"]["value"])
782
+
783
+ return build_config
784
+
785
def reset_build_config(self, build_config: dict) -> dict:
    """Clear the database, endpoint and collection fields and hide both dropdowns.

    ``build_config`` is updated in place and returned.
    """

    def cleared_dropdown() -> dict:
        # Built afresh for every field so the two dropdowns never share lists
        return {"options": [], "options_metadata": [], "value": "", "show": False}

    # Database dropdown and the endpoint derived from it
    build_config["database_name"].update(cleared_dropdown())
    endpoint_config = build_config["api_endpoint"]
    endpoint_config["options"] = []
    endpoint_config["value"] = ""

    # Collection dropdown
    build_config["collection_name"].update(cleared_dropdown())

    return build_config
798
+
799
def _handle_hybrid_search_options(self, build_config: dict) -> dict:
    """Set hybrid search options in the build configuration.

    When the database's reranking providers can be listed, the reranker
    dropdown is filled with their models and hybrid search becomes the selected
    search method. If that fails for any reason (including an empty model list),
    the reranker options are cleared and only vector search is offered.
    ``build_config`` is updated in place and returned.
    """
    # Build the per-database admin object used to query reranking providers
    database_admin = (
        DataAPIClient(environment=self.environment)
        .get_admin()
        .get_database_admin(self.get_api_endpoint(), token=self.token)
    )

    reranker_config = build_config["reranker"]
    search_method_config = build_config["search_method"]

    # Try to get the reranking providers to see whether hybrid search is enabled
    try:
        found = database_admin.find_reranking_providers()
        model_names = [
            model.name for provider_data in found.reranking_providers.values() for model in provider_data.models
        ]
        reranker_config["options"] = model_names
        # Model names look like "<provider>/<model>"; the icon follows the provider
        reranker_config["options_metadata"] = [
            {"icon": self.get_provider_icon(provider_name=model_name.split("/")[0])} for model_name in model_names
        ]
        reranker_config["value"] = reranker_config["options"][0]

        # Offer both methods and default to hybrid search
        search_method_config["show"] = True
        search_method_config["options"] = ["Hybrid Search", "Vector Search"]
        search_method_config["value"] = "Hybrid Search"
    except Exception as _:  # noqa: BLE001
        reranker_config["options"] = []
        reranker_config["options_metadata"] = []

        # Only vector search is available
        search_method_config["show"] = False
        search_method_config["options"] = ["Vector Search"]
        search_method_config["value"] = "Vector Search"

    return build_config
834
+
835
+ async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
836
+ """Update build configuration based on field name and value."""
837
+ # Early return if no token provided
838
+ if not self.token:
839
+ return self.reset_build_config(build_config)
840
+
841
+ # Database creation callback
842
+ if field_name == "database_name" and isinstance(field_value, dict):
843
+ if "01_new_database_name" in field_value:
844
+ await self._create_new_database(build_config, field_value)
845
+ return self.reset_collection_list(build_config)
846
+ return self._update_cloud_regions(build_config, field_value)
847
+
848
+ # Collection creation callback
849
+ if field_name == "collection_name" and isinstance(field_value, dict):
850
+ # Case 1: New collection creation
851
+ if "01_new_collection_name" in field_value:
852
+ await self._create_new_collection(build_config, field_value)
853
+ return build_config
854
+
855
+ # Case 2: Update embedding provider options
856
+ if "02_embedding_generation_provider" in field_value:
857
+ return self.reset_provider_options(build_config)
858
+
859
+ # Case 3: Update dimension field
860
+ if "03_embedding_generation_model" in field_value:
861
+ return self.reset_dimension_field(build_config)
862
+
863
+ # Initial execution or token/environment change
864
+ first_run = field_name == "collection_name" and not field_value and not build_config["database_name"]["options"]
865
+ if first_run or field_name in {"token", "environment"}:
866
+ return self.reset_database_list(build_config)
867
+
868
+ # Database selection change
869
+ if field_name == "database_name" and not isinstance(field_value, dict):
870
+ return self._handle_database_selection(build_config, field_value)
871
+
872
+ # Keyspace selection change
873
+ if field_name == "keyspace":
874
+ return self.reset_collection_list(build_config)
875
+
876
+ # Collection selection change
877
+ if field_name == "collection_name" and not isinstance(field_value, dict):
878
+ return self._handle_collection_selection(build_config, field_value)
879
+
880
+ # Search method selection change
881
+ if field_name == "search_method":
882
+ is_vector_search = field_value == "Vector Search"
883
+ is_autodetect = build_config["autodetect_collection"]["value"]
884
+
885
+ # Configure lexical terms (same for both cases)
886
+ build_config["lexical_terms"]["show"] = not is_vector_search
887
+ build_config["lexical_terms"]["value"] = "" if is_vector_search else build_config["lexical_terms"]["value"]
888
+
889
+ # Disable reranker disabling if hybrid search is selected
890
+ build_config["reranker"]["show"] = not is_vector_search
891
+ build_config["reranker"]["toggle_disable"] = not is_vector_search
892
+ build_config["reranker"]["toggle_value"] = True
893
+ build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
894
+
895
+ # Toggle search type and score threshold based on search method
896
+ build_config["search_type"]["show"] = is_vector_search
897
+ build_config["search_score_threshold"]["show"] = is_vector_search
898
+
899
+ # Make sure the search_type is set to "Similarity"
900
+ if not is_vector_search or is_autodetect:
901
+ build_config["search_type"]["value"] = "Similarity"
902
+
903
+ return build_config
904
+
905
async def _create_new_database(self, build_config: dict, field_value: dict) -> None:
    """Create a database from the dialog values and list it in the build config.

    Args:
        build_config: Component build configuration; its ``database_name``
            entry is mutated in place.
        field_value: Dialog payload providing ``01_new_database_name``,
            ``02_cloud_provider`` and ``03_region``.

    Raises:
        ValueError: If assembling the request or the creation call fails;
            the original exception is chained as the cause.
    """
    try:
        # Payload lookups stay inside the try so a missing key is reported
        # the same way as an API failure.
        request = {
            "new_database_name": field_value["01_new_database_name"],
            "token": self.token,
            "keyspace": self.get_keyspace(),
            "environment": self.environment,
            "cloud_provider": field_value["02_cloud_provider"],
            "region": field_value["03_region"],
        }
        await self.create_database_api(**request)
    except Exception as e:
        msg = f"Error creating database: {e}"
        raise ValueError(msg) from e

    database_field = build_config["database_name"]
    database_field["options"].append(field_value["01_new_database_name"])

    # Placeholder metadata for the new entry: marked PENDING, with no
    # collections, endpoints or organisation id recorded yet.
    pending_entry = {
        "status": "PENDING",
        "collections": 0,
        "api_endpoints": [],
        "keyspaces": [self.get_keyspace()],
        "org_id": None,
    }
    database_field["options_metadata"].append(pending_entry)
930
+
931
def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:
    """Refresh the region choices of the new-database dialog.

    Args:
        build_config: Component build configuration, mutated in place.
        field_value: Dialog payload; ``02_cloud_provider`` selects the provider.

    Returns:
        The same ``build_config`` object with the ``03_region`` field updated.
    """
    environment = self.environment
    selected_provider = field_value["02_cloud_provider"]

    # The dialog template holds the field definitions shown to the user.
    template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
    regions = self.map_cloud_providers()[environment][selected_provider]["regions"]

    region_field = template["03_region"]
    region_field["options"] = regions

    # Clear a previously chosen region that the new provider does not offer.
    if region_field["value"] not in regions:
        region_field["value"] = None

    return build_config
945
+
946
async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:
    """Create a collection from the dialog values and select it in the build config.

    Args:
        build_config: Component build configuration, mutated in place.
        field_value: Dialog payload. ``01_new_collection_name`` is required;
            the embedding provider, model and dimension entries are optional.

    Raises:
        ValueError: If assembling the request or the creation call fails;
            the original exception is chained as the cause.
    """
    embedding_provider = field_value.get("02_embedding_generation_provider")
    brings_own_embeddings = embedding_provider == "Bring your own"

    try:
        request = {
            "new_collection_name": field_value["01_new_collection_name"],
            "token": self.token,
            "api_endpoint": build_config["api_endpoint"]["value"],
            "environment": self.environment,
            "keyspace": self.get_keyspace(),
            # A dimension is only forwarded for "Bring your own" embeddings.
            "dimension": field_value.get("04_dimension") if brings_own_embeddings else None,
            "embedding_generation_provider": embedding_provider,
            "embedding_generation_model": field_value.get("03_embedding_generation_model"),
            "reranker": self.reranker,
        }
        await self.create_collection_api(**request)
    except Exception as e:
        msg = f"Error creating collection: {e}"
        raise ValueError(msg) from e

    # Lower-cased provider name, or None when no provider was chosen or the
    # user brings their own embeddings.
    if embedding_provider and not brings_own_embeddings:
        provider = embedding_provider.lower()
    else:
        provider = None

    new_name = field_value["01_new_collection_name"]
    collection_field = build_config["collection_name"]
    collection_field.update(
        {
            "value": new_name,
            "options": collection_field["options"] + [new_name],
        }
    )

    # The embedding model input is shown and required only without a provider.
    needs_embedding_model = not bool(provider)
    build_config["embedding_model"]["show"] = needs_embedding_model
    build_config["embedding_model"]["required"] = needs_embedding_model

    collection_field["options_metadata"].append(
        {
            "records": 0,
            "provider": provider,
            "icon": self.get_provider_icon(provider_name=provider),
            "model": field_value.get("03_embedding_generation_model"),
        }
    )

    # Newly created collections always expose the lexical terms input.
    build_config["lexical_terms"]["show"] = True
986
+
987
def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:
    """Apply a database selection to the endpoint, keyspace and collection fields.

    Args:
        build_config: Component build configuration.
        field_value: Name of the database that was selected.

    Returns:
        The updated build configuration. It is returned early when the
        selection is not a known option or when its metadata has no org id.
    """
    build_config = self.reset_database_list(build_config)
    database_field = build_config["database_name"]

    # A selection that is not among the refreshed options is cleared.
    if field_value not in database_field["options"]:
        database_field["value"] = ""
        return build_config

    # Options and their metadata are parallel lists.
    position = database_field["options"].index(field_value)
    metadata = database_field["options_metadata"][position]

    # Offer the database's endpoints and preselect the first one.
    endpoint_field = build_config["api_endpoint"]
    endpoint_field["options"] = metadata["api_endpoints"]
    endpoint_field["value"] = metadata["api_endpoints"][0]

    org_id = metadata["org_id"]
    if not org_id:
        return build_config

    # Keep the current keyspace when the database offers it; otherwise fall
    # back to the first available one.
    keyspace_field = build_config["keyspace"]
    keyspace_field["options"] = metadata["keyspaces"]
    if keyspace_field["value"] not in keyspace_field["options"]:
        keyspace_field["value"] = keyspace_field["options"] and keyspace_field["options"][0]

    db_id = self.get_database_id_static(api_endpoint=endpoint_field["value"])
    keyspace = self.get_keyspace()

    # Point the embedding provider helper text at this database's data explorer.
    explorer_url = f"https://astra.datastax.com/org/{org_id}/database/{db_id}/data-explorer?createCollection=1&namespace={keyspace}"
    template = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
    template["02_embedding_generation_provider"]["helper_text"] = (
        "To create collections with more embedding provider options, go to "
        '<a class="underline" target="_blank" rel="noopener noreferrer" '
        f'href="{explorer_url}">'
        "your database in Astra DB</a>."
    )

    # Refresh everything that depends on the selected database.
    build_config = self.reset_provider_options(build_config)
    build_config = self._handle_hybrid_search_options(build_config)
    return self.reset_collection_list(build_config)
1038
+
1039
def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:
    """Apply a collection selection to the embedding, reranker and lexical fields.

    Args:
        build_config: Component build configuration.
        field_value: Name of the selected collection; may be empty.

    Returns:
        The updated build configuration.
    """
    build_config["autodetect_collection"]["value"] = True
    build_config = self.reset_collection_list(build_config)

    # Nothing selected: only the autodetect flag and the list were refreshed.
    if not field_value:
        return build_config

    collection_field = build_config["collection_name"]

    # A name missing from the refreshed list is added with blank metadata,
    # and autodetection is switched off for it.
    if field_value not in collection_field["options"]:
        collection_field["options"].append(field_value)
        collection_field["options_metadata"].append(
            {
                "records": 0,
                "provider": None,
                "icon": "vectorstores",
                "model": None,
            }
        )
        build_config["autodetect_collection"]["value"] = False

    # Options and their metadata are parallel lists.
    position = collection_field["options"].index(field_value)
    provider = collection_field["options_metadata"][position]["provider"]

    # The embedding model input is shown and required only without a provider.
    needs_embedding_model = not bool(provider)
    build_config["embedding_model"]["show"] = needs_embedding_model
    build_config["embedding_model"]["required"] = needs_embedding_model

    # Read the collection's own options to see which features it enables.
    database = self.get_database_object(api_endpoint=build_config["api_endpoint"]["value"])
    collection = database.get_collection(
        name=field_value,
        keyspace=build_config["keyspace"]["value"],
    )
    col_options = collection.options()
    rerank_enabled = col_options.rerank and col_options.rerank.enabled
    lexical_enabled = col_options.lexical and col_options.lexical.enabled
    wants_hybrid = build_config["search_method"]["value"] == "Hybrid Search"

    # The reranker is shown only when the collection enables reranking and
    # the user picked Hybrid Search; it stays user-toggleable.
    hybrid_active = bool(rerank_enabled and wants_hybrid)
    reranker_field = build_config["reranker"]
    reranker_field["show"] = hybrid_active
    reranker_field["toggle_value"] = hybrid_active
    reranker_field["toggle_disable"] = False

    # Hybrid search pins the search type to "Similarity".
    if hybrid_active:
        build_config["search_type"]["value"] = "Similarity"

    # Lexical terms are offered only when the collection enables lexical search.
    build_config["lexical_terms"]["show"] = bool(lexical_enabled)

    return build_config
1095
+
1096
@check_cached_vector_store
def build_vector_store(self):
    """Build the AstraDBVectorStore for this component and ingest pending documents.

    Returns:
        The initialised ``AstraDBVectorStore``.

    Raises:
        ImportError: If ``langchain_astradb`` cannot be imported.
        ValueError: If constructing the vector store fails; the original
            exception is chained as the cause.
    """
    try:
        from langchain_astradb import AstraDBVectorStore
    except ImportError as e:
        msg = (
            "Could not import langchain Astra DB integration package. "
            "Please install it with `pip install langchain-astradb`."
        )
        raise ImportError(msg) from e

    # An embedding model is only forwarded when one is configured.
    embedding_params = {}
    if self.embedding_model:
        embedding_params["embedding"] = self.embedding_model

    # Extra keyword arguments supplied by the user, if any.
    additional_params = self.astradb_vectorstore_kwargs or {}

    # Version reported in the usage-tracking caller tuple.
    # TODO: the prefix is meant to become "ds-" on AWS ECS Fargate once a
    # precise way of detecting that platform exists.
    langflow_version = get_version_info()["version"]
    langflow_prefix = ""
    caller_name = f"{langflow_prefix}langflow"

    database = self.get_database_object()

    # Autodetection applies only to collections that already exist.
    autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection

    # Content field: the configured one when an embedding model is in use;
    # otherwise "page_content" when an embedding model is in use and
    # collection_data() reports 0 for the collection; otherwise None.
    if self.content_field and embedding_params:
        content_field = self.content_field
    elif embedding_params and self.collection_data(collection_name=self.collection_name, database=database) == 0:
        content_field = "page_content"
    else:
        content_field = None

    autodetect_params = {
        "autodetect_collection": autodetect,
        "content_field": content_field,
        "ignore_invalid_documents": self.ignore_invalid_documents,
    }

    if self.search_method == "Hybrid Search":
        hybrid_search_mode = HybridSearchMode.DEFAULT
    else:
        hybrid_search_mode = HybridSearchMode.OFF

    try:
        # The three parameter dicts are expanded separately so that a key
        # clashing with an explicit argument still raises instead of being
        # silently overridden.
        vector_store = AstraDBVectorStore(
            token=self.token,
            api_endpoint=database.api_endpoint,
            namespace=database.keyspace,
            collection_name=self.collection_name,
            environment=self.environment,
            hybrid_search=hybrid_search_mode,
            ext_callers=[(caller_name, langflow_version)],
            **autodetect_params,
            **embedding_params,
            **additional_params,
        )
    except Exception as e:
        msg = f"Error initializing AstraDBVectorStore: {e}"
        raise ValueError(msg) from e

    self._add_documents_to_vector_store(vector_store)

    return vector_store
1168
+
1169
def _add_documents_to_vector_store(self, vector_store) -> None:
    """Ingest the prepared input data into ``vector_store``.

    When ``deletion_field`` is set, existing documents whose metadata value
    for that field matches any incoming document are deleted first.

    Args:
        vector_store: Store exposing ``add_documents``.

    Raises:
        TypeError: If an ingest item is not a ``Data`` object.
        ValueError: If deleting or adding documents fails; the original
            exception is chained as the cause.
    """
    self.ingest_data = self._prepare_ingest_data()

    # First pass: validate and convert every input to a LangChain document.
    converted = []
    for item in self.ingest_data or []:
        if not isinstance(item, Data):
            msg = "Vector Store Inputs must be Data objects."
            raise TypeError(msg)
        converted.append(item.to_lc_document())

    # Second pass: rebuild each document with serialized metadata.
    documents = [
        Document(page_content=entry.page_content, metadata=serialize(entry.metadata, to_str=True))
        for entry in converted
    ]

    if not documents:
        self.log("No documents to add to the Vector Store.")
        return

    if self.deletion_field:
        self.log(f"Deleting documents where {self.deletion_field}")
        try:
            database = self.get_database_object()
            collection = database.get_collection(self.collection_name, keyspace=database.keyspace)
            # De-duplicate the values used to match documents for deletion.
            delete_values = list({doc.metadata[self.deletion_field] for doc in documents})
            self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.")
            collection.delete_many({f"metadata.{self.deletion_field}": {"$in": delete_values}})
        except Exception as e:
            msg = f"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}"
            raise ValueError(msg) from e

    self.log(f"Adding {len(documents)} documents to the Vector Store.")
    try:
        vector_store.add_documents(documents)
    except Exception as e:
        msg = f"Error adding documents to AstraDBVectorStore: {e}"
        raise ValueError(msg) from e
1205
+
1206
def _map_search_type(self) -> str:
    """Translate the selected search type label into its search-type key.

    Returns:
        ``"similarity_score_threshold"`` or ``"mmr"`` for the two labels
        mapped below, and ``"similarity"`` for any other value.
    """
    label_to_key = {
        "Similarity with score threshold": "similarity_score_threshold",
        "MMR (Max Marginal Relevance)": "mmr",
    }

    try:
        return label_to_key[self.search_type]
    except KeyError:
        # Unmapped labels fall back to plain similarity search.
        return "similarity"
1213
+
1214
def _build_search_args(self):
    """Assemble the keyword arguments for a vector-store search.

    Returns:
        Query-search arguments when a non-blank string query is set,
        metadata-search arguments (``n``) when only a filter is set, and an
        empty dict when neither is present. A configured filter is added
        under ``filter`` in both non-empty cases.
    """
    raw_query = self.search_query
    # Only a string containing non-whitespace characters counts as a query.
    has_query = isinstance(raw_query, str) and bool(raw_query.strip())
    lexical_terms = self.lexical_terms or None
    search_filter = self.advanced_search_filter

    if has_query:
        args = {
            "query": raw_query,
            "search_type": self._map_search_type(),
            "k": self.number_of_results,
            "score_threshold": self.search_score_threshold,
            "lexical_query": lexical_terms,
        }
    elif search_filter:
        args = {
            "n": self.number_of_results,
        }
    else:
        return {}

    if search_filter:
        args["filter"] = search_filter

    return args
1240
+
1241
def search_documents(self, vector_store=None) -> list[Data]:
    """Run the configured search and return the results as ``Data`` objects.

    Args:
        vector_store: Store to query; when falsy, one is built with
            ``build_vector_store``.

    Returns:
        The converted search results, or an empty list when neither a query
        nor a filter is configured.

    Raises:
        ValueError: If building the search arguments or running the search
            fails; the original exception is chained as the cause.
    """
    if not vector_store:
        vector_store = self.build_vector_store()

    self.log(f"Search input: {self.search_query}")
    self.log(f"Search type: {self.search_type}")
    self.log(f"Number of results: {self.number_of_results}")
    self.log(f"store.hybrid_search: {vector_store.hybrid_search}")
    self.log(f"Lexical terms: {self.lexical_terms}")
    self.log(f"Reranker: {self.reranker}")

    try:
        search_args = self._build_search_args()
    except Exception as e:
        msg = f"Error in AstraDBVectorStore._build_search_args: {e}"
        raise ValueError(msg) from e

    if not search_args:
        self.log("No search input or filters provided. Skipping search.")
        return []

    # A query selects the store's `search` method; a filter-only request
    # uses `metadata_search`.
    if "query" in search_args:
        search_method = "search"
    else:
        search_method = "metadata_search"

    try:
        self.log(f"Calling vector_store.{search_method} with args: {search_args}")
        run_search = getattr(vector_store, search_method)
        docs = run_search(**search_args)
    except Exception as e:
        msg = f"Error performing {search_method} in AstraDBVectorStore: {e}"
        raise ValueError(msg) from e

    self.log(f"Retrieved documents: {len(docs)}")

    data = docs_to_data(docs)
    self.log(f"Converted documents to data: {len(data)}")
    self.status = data

    return data
1278
+
1279
def get_retriever_kwargs(self):
    """Return the search type and search arguments as retriever keyword arguments.

    Returns:
        A dict with ``search_type`` (the mapped search-type key) and
        ``search_kwargs`` (the output of ``_build_search_args``).
    """
    # Build the search arguments first, then resolve the search type.
    search_kwargs = self._build_search_args()
    retriever_kwargs = {"search_type": self._map_search_type()}
    retriever_kwargs["search_kwargs"] = search_kwargs
    return retriever_kwargs