lfx-nightly 0.1.13.dev0__py3-none-any.whl → 0.2.0.dev26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +121 -29
  3. lfx/base/agents/altk_base_agent.py +380 -0
  4. lfx/base/agents/altk_tool_wrappers.py +565 -0
  5. lfx/base/agents/events.py +103 -35
  6. lfx/base/agents/utils.py +15 -2
  7. lfx/base/composio/composio_base.py +183 -233
  8. lfx/base/data/base_file.py +88 -21
  9. lfx/base/data/storage_utils.py +192 -0
  10. lfx/base/data/utils.py +178 -14
  11. lfx/base/datastax/__init__.py +5 -0
  12. lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} +84 -473
  13. lfx/base/embeddings/embeddings_class.py +113 -0
  14. lfx/base/io/chat.py +5 -4
  15. lfx/base/mcp/util.py +101 -15
  16. lfx/base/models/groq_constants.py +74 -58
  17. lfx/base/models/groq_model_discovery.py +265 -0
  18. lfx/base/models/model.py +1 -1
  19. lfx/base/models/model_input_constants.py +74 -7
  20. lfx/base/models/model_utils.py +100 -0
  21. lfx/base/models/ollama_constants.py +3 -0
  22. lfx/base/models/openai_constants.py +7 -0
  23. lfx/base/models/watsonx_constants.py +36 -0
  24. lfx/base/tools/run_flow.py +601 -129
  25. lfx/cli/commands.py +7 -4
  26. lfx/cli/common.py +2 -2
  27. lfx/cli/run.py +1 -1
  28. lfx/cli/script_loader.py +53 -11
  29. lfx/components/Notion/create_page.py +1 -1
  30. lfx/components/Notion/list_database_properties.py +1 -1
  31. lfx/components/Notion/list_pages.py +1 -1
  32. lfx/components/Notion/list_users.py +1 -1
  33. lfx/components/Notion/page_content_viewer.py +1 -1
  34. lfx/components/Notion/search.py +1 -1
  35. lfx/components/Notion/update_page_property.py +1 -1
  36. lfx/components/__init__.py +19 -5
  37. lfx/components/altk/__init__.py +34 -0
  38. lfx/components/altk/altk_agent.py +193 -0
  39. lfx/components/amazon/amazon_bedrock_converse.py +1 -1
  40. lfx/components/apify/apify_actor.py +4 -4
  41. lfx/components/composio/__init__.py +70 -18
  42. lfx/components/composio/apollo_composio.py +11 -0
  43. lfx/components/composio/bitbucket_composio.py +11 -0
  44. lfx/components/composio/canva_composio.py +11 -0
  45. lfx/components/composio/coda_composio.py +11 -0
  46. lfx/components/composio/composio_api.py +10 -0
  47. lfx/components/composio/discord_composio.py +1 -1
  48. lfx/components/composio/elevenlabs_composio.py +11 -0
  49. lfx/components/composio/exa_composio.py +11 -0
  50. lfx/components/composio/firecrawl_composio.py +11 -0
  51. lfx/components/composio/fireflies_composio.py +11 -0
  52. lfx/components/composio/gmail_composio.py +1 -1
  53. lfx/components/composio/googlebigquery_composio.py +11 -0
  54. lfx/components/composio/googlecalendar_composio.py +1 -1
  55. lfx/components/composio/googledocs_composio.py +1 -1
  56. lfx/components/composio/googlemeet_composio.py +1 -1
  57. lfx/components/composio/googlesheets_composio.py +1 -1
  58. lfx/components/composio/googletasks_composio.py +1 -1
  59. lfx/components/composio/heygen_composio.py +11 -0
  60. lfx/components/composio/mem0_composio.py +11 -0
  61. lfx/components/composio/peopledatalabs_composio.py +11 -0
  62. lfx/components/composio/perplexityai_composio.py +11 -0
  63. lfx/components/composio/serpapi_composio.py +11 -0
  64. lfx/components/composio/slack_composio.py +3 -574
  65. lfx/components/composio/slackbot_composio.py +1 -1
  66. lfx/components/composio/snowflake_composio.py +11 -0
  67. lfx/components/composio/tavily_composio.py +11 -0
  68. lfx/components/composio/youtube_composio.py +2 -2
  69. lfx/components/{agents → cuga}/__init__.py +5 -7
  70. lfx/components/cuga/cuga_agent.py +730 -0
  71. lfx/components/data/__init__.py +78 -28
  72. lfx/components/data_source/__init__.py +58 -0
  73. lfx/components/{data → data_source}/api_request.py +26 -3
  74. lfx/components/{data → data_source}/csv_to_data.py +15 -10
  75. lfx/components/{data → data_source}/json_to_data.py +15 -8
  76. lfx/components/{data → data_source}/news_search.py +1 -1
  77. lfx/components/{data → data_source}/rss.py +1 -1
  78. lfx/components/{data → data_source}/sql_executor.py +1 -1
  79. lfx/components/{data → data_source}/url.py +1 -1
  80. lfx/components/{data → data_source}/web_search.py +1 -1
  81. lfx/components/datastax/__init__.py +12 -6
  82. lfx/components/datastax/{astra_assistant_manager.py → astradb_assistant_manager.py} +1 -0
  83. lfx/components/datastax/astradb_chatmemory.py +40 -0
  84. lfx/components/datastax/astradb_cql.py +6 -32
  85. lfx/components/datastax/astradb_graph.py +10 -124
  86. lfx/components/datastax/astradb_tool.py +13 -53
  87. lfx/components/datastax/astradb_vectorstore.py +134 -977
  88. lfx/components/datastax/create_assistant.py +1 -0
  89. lfx/components/datastax/create_thread.py +1 -0
  90. lfx/components/datastax/dotenv.py +1 -0
  91. lfx/components/datastax/get_assistant.py +1 -0
  92. lfx/components/datastax/getenvvar.py +1 -0
  93. lfx/components/datastax/graph_rag.py +1 -1
  94. lfx/components/datastax/hcd.py +1 -1
  95. lfx/components/datastax/list_assistants.py +1 -0
  96. lfx/components/datastax/run.py +1 -0
  97. lfx/components/deactivated/json_document_builder.py +1 -1
  98. lfx/components/elastic/elasticsearch.py +1 -1
  99. lfx/components/elastic/opensearch_multimodal.py +1575 -0
  100. lfx/components/files_and_knowledge/__init__.py +47 -0
  101. lfx/components/{data → files_and_knowledge}/directory.py +1 -1
  102. lfx/components/{data → files_and_knowledge}/file.py +246 -18
  103. lfx/components/{knowledge_bases → files_and_knowledge}/ingestion.py +17 -9
  104. lfx/components/{knowledge_bases → files_and_knowledge}/retrieval.py +18 -10
  105. lfx/components/{data → files_and_knowledge}/save_file.py +142 -22
  106. lfx/components/flow_controls/__init__.py +58 -0
  107. lfx/components/{logic → flow_controls}/conditional_router.py +1 -1
  108. lfx/components/{logic → flow_controls}/loop.py +47 -9
  109. lfx/components/flow_controls/run_flow.py +108 -0
  110. lfx/components/glean/glean_search_api.py +1 -1
  111. lfx/components/groq/groq.py +35 -28
  112. lfx/components/helpers/__init__.py +102 -0
  113. lfx/components/ibm/watsonx.py +25 -21
  114. lfx/components/input_output/__init__.py +3 -1
  115. lfx/components/input_output/chat.py +12 -3
  116. lfx/components/input_output/chat_output.py +12 -4
  117. lfx/components/input_output/text.py +1 -1
  118. lfx/components/input_output/text_output.py +1 -1
  119. lfx/components/{data → input_output}/webhook.py +1 -1
  120. lfx/components/knowledge_bases/__init__.py +59 -4
  121. lfx/components/langchain_utilities/character.py +1 -1
  122. lfx/components/langchain_utilities/csv_agent.py +84 -16
  123. lfx/components/langchain_utilities/json_agent.py +67 -12
  124. lfx/components/langchain_utilities/language_recursive.py +1 -1
  125. lfx/components/llm_operations/__init__.py +46 -0
  126. lfx/components/{processing → llm_operations}/batch_run.py +1 -1
  127. lfx/components/{processing → llm_operations}/lambda_filter.py +1 -1
  128. lfx/components/{logic → llm_operations}/llm_conditional_router.py +1 -1
  129. lfx/components/{processing/llm_router.py → llm_operations/llm_selector.py} +3 -3
  130. lfx/components/{processing → llm_operations}/structured_output.py +56 -18
  131. lfx/components/logic/__init__.py +126 -0
  132. lfx/components/mem0/mem0_chat_memory.py +11 -0
  133. lfx/components/mistral/mistral_embeddings.py +1 -1
  134. lfx/components/models/__init__.py +64 -9
  135. lfx/components/models_and_agents/__init__.py +49 -0
  136. lfx/components/{agents → models_and_agents}/agent.py +49 -6
  137. lfx/components/models_and_agents/embedding_model.py +423 -0
  138. lfx/components/models_and_agents/language_model.py +398 -0
  139. lfx/components/{agents → models_and_agents}/mcp_component.py +84 -45
  140. lfx/components/{helpers → models_and_agents}/memory.py +1 -1
  141. lfx/components/nvidia/system_assist.py +1 -1
  142. lfx/components/olivya/olivya.py +1 -1
  143. lfx/components/ollama/ollama.py +235 -14
  144. lfx/components/openrouter/openrouter.py +49 -147
  145. lfx/components/processing/__init__.py +9 -57
  146. lfx/components/processing/converter.py +1 -1
  147. lfx/components/processing/dataframe_operations.py +1 -1
  148. lfx/components/processing/parse_json_data.py +2 -2
  149. lfx/components/processing/parser.py +7 -2
  150. lfx/components/processing/split_text.py +1 -1
  151. lfx/components/qdrant/qdrant.py +1 -1
  152. lfx/components/redis/redis.py +1 -1
  153. lfx/components/twelvelabs/split_video.py +10 -0
  154. lfx/components/twelvelabs/video_file.py +12 -0
  155. lfx/components/utilities/__init__.py +43 -0
  156. lfx/components/{helpers → utilities}/calculator_core.py +1 -1
  157. lfx/components/{helpers → utilities}/current_date.py +1 -1
  158. lfx/components/{processing → utilities}/python_repl_core.py +1 -1
  159. lfx/components/vectorstores/__init__.py +0 -6
  160. lfx/components/vectorstores/local_db.py +9 -0
  161. lfx/components/youtube/youtube_transcripts.py +118 -30
  162. lfx/custom/custom_component/component.py +60 -3
  163. lfx/custom/custom_component/custom_component.py +68 -6
  164. lfx/field_typing/constants.py +1 -0
  165. lfx/graph/edge/base.py +45 -22
  166. lfx/graph/graph/base.py +5 -2
  167. lfx/graph/graph/schema.py +3 -2
  168. lfx/graph/state/model.py +15 -2
  169. lfx/graph/utils.py +6 -0
  170. lfx/graph/vertex/base.py +4 -1
  171. lfx/graph/vertex/param_handler.py +10 -7
  172. lfx/graph/vertex/vertex_types.py +1 -1
  173. lfx/helpers/__init__.py +12 -0
  174. lfx/helpers/flow.py +117 -0
  175. lfx/inputs/input_mixin.py +24 -1
  176. lfx/inputs/inputs.py +13 -1
  177. lfx/interface/components.py +161 -83
  178. lfx/io/schema.py +6 -0
  179. lfx/log/logger.py +5 -3
  180. lfx/schema/schema.py +5 -0
  181. lfx/services/database/__init__.py +5 -0
  182. lfx/services/database/service.py +25 -0
  183. lfx/services/deps.py +87 -22
  184. lfx/services/manager.py +19 -6
  185. lfx/services/mcp_composer/service.py +998 -157
  186. lfx/services/session.py +5 -0
  187. lfx/services/settings/base.py +51 -7
  188. lfx/services/settings/constants.py +8 -0
  189. lfx/services/storage/local.py +76 -46
  190. lfx/services/storage/service.py +152 -29
  191. lfx/template/field/base.py +3 -0
  192. lfx/utils/ssrf_protection.py +384 -0
  193. lfx/utils/validate_cloud.py +26 -0
  194. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/METADATA +38 -22
  195. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/RECORD +210 -196
  196. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/WHEEL +1 -1
  197. lfx/components/agents/cuga_agent.py +0 -1013
  198. lfx/components/datastax/astra_db.py +0 -77
  199. lfx/components/datastax/cassandra.py +0 -92
  200. lfx/components/logic/run_flow.py +0 -71
  201. lfx/components/models/embedding_model.py +0 -114
  202. lfx/components/models/language_model.py +0 -144
  203. lfx/components/vectorstores/astradb_graph.py +0 -326
  204. lfx/components/vectorstores/cassandra.py +0 -264
  205. lfx/components/vectorstores/cassandra_graph.py +0 -238
  206. lfx/components/vectorstores/chroma.py +0 -167
  207. lfx/components/vectorstores/clickhouse.py +0 -135
  208. lfx/components/vectorstores/couchbase.py +0 -102
  209. lfx/components/vectorstores/elasticsearch.py +0 -267
  210. lfx/components/vectorstores/faiss.py +0 -111
  211. lfx/components/vectorstores/graph_rag.py +0 -141
  212. lfx/components/vectorstores/hcd.py +0 -314
  213. lfx/components/vectorstores/milvus.py +0 -115
  214. lfx/components/vectorstores/mongodb_atlas.py +0 -213
  215. lfx/components/vectorstores/opensearch.py +0 -243
  216. lfx/components/vectorstores/pgvector.py +0 -72
  217. lfx/components/vectorstores/pinecone.py +0 -134
  218. lfx/components/vectorstores/qdrant.py +0 -109
  219. lfx/components/vectorstores/supabase.py +0 -76
  220. lfx/components/vectorstores/upstash.py +0 -124
  221. lfx/components/vectorstores/vectara.py +0 -97
  222. lfx/components/vectorstores/vectara_rag.py +0 -164
  223. lfx/components/vectorstores/weaviate.py +0 -89
  224. /lfx/components/{data → data_source}/mock_data.py +0 -0
  225. /lfx/components/datastax/{astra_vectorize.py → astradb_vectorize.py} +0 -0
  226. /lfx/components/{logic → flow_controls}/data_conditional_router.py +0 -0
  227. /lfx/components/{logic → flow_controls}/flow_tool.py +0 -0
  228. /lfx/components/{logic → flow_controls}/listen.py +0 -0
  229. /lfx/components/{logic → flow_controls}/notify.py +0 -0
  230. /lfx/components/{logic → flow_controls}/pass_message.py +0 -0
  231. /lfx/components/{logic → flow_controls}/sub_flow.py +0 -0
  232. /lfx/components/{processing → models_and_agents}/prompt.py +0 -0
  233. /lfx/components/{helpers → processing}/create_list.py +0 -0
  234. /lfx/components/{helpers → processing}/output_parser.py +0 -0
  235. /lfx/components/{helpers → processing}/store_message.py +0 -0
  236. /lfx/components/{helpers → utilities}/id_generator.py +0 -0
  237. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev26.dist-info}/entry_points.txt +0 -0
@@ -1,191 +1,27 @@
1
- import re
2
- from collections import defaultdict
3
- from dataclasses import asdict, dataclass, field
4
-
5
- from astrapy import DataAPIClient, Database
6
- from astrapy.data.info.reranking import RerankServiceOptions
7
- from astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions
8
- from langchain_astradb import AstraDBVectorStore, VectorServiceOptions
9
- from langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment
1
+ from astrapy import DataAPIClient
10
2
  from langchain_core.documents import Document
11
3
 
4
+ from lfx.base.datastax.astradb_base import AstraDBBaseComponent
12
5
  from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
13
6
  from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
14
7
  from lfx.helpers.data import docs_to_data
15
- from lfx.inputs.inputs import FloatInput, NestedDictInput
16
- from lfx.io import (
17
- BoolInput,
18
- DropdownInput,
19
- HandleInput,
20
- IntInput,
21
- QueryInput,
22
- SecretStrInput,
23
- StrInput,
24
- )
8
+ from lfx.io import BoolInput, DropdownInput, FloatInput, HandleInput, IntInput, NestedDictInput, QueryInput, StrInput
25
9
  from lfx.schema.data import Data
26
10
  from lfx.serialization import serialize
27
11
  from lfx.utils.version import get_version_info
28
12
 
29
13
 
30
14
  @vector_store_connection
31
- class AstraDBVectorStoreComponent(LCVectorStoreComponent):
15
+ class AstraDBVectorStoreComponent(AstraDBBaseComponent, LCVectorStoreComponent):
32
16
  display_name: str = "Astra DB"
33
17
  description: str = "Ingest and search documents in Astra DB"
34
- documentation: str = "https://docs.langflow.org/bundles-datastax#astra-db"
18
+ documentation: str = "https://docs.langflow.org/bundles-datastax"
35
19
  name = "AstraDB"
36
20
  icon: str = "AstraDB"
37
21
 
38
- _cached_vector_store: AstraDBVectorStore | None = None
39
-
40
- @dataclass
41
- class NewDatabaseInput:
42
- functionality: str = "create"
43
- fields: dict[str, dict] = field(
44
- default_factory=lambda: {
45
- "data": {
46
- "node": {
47
- "name": "create_database",
48
- "description": "Please allow several minutes for creation to complete.",
49
- "display_name": "Create new database",
50
- "field_order": ["01_new_database_name", "02_cloud_provider", "03_region"],
51
- "template": {
52
- "01_new_database_name": StrInput(
53
- name="new_database_name",
54
- display_name="Name",
55
- info="Name of the new database to create in Astra DB.",
56
- required=True,
57
- ),
58
- "02_cloud_provider": DropdownInput(
59
- name="cloud_provider",
60
- display_name="Cloud provider",
61
- info="Cloud provider for the new database.",
62
- options=[],
63
- required=True,
64
- real_time_refresh=True,
65
- ),
66
- "03_region": DropdownInput(
67
- name="region",
68
- display_name="Region",
69
- info="Region for the new database.",
70
- options=[],
71
- required=True,
72
- ),
73
- },
74
- },
75
- }
76
- }
77
- )
78
-
79
- @dataclass
80
- class NewCollectionInput:
81
- functionality: str = "create"
82
- fields: dict[str, dict] = field(
83
- default_factory=lambda: {
84
- "data": {
85
- "node": {
86
- "name": "create_collection",
87
- "description": "Please allow several seconds for creation to complete.",
88
- "display_name": "Create new collection",
89
- "field_order": [
90
- "01_new_collection_name",
91
- "02_embedding_generation_provider",
92
- "03_embedding_generation_model",
93
- "04_dimension",
94
- ],
95
- "template": {
96
- "01_new_collection_name": StrInput(
97
- name="new_collection_name",
98
- display_name="Name",
99
- info="Name of the new collection to create in Astra DB.",
100
- required=True,
101
- ),
102
- "02_embedding_generation_provider": DropdownInput(
103
- name="embedding_generation_provider",
104
- display_name="Embedding generation method",
105
- info="Provider to use for generating embeddings.",
106
- helper_text=(
107
- "To create collections with more embedding provider options, go to "
108
- '<a class="underline" href="https://astra.datastax.com/" target=" _blank" '
109
- 'rel="noopener noreferrer">your database in Astra DB</a>'
110
- ),
111
- real_time_refresh=True,
112
- required=True,
113
- options=[],
114
- ),
115
- "03_embedding_generation_model": DropdownInput(
116
- name="embedding_generation_model",
117
- display_name="Embedding model",
118
- info="Model to use for generating embeddings.",
119
- real_time_refresh=True,
120
- options=[],
121
- ),
122
- "04_dimension": IntInput(
123
- name="dimension",
124
- display_name="Dimensions",
125
- info="Dimensions of the embeddings to generate.",
126
- value=None,
127
- ),
128
- },
129
- },
130
- }
131
- }
132
- )
133
-
134
22
  inputs = [
135
- SecretStrInput(
136
- name="token",
137
- display_name="Astra DB Application Token",
138
- info="Authentication token for accessing Astra DB.",
139
- value="ASTRA_DB_APPLICATION_TOKEN",
140
- required=True,
141
- real_time_refresh=True,
142
- input_types=[],
143
- ),
144
- DropdownInput(
145
- name="environment",
146
- display_name="Environment",
147
- info="The environment for the Astra DB API Endpoint.",
148
- options=["prod", "test", "dev"],
149
- value="prod",
150
- advanced=True,
151
- real_time_refresh=True,
152
- combobox=True,
153
- ),
154
- DropdownInput(
155
- name="database_name",
156
- display_name="Database",
157
- info="The Database name for the Astra DB instance.",
158
- required=True,
159
- refresh_button=True,
160
- real_time_refresh=True,
161
- dialog_inputs=asdict(NewDatabaseInput()),
162
- combobox=True,
163
- ),
164
- DropdownInput(
165
- name="api_endpoint",
166
- display_name="Astra DB API Endpoint",
167
- info="The API Endpoint for the Astra DB instance. Supercedes database selection.",
168
- advanced=True,
169
- ),
170
- DropdownInput(
171
- name="keyspace",
172
- display_name="Keyspace",
173
- info="Optional keyspace within Astra DB to use for the collection.",
174
- advanced=True,
175
- options=[],
176
- real_time_refresh=True,
177
- ),
178
- DropdownInput(
179
- name="collection_name",
180
- display_name="Collection",
181
- info="The name of the collection within Astra DB where the vectors will be stored.",
182
- required=True,
183
- refresh_button=True,
184
- real_time_refresh=True,
185
- dialog_inputs=asdict(NewCollectionInput()),
186
- combobox=True,
187
- show=False,
188
- ),
23
+ *AstraDBBaseComponent.inputs,
24
+ *LCVectorStoreComponent.inputs,
189
25
  HandleInput(
190
26
  name="embedding_model",
191
27
  display_name="Embedding Model",
@@ -194,7 +30,32 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
194
30
  required=False,
195
31
  show=True,
196
32
  ),
197
- *LCVectorStoreComponent.inputs,
33
+ StrInput(
34
+ name="content_field",
35
+ display_name="Content Field",
36
+ info="Field to use as the text content field for the vector store.",
37
+ advanced=True,
38
+ ),
39
+ StrInput(
40
+ name="deletion_field",
41
+ display_name="Deletion Based On Field",
42
+ info="When this parameter is provided, documents in the target collection with "
43
+ "metadata field values matching the input metadata field value will be deleted "
44
+ "before new data is loaded.",
45
+ advanced=True,
46
+ ),
47
+ BoolInput(
48
+ name="ignore_invalid_documents",
49
+ display_name="Ignore Invalid Documents",
50
+ info="Boolean flag to determine whether to ignore invalid documents at runtime.",
51
+ advanced=True,
52
+ ),
53
+ NestedDictInput(
54
+ name="astradb_vectorstore_kwargs",
55
+ display_name="AstraDBVectorStore Parameters",
56
+ info="Optional dictionary of additional parameters for the AstraDBVectorStore.",
57
+ advanced=True,
58
+ ),
198
59
  DropdownInput(
199
60
  name="search_method",
200
61
  display_name="Search Method",
@@ -254,856 +115,152 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
254
115
  info="Optional dictionary of filters to apply to the search query.",
255
116
  advanced=True,
256
117
  ),
257
- BoolInput(
258
- name="autodetect_collection",
259
- display_name="Autodetect Collection",
260
- info="Boolean flag to determine whether to autodetect the collection.",
261
- advanced=True,
262
- value=True,
263
- ),
264
- StrInput(
265
- name="content_field",
266
- display_name="Content Field",
267
- info="Field to use as the text content field for the vector store.",
268
- advanced=True,
269
- ),
270
- StrInput(
271
- name="deletion_field",
272
- display_name="Deletion Based On Field",
273
- info="When this parameter is provided, documents in the target collection with "
274
- "metadata field values matching the input metadata field value will be deleted "
275
- "before new data is loaded.",
276
- advanced=True,
277
- ),
278
- BoolInput(
279
- name="ignore_invalid_documents",
280
- display_name="Ignore Invalid Documents",
281
- info="Boolean flag to determine whether to ignore invalid documents at runtime.",
282
- advanced=True,
283
- ),
284
- NestedDictInput(
285
- name="astradb_vectorstore_kwargs",
286
- display_name="AstraDBVectorStore Parameters",
287
- info="Optional dictionary of additional parameters for the AstraDBVectorStore.",
288
- advanced=True,
289
- ),
290
118
  ]
291
119
 
292
- @classmethod
293
- def map_cloud_providers(cls):
294
- # TODO: Programmatically fetch the regions for each cloud provider
295
- return {
296
- "dev": {
297
- "Amazon Web Services": {
298
- "id": "aws",
299
- "regions": ["us-west-2"],
300
- },
301
- "Google Cloud Platform": {
302
- "id": "gcp",
303
- "regions": ["us-central1", "europe-west4"],
304
- },
305
- },
306
- "test": {
307
- "Google Cloud Platform": {
308
- "id": "gcp",
309
- "regions": ["us-central1"],
310
- },
311
- },
312
- "prod": {
313
- "Amazon Web Services": {
314
- "id": "aws",
315
- "regions": ["us-east-2", "ap-south-1", "eu-west-1"],
316
- },
317
- "Google Cloud Platform": {
318
- "id": "gcp",
319
- "regions": ["us-east1"],
320
- },
321
- "Microsoft Azure": {
322
- "id": "azure",
323
- "regions": ["westus3"],
324
- },
325
- },
326
- }
327
-
328
- @classmethod
329
- def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):
330
- try:
331
- # Get the admin object
332
- client = DataAPIClient(environment=environment)
333
- admin_client = client.get_admin()
334
- db_admin = admin_client.get_database_admin(api_endpoint, token=token)
335
-
336
- # Get the list of embedding providers
337
- embedding_providers = db_admin.find_embedding_providers()
338
-
339
- vectorize_providers_mapping = {}
340
- # Map the provider display name to the provider key and models
341
- for provider_key, provider_data in embedding_providers.embedding_providers.items():
342
- # Get the provider display name and models
343
- display_name = provider_data.display_name
344
- models = [model.name for model in provider_data.models]
345
-
346
- # Build our mapping
347
- vectorize_providers_mapping[display_name] = [provider_key, models]
348
-
349
- # Sort the resulting dictionary
350
- return defaultdict(list, dict(sorted(vectorize_providers_mapping.items())))
351
- except Exception as _: # noqa: BLE001
352
- return {}
353
-
354
- @classmethod
355
- async def create_database_api(
356
- cls,
357
- new_database_name: str,
358
- cloud_provider: str,
359
- region: str,
360
- token: str,
361
- environment: str | None = None,
362
- keyspace: str | None = None,
363
- ):
364
- client = DataAPIClient(environment=environment)
365
-
366
- # Get the admin object
367
- admin_client = client.get_admin(token=token)
368
-
369
- # Get the environment, set to prod if null like
370
- my_env = environment or "prod"
371
-
372
- # Raise a value error if name isn't provided
373
- if not new_database_name:
374
- msg = "Database name is required to create a new database."
375
- raise ValueError(msg)
376
-
377
- # Call the create database function
378
- return await admin_client.async_create_database(
379
- name=new_database_name,
380
- cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider]["id"],
381
- region=region,
382
- keyspace=keyspace,
383
- wait_until_active=False,
384
- )
385
-
386
- @classmethod
387
- async def create_collection_api(
388
- cls,
389
- new_collection_name: str,
390
- token: str,
391
- api_endpoint: str,
392
- environment: str | None = None,
393
- keyspace: str | None = None,
394
- dimension: int | None = None,
395
- embedding_generation_provider: str | None = None,
396
- embedding_generation_model: str | None = None,
397
- reranker: str | None = None,
398
- ):
399
- # Build vectorize options, if needed
400
- vectorize_options = None
401
- if not dimension:
402
- providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)
403
- vectorize_options = VectorServiceOptions(
404
- provider=providers.get(embedding_generation_provider, [None, []])[0],
405
- model_name=embedding_generation_model,
406
- )
407
-
408
- # Raise a value error if name isn't provided
409
- if not new_collection_name:
410
- msg = "Collection name is required to create a new collection."
411
- raise ValueError(msg)
412
-
413
- # Define the base arguments being passed to the create collection function
414
- base_args = {
415
- "collection_name": new_collection_name,
416
- "token": token,
417
- "api_endpoint": api_endpoint,
418
- "keyspace": keyspace,
419
- "environment": environment,
420
- "embedding_dimension": dimension,
421
- "collection_vector_service_options": vectorize_options,
422
- }
423
-
424
- # Add optional arguments if the reranker is set
425
- if reranker:
426
- # Split the reranker field into a provider a model name
427
- provider, _ = reranker.split("/")
428
- base_args["collection_rerank"] = CollectionRerankOptions(
429
- service=RerankServiceOptions(provider=provider, model_name=reranker),
430
- )
431
- base_args["collection_lexical"] = CollectionLexicalOptions(analyzer="STANDARD")
432
-
433
- _AstraDBCollectionEnvironment(**base_args)
434
-
435
- @classmethod
436
- def get_database_list_static(cls, token: str, environment: str | None = None):
437
- client = DataAPIClient(environment=environment)
438
-
439
- # Get the admin object
440
- admin_client = client.get_admin(token=token)
441
-
442
- # Get the list of databases
443
- db_list = admin_client.list_databases()
444
-
445
- # Generate the api endpoint for each database
446
- db_info_dict = {}
447
- for db in db_list:
448
- try:
449
- # Get the API endpoint for the database
450
- api_endpoints = [db_reg.api_endpoint for db_reg in db.regions]
451
-
452
- # Get the number of collections
453
- try:
454
- # Get the number of collections in the database
455
- num_collections = len(
456
- client.get_database(
457
- api_endpoints[0],
458
- token=token,
459
- ).list_collection_names()
460
- )
461
- except Exception: # noqa: BLE001
462
- if db.status != "PENDING":
463
- continue
464
- num_collections = 0
465
-
466
- # Add the database to the dictionary
467
- db_info_dict[db.name] = {
468
- "api_endpoints": api_endpoints,
469
- "keyspaces": db.keyspaces,
470
- "collections": num_collections,
471
- "status": db.status if db.status != "ACTIVE" else None,
472
- "org_id": db.org_id if db.org_id else None,
473
- }
474
- except Exception: # noqa: BLE001
475
- pass
476
-
477
- return db_info_dict
478
-
479
- def get_database_list(self):
480
- return self.get_database_list_static(
481
- token=self.token,
482
- environment=self.environment,
483
- )
484
-
485
- @classmethod
486
- def get_api_endpoint_static(
487
- cls,
488
- token: str,
489
- environment: str | None = None,
490
- api_endpoint: str | None = None,
491
- database_name: str | None = None,
492
- ):
493
- # If the api_endpoint is set, return it
494
- if api_endpoint:
495
- return api_endpoint
496
-
497
- # Check if the database_name is like a url
498
- if database_name and database_name.startswith("https://"):
499
- return database_name
500
-
501
- # If the database is not set, nothing we can do.
502
- if not database_name:
503
- return None
504
-
505
- # Grab the database object
506
- db = cls.get_database_list_static(token=token, environment=environment).get(database_name)
507
- if not db:
508
- return None
509
-
510
- # Otherwise, get the URL from the database list
511
- endpoints = db.get("api_endpoints") or []
512
- return endpoints[0] if endpoints else None
513
-
514
- def get_api_endpoint(self):
515
- return self.get_api_endpoint_static(
516
- token=self.token,
517
- environment=self.environment,
518
- api_endpoint=self.api_endpoint,
519
- database_name=self.database_name,
520
- )
521
-
522
- @classmethod
523
- def get_database_id_static(cls, api_endpoint: str) -> str | None:
524
- # Pattern matches standard UUID format: 8-4-4-4-12 hexadecimal characters
525
- uuid_pattern = r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
526
- match = re.search(uuid_pattern, api_endpoint)
527
-
528
- return match.group(0) if match else None
529
-
530
- def get_database_id(self):
531
- return self.get_database_id_static(api_endpoint=self.get_api_endpoint())
532
-
533
- def get_keyspace(self):
534
- keyspace = self.keyspace
535
-
536
- if keyspace:
537
- return keyspace.strip()
538
-
539
- return "default_keyspace"
540
-
541
- def get_database_object(self, api_endpoint: str | None = None):
542
- try:
543
- client = DataAPIClient(environment=self.environment)
544
-
545
- return client.get_database(
546
- api_endpoint or self.get_api_endpoint(),
547
- token=self.token,
548
- keyspace=self.get_keyspace(),
549
- )
550
- except Exception as e:
551
- msg = f"Error fetching database object: {e}"
552
- raise ValueError(msg) from e
553
-
554
- def collection_data(self, collection_name: str, database: Database | None = None):
555
- try:
556
- if not database:
557
- client = DataAPIClient(environment=self.environment)
558
-
559
- database = client.get_database(
560
- self.get_api_endpoint(),
561
- token=self.token,
562
- keyspace=self.get_keyspace(),
563
- )
564
-
565
- collection = database.get_collection(collection_name)
566
-
567
- return collection.estimated_document_count()
568
- except Exception as e: # noqa: BLE001
569
- self.log(f"Error checking collection data: {e}")
570
-
571
- return None
572
-
573
- def _initialize_database_options(self):
574
- try:
575
- return [
576
- {
577
- "name": name,
578
- "status": info["status"],
579
- "collections": info["collections"],
580
- "api_endpoints": info["api_endpoints"],
581
- "keyspaces": info["keyspaces"],
582
- "org_id": info["org_id"],
583
- }
584
- for name, info in self.get_database_list().items()
585
- ]
586
- except Exception as e:
587
- msg = f"Error fetching database options: {e}"
588
- raise ValueError(msg) from e
589
-
590
- @classmethod
591
- def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:
592
- # Get the provider name from the collection
593
- provider_name = provider_name or (
594
- collection.definition.vector.service.provider
595
- if (
596
- collection
597
- and collection.definition
598
- and collection.definition.vector
599
- and collection.definition.vector.service
600
- )
601
- else None
120
+ async def update_build_config(
121
+ self,
122
+ build_config: dict,
123
+ field_value: str | dict,
124
+ field_name: str | None = None,
125
+ ) -> dict:
126
+ """Update build configuration with proper handling of embedding and search options."""
127
+ # Handle base astra db build config updates
128
+ build_config = await super().update_build_config(
129
+ build_config,
130
+ field_value=field_value,
131
+ field_name=field_name,
602
132
  )
603
133
 
604
- # If there is no provider, use the vector store icon
605
- if not provider_name or provider_name.lower() == "bring your own":
606
- return "vectorstores"
607
-
608
- # Map provider casings
609
- case_map = {
610
- "nvidia": "NVIDIA",
611
- "openai": "OpenAI",
612
- "amazon bedrock": "AmazonBedrockEmbeddings",
613
- "azure openai": "AzureOpenAiEmbeddings",
614
- "cohere": "Cohere",
615
- "jina ai": "JinaAI",
616
- "mistral ai": "MistralAI",
617
- "upstage": "Upstage",
618
- "voyage ai": "VoyageAI",
619
- }
134
+ # Set embedding model display based on provider selection
135
+ if isinstance(field_value, dict) and "02_embedding_generation_provider" in field_value:
136
+ embedding_provider = field_value.get("02_embedding_generation_provider")
137
+ is_custom_provider = embedding_provider and embedding_provider != "Bring your own"
138
+ provider = embedding_provider.lower() if is_custom_provider and embedding_provider is not None else None
620
139
 
621
- # Adjust the casing on some like nvidia
622
- return case_map[provider_name.lower()] if provider_name.lower() in case_map else provider_name.title()
623
-
624
- def _initialize_collection_options(self, api_endpoint: str | None = None):
625
- # Nothing to generate if we don't have an API endpoint yet
626
- api_endpoint = api_endpoint or self.get_api_endpoint()
627
- if not api_endpoint:
628
- return []
140
+ build_config["embedding_model"]["show"] = not bool(provider)
141
+ build_config["embedding_model"]["required"] = not bool(provider)
629
142
 
630
- # Retrieve the database object
631
- database = self.get_database_object(api_endpoint=api_endpoint)
632
-
633
- # Get the list of collections
634
- collection_list = database.list_collections(keyspace=self.get_keyspace())
635
-
636
- # Return the list of collections and metadata associated
637
- return [
638
- {
639
- "name": col.name,
640
- "records": self.collection_data(collection_name=col.name, database=database),
641
- "provider": (
642
- col.definition.vector.service.provider
643
- if col.definition.vector and col.definition.vector.service
644
- else None
645
- ),
646
- "icon": self.get_provider_icon(collection=col),
647
- "model": (
648
- col.definition.vector.service.model_name
649
- if col.definition.vector and col.definition.vector.service
650
- else None
651
- ),
652
- }
653
- for col in collection_list
654
- ]
655
-
656
- def reset_provider_options(self, build_config: dict) -> dict:
657
- """Reset provider options and related configurations in the build_config dictionary."""
658
- # Extract template path for cleaner access
659
- template = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
660
-
661
- # Get vectorize providers
662
- vectorize_providers_api = self.get_vectorize_providers(
663
- token=self.token,
664
- environment=self.environment,
665
- api_endpoint=build_config["api_endpoint"]["value"],
666
- )
667
-
668
- # Create a new dictionary with "Bring your own" first
669
- vectorize_providers: dict[str, list[list[str]]] = {"Bring your own": [[], []]}
670
-
671
- # Add the remaining items (only Nvidia) from the original dictionary
672
- vectorize_providers.update(
673
- {
674
- k: v
675
- for k, v in vectorize_providers_api.items()
676
- if k.lower() in ["nvidia"] # TODO: Eventually support more
677
- }
678
- )
679
-
680
- # Set provider options
681
- provider_field = "02_embedding_generation_provider"
682
- template[provider_field]["options"] = list(vectorize_providers.keys())
683
-
684
- # Add metadata for each provider option
685
- template[provider_field]["options_metadata"] = [
686
- {"icon": self.get_provider_icon(provider_name=provider)} for provider in template[provider_field]["options"]
687
- ]
688
-
689
- # Get selected embedding provider
690
- embedding_provider = template[provider_field]["value"]
691
- is_bring_your_own = embedding_provider and embedding_provider == "Bring your own"
692
-
693
- # Configure embedding model field
694
- model_field = "03_embedding_generation_model"
695
- template[model_field].update(
696
- {
697
- "options": vectorize_providers.get(embedding_provider, [[], []])[1],
698
- "placeholder": "Bring your own" if is_bring_your_own else None,
699
- "readonly": is_bring_your_own,
700
- "required": not is_bring_your_own,
701
- "value": None,
702
- }
703
- )
704
-
705
- # If this is a bring your own, set dimensions to 0
706
- return self.reset_dimension_field(build_config)
143
+ # Early return if no API endpoint is configured
144
+ if not self.get_api_endpoint():
145
+ return build_config
707
146
 
708
- def reset_dimension_field(self, build_config: dict) -> dict:
709
- """Reset dimension field options based on provided configuration."""
710
- # Extract template path for cleaner access
711
- template = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
147
+ # Configure search method and related options
148
+ return self._configure_search_options(build_config)
712
149
 
713
- # Get selected embedding model
714
- provider_field = "02_embedding_generation_provider"
715
- embedding_provider = template[provider_field]["value"]
716
- is_bring_your_own = embedding_provider and embedding_provider == "Bring your own"
150
+ def _configure_search_options(self, build_config: dict) -> dict:
151
+ """Configure hybrid search, reranker, and vector search options."""
152
+ # Detect available hybrid search capabilities
153
+ hybrid_capabilities = self._detect_hybrid_capabilities()
717
154
 
718
- # Configure dimension field
719
- dimension_field = "04_dimension"
720
- dimension_value = 1024 if not is_bring_your_own else None # TODO: Dynamically figure this out
721
- template[dimension_field].update(
722
- {
723
- "placeholder": dimension_value,
724
- "value": dimension_value,
725
- "readonly": not is_bring_your_own,
726
- "required": is_bring_your_own,
727
- }
728
- )
729
-
730
- return build_config
155
+ # Return if we haven't selected a collection
156
+ if not build_config["collection_name"]["options"] or not build_config["collection_name"]["value"]:
157
+ return build_config
731
158
 
732
- def reset_collection_list(self, build_config: dict) -> dict:
733
- """Reset collection list options based on provided configuration."""
734
159
  # Get collection options
735
- collection_options = self._initialize_collection_options(api_endpoint=build_config["api_endpoint"]["value"])
736
- # Update collection configuration
737
- collection_config = build_config["collection_name"]
738
- collection_config.update(
739
- {
740
- "options": [col["name"] for col in collection_options],
741
- "options_metadata": [{k: v for k, v in col.items() if k != "name"} for col in collection_options],
742
- }
743
- )
744
-
745
- # Reset selected collection if not in options
746
- if collection_config["value"] not in collection_config["options"]:
747
- collection_config["value"] = ""
160
+ collection_options = self._get_collection_options(build_config)
748
161
 
749
- # Set advanced status based on database selection
750
- collection_config["show"] = bool(build_config["database_name"]["value"])
162
+ # Get the selected collection index
163
+ index = build_config["collection_name"]["options"].index(build_config["collection_name"]["value"])
164
+ provider = build_config["collection_name"]["options_metadata"][index]["provider"]
165
+ build_config["embedding_model"]["show"] = not bool(provider)
166
+ build_config["embedding_model"]["required"] = not bool(provider)
751
167
 
752
- return build_config
168
+ # Determine search configuration
169
+ is_vector_search = build_config["search_method"]["value"] == "Vector Search"
170
+ is_autodetect = build_config["autodetect_collection"]["value"]
753
171
 
754
- def reset_database_list(self, build_config: dict) -> dict:
755
- """Reset database list options and related configurations."""
756
- # Get database options
757
- database_options = self._initialize_database_options()
172
+ # Apply hybrid search configuration
173
+ if hybrid_capabilities["available"]:
174
+ build_config["search_method"]["show"] = True
175
+ build_config["search_method"]["options"] = ["Hybrid Search", "Vector Search"]
176
+ build_config["search_method"]["value"] = build_config["search_method"].get("value", "Hybrid Search")
758
177
 
759
- # Update cloud provider options
760
- env = self.environment
761
- template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
762
- template["02_cloud_provider"]["options"] = list(self.map_cloud_providers()[env].keys())
178
+ build_config["reranker"]["options"] = hybrid_capabilities["reranker_models"]
179
+ build_config["reranker"]["options_metadata"] = hybrid_capabilities["reranker_metadata"]
180
+ if hybrid_capabilities["reranker_models"]:
181
+ build_config["reranker"]["value"] = hybrid_capabilities["reranker_models"][0]
182
+ else:
183
+ build_config["search_method"]["show"] = False
184
+ build_config["search_method"]["options"] = ["Vector Search"]
185
+ build_config["search_method"]["value"] = "Vector Search"
186
+ build_config["reranker"]["options"] = []
187
+ build_config["reranker"]["options_metadata"] = []
763
188
 
764
- # Update database configuration
765
- database_config = build_config["database_name"]
766
- database_config.update(
767
- {
768
- "options": [db["name"] for db in database_options],
769
- "options_metadata": [{k: v for k, v in db.items() if k != "name"} for db in database_options],
770
- }
189
+ # Configure reranker visibility and state
190
+ hybrid_enabled = (
191
+ collection_options["rerank_enabled"] and build_config["search_method"]["value"] == "Hybrid Search"
771
192
  )
772
193
 
773
- # Reset selections if value not in options
774
- if database_config["value"] not in database_config["options"]:
775
- database_config["value"] = ""
776
- build_config["api_endpoint"]["options"] = []
777
- build_config["api_endpoint"]["value"] = ""
778
- build_config["collection_name"]["show"] = False
194
+ build_config["reranker"]["show"] = hybrid_enabled
195
+ build_config["reranker"]["toggle_value"] = hybrid_enabled
196
+ build_config["reranker"]["toggle_disable"] = is_vector_search
779
197
 
780
- # Set advanced status based on token presence
781
- database_config["show"] = bool(build_config["token"]["value"])
782
-
783
- return build_config
198
+ # Configure lexical terms
199
+ lexical_visible = collection_options["lexical_enabled"] and not is_vector_search
200
+ build_config["lexical_terms"]["show"] = lexical_visible
201
+ build_config["lexical_terms"]["value"] = "" if is_vector_search else build_config["lexical_terms"]["value"]
784
202
 
785
- def reset_build_config(self, build_config: dict) -> dict:
786
- """Reset all build configuration options to default empty state."""
787
- # Reset database configuration
788
- database_config = build_config["database_name"]
789
- database_config.update({"options": [], "options_metadata": [], "value": "", "show": False})
790
- build_config["api_endpoint"]["options"] = []
791
- build_config["api_endpoint"]["value"] = ""
203
+ # Configure search type and score threshold
204
+ build_config["search_type"]["show"] = is_vector_search
205
+ build_config["search_score_threshold"]["show"] = is_vector_search
792
206
 
793
- # Reset hybrid search options
794
- build_config["reranker"]["options"] = []
795
- build_config["reranker"]["value"] = ""
796
- build_config["reranker"]["show"] = False
797
- build_config["lexical_terms"]["value"] = ""
798
- build_config["lexical_terms"]["show"] = False
799
-
800
- # Reset collection configuration
801
- collection_config = build_config["collection_name"]
802
- collection_config.update({"options": [], "options_metadata": [], "value": "", "show": False})
207
+ # Force similarity search for hybrid mode or autodetect
208
+ if hybrid_enabled or is_autodetect:
209
+ build_config["search_type"]["value"] = "Similarity"
803
210
 
804
211
  return build_config
805
212
 
806
- def _handle_hybrid_search_options(self, build_config: dict) -> dict:
807
- """Set hybrid search options in the build configuration."""
808
- # Detect what hybrid options are available
809
- # Get the admin object
810
- client = DataAPIClient(environment=self.environment)
213
+ def _detect_hybrid_capabilities(self) -> dict:
214
+ """Detect available hybrid search and reranking capabilities."""
215
+ environment = self.get_environment(self.environment)
216
+ client = DataAPIClient(environment=environment)
811
217
  admin_client = client.get_admin()
812
218
  db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)
813
219
 
814
- # We will try to get the reranking providers to see if its hybrid emabled
815
220
  try:
816
221
  providers = db_admin.find_reranking_providers()
817
- build_config["reranker"]["options"] = [
222
+ reranker_models = [
818
223
  model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models
819
224
  ]
820
- build_config["reranker"]["options_metadata"] = [
225
+ reranker_metadata = [
821
226
  {"icon": self.get_provider_icon(provider_name=model.name.split("/")[0])}
822
227
  for provider in providers.reranking_providers.values()
823
228
  for model in provider.models
824
229
  ]
825
- build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
826
-
827
- # Set the default search field to hybrid search
828
- build_config["search_method"]["show"] = True
829
- build_config["search_method"]["options"] = ["Hybrid Search", "Vector Search"]
830
- build_config["search_method"]["value"] = "Hybrid Search"
831
- except Exception as _: # noqa: BLE001
832
- build_config["reranker"]["options"] = []
833
- build_config["reranker"]["options_metadata"] = []
834
-
835
- # Set the default search field to vector search
836
- build_config["search_method"]["show"] = False
837
- build_config["search_method"]["options"] = ["Vector Search"]
838
- build_config["search_method"]["value"] = "Vector Search"
839
-
840
- return build_config
841
-
842
- async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
843
- """Update build configuration based on field name and value."""
844
- # Early return if no token provided
845
- if not self.token:
846
- return self.reset_build_config(build_config)
847
-
848
- # Database creation callback
849
- if field_name == "database_name" and isinstance(field_value, dict):
850
- if "01_new_database_name" in field_value:
851
- await self._create_new_database(build_config, field_value)
852
- return self.reset_collection_list(build_config)
853
- return self._update_cloud_regions(build_config, field_value)
854
-
855
- # Collection creation callback
856
- if field_name == "collection_name" and isinstance(field_value, dict):
857
- # Case 1: New collection creation
858
- if "01_new_collection_name" in field_value:
859
- await self._create_new_collection(build_config, field_value)
860
- return build_config
861
-
862
- # Case 2: Update embedding provider options
863
- if "02_embedding_generation_provider" in field_value:
864
- return self.reset_provider_options(build_config)
865
-
866
- # Case 3: Update dimension field
867
- if "03_embedding_generation_model" in field_value:
868
- return self.reset_dimension_field(build_config)
869
-
870
- # Initial execution or token/environment change
871
- first_run = field_name == "collection_name" and not field_value and not build_config["database_name"]["options"]
872
- if first_run or field_name in {"token", "environment"}:
873
- return self.reset_database_list(build_config)
874
-
875
- # Database selection change
876
- if field_name == "database_name" and not isinstance(field_value, dict):
877
- return self._handle_database_selection(build_config, field_value)
878
-
879
- # Keyspace selection change
880
- if field_name == "keyspace":
881
- return self.reset_collection_list(build_config)
882
-
883
- # Collection selection change
884
- if field_name == "collection_name" and not isinstance(field_value, dict):
885
- return self._handle_collection_selection(build_config, field_value)
886
-
887
- # Search method selection change
888
- if field_name == "search_method":
889
- is_vector_search = field_value == "Vector Search"
890
- is_autodetect = build_config["autodetect_collection"]["value"]
891
-
892
- # Configure lexical terms (same for both cases)
893
- build_config["lexical_terms"]["show"] = not is_vector_search
894
- build_config["lexical_terms"]["value"] = "" if is_vector_search else build_config["lexical_terms"]["value"]
895
-
896
- # Disable reranker disabling if hybrid search is selected
897
- build_config["reranker"]["show"] = not is_vector_search
898
- build_config["reranker"]["toggle_disable"] = not is_vector_search
899
- build_config["reranker"]["toggle_value"] = True
900
- build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
901
-
902
- # Toggle search type and score threshold based on search method
903
- build_config["search_type"]["show"] = is_vector_search
904
- build_config["search_score_threshold"]["show"] = is_vector_search
905
-
906
- # Make sure the search_type is set to "Similarity"
907
- if not is_vector_search or is_autodetect:
908
- build_config["search_type"]["value"] = "Similarity"
909
-
910
- return build_config
911
-
912
- async def _create_new_database(self, build_config: dict, field_value: dict) -> None:
913
- """Create a new database and update build config options."""
914
- try:
915
- await self.create_database_api(
916
- new_database_name=field_value["01_new_database_name"],
917
- token=self.token,
918
- keyspace=self.get_keyspace(),
919
- environment=self.environment,
920
- cloud_provider=field_value["02_cloud_provider"],
921
- region=field_value["03_region"],
922
- )
923
- except Exception as e:
924
- msg = f"Error creating database: {e}"
925
- raise ValueError(msg) from e
926
-
927
- build_config["database_name"]["options"].append(field_value["01_new_database_name"])
928
- build_config["database_name"]["options_metadata"].append(
929
- {
930
- "status": "PENDING",
931
- "collections": 0,
932
- "api_endpoints": [],
933
- "keyspaces": [self.get_keyspace()],
934
- "org_id": None,
935
- }
936
- )
937
-
938
- def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:
939
- """Update cloud provider regions in build config."""
940
- env = self.environment
941
- cloud_provider = field_value["02_cloud_provider"]
942
-
943
- # Update the region options based on the selected cloud provider
944
- template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
945
- template["03_region"]["options"] = self.map_cloud_providers()[env][cloud_provider]["regions"]
946
-
947
- # Reset the the 03_region value if it's not in the new options
948
- if template["03_region"]["value"] not in template["03_region"]["options"]:
949
- template["03_region"]["value"] = None
950
-
951
- return build_config
952
-
953
- async def _create_new_collection(self, build_config: dict, field_value: dict) -> None:
954
- """Create a new collection and update build config options."""
955
- embedding_provider = field_value.get("02_embedding_generation_provider")
956
- try:
957
- await self.create_collection_api(
958
- new_collection_name=field_value["01_new_collection_name"],
959
- token=self.token,
960
- api_endpoint=build_config["api_endpoint"]["value"],
961
- environment=self.environment,
962
- keyspace=self.get_keyspace(),
963
- dimension=field_value.get("04_dimension") if embedding_provider == "Bring your own" else None,
964
- embedding_generation_provider=embedding_provider,
965
- embedding_generation_model=field_value.get("03_embedding_generation_model"),
966
- reranker=self.reranker,
967
- )
968
- except Exception as e:
969
- msg = f"Error creating collection: {e}"
970
- raise ValueError(msg) from e
971
-
972
- provider = embedding_provider.lower() if embedding_provider and embedding_provider != "Bring your own" else None
973
- build_config["collection_name"].update(
974
- {
975
- "value": field_value["01_new_collection_name"],
976
- "options": build_config["collection_name"]["options"] + [field_value["01_new_collection_name"]],
230
+ except Exception as e: # noqa: BLE001
231
+ self.log(f"Hybrid search not available: {e}")
232
+ return {
233
+ "available": False,
234
+ "reranker_models": [],
235
+ "reranker_metadata": [],
977
236
  }
978
- )
979
- build_config["embedding_model"]["show"] = not bool(provider)
980
- build_config["embedding_model"]["required"] = not bool(provider)
981
- build_config["collection_name"]["options_metadata"].append(
982
- {
983
- "records": 0,
984
- "provider": provider,
985
- "icon": self.get_provider_icon(provider_name=provider),
986
- "model": field_value.get("03_embedding_generation_model"),
237
+ else:
238
+ return {
239
+ "available": True,
240
+ "reranker_models": reranker_models,
241
+ "reranker_metadata": reranker_metadata,
987
242
  }
988
- )
989
-
990
- # Make sure we always show the reranker options if the collection is hybrid enabled
991
- # And right now they always are
992
- build_config["lexical_terms"]["show"] = True
993
-
994
- def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:
995
- """Handle database selection and update related configurations."""
996
- build_config = self.reset_database_list(build_config)
997
-
998
- # Reset collection list if database selection changes
999
- if field_value not in build_config["database_name"]["options"]:
1000
- build_config["database_name"]["value"] = ""
1001
- return build_config
1002
-
1003
- # Get the api endpoint for the selected database
1004
- index = build_config["database_name"]["options"].index(field_value)
1005
- build_config["api_endpoint"]["options"] = build_config["database_name"]["options_metadata"][index][
1006
- "api_endpoints"
1007
- ]
1008
- build_config["api_endpoint"]["value"] = build_config["database_name"]["options_metadata"][index][
1009
- "api_endpoints"
1010
- ][0]
1011
-
1012
- # Get the org_id for the selected database
1013
- org_id = build_config["database_name"]["options_metadata"][index]["org_id"]
1014
- if not org_id:
1015
- return build_config
1016
-
1017
- # Update the list of keyspaces based on the db info
1018
- build_config["keyspace"]["options"] = build_config["database_name"]["options_metadata"][index]["keyspaces"]
1019
- build_config["keyspace"]["value"] = (
1020
- build_config["keyspace"]["options"] and build_config["keyspace"]["options"][0]
1021
- if build_config["keyspace"]["value"] not in build_config["keyspace"]["options"]
1022
- else build_config["keyspace"]["value"]
1023
- )
1024
-
1025
- # Get the database id for the selected database
1026
- db_id = self.get_database_id_static(api_endpoint=build_config["api_endpoint"]["value"])
1027
- keyspace = self.get_keyspace()
1028
-
1029
- # Update the helper text for the embedding provider field
1030
- template = build_config["collection_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
1031
- template["02_embedding_generation_provider"]["helper_text"] = (
1032
- "To create collections with more embedding provider options, go to "
1033
- f'<a class="underline" target="_blank" rel="noopener noreferrer" '
1034
- f'href="https://astra.datastax.com/org/{org_id}/database/{db_id}/data-explorer?createCollection=1&namespace={keyspace}">'
1035
- "your database in Astra DB</a>."
1036
- )
1037
-
1038
- # Reset provider options
1039
- build_config = self.reset_provider_options(build_config)
1040
-
1041
- # Handle hybrid search options
1042
- build_config = self._handle_hybrid_search_options(build_config)
1043
-
1044
- return self.reset_collection_list(build_config)
1045
243
 
1046
- def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:
1047
- """Handle collection selection and update embedding options."""
1048
- build_config["autodetect_collection"]["value"] = True
1049
- build_config = self.reset_collection_list(build_config)
1050
-
1051
- # Reset embedding model if collection selection changes
1052
- if field_value and field_value not in build_config["collection_name"]["options"]:
1053
- build_config["collection_name"]["options"].append(field_value)
1054
- build_config["collection_name"]["options_metadata"].append(
1055
- {
1056
- "records": 0,
1057
- "provider": None,
1058
- "icon": "vectorstores",
1059
- "model": None,
1060
- }
1061
- )
1062
- build_config["autodetect_collection"]["value"] = False
1063
-
1064
- if not field_value:
1065
- return build_config
1066
-
1067
- # Get the selected collection index
1068
- index = build_config["collection_name"]["options"].index(field_value)
1069
-
1070
- # Set the provider of the selected collection
1071
- provider = build_config["collection_name"]["options_metadata"][index]["provider"]
1072
- build_config["embedding_model"]["show"] = not bool(provider)
1073
- build_config["embedding_model"]["required"] = not bool(provider)
1074
-
1075
- # Grab the collection object
244
+ def _get_collection_options(self, build_config: dict) -> dict:
245
+ """Retrieve collection-level search options."""
1076
246
  database = self.get_database_object(api_endpoint=build_config["api_endpoint"]["value"])
1077
247
  collection = database.get_collection(
1078
- name=field_value,
248
+ name=build_config["collection_name"]["value"],
1079
249
  keyspace=build_config["keyspace"]["value"],
1080
250
  )
1081
251
 
1082
- # Check if hybrid and lexical are enabled
1083
252
  col_options = collection.options()
1084
- hyb_enabled = col_options.rerank and col_options.rerank.enabled
1085
- lex_enabled = col_options.lexical and col_options.lexical.enabled
1086
- user_hyb_enabled = build_config["search_method"]["value"] == "Hybrid Search"
1087
-
1088
- # Reranker visible when both the collection supports it and the user selected Hybrid
1089
- hybrid_active = bool(hyb_enabled and user_hyb_enabled)
1090
- build_config["reranker"]["show"] = hybrid_active
1091
- build_config["reranker"]["toggle_value"] = hybrid_active
1092
- build_config["reranker"]["toggle_disable"] = False # allow user to toggle if visible
1093
-
1094
- # If hybrid is active, lock search_type to "Similarity"
1095
- if hybrid_active:
1096
- build_config["search_type"]["value"] = "Similarity"
1097
253
 
1098
- # Show the lexical terms option only if the collection enables lexical search
1099
- build_config["lexical_terms"]["show"] = bool(lex_enabled)
1100
-
1101
- return build_config
254
+ return {
255
+ "rerank_enabled": bool(col_options.rerank and col_options.rerank.enabled),
256
+ "lexical_enabled": bool(col_options.lexical and col_options.lexical.enabled),
257
+ }
1102
258
 
1103
259
  @check_cached_vector_store
1104
260
  def build_vector_store(self):
1105
261
  try:
1106
262
  from langchain_astradb import AstraDBVectorStore
263
+ from langchain_astradb.utils.astradb import HybridSearchMode
1107
264
  except ImportError as e:
1108
265
  msg = (
1109
266
  "Could not import langchain Astra DB integration package. "
@@ -1164,7 +321,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1164
321
  **embedding_params,
1165
322
  **additional_params,
1166
323
  )
1167
- except Exception as e:
324
+ except ValueError as e:
1168
325
  msg = f"Error initializing AstraDBVectorStore: {e}"
1169
326
  raise ValueError(msg) from e
1170
327
 
@@ -1196,7 +353,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1196
353
  delete_values = list({doc.metadata[self.deletion_field] for doc in documents})
1197
354
  self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.")
1198
355
  collection.delete_many({f"metadata.{self.deletion_field}": {"$in": delete_values}})
1199
- except Exception as e:
356
+ except ValueError as e:
1200
357
  msg = f"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}"
1201
358
  raise ValueError(msg) from e
1202
359
 
@@ -1204,7 +361,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1204
361
  self.log(f"Adding {len(documents)} documents to the Vector Store.")
1205
362
  try:
1206
363
  vector_store.add_documents(documents)
1207
- except Exception as e:
364
+ except ValueError as e:
1208
365
  msg = f"Error adding documents to AstraDBVectorStore: {e}"
1209
366
  raise ValueError(msg) from e
1210
367
  else:
@@ -1257,7 +414,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1257
414
 
1258
415
  try:
1259
416
  search_args = self._build_search_args()
1260
- except Exception as e:
417
+ except ValueError as e:
1261
418
  msg = f"Error in AstraDBVectorStore._build_search_args: {e}"
1262
419
  raise ValueError(msg) from e
1263
420
 
@@ -1271,7 +428,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1271
428
  try:
1272
429
  self.log(f"Calling vector_store.{search_method} with args: {search_args}")
1273
430
  docs = getattr(vector_store, search_method)(**search_args)
1274
- except Exception as e:
431
+ except ValueError as e:
1275
432
  msg = f"Error performing {search_method} in AstraDBVectorStore: {e}"
1276
433
  raise ValueError(msg) from e
1277
434