lfx-nightly 0.1.13.dev0__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +109 -29
  3. lfx/base/agents/events.py +102 -35
  4. lfx/base/agents/utils.py +15 -2
  5. lfx/base/composio/composio_base.py +24 -9
  6. lfx/base/datastax/__init__.py +5 -0
  7. lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} +84 -473
  8. lfx/base/io/chat.py +5 -4
  9. lfx/base/mcp/util.py +101 -15
  10. lfx/base/models/model_input_constants.py +74 -7
  11. lfx/base/models/ollama_constants.py +3 -0
  12. lfx/base/models/watsonx_constants.py +12 -0
  13. lfx/cli/commands.py +1 -1
  14. lfx/components/agents/__init__.py +3 -1
  15. lfx/components/agents/agent.py +47 -4
  16. lfx/components/agents/altk_agent.py +366 -0
  17. lfx/components/agents/cuga_agent.py +1 -1
  18. lfx/components/agents/mcp_component.py +32 -2
  19. lfx/components/amazon/amazon_bedrock_converse.py +1 -1
  20. lfx/components/apify/apify_actor.py +3 -3
  21. lfx/components/datastax/__init__.py +12 -6
  22. lfx/components/datastax/{astra_assistant_manager.py → astradb_assistant_manager.py} +1 -0
  23. lfx/components/datastax/astradb_chatmemory.py +40 -0
  24. lfx/components/datastax/astradb_cql.py +5 -31
  25. lfx/components/datastax/astradb_graph.py +9 -123
  26. lfx/components/datastax/astradb_tool.py +12 -52
  27. lfx/components/datastax/astradb_vectorstore.py +133 -976
  28. lfx/components/datastax/create_assistant.py +1 -0
  29. lfx/components/datastax/create_thread.py +1 -0
  30. lfx/components/datastax/dotenv.py +1 -0
  31. lfx/components/datastax/get_assistant.py +1 -0
  32. lfx/components/datastax/getenvvar.py +1 -0
  33. lfx/components/datastax/graph_rag.py +1 -1
  34. lfx/components/datastax/list_assistants.py +1 -0
  35. lfx/components/datastax/run.py +1 -0
  36. lfx/components/docling/__init__.py +3 -0
  37. lfx/components/docling/docling_remote_vlm.py +284 -0
  38. lfx/components/ibm/watsonx.py +25 -21
  39. lfx/components/input_output/chat.py +8 -0
  40. lfx/components/input_output/chat_output.py +8 -0
  41. lfx/components/knowledge_bases/ingestion.py +17 -9
  42. lfx/components/knowledge_bases/retrieval.py +16 -8
  43. lfx/components/logic/loop.py +4 -0
  44. lfx/components/mistral/mistral_embeddings.py +1 -1
  45. lfx/components/models/embedding_model.py +88 -7
  46. lfx/components/ollama/ollama.py +221 -14
  47. lfx/components/openrouter/openrouter.py +49 -147
  48. lfx/components/processing/parser.py +6 -1
  49. lfx/components/processing/structured_output.py +55 -17
  50. lfx/components/vectorstores/__init__.py +0 -6
  51. lfx/custom/custom_component/component.py +3 -2
  52. lfx/field_typing/constants.py +1 -0
  53. lfx/graph/edge/base.py +2 -2
  54. lfx/graph/graph/base.py +1 -1
  55. lfx/graph/graph/schema.py +3 -2
  56. lfx/graph/vertex/vertex_types.py +1 -1
  57. lfx/io/schema.py +6 -0
  58. lfx/schema/schema.py +5 -0
  59. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/METADATA +1 -1
  60. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/RECORD +63 -81
  61. lfx/components/datastax/astra_db.py +0 -77
  62. lfx/components/datastax/cassandra.py +0 -92
  63. lfx/components/vectorstores/astradb_graph.py +0 -326
  64. lfx/components/vectorstores/cassandra.py +0 -264
  65. lfx/components/vectorstores/cassandra_graph.py +0 -238
  66. lfx/components/vectorstores/chroma.py +0 -167
  67. lfx/components/vectorstores/clickhouse.py +0 -135
  68. lfx/components/vectorstores/couchbase.py +0 -102
  69. lfx/components/vectorstores/elasticsearch.py +0 -267
  70. lfx/components/vectorstores/faiss.py +0 -111
  71. lfx/components/vectorstores/graph_rag.py +0 -141
  72. lfx/components/vectorstores/hcd.py +0 -314
  73. lfx/components/vectorstores/milvus.py +0 -115
  74. lfx/components/vectorstores/mongodb_atlas.py +0 -213
  75. lfx/components/vectorstores/opensearch.py +0 -243
  76. lfx/components/vectorstores/pgvector.py +0 -72
  77. lfx/components/vectorstores/pinecone.py +0 -134
  78. lfx/components/vectorstores/qdrant.py +0 -109
  79. lfx/components/vectorstores/supabase.py +0 -76
  80. lfx/components/vectorstores/upstash.py +0 -124
  81. lfx/components/vectorstores/vectara.py +0 -97
  82. lfx/components/vectorstores/vectara_rag.py +0 -164
  83. lfx/components/vectorstores/weaviate.py +0 -89
  84. /lfx/components/datastax/{astra_vectorize.py → astradb_vectorize.py} +0 -0
  85. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/WHEEL +0 -0
  86. {lfx_nightly-0.1.13.dev0.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,41 +1,24 @@
1
1
  import re
2
2
  from collections import defaultdict
3
3
  from dataclasses import asdict, dataclass, field
4
+ from typing import Any
4
5
 
5
6
  from astrapy import DataAPIClient, Database
6
- from astrapy.data.info.reranking import RerankServiceOptions
7
- from astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions
8
- from langchain_astradb import AstraDBVectorStore, VectorServiceOptions
9
- from langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment
10
- from langchain_core.documents import Document
11
-
12
- from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
13
- from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
14
- from lfx.helpers.data import docs_to_data
15
- from lfx.inputs.inputs import FloatInput, NestedDictInput
7
+ from langchain_astradb.utils.astradb import _AstraDBCollectionEnvironment
8
+
9
+ from lfx.custom.custom_component.component import Component
16
10
  from lfx.io import (
17
11
  BoolInput,
18
12
  DropdownInput,
19
- HandleInput,
20
13
  IntInput,
21
- QueryInput,
22
14
  SecretStrInput,
23
15
  StrInput,
24
16
  )
25
- from lfx.schema.data import Data
26
- from lfx.serialization import serialize
27
- from lfx.utils.version import get_version_info
28
-
17
+ from lfx.log.logger import logger
29
18
 
30
- @vector_store_connection
31
- class AstraDBVectorStoreComponent(LCVectorStoreComponent):
32
- display_name: str = "Astra DB"
33
- description: str = "Ingest and search documents in Astra DB"
34
- documentation: str = "https://docs.datastax.com/en/langflow/astra-components.html"
35
- name = "AstraDB"
36
- icon: str = "AstraDB"
37
19
 
38
- _cached_vector_store: AstraDBVectorStore | None = None
20
+ class AstraDBBaseComponent(Component):
21
+ """Base class for AstraDB components with common functionality."""
39
22
 
40
23
  @dataclass
41
24
  class NewDatabaseInput:
@@ -186,74 +169,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
186
169
  combobox=True,
187
170
  show=False,
188
171
  ),
189
- HandleInput(
190
- name="embedding_model",
191
- display_name="Embedding Model",
192
- input_types=["Embeddings"],
193
- info="Specify the Embedding Model. Not required for Astra Vectorize collections.",
194
- required=False,
195
- show=False,
196
- ),
197
- *LCVectorStoreComponent.inputs,
198
- DropdownInput(
199
- name="search_method",
200
- display_name="Search Method",
201
- info=(
202
- "Determine how your content is matched: Vector finds semantic similarity, "
203
- "and Hybrid Search (suggested) combines both approaches "
204
- "with a reranker."
205
- ),
206
- options=["Hybrid Search", "Vector Search"], # TODO: Restore Lexical Search?
207
- options_metadata=[{"icon": "SearchHybrid"}, {"icon": "SearchVector"}],
208
- value="Vector Search",
209
- advanced=True,
210
- real_time_refresh=True,
211
- ),
212
- DropdownInput(
213
- name="reranker",
214
- display_name="Reranker",
215
- info="Post-retrieval model that re-scores results for optimal relevance ranking.",
216
- show=False,
217
- toggle=True,
218
- ),
219
- QueryInput(
220
- name="lexical_terms",
221
- display_name="Lexical Terms",
222
- info="Add additional terms/keywords to augment search precision.",
223
- placeholder="Enter terms to search...",
224
- separator=" ",
225
- show=False,
226
- value="",
227
- ),
228
- IntInput(
229
- name="number_of_results",
230
- display_name="Number of Search Results",
231
- info="Number of search results to return.",
232
- advanced=True,
233
- value=4,
234
- ),
235
- DropdownInput(
236
- name="search_type",
237
- display_name="Search Type",
238
- info="Search type to use",
239
- options=["Similarity", "Similarity with score threshold", "MMR (Max Marginal Relevance)"],
240
- value="Similarity",
241
- advanced=True,
242
- ),
243
- FloatInput(
244
- name="search_score_threshold",
245
- display_name="Search Score Threshold",
246
- info="Minimum similarity score threshold for search results. "
247
- "(when using 'Similarity with score threshold')",
248
- value=0,
249
- advanced=True,
250
- ),
251
- NestedDictInput(
252
- name="advanced_search_filter",
253
- display_name="Search Metadata Filter",
254
- info="Optional dictionary of filters to apply to the search query.",
255
- advanced=True,
256
- ),
257
172
  BoolInput(
258
173
  name="autodetect_collection",
259
174
  display_name="Autodetect Collection",
@@ -261,75 +176,51 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
261
176
  advanced=True,
262
177
  value=True,
263
178
  ),
264
- StrInput(
265
- name="content_field",
266
- display_name="Content Field",
267
- info="Field to use as the text content field for the vector store.",
268
- advanced=True,
269
- ),
270
- StrInput(
271
- name="deletion_field",
272
- display_name="Deletion Based On Field",
273
- info="When this parameter is provided, documents in the target collection with "
274
- "metadata field values matching the input metadata field value will be deleted "
275
- "before new data is loaded.",
276
- advanced=True,
277
- ),
278
- BoolInput(
279
- name="ignore_invalid_documents",
280
- display_name="Ignore Invalid Documents",
281
- info="Boolean flag to determine whether to ignore invalid documents at runtime.",
282
- advanced=True,
283
- ),
284
- NestedDictInput(
285
- name="astradb_vectorstore_kwargs",
286
- display_name="AstraDBVectorStore Parameters",
287
- info="Optional dictionary of additional parameters for the AstraDBVectorStore.",
288
- advanced=True,
289
- ),
290
179
  ]
291
180
 
292
181
  @classmethod
293
- def map_cloud_providers(cls):
294
- # TODO: Programmatically fetch the regions for each cloud provider
295
- return {
296
- "dev": {
297
- "Amazon Web Services": {
298
- "id": "aws",
299
- "regions": ["us-west-2"],
300
- },
301
- "Google Cloud Platform": {
302
- "id": "gcp",
303
- "regions": ["us-central1", "europe-west4"],
304
- },
305
- },
306
- "test": {
307
- "Google Cloud Platform": {
308
- "id": "gcp",
309
- "regions": ["us-central1"],
310
- },
311
- },
312
- "prod": {
313
- "Amazon Web Services": {
314
- "id": "aws",
315
- "regions": ["us-east-2", "ap-south-1", "eu-west-1"],
316
- },
317
- "Google Cloud Platform": {
318
- "id": "gcp",
319
- "regions": ["us-east1"],
320
- },
321
- "Microsoft Azure": {
322
- "id": "azure",
323
- "regions": ["westus3"],
324
- },
325
- },
182
+ def get_environment(cls, environment: str | None = None) -> str:
183
+ if not environment:
184
+ return "prod"
185
+ return environment
186
+
187
+ @classmethod
188
+ def map_cloud_providers(cls, token: str, environment: str | None = None) -> dict[str, dict[str, Any]]:
189
+ """Fetch all available cloud providers and regions."""
190
+ # Get the admin object
191
+ client = DataAPIClient(environment=cls.get_environment(environment))
192
+ admin_client = client.get_admin(token=token)
193
+
194
+ # Get the list of available regions
195
+ available_regions = admin_client.find_available_regions(only_org_enabled_regions=True)
196
+
197
+ provider_mapping: dict[str, dict[str, str]] = {
198
+ "AWS": {"name": "Amazon Web Services", "id": "aws"},
199
+ "GCP": {"name": "Google Cloud Platform", "id": "gcp"},
200
+ "Azure": {"name": "Microsoft Azure", "id": "azure"},
326
201
  }
327
202
 
203
+ result: dict[str, dict[str, Any]] = {}
204
+ for region_info in available_regions:
205
+ cloud_provider = region_info.cloud_provider
206
+ region = region_info.name
207
+
208
+ if cloud_provider in provider_mapping:
209
+ provider_name = provider_mapping[cloud_provider]["name"]
210
+ provider_id = provider_mapping[cloud_provider]["id"]
211
+
212
+ if provider_name not in result:
213
+ result[provider_name] = {"id": provider_id, "regions": []}
214
+
215
+ result[provider_name]["regions"].append(region)
216
+
217
+ return result
218
+
328
219
  @classmethod
329
220
  def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):
330
221
  try:
331
222
  # Get the admin object
332
- client = DataAPIClient(environment=environment)
223
+ client = DataAPIClient(environment=cls.get_environment(environment))
333
224
  admin_client = client.get_admin()
334
225
  db_admin = admin_client.get_database_admin(api_endpoint, token=token)
335
226
 
@@ -361,14 +252,15 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
361
252
  environment: str | None = None,
362
253
  keyspace: str | None = None,
363
254
  ):
364
- client = DataAPIClient(environment=environment)
255
+ # Get the environment, set to prod if null like
256
+ my_env = cls.get_environment(environment)
257
+
258
+ # Initialize the Data API client
259
+ client = DataAPIClient(environment=my_env)
365
260
 
366
261
  # Get the admin object
367
262
  admin_client = client.get_admin(token=token)
368
263
 
369
- # Get the environment, set to prod if null like
370
- my_env = environment or "prod"
371
-
372
264
  # Raise a value error if name isn't provided
373
265
  if not new_database_name:
374
266
  msg = "Database name is required to create a new database."
@@ -377,7 +269,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
377
269
  # Call the create database function
378
270
  return await admin_client.async_create_database(
379
271
  name=new_database_name,
380
- cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider]["id"],
272
+ cloud_provider=cls.map_cloud_providers(token=token, environment=my_env)[cloud_provider]["id"],
381
273
  region=region,
382
274
  keyspace=keyspace,
383
275
  wait_until_active=False,
@@ -394,11 +286,21 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
394
286
  dimension: int | None = None,
395
287
  embedding_generation_provider: str | None = None,
396
288
  embedding_generation_model: str | None = None,
397
- reranker: str | None = None,
398
289
  ):
399
290
  # Build vectorize options, if needed
400
291
  vectorize_options = None
401
292
  if not dimension:
293
+ try:
294
+ from langchain_astradb import VectorServiceOptions
295
+ except ImportError as e:
296
+ msg = (
297
+ "langchain-astradb is required to create AstraDB collections with "
298
+ "Astra Vectorize embeddings. Please install it with "
299
+ "`pip install langchain-astradb`."
300
+ )
301
+ raise ImportError(msg) from e
302
+
303
+ environment = cls.get_environment(environment)
402
304
  providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)
403
305
  vectorize_options = VectorServiceOptions(
404
306
  provider=providers.get(embedding_generation_provider, [None, []])[0],
@@ -421,19 +323,11 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
421
323
  "collection_vector_service_options": vectorize_options,
422
324
  }
423
325
 
424
- # Add optional arguments if the reranker is set
425
- if reranker:
426
- # Split the reranker field into a provider a model name
427
- provider, _ = reranker.split("/")
428
- base_args["collection_rerank"] = CollectionRerankOptions(
429
- service=RerankServiceOptions(provider=provider, model_name=reranker),
430
- )
431
- base_args["collection_lexical"] = CollectionLexicalOptions(analyzer="STANDARD")
432
-
433
326
  _AstraDBCollectionEnvironment(**base_args)
434
327
 
435
328
  @classmethod
436
329
  def get_database_list_static(cls, token: str, environment: str | None = None):
330
+ environment = cls.get_environment(environment)
437
331
  client = DataAPIClient(environment=environment)
438
332
 
439
333
  # Get the admin object
@@ -471,8 +365,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
471
365
  "status": db.status if db.status != "ACTIVE" else None,
472
366
  "org_id": db.org_id if db.org_id else None,
473
367
  }
474
- except Exception: # noqa: BLE001
475
- pass
368
+ except Exception as e: # noqa: BLE001
369
+ logger.debug("Failed to get metadata for database %s: %s", db.name, e)
476
370
 
477
371
  return db_info_dict
478
372
 
@@ -503,6 +397,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
503
397
  return None
504
398
 
505
399
  # Grab the database object
400
+ environment = cls.get_environment(environment)
506
401
  db = cls.get_database_list_static(token=token, environment=environment).get(database_name)
507
402
  if not db:
508
403
  return None
@@ -551,7 +446,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
551
446
  msg = f"Error fetching database object: {e}"
552
447
  raise ValueError(msg) from e
553
448
 
554
- def collection_data(self, collection_name: str, database: Database | None = None):
449
+ def collection_data(self, collection_name: str, database: Database = None):
555
450
  try:
556
451
  if not database:
557
452
  client = DataAPIClient(environment=self.environment)
@@ -588,7 +483,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
588
483
  raise ValueError(msg) from e
589
484
 
590
485
  @classmethod
591
- def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:
486
+ def get_provider_icon(cls, collection=None, provider_name: str | None = None) -> str:
592
487
  # Get the provider name from the collection
593
488
  provider_name = provider_name or (
594
489
  collection.definition.vector.service.provider
@@ -757,9 +652,13 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
757
652
  database_options = self._initialize_database_options()
758
653
 
759
654
  # Update cloud provider options
760
- env = self.environment
761
655
  template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
762
- template["02_cloud_provider"]["options"] = list(self.map_cloud_providers()[env].keys())
656
+ template["02_cloud_provider"]["options"] = list(
657
+ self.map_cloud_providers(
658
+ token=self.token,
659
+ environment=self.environment,
660
+ ).keys()
661
+ )
763
662
 
764
663
  # Update database configuration
765
664
  database_config = build_config["database_name"]
@@ -796,43 +695,12 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
796
695
 
797
696
  return build_config
798
697
 
799
- def _handle_hybrid_search_options(self, build_config: dict) -> dict:
800
- """Set hybrid search options in the build configuration."""
801
- # Detect what hybrid options are available
802
- # Get the admin object
803
- client = DataAPIClient(environment=self.environment)
804
- admin_client = client.get_admin()
805
- db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)
806
-
807
- # We will try to get the reranking providers to see if its hybrid emabled
808
- try:
809
- providers = db_admin.find_reranking_providers()
810
- build_config["reranker"]["options"] = [
811
- model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models
812
- ]
813
- build_config["reranker"]["options_metadata"] = [
814
- {"icon": self.get_provider_icon(provider_name=model.name.split("/")[0])}
815
- for provider in providers.reranking_providers.values()
816
- for model in provider.models
817
- ]
818
- build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
819
-
820
- # Set the default search field to hybrid search
821
- build_config["search_method"]["show"] = True
822
- build_config["search_method"]["options"] = ["Hybrid Search", "Vector Search"]
823
- build_config["search_method"]["value"] = "Hybrid Search"
824
- except Exception as _: # noqa: BLE001
825
- build_config["reranker"]["options"] = []
826
- build_config["reranker"]["options_metadata"] = []
827
-
828
- # Set the default search field to vector search
829
- build_config["search_method"]["show"] = False
830
- build_config["search_method"]["options"] = ["Vector Search"]
831
- build_config["search_method"]["value"] = "Vector Search"
832
-
833
- return build_config
834
-
835
- async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
698
+ async def update_build_config(
699
+ self,
700
+ build_config: dict,
701
+ field_value: str | dict,
702
+ field_name: str | None = None,
703
+ ) -> dict:
836
704
  """Update build configuration based on field name and value."""
837
705
  # Early return if no token provided
838
706
  if not self.token:
@@ -877,29 +745,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
877
745
  if field_name == "collection_name" and not isinstance(field_value, dict):
878
746
  return self._handle_collection_selection(build_config, field_value)
879
747
 
880
- # Search method selection change
881
- if field_name == "search_method":
882
- is_vector_search = field_value == "Vector Search"
883
- is_autodetect = build_config["autodetect_collection"]["value"]
884
-
885
- # Configure lexical terms (same for both cases)
886
- build_config["lexical_terms"]["show"] = not is_vector_search
887
- build_config["lexical_terms"]["value"] = "" if is_vector_search else build_config["lexical_terms"]["value"]
888
-
889
- # Disable reranker disabling if hybrid search is selected
890
- build_config["reranker"]["show"] = not is_vector_search
891
- build_config["reranker"]["toggle_disable"] = not is_vector_search
892
- build_config["reranker"]["toggle_value"] = True
893
- build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
894
-
895
- # Toggle search type and score threshold based on search method
896
- build_config["search_type"]["show"] = is_vector_search
897
- build_config["search_score_threshold"]["show"] = is_vector_search
898
-
899
- # Make sure the search_type is set to "Similarity"
900
- if not is_vector_search or is_autodetect:
901
- build_config["search_type"]["value"] = "Similarity"
902
-
903
748
  return build_config
904
749
 
905
750
  async def _create_new_database(self, build_config: dict, field_value: dict) -> None:
@@ -930,12 +775,14 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
930
775
 
931
776
  def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:
932
777
  """Update cloud provider regions in build config."""
933
- env = self.environment
934
778
  cloud_provider = field_value["02_cloud_provider"]
935
779
 
936
780
  # Update the region options based on the selected cloud provider
937
781
  template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
938
- template["03_region"]["options"] = self.map_cloud_providers()[env][cloud_provider]["regions"]
782
+ template["03_region"]["options"] = self.map_cloud_providers(
783
+ token=self.token,
784
+ environment=self.environment,
785
+ )[cloud_provider]["regions"]
939
786
 
940
787
  # Reset the the 03_region value if it's not in the new options
941
788
  if template["03_region"]["value"] not in template["03_region"]["options"]:
@@ -956,7 +803,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
956
803
  dimension=field_value.get("04_dimension") if embedding_provider == "Bring your own" else None,
957
804
  embedding_generation_provider=embedding_provider,
958
805
  embedding_generation_model=field_value.get("03_embedding_generation_model"),
959
- reranker=self.reranker,
960
806
  )
961
807
  except Exception as e:
962
808
  msg = f"Error creating collection: {e}"
@@ -969,8 +815,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
969
815
  "options": build_config["collection_name"]["options"] + [field_value["01_new_collection_name"]],
970
816
  }
971
817
  )
972
- build_config["embedding_model"]["show"] = not bool(provider)
973
- build_config["embedding_model"]["required"] = not bool(provider)
818
+
819
+ # Update collection metadata
974
820
  build_config["collection_name"]["options_metadata"].append(
975
821
  {
976
822
  "records": 0,
@@ -980,10 +826,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
980
826
  }
981
827
  )
982
828
 
983
- # Make sure we always show the reranker options if the collection is hybrid enabled
984
- # And right now they always are
985
- build_config["lexical_terms"]["show"] = True
986
-
987
829
  def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:
988
830
  """Handle database selection and update related configurations."""
989
831
  build_config = self.reset_database_list(build_config)
@@ -1031,9 +873,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1031
873
  # Reset provider options
1032
874
  build_config = self.reset_provider_options(build_config)
1033
875
 
1034
- # Handle hybrid search options
1035
- build_config = self._handle_hybrid_search_options(build_config)
1036
-
1037
876
  return self.reset_collection_list(build_config)
1038
877
 
1039
878
  def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:
@@ -1054,232 +893,4 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1054
893
  )
1055
894
  build_config["autodetect_collection"]["value"] = False
1056
895
 
1057
- if not field_value:
1058
- return build_config
1059
-
1060
- # Get the selected collection index
1061
- index = build_config["collection_name"]["options"].index(field_value)
1062
-
1063
- # Set the provider of the selected collection
1064
- provider = build_config["collection_name"]["options_metadata"][index]["provider"]
1065
- build_config["embedding_model"]["show"] = not bool(provider)
1066
- build_config["embedding_model"]["required"] = not bool(provider)
1067
-
1068
- # Grab the collection object
1069
- database = self.get_database_object(api_endpoint=build_config["api_endpoint"]["value"])
1070
- collection = database.get_collection(
1071
- name=field_value,
1072
- keyspace=build_config["keyspace"]["value"],
1073
- )
1074
-
1075
- # Check if hybrid and lexical are enabled
1076
- col_options = collection.options()
1077
- hyb_enabled = col_options.rerank and col_options.rerank.enabled
1078
- lex_enabled = col_options.lexical and col_options.lexical.enabled
1079
- user_hyb_enabled = build_config["search_method"]["value"] == "Hybrid Search"
1080
-
1081
- # Reranker visible when both the collection supports it and the user selected Hybrid
1082
- hybrid_active = bool(hyb_enabled and user_hyb_enabled)
1083
- build_config["reranker"]["show"] = hybrid_active
1084
- build_config["reranker"]["toggle_value"] = hybrid_active
1085
- build_config["reranker"]["toggle_disable"] = False # allow user to toggle if visible
1086
-
1087
- # If hybrid is active, lock search_type to "Similarity"
1088
- if hybrid_active:
1089
- build_config["search_type"]["value"] = "Similarity"
1090
-
1091
- # Show the lexical terms option only if the collection enables lexical search
1092
- build_config["lexical_terms"]["show"] = bool(lex_enabled)
1093
-
1094
896
  return build_config
1095
-
1096
- @check_cached_vector_store
1097
- def build_vector_store(self):
1098
- try:
1099
- from langchain_astradb import AstraDBVectorStore
1100
- except ImportError as e:
1101
- msg = (
1102
- "Could not import langchain Astra DB integration package. "
1103
- "Please install it with `pip install langchain-astradb`."
1104
- )
1105
- raise ImportError(msg) from e
1106
-
1107
- # Get the embedding model and additional params
1108
- embedding_params = {"embedding": self.embedding_model} if self.embedding_model else {}
1109
-
1110
- # Get the additional parameters
1111
- additional_params = self.astradb_vectorstore_kwargs or {}
1112
-
1113
- # Get Langflow version and platform information
1114
- __version__ = get_version_info()["version"]
1115
- langflow_prefix = ""
1116
- # if os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE": # TODO: More precise way of detecting
1117
- # langflow_prefix = "ds-"
1118
-
1119
- # Get the database object
1120
- database = self.get_database_object()
1121
- autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection
1122
-
1123
- # Bundle up the auto-detect parameters
1124
- autodetect_params = {
1125
- "autodetect_collection": autodetect,
1126
- "content_field": (
1127
- self.content_field
1128
- if self.content_field and embedding_params
1129
- else (
1130
- "page_content"
1131
- if embedding_params
1132
- and self.collection_data(collection_name=self.collection_name, database=database) == 0
1133
- else None
1134
- )
1135
- ),
1136
- "ignore_invalid_documents": self.ignore_invalid_documents,
1137
- }
1138
-
1139
- # Choose HybridSearchMode based on the selected param
1140
- hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == "Hybrid Search" else HybridSearchMode.OFF
1141
-
1142
- # Attempt to build the Vector Store object
1143
- try:
1144
- vector_store = AstraDBVectorStore(
1145
- # Astra DB Authentication Parameters
1146
- token=self.token,
1147
- api_endpoint=database.api_endpoint,
1148
- namespace=database.keyspace,
1149
- collection_name=self.collection_name,
1150
- environment=self.environment,
1151
- # Hybrid Search Parameters
1152
- hybrid_search=hybrid_search_mode,
1153
- # Astra DB Usage Tracking Parameters
1154
- ext_callers=[(f"{langflow_prefix}langflow", __version__)],
1155
- # Astra DB Vector Store Parameters
1156
- **autodetect_params,
1157
- **embedding_params,
1158
- **additional_params,
1159
- )
1160
- except Exception as e:
1161
- msg = f"Error initializing AstraDBVectorStore: {e}"
1162
- raise ValueError(msg) from e
1163
-
1164
- # Add documents to the vector store
1165
- self._add_documents_to_vector_store(vector_store)
1166
-
1167
- return vector_store
1168
-
1169
- def _add_documents_to_vector_store(self, vector_store) -> None:
1170
- self.ingest_data = self._prepare_ingest_data()
1171
-
1172
- documents = []
1173
- for _input in self.ingest_data or []:
1174
- if isinstance(_input, Data):
1175
- documents.append(_input.to_lc_document())
1176
- else:
1177
- msg = "Vector Store Inputs must be Data objects."
1178
- raise TypeError(msg)
1179
-
1180
- documents = [
1181
- Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents
1182
- ]
1183
-
1184
- if documents and self.deletion_field:
1185
- self.log(f"Deleting documents where {self.deletion_field}")
1186
- try:
1187
- database = self.get_database_object()
1188
- collection = database.get_collection(self.collection_name, keyspace=database.keyspace)
1189
- delete_values = list({doc.metadata[self.deletion_field] for doc in documents})
1190
- self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.")
1191
- collection.delete_many({f"metadata.{self.deletion_field}": {"$in": delete_values}})
1192
- except Exception as e:
1193
- msg = f"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}"
1194
- raise ValueError(msg) from e
1195
-
1196
- if documents:
1197
- self.log(f"Adding {len(documents)} documents to the Vector Store.")
1198
- try:
1199
- vector_store.add_documents(documents)
1200
- except Exception as e:
1201
- msg = f"Error adding documents to AstraDBVectorStore: {e}"
1202
- raise ValueError(msg) from e
1203
- else:
1204
- self.log("No documents to add to the Vector Store.")
1205
-
1206
- def _map_search_type(self) -> str:
1207
- search_type_mapping = {
1208
- "Similarity with score threshold": "similarity_score_threshold",
1209
- "MMR (Max Marginal Relevance)": "mmr",
1210
- }
1211
-
1212
- return search_type_mapping.get(self.search_type, "similarity")
1213
-
1214
- def _build_search_args(self):
1215
- # Clean up the search query
1216
- query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None
1217
- lexical_terms = self.lexical_terms or None
1218
-
1219
- # Check if we have a search query, and if so set the args
1220
- if query:
1221
- args = {
1222
- "query": query,
1223
- "search_type": self._map_search_type(),
1224
- "k": self.number_of_results,
1225
- "score_threshold": self.search_score_threshold,
1226
- "lexical_query": lexical_terms,
1227
- }
1228
- elif self.advanced_search_filter:
1229
- args = {
1230
- "n": self.number_of_results,
1231
- }
1232
- else:
1233
- return {}
1234
-
1235
- filter_arg = self.advanced_search_filter or {}
1236
- if filter_arg:
1237
- args["filter"] = filter_arg
1238
-
1239
- return args
1240
-
1241
- def search_documents(self, vector_store=None) -> list[Data]:
1242
- vector_store = vector_store or self.build_vector_store()
1243
-
1244
- self.log(f"Search input: {self.search_query}")
1245
- self.log(f"Search type: {self.search_type}")
1246
- self.log(f"Number of results: {self.number_of_results}")
1247
- self.log(f"store.hybrid_search: {vector_store.hybrid_search}")
1248
- self.log(f"Lexical terms: {self.lexical_terms}")
1249
- self.log(f"Reranker: {self.reranker}")
1250
-
1251
- try:
1252
- search_args = self._build_search_args()
1253
- except Exception as e:
1254
- msg = f"Error in AstraDBVectorStore._build_search_args: {e}"
1255
- raise ValueError(msg) from e
1256
-
1257
- if not search_args:
1258
- self.log("No search input or filters provided. Skipping search.")
1259
- return []
1260
-
1261
- docs = []
1262
- search_method = "search" if "query" in search_args else "metadata_search"
1263
-
1264
- try:
1265
- self.log(f"Calling vector_store.{search_method} with args: {search_args}")
1266
- docs = getattr(vector_store, search_method)(**search_args)
1267
- except Exception as e:
1268
- msg = f"Error performing {search_method} in AstraDBVectorStore: {e}"
1269
- raise ValueError(msg) from e
1270
-
1271
- self.log(f"Retrieved documents: {len(docs)}")
1272
-
1273
- data = docs_to_data(docs)
1274
- self.log(f"Converted documents to data: {len(data)}")
1275
- self.status = data
1276
-
1277
- return data
1278
-
1279
- def get_retriever_kwargs(self):
1280
- search_args = self._build_search_args()
1281
-
1282
- return {
1283
- "search_type": self._map_search_type(),
1284
- "search_kwargs": search_args,
1285
- }