lfx-nightly 0.1.13.dev2__py3-none-any.whl → 0.1.13.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lfx-nightly might be problematic; see the package registry's advisory for more details.

Files changed (53)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +17 -1
  3. lfx/base/agents/utils.py +15 -2
  4. lfx/base/datastax/__init__.py +5 -0
  5. lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} +81 -471
  6. lfx/base/mcp/util.py +30 -10
  7. lfx/components/datastax/__init__.py +12 -6
  8. lfx/components/datastax/{astra_assistant_manager.py → astradb_assistant_manager.py} +1 -0
  9. lfx/components/datastax/astradb_chatmemory.py +40 -0
  10. lfx/components/datastax/astradb_cql.py +5 -31
  11. lfx/components/datastax/astradb_graph.py +9 -123
  12. lfx/components/datastax/astradb_tool.py +12 -52
  13. lfx/components/datastax/astradb_vectorstore.py +133 -976
  14. lfx/components/datastax/create_assistant.py +1 -0
  15. lfx/components/datastax/create_thread.py +1 -0
  16. lfx/components/datastax/dotenv.py +1 -0
  17. lfx/components/datastax/get_assistant.py +1 -0
  18. lfx/components/datastax/getenvvar.py +1 -0
  19. lfx/components/datastax/graph_rag.py +1 -1
  20. lfx/components/datastax/list_assistants.py +1 -0
  21. lfx/components/datastax/run.py +1 -0
  22. lfx/components/knowledge_bases/ingestion.py +17 -9
  23. lfx/components/knowledge_bases/retrieval.py +16 -8
  24. lfx/components/vectorstores/__init__.py +0 -6
  25. lfx/graph/edge/base.py +2 -2
  26. {lfx_nightly-0.1.13.dev2.dist-info → lfx_nightly-0.1.13.dev3.dist-info}/METADATA +1 -1
  27. {lfx_nightly-0.1.13.dev2.dist-info → lfx_nightly-0.1.13.dev3.dist-info}/RECORD +30 -51
  28. lfx/components/datastax/astra_db.py +0 -77
  29. lfx/components/datastax/cassandra.py +0 -92
  30. lfx/components/vectorstores/astradb_graph.py +0 -326
  31. lfx/components/vectorstores/cassandra.py +0 -264
  32. lfx/components/vectorstores/cassandra_graph.py +0 -238
  33. lfx/components/vectorstores/chroma.py +0 -167
  34. lfx/components/vectorstores/clickhouse.py +0 -135
  35. lfx/components/vectorstores/couchbase.py +0 -102
  36. lfx/components/vectorstores/elasticsearch.py +0 -267
  37. lfx/components/vectorstores/faiss.py +0 -111
  38. lfx/components/vectorstores/graph_rag.py +0 -141
  39. lfx/components/vectorstores/hcd.py +0 -314
  40. lfx/components/vectorstores/milvus.py +0 -115
  41. lfx/components/vectorstores/mongodb_atlas.py +0 -213
  42. lfx/components/vectorstores/opensearch.py +0 -243
  43. lfx/components/vectorstores/pgvector.py +0 -72
  44. lfx/components/vectorstores/pinecone.py +0 -134
  45. lfx/components/vectorstores/qdrant.py +0 -109
  46. lfx/components/vectorstores/supabase.py +0 -76
  47. lfx/components/vectorstores/upstash.py +0 -124
  48. lfx/components/vectorstores/vectara.py +0 -97
  49. lfx/components/vectorstores/vectara_rag.py +0 -164
  50. lfx/components/vectorstores/weaviate.py +0 -89
  51. /lfx/components/datastax/{astra_vectorize.py → astradb_vectorize.py} +0 -0
  52. {lfx_nightly-0.1.13.dev2.dist-info → lfx_nightly-0.1.13.dev3.dist-info}/WHEEL +0 -0
  53. {lfx_nightly-0.1.13.dev2.dist-info → lfx_nightly-0.1.13.dev3.dist-info}/entry_points.txt +0 -0
@@ -1,41 +1,23 @@
1
1
  import re
2
2
  from collections import defaultdict
3
3
  from dataclasses import asdict, dataclass, field
4
+ from typing import Any
4
5
 
5
6
  from astrapy import DataAPIClient, Database
6
- from astrapy.data.info.reranking import RerankServiceOptions
7
- from astrapy.info import CollectionDescriptor, CollectionLexicalOptions, CollectionRerankOptions
8
- from langchain_astradb import AstraDBVectorStore, VectorServiceOptions
9
- from langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment
10
- from langchain_core.documents import Document
11
-
12
- from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
13
- from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
14
- from lfx.helpers.data import docs_to_data
15
- from lfx.inputs.inputs import FloatInput, NestedDictInput
7
+ from langchain_astradb.utils.astradb import _AstraDBCollectionEnvironment
8
+
9
+ from lfx.custom.custom_component.component import Component
16
10
  from lfx.io import (
17
11
  BoolInput,
18
12
  DropdownInput,
19
- HandleInput,
20
13
  IntInput,
21
- QueryInput,
22
14
  SecretStrInput,
23
15
  StrInput,
24
16
  )
25
- from lfx.schema.data import Data
26
- from lfx.serialization import serialize
27
- from lfx.utils.version import get_version_info
28
-
29
17
 
30
- @vector_store_connection
31
- class AstraDBVectorStoreComponent(LCVectorStoreComponent):
32
- display_name: str = "Astra DB"
33
- description: str = "Ingest and search documents in Astra DB"
34
- documentation: str = "https://docs.datastax.com/en/langflow/astra-components.html"
35
- name = "AstraDB"
36
- icon: str = "AstraDB"
37
18
 
38
- _cached_vector_store: AstraDBVectorStore | None = None
19
+ class AstraDBBaseComponent(Component):
20
+ """Base class for AstraDB components with common functionality."""
39
21
 
40
22
  @dataclass
41
23
  class NewDatabaseInput:
@@ -186,74 +168,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
186
168
  combobox=True,
187
169
  show=False,
188
170
  ),
189
- HandleInput(
190
- name="embedding_model",
191
- display_name="Embedding Model",
192
- input_types=["Embeddings"],
193
- info="Specify the Embedding Model. Not required for Astra Vectorize collections.",
194
- required=False,
195
- show=False,
196
- ),
197
- *LCVectorStoreComponent.inputs,
198
- DropdownInput(
199
- name="search_method",
200
- display_name="Search Method",
201
- info=(
202
- "Determine how your content is matched: Vector finds semantic similarity, "
203
- "and Hybrid Search (suggested) combines both approaches "
204
- "with a reranker."
205
- ),
206
- options=["Hybrid Search", "Vector Search"], # TODO: Restore Lexical Search?
207
- options_metadata=[{"icon": "SearchHybrid"}, {"icon": "SearchVector"}],
208
- value="Vector Search",
209
- advanced=True,
210
- real_time_refresh=True,
211
- ),
212
- DropdownInput(
213
- name="reranker",
214
- display_name="Reranker",
215
- info="Post-retrieval model that re-scores results for optimal relevance ranking.",
216
- show=False,
217
- toggle=True,
218
- ),
219
- QueryInput(
220
- name="lexical_terms",
221
- display_name="Lexical Terms",
222
- info="Add additional terms/keywords to augment search precision.",
223
- placeholder="Enter terms to search...",
224
- separator=" ",
225
- show=False,
226
- value="",
227
- ),
228
- IntInput(
229
- name="number_of_results",
230
- display_name="Number of Search Results",
231
- info="Number of search results to return.",
232
- advanced=True,
233
- value=4,
234
- ),
235
- DropdownInput(
236
- name="search_type",
237
- display_name="Search Type",
238
- info="Search type to use",
239
- options=["Similarity", "Similarity with score threshold", "MMR (Max Marginal Relevance)"],
240
- value="Similarity",
241
- advanced=True,
242
- ),
243
- FloatInput(
244
- name="search_score_threshold",
245
- display_name="Search Score Threshold",
246
- info="Minimum similarity score threshold for search results. "
247
- "(when using 'Similarity with score threshold')",
248
- value=0,
249
- advanced=True,
250
- ),
251
- NestedDictInput(
252
- name="advanced_search_filter",
253
- display_name="Search Metadata Filter",
254
- info="Optional dictionary of filters to apply to the search query.",
255
- advanced=True,
256
- ),
257
171
  BoolInput(
258
172
  name="autodetect_collection",
259
173
  display_name="Autodetect Collection",
@@ -261,75 +175,51 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
261
175
  advanced=True,
262
176
  value=True,
263
177
  ),
264
- StrInput(
265
- name="content_field",
266
- display_name="Content Field",
267
- info="Field to use as the text content field for the vector store.",
268
- advanced=True,
269
- ),
270
- StrInput(
271
- name="deletion_field",
272
- display_name="Deletion Based On Field",
273
- info="When this parameter is provided, documents in the target collection with "
274
- "metadata field values matching the input metadata field value will be deleted "
275
- "before new data is loaded.",
276
- advanced=True,
277
- ),
278
- BoolInput(
279
- name="ignore_invalid_documents",
280
- display_name="Ignore Invalid Documents",
281
- info="Boolean flag to determine whether to ignore invalid documents at runtime.",
282
- advanced=True,
283
- ),
284
- NestedDictInput(
285
- name="astradb_vectorstore_kwargs",
286
- display_name="AstraDBVectorStore Parameters",
287
- info="Optional dictionary of additional parameters for the AstraDBVectorStore.",
288
- advanced=True,
289
- ),
290
178
  ]
291
179
 
292
180
  @classmethod
293
- def map_cloud_providers(cls):
294
- # TODO: Programmatically fetch the regions for each cloud provider
295
- return {
296
- "dev": {
297
- "Amazon Web Services": {
298
- "id": "aws",
299
- "regions": ["us-west-2"],
300
- },
301
- "Google Cloud Platform": {
302
- "id": "gcp",
303
- "regions": ["us-central1", "europe-west4"],
304
- },
305
- },
306
- "test": {
307
- "Google Cloud Platform": {
308
- "id": "gcp",
309
- "regions": ["us-central1"],
310
- },
311
- },
312
- "prod": {
313
- "Amazon Web Services": {
314
- "id": "aws",
315
- "regions": ["us-east-2", "ap-south-1", "eu-west-1"],
316
- },
317
- "Google Cloud Platform": {
318
- "id": "gcp",
319
- "regions": ["us-east1"],
320
- },
321
- "Microsoft Azure": {
322
- "id": "azure",
323
- "regions": ["westus3"],
324
- },
325
- },
181
+ def get_environment(cls, environment: str | None = None) -> str:
182
+ if not environment:
183
+ return "prod"
184
+ return environment
185
+
186
+ @classmethod
187
+ def map_cloud_providers(cls, token: str, environment: str | None = None) -> dict[str, dict[str, Any]]:
188
+ """Fetch all available cloud providers and regions."""
189
+ # Get the admin object
190
+ client = DataAPIClient(environment=cls.get_environment(environment))
191
+ admin_client = client.get_admin(token=token)
192
+
193
+ # Get the list of available regions
194
+ available_regions = admin_client.find_available_regions(only_org_enabled_regions=True)
195
+
196
+ provider_mapping: dict[str, dict[str, str]] = {
197
+ "AWS": {"name": "Amazon Web Services", "id": "aws"},
198
+ "GCP": {"name": "Google Cloud Platform", "id": "gcp"},
199
+ "Azure": {"name": "Microsoft Azure", "id": "azure"},
326
200
  }
327
201
 
202
+ result: dict[str, dict[str, Any]] = {}
203
+ for region_info in available_regions:
204
+ cloud_provider = region_info.cloud_provider
205
+ region = region_info.name
206
+
207
+ if cloud_provider in provider_mapping:
208
+ provider_name = provider_mapping[cloud_provider]["name"]
209
+ provider_id = provider_mapping[cloud_provider]["id"]
210
+
211
+ if provider_name not in result:
212
+ result[provider_name] = {"id": provider_id, "regions": []}
213
+
214
+ result[provider_name]["regions"].append(region)
215
+
216
+ return result
217
+
328
218
  @classmethod
329
219
  def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):
330
220
  try:
331
221
  # Get the admin object
332
- client = DataAPIClient(environment=environment)
222
+ client = DataAPIClient(environment=cls.get_environment(environment))
333
223
  admin_client = client.get_admin()
334
224
  db_admin = admin_client.get_database_admin(api_endpoint, token=token)
335
225
 
@@ -361,14 +251,15 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
361
251
  environment: str | None = None,
362
252
  keyspace: str | None = None,
363
253
  ):
364
- client = DataAPIClient(environment=environment)
254
+ # Get the environment, set to prod if null like
255
+ my_env = cls.get_environment(environment)
256
+
257
+ # Initialize the Data API client
258
+ client = DataAPIClient(environment=my_env)
365
259
 
366
260
  # Get the admin object
367
261
  admin_client = client.get_admin(token=token)
368
262
 
369
- # Get the environment, set to prod if null like
370
- my_env = environment or "prod"
371
-
372
263
  # Raise a value error if name isn't provided
373
264
  if not new_database_name:
374
265
  msg = "Database name is required to create a new database."
@@ -377,7 +268,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
377
268
  # Call the create database function
378
269
  return await admin_client.async_create_database(
379
270
  name=new_database_name,
380
- cloud_provider=cls.map_cloud_providers()[my_env][cloud_provider]["id"],
271
+ cloud_provider=cls.map_cloud_providers(token=token, environment=my_env)[cloud_provider]["id"],
381
272
  region=region,
382
273
  keyspace=keyspace,
383
274
  wait_until_active=False,
@@ -394,11 +285,21 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
394
285
  dimension: int | None = None,
395
286
  embedding_generation_provider: str | None = None,
396
287
  embedding_generation_model: str | None = None,
397
- reranker: str | None = None,
398
288
  ):
399
289
  # Build vectorize options, if needed
400
290
  vectorize_options = None
401
291
  if not dimension:
292
+ try:
293
+ from langchain_astradb import VectorServiceOptions
294
+ except ImportError as e:
295
+ msg = (
296
+ "langchain-astradb is required to create AstraDB collections with "
297
+ "Astra Vectorize embeddings. Please install it with "
298
+ "`pip install langchain-astradb`."
299
+ )
300
+ raise ImportError(msg) from e
301
+
302
+ environment = cls.get_environment(environment)
402
303
  providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)
403
304
  vectorize_options = VectorServiceOptions(
404
305
  provider=providers.get(embedding_generation_provider, [None, []])[0],
@@ -421,19 +322,11 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
421
322
  "collection_vector_service_options": vectorize_options,
422
323
  }
423
324
 
424
- # Add optional arguments if the reranker is set
425
- if reranker:
426
- # Split the reranker field into a provider a model name
427
- provider, _ = reranker.split("/")
428
- base_args["collection_rerank"] = CollectionRerankOptions(
429
- service=RerankServiceOptions(provider=provider, model_name=reranker),
430
- )
431
- base_args["collection_lexical"] = CollectionLexicalOptions(analyzer="STANDARD")
432
-
433
325
  _AstraDBCollectionEnvironment(**base_args)
434
326
 
435
327
  @classmethod
436
328
  def get_database_list_static(cls, token: str, environment: str | None = None):
329
+ environment = cls.get_environment(environment)
437
330
  client = DataAPIClient(environment=environment)
438
331
 
439
332
  # Get the admin object
@@ -503,6 +396,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
503
396
  return None
504
397
 
505
398
  # Grab the database object
399
+ environment = cls.get_environment(environment)
506
400
  db = cls.get_database_list_static(token=token, environment=environment).get(database_name)
507
401
  if not db:
508
402
  return None
@@ -551,7 +445,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
551
445
  msg = f"Error fetching database object: {e}"
552
446
  raise ValueError(msg) from e
553
447
 
554
- def collection_data(self, collection_name: str, database: Database | None = None):
448
+ def collection_data(self, collection_name: str, database: Database = None):
555
449
  try:
556
450
  if not database:
557
451
  client = DataAPIClient(environment=self.environment)
@@ -588,7 +482,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
588
482
  raise ValueError(msg) from e
589
483
 
590
484
  @classmethod
591
- def get_provider_icon(cls, collection: CollectionDescriptor | None = None, provider_name: str | None = None) -> str:
485
+ def get_provider_icon(cls, collection=None, provider_name: str | None = None) -> str:
592
486
  # Get the provider name from the collection
593
487
  provider_name = provider_name or (
594
488
  collection.definition.vector.service.provider
@@ -757,9 +651,13 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
757
651
  database_options = self._initialize_database_options()
758
652
 
759
653
  # Update cloud provider options
760
- env = self.environment
761
654
  template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
762
- template["02_cloud_provider"]["options"] = list(self.map_cloud_providers()[env].keys())
655
+ template["02_cloud_provider"]["options"] = list(
656
+ self.map_cloud_providers(
657
+ token=self.token,
658
+ environment=self.environment,
659
+ ).keys()
660
+ )
763
661
 
764
662
  # Update database configuration
765
663
  database_config = build_config["database_name"]
@@ -796,43 +694,12 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
796
694
 
797
695
  return build_config
798
696
 
799
- def _handle_hybrid_search_options(self, build_config: dict) -> dict:
800
- """Set hybrid search options in the build configuration."""
801
- # Detect what hybrid options are available
802
- # Get the admin object
803
- client = DataAPIClient(environment=self.environment)
804
- admin_client = client.get_admin()
805
- db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)
806
-
807
- # We will try to get the reranking providers to see if its hybrid emabled
808
- try:
809
- providers = db_admin.find_reranking_providers()
810
- build_config["reranker"]["options"] = [
811
- model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models
812
- ]
813
- build_config["reranker"]["options_metadata"] = [
814
- {"icon": self.get_provider_icon(provider_name=model.name.split("/")[0])}
815
- for provider in providers.reranking_providers.values()
816
- for model in provider.models
817
- ]
818
- build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
819
-
820
- # Set the default search field to hybrid search
821
- build_config["search_method"]["show"] = True
822
- build_config["search_method"]["options"] = ["Hybrid Search", "Vector Search"]
823
- build_config["search_method"]["value"] = "Hybrid Search"
824
- except Exception as _: # noqa: BLE001
825
- build_config["reranker"]["options"] = []
826
- build_config["reranker"]["options_metadata"] = []
827
-
828
- # Set the default search field to vector search
829
- build_config["search_method"]["show"] = False
830
- build_config["search_method"]["options"] = ["Vector Search"]
831
- build_config["search_method"]["value"] = "Vector Search"
832
-
833
- return build_config
834
-
835
- async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
697
+ async def update_build_config(
698
+ self,
699
+ build_config: dict,
700
+ field_value: str | dict,
701
+ field_name: str | None = None,
702
+ ) -> dict:
836
703
  """Update build configuration based on field name and value."""
837
704
  # Early return if no token provided
838
705
  if not self.token:
@@ -877,29 +744,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
877
744
  if field_name == "collection_name" and not isinstance(field_value, dict):
878
745
  return self._handle_collection_selection(build_config, field_value)
879
746
 
880
- # Search method selection change
881
- if field_name == "search_method":
882
- is_vector_search = field_value == "Vector Search"
883
- is_autodetect = build_config["autodetect_collection"]["value"]
884
-
885
- # Configure lexical terms (same for both cases)
886
- build_config["lexical_terms"]["show"] = not is_vector_search
887
- build_config["lexical_terms"]["value"] = "" if is_vector_search else build_config["lexical_terms"]["value"]
888
-
889
- # Disable reranker disabling if hybrid search is selected
890
- build_config["reranker"]["show"] = not is_vector_search
891
- build_config["reranker"]["toggle_disable"] = not is_vector_search
892
- build_config["reranker"]["toggle_value"] = True
893
- build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
894
-
895
- # Toggle search type and score threshold based on search method
896
- build_config["search_type"]["show"] = is_vector_search
897
- build_config["search_score_threshold"]["show"] = is_vector_search
898
-
899
- # Make sure the search_type is set to "Similarity"
900
- if not is_vector_search or is_autodetect:
901
- build_config["search_type"]["value"] = "Similarity"
902
-
903
747
  return build_config
904
748
 
905
749
  async def _create_new_database(self, build_config: dict, field_value: dict) -> None:
@@ -930,12 +774,14 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
930
774
 
931
775
  def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:
932
776
  """Update cloud provider regions in build config."""
933
- env = self.environment
934
777
  cloud_provider = field_value["02_cloud_provider"]
935
778
 
936
779
  # Update the region options based on the selected cloud provider
937
780
  template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
938
- template["03_region"]["options"] = self.map_cloud_providers()[env][cloud_provider]["regions"]
781
+ template["03_region"]["options"] = self.map_cloud_providers(
782
+ token=self.token,
783
+ environment=self.environment,
784
+ )[cloud_provider]["regions"]
939
785
 
940
786
  # Reset the the 03_region value if it's not in the new options
941
787
  if template["03_region"]["value"] not in template["03_region"]["options"]:
@@ -956,7 +802,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
956
802
  dimension=field_value.get("04_dimension") if embedding_provider == "Bring your own" else None,
957
803
  embedding_generation_provider=embedding_provider,
958
804
  embedding_generation_model=field_value.get("03_embedding_generation_model"),
959
- reranker=self.reranker,
960
805
  )
961
806
  except Exception as e:
962
807
  msg = f"Error creating collection: {e}"
@@ -969,8 +814,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
969
814
  "options": build_config["collection_name"]["options"] + [field_value["01_new_collection_name"]],
970
815
  }
971
816
  )
972
- build_config["embedding_model"]["show"] = not bool(provider)
973
- build_config["embedding_model"]["required"] = not bool(provider)
817
+
818
+ # Update collection metadata
974
819
  build_config["collection_name"]["options_metadata"].append(
975
820
  {
976
821
  "records": 0,
@@ -980,10 +825,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
980
825
  }
981
826
  )
982
827
 
983
- # Make sure we always show the reranker options if the collection is hybrid enabled
984
- # And right now they always are
985
- build_config["lexical_terms"]["show"] = True
986
-
987
828
  def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:
988
829
  """Handle database selection and update related configurations."""
989
830
  build_config = self.reset_database_list(build_config)
@@ -1031,9 +872,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1031
872
  # Reset provider options
1032
873
  build_config = self.reset_provider_options(build_config)
1033
874
 
1034
- # Handle hybrid search options
1035
- build_config = self._handle_hybrid_search_options(build_config)
1036
-
1037
875
  return self.reset_collection_list(build_config)
1038
876
 
1039
877
  def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:
@@ -1054,232 +892,4 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
1054
892
  )
1055
893
  build_config["autodetect_collection"]["value"] = False
1056
894
 
1057
- if not field_value:
1058
- return build_config
1059
-
1060
- # Get the selected collection index
1061
- index = build_config["collection_name"]["options"].index(field_value)
1062
-
1063
- # Set the provider of the selected collection
1064
- provider = build_config["collection_name"]["options_metadata"][index]["provider"]
1065
- build_config["embedding_model"]["show"] = not bool(provider)
1066
- build_config["embedding_model"]["required"] = not bool(provider)
1067
-
1068
- # Grab the collection object
1069
- database = self.get_database_object(api_endpoint=build_config["api_endpoint"]["value"])
1070
- collection = database.get_collection(
1071
- name=field_value,
1072
- keyspace=build_config["keyspace"]["value"],
1073
- )
1074
-
1075
- # Check if hybrid and lexical are enabled
1076
- col_options = collection.options()
1077
- hyb_enabled = col_options.rerank and col_options.rerank.enabled
1078
- lex_enabled = col_options.lexical and col_options.lexical.enabled
1079
- user_hyb_enabled = build_config["search_method"]["value"] == "Hybrid Search"
1080
-
1081
- # Reranker visible when both the collection supports it and the user selected Hybrid
1082
- hybrid_active = bool(hyb_enabled and user_hyb_enabled)
1083
- build_config["reranker"]["show"] = hybrid_active
1084
- build_config["reranker"]["toggle_value"] = hybrid_active
1085
- build_config["reranker"]["toggle_disable"] = False # allow user to toggle if visible
1086
-
1087
- # If hybrid is active, lock search_type to "Similarity"
1088
- if hybrid_active:
1089
- build_config["search_type"]["value"] = "Similarity"
1090
-
1091
- # Show the lexical terms option only if the collection enables lexical search
1092
- build_config["lexical_terms"]["show"] = bool(lex_enabled)
1093
-
1094
895
  return build_config
1095
-
1096
- @check_cached_vector_store
1097
- def build_vector_store(self):
1098
- try:
1099
- from langchain_astradb import AstraDBVectorStore
1100
- except ImportError as e:
1101
- msg = (
1102
- "Could not import langchain Astra DB integration package. "
1103
- "Please install it with `pip install langchain-astradb`."
1104
- )
1105
- raise ImportError(msg) from e
1106
-
1107
- # Get the embedding model and additional params
1108
- embedding_params = {"embedding": self.embedding_model} if self.embedding_model else {}
1109
-
1110
- # Get the additional parameters
1111
- additional_params = self.astradb_vectorstore_kwargs or {}
1112
-
1113
- # Get Langflow version and platform information
1114
- __version__ = get_version_info()["version"]
1115
- langflow_prefix = ""
1116
- # if os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE": # TODO: More precise way of detecting
1117
- # langflow_prefix = "ds-"
1118
-
1119
- # Get the database object
1120
- database = self.get_database_object()
1121
- autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection
1122
-
1123
- # Bundle up the auto-detect parameters
1124
- autodetect_params = {
1125
- "autodetect_collection": autodetect,
1126
- "content_field": (
1127
- self.content_field
1128
- if self.content_field and embedding_params
1129
- else (
1130
- "page_content"
1131
- if embedding_params
1132
- and self.collection_data(collection_name=self.collection_name, database=database) == 0
1133
- else None
1134
- )
1135
- ),
1136
- "ignore_invalid_documents": self.ignore_invalid_documents,
1137
- }
1138
-
1139
- # Choose HybridSearchMode based on the selected param
1140
- hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == "Hybrid Search" else HybridSearchMode.OFF
1141
-
1142
- # Attempt to build the Vector Store object
1143
- try:
1144
- vector_store = AstraDBVectorStore(
1145
- # Astra DB Authentication Parameters
1146
- token=self.token,
1147
- api_endpoint=database.api_endpoint,
1148
- namespace=database.keyspace,
1149
- collection_name=self.collection_name,
1150
- environment=self.environment,
1151
- # Hybrid Search Parameters
1152
- hybrid_search=hybrid_search_mode,
1153
- # Astra DB Usage Tracking Parameters
1154
- ext_callers=[(f"{langflow_prefix}langflow", __version__)],
1155
- # Astra DB Vector Store Parameters
1156
- **autodetect_params,
1157
- **embedding_params,
1158
- **additional_params,
1159
- )
1160
- except Exception as e:
1161
- msg = f"Error initializing AstraDBVectorStore: {e}"
1162
- raise ValueError(msg) from e
1163
-
1164
- # Add documents to the vector store
1165
- self._add_documents_to_vector_store(vector_store)
1166
-
1167
- return vector_store
1168
-
1169
- def _add_documents_to_vector_store(self, vector_store) -> None:
1170
- self.ingest_data = self._prepare_ingest_data()
1171
-
1172
- documents = []
1173
- for _input in self.ingest_data or []:
1174
- if isinstance(_input, Data):
1175
- documents.append(_input.to_lc_document())
1176
- else:
1177
- msg = "Vector Store Inputs must be Data objects."
1178
- raise TypeError(msg)
1179
-
1180
- documents = [
1181
- Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents
1182
- ]
1183
-
1184
- if documents and self.deletion_field:
1185
- self.log(f"Deleting documents where {self.deletion_field}")
1186
- try:
1187
- database = self.get_database_object()
1188
- collection = database.get_collection(self.collection_name, keyspace=database.keyspace)
1189
- delete_values = list({doc.metadata[self.deletion_field] for doc in documents})
1190
- self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.")
1191
- collection.delete_many({f"metadata.{self.deletion_field}": {"$in": delete_values}})
1192
- except Exception as e:
1193
- msg = f"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}"
1194
- raise ValueError(msg) from e
1195
-
1196
- if documents:
1197
- self.log(f"Adding {len(documents)} documents to the Vector Store.")
1198
- try:
1199
- vector_store.add_documents(documents)
1200
- except Exception as e:
1201
- msg = f"Error adding documents to AstraDBVectorStore: {e}"
1202
- raise ValueError(msg) from e
1203
- else:
1204
- self.log("No documents to add to the Vector Store.")
1205
-
1206
- def _map_search_type(self) -> str:
1207
- search_type_mapping = {
1208
- "Similarity with score threshold": "similarity_score_threshold",
1209
- "MMR (Max Marginal Relevance)": "mmr",
1210
- }
1211
-
1212
- return search_type_mapping.get(self.search_type, "similarity")
1213
-
1214
- def _build_search_args(self):
1215
- # Clean up the search query
1216
- query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None
1217
- lexical_terms = self.lexical_terms or None
1218
-
1219
- # Check if we have a search query, and if so set the args
1220
- if query:
1221
- args = {
1222
- "query": query,
1223
- "search_type": self._map_search_type(),
1224
- "k": self.number_of_results,
1225
- "score_threshold": self.search_score_threshold,
1226
- "lexical_query": lexical_terms,
1227
- }
1228
- elif self.advanced_search_filter:
1229
- args = {
1230
- "n": self.number_of_results,
1231
- }
1232
- else:
1233
- return {}
1234
-
1235
- filter_arg = self.advanced_search_filter or {}
1236
- if filter_arg:
1237
- args["filter"] = filter_arg
1238
-
1239
- return args
1240
-
1241
- def search_documents(self, vector_store=None) -> list[Data]:
1242
- vector_store = vector_store or self.build_vector_store()
1243
-
1244
- self.log(f"Search input: {self.search_query}")
1245
- self.log(f"Search type: {self.search_type}")
1246
- self.log(f"Number of results: {self.number_of_results}")
1247
- self.log(f"store.hybrid_search: {vector_store.hybrid_search}")
1248
- self.log(f"Lexical terms: {self.lexical_terms}")
1249
- self.log(f"Reranker: {self.reranker}")
1250
-
1251
- try:
1252
- search_args = self._build_search_args()
1253
- except Exception as e:
1254
- msg = f"Error in AstraDBVectorStore._build_search_args: {e}"
1255
- raise ValueError(msg) from e
1256
-
1257
- if not search_args:
1258
- self.log("No search input or filters provided. Skipping search.")
1259
- return []
1260
-
1261
- docs = []
1262
- search_method = "search" if "query" in search_args else "metadata_search"
1263
-
1264
- try:
1265
- self.log(f"Calling vector_store.{search_method} with args: {search_args}")
1266
- docs = getattr(vector_store, search_method)(**search_args)
1267
- except Exception as e:
1268
- msg = f"Error performing {search_method} in AstraDBVectorStore: {e}"
1269
- raise ValueError(msg) from e
1270
-
1271
- self.log(f"Retrieved documents: {len(docs)}")
1272
-
1273
- data = docs_to_data(docs)
1274
- self.log(f"Converted documents to data: {len(data)}")
1275
- self.status = data
1276
-
1277
- return data
1278
-
1279
- def get_retriever_kwargs(self):
1280
- search_args = self._build_search_args()
1281
-
1282
- return {
1283
- "search_type": self._map_search_type(),
1284
- "search_kwargs": search_args,
1285
- }