lfx-nightly 0.1.12.dev42-py3-none-any.whl → 0.2.0.dev0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/_assets/component_index.json +1 -1
- lfx/base/agents/agent.py +109 -29
- lfx/base/agents/events.py +102 -35
- lfx/base/agents/utils.py +15 -2
- lfx/base/composio/composio_base.py +24 -9
- lfx/base/datastax/__init__.py +5 -0
- lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} +84 -473
- lfx/base/io/chat.py +5 -4
- lfx/base/mcp/util.py +101 -15
- lfx/base/models/cometapi_constants.py +54 -0
- lfx/base/models/model_input_constants.py +74 -7
- lfx/base/models/ollama_constants.py +3 -0
- lfx/base/models/watsonx_constants.py +12 -0
- lfx/cli/commands.py +1 -1
- lfx/components/agents/__init__.py +3 -1
- lfx/components/agents/agent.py +47 -4
- lfx/components/agents/altk_agent.py +366 -0
- lfx/components/agents/cuga_agent.py +1 -1
- lfx/components/agents/mcp_component.py +32 -2
- lfx/components/amazon/amazon_bedrock_converse.py +1 -1
- lfx/components/apify/apify_actor.py +3 -3
- lfx/components/cometapi/__init__.py +32 -0
- lfx/components/cometapi/cometapi.py +166 -0
- lfx/components/datastax/__init__.py +12 -6
- lfx/components/datastax/{astra_assistant_manager.py → astradb_assistant_manager.py} +1 -0
- lfx/components/datastax/astradb_chatmemory.py +40 -0
- lfx/components/datastax/astradb_cql.py +5 -31
- lfx/components/datastax/astradb_graph.py +9 -123
- lfx/components/datastax/astradb_tool.py +12 -52
- lfx/components/datastax/astradb_vectorstore.py +133 -976
- lfx/components/datastax/create_assistant.py +1 -0
- lfx/components/datastax/create_thread.py +1 -0
- lfx/components/datastax/dotenv.py +1 -0
- lfx/components/datastax/get_assistant.py +1 -0
- lfx/components/datastax/getenvvar.py +1 -0
- lfx/components/datastax/graph_rag.py +1 -1
- lfx/components/datastax/list_assistants.py +1 -0
- lfx/components/datastax/run.py +1 -0
- lfx/components/docling/__init__.py +3 -0
- lfx/components/docling/docling_remote_vlm.py +284 -0
- lfx/components/helpers/memory.py +19 -4
- lfx/components/ibm/watsonx.py +25 -21
- lfx/components/input_output/chat.py +8 -0
- lfx/components/input_output/chat_output.py +8 -0
- lfx/components/knowledge_bases/ingestion.py +17 -9
- lfx/components/knowledge_bases/retrieval.py +16 -8
- lfx/components/logic/loop.py +4 -0
- lfx/components/mistral/mistral_embeddings.py +1 -1
- lfx/components/models/embedding_model.py +88 -7
- lfx/components/ollama/ollama.py +221 -14
- lfx/components/openrouter/openrouter.py +49 -147
- lfx/components/processing/parser.py +6 -1
- lfx/components/processing/structured_output.py +55 -17
- lfx/components/vectorstores/__init__.py +0 -6
- lfx/custom/custom_component/component.py +3 -2
- lfx/field_typing/constants.py +1 -0
- lfx/graph/edge/base.py +2 -2
- lfx/graph/graph/base.py +1 -1
- lfx/graph/graph/schema.py +3 -2
- lfx/graph/vertex/vertex_types.py +1 -1
- lfx/io/schema.py +6 -0
- lfx/memory/stubs.py +26 -7
- lfx/schema/message.py +6 -0
- lfx/schema/schema.py +5 -0
- lfx/services/settings/constants.py +1 -0
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/METADATA +1 -1
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/RECORD +70 -85
- lfx/components/datastax/astra_db.py +0 -77
- lfx/components/datastax/cassandra.py +0 -92
- lfx/components/vectorstores/astradb_graph.py +0 -326
- lfx/components/vectorstores/cassandra.py +0 -264
- lfx/components/vectorstores/cassandra_graph.py +0 -238
- lfx/components/vectorstores/chroma.py +0 -167
- lfx/components/vectorstores/clickhouse.py +0 -135
- lfx/components/vectorstores/couchbase.py +0 -102
- lfx/components/vectorstores/elasticsearch.py +0 -267
- lfx/components/vectorstores/faiss.py +0 -111
- lfx/components/vectorstores/graph_rag.py +0 -141
- lfx/components/vectorstores/hcd.py +0 -314
- lfx/components/vectorstores/milvus.py +0 -115
- lfx/components/vectorstores/mongodb_atlas.py +0 -213
- lfx/components/vectorstores/opensearch.py +0 -243
- lfx/components/vectorstores/pgvector.py +0 -72
- lfx/components/vectorstores/pinecone.py +0 -134
- lfx/components/vectorstores/qdrant.py +0 -109
- lfx/components/vectorstores/supabase.py +0 -76
- lfx/components/vectorstores/upstash.py +0 -124
- lfx/components/vectorstores/vectara.py +0 -97
- lfx/components/vectorstores/vectara_rag.py +0 -164
- lfx/components/vectorstores/weaviate.py +0 -89
- /lfx/components/datastax/{astra_vectorize.py → astradb_vectorize.py} +0 -0
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/entry_points.txt +0 -0
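The per-file hunks below are from the renamed module lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} (+84 -473), where the Astra DB vector store component is split into a shared AstraDBBaseComponent plus thinner DataStax components. A minimal sketch of the assumed import change for downstream code; the dotted module paths are inferred from the file paths above and are not confirmed by this diff:

# Before (0.1.12.dev42): the monolithic vector store component (path from the old file location).
from lfx.components.vectorstores.astradb import AstraDBVectorStoreComponent

# After (0.2.0.dev0): the shared base class (path inferred from the rename); the vector store
# component itself now lives in lfx/components/datastax/astradb_vectorstore.py.
from lfx.base.datastax.astradb_base import AstraDBBaseComponent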
@@ -1,41 +1,24 @@
 import re
 from collections import defaultdict
 from dataclasses import asdict, dataclass, field
+from typing import Any

 from astrapy import DataAPIClient, Database
-from
-
-from
-from langchain_astradb.utils.astradb import HybridSearchMode, _AstraDBCollectionEnvironment
-from langchain_core.documents import Document
-
-from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
-from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
-from lfx.helpers.data import docs_to_data
-from lfx.inputs.inputs import FloatInput, NestedDictInput
+from langchain_astradb.utils.astradb import _AstraDBCollectionEnvironment
+
+from lfx.custom.custom_component.component import Component
 from lfx.io import (
     BoolInput,
     DropdownInput,
-    HandleInput,
     IntInput,
-    QueryInput,
     SecretStrInput,
     StrInput,
 )
-from lfx.
-from lfx.serialization import serialize
-from lfx.utils.version import get_version_info
-
+from lfx.log.logger import logger

-@vector_store_connection
-class AstraDBVectorStoreComponent(LCVectorStoreComponent):
-    display_name: str = "Astra DB"
-    description: str = "Ingest and search documents in Astra DB"
-    documentation: str = "https://docs.datastax.com/en/langflow/astra-components.html"
-    name = "AstraDB"
-    icon: str = "AstraDB"

-
+class AstraDBBaseComponent(Component):
+    """Base class for AstraDB components with common functionality."""

     @dataclass
     class NewDatabaseInput:
@@ -186,74 +169,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             combobox=True,
             show=False,
         ),
-        HandleInput(
-            name="embedding_model",
-            display_name="Embedding Model",
-            input_types=["Embeddings"],
-            info="Specify the Embedding Model. Not required for Astra Vectorize collections.",
-            required=False,
-            show=False,
-        ),
-        *LCVectorStoreComponent.inputs,
-        DropdownInput(
-            name="search_method",
-            display_name="Search Method",
-            info=(
-                "Determine how your content is matched: Vector finds semantic similarity, "
-                "and Hybrid Search (suggested) combines both approaches "
-                "with a reranker."
-            ),
-            options=["Hybrid Search", "Vector Search"], # TODO: Restore Lexical Search?
-            options_metadata=[{"icon": "SearchHybrid"}, {"icon": "SearchVector"}],
-            value="Vector Search",
-            advanced=True,
-            real_time_refresh=True,
-        ),
-        DropdownInput(
-            name="reranker",
-            display_name="Reranker",
-            info="Post-retrieval model that re-scores results for optimal relevance ranking.",
-            show=False,
-            toggle=True,
-        ),
-        QueryInput(
-            name="lexical_terms",
-            display_name="Lexical Terms",
-            info="Add additional terms/keywords to augment search precision.",
-            placeholder="Enter terms to search...",
-            separator=" ",
-            show=False,
-            value="",
-        ),
-        IntInput(
-            name="number_of_results",
-            display_name="Number of Search Results",
-            info="Number of search results to return.",
-            advanced=True,
-            value=4,
-        ),
-        DropdownInput(
-            name="search_type",
-            display_name="Search Type",
-            info="Search type to use",
-            options=["Similarity", "Similarity with score threshold", "MMR (Max Marginal Relevance)"],
-            value="Similarity",
-            advanced=True,
-        ),
-        FloatInput(
-            name="search_score_threshold",
-            display_name="Search Score Threshold",
-            info="Minimum similarity score threshold for search results. "
-            "(when using 'Similarity with score threshold')",
-            value=0,
-            advanced=True,
-        ),
-        NestedDictInput(
-            name="advanced_search_filter",
-            display_name="Search Metadata Filter",
-            info="Optional dictionary of filters to apply to the search query.",
-            advanced=True,
-        ),
         BoolInput(
             name="autodetect_collection",
             display_name="Autodetect Collection",
@@ -261,75 +176,51 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             advanced=True,
             value=True,
         ),
-        StrInput(
-            name="content_field",
-            display_name="Content Field",
-            info="Field to use as the text content field for the vector store.",
-            advanced=True,
-        ),
-        StrInput(
-            name="deletion_field",
-            display_name="Deletion Based On Field",
-            info="When this parameter is provided, documents in the target collection with "
-            "metadata field values matching the input metadata field value will be deleted "
-            "before new data is loaded.",
-            advanced=True,
-        ),
-        BoolInput(
-            name="ignore_invalid_documents",
-            display_name="Ignore Invalid Documents",
-            info="Boolean flag to determine whether to ignore invalid documents at runtime.",
-            advanced=True,
-        ),
-        NestedDictInput(
-            name="astradb_vectorstore_kwargs",
-            display_name="AstraDBVectorStore Parameters",
-            info="Optional dictionary of additional parameters for the AstraDBVectorStore.",
-            advanced=True,
-        ),
     ]

     @classmethod
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            },
-            "prod": {
-                "Amazon Web Services": {
-                    "id": "aws",
-                    "regions": ["us-east-2", "ap-south-1", "eu-west-1"],
-                },
-                "Google Cloud Platform": {
-                    "id": "gcp",
-                    "regions": ["us-east1"],
-                },
-                "Microsoft Azure": {
-                    "id": "azure",
-                    "regions": ["westus3"],
-                },
-            },
+    def get_environment(cls, environment: str | None = None) -> str:
+        if not environment:
+            return "prod"
+        return environment
+
+    @classmethod
+    def map_cloud_providers(cls, token: str, environment: str | None = None) -> dict[str, dict[str, Any]]:
+        """Fetch all available cloud providers and regions."""
+        # Get the admin object
+        client = DataAPIClient(environment=cls.get_environment(environment))
+        admin_client = client.get_admin(token=token)
+
+        # Get the list of available regions
+        available_regions = admin_client.find_available_regions(only_org_enabled_regions=True)
+
+        provider_mapping: dict[str, dict[str, str]] = {
+            "AWS": {"name": "Amazon Web Services", "id": "aws"},
+            "GCP": {"name": "Google Cloud Platform", "id": "gcp"},
+            "Azure": {"name": "Microsoft Azure", "id": "azure"},
         }

+        result: dict[str, dict[str, Any]] = {}
+        for region_info in available_regions:
+            cloud_provider = region_info.cloud_provider
+            region = region_info.name
+
+            if cloud_provider in provider_mapping:
+                provider_name = provider_mapping[cloud_provider]["name"]
+                provider_id = provider_mapping[cloud_provider]["id"]
+
+                if provider_name not in result:
+                    result[provider_name] = {"id": provider_id, "regions": []}
+
+                result[provider_name]["regions"].append(region)
+
+        return result
+
     @classmethod
     def get_vectorize_providers(cls, token: str, environment: str | None = None, api_endpoint: str | None = None):
         try:
             # Get the admin object
-            client = DataAPIClient(environment=environment)
+            client = DataAPIClient(environment=cls.get_environment(environment))
             admin_client = client.get_admin()
             db_admin = admin_client.get_database_admin(api_endpoint, token=token)

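Note: the hunk above replaces the old hard-coded provider/region dictionary with classmethods that query the Data API. A minimal usage sketch (not from the package), assuming a valid Astra DB application token, network access, and the import path inferred from the rename:

# Hypothetical usage of the new classmethods shown above.
from lfx.base.datastax.astradb_base import AstraDBBaseComponent  # path inferred from the rename

token = "AstraCS:..."  # placeholder application token

# get_environment() falls back to "prod" for a missing/empty environment, otherwise passes it through.
assert AstraDBBaseComponent.get_environment(None) == "prod"
assert AstraDBBaseComponent.get_environment("dev") == "dev"

# map_cloud_providers() now builds {"Amazon Web Services": {"id": "aws", "regions": [...]}, ...}
# from admin_client.find_available_regions() instead of returning a static mapping.
providers = AstraDBBaseComponent.map_cloud_providers(token=token, environment=None)
for name, info in providers.items():
    print(name, info["id"], info["regions"])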
@@ -361,14 +252,15 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         environment: str | None = None,
         keyspace: str | None = None,
     ):
-
+        # Get the environment, set to prod if null like
+        my_env = cls.get_environment(environment)
+
+        # Initialize the Data API client
+        client = DataAPIClient(environment=my_env)

         # Get the admin object
         admin_client = client.get_admin(token=token)

-        # Get the environment, set to prod if null like
-        my_env = environment or "prod"
-
         # Raise a value error if name isn't provided
         if not new_database_name:
             msg = "Database name is required to create a new database."
@@ -377,7 +269,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         # Call the create database function
         return await admin_client.async_create_database(
             name=new_database_name,
-            cloud_provider=cls.map_cloud_providers()[
+            cloud_provider=cls.map_cloud_providers(token=token, environment=my_env)[cloud_provider]["id"],
             region=region,
             keyspace=keyspace,
             wait_until_active=False,
@@ -394,11 +286,21 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         dimension: int | None = None,
         embedding_generation_provider: str | None = None,
         embedding_generation_model: str | None = None,
-        reranker: str | None = None,
     ):
         # Build vectorize options, if needed
         vectorize_options = None
         if not dimension:
+            try:
+                from langchain_astradb import VectorServiceOptions
+            except ImportError as e:
+                msg = (
+                    "langchain-astradb is required to create AstraDB collections with "
+                    "Astra Vectorize embeddings. Please install it with "
+                    "`pip install langchain-astradb`."
+                )
+                raise ImportError(msg) from e
+
+            environment = cls.get_environment(environment)
             providers = cls.get_vectorize_providers(token=token, environment=environment, api_endpoint=api_endpoint)
             vectorize_options = VectorServiceOptions(
                 provider=providers.get(embedding_generation_provider, [None, []])[0],
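Note: with the change above, langchain-astradb is only pulled in when vectorize options are actually built; the VectorServiceOptions import becomes a guarded, lazy import inside the collection-creation path. A standalone sketch of the same guard pattern (the helper name is illustrative, not part of the package):

def _load_vector_service_options():
    # Import lazily so langchain-astradb stays an optional dependency until it is really needed.
    try:
        from langchain_astradb import VectorServiceOptions
    except ImportError as e:
        msg = (
            "langchain-astradb is required to create AstraDB collections with "
            "Astra Vectorize embeddings. Please install it with "
            "`pip install langchain-astradb`."
        )
        raise ImportError(msg) from e
    return VectorServiceOptions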
@@ -421,19 +323,11 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             "collection_vector_service_options": vectorize_options,
         }

-        # Add optional arguments if the reranker is set
-        if reranker:
-            # Split the reranker field into a provider a model name
-            provider, _ = reranker.split("/")
-            base_args["collection_rerank"] = CollectionRerankOptions(
-                service=RerankServiceOptions(provider=provider, model_name=reranker),
-            )
-            base_args["collection_lexical"] = CollectionLexicalOptions(analyzer="STANDARD")
-
         _AstraDBCollectionEnvironment(**base_args)

     @classmethod
     def get_database_list_static(cls, token: str, environment: str | None = None):
+        environment = cls.get_environment(environment)
         client = DataAPIClient(environment=environment)

         # Get the admin object
@@ -471,8 +365,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
                     "status": db.status if db.status != "ACTIVE" else None,
                     "org_id": db.org_id if db.org_id else None,
                 }
-            except Exception: # noqa: BLE001
-
+            except Exception as e: # noqa: BLE001
+                logger.debug("Failed to get metadata for database %s: %s", db.name, e)

         return db_info_dict

@@ -503,6 +397,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             return None

         # Grab the database object
+        environment = cls.get_environment(environment)
         db = cls.get_database_list_static(token=token, environment=environment).get(database_name)
         if not db:
             return None
@@ -551,7 +446,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             msg = f"Error fetching database object: {e}"
             raise ValueError(msg) from e

-    def collection_data(self, collection_name: str, database: Database
+    def collection_data(self, collection_name: str, database: Database = None):
         try:
             if not database:
                 client = DataAPIClient(environment=self.environment)
@@ -588,7 +483,7 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             raise ValueError(msg) from e

     @classmethod
-    def get_provider_icon(cls, collection
+    def get_provider_icon(cls, collection=None, provider_name: str | None = None) -> str:
         # Get the provider name from the collection
         provider_name = provider_name or (
             collection.definition.vector.service.provider
@@ -757,9 +652,13 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         database_options = self._initialize_database_options()

         # Update cloud provider options
-        env = self.environment
         template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
-        template["02_cloud_provider"]["options"] = list(
+        template["02_cloud_provider"]["options"] = list(
+            self.map_cloud_providers(
+                token=self.token,
+                environment=self.environment,
+            ).keys()
+        )

         # Update database configuration
         database_config = build_config["database_name"]
@@ -796,43 +695,12 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):

         return build_config

-    def
-
-
-
-
-
-        db_admin = admin_client.get_database_admin(self.get_api_endpoint(), token=self.token)
-
-        # We will try to get the reranking providers to see if its hybrid emabled
-        try:
-            providers = db_admin.find_reranking_providers()
-            build_config["reranker"]["options"] = [
-                model.name for provider_data in providers.reranking_providers.values() for model in provider_data.models
-            ]
-            build_config["reranker"]["options_metadata"] = [
-                {"icon": self.get_provider_icon(provider_name=model.name.split("/")[0])}
-                for provider in providers.reranking_providers.values()
-                for model in provider.models
-            ]
-            build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
-
-            # Set the default search field to hybrid search
-            build_config["search_method"]["show"] = True
-            build_config["search_method"]["options"] = ["Hybrid Search", "Vector Search"]
-            build_config["search_method"]["value"] = "Hybrid Search"
-        except Exception as _: # noqa: BLE001
-            build_config["reranker"]["options"] = []
-            build_config["reranker"]["options_metadata"] = []
-
-            # Set the default search field to vector search
-            build_config["search_method"]["show"] = False
-            build_config["search_method"]["options"] = ["Vector Search"]
-            build_config["search_method"]["value"] = "Vector Search"
-
-        return build_config
-
-    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:
+    async def update_build_config(
+        self,
+        build_config: dict,
+        field_value: str | dict,
+        field_name: str | None = None,
+    ) -> dict:
         """Update build configuration based on field name and value."""
         # Early return if no token provided
         if not self.token:
@@ -877,29 +745,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         if field_name == "collection_name" and not isinstance(field_value, dict):
             return self._handle_collection_selection(build_config, field_value)

-        # Search method selection change
-        if field_name == "search_method":
-            is_vector_search = field_value == "Vector Search"
-            is_autodetect = build_config["autodetect_collection"]["value"]
-
-            # Configure lexical terms (same for both cases)
-            build_config["lexical_terms"]["show"] = not is_vector_search
-            build_config["lexical_terms"]["value"] = "" if is_vector_search else build_config["lexical_terms"]["value"]
-
-            # Disable reranker disabling if hybrid search is selected
-            build_config["reranker"]["show"] = not is_vector_search
-            build_config["reranker"]["toggle_disable"] = not is_vector_search
-            build_config["reranker"]["toggle_value"] = True
-            build_config["reranker"]["value"] = build_config["reranker"]["options"][0]
-
-            # Toggle search type and score threshold based on search method
-            build_config["search_type"]["show"] = is_vector_search
-            build_config["search_score_threshold"]["show"] = is_vector_search
-
-            # Make sure the search_type is set to "Similarity"
-            if not is_vector_search or is_autodetect:
-                build_config["search_type"]["value"] = "Similarity"
-
         return build_config

     async def _create_new_database(self, build_config: dict, field_value: dict) -> None:
@@ -930,12 +775,14 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):

     def _update_cloud_regions(self, build_config: dict, field_value: dict) -> dict:
         """Update cloud provider regions in build config."""
-        env = self.environment
         cloud_provider = field_value["02_cloud_provider"]

         # Update the region options based on the selected cloud provider
         template = build_config["database_name"]["dialog_inputs"]["fields"]["data"]["node"]["template"]
-        template["03_region"]["options"] = self.map_cloud_providers(
+        template["03_region"]["options"] = self.map_cloud_providers(
+            token=self.token,
+            environment=self.environment,
+        )[cloud_provider]["regions"]

         # Reset the the 03_region value if it's not in the new options
         if template["03_region"]["value"] not in template["03_region"]["options"]:
@@ -956,7 +803,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
                 dimension=field_value.get("04_dimension") if embedding_provider == "Bring your own" else None,
                 embedding_generation_provider=embedding_provider,
                 embedding_generation_model=field_value.get("03_embedding_generation_model"),
-                reranker=self.reranker,
             )
         except Exception as e:
             msg = f"Error creating collection: {e}"
@@ -969,8 +815,8 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
                 "options": build_config["collection_name"]["options"] + [field_value["01_new_collection_name"]],
             }
         )
-
-
+
+        # Update collection metadata
         build_config["collection_name"]["options_metadata"].append(
             {
                 "records": 0,
@@ -980,10 +826,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
             }
         )

-        # Make sure we always show the reranker options if the collection is hybrid enabled
-        # And right now they always are
-        build_config["lexical_terms"]["show"] = True
-
     def _handle_database_selection(self, build_config: dict, field_value: str) -> dict:
         """Handle database selection and update related configurations."""
         build_config = self.reset_database_list(build_config)
@@ -1031,9 +873,6 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         # Reset provider options
         build_config = self.reset_provider_options(build_config)

-        # Handle hybrid search options
-        build_config = self._handle_hybrid_search_options(build_config)
-
         return self.reset_collection_list(build_config)

     def _handle_collection_selection(self, build_config: dict, field_value: str) -> dict:
@@ -1054,232 +893,4 @@ class AstraDBVectorStoreComponent(LCVectorStoreComponent):
         )
         build_config["autodetect_collection"]["value"] = False

-        if not field_value:
-            return build_config
-
-        # Get the selected collection index
-        index = build_config["collection_name"]["options"].index(field_value)
-
-        # Set the provider of the selected collection
-        provider = build_config["collection_name"]["options_metadata"][index]["provider"]
-        build_config["embedding_model"]["show"] = not bool(provider)
-        build_config["embedding_model"]["required"] = not bool(provider)
-
-        # Grab the collection object
-        database = self.get_database_object(api_endpoint=build_config["api_endpoint"]["value"])
-        collection = database.get_collection(
-            name=field_value,
-            keyspace=build_config["keyspace"]["value"],
-        )
-
-        # Check if hybrid and lexical are enabled
-        col_options = collection.options()
-        hyb_enabled = col_options.rerank and col_options.rerank.enabled
-        lex_enabled = col_options.lexical and col_options.lexical.enabled
-        user_hyb_enabled = build_config["search_method"]["value"] == "Hybrid Search"
-
-        # Reranker visible when both the collection supports it and the user selected Hybrid
-        hybrid_active = bool(hyb_enabled and user_hyb_enabled)
-        build_config["reranker"]["show"] = hybrid_active
-        build_config["reranker"]["toggle_value"] = hybrid_active
-        build_config["reranker"]["toggle_disable"] = False # allow user to toggle if visible
-
-        # If hybrid is active, lock search_type to "Similarity"
-        if hybrid_active:
-            build_config["search_type"]["value"] = "Similarity"
-
-        # Show the lexical terms option only if the collection enables lexical search
-        build_config["lexical_terms"]["show"] = bool(lex_enabled)
-
         return build_config
-
-    @check_cached_vector_store
-    def build_vector_store(self):
-        try:
-            from langchain_astradb import AstraDBVectorStore
-        except ImportError as e:
-            msg = (
-                "Could not import langchain Astra DB integration package. "
-                "Please install it with `pip install langchain-astradb`."
-            )
-            raise ImportError(msg) from e
-
-        # Get the embedding model and additional params
-        embedding_params = {"embedding": self.embedding_model} if self.embedding_model else {}
-
-        # Get the additional parameters
-        additional_params = self.astradb_vectorstore_kwargs or {}
-
-        # Get Langflow version and platform information
-        __version__ = get_version_info()["version"]
-        langflow_prefix = ""
-        # if os.getenv("AWS_EXECUTION_ENV") == "AWS_ECS_FARGATE": # TODO: More precise way of detecting
-        # langflow_prefix = "ds-"
-
-        # Get the database object
-        database = self.get_database_object()
-        autodetect = self.collection_name in database.list_collection_names() and self.autodetect_collection
-
-        # Bundle up the auto-detect parameters
-        autodetect_params = {
-            "autodetect_collection": autodetect,
-            "content_field": (
-                self.content_field
-                if self.content_field and embedding_params
-                else (
-                    "page_content"
-                    if embedding_params
-                    and self.collection_data(collection_name=self.collection_name, database=database) == 0
-                    else None
-                )
-            ),
-            "ignore_invalid_documents": self.ignore_invalid_documents,
-        }
-
-        # Choose HybridSearchMode based on the selected param
-        hybrid_search_mode = HybridSearchMode.DEFAULT if self.search_method == "Hybrid Search" else HybridSearchMode.OFF
-
-        # Attempt to build the Vector Store object
-        try:
-            vector_store = AstraDBVectorStore(
-                # Astra DB Authentication Parameters
-                token=self.token,
-                api_endpoint=database.api_endpoint,
-                namespace=database.keyspace,
-                collection_name=self.collection_name,
-                environment=self.environment,
-                # Hybrid Search Parameters
-                hybrid_search=hybrid_search_mode,
-                # Astra DB Usage Tracking Parameters
-                ext_callers=[(f"{langflow_prefix}langflow", __version__)],
-                # Astra DB Vector Store Parameters
-                **autodetect_params,
-                **embedding_params,
-                **additional_params,
-            )
-        except Exception as e:
-            msg = f"Error initializing AstraDBVectorStore: {e}"
-            raise ValueError(msg) from e
-
-        # Add documents to the vector store
-        self._add_documents_to_vector_store(vector_store)
-
-        return vector_store
-
-    def _add_documents_to_vector_store(self, vector_store) -> None:
-        self.ingest_data = self._prepare_ingest_data()
-
-        documents = []
-        for _input in self.ingest_data or []:
-            if isinstance(_input, Data):
-                documents.append(_input.to_lc_document())
-            else:
-                msg = "Vector Store Inputs must be Data objects."
-                raise TypeError(msg)
-
-        documents = [
-            Document(page_content=doc.page_content, metadata=serialize(doc.metadata, to_str=True)) for doc in documents
-        ]
-
-        if documents and self.deletion_field:
-            self.log(f"Deleting documents where {self.deletion_field}")
-            try:
-                database = self.get_database_object()
-                collection = database.get_collection(self.collection_name, keyspace=database.keyspace)
-                delete_values = list({doc.metadata[self.deletion_field] for doc in documents})
-                self.log(f"Deleting documents where {self.deletion_field} matches {delete_values}.")
-                collection.delete_many({f"metadata.{self.deletion_field}": {"$in": delete_values}})
-            except Exception as e:
-                msg = f"Error deleting documents from AstraDBVectorStore based on '{self.deletion_field}': {e}"
-                raise ValueError(msg) from e
-
-        if documents:
-            self.log(f"Adding {len(documents)} documents to the Vector Store.")
-            try:
-                vector_store.add_documents(documents)
-            except Exception as e:
-                msg = f"Error adding documents to AstraDBVectorStore: {e}"
-                raise ValueError(msg) from e
-        else:
-            self.log("No documents to add to the Vector Store.")
-
-    def _map_search_type(self) -> str:
-        search_type_mapping = {
-            "Similarity with score threshold": "similarity_score_threshold",
-            "MMR (Max Marginal Relevance)": "mmr",
-        }
-
-        return search_type_mapping.get(self.search_type, "similarity")
-
-    def _build_search_args(self):
-        # Clean up the search query
-        query = self.search_query if isinstance(self.search_query, str) and self.search_query.strip() else None
-        lexical_terms = self.lexical_terms or None
-
-        # Check if we have a search query, and if so set the args
-        if query:
-            args = {
-                "query": query,
-                "search_type": self._map_search_type(),
-                "k": self.number_of_results,
-                "score_threshold": self.search_score_threshold,
-                "lexical_query": lexical_terms,
-            }
-        elif self.advanced_search_filter:
-            args = {
-                "n": self.number_of_results,
-            }
-        else:
-            return {}
-
-        filter_arg = self.advanced_search_filter or {}
-        if filter_arg:
-            args["filter"] = filter_arg
-
-        return args
-
-    def search_documents(self, vector_store=None) -> list[Data]:
-        vector_store = vector_store or self.build_vector_store()
-
-        self.log(f"Search input: {self.search_query}")
-        self.log(f"Search type: {self.search_type}")
-        self.log(f"Number of results: {self.number_of_results}")
-        self.log(f"store.hybrid_search: {vector_store.hybrid_search}")
-        self.log(f"Lexical terms: {self.lexical_terms}")
-        self.log(f"Reranker: {self.reranker}")
-
-        try:
-            search_args = self._build_search_args()
-        except Exception as e:
-            msg = f"Error in AstraDBVectorStore._build_search_args: {e}"
-            raise ValueError(msg) from e
-
-        if not search_args:
-            self.log("No search input or filters provided. Skipping search.")
-            return []
-
-        docs = []
-        search_method = "search" if "query" in search_args else "metadata_search"
-
-        try:
-            self.log(f"Calling vector_store.{search_method} with args: {search_args}")
-            docs = getattr(vector_store, search_method)(**search_args)
-        except Exception as e:
-            msg = f"Error performing {search_method} in AstraDBVectorStore: {e}"
-            raise ValueError(msg) from e
-
-        self.log(f"Retrieved documents: {len(docs)}")
-
-        data = docs_to_data(docs)
-        self.log(f"Converted documents to data: {len(data)}")
-        self.status = data
-
-        return data
-
-    def get_retriever_kwargs(self):
-        search_args = self._build_search_args()
-
-        return {
-            "search_type": self._map_search_type(),
-            "search_kwargs": search_args,
-        }