lfx-nightly 0.1.12.dev42__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lfx/_assets/component_index.json +1 -1
- lfx/base/agents/agent.py +109 -29
- lfx/base/agents/events.py +102 -35
- lfx/base/agents/utils.py +15 -2
- lfx/base/composio/composio_base.py +24 -9
- lfx/base/datastax/__init__.py +5 -0
- lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} +84 -473
- lfx/base/io/chat.py +5 -4
- lfx/base/mcp/util.py +101 -15
- lfx/base/models/cometapi_constants.py +54 -0
- lfx/base/models/model_input_constants.py +74 -7
- lfx/base/models/ollama_constants.py +3 -0
- lfx/base/models/watsonx_constants.py +12 -0
- lfx/cli/commands.py +1 -1
- lfx/components/agents/__init__.py +3 -1
- lfx/components/agents/agent.py +47 -4
- lfx/components/agents/altk_agent.py +366 -0
- lfx/components/agents/cuga_agent.py +1 -1
- lfx/components/agents/mcp_component.py +32 -2
- lfx/components/amazon/amazon_bedrock_converse.py +1 -1
- lfx/components/apify/apify_actor.py +3 -3
- lfx/components/cometapi/__init__.py +32 -0
- lfx/components/cometapi/cometapi.py +166 -0
- lfx/components/datastax/__init__.py +12 -6
- lfx/components/datastax/{astra_assistant_manager.py → astradb_assistant_manager.py} +1 -0
- lfx/components/datastax/astradb_chatmemory.py +40 -0
- lfx/components/datastax/astradb_cql.py +5 -31
- lfx/components/datastax/astradb_graph.py +9 -123
- lfx/components/datastax/astradb_tool.py +12 -52
- lfx/components/datastax/astradb_vectorstore.py +133 -976
- lfx/components/datastax/create_assistant.py +1 -0
- lfx/components/datastax/create_thread.py +1 -0
- lfx/components/datastax/dotenv.py +1 -0
- lfx/components/datastax/get_assistant.py +1 -0
- lfx/components/datastax/getenvvar.py +1 -0
- lfx/components/datastax/graph_rag.py +1 -1
- lfx/components/datastax/list_assistants.py +1 -0
- lfx/components/datastax/run.py +1 -0
- lfx/components/docling/__init__.py +3 -0
- lfx/components/docling/docling_remote_vlm.py +284 -0
- lfx/components/helpers/memory.py +19 -4
- lfx/components/ibm/watsonx.py +25 -21
- lfx/components/input_output/chat.py +8 -0
- lfx/components/input_output/chat_output.py +8 -0
- lfx/components/knowledge_bases/ingestion.py +17 -9
- lfx/components/knowledge_bases/retrieval.py +16 -8
- lfx/components/logic/loop.py +4 -0
- lfx/components/mistral/mistral_embeddings.py +1 -1
- lfx/components/models/embedding_model.py +88 -7
- lfx/components/ollama/ollama.py +221 -14
- lfx/components/openrouter/openrouter.py +49 -147
- lfx/components/processing/parser.py +6 -1
- lfx/components/processing/structured_output.py +55 -17
- lfx/components/vectorstores/__init__.py +0 -6
- lfx/custom/custom_component/component.py +3 -2
- lfx/field_typing/constants.py +1 -0
- lfx/graph/edge/base.py +2 -2
- lfx/graph/graph/base.py +1 -1
- lfx/graph/graph/schema.py +3 -2
- lfx/graph/vertex/vertex_types.py +1 -1
- lfx/io/schema.py +6 -0
- lfx/memory/stubs.py +26 -7
- lfx/schema/message.py +6 -0
- lfx/schema/schema.py +5 -0
- lfx/services/settings/constants.py +1 -0
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/METADATA +1 -1
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/RECORD +70 -85
- lfx/components/datastax/astra_db.py +0 -77
- lfx/components/datastax/cassandra.py +0 -92
- lfx/components/vectorstores/astradb_graph.py +0 -326
- lfx/components/vectorstores/cassandra.py +0 -264
- lfx/components/vectorstores/cassandra_graph.py +0 -238
- lfx/components/vectorstores/chroma.py +0 -167
- lfx/components/vectorstores/clickhouse.py +0 -135
- lfx/components/vectorstores/couchbase.py +0 -102
- lfx/components/vectorstores/elasticsearch.py +0 -267
- lfx/components/vectorstores/faiss.py +0 -111
- lfx/components/vectorstores/graph_rag.py +0 -141
- lfx/components/vectorstores/hcd.py +0 -314
- lfx/components/vectorstores/milvus.py +0 -115
- lfx/components/vectorstores/mongodb_atlas.py +0 -213
- lfx/components/vectorstores/opensearch.py +0 -243
- lfx/components/vectorstores/pgvector.py +0 -72
- lfx/components/vectorstores/pinecone.py +0 -134
- lfx/components/vectorstores/qdrant.py +0 -109
- lfx/components/vectorstores/supabase.py +0 -76
- lfx/components/vectorstores/upstash.py +0 -124
- lfx/components/vectorstores/vectara.py +0 -97
- lfx/components/vectorstores/vectara_rag.py +0 -164
- lfx/components/vectorstores/weaviate.py +0 -89
- /lfx/components/datastax/{astra_vectorize.py → astradb_vectorize.py} +0 -0
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/WHEEL +0 -0
- {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/entry_points.txt +0 -0
--- a/lfx/components/vectorstores/opensearch.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import json
-from typing import Any
-
-from langchain_community.vectorstores import OpenSearchVectorSearch
-
-from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
-from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
-from lfx.io import (
-    BoolInput,
-    DropdownInput,
-    FloatInput,
-    HandleInput,
-    IntInput,
-    MultilineInput,
-    SecretStrInput,
-    StrInput,
-)
-from lfx.schema.data import Data
-
-
-@vector_store_connection
-class OpenSearchVectorStoreComponent(LCVectorStoreComponent):
-    """OpenSearch Vector Store with advanced, customizable search capabilities."""
-
-    display_name: str = "OpenSearch"
-    description: str = "OpenSearch Vector Store with advanced, customizable search capabilities."
-    name = "OpenSearch"
-    icon = "OpenSearch"
-
-    inputs = [
-        StrInput(
-            name="opensearch_url",
-            display_name="OpenSearch URL",
-            value="http://localhost:9200",
-            info="URL for OpenSearch cluster (e.g. https://192.168.1.1:9200).",
-        ),
-        StrInput(
-            name="index_name",
-            display_name="Index Name",
-            value="langflow",
-            info="The index name where the vectors will be stored in OpenSearch cluster.",
-        ),
-        *LCVectorStoreComponent.inputs,
-        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
-        DropdownInput(
-            name="search_type",
-            display_name="Search Type",
-            options=["similarity", "similarity_score_threshold", "mmr"],
-            value="similarity",
-            advanced=True,
-        ),
-        IntInput(
-            name="number_of_results",
-            display_name="Number of Results",
-            info="Number of results to return.",
-            advanced=True,
-            value=4,
-        ),
-        FloatInput(
-            name="search_score_threshold",
-            display_name="Search Score Threshold",
-            info="Minimum similarity score threshold for search results.",
-            value=0.0,
-            advanced=True,
-        ),
-        StrInput(
-            name="username",
-            display_name="Username",
-            value="admin",
-            advanced=True,
-        ),
-        SecretStrInput(
-            name="password",
-            display_name="Password",
-            value="admin",
-            advanced=True,
-        ),
-        BoolInput(
-            name="use_ssl",
-            display_name="Use SSL",
-            value=True,
-            advanced=True,
-        ),
-        BoolInput(
-            name="verify_certs",
-            display_name="Verify Certificates",
-            value=False,
-            advanced=True,
-        ),
-        MultilineInput(
-            name="hybrid_search_query",
-            display_name="Hybrid Search Query",
-            value="",
-            advanced=True,
-            info=(
-                "Provide a custom hybrid search query in JSON format. This allows you to combine "
-                "vector similarity and keyword matching."
-            ),
-        ),
-    ]
-
-    @check_cached_vector_store
-    def build_vector_store(self) -> OpenSearchVectorSearch:
-        """Builds the OpenSearch Vector Store object."""
-        try:
-            from langchain_community.vectorstores import OpenSearchVectorSearch
-        except ImportError as e:
-            error_message = f"Failed to import required modules: {e}"
-            self.log(error_message)
-            raise ImportError(error_message) from e
-
-        try:
-            opensearch = OpenSearchVectorSearch(
-                index_name=self.index_name,
-                embedding_function=self.embedding,
-                opensearch_url=self.opensearch_url,
-                http_auth=(self.username, self.password),
-                use_ssl=self.use_ssl,
-                verify_certs=self.verify_certs,
-                ssl_assert_hostname=False,
-                ssl_show_warn=False,
-            )
-        except Exception as e:
-            error_message = f"Failed to create OpenSearchVectorSearch instance: {e}"
-            self.log(error_message)
-            raise RuntimeError(error_message) from e
-
-        if self.ingest_data:
-            self._add_documents_to_vector_store(opensearch)
-
-        return opensearch
-
-    def _add_documents_to_vector_store(self, vector_store: "OpenSearchVectorSearch") -> None:
-        """Adds documents to the Vector Store."""
-        # Convert DataFrame to Data if needed using parent's method
-        self.ingest_data = self._prepare_ingest_data()
-
-        documents = []
-        for _input in self.ingest_data or []:
-            if isinstance(_input, Data):
-                documents.append(_input.to_lc_document())
-            else:
-                error_message = f"Expected Data object, got {type(_input)}"
-                self.log(error_message)
-                raise TypeError(error_message)
-
-        if documents and self.embedding is not None:
-            self.log(f"Adding {len(documents)} documents to the Vector Store.")
-            try:
-                vector_store.add_documents(documents)
-            except Exception as e:
-                error_message = f"Error adding documents to Vector Store: {e}"
-                self.log(error_message)
-                raise RuntimeError(error_message) from e
-        else:
-            self.log("No documents to add to the Vector Store.")
-
-    def search(self, query: str | None = None) -> list[dict[str, Any]]:
-        """Search for similar documents in the vector store or retrieve all documents if no query is provided."""
-        try:
-            vector_store = self.build_vector_store()
-
-            query = query or ""
-
-            if self.hybrid_search_query.strip():
-                try:
-                    hybrid_query = json.loads(self.hybrid_search_query)
-                except json.JSONDecodeError as e:
-                    error_message = f"Invalid hybrid search query JSON: {e}"
-                    self.log(error_message)
-                    raise ValueError(error_message) from e
-
-                results = vector_store.client.search(index=self.index_name, body=hybrid_query)
-
-                processed_results = []
-                for hit in results.get("hits", {}).get("hits", []):
-                    source = hit.get("_source", {})
-                    text = source.get("text", "")
-                    metadata = source.get("metadata", {})
-
-                    if isinstance(text, dict):
-                        text = text.get("text", "")
-
-                    processed_results.append(
-                        {
-                            "page_content": text,
-                            "metadata": metadata,
-                        }
-                    )
-                return processed_results
-
-            search_kwargs = {"k": self.number_of_results}
-            search_type = self.search_type.lower()
-
-            if search_type == "similarity":
-                results = vector_store.similarity_search(query, **search_kwargs)
-                return [{"page_content": doc.page_content, "metadata": doc.metadata} for doc in results]
-            if search_type == "similarity_score_threshold":
-                search_kwargs["score_threshold"] = self.search_score_threshold
-                results = vector_store.similarity_search_with_relevance_scores(query, **search_kwargs)
-                return [
-                    {
-                        "page_content": doc.page_content,
-                        "metadata": doc.metadata,
-                        "score": score,
-                    }
-                    for doc, score in results
-                ]
-            if search_type == "mmr":
-                results = vector_store.max_marginal_relevance_search(query, **search_kwargs)
-                return [{"page_content": doc.page_content, "metadata": doc.metadata} for doc in results]
-
-        except Exception as e:
-            error_message = f"Error during search: {e}"
-            self.log(error_message)
-            raise RuntimeError(error_message) from e
-
-        error_message = f"Error during search. Invalid search type: {self.search_type}"
-        self.log(error_message)
-        raise ValueError(error_message)
-
-    def search_documents(self) -> list[Data]:
-        """Search for documents in the vector store based on the search input.
-
-        If no search input is provided, retrieve all documents.
-        """
-        try:
-            query = self.search_query.strip() if self.search_query else None
-            results = self.search(query)
-            retrieved_data = [
-                Data(
-                    file_path=result["metadata"].get("file_path", ""),
-                    text=result["page_content"],
-                )
-                for result in results
-            ]
-        except Exception as e:
-            error_message = f"Error during document search: {e}"
-            self.log(error_message)
-            raise RuntimeError(error_message) from e
-
-        self.status = retrieved_data
-        return retrieved_data
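Note on the deleted component's hybrid search path: the hybrid_search_query input is parsed with json.loads and passed verbatim as the request body to vector_store.client.search(index=..., body=...). A minimal sketch of one possible body, assuming the default field names langchain_community uses for OpenSearch ("text" and "vector_field") and a k-NN-enabled index; the query text, vector, and sizes are placeholders, not values from the package:

import json

# Placeholder vector; a real one has the embedding model's dimensionality.
query_vector = [0.12, -0.03, 0.41]

hybrid_query = {
    "size": 4,
    "query": {
        "bool": {
            "should": [
                # Keyword half: BM25 match against the text field.
                {"match": {"text": "connection timeout"}},
                # Vector half: k-NN search against the embedding field.
                {"knn": {"vector_field": {"vector": query_vector, "k": 4}}},
            ]
        }
    },
}

# The component takes the input as a string and json.loads() it before searching.
hybrid_search_query_value = json.dumps(hybrid_query)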
--- a/lfx/components/vectorstores/pgvector.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from langchain_community.vectorstores import PGVector
-
-from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
-from lfx.helpers.data import docs_to_data
-from lfx.io import HandleInput, IntInput, SecretStrInput, StrInput
-from lfx.schema.data import Data
-from lfx.utils.connection_string_parser import transform_connection_string
-
-
-class PGVectorStoreComponent(LCVectorStoreComponent):
-    display_name = "PGVector"
-    description = "PGVector Vector Store with search capabilities"
-    name = "pgvector"
-    icon = "cpu"
-
-    inputs = [
-        SecretStrInput(name="pg_server_url", display_name="PostgreSQL Server Connection String", required=True),
-        StrInput(name="collection_name", display_name="Table", required=True),
-        *LCVectorStoreComponent.inputs,
-        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
-        IntInput(
-            name="number_of_results",
-            display_name="Number of Results",
-            info="Number of results to return.",
-            value=4,
-            advanced=True,
-        ),
-    ]
-
-    @check_cached_vector_store
-    def build_vector_store(self) -> PGVector:
-        # Convert DataFrame to Data if needed using parent's method
-        self.ingest_data = self._prepare_ingest_data()
-
-        documents = []
-        for _input in self.ingest_data or []:
-            if isinstance(_input, Data):
-                documents.append(_input.to_lc_document())
-            else:
-                documents.append(_input)
-
-        connection_string_parsed = transform_connection_string(self.pg_server_url)
-
-        if documents:
-            pgvector = PGVector.from_documents(
-                embedding=self.embedding,
-                documents=documents,
-                collection_name=self.collection_name,
-                connection_string=connection_string_parsed,
-            )
-        else:
-            pgvector = PGVector.from_existing_index(
-                embedding=self.embedding,
-                collection_name=self.collection_name,
-                connection_string=connection_string_parsed,
-            )
-
-        return pgvector
-
-    def search_documents(self) -> list[Data]:
-        vector_store = self.build_vector_store()
-
-        if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
-            docs = vector_store.similarity_search(
-                query=self.search_query,
-                k=self.number_of_results,
-            )
-
-            data = docs_to_data(docs)
-            self.status = data
-            return data
-        return []
--- a/lfx/components/vectorstores/pinecone.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import numpy as np
-from langchain_core.vectorstores import VectorStore
-
-from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
-from lfx.helpers.data import docs_to_data
-from lfx.io import DropdownInput, HandleInput, IntInput, SecretStrInput, StrInput
-from lfx.schema.data import Data
-
-
-class PineconeVectorStoreComponent(LCVectorStoreComponent):
-    display_name = "Pinecone"
-    description = "Pinecone Vector Store with search capabilities"
-    name = "Pinecone"
-    icon = "Pinecone"
-    inputs = [
-        StrInput(name="index_name", display_name="Index Name", required=True),
-        StrInput(name="namespace", display_name="Namespace", info="Namespace for the index."),
-        DropdownInput(
-            name="distance_strategy",
-            display_name="Distance Strategy",
-            options=["Cosine", "Euclidean", "Dot Product"],
-            value="Cosine",
-            advanced=True,
-        ),
-        SecretStrInput(name="pinecone_api_key", display_name="Pinecone API Key", required=True),
-        StrInput(
-            name="text_key",
-            display_name="Text Key",
-            info="Key in the record to use as text.",
-            value="text",
-            advanced=True,
-        ),
-        *LCVectorStoreComponent.inputs,
-        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
-        IntInput(
-            name="number_of_results",
-            display_name="Number of Results",
-            info="Number of results to return.",
-            value=4,
-            advanced=True,
-        ),
-    ]
-
-    @check_cached_vector_store
-    def build_vector_store(self) -> VectorStore:
-        """Build and return a Pinecone vector store instance."""
-        try:
-            from langchain_pinecone import PineconeVectorStore
-        except ImportError as e:
-            msg = "langchain-pinecone is not installed. Please install it with `pip install langchain-pinecone`."
-            raise ValueError(msg) from e
-
-        try:
-            from langchain_pinecone._utilities import DistanceStrategy
-
-            # Wrap the embedding model to ensure float32 output
-            wrapped_embeddings = Float32Embeddings(self.embedding)
-
-            # Convert distance strategy
-            distance_strategy = self.distance_strategy.replace(" ", "_").upper()
-            distance_strategy = DistanceStrategy[distance_strategy]
-
-            # Initialize Pinecone instance with wrapped embeddings
-            pinecone = PineconeVectorStore(
-                index_name=self.index_name,
-                embedding=wrapped_embeddings,  # Use wrapped embeddings
-                text_key=self.text_key,
-                namespace=self.namespace,
-                distance_strategy=distance_strategy,
-                pinecone_api_key=self.pinecone_api_key,
-            )
-        except Exception as e:
-            error_msg = "Error building Pinecone vector store"
-            raise ValueError(error_msg) from e
-        else:
-            self.ingest_data = self._prepare_ingest_data()
-
-            # Process documents if any
-            documents = []
-            if self.ingest_data:
-                # Convert DataFrame to Data if needed using parent's method
-
-                for doc in self.ingest_data:
-                    if isinstance(doc, Data):
-                        documents.append(doc.to_lc_document())
-                    else:
-                        documents.append(doc)
-
-            if documents:
-                pinecone.add_documents(documents)
-
-            return pinecone
-
-    def search_documents(self) -> list[Data]:
-        """Search documents in the vector store."""
-        try:
-            if not self.search_query or not isinstance(self.search_query, str) or not self.search_query.strip():
-                return []
-
-            vector_store = self.build_vector_store()
-            docs = vector_store.similarity_search(
-                query=self.search_query,
-                k=self.number_of_results,
-            )
-        except Exception as e:
-            error_msg = "Error searching documents"
-            raise ValueError(error_msg) from e
-        else:
-            data = docs_to_data(docs)
-            self.status = data
-            return data
-
-
-class Float32Embeddings:
-    """Wrapper class to ensure float32 embeddings."""
-
-    def __init__(self, base_embeddings):
-        self.base_embeddings = base_embeddings
-
-    def embed_documents(self, texts):
-        embeddings = self.base_embeddings.embed_documents(texts)
-        if isinstance(embeddings, np.ndarray):
-            return [[self._force_float32(x) for x in vec] for vec in embeddings]
-        return [[self._force_float32(x) for x in vec] for vec in embeddings]
-
-    def embed_query(self, text):
-        embedding = self.base_embeddings.embed_query(text)
-        if isinstance(embedding, np.ndarray):
-            return [self._force_float32(x) for x in embedding]
-        return [self._force_float32(x) for x in embedding]
-
-    def _force_float32(self, value):
-        """Convert any numeric type to Python float."""
-        return float(np.float32(value))
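The Float32Embeddings wrapper above coerces every embedding value to a plain Python float by round-tripping it through np.float32, presumably so numpy float64 scalars never reach the Pinecone client's serializer. A standalone sketch of the same coercion, with a fake embeddings class standing in for a real model (both names below are illustrative, not part of the package):

import numpy as np


class FakeEmbeddings:
    """Stand-in for a real embeddings model; returns float64 like many numeric pipelines."""

    def embed_query(self, text):
        return np.array([0.1, 0.2, 0.3], dtype=np.float64)


def force_float32(vector):
    # Same per-value conversion as Float32Embeddings._force_float32.
    return [float(np.float32(x)) for x in vector]


vec = force_float32(FakeEmbeddings().embed_query("hello"))
print(type(vec[0]))  # <class 'float'>: plain Python floats, safe to serialize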
--- a/lfx/components/vectorstores/qdrant.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from langchain.embeddings.base import Embeddings
-from langchain_community.vectorstores import Qdrant
-
-from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
-from lfx.helpers.data import docs_to_data
-from lfx.io import (
-    DropdownInput,
-    HandleInput,
-    IntInput,
-    SecretStrInput,
-    StrInput,
-)
-from lfx.schema.data import Data
-
-
-class QdrantVectorStoreComponent(LCVectorStoreComponent):
-    display_name = "Qdrant"
-    description = "Qdrant Vector Store with search capabilities"
-    icon = "Qdrant"
-
-    inputs = [
-        StrInput(name="collection_name", display_name="Collection Name", required=True),
-        StrInput(name="host", display_name="Host", value="localhost", advanced=True),
-        IntInput(name="port", display_name="Port", value=6333, advanced=True),
-        IntInput(name="grpc_port", display_name="gRPC Port", value=6334, advanced=True),
-        SecretStrInput(name="api_key", display_name="API Key", advanced=True),
-        StrInput(name="prefix", display_name="Prefix", advanced=True),
-        IntInput(name="timeout", display_name="Timeout", advanced=True),
-        StrInput(name="path", display_name="Path", advanced=True),
-        StrInput(name="url", display_name="URL", advanced=True),
-        DropdownInput(
-            name="distance_func",
-            display_name="Distance Function",
-            options=["Cosine", "Euclidean", "Dot Product"],
-            value="Cosine",
-            advanced=True,
-        ),
-        StrInput(name="content_payload_key", display_name="Content Payload Key", value="page_content", advanced=True),
-        StrInput(name="metadata_payload_key", display_name="Metadata Payload Key", value="metadata", advanced=True),
-        *LCVectorStoreComponent.inputs,
-        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
-        IntInput(
-            name="number_of_results",
-            display_name="Number of Results",
-            info="Number of results to return.",
-            value=4,
-            advanced=True,
-        ),
-    ]
-
-    @check_cached_vector_store
-    def build_vector_store(self) -> Qdrant:
-        qdrant_kwargs = {
-            "collection_name": self.collection_name,
-            "content_payload_key": self.content_payload_key,
-            "metadata_payload_key": self.metadata_payload_key,
-        }
-
-        server_kwargs = {
-            "host": self.host or None,
-            "port": int(self.port),  # Ensure port is an integer
-            "grpc_port": int(self.grpc_port),  # Ensure grpc_port is an integer
-            "api_key": self.api_key,
-            "prefix": self.prefix,
-            # Ensure timeout is an integer
-            "timeout": int(self.timeout) if self.timeout else None,
-            "path": self.path or None,
-            "url": self.url or None,
-        }
-
-        server_kwargs = {k: v for k, v in server_kwargs.items() if v is not None}
-
-        # Convert DataFrame to Data if needed using parent's method
-        self.ingest_data = self._prepare_ingest_data()
-
-        documents = []
-        for _input in self.ingest_data or []:
-            if isinstance(_input, Data):
-                documents.append(_input.to_lc_document())
-            else:
-                documents.append(_input)
-
-        if not isinstance(self.embedding, Embeddings):
-            msg = "Invalid embedding object"
-            raise TypeError(msg)
-
-        if documents:
-            qdrant = Qdrant.from_documents(documents, embedding=self.embedding, **qdrant_kwargs, **server_kwargs)
-        else:
-            from qdrant_client import QdrantClient
-
-            client = QdrantClient(**server_kwargs)
-            qdrant = Qdrant(embeddings=self.embedding, client=client, **qdrant_kwargs)
-
-        return qdrant
-
-    def search_documents(self) -> list[Data]:
-        vector_store = self.build_vector_store()
-
-        if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
-            docs = vector_store.similarity_search(
-                query=self.search_query,
-                k=self.number_of_results,
-            )
-
-            data = docs_to_data(docs)
-            self.status = data
-            return data
-        return []
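One detail worth noting in the deleted Qdrant component: connection kwargs are assembled from the inputs and None values are dropped before the client is built, so unset advanced inputs never reach QdrantClient. A hedged sketch of that filtering with illustrative defaults (a reachable Qdrant at localhost:6333 is assumed for the final line):

from qdrant_client import QdrantClient

# Illustrative defaults, not values from a real deployment.
server_kwargs = {
    "host": "localhost",
    "port": 6333,
    "grpc_port": 6334,
    "api_key": None,  # unset SecretStrInput
    "prefix": None,
    "timeout": None,
    "path": None,
    "url": None,
}

# Drop unset values, mirroring the component's own dict comprehension.
server_kwargs = {k: v for k, v in server_kwargs.items() if v is not None}
# -> {"host": "localhost", "port": 6333, "grpc_port": 6334}

client = QdrantClient(**server_kwargs)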
--- a/lfx/components/vectorstores/supabase.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from langchain_community.vectorstores import SupabaseVectorStore
-from supabase.client import Client, create_client
-
-from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
-from lfx.helpers.data import docs_to_data
-from lfx.io import HandleInput, IntInput, SecretStrInput, StrInput
-from lfx.schema.data import Data
-
-
-class SupabaseVectorStoreComponent(LCVectorStoreComponent):
-    display_name = "Supabase"
-    description = "Supabase Vector Store with search capabilities"
-    name = "SupabaseVectorStore"
-    icon = "Supabase"
-
-    inputs = [
-        StrInput(name="supabase_url", display_name="Supabase URL", required=True),
-        SecretStrInput(name="supabase_service_key", display_name="Supabase Service Key", required=True),
-        StrInput(name="table_name", display_name="Table Name", advanced=True),
-        StrInput(name="query_name", display_name="Query Name"),
-        *LCVectorStoreComponent.inputs,
-        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
-        IntInput(
-            name="number_of_results",
-            display_name="Number of Results",
-            info="Number of results to return.",
-            value=4,
-            advanced=True,
-        ),
-    ]
-
-    @check_cached_vector_store
-    def build_vector_store(self) -> SupabaseVectorStore:
-        supabase: Client = create_client(self.supabase_url, supabase_key=self.supabase_service_key)
-
-        # Convert DataFrame to Data if needed using parent's method
-        self.ingest_data = self._prepare_ingest_data()
-
-        documents = []
-        for _input in self.ingest_data or []:
-            if isinstance(_input, Data):
-                documents.append(_input.to_lc_document())
-            else:
-                documents.append(_input)
-
-        if documents:
-            supabase_vs = SupabaseVectorStore.from_documents(
-                documents=documents,
-                embedding=self.embedding,
-                query_name=self.query_name,
-                client=supabase,
-                table_name=self.table_name,
-            )
-        else:
-            supabase_vs = SupabaseVectorStore(
-                client=supabase,
-                embedding=self.embedding,
-                table_name=self.table_name,
-                query_name=self.query_name,
-            )
-
-        return supabase_vs
-
-    def search_documents(self) -> list[Data]:
-        vector_store = self.build_vector_store()
-
-        if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
-            docs = vector_store.similarity_search(
-                query=self.search_query,
-                k=self.number_of_results,
-            )
-
-            data = docs_to_data(docs)
-            self.status = data
-            return data
-        return []