lfx-nightly 0.1.12.dev42__py3-none-any.whl → 0.2.0.dev0__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (93)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +109 -29
  3. lfx/base/agents/events.py +102 -35
  4. lfx/base/agents/utils.py +15 -2
  5. lfx/base/composio/composio_base.py +24 -9
  6. lfx/base/datastax/__init__.py +5 -0
  7. lfx/{components/vectorstores/astradb.py → base/datastax/astradb_base.py} +84 -473
  8. lfx/base/io/chat.py +5 -4
  9. lfx/base/mcp/util.py +101 -15
  10. lfx/base/models/cometapi_constants.py +54 -0
  11. lfx/base/models/model_input_constants.py +74 -7
  12. lfx/base/models/ollama_constants.py +3 -0
  13. lfx/base/models/watsonx_constants.py +12 -0
  14. lfx/cli/commands.py +1 -1
  15. lfx/components/agents/__init__.py +3 -1
  16. lfx/components/agents/agent.py +47 -4
  17. lfx/components/agents/altk_agent.py +366 -0
  18. lfx/components/agents/cuga_agent.py +1 -1
  19. lfx/components/agents/mcp_component.py +32 -2
  20. lfx/components/amazon/amazon_bedrock_converse.py +1 -1
  21. lfx/components/apify/apify_actor.py +3 -3
  22. lfx/components/cometapi/__init__.py +32 -0
  23. lfx/components/cometapi/cometapi.py +166 -0
  24. lfx/components/datastax/__init__.py +12 -6
  25. lfx/components/datastax/{astra_assistant_manager.py → astradb_assistant_manager.py} +1 -0
  26. lfx/components/datastax/astradb_chatmemory.py +40 -0
  27. lfx/components/datastax/astradb_cql.py +5 -31
  28. lfx/components/datastax/astradb_graph.py +9 -123
  29. lfx/components/datastax/astradb_tool.py +12 -52
  30. lfx/components/datastax/astradb_vectorstore.py +133 -976
  31. lfx/components/datastax/create_assistant.py +1 -0
  32. lfx/components/datastax/create_thread.py +1 -0
  33. lfx/components/datastax/dotenv.py +1 -0
  34. lfx/components/datastax/get_assistant.py +1 -0
  35. lfx/components/datastax/getenvvar.py +1 -0
  36. lfx/components/datastax/graph_rag.py +1 -1
  37. lfx/components/datastax/list_assistants.py +1 -0
  38. lfx/components/datastax/run.py +1 -0
  39. lfx/components/docling/__init__.py +3 -0
  40. lfx/components/docling/docling_remote_vlm.py +284 -0
  41. lfx/components/helpers/memory.py +19 -4
  42. lfx/components/ibm/watsonx.py +25 -21
  43. lfx/components/input_output/chat.py +8 -0
  44. lfx/components/input_output/chat_output.py +8 -0
  45. lfx/components/knowledge_bases/ingestion.py +17 -9
  46. lfx/components/knowledge_bases/retrieval.py +16 -8
  47. lfx/components/logic/loop.py +4 -0
  48. lfx/components/mistral/mistral_embeddings.py +1 -1
  49. lfx/components/models/embedding_model.py +88 -7
  50. lfx/components/ollama/ollama.py +221 -14
  51. lfx/components/openrouter/openrouter.py +49 -147
  52. lfx/components/processing/parser.py +6 -1
  53. lfx/components/processing/structured_output.py +55 -17
  54. lfx/components/vectorstores/__init__.py +0 -6
  55. lfx/custom/custom_component/component.py +3 -2
  56. lfx/field_typing/constants.py +1 -0
  57. lfx/graph/edge/base.py +2 -2
  58. lfx/graph/graph/base.py +1 -1
  59. lfx/graph/graph/schema.py +3 -2
  60. lfx/graph/vertex/vertex_types.py +1 -1
  61. lfx/io/schema.py +6 -0
  62. lfx/memory/stubs.py +26 -7
  63. lfx/schema/message.py +6 -0
  64. lfx/schema/schema.py +5 -0
  65. lfx/services/settings/constants.py +1 -0
  66. {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/METADATA +1 -1
  67. {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/RECORD +70 -85
  68. lfx/components/datastax/astra_db.py +0 -77
  69. lfx/components/datastax/cassandra.py +0 -92
  70. lfx/components/vectorstores/astradb_graph.py +0 -326
  71. lfx/components/vectorstores/cassandra.py +0 -264
  72. lfx/components/vectorstores/cassandra_graph.py +0 -238
  73. lfx/components/vectorstores/chroma.py +0 -167
  74. lfx/components/vectorstores/clickhouse.py +0 -135
  75. lfx/components/vectorstores/couchbase.py +0 -102
  76. lfx/components/vectorstores/elasticsearch.py +0 -267
  77. lfx/components/vectorstores/faiss.py +0 -111
  78. lfx/components/vectorstores/graph_rag.py +0 -141
  79. lfx/components/vectorstores/hcd.py +0 -314
  80. lfx/components/vectorstores/milvus.py +0 -115
  81. lfx/components/vectorstores/mongodb_atlas.py +0 -213
  82. lfx/components/vectorstores/opensearch.py +0 -243
  83. lfx/components/vectorstores/pgvector.py +0 -72
  84. lfx/components/vectorstores/pinecone.py +0 -134
  85. lfx/components/vectorstores/qdrant.py +0 -109
  86. lfx/components/vectorstores/supabase.py +0 -76
  87. lfx/components/vectorstores/upstash.py +0 -124
  88. lfx/components/vectorstores/vectara.py +0 -97
  89. lfx/components/vectorstores/vectara_rag.py +0 -164
  90. lfx/components/vectorstores/weaviate.py +0 -89
  91. /lfx/components/datastax/{astra_vectorize.py → astradb_vectorize.py} +0 -0
  92. {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/WHEEL +0 -0
  93. {lfx_nightly-0.1.12.dev42.dist-info → lfx_nightly-0.2.0.dev0.dist-info}/entry_points.txt +0 -0
lfx/components/vectorstores/hcd.py
@@ -1,314 +0,0 @@
- from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
- from lfx.helpers.data import docs_to_data
- from lfx.inputs.inputs import DictInput, FloatInput
- from lfx.io import (
-     BoolInput,
-     DropdownInput,
-     HandleInput,
-     IntInput,
-     MultilineInput,
-     SecretStrInput,
-     StrInput,
- )
- from lfx.schema.data import Data
-
-
- class HCDVectorStoreComponent(LCVectorStoreComponent):
-     display_name: str = "Hyper-Converged Database"
-     description: str = "Implementation of Vector Store using Hyper-Converged Database (HCD) with search capabilities"
-     name = "HCD"
-     icon: str = "HCD"
-
-     inputs = [
-         StrInput(
-             name="collection_name",
-             display_name="Collection Name",
-             info="The name of the collection within HCD where the vectors will be stored.",
-             required=True,
-         ),
-         StrInput(
-             name="username",
-             display_name="HCD Username",
-             info="Authentication username for accessing HCD.",
-             value="hcd-superuser",
-             required=True,
-         ),
-         SecretStrInput(
-             name="password",
-             display_name="HCD Password",
-             info="Authentication password for accessing HCD.",
-             value="HCD_PASSWORD",
-             required=True,
-         ),
-         SecretStrInput(
-             name="api_endpoint",
-             display_name="HCD API Endpoint",
-             info="API endpoint URL for the HCD service.",
-             value="HCD_API_ENDPOINT",
-             required=True,
-         ),
-         *LCVectorStoreComponent.inputs,
-         StrInput(
-             name="namespace",
-             display_name="Namespace",
-             info="Optional namespace within HCD to use for the collection.",
-             value="default_namespace",
-             advanced=True,
-         ),
-         MultilineInput(
-             name="ca_certificate",
-             display_name="CA Certificate",
-             info="Optional CA certificate for TLS connections to HCD.",
-             advanced=True,
-         ),
-         DropdownInput(
-             name="metric",
-             display_name="Metric",
-             info="Optional distance metric for vector comparisons in the vector store.",
-             options=["cosine", "dot_product", "euclidean"],
-             advanced=True,
-         ),
-         IntInput(
-             name="batch_size",
-             display_name="Batch Size",
-             info="Optional number of data to process in a single batch.",
-             advanced=True,
-         ),
-         IntInput(
-             name="bulk_insert_batch_concurrency",
-             display_name="Bulk Insert Batch Concurrency",
-             info="Optional concurrency level for bulk insert operations.",
-             advanced=True,
-         ),
-         IntInput(
-             name="bulk_insert_overwrite_concurrency",
-             display_name="Bulk Insert Overwrite Concurrency",
-             info="Optional concurrency level for bulk insert operations that overwrite existing data.",
-             advanced=True,
-         ),
-         IntInput(
-             name="bulk_delete_concurrency",
-             display_name="Bulk Delete Concurrency",
-             info="Optional concurrency level for bulk delete operations.",
-             advanced=True,
-         ),
-         DropdownInput(
-             name="setup_mode",
-             display_name="Setup Mode",
-             info="Configuration mode for setting up the vector store, with options like 'Sync', 'Async', or 'Off'.",
-             options=["Sync", "Async", "Off"],
-             advanced=True,
-             value="Sync",
-         ),
-         BoolInput(
-             name="pre_delete_collection",
-             display_name="Pre Delete Collection",
-             info="Boolean flag to determine whether to delete the collection before creating a new one.",
-             advanced=True,
-         ),
-         StrInput(
-             name="metadata_indexing_include",
-             display_name="Metadata Indexing Include",
-             info="Optional list of metadata fields to include in the indexing.",
-             advanced=True,
-         ),
-         HandleInput(
-             name="embedding",
-             display_name="Embedding or Astra Vectorize",
-             input_types=["Embeddings", "dict"],
-             # TODO: This should be optional, but need to refactor langchain-astradb first.
-             info="Allows either an embedding model or an Astra Vectorize configuration.",
-         ),
-         StrInput(
-             name="metadata_indexing_exclude",
-             display_name="Metadata Indexing Exclude",
-             info="Optional list of metadata fields to exclude from the indexing.",
-             advanced=True,
-         ),
-         StrInput(
-             name="collection_indexing_policy",
-             display_name="Collection Indexing Policy",
-             info="Optional dictionary defining the indexing policy for the collection.",
-             advanced=True,
-         ),
-         IntInput(
-             name="number_of_results",
-             display_name="Number of Results",
-             info="Number of results to return.",
-             advanced=True,
-             value=4,
-         ),
-         DropdownInput(
-             name="search_type",
-             display_name="Search Type",
-             info="Search type to use",
-             options=["Similarity", "Similarity with score threshold", "MMR (Max Marginal Relevance)"],
-             value="Similarity",
-             advanced=True,
-         ),
-         FloatInput(
-             name="search_score_threshold",
-             display_name="Search Score Threshold",
-             info="Minimum similarity score threshold for search results. "
-             "(when using 'Similarity with score threshold')",
-             value=0,
-             advanced=True,
-         ),
-         DictInput(
-             name="search_filter",
-             display_name="Search Metadata Filter",
-             info="Optional dictionary of filters to apply to the search query.",
-             advanced=True,
-             is_list=True,
-         ),
-     ]
-
-     @check_cached_vector_store
-     def build_vector_store(self):
-         try:
-             from langchain_astradb import AstraDBVectorStore
-             from langchain_astradb.utils.astradb import SetupMode
-         except ImportError as e:
-             msg = (
-                 "Could not import langchain Astra DB integration package. "
-                 "Please install it with `pip install langchain-astradb`."
-             )
-             raise ImportError(msg) from e
-
-         try:
-             from astrapy.authentication import UsernamePasswordTokenProvider
-             from astrapy.constants import Environment
-         except ImportError as e:
-             msg = "Could not import astrapy integration package. Please install it with `pip install astrapy`."
-             raise ImportError(msg) from e
-
-         try:
-             if not self.setup_mode:
-                 self.setup_mode = self._inputs["setup_mode"].options[0]
-
-             setup_mode_value = SetupMode[self.setup_mode.upper()]
-         except KeyError as e:
-             msg = f"Invalid setup mode: {self.setup_mode}"
-             raise ValueError(msg) from e
-
-         if not isinstance(self.embedding, dict):
-             embedding_dict = {"embedding": self.embedding}
-         else:
-             from astrapy.info import VectorServiceOptions
-
-             dict_options = self.embedding.get("collection_vector_service_options", {})
-             dict_options["authentication"] = {
-                 k: v for k, v in dict_options.get("authentication", {}).items() if k and v
-             }
-             dict_options["parameters"] = {k: v for k, v in dict_options.get("parameters", {}).items() if k and v}
-             embedding_dict = {"collection_vector_service_options": VectorServiceOptions.from_dict(dict_options)}
-             collection_embedding_api_key = self.embedding.get("collection_embedding_api_key")
-             if collection_embedding_api_key:
-                 embedding_dict["collection_embedding_api_key"] = collection_embedding_api_key
-
-         token_provider = UsernamePasswordTokenProvider(self.username, self.password)
-         vector_store_kwargs = {
-             **embedding_dict,
-             "collection_name": self.collection_name,
-             "token": token_provider,
-             "api_endpoint": self.api_endpoint,
-             "namespace": self.namespace,
-             "metric": self.metric or None,
-             "batch_size": self.batch_size or None,
-             "bulk_insert_batch_concurrency": self.bulk_insert_batch_concurrency or None,
-             "bulk_insert_overwrite_concurrency": self.bulk_insert_overwrite_concurrency or None,
-             "bulk_delete_concurrency": self.bulk_delete_concurrency or None,
-             "setup_mode": setup_mode_value,
-             "pre_delete_collection": self.pre_delete_collection or False,
-             "environment": Environment.HCD,
-         }
-
-         if self.metadata_indexing_include:
-             vector_store_kwargs["metadata_indexing_include"] = self.metadata_indexing_include
-         elif self.metadata_indexing_exclude:
-             vector_store_kwargs["metadata_indexing_exclude"] = self.metadata_indexing_exclude
-         elif self.collection_indexing_policy:
-             vector_store_kwargs["collection_indexing_policy"] = self.collection_indexing_policy
-
-         try:
-             vector_store = AstraDBVectorStore(**vector_store_kwargs)
-         except Exception as e:
-             msg = f"Error initializing AstraDBVectorStore: {e}"
-             raise ValueError(msg) from e
-
-         self._add_documents_to_vector_store(vector_store)
-         return vector_store
-
-     def _add_documents_to_vector_store(self, vector_store) -> None:
-         # Convert DataFrame to Data if needed using parent's method
-         self.ingest_data = self._prepare_ingest_data()
-
-         documents = []
-         for _input in self.ingest_data or []:
-             if isinstance(_input, Data):
-                 documents.append(_input.to_lc_document())
-             else:
-                 msg = "Vector Store Inputs must be Data objects."
-                 raise TypeError(msg)
-
-         if documents:
-             self.log(f"Adding {len(documents)} documents to the Vector Store.")
-             try:
-                 vector_store.add_documents(documents)
-             except Exception as e:
-                 msg = f"Error adding documents to AstraDBVectorStore: {e}"
-                 raise ValueError(msg) from e
-         else:
-             self.log("No documents to add to the Vector Store.")
-
-     def _map_search_type(self) -> str:
-         if self.search_type == "Similarity with score threshold":
-             return "similarity_score_threshold"
-         if self.search_type == "MMR (Max Marginal Relevance)":
-             return "mmr"
-         return "similarity"
-
-     def _build_search_args(self):
-         args = {
-             "k": self.number_of_results,
-             "score_threshold": self.search_score_threshold,
-         }
-
-         if self.search_filter:
-             clean_filter = {k: v for k, v in self.search_filter.items() if k and v}
-             if len(clean_filter) > 0:
-                 args["filter"] = clean_filter
-         return args
-
-     def search_documents(self) -> list[Data]:
-         vector_store = self.build_vector_store()
-
-         self.log(f"Search query: {self.search_query}")
-         self.log(f"Search type: {self.search_type}")
-         self.log(f"Number of results: {self.number_of_results}")
-
-         if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
-             try:
-                 search_type = self._map_search_type()
-                 search_args = self._build_search_args()
-
-                 docs = vector_store.search(query=self.search_query, search_type=search_type, **search_args)
-             except Exception as e:
-                 msg = f"Error performing search in AstraDBVectorStore: {e}"
-                 raise ValueError(msg) from e
-
-             self.log(f"Retrieved documents: {len(docs)}")
-
-             data = docs_to_data(docs)
-             self.log(f"Converted documents to data: {len(data)}")
-             self.status = data
-             return data
-         self.log("No search input provided. Skipping search.")
-         return []
-
-     def get_retriever_kwargs(self):
-         search_args = self._build_search_args()
-         return {
-             "search_type": self._map_search_type(),
-             "search_kwargs": search_args,
-         }
lfx/components/vectorstores/milvus.py
@@ -1,115 +0,0 @@
- from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
- from lfx.helpers.data import docs_to_data
- from lfx.io import (
-     BoolInput,
-     DictInput,
-     DropdownInput,
-     FloatInput,
-     HandleInput,
-     IntInput,
-     SecretStrInput,
-     StrInput,
- )
- from lfx.schema.data import Data
-
-
- class MilvusVectorStoreComponent(LCVectorStoreComponent):
-     """Milvus vector store with search capabilities."""
-
-     display_name: str = "Milvus"
-     description: str = "Milvus vector store with search capabilities"
-     name = "Milvus"
-     icon = "Milvus"
-
-     inputs = [
-         StrInput(name="collection_name", display_name="Collection Name", value="langflow"),
-         StrInput(name="collection_description", display_name="Collection Description", value=""),
-         StrInput(
-             name="uri",
-             display_name="Connection URI",
-             value="http://localhost:19530",
-         ),
-         SecretStrInput(
-             name="password",
-             display_name="Token",
-             value="",
-             info="Ignore this field if no token is required to make connection.",
-         ),
-         DictInput(name="connection_args", display_name="Other Connection Arguments", advanced=True),
-         StrInput(name="primary_field", display_name="Primary Field Name", value="pk"),
-         StrInput(name="text_field", display_name="Text Field Name", value="text"),
-         StrInput(name="vector_field", display_name="Vector Field Name", value="vector"),
-         DropdownInput(
-             name="consistency_level",
-             display_name="Consistencey Level",
-             options=["Bounded", "Session", "Strong", "Eventual"],
-             value="Session",
-             advanced=True,
-         ),
-         DictInput(name="index_params", display_name="Index Parameters", advanced=True),
-         DictInput(name="search_params", display_name="Search Parameters", advanced=True),
-         BoolInput(name="drop_old", display_name="Drop Old Collection", value=False, advanced=True),
-         FloatInput(name="timeout", display_name="Timeout", advanced=True),
-         *LCVectorStoreComponent.inputs,
-         HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
-         IntInput(
-             name="number_of_results",
-             display_name="Number of Results",
-             info="Number of results to return.",
-             value=4,
-             advanced=True,
-         ),
-     ]
-
-     @check_cached_vector_store
-     def build_vector_store(self):
-         try:
-             from langchain_milvus.vectorstores import Milvus as LangchainMilvus
-         except ImportError as e:
-             msg = "Could not import Milvus integration package. Please install it with `pip install langchain-milvus`."
-             raise ImportError(msg) from e
-         self.connection_args.update(uri=self.uri, token=self.password)
-         milvus_store = LangchainMilvus(
-             embedding_function=self.embedding,
-             collection_name=self.collection_name,
-             collection_description=self.collection_description,
-             connection_args=self.connection_args,
-             consistency_level=self.consistency_level,
-             index_params=self.index_params,
-             search_params=self.search_params,
-             drop_old=self.drop_old,
-             auto_id=True,
-             primary_field=self.primary_field,
-             text_field=self.text_field,
-             vector_field=self.vector_field,
-             timeout=self.timeout,
-         )
-
-         # Convert DataFrame to Data if needed using parent's method
-         self.ingest_data = self._prepare_ingest_data()
-
-         documents = []
-         for _input in self.ingest_data or []:
-             if isinstance(_input, Data):
-                 documents.append(_input.to_lc_document())
-             else:
-                 documents.append(_input)
-
-         if documents:
-             milvus_store.add_documents(documents)
-
-         return milvus_store
-
-     def search_documents(self) -> list[Data]:
-         vector_store = self.build_vector_store()
-
-         if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
-             docs = vector_store.similarity_search(
-                 query=self.search_query,
-                 k=self.number_of_results,
-             )
-
-             data = docs_to_data(docs)
-             self.status = data
-             return data
-         return []
lfx/components/vectorstores/mongodb_atlas.py
@@ -1,213 +0,0 @@
- import tempfile
- import time
-
- import certifi
- from langchain_community.vectorstores import MongoDBAtlasVectorSearch
- from pymongo.collection import Collection
- from pymongo.operations import SearchIndexModel
-
- from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
- from lfx.helpers.data import docs_to_data
- from lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, SecretStrInput, StrInput
- from lfx.schema.data import Data
-
-
- class MongoVectorStoreComponent(LCVectorStoreComponent):
-     display_name = "MongoDB Atlas"
-     description = "MongoDB Atlas Vector Store with search capabilities"
-     name = "MongoDBAtlasVector"
-     icon = "MongoDB"
-     INSERT_MODES = ["append", "overwrite"]
-     SIMILARITY_OPTIONS = ["cosine", "euclidean", "dotProduct"]
-     QUANTIZATION_OPTIONS = ["scalar", "binary"]
-     inputs = [
-         SecretStrInput(name="mongodb_atlas_cluster_uri", display_name="MongoDB Atlas Cluster URI", required=True),
-         BoolInput(name="enable_mtls", display_name="Enable mTLS", value=False, advanced=True, required=True),
-         SecretStrInput(
-             name="mongodb_atlas_client_cert",
-             display_name="MongoDB Atlas Combined Client Certificate",
-             required=False,
-             info="Client Certificate combined with the private key in the following format:\n "
-             "-----BEGIN PRIVATE KEY-----\n...\n -----END PRIVATE KEY-----\n-----BEGIN CERTIFICATE-----\n"
-             "...\n-----END CERTIFICATE-----\n",
-         ),
-         StrInput(name="db_name", display_name="Database Name", required=True),
-         StrInput(name="collection_name", display_name="Collection Name", required=True),
-         StrInput(
-             name="index_name",
-             display_name="Index Name",
-             required=True,
-             info="The name of Atlas Search index, it should be a Vector Search.",
-         ),
-         *LCVectorStoreComponent.inputs,
-         DropdownInput(
-             name="insert_mode",
-             display_name="Insert Mode",
-             options=INSERT_MODES,
-             value=INSERT_MODES[0],
-             info="How to insert new documents into the collection.",
-             advanced=True,
-         ),
-         HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"]),
-         IntInput(
-             name="number_of_results",
-             display_name="Number of Results",
-             info="Number of results to return.",
-             value=4,
-             advanced=True,
-         ),
-         StrInput(
-             name="index_field",
-             display_name="Index Field",
-             advanced=True,
-             required=True,
-             info="The field to index.",
-             value="embedding",
-         ),
-         StrInput(
-             name="filter_field", display_name="Filter Field", advanced=True, info="The field to filter the index."
-         ),
-         IntInput(
-             name="number_dimensions",
-             display_name="Number of Dimensions",
-             info="Embedding Context Length.",
-             value=1536,
-             advanced=True,
-             required=True,
-         ),
-         DropdownInput(
-             name="similarity",
-             display_name="Similarity",
-             options=SIMILARITY_OPTIONS,
-             value=SIMILARITY_OPTIONS[0],
-             info="The method used to measure the similarity between vectors.",
-             advanced=True,
-         ),
-         DropdownInput(
-             name="quantization",
-             display_name="Quantization",
-             options=QUANTIZATION_OPTIONS,
-             value=None,
-             info="Quantization reduces memory costs converting 32-bit floats to smaller data types",
-             advanced=True,
-         ),
-     ]
-
-     @check_cached_vector_store
-     def build_vector_store(self) -> MongoDBAtlasVectorSearch:
-         try:
-             from pymongo import MongoClient
-         except ImportError as e:
-             msg = "Please install pymongo to use MongoDB Atlas Vector Store"
-             raise ImportError(msg) from e
-
-         # Create temporary files for the client certificate
-         if self.enable_mtls:
-             client_cert_path = None
-             try:
-                 client_cert = self.mongodb_atlas_client_cert.replace(" ", "\n")
-                 client_cert = client_cert.replace("-----BEGIN\nPRIVATE\nKEY-----", "-----BEGIN PRIVATE KEY-----")
-                 client_cert = client_cert.replace(
-                     "-----END\nPRIVATE\nKEY-----\n-----BEGIN\nCERTIFICATE-----",
-                     "-----END PRIVATE KEY-----\n-----BEGIN CERTIFICATE-----",
-                 )
-                 client_cert = client_cert.replace("-----END\nCERTIFICATE-----", "-----END CERTIFICATE-----")
-                 with tempfile.NamedTemporaryFile(delete=False) as client_cert_file:
-                     client_cert_file.write(client_cert.encode("utf-8"))
-                     client_cert_path = client_cert_file.name
-
-             except Exception as e:
-                 msg = f"Failed to write certificate to temporary file: {e}"
-                 raise ValueError(msg) from e
-
-         try:
-             mongo_client: MongoClient = (
-                 MongoClient(
-                     self.mongodb_atlas_cluster_uri,
-                     tls=True,
-                     tlsCertificateKeyFile=client_cert_path,
-                     tlsCAFile=certifi.where(),
-                 )
-                 if self.enable_mtls
-                 else MongoClient(self.mongodb_atlas_cluster_uri)
-             )
-
-             collection = mongo_client[self.db_name][self.collection_name]
-
-         except Exception as e:
-             msg = f"Failed to connect to MongoDB Atlas: {e}"
-             raise ValueError(msg) from e
-
-         # Convert DataFrame to Data if needed using parent's method
-         self.ingest_data = self._prepare_ingest_data()
-
-         documents = []
-         for _input in self.ingest_data or []:
-             if isinstance(_input, Data):
-                 documents.append(_input.to_lc_document())
-             else:
-                 documents.append(_input)
-
-         if documents:
-             self.__insert_mode(collection)
-
-             return MongoDBAtlasVectorSearch.from_documents(
-                 documents=documents, embedding=self.embedding, collection=collection, index_name=self.index_name
-             )
-         return MongoDBAtlasVectorSearch(embedding=self.embedding, collection=collection, index_name=self.index_name)
-
-     def search_documents(self) -> list[Data]:
-         from bson.objectid import ObjectId
-
-         vector_store = self.build_vector_store()
-
-         self.verify_search_index(vector_store._collection)
-
-         if self.search_query and isinstance(self.search_query, str):
-             docs = vector_store.similarity_search(
-                 query=self.search_query,
-                 k=self.number_of_results,
-             )
-             for doc in docs:
-                 doc.metadata = {
-                     key: str(value) if isinstance(value, ObjectId) else value for key, value in doc.metadata.items()
-                 }
-
-             data = docs_to_data(docs)
-             self.status = data
-             return data
-         return []
-
-     def __insert_mode(self, collection: Collection) -> None:
-         if self.insert_mode == "overwrite":
-             collection.delete_many({})  # Delete all documents while preserving collection structure
-
-     def verify_search_index(self, collection: Collection) -> None:
-         """Verify if the search index exists, if not, create it.
-
-         Args:
-             collection (Collection): The collection to verify the search index on.
-         """
-         indexes = collection.list_search_indexes()
-
-         index_names_types = {idx["name"]: idx["type"] for idx in indexes}
-         index_names = list(index_names_types.keys())
-         index_type = index_names_types.get(self.index_name)
-         if self.index_name not in index_names and index_type != "vectorSearch":
-             collection.create_search_index(self.__create_index_definition())
-
-             time.sleep(20)  # Give some time for index to be ready
-
-     def __create_index_definition(self) -> SearchIndexModel:
-         fields = [
-             {
-                 "type": "vector",
-                 "path": self.index_field,
-                 "numDimensions": self.number_dimensions,
-                 "similarity": self.similarity,
-                 "quantization": self.quantization,
-             }
-         ]
-         if self.filter_field:
-             fields.append({"type": "filter", "path": self.filter_field})
-         return SearchIndexModel(definition={"fields": fields}, name=self.index_name, type="vectorSearch")