kailash 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +25 -3
- kailash/nodes/admin/__init__.py +35 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1519 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +1 -0
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +407 -2
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +293 -12
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +91 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +132 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
- kailash-0.4.0.dist-info/RECORD +223 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.1.dist-info/RECORD +0 -136
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,598 @@
|
|
1
|
+
"""Asynchronous PostgreSQL vector database node for pgvector operations.
|
2
|
+
|
3
|
+
This module provides async nodes for working with PostgreSQL's pgvector extension,
|
4
|
+
enabling high-performance vector similarity search and embedding storage for AI/ML
|
5
|
+
workflows.
|
6
|
+
|
7
|
+
Design Philosophy:
|
8
|
+
1. Optimized for AI/ML workflows with embeddings
|
9
|
+
2. Support for all pgvector distance metrics
|
10
|
+
3. Efficient batch operations
|
11
|
+
4. Index management utilities
|
12
|
+
5. Hybrid search capabilities
|
13
|
+
6. Compatible with external repositories
|
14
|
+
|
15
|
+
Key Features:
|
16
|
+
- Vector similarity search with multiple distance metrics
|
17
|
+
- Batch embedding insertion for efficiency
|
18
|
+
- HNSW and IVFFlat index support
|
19
|
+
- Metadata filtering with vector search
|
20
|
+
- Query optimization helpers
|
21
|
+
- Connection pooling via AsyncConnectionManager
|
22
|
+
"""
|
23
|
+
|
24
|
+
import json
import logging
import re
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Union

import numpy as np

from kailash.nodes.base import NodeParameter, register_node
from kailash.nodes.base_async import AsyncNode
from kailash.nodes.data.async_connection import PoolConfig, get_connection_manager
from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError
|
36
|
+
|
37
|
+
logger = logging.getLogger(__name__)
|
38
|
+
|
39
|
+
|
40
|
+
class DistanceMetric(Enum):
    """Supported distance metrics for vector similarity.

    Each value corresponds to a pgvector operator (see
    ``_get_distance_operator``): l2 -> ``<->``, cosine -> ``<=>``,
    ip -> ``<#>``.
    """

    L2 = "l2"  # Euclidean distance
    COSINE = "cosine"  # Cosine distance
    IP = "ip"  # Inner product (dot product)
|
46
|
+
|
47
|
+
|
48
|
+
class IndexType(Enum):
    """Supported vector index types.

    Used by the ``create_index`` operation to choose the pgvector
    index method; ``NONE`` skips index creation entirely.
    """

    HNSW = "hnsw"  # Hierarchical Navigable Small World
    IVFFLAT = "ivfflat"  # Inverted File Flat
    NONE = "none"  # No index (exact search)
|
54
|
+
|
55
|
+
|
56
|
+
@dataclass
class VectorSearchResult:
    """Result from vector similarity search.

    NOTE(review): this dataclass is not referenced by the node's visible
    code paths (``_search_vectors`` returns plain dicts with the same
    keys); presumably intended for external consumers — confirm.
    """

    # Identifier of the matched row (the table's primary key value).
    id: Any
    # Distance between the stored vector and the query vector.
    distance: float
    # The stored vector itself, when included in the result.
    vector: Optional[List[float]] = None
    # Metadata stored alongside the vector, if any.
    metadata: Optional[Dict[str, Any]] = None
|
64
|
+
|
65
|
+
|
66
|
+
@register_node()
|
67
|
+
class AsyncPostgreSQLVectorNode(AsyncNode):
|
68
|
+
"""Asynchronous PostgreSQL pgvector node for vector operations.
|
69
|
+
|
70
|
+
This node provides high-performance vector similarity search and embedding
|
71
|
+
storage using PostgreSQL's pgvector extension. It supports multiple distance
|
72
|
+
metrics, index types, and hybrid search with metadata filtering.
|
73
|
+
|
74
|
+
Parameters:
|
75
|
+
connection_string: PostgreSQL connection string
|
76
|
+
host: Database host (if no connection_string)
|
77
|
+
port: Database port (default: 5432)
|
78
|
+
database: Database name
|
79
|
+
user: Database user
|
80
|
+
password: Database password
|
81
|
+
table_name: Table to operate on
|
82
|
+
vector_column: Column name for vectors (default: "embedding")
|
83
|
+
dimension: Vector dimension (required for table creation)
|
84
|
+
distance_metric: Distance metric (l2, cosine, ip)
|
85
|
+
index_type: Index type (hnsw, ivfflat, none)
|
86
|
+
operation: Operation to perform (search, insert, create_table, create_index)
|
87
|
+
vector: Vector for search or insert
|
88
|
+
vectors: Batch of vectors for bulk insert
|
89
|
+
metadata: Metadata for insert operations
|
90
|
+
metadata_filter: SQL WHERE clause for hybrid search
|
91
|
+
limit: Number of results to return
|
92
|
+
ef_search: HNSW ef parameter for search
|
93
|
+
probes: IVFFlat probes parameter
|
94
|
+
|
95
|
+
Example:
|
96
|
+
>>> # Vector similarity search
|
97
|
+
>>> node = AsyncPostgreSQLVectorNode(
|
98
|
+
... name="vector_search",
|
99
|
+
... connection_string="postgresql://localhost/vectordb",
|
100
|
+
... table_name="documents",
|
101
|
+
... operation="search",
|
102
|
+
... vector=[0.1, 0.2, 0.3, ...],
|
103
|
+
... distance_metric="cosine",
|
104
|
+
... limit=10
|
105
|
+
... )
|
106
|
+
>>> results = await node.async_run()
|
107
|
+
>>> similar_docs = results["matches"]
|
108
|
+
|
109
|
+
>>> # Batch insert embeddings
|
110
|
+
>>> node = AsyncPostgreSQLVectorNode(
|
111
|
+
... name="insert_embeddings",
|
112
|
+
... connection_string="postgresql://localhost/vectordb",
|
113
|
+
... table_name="documents",
|
114
|
+
... operation="insert",
|
115
|
+
... vectors=embeddings, # List of vectors
|
116
|
+
... metadata=[{"doc_id": 1}, {"doc_id": 2}, ...]
|
117
|
+
... )
|
118
|
+
"""
|
119
|
+
|
120
|
+
    def __init__(self, **config):
        """Initialize the node and attach the shared connection manager.

        Args:
            **config: Node configuration; accepted keys are defined by
                :meth:`get_parameters`.
        """
        # Set to None BEFORE super().__init__ so the attribute exists even if
        # base-class initialization touches this instance; the real manager is
        # attached afterwards.
        self._connection_manager = None
        super().__init__(**config)
        self._connection_manager = get_connection_manager()
|
124
|
+
|
125
|
+
def get_parameters(self) -> dict[str, NodeParameter]:
|
126
|
+
"""Define the parameters this node accepts."""
|
127
|
+
params = [
|
128
|
+
# Connection parameters
|
129
|
+
NodeParameter(
|
130
|
+
name="connection_string",
|
131
|
+
type=str,
|
132
|
+
required=False,
|
133
|
+
description="PostgreSQL connection string",
|
134
|
+
),
|
135
|
+
NodeParameter(
|
136
|
+
name="host", type=str, required=False, description="Database host"
|
137
|
+
),
|
138
|
+
NodeParameter(
|
139
|
+
name="port",
|
140
|
+
type=int,
|
141
|
+
required=False,
|
142
|
+
default=5432,
|
143
|
+
description="Database port",
|
144
|
+
),
|
145
|
+
NodeParameter(
|
146
|
+
name="database", type=str, required=False, description="Database name"
|
147
|
+
),
|
148
|
+
NodeParameter(
|
149
|
+
name="user", type=str, required=False, description="Database user"
|
150
|
+
),
|
151
|
+
NodeParameter(
|
152
|
+
name="password",
|
153
|
+
type=str,
|
154
|
+
required=False,
|
155
|
+
description="Database password",
|
156
|
+
),
|
157
|
+
# Table configuration
|
158
|
+
NodeParameter(
|
159
|
+
name="table_name",
|
160
|
+
type=str,
|
161
|
+
required=True,
|
162
|
+
description="Table name for vector operations",
|
163
|
+
),
|
164
|
+
NodeParameter(
|
165
|
+
name="vector_column",
|
166
|
+
type=str,
|
167
|
+
required=False,
|
168
|
+
default="embedding",
|
169
|
+
description="Column name for vectors",
|
170
|
+
),
|
171
|
+
NodeParameter(
|
172
|
+
name="dimension",
|
173
|
+
type=int,
|
174
|
+
required=False,
|
175
|
+
description="Vector dimension (required for table creation)",
|
176
|
+
),
|
177
|
+
# Operation parameters
|
178
|
+
NodeParameter(
|
179
|
+
name="operation",
|
180
|
+
type=str,
|
181
|
+
required=True,
|
182
|
+
description="Operation: search, insert, create_table, create_index",
|
183
|
+
),
|
184
|
+
NodeParameter(
|
185
|
+
name="distance_metric",
|
186
|
+
type=str,
|
187
|
+
required=False,
|
188
|
+
default="l2",
|
189
|
+
description="Distance metric: l2, cosine, ip",
|
190
|
+
),
|
191
|
+
NodeParameter(
|
192
|
+
name="index_type",
|
193
|
+
type=str,
|
194
|
+
required=False,
|
195
|
+
default="hnsw",
|
196
|
+
description="Index type: hnsw, ivfflat, none",
|
197
|
+
),
|
198
|
+
# Search parameters
|
199
|
+
NodeParameter(
|
200
|
+
name="vector",
|
201
|
+
type=list,
|
202
|
+
required=False,
|
203
|
+
description="Query vector for search or single insert",
|
204
|
+
),
|
205
|
+
NodeParameter(
|
206
|
+
name="limit",
|
207
|
+
type=int,
|
208
|
+
required=False,
|
209
|
+
default=10,
|
210
|
+
description="Number of results to return",
|
211
|
+
),
|
212
|
+
NodeParameter(
|
213
|
+
name="metadata_filter",
|
214
|
+
type=str,
|
215
|
+
required=False,
|
216
|
+
description="SQL WHERE clause for metadata filtering",
|
217
|
+
),
|
218
|
+
NodeParameter(
|
219
|
+
name="ef_search",
|
220
|
+
type=int,
|
221
|
+
required=False,
|
222
|
+
description="HNSW ef parameter for search",
|
223
|
+
),
|
224
|
+
NodeParameter(
|
225
|
+
name="probes",
|
226
|
+
type=int,
|
227
|
+
required=False,
|
228
|
+
description="IVFFlat probes parameter",
|
229
|
+
),
|
230
|
+
# Insert parameters
|
231
|
+
NodeParameter(
|
232
|
+
name="vectors",
|
233
|
+
type=list,
|
234
|
+
required=False,
|
235
|
+
description="Batch of vectors for bulk insert",
|
236
|
+
),
|
237
|
+
NodeParameter(
|
238
|
+
name="metadata",
|
239
|
+
type=Any,
|
240
|
+
required=False,
|
241
|
+
description="Metadata for insert (dict or list of dicts)",
|
242
|
+
),
|
243
|
+
# Index parameters
|
244
|
+
NodeParameter(
|
245
|
+
name="m",
|
246
|
+
type=int,
|
247
|
+
required=False,
|
248
|
+
default=16,
|
249
|
+
description="HNSW M parameter",
|
250
|
+
),
|
251
|
+
NodeParameter(
|
252
|
+
name="ef_construction",
|
253
|
+
type=int,
|
254
|
+
required=False,
|
255
|
+
default=64,
|
256
|
+
description="HNSW ef_construction parameter",
|
257
|
+
),
|
258
|
+
NodeParameter(
|
259
|
+
name="lists",
|
260
|
+
type=int,
|
261
|
+
required=False,
|
262
|
+
default=100,
|
263
|
+
description="IVFFlat lists parameter",
|
264
|
+
),
|
265
|
+
# Pool configuration
|
266
|
+
NodeParameter(
|
267
|
+
name="pool_size",
|
268
|
+
type=int,
|
269
|
+
required=False,
|
270
|
+
default=10,
|
271
|
+
description="Connection pool size",
|
272
|
+
),
|
273
|
+
NodeParameter(
|
274
|
+
name="tenant_id",
|
275
|
+
type=str,
|
276
|
+
required=False,
|
277
|
+
default="default",
|
278
|
+
description="Tenant ID for connection isolation",
|
279
|
+
),
|
280
|
+
]
|
281
|
+
|
282
|
+
# Convert list to dict as required by base class
|
283
|
+
return {param.name: param for param in params}
|
284
|
+
|
285
|
+
def validate_config(self):
|
286
|
+
"""Validate node configuration."""
|
287
|
+
super().validate_config()
|
288
|
+
|
289
|
+
# Validate connection parameters
|
290
|
+
if not self.config.get("connection_string"):
|
291
|
+
if not all(
|
292
|
+
[
|
293
|
+
self.config.get("host"),
|
294
|
+
self.config.get("database"),
|
295
|
+
self.config.get("user"),
|
296
|
+
]
|
297
|
+
):
|
298
|
+
raise NodeValidationError(
|
299
|
+
"Either connection_string or host/database/user required"
|
300
|
+
)
|
301
|
+
|
302
|
+
# Validate operation
|
303
|
+
operation = self.config.get("operation", "").lower()
|
304
|
+
if operation not in ["search", "insert", "create_table", "create_index"]:
|
305
|
+
raise NodeValidationError(
|
306
|
+
f"Invalid operation: {operation}. "
|
307
|
+
"Must be one of: search, insert, create_table, create_index"
|
308
|
+
)
|
309
|
+
|
310
|
+
# Validate operation-specific requirements
|
311
|
+
if operation == "search":
|
312
|
+
if not self.config.get("vector"):
|
313
|
+
raise NodeValidationError("vector required for search operation")
|
314
|
+
elif operation == "insert":
|
315
|
+
if not (self.config.get("vector") or self.config.get("vectors")):
|
316
|
+
raise NodeValidationError("vector or vectors required for insert")
|
317
|
+
elif operation == "create_table":
|
318
|
+
if not self.config.get("dimension"):
|
319
|
+
raise NodeValidationError("dimension required for create_table")
|
320
|
+
|
321
|
+
# Validate distance metric
|
322
|
+
metric = self.config.get("distance_metric", "l2").lower()
|
323
|
+
if metric not in ["l2", "cosine", "ip"]:
|
324
|
+
raise NodeValidationError(
|
325
|
+
f"Invalid distance_metric: {metric}. " "Must be one of: l2, cosine, ip"
|
326
|
+
)
|
327
|
+
|
328
|
+
# Validate index type
|
329
|
+
index_type = self.config.get("index_type", "hnsw").lower()
|
330
|
+
if index_type not in ["hnsw", "ivfflat", "none"]:
|
331
|
+
raise NodeValidationError(
|
332
|
+
f"Invalid index_type: {index_type}. "
|
333
|
+
"Must be one of: hnsw, ivfflat, none"
|
334
|
+
)
|
335
|
+
|
336
|
+
def _get_db_config(self) -> dict:
|
337
|
+
"""Get database configuration."""
|
338
|
+
if self.config.get("connection_string"):
|
339
|
+
return {
|
340
|
+
"type": "postgresql",
|
341
|
+
"connection_string": self.config["connection_string"],
|
342
|
+
}
|
343
|
+
else:
|
344
|
+
return {
|
345
|
+
"type": "postgresql",
|
346
|
+
"host": self.config["host"],
|
347
|
+
"port": self.config.get("port", 5432),
|
348
|
+
"database": self.config["database"],
|
349
|
+
"user": self.config["user"],
|
350
|
+
"password": self.config.get("password", ""),
|
351
|
+
}
|
352
|
+
|
353
|
+
def _get_distance_operator(self, metric: str) -> str:
|
354
|
+
"""Get pgvector distance operator for metric."""
|
355
|
+
operators = {"l2": "<->", "cosine": "<=>", "ip": "<#>"}
|
356
|
+
return operators.get(metric, "<->")
|
357
|
+
|
358
|
+
    async def _ensure_extension(self, conn):
        """Ensure the pgvector extension is installed on this database.

        Best-effort by design: failures (extension already present,
        insufficient privileges) are logged at debug level and ignored so a
        later DDL/query failure surfaces the real problem.
        """
        try:
            await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
        except Exception as e:
            # Extension might already exist or user lacks permissions
            logger.debug(f"pgvector extension check: {e}")
|
365
|
+
|
366
|
+
async def _create_table(self, conn) -> dict[str, Any]:
|
367
|
+
"""Create vector table."""
|
368
|
+
table_name = self.config["table_name"]
|
369
|
+
vector_column = self.config.get("vector_column", "embedding")
|
370
|
+
dimension = self.config["dimension"]
|
371
|
+
|
372
|
+
await self._ensure_extension(conn)
|
373
|
+
|
374
|
+
# Create table with vector column
|
375
|
+
query = f"""
|
376
|
+
CREATE TABLE IF NOT EXISTS {table_name} (
|
377
|
+
id SERIAL PRIMARY KEY,
|
378
|
+
{vector_column} vector({dimension}),
|
379
|
+
metadata JSONB,
|
380
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
381
|
+
)
|
382
|
+
"""
|
383
|
+
|
384
|
+
await conn.execute(query)
|
385
|
+
|
386
|
+
return {
|
387
|
+
"result": {
|
388
|
+
"status": "success",
|
389
|
+
"table": table_name,
|
390
|
+
"dimension": dimension,
|
391
|
+
"message": f"Table {table_name} created successfully",
|
392
|
+
}
|
393
|
+
}
|
394
|
+
|
395
|
+
async def _create_index(self, conn) -> dict[str, Any]:
|
396
|
+
"""Create vector index."""
|
397
|
+
table_name = self.config["table_name"]
|
398
|
+
vector_column = self.config.get("vector_column", "embedding")
|
399
|
+
index_type = self.config.get("index_type", "hnsw").lower()
|
400
|
+
distance_metric = self.config.get("distance_metric", "l2").lower()
|
401
|
+
|
402
|
+
# Get distance function for index
|
403
|
+
distance_func = {
|
404
|
+
"l2": "vector_l2_ops",
|
405
|
+
"cosine": "vector_cosine_ops",
|
406
|
+
"ip": "vector_ip_ops",
|
407
|
+
}.get(distance_metric, "vector_l2_ops")
|
408
|
+
|
409
|
+
index_name = f"{table_name}_{vector_column}_{index_type}_idx"
|
410
|
+
|
411
|
+
if index_type == "hnsw":
|
412
|
+
m = self.config.get("m", 16)
|
413
|
+
ef_construction = self.config.get("ef_construction", 64)
|
414
|
+
query = f"""
|
415
|
+
CREATE INDEX IF NOT EXISTS {index_name}
|
416
|
+
ON {table_name}
|
417
|
+
USING hnsw ({vector_column} {distance_func})
|
418
|
+
WITH (m = {m}, ef_construction = {ef_construction})
|
419
|
+
"""
|
420
|
+
elif index_type == "ivfflat":
|
421
|
+
lists = self.config.get("lists", 100)
|
422
|
+
query = f"""
|
423
|
+
CREATE INDEX IF NOT EXISTS {index_name}
|
424
|
+
ON {table_name}
|
425
|
+
USING ivfflat ({vector_column} {distance_func})
|
426
|
+
WITH (lists = {lists})
|
427
|
+
"""
|
428
|
+
else:
|
429
|
+
return {
|
430
|
+
"result": {
|
431
|
+
"status": "skipped",
|
432
|
+
"message": "No index created (exact search mode)",
|
433
|
+
}
|
434
|
+
}
|
435
|
+
|
436
|
+
await conn.execute(query)
|
437
|
+
|
438
|
+
return {
|
439
|
+
"result": {
|
440
|
+
"status": "success",
|
441
|
+
"index": index_name,
|
442
|
+
"type": index_type,
|
443
|
+
"message": f"Index {index_name} created successfully",
|
444
|
+
}
|
445
|
+
}
|
446
|
+
|
447
|
+
async def _insert_vectors(self, conn, **inputs) -> dict[str, Any]:
|
448
|
+
"""Insert vectors into table."""
|
449
|
+
table_name = self.config["table_name"]
|
450
|
+
vector_column = self.config.get("vector_column", "embedding")
|
451
|
+
|
452
|
+
# Get vectors and metadata
|
453
|
+
vectors = inputs.get("vectors") or self.config.get("vectors")
|
454
|
+
single_vector = inputs.get("vector") or self.config.get("vector")
|
455
|
+
metadata = inputs.get("metadata") or self.config.get("metadata")
|
456
|
+
|
457
|
+
if single_vector and not vectors:
|
458
|
+
vectors = [single_vector]
|
459
|
+
if metadata and not isinstance(metadata, list):
|
460
|
+
metadata = [metadata]
|
461
|
+
|
462
|
+
if not vectors:
|
463
|
+
raise NodeExecutionError("No vectors provided for insert")
|
464
|
+
|
465
|
+
# Prepare batch insert
|
466
|
+
inserted_count = 0
|
467
|
+
|
468
|
+
if metadata:
|
469
|
+
# Insert with metadata
|
470
|
+
query = f"""
|
471
|
+
INSERT INTO {table_name} ({vector_column}, metadata)
|
472
|
+
VALUES ($1, $2)
|
473
|
+
"""
|
474
|
+
|
475
|
+
for i, vector in enumerate(vectors):
|
476
|
+
meta = metadata[i] if i < len(metadata) else {}
|
477
|
+
await conn.execute(query, vector, json.dumps(meta))
|
478
|
+
inserted_count += 1
|
479
|
+
else:
|
480
|
+
# Insert vectors only
|
481
|
+
query = f"""
|
482
|
+
INSERT INTO {table_name} ({vector_column})
|
483
|
+
VALUES ($1)
|
484
|
+
"""
|
485
|
+
|
486
|
+
for vector in vectors:
|
487
|
+
await conn.execute(query, vector)
|
488
|
+
inserted_count += 1
|
489
|
+
|
490
|
+
return {
|
491
|
+
"result": {
|
492
|
+
"status": "success",
|
493
|
+
"inserted_count": inserted_count,
|
494
|
+
"message": f"Inserted {inserted_count} vectors",
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
498
|
+
async def _search_vectors(self, conn, **inputs) -> dict[str, Any]:
|
499
|
+
"""Search for similar vectors."""
|
500
|
+
table_name = self.config["table_name"]
|
501
|
+
vector_column = self.config.get("vector_column", "embedding")
|
502
|
+
|
503
|
+
# Get search parameters
|
504
|
+
query_vector = inputs.get("vector") or self.config.get("vector")
|
505
|
+
limit = inputs.get("limit") or self.config.get("limit", 10)
|
506
|
+
metadata_filter = inputs.get("metadata_filter") or self.config.get(
|
507
|
+
"metadata_filter"
|
508
|
+
)
|
509
|
+
distance_metric = self.config.get("distance_metric", "l2").lower()
|
510
|
+
|
511
|
+
if not query_vector:
|
512
|
+
raise NodeExecutionError("No query vector provided for search")
|
513
|
+
|
514
|
+
# Set search parameters if provided
|
515
|
+
if self.config.get("ef_search"):
|
516
|
+
await conn.execute(f"SET hnsw.ef_search = {self.config['ef_search']}")
|
517
|
+
if self.config.get("probes"):
|
518
|
+
await conn.execute(f"SET ivfflat.probes = {self.config['probes']}")
|
519
|
+
|
520
|
+
# Build search query
|
521
|
+
distance_op = self._get_distance_operator(distance_metric)
|
522
|
+
|
523
|
+
base_query = f"""
|
524
|
+
SELECT
|
525
|
+
id,
|
526
|
+
{vector_column} AS vector,
|
527
|
+
metadata,
|
528
|
+
{vector_column} {distance_op} $1 AS distance
|
529
|
+
FROM {table_name}
|
530
|
+
"""
|
531
|
+
|
532
|
+
if metadata_filter:
|
533
|
+
base_query += f" WHERE {metadata_filter}"
|
534
|
+
|
535
|
+
base_query += f"""
|
536
|
+
ORDER BY {vector_column} {distance_op} $1
|
537
|
+
LIMIT {limit}
|
538
|
+
"""
|
539
|
+
|
540
|
+
# Execute search
|
541
|
+
rows = await conn.fetch(base_query, query_vector)
|
542
|
+
|
543
|
+
# Format results
|
544
|
+
matches = []
|
545
|
+
for row in rows:
|
546
|
+
matches.append(
|
547
|
+
{
|
548
|
+
"id": row["id"],
|
549
|
+
"distance": float(row["distance"]),
|
550
|
+
"vector": list(row["vector"]) if row["vector"] else None,
|
551
|
+
"metadata": row["metadata"],
|
552
|
+
}
|
553
|
+
)
|
554
|
+
|
555
|
+
return {
|
556
|
+
"result": {
|
557
|
+
"matches": matches,
|
558
|
+
"count": len(matches),
|
559
|
+
"distance_metric": distance_metric,
|
560
|
+
}
|
561
|
+
}
|
562
|
+
|
563
|
+
async def async_run(self, **inputs) -> dict[str, Any]:
|
564
|
+
"""Execute vector database operation."""
|
565
|
+
try:
|
566
|
+
operation = (inputs.get("operation") or self.config["operation"]).lower()
|
567
|
+
tenant_id = inputs.get("tenant_id") or self.config.get(
|
568
|
+
"tenant_id", "default"
|
569
|
+
)
|
570
|
+
|
571
|
+
# Get database connection
|
572
|
+
db_config = self._get_db_config()
|
573
|
+
pool_config = PoolConfig(
|
574
|
+
min_size=1, max_size=self.config.get("pool_size", 10)
|
575
|
+
)
|
576
|
+
|
577
|
+
async with self._connection_manager.get_connection(
|
578
|
+
tenant_id=tenant_id, db_config=db_config, pool_config=pool_config
|
579
|
+
) as conn:
|
580
|
+
if operation == "create_table":
|
581
|
+
return await self._create_table(conn)
|
582
|
+
elif operation == "create_index":
|
583
|
+
return await self._create_index(conn)
|
584
|
+
elif operation == "insert":
|
585
|
+
return await self._insert_vectors(conn, **inputs)
|
586
|
+
elif operation == "search":
|
587
|
+
return await self._search_vectors(conn, **inputs)
|
588
|
+
else:
|
589
|
+
raise NodeExecutionError(f"Unknown operation: {operation}")
|
590
|
+
|
591
|
+
except Exception as e:
|
592
|
+
raise NodeExecutionError(f"Vector operation failed: {str(e)}")
|
593
|
+
|
594
|
+
def run(self, **inputs) -> dict[str, Any]:
|
595
|
+
"""Synchronous run method - delegates to async_run."""
|
596
|
+
import asyncio
|
597
|
+
|
598
|
+
return asyncio.run(self.async_run(**inputs))
|