kailash 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +25 -3
  37. kailash/nodes/admin/__init__.py +35 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1519 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +1 -0
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +407 -2
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/api/auth.py +287 -6
  50. kailash/nodes/api/rest.py +151 -0
  51. kailash/nodes/auth/__init__.py +17 -0
  52. kailash/nodes/auth/directory_integration.py +1228 -0
  53. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  54. kailash/nodes/auth/mfa.py +2338 -0
  55. kailash/nodes/auth/risk_assessment.py +872 -0
  56. kailash/nodes/auth/session_management.py +1093 -0
  57. kailash/nodes/auth/sso.py +1040 -0
  58. kailash/nodes/base.py +344 -13
  59. kailash/nodes/base_cycle_aware.py +4 -2
  60. kailash/nodes/base_with_acl.py +1 -1
  61. kailash/nodes/code/python.py +283 -10
  62. kailash/nodes/compliance/__init__.py +9 -0
  63. kailash/nodes/compliance/data_retention.py +1888 -0
  64. kailash/nodes/compliance/gdpr.py +2004 -0
  65. kailash/nodes/data/__init__.py +22 -2
  66. kailash/nodes/data/async_connection.py +469 -0
  67. kailash/nodes/data/async_sql.py +757 -0
  68. kailash/nodes/data/async_vector.py +598 -0
  69. kailash/nodes/data/readers.py +767 -0
  70. kailash/nodes/data/retrieval.py +360 -1
  71. kailash/nodes/data/sharepoint_graph.py +397 -21
  72. kailash/nodes/data/sql.py +94 -5
  73. kailash/nodes/data/streaming.py +68 -8
  74. kailash/nodes/data/vector_db.py +54 -4
  75. kailash/nodes/enterprise/__init__.py +13 -0
  76. kailash/nodes/enterprise/batch_processor.py +741 -0
  77. kailash/nodes/enterprise/data_lineage.py +497 -0
  78. kailash/nodes/logic/convergence.py +31 -9
  79. kailash/nodes/logic/operations.py +14 -3
  80. kailash/nodes/mixins/__init__.py +8 -0
  81. kailash/nodes/mixins/event_emitter.py +201 -0
  82. kailash/nodes/mixins/mcp.py +9 -4
  83. kailash/nodes/mixins/security.py +165 -0
  84. kailash/nodes/monitoring/__init__.py +7 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  86. kailash/nodes/rag/__init__.py +284 -0
  87. kailash/nodes/rag/advanced.py +1615 -0
  88. kailash/nodes/rag/agentic.py +773 -0
  89. kailash/nodes/rag/conversational.py +999 -0
  90. kailash/nodes/rag/evaluation.py +875 -0
  91. kailash/nodes/rag/federated.py +1188 -0
  92. kailash/nodes/rag/graph.py +721 -0
  93. kailash/nodes/rag/multimodal.py +671 -0
  94. kailash/nodes/rag/optimized.py +933 -0
  95. kailash/nodes/rag/privacy.py +1059 -0
  96. kailash/nodes/rag/query_processing.py +1335 -0
  97. kailash/nodes/rag/realtime.py +764 -0
  98. kailash/nodes/rag/registry.py +547 -0
  99. kailash/nodes/rag/router.py +837 -0
  100. kailash/nodes/rag/similarity.py +1854 -0
  101. kailash/nodes/rag/strategies.py +566 -0
  102. kailash/nodes/rag/workflows.py +575 -0
  103. kailash/nodes/security/__init__.py +19 -0
  104. kailash/nodes/security/abac_evaluator.py +1411 -0
  105. kailash/nodes/security/audit_log.py +91 -0
  106. kailash/nodes/security/behavior_analysis.py +1893 -0
  107. kailash/nodes/security/credential_manager.py +401 -0
  108. kailash/nodes/security/rotating_credentials.py +760 -0
  109. kailash/nodes/security/security_event.py +132 -0
  110. kailash/nodes/security/threat_detection.py +1103 -0
  111. kailash/nodes/testing/__init__.py +9 -0
  112. kailash/nodes/testing/credential_testing.py +499 -0
  113. kailash/nodes/transform/__init__.py +10 -2
  114. kailash/nodes/transform/chunkers.py +592 -1
  115. kailash/nodes/transform/processors.py +484 -14
  116. kailash/nodes/validation.py +321 -0
  117. kailash/runtime/access_controlled.py +1 -1
  118. kailash/runtime/async_local.py +41 -7
  119. kailash/runtime/docker.py +1 -1
  120. kailash/runtime/local.py +474 -55
  121. kailash/runtime/parallel.py +1 -1
  122. kailash/runtime/parallel_cyclic.py +1 -1
  123. kailash/runtime/testing.py +210 -2
  124. kailash/utils/migrations/__init__.py +25 -0
  125. kailash/utils/migrations/generator.py +433 -0
  126. kailash/utils/migrations/models.py +231 -0
  127. kailash/utils/migrations/runner.py +489 -0
  128. kailash/utils/secure_logging.py +342 -0
  129. kailash/workflow/__init__.py +16 -0
  130. kailash/workflow/cyclic_runner.py +3 -4
  131. kailash/workflow/graph.py +70 -2
  132. kailash/workflow/resilience.py +249 -0
  133. kailash/workflow/templates.py +726 -0
  134. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
  135. kailash-0.4.0.dist-info/RECORD +223 -0
  136. kailash/api/__init__.py +0 -17
  137. kailash/api/__main__.py +0 -6
  138. kailash/api/studio_secure.py +0 -893
  139. kailash/mcp/__main__.py +0 -13
  140. kailash/mcp/server_new.py +0 -336
  141. kailash/mcp/servers/__init__.py +0 -12
  142. kailash-0.3.2.dist-info/RECORD +0 -136
  143. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
  144. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
  145. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
  146. {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,598 @@
1
+ """Asynchronous PostgreSQL vector database node for pgvector operations.
2
+
3
+ This module provides async nodes for working with PostgreSQL's pgvector extension,
4
+ enabling high-performance vector similarity search and embedding storage for AI/ML
5
+ workflows.
6
+
7
+ Design Philosophy:
8
+ 1. Optimized for AI/ML workflows with embeddings
9
+ 2. Support for all pgvector distance metrics
10
+ 3. Efficient batch operations
11
+ 4. Index management utilities
12
+ 5. Hybrid search capabilities
13
+ 6. Compatible with external repositories
14
+
15
+ Key Features:
16
+ - Vector similarity search with multiple distance metrics
17
+ - Batch embedding insertion for efficiency
18
+ - HNSW and IVFFlat index support
19
+ - Metadata filtering with vector search
20
+ - Query optimization helpers
21
+ - Connection pooling via AsyncConnectionManager
22
+ """
23
+
24
+ import json
25
+ import logging
26
+ from dataclasses import dataclass
27
+ from enum import Enum
28
+ from typing import Any, Dict, List, Optional, Union
29
+
30
+ import numpy as np
31
+
32
+ from kailash.nodes.base import NodeParameter, register_node
33
+ from kailash.nodes.base_async import AsyncNode
34
+ from kailash.nodes.data.async_connection import PoolConfig, get_connection_manager
35
+ from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
class DistanceMetric(Enum):
    """Names of the distance metrics this module accepts for vector comparison.

    Each member's value is the lowercase string used in node configuration
    (``distance_metric``).
    """

    # Euclidean (straight-line) distance.
    L2 = "l2"
    # Cosine distance.
    COSINE = "cosine"
    # Inner product, i.e. dot product.
    IP = "ip"
46
+
47
+
48
class IndexType(Enum):
    """Names of the vector index types this module accepts.

    Each member's value is the lowercase string used in node configuration
    (``index_type``).
    """

    # Hierarchical Navigable Small World graph index.
    HNSW = "hnsw"
    # Inverted File Flat index.
    IVFFLAT = "ivfflat"
    # No index at all: searches are exact.
    NONE = "none"
54
+
55
+
56
@dataclass
class VectorSearchResult:
    """A single match produced by a vector similarity search.

    Only ``id`` and ``distance`` are mandatory; the stored vector and its
    metadata are optional and default to ``None``.
    """

    # Identifier of the matched row.
    id: Any
    # Distance between the query vector and the matched vector.
    distance: float
    # The matched vector itself, when included in the result.
    vector: Optional[List[float]] = None
    # Metadata stored alongside the vector, when included in the result.
    metadata: Optional[Dict[str, Any]] = None
64
+
65
+
66
@register_node()
class AsyncPostgreSQLVectorNode(AsyncNode):
    """Asynchronous PostgreSQL pgvector node for vector operations.

    This node provides vector similarity search and embedding storage using
    PostgreSQL's pgvector extension. It supports multiple distance metrics,
    index types, and hybrid search with metadata filtering.

    Parameters:
        connection_string: PostgreSQL connection string
        host: Database host (if no connection_string)
        port: Database port (default: 5432)
        database: Database name
        user: Database user
        password: Database password
        table_name: Table to operate on
        vector_column: Column name for vectors (default: "embedding")
        dimension: Vector dimension (required for table creation)
        distance_metric: Distance metric (l2, cosine, ip)
        index_type: Index type (hnsw, ivfflat, none)
        operation: Operation to perform (search, insert, create_table, create_index)
        vector: Vector for search or insert
        vectors: Batch of vectors for bulk insert
        metadata: Metadata for insert operations
        metadata_filter: SQL WHERE clause for hybrid search
        limit: Number of results to return
        ef_search: HNSW ef parameter for search
        probes: IVFFlat probes parameter

    Security note:
        ``table_name``, ``vector_column`` and ``metadata_filter`` are
        interpolated into SQL text verbatim. They must come from trusted
        configuration only, never from end-user input.

    Example:
        >>> # Vector similarity search
        >>> node = AsyncPostgreSQLVectorNode(
        ...     name="vector_search",
        ...     connection_string="postgresql://localhost/vectordb",
        ...     table_name="documents",
        ...     operation="search",
        ...     vector=[0.1, 0.2, 0.3, ...],
        ...     distance_metric="cosine",
        ...     limit=10
        ... )
        >>> results = await node.async_run()
        >>> similar_docs = results["result"]["matches"]

        >>> # Batch insert embeddings
        >>> node = AsyncPostgreSQLVectorNode(
        ...     name="insert_embeddings",
        ...     connection_string="postgresql://localhost/vectordb",
        ...     table_name="documents",
        ...     operation="insert",
        ...     vectors=embeddings,  # List of vectors
        ...     metadata=[{"doc_id": 1}, {"doc_id": 2}, ...]
        ... )
    """

    def __init__(self, **config):
        # Set the attribute before the base initializer runs so it exists
        # even if the base class touches the instance during construction.
        self._connection_manager = None
        super().__init__(**config)
        self._connection_manager = get_connection_manager()

    def get_parameters(self) -> dict[str, NodeParameter]:
        """Define the parameters this node accepts, keyed by parameter name."""
        params = [
            # Connection parameters
            NodeParameter(
                name="connection_string",
                type=str,
                required=False,
                description="PostgreSQL connection string",
            ),
            NodeParameter(
                name="host", type=str, required=False, description="Database host"
            ),
            NodeParameter(
                name="port",
                type=int,
                required=False,
                default=5432,
                description="Database port",
            ),
            NodeParameter(
                name="database", type=str, required=False, description="Database name"
            ),
            NodeParameter(
                name="user", type=str, required=False, description="Database user"
            ),
            NodeParameter(
                name="password",
                type=str,
                required=False,
                description="Database password",
            ),
            # Table configuration
            NodeParameter(
                name="table_name",
                type=str,
                required=True,
                description="Table name for vector operations",
            ),
            NodeParameter(
                name="vector_column",
                type=str,
                required=False,
                default="embedding",
                description="Column name for vectors",
            ),
            NodeParameter(
                name="dimension",
                type=int,
                required=False,
                description="Vector dimension (required for table creation)",
            ),
            # Operation parameters
            NodeParameter(
                name="operation",
                type=str,
                required=True,
                description="Operation: search, insert, create_table, create_index",
            ),
            NodeParameter(
                name="distance_metric",
                type=str,
                required=False,
                default="l2",
                description="Distance metric: l2, cosine, ip",
            ),
            NodeParameter(
                name="index_type",
                type=str,
                required=False,
                default="hnsw",
                description="Index type: hnsw, ivfflat, none",
            ),
            # Search parameters
            NodeParameter(
                name="vector",
                type=list,
                required=False,
                description="Query vector for search or single insert",
            ),
            NodeParameter(
                name="limit",
                type=int,
                required=False,
                default=10,
                description="Number of results to return",
            ),
            NodeParameter(
                name="metadata_filter",
                type=str,
                required=False,
                description="SQL WHERE clause for metadata filtering",
            ),
            NodeParameter(
                name="ef_search",
                type=int,
                required=False,
                description="HNSW ef parameter for search",
            ),
            NodeParameter(
                name="probes",
                type=int,
                required=False,
                description="IVFFlat probes parameter",
            ),
            # Insert parameters
            NodeParameter(
                name="vectors",
                type=list,
                required=False,
                description="Batch of vectors for bulk insert",
            ),
            NodeParameter(
                name="metadata",
                type=Any,
                required=False,
                description="Metadata for insert (dict or list of dicts)",
            ),
            # Index parameters
            NodeParameter(
                name="m",
                type=int,
                required=False,
                default=16,
                description="HNSW M parameter",
            ),
            NodeParameter(
                name="ef_construction",
                type=int,
                required=False,
                default=64,
                description="HNSW ef_construction parameter",
            ),
            NodeParameter(
                name="lists",
                type=int,
                required=False,
                default=100,
                description="IVFFlat lists parameter",
            ),
            # Pool configuration
            NodeParameter(
                name="pool_size",
                type=int,
                required=False,
                default=10,
                description="Connection pool size",
            ),
            NodeParameter(
                name="tenant_id",
                type=str,
                required=False,
                default="default",
                description="Tenant ID for connection isolation",
            ),
        ]

        # Convert list to dict as required by base class
        return {param.name: param for param in params}

    @staticmethod
    def _has_items(value) -> bool:
        """Return True when *value* is a non-empty container.

        ``len`` is used instead of truthiness because ``bool()`` on a
        multi-element NumPy array raises ``ValueError``.
        """
        if value is None:
            return False
        try:
            return len(value) > 0
        except TypeError:
            return bool(value)

    @staticmethod
    def _decode_vector(raw) -> Optional[List[float]]:
        """Convert a vector value read from a result row to a list of floats.

        Handles ``None``, a text literal such as ``"[1,2,3]"`` and any
        iterable of numbers (list, tuple, NumPy array).
        """
        if raw is None:
            return None
        if isinstance(raw, str):
            # A bracketed, comma-separated number list parses as JSON.
            raw = json.loads(raw)
        return [float(component) for component in raw]

    def _config_choice(self, key: str, default: str) -> str:
        """Return the lowercase string config value for *key*.

        Falls back to *default* when the key is missing, ``None`` or empty,
        so a ``None`` value cannot trigger ``AttributeError`` on ``.lower()``.
        """
        return str(self.config.get(key) or default).lower()

    def _resolve_sized(self, inputs: dict, key: str):
        """Return ``inputs[key]`` when it has items, else the config value."""
        candidate = inputs.get(key)
        if self._has_items(candidate):
            return candidate
        return self.config.get(key)

    def validate_config(self):
        """Validate node configuration.

        Raises:
            NodeValidationError: If connection details are incomplete, the
                operation, distance metric or index type is not recognised,
                or an operation-specific requirement is missing.
        """
        super().validate_config()

        # Validate connection parameters
        if not self.config.get("connection_string"):
            if not all(
                [
                    self.config.get("host"),
                    self.config.get("database"),
                    self.config.get("user"),
                ]
            ):
                raise NodeValidationError(
                    "Either connection_string or host/database/user required"
                )

        # Validate operation
        operation = self._config_choice("operation", "")
        if operation not in ["search", "insert", "create_table", "create_index"]:
            raise NodeValidationError(
                f"Invalid operation: {operation}. "
                "Must be one of: search, insert, create_table, create_index"
            )

        # Validate operation-specific requirements
        if operation == "search":
            if not self._has_items(self.config.get("vector")):
                raise NodeValidationError("vector required for search operation")
        elif operation == "insert":
            if not (
                self._has_items(self.config.get("vector"))
                or self._has_items(self.config.get("vectors"))
            ):
                raise NodeValidationError("vector or vectors required for insert")
        elif operation == "create_table":
            if not self.config.get("dimension"):
                raise NodeValidationError("dimension required for create_table")

        # Validate distance metric
        metric = self._config_choice("distance_metric", "l2")
        if metric not in ["l2", "cosine", "ip"]:
            raise NodeValidationError(
                f"Invalid distance_metric: {metric}. " "Must be one of: l2, cosine, ip"
            )

        # Validate index type
        index_type = self._config_choice("index_type", "hnsw")
        if index_type not in ["hnsw", "ivfflat", "none"]:
            raise NodeValidationError(
                f"Invalid index_type: {index_type}. "
                "Must be one of: hnsw, ivfflat, none"
            )

    def _get_db_config(self) -> dict:
        """Build the database configuration dict for the connection manager.

        A configured ``connection_string`` takes precedence over the
        individual host/port/database/user/password settings.
        """
        if self.config.get("connection_string"):
            return {
                "type": "postgresql",
                "connection_string": self.config["connection_string"],
            }
        return {
            "type": "postgresql",
            "host": self.config["host"],
            "port": self.config.get("port", 5432),
            "database": self.config["database"],
            "user": self.config["user"],
            "password": self.config.get("password", ""),
        }

    def _get_distance_operator(self, metric: str) -> str:
        """Get the pgvector distance operator for *metric* (``<->`` if unknown)."""
        operators = {"l2": "<->", "cosine": "<=>", "ip": "<#>"}
        return operators.get(metric, "<->")

    async def _ensure_extension(self, conn):
        """Try to enable the pgvector extension; failures are logged, not raised."""
        try:
            await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
        except Exception as e:
            # Deliberate best-effort: the extension might already exist or
            # the user may lack permission to create it.
            logger.debug("pgvector extension check: %s", e)

    async def _create_table(self, conn) -> dict[str, Any]:
        """Create the vector table if it does not exist.

        Returns:
            ``{"result": {...}}`` with status, table name, dimension and message.
        """
        table_name = self.config["table_name"]
        vector_column = self.config.get("vector_column", "embedding")
        # Coerce to int so only a number can reach the SQL text.
        dimension = int(self.config["dimension"])

        await self._ensure_extension(conn)

        # NOTE(review): table_name / vector_column are interpolated verbatim;
        # they must come from trusted configuration.
        query = f"""
            CREATE TABLE IF NOT EXISTS {table_name} (
                id SERIAL PRIMARY KEY,
                {vector_column} vector({dimension}),
                metadata JSONB,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """

        await conn.execute(query)

        return {
            "result": {
                "status": "success",
                "table": table_name,
                "dimension": dimension,
                "message": f"Table {table_name} created successfully",
            }
        }

    async def _create_index(self, conn) -> dict[str, Any]:
        """Create an HNSW or IVFFlat index on the vector column.

        Returns:
            ``{"result": {...}}`` with status ``"success"``, or ``"skipped"``
            when ``index_type`` is neither ``hnsw`` nor ``ivfflat``.
        """
        table_name = self.config["table_name"]
        vector_column = self.config.get("vector_column", "embedding")
        index_type = self._config_choice("index_type", "hnsw")
        distance_metric = self._config_choice("distance_metric", "l2")

        # Operator class matching the distance metric.
        distance_func = {
            "l2": "vector_l2_ops",
            "cosine": "vector_cosine_ops",
            "ip": "vector_ip_ops",
        }.get(distance_metric, "vector_l2_ops")

        # CREATE INDEX does not accept a schema-qualified index name, so any
        # dots from a qualified table name are replaced with underscores.
        index_name = f"{table_name}_{vector_column}_{index_type}_idx".replace(".", "_")

        if index_type == "hnsw":
            # Integer coercion keeps arbitrary text out of the WITH clause.
            m = int(self.config.get("m") or 16)
            ef_construction = int(self.config.get("ef_construction") or 64)
            query = f"""
                CREATE INDEX IF NOT EXISTS {index_name}
                ON {table_name}
                USING hnsw ({vector_column} {distance_func})
                WITH (m = {m}, ef_construction = {ef_construction})
            """
        elif index_type == "ivfflat":
            lists = int(self.config.get("lists") or 100)
            query = f"""
                CREATE INDEX IF NOT EXISTS {index_name}
                ON {table_name}
                USING ivfflat ({vector_column} {distance_func})
                WITH (lists = {lists})
            """
        else:
            return {
                "result": {
                    "status": "skipped",
                    "message": "No index created (exact search mode)",
                }
            }

        await conn.execute(query)

        return {
            "result": {
                "status": "success",
                "index": index_name,
                "type": index_type,
                "message": f"Index {index_name} created successfully",
            }
        }

    async def _insert_vectors(self, conn, **inputs) -> dict[str, Any]:
        """Insert one vector or a batch of vectors, optionally with metadata.

        Runtime ``inputs`` take precedence over node configuration.

        Raises:
            NodeExecutionError: If no vectors are supplied, or a single
                metadata dict is given for a batch of several vectors.
        """
        table_name = self.config["table_name"]
        vector_column = self.config.get("vector_column", "embedding")

        # Get vectors and metadata
        vectors = self._resolve_sized(inputs, "vectors")
        single_vector = self._resolve_sized(inputs, "vector")
        metadata = inputs.get("metadata") or self.config.get("metadata")

        if self._has_items(single_vector) and not self._has_items(vectors):
            vectors = [single_vector]

        if not self._has_items(vectors):
            raise NodeExecutionError("No vectors provided for insert")

        if metadata and not isinstance(metadata, (list, tuple)):
            # A lone metadata dict is unambiguous only for a single vector;
            # indexing a dict by position would fail with an opaque KeyError.
            if len(vectors) != 1:
                raise NodeExecutionError(
                    "metadata must be a list with one entry per vector "
                    "when inserting multiple vectors"
                )
            metadata = [metadata]

        # NOTE(review): vectors are handed to the driver unchanged; this
        # assumes the connection can encode them for the vector column
        # - TODO confirm against the connection manager.
        inserted_count = 0

        if metadata:
            # Insert with metadata
            query = f"""
                INSERT INTO {table_name} ({vector_column}, metadata)
                VALUES ($1, $2)
            """

            for i, vector in enumerate(vectors):
                # Vectors beyond the end of the metadata list get an empty dict.
                meta = metadata[i] if i < len(metadata) else {}
                await conn.execute(query, vector, json.dumps(meta))
                inserted_count += 1
        else:
            # Insert vectors only
            query = f"""
                INSERT INTO {table_name} ({vector_column})
                VALUES ($1)
            """

            for vector in vectors:
                await conn.execute(query, vector)
                inserted_count += 1

        return {
            "result": {
                "status": "success",
                "inserted_count": inserted_count,
                "message": f"Inserted {inserted_count} vectors",
            }
        }

    async def _search_vectors(self, conn, **inputs) -> dict[str, Any]:
        """Search for the vectors nearest to the query vector.

        Runtime ``inputs`` take precedence over node configuration for
        ``vector``, ``limit`` and ``metadata_filter``.

        Returns:
            ``{"result": {"matches": [...], "count": int, "distance_metric": str}}``
            where each match has ``id``, ``distance``, ``vector`` and ``metadata``.

        Raises:
            NodeExecutionError: If no query vector is supplied.
        """
        table_name = self.config["table_name"]
        vector_column = self.config.get("vector_column", "embedding")

        # Get search parameters
        query_vector = self._resolve_sized(inputs, "vector")
        limit = inputs.get("limit") or self.config.get("limit", 10)
        metadata_filter = inputs.get("metadata_filter") or self.config.get(
            "metadata_filter"
        )
        distance_metric = self._config_choice("distance_metric", "l2")

        if not self._has_items(query_vector):
            raise NodeExecutionError("No query vector provided for search")

        # Coerce to int so only a number can reach the LIMIT clause.
        limit = 10 if limit is None else int(limit)

        # Set search parameters if provided.
        # NOTE(review): plain SET is not scoped to a transaction, so the
        # value may persist on a pooled connection - TODO confirm.
        if self.config.get("ef_search"):
            await conn.execute(f"SET hnsw.ef_search = {int(self.config['ef_search'])}")
        if self.config.get("probes"):
            await conn.execute(f"SET ivfflat.probes = {int(self.config['probes'])}")

        # Build search query
        distance_op = self._get_distance_operator(distance_metric)

        base_query = f"""
            SELECT
                id,
                {vector_column} AS vector,
                metadata,
                {vector_column} {distance_op} $1 AS distance
            FROM {table_name}
        """

        if metadata_filter:
            # SECURITY: metadata_filter is raw SQL by design and must never be
            # built from untrusted input.
            base_query += f" WHERE {metadata_filter}"

        base_query += f"""
            ORDER BY {vector_column} {distance_op} $1
            LIMIT {limit}
        """

        # Execute search
        rows = await conn.fetch(base_query, query_vector)

        # Format results
        matches = [
            {
                "id": row["id"],
                "distance": float(row["distance"]),
                "vector": self._decode_vector(row["vector"]),
                "metadata": row["metadata"],
            }
            for row in rows
        ]

        return {
            "result": {
                "matches": matches,
                "count": len(matches),
                "distance_metric": distance_metric,
            }
        }

    async def async_run(self, **inputs) -> dict[str, Any]:
        """Execute the configured vector database operation.

        ``operation`` and ``tenant_id`` may be overridden through ``inputs``.

        Raises:
            NodeExecutionError: On any failure; the original exception is
                attached as ``__cause__``.
        """
        try:
            operation = (inputs.get("operation") or self.config["operation"]).lower()
            tenant_id = inputs.get("tenant_id") or self.config.get(
                "tenant_id", "default"
            )

            # Get database connection
            db_config = self._get_db_config()
            pool_config = PoolConfig(
                min_size=1, max_size=self.config.get("pool_size", 10)
            )

            async with self._connection_manager.get_connection(
                tenant_id=tenant_id, db_config=db_config, pool_config=pool_config
            ) as conn:
                if operation == "create_table":
                    return await self._create_table(conn)
                elif operation == "create_index":
                    return await self._create_index(conn)
                elif operation == "insert":
                    return await self._insert_vectors(conn, **inputs)
                elif operation == "search":
                    return await self._search_vectors(conn, **inputs)
                else:
                    raise NodeExecutionError(f"Unknown operation: {operation}")

        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise NodeExecutionError(f"Vector operation failed: {str(e)}") from e

    def run(self, **inputs) -> dict[str, Any]:
        """Synchronous run method - delegates to async_run.

        Uses ``asyncio.run``, which cannot be called while another event
        loop is running in the same thread; use ``async_run`` there.
        """
        import asyncio

        return asyncio.run(self.async_run(**inputs))