kailash 0.6.6__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (82)
  1. kailash/__init__.py +35 -5
  2. kailash/access_control.py +64 -46
  3. kailash/adapters/__init__.py +5 -0
  4. kailash/adapters/mcp_platform_adapter.py +273 -0
  5. kailash/api/workflow_api.py +34 -3
  6. kailash/channels/__init__.py +21 -0
  7. kailash/channels/api_channel.py +409 -0
  8. kailash/channels/base.py +271 -0
  9. kailash/channels/cli_channel.py +661 -0
  10. kailash/channels/event_router.py +496 -0
  11. kailash/channels/mcp_channel.py +648 -0
  12. kailash/channels/session.py +423 -0
  13. kailash/mcp_server/discovery.py +57 -18
  14. kailash/middleware/communication/api_gateway.py +23 -3
  15. kailash/middleware/communication/realtime.py +83 -0
  16. kailash/middleware/core/agent_ui.py +1 -1
  17. kailash/middleware/gateway/storage_backends.py +393 -0
  18. kailash/middleware/mcp/enhanced_server.py +22 -16
  19. kailash/nexus/__init__.py +21 -0
  20. kailash/nexus/cli/__init__.py +5 -0
  21. kailash/nexus/cli/__main__.py +6 -0
  22. kailash/nexus/cli/main.py +176 -0
  23. kailash/nexus/factory.py +413 -0
  24. kailash/nexus/gateway.py +545 -0
  25. kailash/nodes/__init__.py +8 -5
  26. kailash/nodes/ai/iterative_llm_agent.py +988 -17
  27. kailash/nodes/ai/llm_agent.py +29 -9
  28. kailash/nodes/api/__init__.py +2 -2
  29. kailash/nodes/api/monitoring.py +1 -1
  30. kailash/nodes/base.py +29 -5
  31. kailash/nodes/base_async.py +54 -14
  32. kailash/nodes/code/async_python.py +1 -1
  33. kailash/nodes/code/python.py +50 -6
  34. kailash/nodes/data/async_sql.py +90 -0
  35. kailash/nodes/data/bulk_operations.py +939 -0
  36. kailash/nodes/data/query_builder.py +373 -0
  37. kailash/nodes/data/query_cache.py +512 -0
  38. kailash/nodes/monitoring/__init__.py +10 -0
  39. kailash/nodes/monitoring/deadlock_detector.py +964 -0
  40. kailash/nodes/monitoring/performance_anomaly.py +1078 -0
  41. kailash/nodes/monitoring/race_condition_detector.py +1151 -0
  42. kailash/nodes/monitoring/transaction_metrics.py +790 -0
  43. kailash/nodes/monitoring/transaction_monitor.py +931 -0
  44. kailash/nodes/security/behavior_analysis.py +414 -0
  45. kailash/nodes/system/__init__.py +17 -0
  46. kailash/nodes/system/command_parser.py +820 -0
  47. kailash/nodes/transaction/__init__.py +48 -0
  48. kailash/nodes/transaction/distributed_transaction_manager.py +983 -0
  49. kailash/nodes/transaction/saga_coordinator.py +652 -0
  50. kailash/nodes/transaction/saga_state_storage.py +411 -0
  51. kailash/nodes/transaction/saga_step.py +467 -0
  52. kailash/nodes/transaction/transaction_context.py +756 -0
  53. kailash/nodes/transaction/two_phase_commit.py +978 -0
  54. kailash/nodes/transform/processors.py +17 -1
  55. kailash/nodes/validation/__init__.py +21 -0
  56. kailash/nodes/validation/test_executor.py +532 -0
  57. kailash/nodes/validation/validation_nodes.py +447 -0
  58. kailash/resources/factory.py +1 -1
  59. kailash/runtime/access_controlled.py +9 -7
  60. kailash/runtime/async_local.py +84 -21
  61. kailash/runtime/local.py +21 -2
  62. kailash/runtime/parameter_injector.py +187 -31
  63. kailash/runtime/runner.py +6 -4
  64. kailash/runtime/testing.py +1 -1
  65. kailash/security.py +22 -3
  66. kailash/servers/__init__.py +32 -0
  67. kailash/servers/durable_workflow_server.py +430 -0
  68. kailash/servers/enterprise_workflow_server.py +522 -0
  69. kailash/servers/gateway.py +183 -0
  70. kailash/servers/workflow_server.py +293 -0
  71. kailash/utils/data_validation.py +192 -0
  72. kailash/workflow/builder.py +382 -15
  73. kailash/workflow/cyclic_runner.py +102 -10
  74. kailash/workflow/validation.py +144 -8
  75. kailash/workflow/visualization.py +99 -27
  76. {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/METADATA +3 -2
  77. {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/RECORD +81 -40
  78. kailash/workflow/builder_improvements.py +0 -207
  79. {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/WHEEL +0 -0
  80. {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/entry_points.txt +0 -0
  81. {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/licenses/LICENSE +0 -0
  82. {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/top_level.txt +0 -0
kailash/nodes/data/bulk_operations.py
@@ -0,0 +1,939 @@
+"""Bulk operations support for database nodes.
+
+This module provides bulk CRUD operations for efficient data processing
+in Kailash workflows. It extends the async SQL database capabilities
+with optimized bulk operations for different databases.
+
+Key Features:
+- Database-specific bulk optimizations
+- Chunking for large datasets
+- Progress tracking and reporting
+- Configurable error handling strategies
+- Type validation for bulk data
+"""
+
+import asyncio
+import json
+import logging
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+
+from kailash.nodes.base import NodeParameter, register_node
+from kailash.nodes.data.async_sql import AsyncSQLDatabaseNode, DatabaseType
+from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError
+
+# Import List and Dict types if not already present
+if "List" not in globals():
+    from typing import List
+if "Dict" not in globals():
+    from typing import Dict
+
+logger = logging.getLogger(__name__)
+
+
+class BulkErrorStrategy(Enum):
+    """Error handling strategies for bulk operations."""
+
+    FAIL_FAST = "fail_fast"  # Stop on first error
+    CONTINUE = "continue"  # Continue processing, collect errors
+    ROLLBACK = "rollback"  # Rollback entire operation on any error
+
+
+@dataclass
+class BulkOperationResult:
+    """Result of a bulk operation."""
+
+    total_records: int
+    successful_records: int
+    failed_records: int
+    errors: List[Dict[str, Any]] = field(default_factory=list)
+    execution_time_ms: float = 0.0
+
+    @property
+    def success_rate(self) -> float:
+        """Calculate success rate as percentage."""
+        if self.total_records == 0:
+            return 0.0
+        return (self.successful_records / self.total_records) * 100
+
+
+class BulkOperationMixin:
+    """Mixin for bulk operations support."""
+
+    def setup_bulk_operations(self, config: Dict[str, Any]):
+        """Setup bulk operation configuration."""
+        self.chunk_size: int = config.get("chunk_size", 1000)
+        self.error_strategy: BulkErrorStrategy = BulkErrorStrategy(
+            config.get("error_strategy", "fail_fast")
+        )
+        self.report_progress: bool = config.get("report_progress", True)
+        self.progress_interval: int = config.get("progress_interval", 100)
+
+    def validate_bulk_data(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Validate records before bulk operation.
+
+        Args:
+            records: List of records to validate
+
+        Returns:
+            Validated records
+
+        Raises:
+            NodeValidationError: If validation fails
+        """
+        if not records:
+            raise NodeValidationError("No records provided for bulk operation")
+
+        if not isinstance(records, list):
+            raise NodeValidationError("Records must be a list")
+
+        # Validate each record is a dictionary
+        for i, record in enumerate(records):
+            if not isinstance(record, dict):
+                raise NodeValidationError(f"Record at index {i} must be a dictionary")
+
+        return records
+
+    def chunk_records(
+        self, records: List[Dict[str, Any]], chunk_size: Optional[int] = None
+    ) -> Iterator[List[Dict[str, Any]]]:
+        """Chunk large datasets for processing.
+
+        Args:
+            records: List of records to chunk
+            chunk_size: Size of each chunk (defaults to self.chunk_size)
+
+        Yields:
+            Chunks of records
+        """
+        size = chunk_size or self.chunk_size
+        for i in range(0, len(records), size):
+            yield records[i : i + size]
+
+    async def report_progress_async(self, current: int, total: int, operation: str):
+        """Report progress of bulk operation.
+
+        Args:
+            current: Current record number
+            total: Total number of records
+            operation: Operation being performed
+        """
+        if self.report_progress and current % self.progress_interval == 0:
+            percentage = (current / total) * 100 if total > 0 else 0
+            logger.info(
+                f"Bulk {operation} progress: {current}/{total} ({percentage:.1f}%)"
+            )
+
+
+@register_node()
+class BulkCreateNode(AsyncSQLDatabaseNode, BulkOperationMixin):
+    """Bulk insert operations with database-specific optimizations."""
+
+    def __init__(self, **config):
+        """Initialize bulk create node."""
+        # Initialize parent class
+        super().__init__(**config)
+
+        # Setup bulk operations
+        self.setup_bulk_operations(config)
+
+        # Table and columns configuration
+        self.table_name = config.get("table_name")
+        self.columns = config.get("columns", [])
+        self.returning_columns = config.get("returning_columns", ["id"])
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Get node parameters."""
+        params = super().get_parameters().copy()
+        bulk_params = {
+            "records": NodeParameter(
+                name="records",
+                type=list,
+                description="List of records to insert",
+                required=True,
+            ),
+            "table_name": NodeParameter(
+                name="table_name",
+                type=str,
+                description="Target table name",
+                required=True,
+            ),
+            "columns": NodeParameter(
+                name="columns",
+                type=list,
+                description="Column names (auto-detected if not provided)",
+                required=False,
+            ),
+            "chunk_size": NodeParameter(
+                name="chunk_size",
+                type=int,
+                description="Number of records per chunk",
+                required=False,
+                default_value=1000,
+            ),
+            "error_strategy": NodeParameter(
+                name="error_strategy",
+                type=str,
+                description="Error handling strategy: fail_fast, continue, rollback",
+                required=False,
+                default_value="fail_fast",
+            ),
+            "returning_columns": NodeParameter(
+                name="returning_columns",
+                type=list,
+                description="Columns to return after insert",
+                required=False,
+                default_value=["id"],
+            ),
+        }
+        params.update(bulk_params)
+        # Remove query requirement for bulk operations
+        if "query" in params:
+            params["query"].required = False
+        return params
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Execute bulk insert operation."""
+        start_time = datetime.now()
+
+        # Get records from parameters
+        records = kwargs.get("records", [])
+        records = self.validate_bulk_data(records)
+
+        # Auto-detect columns if not provided
+        if not self.columns and records:
+            self.columns = list(records[0].keys())
+
+        # Use table name from kwargs if provided
+        table_name = kwargs.get("table_name", self.table_name)
+        if not table_name:
+            raise NodeValidationError("table_name is required")
+        self.table_name = table_name
+
+        # Get adapter
+        adapter = await self._get_adapter()
+
+        # Determine database type for optimization
+        db_type = DatabaseType(self.config.get("database_type", "postgresql").lower())
+
+        result = BulkOperationResult(
+            total_records=len(records), successful_records=0, failed_records=0
+        )
+
+        try:
+            if db_type == DatabaseType.POSTGRESQL:
+                await self._bulk_insert_postgresql(adapter, records, result)
+            elif db_type == DatabaseType.MYSQL:
+                await self._bulk_insert_mysql(adapter, records, result)
+            else:
+                await self._bulk_insert_generic(adapter, records, result)
+
+        except Exception as e:
+            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                raise NodeExecutionError(f"Bulk insert failed: {str(e)}")
+            else:
+                result.errors.append({"error": str(e), "type": "general_error"})
+
+        # Calculate execution time
+        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
+
+        return {
+            "status": "success" if result.failed_records == 0 else "partial_success",
+            "total_records": result.total_records,
+            "successful_records": result.successful_records,
+            "failed_records": result.failed_records,
+            "success_rate": result.success_rate,
+            "execution_time_ms": result.execution_time_ms,
+            "errors": (
+                result.errors[:10] if result.errors else []
+            ),  # Limit errors returned
+        }
+
+    async def _bulk_insert_postgresql(
+        self, adapter, records: List[Dict], result: BulkOperationResult
+    ):
+        """PostgreSQL-optimized bulk insert using COPY."""
+        # For very large datasets, use COPY command
+        if len(records) > 10000:
+            # TODO: Implement COPY FROM for maximum performance
+            # For now, fall back to multi-row INSERT
+            pass
+
+        # Use multi-row INSERT with RETURNING
+        for chunk in self.chunk_records(records):
+            try:
+                # Build multi-row INSERT query
+                placeholders = []
+                values = []
+                for i, record in enumerate(chunk):
+                    row_placeholders = []
+                    for col in self.columns:
+                        param_num = i * len(self.columns) + self.columns.index(col) + 1
+                        row_placeholders.append(f"${param_num}")
+                        values.append(record.get(col))
+                    placeholders.append(f"({', '.join(row_placeholders)})")
+
+                query = f"""
+                    INSERT INTO {self.table_name} ({', '.join(self.columns)})
+                    VALUES {', '.join(placeholders)}
+                    RETURNING {', '.join(self.returning_columns)}
+                """
+
+                rows = await adapter.fetch_all(query, *values)
+                result.successful_records += len(chunk)
+
+                # Report progress
+                await self.report_progress_async(
+                    result.successful_records, result.total_records, "insert"
+                )
+
+            except Exception as e:
+                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                    raise
+                result.failed_records += len(chunk)
+                result.errors.append(
+                    {
+                        "chunk_start": result.successful_records
+                        + result.failed_records
+                        - len(chunk),
+                        "chunk_size": len(chunk),
+                        "error": str(e),
+                    }
+                )
+
+    async def _bulk_insert_mysql(
+        self, adapter, records: List[Dict], result: BulkOperationResult
+    ):
+        """MySQL-optimized bulk insert."""
+        # MySQL supports multi-row INSERT efficiently
+        for chunk in self.chunk_records(records):
+            try:
+                # Build multi-row INSERT query
+                placeholders = []
+                values = []
+                for record in chunk:
+                    row_placeholders = []
+                    for col in self.columns:
+                        row_placeholders.append("%s")
+                        values.append(record.get(col))
+                    placeholders.append(f"({', '.join(row_placeholders)})")
+
+                query = f"""
+                    INSERT INTO {self.table_name} ({', '.join(self.columns)})
+                    VALUES {', '.join(placeholders)}
+                """
+
+                await adapter.execute(query, *values)
+                result.successful_records += len(chunk)
+
+                # Report progress
+                await self.report_progress_async(
+                    result.successful_records, result.total_records, "insert"
+                )
+
+            except Exception as e:
+                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                    raise
+                result.failed_records += len(chunk)
+                result.errors.append(
+                    {
+                        "chunk_start": result.successful_records
+                        + result.failed_records
+                        - len(chunk),
+                        "chunk_size": len(chunk),
+                        "error": str(e),
+                    }
+                )
+
+    async def _bulk_insert_generic(
+        self, adapter, records: List[Dict], result: BulkOperationResult
+    ):
+        """Generic bulk insert for other databases."""
+        # Fall back to individual inserts for SQLite and others
+        for i, record in enumerate(records):
+            try:
+                placeholders = ", ".join(["?" for _ in self.columns])
+                values = [record.get(col) for col in self.columns]
+
+                query = f"""
+                    INSERT INTO {self.table_name} ({', '.join(self.columns)})
+                    VALUES ({placeholders})
+                """
+
+                await adapter.execute(query, *values)
+                result.successful_records += 1
+
+                # Report progress
+                if (i + 1) % self.progress_interval == 0:
+                    await self.report_progress_async(
+                        i + 1, result.total_records, "insert"
+                    )
+
+            except Exception as e:
+                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                    raise
+                result.failed_records += 1
+                result.errors.append({"record_index": i, "error": str(e)})
+
+
+@register_node()
+class BulkUpdateNode(AsyncSQLDatabaseNode, BulkOperationMixin):
+    """Bulk update operations with efficient strategies."""
+
+    def __init__(self, **config):
+        """Initialize bulk update node."""
+        super().__init__(**config)
+
+        # Setup bulk operations
+        self.setup_bulk_operations(config)
+
+        # Configuration
+        self.table_name = config.get("table_name")
+        self.update_strategy = config.get(
+            "update_strategy", "case"
+        )  # case, temp_table, individual
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Get node parameters."""
+        params = super().get_parameters().copy()
+        bulk_params = {
+            "table_name": NodeParameter(
+                name="table_name",
+                type=str,
+                description="Target table name",
+                required=True,
+            ),
+            "filter": NodeParameter(
+                name="filter",
+                type=dict,
+                description="Filter conditions for records to update",
+                required=False,
+            ),
+            "updates": NodeParameter(
+                name="updates",
+                type=dict,
+                description="Update values or expressions",
+                required=True,
+            ),
+            "update_strategy": NodeParameter(
+                name="update_strategy",
+                type=str,
+                description="Update strategy: case, temp_table, individual",
+                required=False,
+                default_value="case",
+            ),
+        }
+        params.update(bulk_params)
+        # Remove query requirement for bulk operations
+        if "query" in params:
+            params["query"].required = False
+        return params
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Execute bulk update operation."""
+        start_time = datetime.now()
+
+        # Get parameters
+        table_name = kwargs.get("table_name", self.table_name)
+        filter_conditions = kwargs.get("filter", {})
+        updates = kwargs.get("updates", {})
+
+        if not updates:
+            raise NodeValidationError("No update values provided")
+
+        # Get adapter
+        adapter = await self._get_adapter()
+
+        result = BulkOperationResult(
+            total_records=0, successful_records=0, failed_records=0
+        )
+
+        try:
+            # Build and execute update query
+            query, params = self._build_update_query(
+                table_name, filter_conditions, updates
+            )
+
+            # Execute update
+            affected_rows = await adapter.execute(query, *params)
+            result.successful_records = affected_rows
+            result.total_records = affected_rows
+
+        except Exception as e:
+            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                raise NodeExecutionError(f"Bulk update failed: {str(e)}")
+            result.errors.append({"error": str(e), "type": "update_error"})
+
+        # Calculate execution time
+        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
+
+        return {
+            "status": "success" if result.failed_records == 0 else "failed",
+            "updated_count": result.successful_records,
+            "execution_time_ms": result.execution_time_ms,
+            "errors": result.errors,
+        }
+
+    def _build_update_query(
+        self, table_name: str, filter_conditions: Dict, updates: Dict
+    ) -> Tuple[str, List]:
+        """Build UPDATE query with parameters."""
+        # Build SET clause
+        set_clauses = []
+        params = []
+        param_count = 1
+
+        for column, value in updates.items():
+            if isinstance(value, str) and any(
+                op in value for op in ["+", "-", "*", "/"]
+            ):
+                # Expression (e.g., "stock - 1")
+                set_clauses.append(f"{column} = {value}")
+            else:
+                # Direct value
+                set_clauses.append(f"{column} = ${param_count}")
+                params.append(value)
+                param_count += 1
+
+        # Build WHERE clause from filter
+        where_clauses = []
+        for column, condition in filter_conditions.items():
+            if isinstance(condition, dict):
+                # Complex condition (e.g., {"$gte": 100})
+                for op, value in condition.items():
+                    sql_op = self._get_sql_operator(op)
+                    where_clauses.append(f"{column} {sql_op} ${param_count}")
+                    params.append(value)
+                    param_count += 1
+            else:
+                # Simple equality
+                where_clauses.append(f"{column} = ${param_count}")
+                params.append(condition)
+                param_count += 1
+
+        # Build final query
+        query = f"UPDATE {table_name} SET {', '.join(set_clauses)}"
+        if where_clauses:
+            query += f" WHERE {' AND '.join(where_clauses)}"
+
+        return query, params
+
+    def _get_sql_operator(self, mongo_op: str) -> str:
+        """Convert MongoDB-style operator to SQL."""
+        operator_map = {
+            "$eq": "=",
+            "$ne": "!=",
+            "$lt": "<",
+            "$lte": "<=",
+            "$gt": ">",
+            "$gte": ">=",
+            "$in": "IN",
+            "$nin": "NOT IN",
+        }
+        return operator_map.get(mongo_op, "=")
+
+
+@register_node()
+class BulkDeleteNode(AsyncSQLDatabaseNode, BulkOperationMixin):
+    """Bulk delete operations with safety checks."""
+
+    def __init__(self, **config):
+        """Initialize bulk delete node."""
+        super().__init__(**config)
+
+        # Setup bulk operations
+        self.setup_bulk_operations(config)
+
+        # Configuration
+        self.table_name = config.get("table_name")
+        self.soft_delete = config.get("soft_delete", False)
+        self.require_filter = config.get("require_filter", True)
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Get node parameters."""
+        params = super().get_parameters().copy()
+        bulk_params = {
+            "table_name": NodeParameter(
+                name="table_name",
+                type=str,
+                description="Target table name",
+                required=True,
+            ),
+            "filter": NodeParameter(
+                name="filter",
+                type=dict,
+                description="Filter conditions for records to delete",
+                required=False,
+            ),
+            "soft_delete": NodeParameter(
+                name="soft_delete",
+                type=bool,
+                description="Use soft delete (set deleted_at)",
+                required=False,
+                default_value=False,
+            ),
+            "require_filter": NodeParameter(
+                name="require_filter",
+                type=bool,
+                description="Require filter to prevent accidental full table deletion",
+                required=False,
+                default_value=True,
+            ),
+        }
+        params.update(bulk_params)
+        # Remove query requirement for bulk operations
+        if "query" in params:
+            params["query"].required = False
+        return params
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Execute bulk delete operation."""
+        start_time = datetime.now()
+
+        # Get parameters
+        table_name = kwargs.get("table_name", self.table_name)
+        filter_conditions = kwargs.get("filter", {})
+
+        # Safety check
+        if self.require_filter and not filter_conditions:
+            raise NodeValidationError(
+                "Filter required for bulk delete. Set require_filter=False to delete all records."
+            )
+
+        # Get adapter
+        adapter = await self._get_adapter()
+
+        result = BulkOperationResult(
+            total_records=0, successful_records=0, failed_records=0
+        )
+
+        try:
+            if self.soft_delete:
+                # Update with deleted_at timestamp
+                query = f"UPDATE {table_name} SET deleted_at = CURRENT_TIMESTAMP"
+            else:
+                # Hard delete
+                query = f"DELETE FROM {table_name}"
+
+            # Add WHERE clause
+            params = []
+            if filter_conditions:
+                where_clause, params = self._build_where_clause(filter_conditions)
+                query += f" WHERE {where_clause}"
+
+            # Execute delete
+            affected_rows = await adapter.execute(query, *params)
+            result.successful_records = affected_rows
+            result.total_records = affected_rows
+
+        except Exception as e:
+            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                raise NodeExecutionError(f"Bulk delete failed: {str(e)}")
+            result.errors.append({"error": str(e), "type": "delete_error"})
+
+        # Calculate execution time
+        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
+
+        return {
+            "status": "success" if result.failed_records == 0 else "failed",
+            "deleted_count": result.successful_records,
+            "soft_delete": self.soft_delete,
+            "execution_time_ms": result.execution_time_ms,
+            "errors": result.errors,
+        }
+
+    def _build_where_clause(self, filter_conditions: Dict) -> Tuple[str, List]:
+        """Build WHERE clause from filter conditions."""
+        where_clauses = []
+        params = []
+        param_count = 1
+
+        for column, condition in filter_conditions.items():
+            if isinstance(condition, dict):
+                # Complex condition
+                for op, value in condition.items():
+                    sql_op = self._get_sql_operator(op)
+                    if op in ["$in", "$nin"]:
+                        placeholders = ", ".join(
+                            [
+                                f"${i}"
+                                for i in range(param_count, param_count + len(value))
+                            ]
+                        )
+                        where_clauses.append(f"{column} {sql_op} ({placeholders})")
+                        params.extend(value)
+                        param_count += len(value)
+                    else:
+                        where_clauses.append(f"{column} {sql_op} ${param_count}")
+                        params.append(value)
+                        param_count += 1
+            else:
+                # Simple equality
+                where_clauses.append(f"{column} = ${param_count}")
+                params.append(condition)
+                param_count += 1
+
+        return " AND ".join(where_clauses), params
+
+    def _get_sql_operator(self, mongo_op: str) -> str:
+        """Convert MongoDB-style operator to SQL."""
+        operator_map = {
+            "$eq": "=",
+            "$ne": "!=",
+            "$lt": "<",
+            "$lte": "<=",
+            "$gt": ">",
+            "$gte": ">=",
+            "$in": "IN",
+            "$nin": "NOT IN",
+        }
+        return operator_map.get(mongo_op, "=")
+
+
+@register_node()
+class BulkUpsertNode(AsyncSQLDatabaseNode, BulkOperationMixin):
+    """Bulk insert or update (upsert) operations."""
+
+    def __init__(self, **config):
+        """Initialize bulk upsert node."""
+        super().__init__(**config)
+
+        # Setup bulk operations
+        self.setup_bulk_operations(config)
+
+        # Configuration
+        self.table_name = config.get("table_name")
+        self.conflict_columns = config.get("conflict_columns", [])
+        self.update_columns = config.get("update_columns", [])
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Get node parameters."""
+        params = super().get_parameters().copy()
+        bulk_params = {
+            "records": NodeParameter(
+                name="records",
+                type=list,
+                description="List of records to upsert",
+                required=True,
+            ),
+            "table_name": NodeParameter(
+                name="table_name",
+                type=str,
+                description="Target table name",
+                required=True,
+            ),
+            "conflict_columns": NodeParameter(
+                name="conflict_columns",
+                type=list,
+                description="Columns that determine uniqueness",
+                required=True,
+            ),
+            "update_columns": NodeParameter(
+                name="update_columns",
+                type=list,
+                description="Columns to update on conflict",
+                required=False,
+            ),
+        }
+        params.update(bulk_params)
+        # Remove query requirement for bulk operations
+        if "query" in params:
+            params["query"].required = False
+        return params
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Execute bulk upsert operation."""
+        start_time = datetime.now()
+
+        # Get parameters
+        records = kwargs.get("records", [])
+        records = self.validate_bulk_data(records)
+
+        # Auto-detect columns
+        if records:
+            all_columns = list(records[0].keys())
+            if not self.update_columns:
+                # Update all columns except conflict columns
+                self.update_columns = [
+                    col for col in all_columns if col not in self.conflict_columns
+                ]
+
+        # Get adapter and database type
+        adapter = await self._get_adapter()
+        db_type = DatabaseType(self.config["database_type"].lower())
+
+        result = BulkOperationResult(
+            total_records=len(records), successful_records=0, failed_records=0
+        )
+
+        try:
+            if db_type == DatabaseType.POSTGRESQL:
+                await self._upsert_postgresql(adapter, records, result)
+            elif db_type == DatabaseType.MYSQL:
+                await self._upsert_mysql(adapter, records, result)
+            else:
+                # SQLite doesn't have native upsert, use INSERT OR REPLACE
+                await self._upsert_sqlite(adapter, records, result)
+
+        except Exception as e:
+            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                raise NodeExecutionError(f"Bulk upsert failed: {str(e)}")
+            result.errors.append({"error": str(e), "type": "upsert_error"})
+
+        # Calculate execution time
+        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
+
+        return {
+            "status": "success" if result.failed_records == 0 else "partial_success",
+            "total_records": result.total_records,
+            "successful_records": result.successful_records,
+            "failed_records": result.failed_records,
+            "success_rate": result.success_rate,
+            "execution_time_ms": result.execution_time_ms,
+            "errors": result.errors[:10] if result.errors else [],
+        }
+
+    async def _upsert_postgresql(
+        self, adapter, records: List[Dict], result: BulkOperationResult
+    ):
+        """PostgreSQL UPSERT using INSERT ... ON CONFLICT."""
+        all_columns = list(records[0].keys()) if records else []
+
+        for chunk in self.chunk_records(records):
+            try:
+                # Build INSERT ... ON CONFLICT query
+                placeholders = []
+                values = []
+                for i, record in enumerate(chunk):
+                    row_placeholders = []
+                    for col in all_columns:
+                        param_num = i * len(all_columns) + all_columns.index(col) + 1
+                        row_placeholders.append(f"${param_num}")
+                        values.append(record.get(col))
+                    placeholders.append(f"({', '.join(row_placeholders)})")
+
+                # Build update clause
+                update_clauses = []
+                for col in self.update_columns:
+                    update_clauses.append(f"{col} = EXCLUDED.{col}")
+
+                query = f"""
+                    INSERT INTO {self.table_name} ({', '.join(all_columns)})
+                    VALUES {', '.join(placeholders)}
+                    ON CONFLICT ({', '.join(self.conflict_columns)})
+                    DO UPDATE SET {', '.join(update_clauses)}
+                """
+
+                await adapter.execute(query, *values)
+                result.successful_records += len(chunk)
+
+                # Report progress
+                await self.report_progress_async(
+                    result.successful_records, result.total_records, "upsert"
+                )
+
+            except Exception as e:
+                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                    raise
+                result.failed_records += len(chunk)
+                result.errors.append(
+                    {
+                        "chunk_start": result.successful_records
+                        + result.failed_records
+                        - len(chunk),
+                        "chunk_size": len(chunk),
+                        "error": str(e),
+                    }
+                )
+
+    async def _upsert_mysql(
+        self, adapter, records: List[Dict], result: BulkOperationResult
+    ):
+        """MySQL UPSERT using INSERT ... ON DUPLICATE KEY UPDATE."""
+        all_columns = list(records[0].keys()) if records else []
+
+        for chunk in self.chunk_records(records):
+            try:
+                # Build INSERT ... ON DUPLICATE KEY UPDATE query
+                placeholders = []
+                values = []
+                for record in chunk:
+                    row_placeholders = []
+                    for col in all_columns:
+                        row_placeholders.append("%s")
+                        values.append(record.get(col))
+                    placeholders.append(f"({', '.join(row_placeholders)})")
+
+                # Build update clause
+                update_clauses = []
+                for col in self.update_columns:
+                    update_clauses.append(f"{col} = VALUES({col})")
+
+                query = f"""
+                    INSERT INTO {self.table_name} ({', '.join(all_columns)})
+                    VALUES {', '.join(placeholders)}
+                    ON DUPLICATE KEY UPDATE {', '.join(update_clauses)}
+                """
+
+                await adapter.execute(query, *values)
+                result.successful_records += len(chunk)
+
+                # Report progress
+                await self.report_progress_async(
+                    result.successful_records, result.total_records, "upsert"
+                )
+
+            except Exception as e:
+                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                    raise
+                result.failed_records += len(chunk)
+                result.errors.append(
+                    {
+                        "chunk_start": result.successful_records
+                        + result.failed_records
+                        - len(chunk),
+                        "chunk_size": len(chunk),
+                        "error": str(e),
+                    }
+                )
+
+    async def _upsert_sqlite(
+        self, adapter, records: List[Dict], result: BulkOperationResult
+    ):
+        """SQLite UPSERT using INSERT OR REPLACE."""
+        all_columns = list(records[0].keys()) if records else []
+
+        for record in records:
+            try:
+                placeholders = ", ".join(["?" for _ in all_columns])
+                values = [record.get(col) for col in all_columns]
+
+                query = f"""
+                    INSERT OR REPLACE INTO {self.table_name} ({', '.join(all_columns)})
+                    VALUES ({placeholders})
+                """
+
+                await adapter.execute(query, *values)
+                result.successful_records += 1
+
+                # Report progress
+                if result.successful_records % self.progress_interval == 0:
+                    await self.report_progress_async(
+                        result.successful_records, result.total_records, "upsert"
+                    )
+
+            except Exception as e:
+                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
+                    raise
+                result.failed_records += 1
+                result.errors.append(
+                    {
+                        "record_index": result.successful_records
+                        + result.failed_records
+                        - 1,
+                        "error": str(e),
+                    }
+                )
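
For orientation, the sketch below shows how the new bulk nodes might be driven directly, based only on the parameters visible in this diff (records, table_name, chunk_size, error_strategy, filter, soft_delete). It is a minimal sketch, not confirmed kailash 0.8.0 usage: the connection_string option and calling async_run() outside a workflow runtime are assumptions about the AsyncSQLDatabaseNode base class, which is not shown here, and real deployments would normally run these nodes through the SDK's runtime.

    # Hypothetical usage sketch; connection options and the direct async_run()
    # calls are assumptions, not confirmed kailash 0.8.0 API.
    import asyncio

    from kailash.nodes.data.bulk_operations import BulkCreateNode, BulkDeleteNode

    # Assumed base-class connection settings (database_type is read via
    # self.config in the diff; connection_string is an assumption).
    DB_CONFIG = {
        "database_type": "postgresql",
        "connection_string": "postgresql://user:pass@localhost/app",
    }

    async def main():
        # Insert rows in chunks of 500, collecting per-chunk errors instead of aborting.
        create = BulkCreateNode(
            **DB_CONFIG,
            table_name="users",
            chunk_size=500,
            error_strategy="continue",
        )
        created = await create.async_run(
            records=[
                {"email": "a@example.com", "active": True},
                {"email": "b@example.com", "active": False},
            ]
        )
        print(created["status"], created["successful_records"])

        # Soft-delete inactive users; the MongoDB-style "$eq" operator maps to SQL "=".
        delete = BulkDeleteNode(**DB_CONFIG, table_name="users", soft_delete=True)
        deleted = await delete.async_run(filter={"active": {"$eq": False}})
        print(deleted["deleted_count"])

    asyncio.run(main())

The error_strategy="continue" setting corresponds to BulkErrorStrategy.CONTINUE in the diff, so failed chunks are reported in the returned "errors" list rather than raising NodeExecutionError.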