kailash 0.6.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +35 -5
- kailash/access_control.py +64 -46
- kailash/adapters/__init__.py +5 -0
- kailash/adapters/mcp_platform_adapter.py +273 -0
- kailash/api/workflow_api.py +34 -3
- kailash/channels/__init__.py +21 -0
- kailash/channels/api_channel.py +409 -0
- kailash/channels/base.py +271 -0
- kailash/channels/cli_channel.py +661 -0
- kailash/channels/event_router.py +496 -0
- kailash/channels/mcp_channel.py +648 -0
- kailash/channels/session.py +423 -0
- kailash/mcp_server/discovery.py +57 -18
- kailash/middleware/communication/api_gateway.py +23 -3
- kailash/middleware/communication/realtime.py +83 -0
- kailash/middleware/core/agent_ui.py +1 -1
- kailash/middleware/gateway/storage_backends.py +393 -0
- kailash/middleware/mcp/enhanced_server.py +22 -16
- kailash/nexus/__init__.py +21 -0
- kailash/nexus/cli/__init__.py +5 -0
- kailash/nexus/cli/__main__.py +6 -0
- kailash/nexus/cli/main.py +176 -0
- kailash/nexus/factory.py +413 -0
- kailash/nexus/gateway.py +545 -0
- kailash/nodes/__init__.py +8 -5
- kailash/nodes/ai/iterative_llm_agent.py +988 -17
- kailash/nodes/ai/llm_agent.py +29 -9
- kailash/nodes/api/__init__.py +2 -2
- kailash/nodes/api/monitoring.py +1 -1
- kailash/nodes/base.py +29 -5
- kailash/nodes/base_async.py +54 -14
- kailash/nodes/code/async_python.py +1 -1
- kailash/nodes/code/python.py +50 -6
- kailash/nodes/data/async_sql.py +90 -0
- kailash/nodes/data/bulk_operations.py +939 -0
- kailash/nodes/data/query_builder.py +373 -0
- kailash/nodes/data/query_cache.py +512 -0
- kailash/nodes/monitoring/__init__.py +10 -0
- kailash/nodes/monitoring/deadlock_detector.py +964 -0
- kailash/nodes/monitoring/performance_anomaly.py +1078 -0
- kailash/nodes/monitoring/race_condition_detector.py +1151 -0
- kailash/nodes/monitoring/transaction_metrics.py +790 -0
- kailash/nodes/monitoring/transaction_monitor.py +931 -0
- kailash/nodes/security/behavior_analysis.py +414 -0
- kailash/nodes/system/__init__.py +17 -0
- kailash/nodes/system/command_parser.py +820 -0
- kailash/nodes/transaction/__init__.py +48 -0
- kailash/nodes/transaction/distributed_transaction_manager.py +983 -0
- kailash/nodes/transaction/saga_coordinator.py +652 -0
- kailash/nodes/transaction/saga_state_storage.py +411 -0
- kailash/nodes/transaction/saga_step.py +467 -0
- kailash/nodes/transaction/transaction_context.py +756 -0
- kailash/nodes/transaction/two_phase_commit.py +978 -0
- kailash/nodes/transform/processors.py +17 -1
- kailash/nodes/validation/__init__.py +21 -0
- kailash/nodes/validation/test_executor.py +532 -0
- kailash/nodes/validation/validation_nodes.py +447 -0
- kailash/resources/factory.py +1 -1
- kailash/runtime/access_controlled.py +9 -7
- kailash/runtime/async_local.py +84 -21
- kailash/runtime/local.py +21 -2
- kailash/runtime/parameter_injector.py +187 -31
- kailash/runtime/runner.py +6 -4
- kailash/runtime/testing.py +1 -1
- kailash/security.py +22 -3
- kailash/servers/__init__.py +32 -0
- kailash/servers/durable_workflow_server.py +430 -0
- kailash/servers/enterprise_workflow_server.py +522 -0
- kailash/servers/gateway.py +183 -0
- kailash/servers/workflow_server.py +293 -0
- kailash/utils/data_validation.py +192 -0
- kailash/workflow/builder.py +382 -15
- kailash/workflow/cyclic_runner.py +102 -10
- kailash/workflow/validation.py +144 -8
- kailash/workflow/visualization.py +99 -27
- {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/METADATA +3 -2
- {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/RECORD +81 -40
- kailash/workflow/builder_improvements.py +0 -207
- {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/WHEEL +0 -0
- {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.6.6.dist-info → kailash-0.8.0.dist-info}/top_level.txt +0 -0
kailash/nodes/data/bulk_operations.py
@@ -0,0 +1,939 @@
"""Bulk operations support for database nodes.

This module provides bulk CRUD operations for efficient data processing
in Kailash workflows. It extends the async SQL database capabilities
with optimized bulk operations for different databases.

Key Features:
- Database-specific bulk optimizations
- Chunking for large datasets
- Progress tracking and reporting
- Configurable error handling strategies
- Type validation for bulk data
"""

import asyncio
import json
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

from kailash.nodes.base import NodeParameter, register_node
from kailash.nodes.data.async_sql import AsyncSQLDatabaseNode, DatabaseType
from kailash.sdk_exceptions import NodeExecutionError, NodeValidationError

# Import List and Dict types if not already present
if "List" not in globals():
    from typing import List
if "Dict" not in globals():
    from typing import Dict

logger = logging.getLogger(__name__)


class BulkErrorStrategy(Enum):
    """Error handling strategies for bulk operations."""

    FAIL_FAST = "fail_fast"  # Stop on first error
    CONTINUE = "continue"  # Continue processing, collect errors
    ROLLBACK = "rollback"  # Rollback entire operation on any error


@dataclass
class BulkOperationResult:
    """Result of a bulk operation."""

    total_records: int
    successful_records: int
    failed_records: int
    errors: List[Dict[str, Any]] = field(default_factory=list)
    execution_time_ms: float = 0.0

    @property
    def success_rate(self) -> float:
        """Calculate success rate as percentage."""
        if self.total_records == 0:
            return 0.0
        return (self.successful_records / self.total_records) * 100


class BulkOperationMixin:
    """Mixin for bulk operations support."""

    def setup_bulk_operations(self, config: Dict[str, Any]):
        """Setup bulk operation configuration."""
        self.chunk_size: int = config.get("chunk_size", 1000)
        self.error_strategy: BulkErrorStrategy = BulkErrorStrategy(
            config.get("error_strategy", "fail_fast")
        )
        self.report_progress: bool = config.get("report_progress", True)
        self.progress_interval: int = config.get("progress_interval", 100)

    def validate_bulk_data(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Validate records before bulk operation.

        Args:
            records: List of records to validate

        Returns:
            Validated records

        Raises:
            NodeValidationError: If validation fails
        """
        if not records:
            raise NodeValidationError("No records provided for bulk operation")

        if not isinstance(records, list):
            raise NodeValidationError("Records must be a list")

        # Validate each record is a dictionary
        for i, record in enumerate(records):
            if not isinstance(record, dict):
                raise NodeValidationError(f"Record at index {i} must be a dictionary")

        return records

    def chunk_records(
        self, records: List[Dict[str, Any]], chunk_size: Optional[int] = None
    ) -> Iterator[List[Dict[str, Any]]]:
        """Chunk large datasets for processing.

        Args:
            records: List of records to chunk
            chunk_size: Size of each chunk (defaults to self.chunk_size)

        Yields:
            Chunks of records
        """
        size = chunk_size or self.chunk_size
        for i in range(0, len(records), size):
            yield records[i : i + size]

    async def report_progress_async(self, current: int, total: int, operation: str):
        """Report progress of bulk operation.

        Args:
            current: Current record number
            total: Total number of records
            operation: Operation being performed
        """
        if self.report_progress and current % self.progress_interval == 0:
            percentage = (current / total) * 100 if total > 0 else 0
            logger.info(
                f"Bulk {operation} progress: {current}/{total} ({percentage:.1f}%)"
            )

@register_node()
class BulkCreateNode(AsyncSQLDatabaseNode, BulkOperationMixin):
    """Bulk insert operations with database-specific optimizations."""

    def __init__(self, **config):
        """Initialize bulk create node."""
        # Initialize parent class
        super().__init__(**config)

        # Setup bulk operations
        self.setup_bulk_operations(config)

        # Table and columns configuration
        self.table_name = config.get("table_name")
        self.columns = config.get("columns", [])
        self.returning_columns = config.get("returning_columns", ["id"])

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Get node parameters."""
        params = super().get_parameters().copy()
        bulk_params = {
            "records": NodeParameter(
                name="records",
                type=list,
                description="List of records to insert",
                required=True,
            ),
            "table_name": NodeParameter(
                name="table_name",
                type=str,
                description="Target table name",
                required=True,
            ),
            "columns": NodeParameter(
                name="columns",
                type=list,
                description="Column names (auto-detected if not provided)",
                required=False,
            ),
            "chunk_size": NodeParameter(
                name="chunk_size",
                type=int,
                description="Number of records per chunk",
                required=False,
                default_value=1000,
            ),
            "error_strategy": NodeParameter(
                name="error_strategy",
                type=str,
                description="Error handling strategy: fail_fast, continue, rollback",
                required=False,
                default_value="fail_fast",
            ),
            "returning_columns": NodeParameter(
                name="returning_columns",
                type=list,
                description="Columns to return after insert",
                required=False,
                default_value=["id"],
            ),
        }
        params.update(bulk_params)
        # Remove query requirement for bulk operations
        if "query" in params:
            params["query"].required = False
        return params

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute bulk insert operation."""
        start_time = datetime.now()

        # Get records from parameters
        records = kwargs.get("records", [])
        records = self.validate_bulk_data(records)

        # Auto-detect columns if not provided
        if not self.columns and records:
            self.columns = list(records[0].keys())

        # Use table name from kwargs if provided
        table_name = kwargs.get("table_name", self.table_name)
        if not table_name:
            raise NodeValidationError("table_name is required")
        self.table_name = table_name

        # Get adapter
        adapter = await self._get_adapter()

        # Determine database type for optimization
        db_type = DatabaseType(self.config.get("database_type", "postgresql").lower())

        result = BulkOperationResult(
            total_records=len(records), successful_records=0, failed_records=0
        )

        try:
            if db_type == DatabaseType.POSTGRESQL:
                await self._bulk_insert_postgresql(adapter, records, result)
            elif db_type == DatabaseType.MYSQL:
                await self._bulk_insert_mysql(adapter, records, result)
            else:
                await self._bulk_insert_generic(adapter, records, result)

        except Exception as e:
            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                raise NodeExecutionError(f"Bulk insert failed: {str(e)}")
            else:
                result.errors.append({"error": str(e), "type": "general_error"})

        # Calculate execution time
        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

        return {
            "status": "success" if result.failed_records == 0 else "partial_success",
            "total_records": result.total_records,
            "successful_records": result.successful_records,
            "failed_records": result.failed_records,
            "success_rate": result.success_rate,
            "execution_time_ms": result.execution_time_ms,
            "errors": (
                result.errors[:10] if result.errors else []
            ),  # Limit errors returned
        }

    async def _bulk_insert_postgresql(
        self, adapter, records: List[Dict], result: BulkOperationResult
    ):
        """PostgreSQL-optimized bulk insert using COPY."""
        # For very large datasets, use COPY command
        if len(records) > 10000:
            # TODO: Implement COPY FROM for maximum performance
            # For now, fall back to multi-row INSERT
            pass

        # Use multi-row INSERT with RETURNING
        for chunk in self.chunk_records(records):
            try:
                # Build multi-row INSERT query
                placeholders = []
                values = []
                for i, record in enumerate(chunk):
                    row_placeholders = []
                    for col in self.columns:
                        param_num = i * len(self.columns) + self.columns.index(col) + 1
                        row_placeholders.append(f"${param_num}")
                        values.append(record.get(col))
                    placeholders.append(f"({', '.join(row_placeholders)})")

                query = f"""
                    INSERT INTO {self.table_name} ({', '.join(self.columns)})
                    VALUES {', '.join(placeholders)}
                    RETURNING {', '.join(self.returning_columns)}
                """

                rows = await adapter.fetch_all(query, *values)
                result.successful_records += len(chunk)

                # Report progress
                await self.report_progress_async(
                    result.successful_records, result.total_records, "insert"
                )

            except Exception as e:
                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                    raise
                result.failed_records += len(chunk)
                result.errors.append(
                    {
                        "chunk_start": result.successful_records
                        + result.failed_records
                        - len(chunk),
                        "chunk_size": len(chunk),
                        "error": str(e),
                    }
                )

    async def _bulk_insert_mysql(
        self, adapter, records: List[Dict], result: BulkOperationResult
    ):
        """MySQL-optimized bulk insert."""
        # MySQL supports multi-row INSERT efficiently
        for chunk in self.chunk_records(records):
            try:
                # Build multi-row INSERT query
                placeholders = []
                values = []
                for record in chunk:
                    row_placeholders = []
                    for col in self.columns:
                        row_placeholders.append("%s")
                        values.append(record.get(col))
                    placeholders.append(f"({', '.join(row_placeholders)})")

                query = f"""
                    INSERT INTO {self.table_name} ({', '.join(self.columns)})
                    VALUES {', '.join(placeholders)}
                """

                await adapter.execute(query, *values)
                result.successful_records += len(chunk)

                # Report progress
                await self.report_progress_async(
                    result.successful_records, result.total_records, "insert"
                )

            except Exception as e:
                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                    raise
                result.failed_records += len(chunk)
                result.errors.append(
                    {
                        "chunk_start": result.successful_records
                        + result.failed_records
                        - len(chunk),
                        "chunk_size": len(chunk),
                        "error": str(e),
                    }
                )

    async def _bulk_insert_generic(
        self, adapter, records: List[Dict], result: BulkOperationResult
    ):
        """Generic bulk insert for other databases."""
        # Fall back to individual inserts for SQLite and others
        for i, record in enumerate(records):
            try:
                placeholders = ", ".join(["?" for _ in self.columns])
                values = [record.get(col) for col in self.columns]

                query = f"""
                    INSERT INTO {self.table_name} ({', '.join(self.columns)})
                    VALUES ({placeholders})
                """

                await adapter.execute(query, *values)
                result.successful_records += 1

                # Report progress
                if (i + 1) % self.progress_interval == 0:
                    await self.report_progress_async(
                        i + 1, result.total_records, "insert"
                    )

            except Exception as e:
                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                    raise
                result.failed_records += 1
                result.errors.append({"record_index": i, "error": str(e)})

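For orientation, here is a minimal sketch of driving BulkCreateNode directly with asyncio. Only the bulk parameters (records, table_name, chunk_size, error_strategy) come from the code above; the connection keys (connection_string and the exact AsyncSQLDatabaseNode setup) are assumptions not shown in this diff.

# Hypothetical usage sketch; connection configuration keys are assumed, not taken from this diff.
import asyncio

from kailash.nodes.data.bulk_operations import BulkCreateNode

node = BulkCreateNode(
    database_type="postgresql",  # selects the PostgreSQL insert path above
    connection_string="postgresql://user:pass@localhost/app",  # assumed AsyncSQLDatabaseNode key
    table_name="orders",
    chunk_size=500,
    error_strategy="continue",  # collect per-chunk errors instead of failing fast
)

records = [{"customer_id": 1, "total": 19.99}, {"customer_id": 2, "total": 5.00}]
summary = asyncio.run(node.async_run(records=records))
print(summary["successful_records"], summary["failed_records"], summary["success_rate"])

The returned dictionary mirrors BulkOperationResult: record counts, success_rate, execution time, and at most ten collected errors.
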
@register_node()
class BulkUpdateNode(AsyncSQLDatabaseNode, BulkOperationMixin):
    """Bulk update operations with efficient strategies."""

    def __init__(self, **config):
        """Initialize bulk update node."""
        super().__init__(**config)

        # Setup bulk operations
        self.setup_bulk_operations(config)

        # Configuration
        self.table_name = config.get("table_name")
        self.update_strategy = config.get(
            "update_strategy", "case"
        )  # case, temp_table, individual

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Get node parameters."""
        params = super().get_parameters().copy()
        bulk_params = {
            "table_name": NodeParameter(
                name="table_name",
                type=str,
                description="Target table name",
                required=True,
            ),
            "filter": NodeParameter(
                name="filter",
                type=dict,
                description="Filter conditions for records to update",
                required=False,
            ),
            "updates": NodeParameter(
                name="updates",
                type=dict,
                description="Update values or expressions",
                required=True,
            ),
            "update_strategy": NodeParameter(
                name="update_strategy",
                type=str,
                description="Update strategy: case, temp_table, individual",
                required=False,
                default_value="case",
            ),
        }
        params.update(bulk_params)
        # Remove query requirement for bulk operations
        if "query" in params:
            params["query"].required = False
        return params

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute bulk update operation."""
        start_time = datetime.now()

        # Get parameters
        table_name = kwargs.get("table_name", self.table_name)
        filter_conditions = kwargs.get("filter", {})
        updates = kwargs.get("updates", {})

        if not updates:
            raise NodeValidationError("No update values provided")

        # Get adapter
        adapter = await self._get_adapter()

        result = BulkOperationResult(
            total_records=0, successful_records=0, failed_records=0
        )

        try:
            # Build and execute update query
            query, params = self._build_update_query(
                table_name, filter_conditions, updates
            )

            # Execute update
            affected_rows = await adapter.execute(query, *params)
            result.successful_records = affected_rows
            result.total_records = affected_rows

        except Exception as e:
            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                raise NodeExecutionError(f"Bulk update failed: {str(e)}")
            result.errors.append({"error": str(e), "type": "update_error"})

        # Calculate execution time
        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

        return {
            "status": "success" if result.failed_records == 0 else "failed",
            "updated_count": result.successful_records,
            "execution_time_ms": result.execution_time_ms,
            "errors": result.errors,
        }

    def _build_update_query(
        self, table_name: str, filter_conditions: Dict, updates: Dict
    ) -> Tuple[str, List]:
        """Build UPDATE query with parameters."""
        # Build SET clause
        set_clauses = []
        params = []
        param_count = 1

        for column, value in updates.items():
            if isinstance(value, str) and any(
                op in value for op in ["+", "-", "*", "/"]
            ):
                # Expression (e.g., "stock - 1")
                set_clauses.append(f"{column} = {value}")
            else:
                # Direct value
                set_clauses.append(f"{column} = ${param_count}")
                params.append(value)
                param_count += 1

        # Build WHERE clause from filter
        where_clauses = []
        for column, condition in filter_conditions.items():
            if isinstance(condition, dict):
                # Complex condition (e.g., {"$gte": 100})
                for op, value in condition.items():
                    sql_op = self._get_sql_operator(op)
                    where_clauses.append(f"{column} {sql_op} ${param_count}")
                    params.append(value)
                    param_count += 1
            else:
                # Simple equality
                where_clauses.append(f"{column} = ${param_count}")
                params.append(condition)
                param_count += 1

        # Build final query
        query = f"UPDATE {table_name} SET {', '.join(set_clauses)}"
        if where_clauses:
            query += f" WHERE {' AND '.join(where_clauses)}"

        return query, params

    def _get_sql_operator(self, mongo_op: str) -> str:
        """Convert MongoDB-style operator to SQL."""
        operator_map = {
            "$eq": "=",
            "$ne": "!=",
            "$lt": "<",
            "$lte": "<=",
            "$gt": ">",
            "$gte": ">=",
            "$in": "IN",
            "$nin": "NOT IN",
        }
        return operator_map.get(mongo_op, "=")

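To make the MongoDB-style filter and expression handling concrete, here is what _build_update_query produces for one illustrative input; the table name and values are made up, and the placeholders follow the PostgreSQL $N style used in the method above.

# Illustration only: inputs and the query/params that _build_update_query would build for them.
updates = {"status": "archived", "stock": "stock - 1"}  # a string containing an operator is kept as an expression
filter_conditions = {"category": "books", "price": {"$gte": 100}}

# With table_name="products", the resulting query is:
#   UPDATE products SET status = $1, stock = stock - 1
#   WHERE category = $2 AND price >= $3
# and the resulting parameter list is ["archived", "books", 100].
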
@register_node()
class BulkDeleteNode(AsyncSQLDatabaseNode, BulkOperationMixin):
    """Bulk delete operations with safety checks."""

    def __init__(self, **config):
        """Initialize bulk delete node."""
        super().__init__(**config)

        # Setup bulk operations
        self.setup_bulk_operations(config)

        # Configuration
        self.table_name = config.get("table_name")
        self.soft_delete = config.get("soft_delete", False)
        self.require_filter = config.get("require_filter", True)

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Get node parameters."""
        params = super().get_parameters().copy()
        bulk_params = {
            "table_name": NodeParameter(
                name="table_name",
                type=str,
                description="Target table name",
                required=True,
            ),
            "filter": NodeParameter(
                name="filter",
                type=dict,
                description="Filter conditions for records to delete",
                required=False,
            ),
            "soft_delete": NodeParameter(
                name="soft_delete",
                type=bool,
                description="Use soft delete (set deleted_at)",
                required=False,
                default_value=False,
            ),
            "require_filter": NodeParameter(
                name="require_filter",
                type=bool,
                description="Require filter to prevent accidental full table deletion",
                required=False,
                default_value=True,
            ),
        }
        params.update(bulk_params)
        # Remove query requirement for bulk operations
        if "query" in params:
            params["query"].required = False
        return params

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute bulk delete operation."""
        start_time = datetime.now()

        # Get parameters
        table_name = kwargs.get("table_name", self.table_name)
        filter_conditions = kwargs.get("filter", {})

        # Safety check
        if self.require_filter and not filter_conditions:
            raise NodeValidationError(
                "Filter required for bulk delete. Set require_filter=False to delete all records."
            )

        # Get adapter
        adapter = await self._get_adapter()

        result = BulkOperationResult(
            total_records=0, successful_records=0, failed_records=0
        )

        try:
            if self.soft_delete:
                # Update with deleted_at timestamp
                query = f"UPDATE {table_name} SET deleted_at = CURRENT_TIMESTAMP"
            else:
                # Hard delete
                query = f"DELETE FROM {table_name}"

            # Add WHERE clause
            params = []
            if filter_conditions:
                where_clause, params = self._build_where_clause(filter_conditions)
                query += f" WHERE {where_clause}"

            # Execute delete
            affected_rows = await adapter.execute(query, *params)
            result.successful_records = affected_rows
            result.total_records = affected_rows

        except Exception as e:
            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                raise NodeExecutionError(f"Bulk delete failed: {str(e)}")
            result.errors.append({"error": str(e), "type": "delete_error"})

        # Calculate execution time
        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

        return {
            "status": "success" if result.failed_records == 0 else "failed",
            "deleted_count": result.successful_records,
            "soft_delete": self.soft_delete,
            "execution_time_ms": result.execution_time_ms,
            "errors": result.errors,
        }

    def _build_where_clause(self, filter_conditions: Dict) -> Tuple[str, List]:
        """Build WHERE clause from filter conditions."""
        where_clauses = []
        params = []
        param_count = 1

        for column, condition in filter_conditions.items():
            if isinstance(condition, dict):
                # Complex condition
                for op, value in condition.items():
                    sql_op = self._get_sql_operator(op)
                    if op in ["$in", "$nin"]:
                        placeholders = ", ".join(
                            [
                                f"${i}"
                                for i in range(param_count, param_count + len(value))
                            ]
                        )
                        where_clauses.append(f"{column} {sql_op} ({placeholders})")
                        params.extend(value)
                        param_count += len(value)
                    else:
                        where_clauses.append(f"{column} {sql_op} ${param_count}")
                        params.append(value)
                        param_count += 1
            else:
                # Simple equality
                where_clauses.append(f"{column} = ${param_count}")
                params.append(condition)
                param_count += 1

        return " AND ".join(where_clauses), params

    def _get_sql_operator(self, mongo_op: str) -> str:
        """Convert MongoDB-style operator to SQL."""
        operator_map = {
            "$eq": "=",
            "$ne": "!=",
            "$lt": "<",
            "$lte": "<=",
            "$gt": ">",
            "$gte": ">=",
            "$in": "IN",
            "$nin": "NOT IN",
        }
        return operator_map.get(mongo_op, "=")

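The same operator mapping drives BulkDeleteNode. The illustration below shows one made-up filter and the two statements the node builds for it depending on soft_delete; the table name is also made up.

# Illustration based on async_run and _build_where_clause above.
filter_conditions = {"status": "inactive", "login_count": {"$in": [0, 1]}}

# Hard delete (soft_delete=False, the default), params ["inactive", 0, 1]:
#   DELETE FROM users WHERE status = $1 AND login_count IN ($2, $3)
# Soft delete (soft_delete=True):
#   UPDATE users SET deleted_at = CURRENT_TIMESTAMP WHERE status = $1 AND login_count IN ($2, $3)
# An empty filter with require_filter=True raises NodeValidationError before any SQL runs.
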
@register_node()
class BulkUpsertNode(AsyncSQLDatabaseNode, BulkOperationMixin):
    """Bulk insert or update (upsert) operations."""

    def __init__(self, **config):
        """Initialize bulk upsert node."""
        super().__init__(**config)

        # Setup bulk operations
        self.setup_bulk_operations(config)

        # Configuration
        self.table_name = config.get("table_name")
        self.conflict_columns = config.get("conflict_columns", [])
        self.update_columns = config.get("update_columns", [])

    def get_parameters(self) -> Dict[str, NodeParameter]:
        """Get node parameters."""
        params = super().get_parameters().copy()
        bulk_params = {
            "records": NodeParameter(
                name="records",
                type=list,
                description="List of records to upsert",
                required=True,
            ),
            "table_name": NodeParameter(
                name="table_name",
                type=str,
                description="Target table name",
                required=True,
            ),
            "conflict_columns": NodeParameter(
                name="conflict_columns",
                type=list,
                description="Columns that determine uniqueness",
                required=True,
            ),
            "update_columns": NodeParameter(
                name="update_columns",
                type=list,
                description="Columns to update on conflict",
                required=False,
            ),
        }
        params.update(bulk_params)
        # Remove query requirement for bulk operations
        if "query" in params:
            params["query"].required = False
        return params

    async def async_run(self, **kwargs) -> Dict[str, Any]:
        """Execute bulk upsert operation."""
        start_time = datetime.now()

        # Get parameters
        records = kwargs.get("records", [])
        records = self.validate_bulk_data(records)

        # Auto-detect columns
        if records:
            all_columns = list(records[0].keys())
            if not self.update_columns:
                # Update all columns except conflict columns
                self.update_columns = [
                    col for col in all_columns if col not in self.conflict_columns
                ]

        # Get adapter and database type
        adapter = await self._get_adapter()
        db_type = DatabaseType(self.config["database_type"].lower())

        result = BulkOperationResult(
            total_records=len(records), successful_records=0, failed_records=0
        )

        try:
            if db_type == DatabaseType.POSTGRESQL:
                await self._upsert_postgresql(adapter, records, result)
            elif db_type == DatabaseType.MYSQL:
                await self._upsert_mysql(adapter, records, result)
            else:
                # SQLite doesn't have native upsert, use INSERT OR REPLACE
                await self._upsert_sqlite(adapter, records, result)

        except Exception as e:
            if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                raise NodeExecutionError(f"Bulk upsert failed: {str(e)}")
            result.errors.append({"error": str(e), "type": "upsert_error"})

        # Calculate execution time
        result.execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

        return {
            "status": "success" if result.failed_records == 0 else "partial_success",
            "total_records": result.total_records,
            "successful_records": result.successful_records,
            "failed_records": result.failed_records,
            "success_rate": result.success_rate,
            "execution_time_ms": result.execution_time_ms,
            "errors": result.errors[:10] if result.errors else [],
        }

    async def _upsert_postgresql(
        self, adapter, records: List[Dict], result: BulkOperationResult
    ):
        """PostgreSQL UPSERT using INSERT ... ON CONFLICT."""
        all_columns = list(records[0].keys()) if records else []

        for chunk in self.chunk_records(records):
            try:
                # Build INSERT ... ON CONFLICT query
                placeholders = []
                values = []
                for i, record in enumerate(chunk):
                    row_placeholders = []
                    for col in all_columns:
                        param_num = i * len(all_columns) + all_columns.index(col) + 1
                        row_placeholders.append(f"${param_num}")
                        values.append(record.get(col))
                    placeholders.append(f"({', '.join(row_placeholders)})")

                # Build update clause
                update_clauses = []
                for col in self.update_columns:
                    update_clauses.append(f"{col} = EXCLUDED.{col}")

                query = f"""
                    INSERT INTO {self.table_name} ({', '.join(all_columns)})
                    VALUES {', '.join(placeholders)}
                    ON CONFLICT ({', '.join(self.conflict_columns)})
                    DO UPDATE SET {', '.join(update_clauses)}
                """

                await adapter.execute(query, *values)
                result.successful_records += len(chunk)

                # Report progress
                await self.report_progress_async(
                    result.successful_records, result.total_records, "upsert"
                )

            except Exception as e:
                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                    raise
                result.failed_records += len(chunk)
                result.errors.append(
                    {
                        "chunk_start": result.successful_records
                        + result.failed_records
                        - len(chunk),
                        "chunk_size": len(chunk),
                        "error": str(e),
                    }
                )

    async def _upsert_mysql(
        self, adapter, records: List[Dict], result: BulkOperationResult
    ):
        """MySQL UPSERT using INSERT ... ON DUPLICATE KEY UPDATE."""
        all_columns = list(records[0].keys()) if records else []

        for chunk in self.chunk_records(records):
            try:
                # Build INSERT ... ON DUPLICATE KEY UPDATE query
                placeholders = []
                values = []
                for record in chunk:
                    row_placeholders = []
                    for col in all_columns:
                        row_placeholders.append("%s")
                        values.append(record.get(col))
                    placeholders.append(f"({', '.join(row_placeholders)})")

                # Build update clause
                update_clauses = []
                for col in self.update_columns:
                    update_clauses.append(f"{col} = VALUES({col})")

                query = f"""
                    INSERT INTO {self.table_name} ({', '.join(all_columns)})
                    VALUES {', '.join(placeholders)}
                    ON DUPLICATE KEY UPDATE {', '.join(update_clauses)}
                """

                await adapter.execute(query, *values)
                result.successful_records += len(chunk)

                # Report progress
                await self.report_progress_async(
                    result.successful_records, result.total_records, "upsert"
                )

            except Exception as e:
                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                    raise
                result.failed_records += len(chunk)
                result.errors.append(
                    {
                        "chunk_start": result.successful_records
                        + result.failed_records
                        - len(chunk),
                        "chunk_size": len(chunk),
                        "error": str(e),
                    }
                )

    async def _upsert_sqlite(
        self, adapter, records: List[Dict], result: BulkOperationResult
    ):
        """SQLite UPSERT using INSERT OR REPLACE."""
        all_columns = list(records[0].keys()) if records else []

        for record in records:
            try:
                placeholders = ", ".join(["?" for _ in all_columns])
                values = [record.get(col) for col in all_columns]

                query = f"""
                    INSERT OR REPLACE INTO {self.table_name} ({', '.join(all_columns)})
                    VALUES ({placeholders})
                """

                await adapter.execute(query, *values)
                result.successful_records += 1

                # Report progress
                if result.successful_records % self.progress_interval == 0:
                    await self.report_progress_async(
                        result.successful_records, result.total_records, "upsert"
                    )

            except Exception as e:
                if self.error_strategy == BulkErrorStrategy.FAIL_FAST:
                    raise
                result.failed_records += 1
                result.errors.append(
                    {
                        "record_index": result.successful_records
                        + result.failed_records
                        - 1,
                        "error": str(e),
                    }
                )