kailash 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +25 -3
  37. kailash/nodes/admin/__init__.py +35 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1519 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +1 -0
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +407 -2
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/api/auth.py +287 -6
  50. kailash/nodes/api/rest.py +151 -0
  51. kailash/nodes/auth/__init__.py +17 -0
  52. kailash/nodes/auth/directory_integration.py +1228 -0
  53. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  54. kailash/nodes/auth/mfa.py +2338 -0
  55. kailash/nodes/auth/risk_assessment.py +872 -0
  56. kailash/nodes/auth/session_management.py +1093 -0
  57. kailash/nodes/auth/sso.py +1040 -0
  58. kailash/nodes/base.py +344 -13
  59. kailash/nodes/base_cycle_aware.py +4 -2
  60. kailash/nodes/base_with_acl.py +1 -1
  61. kailash/nodes/code/python.py +293 -12
  62. kailash/nodes/compliance/__init__.py +9 -0
  63. kailash/nodes/compliance/data_retention.py +1888 -0
  64. kailash/nodes/compliance/gdpr.py +2004 -0
  65. kailash/nodes/data/__init__.py +22 -2
  66. kailash/nodes/data/async_connection.py +469 -0
  67. kailash/nodes/data/async_sql.py +757 -0
  68. kailash/nodes/data/async_vector.py +598 -0
  69. kailash/nodes/data/readers.py +767 -0
  70. kailash/nodes/data/retrieval.py +360 -1
  71. kailash/nodes/data/sharepoint_graph.py +397 -21
  72. kailash/nodes/data/sql.py +94 -5
  73. kailash/nodes/data/streaming.py +68 -8
  74. kailash/nodes/data/vector_db.py +54 -4
  75. kailash/nodes/enterprise/__init__.py +13 -0
  76. kailash/nodes/enterprise/batch_processor.py +741 -0
  77. kailash/nodes/enterprise/data_lineage.py +497 -0
  78. kailash/nodes/logic/convergence.py +31 -9
  79. kailash/nodes/logic/operations.py +14 -3
  80. kailash/nodes/mixins/__init__.py +8 -0
  81. kailash/nodes/mixins/event_emitter.py +201 -0
  82. kailash/nodes/mixins/mcp.py +9 -4
  83. kailash/nodes/mixins/security.py +165 -0
  84. kailash/nodes/monitoring/__init__.py +7 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  86. kailash/nodes/rag/__init__.py +284 -0
  87. kailash/nodes/rag/advanced.py +1615 -0
  88. kailash/nodes/rag/agentic.py +773 -0
  89. kailash/nodes/rag/conversational.py +999 -0
  90. kailash/nodes/rag/evaluation.py +875 -0
  91. kailash/nodes/rag/federated.py +1188 -0
  92. kailash/nodes/rag/graph.py +721 -0
  93. kailash/nodes/rag/multimodal.py +671 -0
  94. kailash/nodes/rag/optimized.py +933 -0
  95. kailash/nodes/rag/privacy.py +1059 -0
  96. kailash/nodes/rag/query_processing.py +1335 -0
  97. kailash/nodes/rag/realtime.py +764 -0
  98. kailash/nodes/rag/registry.py +547 -0
  99. kailash/nodes/rag/router.py +837 -0
  100. kailash/nodes/rag/similarity.py +1854 -0
  101. kailash/nodes/rag/strategies.py +566 -0
  102. kailash/nodes/rag/workflows.py +575 -0
  103. kailash/nodes/security/__init__.py +19 -0
  104. kailash/nodes/security/abac_evaluator.py +1411 -0
  105. kailash/nodes/security/audit_log.py +91 -0
  106. kailash/nodes/security/behavior_analysis.py +1893 -0
  107. kailash/nodes/security/credential_manager.py +401 -0
  108. kailash/nodes/security/rotating_credentials.py +760 -0
  109. kailash/nodes/security/security_event.py +132 -0
  110. kailash/nodes/security/threat_detection.py +1103 -0
  111. kailash/nodes/testing/__init__.py +9 -0
  112. kailash/nodes/testing/credential_testing.py +499 -0
  113. kailash/nodes/transform/__init__.py +10 -2
  114. kailash/nodes/transform/chunkers.py +592 -1
  115. kailash/nodes/transform/processors.py +484 -14
  116. kailash/nodes/validation.py +321 -0
  117. kailash/runtime/access_controlled.py +1 -1
  118. kailash/runtime/async_local.py +41 -7
  119. kailash/runtime/docker.py +1 -1
  120. kailash/runtime/local.py +474 -55
  121. kailash/runtime/parallel.py +1 -1
  122. kailash/runtime/parallel_cyclic.py +1 -1
  123. kailash/runtime/testing.py +210 -2
  124. kailash/utils/migrations/__init__.py +25 -0
  125. kailash/utils/migrations/generator.py +433 -0
  126. kailash/utils/migrations/models.py +231 -0
  127. kailash/utils/migrations/runner.py +489 -0
  128. kailash/utils/secure_logging.py +342 -0
  129. kailash/workflow/__init__.py +16 -0
  130. kailash/workflow/cyclic_runner.py +3 -4
  131. kailash/workflow/graph.py +70 -2
  132. kailash/workflow/resilience.py +249 -0
  133. kailash/workflow/templates.py +726 -0
  134. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
  135. kailash-0.4.0.dist-info/RECORD +223 -0
  136. kailash/api/__init__.py +0 -17
  137. kailash/api/__main__.py +0 -6
  138. kailash/api/studio_secure.py +0 -893
  139. kailash/mcp/__main__.py +0 -13
  140. kailash/mcp/server_new.py +0 -336
  141. kailash/mcp/servers/__init__.py +0 -12
  142. kailash-0.3.1.dist-info/RECORD +0 -136
  143. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
  144. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
  145. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
  146. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
kailash/nodes/enterprise/batch_processor.py (added)
@@ -0,0 +1,741 @@
+"""Batch processing node for optimized operations with rate limiting and progress tracking.
+
+This module provides intelligent batch processing capabilities that optimize
+operations for APIs, databases, and data processing tasks. It includes
+rate limiting, parallel processing, progress tracking, and automatic
+error recovery.
+
+Key Features:
+- Intelligent batching strategies
+- API rate limit awareness
+- Parallel processing support
+- Progress tracking and reporting
+- Automatic retry and error recovery
+- Memory management
+- Configurable batch sizes
+"""
+
+import asyncio
+import math
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timedelta
+from typing import Any, Callable, Dict, List, Optional, Union
+
+from kailash.nodes.base import Node, NodeMetadata, NodeParameter, register_node
+from kailash.sdk_exceptions import NodeConfigurationError, NodeExecutionError
+
+
+@register_node()
+class BatchProcessorNode(Node):
+    """Node for intelligent batch processing with optimization and rate limiting.
+
+    This node processes large datasets or operations in optimized batches,
+    providing rate limiting, parallel processing, progress tracking, and
+    automatic error recovery for enterprise-scale operations.
+
+    Key capabilities:
+    1. Intelligent batching strategies
+    2. API rate limit awareness
+    3. Parallel processing support
+    4. Progress tracking and reporting
+    5. Automatic retry and error recovery
+    6. Memory management optimization
+
+    Example:
+        >>> processor = BatchProcessorNode()
+        >>> result = processor.execute(
+        ...     operation="process_data",
+        ...     data_items=large_dataset,
+        ...     batch_size=100,
+        ...     processing_function="data_transformation",
+        ...     rate_limit_per_second=10,
+        ...     parallel_workers=4,
+        ...     retry_failed_batches=True
+        ... )
+    """
+
+    def get_metadata(self) -> NodeMetadata:
+        """Get node metadata for discovery and orchestration."""
+        return NodeMetadata(
+            name="Batch Processor Node",
+            description="Intelligent batch processing with optimization and rate limiting",
+            tags={"enterprise", "batch", "processing", "optimization", "parallel"},
+            version="1.0.0",
+            author="Kailash SDK",
+        )
+
+    def get_parameters(self) -> Dict[str, NodeParameter]:
+        """Define input parameters for batch processing operations."""
+        return {
+            "operation": NodeParameter(
+                name="operation",
+                type=str,
+                required=False,
+                default="process_data",
+                description="Operation: process_data, api_batch_calls, database_batch_operations",
+            ),
+            "data_items": NodeParameter(
+                name="data_items",
+                type=list,
+                required=False,
+                description="List of data items to process in batches",
+            ),
+            "batch_size": NodeParameter(
+                name="batch_size",
+                type=int,
+                required=False,
+                default=100,
+                description="Number of items to process per batch",
+            ),
+            "processing_function": NodeParameter(
+                name="processing_function",
+                type=str,
+                required=False,
+                description="Name of the processing function to apply to each batch",
+            ),
+            "processing_code": NodeParameter(
+                name="processing_code",
+                type=str,
+                required=False,
+                description="Python code to execute for each batch",
+            ),
+            "rate_limit_per_second": NodeParameter(
+                name="rate_limit_per_second",
+                type=float,
+                required=False,
+                default=10.0,
+                description="Maximum operations per second (rate limiting)",
+            ),
+            "parallel_workers": NodeParameter(
+                name="parallel_workers",
+                type=int,
+                required=False,
+                default=1,
+                description="Number of parallel workers for processing",
+            ),
+            "retry_failed_batches": NodeParameter(
+                name="retry_failed_batches",
+                type=bool,
+                required=False,
+                default=True,
+                description="Whether to retry failed batches",
+            ),
+            "max_retries": NodeParameter(
+                name="max_retries",
+                type=int,
+                required=False,
+                default=3,
+                description="Maximum number of retries for failed batches",
+            ),
+            "retry_delay": NodeParameter(
+                name="retry_delay",
+                type=float,
+                required=False,
+                default=1.0,
+                description="Delay between retries in seconds",
+            ),
+            "progress_callback": NodeParameter(
+                name="progress_callback",
+                type=str,
+                required=False,
+                description="Function name to call for progress updates",
+            ),
+            "memory_limit_mb": NodeParameter(
+                name="memory_limit_mb",
+                type=int,
+                required=False,
+                default=1024,
+                description="Memory limit in MB for batch processing",
+            ),
+            "adaptive_batch_size": NodeParameter(
+                name="adaptive_batch_size",
+                type=bool,
+                required=False,
+                default=True,
+                description="Whether to adapt batch size based on performance",
+            ),
+            "api_endpoint": NodeParameter(
+                name="api_endpoint",
+                type=str,
+                required=False,
+                description="API endpoint for batch API calls",
+            ),
+            "api_headers": NodeParameter(
+                name="api_headers",
+                type=dict,
+                required=False,
+                default={},
+                description="Headers for API requests",
+            ),
+            "database_config": NodeParameter(
+                name="database_config",
+                type=dict,
+                required=False,
+                description="Database configuration for batch database operations",
+            ),
+        }
+
+    def __init__(self, **kwargs):
+        """Initialize the BatchProcessorNode."""
+        super().__init__(**kwargs)
+        self._processing_stats = {
+            "total_items": 0,
+            "processed_items": 0,
+            "failed_items": 0,
+            "total_batches": 0,
+            "successful_batches": 0,
+            "failed_batches": 0,
+            "start_time": None,
+            "end_time": None,
+            "processing_rate": 0.0,
+        }
+
+    def _create_batches(
+        self, data_items: List[Any], batch_size: int
+    ) -> List[List[Any]]:
+        """Create batches from the data items."""
+        batches = []
+        for i in range(0, len(data_items), batch_size):
+            batch = data_items[i : i + batch_size]
+            batches.append(batch)
+        return batches
+
+    def _calculate_optimal_batch_size(
+        self,
+        total_items: int,
+        rate_limit: float,
+        parallel_workers: int,
+        memory_limit_mb: int,
+    ) -> int:
+        """Calculate optimal batch size based on constraints."""
+        # Estimate memory per item (rough heuristic: 1KB per item)
+        estimated_memory_per_item = 1024  # bytes
+        max_items_by_memory = (
+            memory_limit_mb * 1024 * 1024
+        ) // estimated_memory_per_item
+
+        # Calculate based on rate limiting
+        # If we have parallel workers, we can process more items per second
+        effective_rate = rate_limit * parallel_workers
+
+        # Target processing time per batch (1-10 seconds)
+        target_batch_time = min(10.0, max(1.0, 60.0 / effective_rate))
+        rate_based_batch_size = int(effective_rate * target_batch_time)
+
+        # Use minimum of constraints, but ensure at least 1 item per batch
+        optimal_size = max(
+            1, min(max_items_by_memory, rate_based_batch_size, total_items)
+        )
+
+        return optimal_size
+
+    def _execute_processing_code(
+        self, batch: List[Any], processing_code: str
+    ) -> Dict[str, Any]:
+        """Execute custom processing code on a batch."""
+        try:
+            # Create execution context
+            exec_globals = {
+                "__builtins__": __builtins__,
+                "batch": batch,
+                "len": len,
+                "range": range,
+                "enumerate": enumerate,
+                "datetime": datetime,
+            }
+
+            # Execute the code
+            exec(processing_code, exec_globals)
+
+            # Get the result (expect 'result' variable to be set)
+            if "result" in exec_globals:
+                return {
+                    "success": True,
+                    "result": exec_globals["result"],
+                    "processed_count": len(batch),
+                }
+            else:
+                return {
+                    "success": False,
+                    "error": "Processing code must set 'result' variable",
+                    "processed_count": 0,
+                }
+
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "processed_count": 0,
+            }
+
+    def _process_single_batch(
+        self,
+        batch: List[Any],
+        batch_index: int,
+        processing_code: Optional[str] = None,
+        processing_function: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Process a single batch of data."""
+        batch_start_time = time.time()
+
+        try:
+            if processing_code:
+                result = self._execute_processing_code(batch, processing_code)
+            elif processing_function:
+                # For this example, we'll use a simple default processing
+                # In a real implementation, this would look up the function
+                result = {
+                    "success": True,
+                    "result": [f"processed_{item}" for item in batch],
+                    "processed_count": len(batch),
+                }
+            else:
+                # Default processing: just pass through with metadata
+                result = {
+                    "success": True,
+                    "result": batch,
+                    "processed_count": len(batch),
+                }
+
+            batch_end_time = time.time()
+            processing_time = batch_end_time - batch_start_time
+
+            return {
+                "batch_index": batch_index,
+                "success": result["success"],
+                "result": result["result"],
+                "processed_count": result["processed_count"],
+                "processing_time": processing_time,
+                "items_per_second": (
+                    len(batch) / processing_time
+                    if processing_time > 0
+                    else float("inf")
+                ),
+                "error": result.get("error"),
+            }
+
+        except Exception as e:
+            batch_end_time = time.time()
+            processing_time = batch_end_time - batch_start_time
+
+            return {
+                "batch_index": batch_index,
+                "success": False,
+                "result": None,
+                "processed_count": 0,
+                "processing_time": processing_time,
+                "items_per_second": 0.0,
+                "error": str(e),
+            }
+
+    def _process_batch_with_retry(
+        self,
+        batch: List[Any],
+        batch_index: int,
+        max_retries: int,
+        retry_delay: float,
+        processing_code: Optional[str] = None,
+        processing_function: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Process a batch with retry logic."""
+        last_result = None
+
+        for attempt in range(max_retries + 1):
+            if attempt > 0:
+                time.sleep(retry_delay * (2 ** (attempt - 1)))  # Exponential backoff
+
+            result = self._process_single_batch(
+                batch, batch_index, processing_code, processing_function
+            )
+
+            if result["success"]:
+                if attempt > 0:
+                    result["retry_attempts"] = attempt
+                return result
+
+            last_result = result
+
+        # All retries failed
+        last_result["retry_attempts"] = max_retries
+        last_result["final_failure"] = True
+        return last_result
+
+    def _update_progress(
+        self,
+        processed_batches: int,
+        total_batches: int,
+        processed_items: int,
+        total_items: int,
+        start_time: float,
+        progress_callback: Optional[str] = None,
+    ):
+        """Update progress and call progress callback if provided."""
+        current_time = time.time()
+        elapsed_time = current_time - start_time
+
+        batch_progress = processed_batches / total_batches if total_batches > 0 else 0
+        item_progress = processed_items / total_items if total_items > 0 else 0
+
+        # Calculate rates
+        batches_per_second = processed_batches / elapsed_time if elapsed_time > 0 else 0
+        items_per_second = processed_items / elapsed_time if elapsed_time > 0 else 0
+
+        # Estimate time remaining
+        if batches_per_second > 0:
+            remaining_batches = total_batches - processed_batches
+            estimated_time_remaining = remaining_batches / batches_per_second
+        else:
+            estimated_time_remaining = float("inf")
+
+        progress_info = {
+            "batch_progress": batch_progress,
+            "item_progress": item_progress,
+            "processed_batches": processed_batches,
+            "total_batches": total_batches,
+            "processed_items": processed_items,
+            "total_items": total_items,
+            "elapsed_time": elapsed_time,
+            "batches_per_second": batches_per_second,
+            "items_per_second": items_per_second,
+            "estimated_time_remaining": estimated_time_remaining,
+        }
+
+        # Call progress callback if provided
+        if progress_callback:
+            try:
+                callback_func = eval(progress_callback)
+                callback_func(progress_info)
+            except:
+                pass  # Ignore callback errors
+
+        return progress_info
+
+    def _process_data_batches(
+        self,
+        data_items: List[Any],
+        batch_size: int,
+        processing_code: Optional[str] = None,
+        processing_function: Optional[str] = None,
+        rate_limit_per_second: float = 10.0,
+        parallel_workers: int = 1,
+        retry_failed_batches: bool = True,
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+        progress_callback: Optional[str] = None,
+        adaptive_batch_size: bool = True,
+        memory_limit_mb: int = 1024,
+    ) -> Dict[str, Any]:
+        """Process data items in batches."""
+        start_time = time.time()
+
+        # Calculate optimal batch size if adaptive
+        if adaptive_batch_size:
+            optimal_batch_size = self._calculate_optimal_batch_size(
+                len(data_items),
+                rate_limit_per_second,
+                parallel_workers,
+                memory_limit_mb,
+            )
+            batch_size = min(batch_size, optimal_batch_size)
+
+        # Create batches
+        batches = self._create_batches(data_items, batch_size)
+        total_batches = len(batches)
+
+        # Initialize tracking
+        self._processing_stats.update(
+            {
+                "total_items": len(data_items),
+                "total_batches": total_batches,
+                "start_time": start_time,
+            }
+        )
+
+        successful_results = []
+        failed_results = []
+        processed_items = 0
+
+        # Calculate delay between batches for rate limiting
+        batch_delay = (
+            1.0 / (rate_limit_per_second / batch_size)
+            if rate_limit_per_second > 0
+            else 0
+        )
+
+        if parallel_workers == 1:
+            # Sequential processing
+            for i, batch in enumerate(batches):
+                if i > 0 and batch_delay > 0:
+                    time.sleep(batch_delay)
+
+                if retry_failed_batches:
+                    result = self._process_batch_with_retry(
+                        batch,
+                        i,
+                        max_retries,
+                        retry_delay,
+                        processing_code,
+                        processing_function,
+                    )
+                else:
+                    result = self._process_single_batch(
+                        batch, i, processing_code, processing_function
+                    )
+
+                if result["success"]:
+                    successful_results.append(result)
+                    processed_items += result["processed_count"]
+                else:
+                    failed_results.append(result)
+
+                # Update progress
+                self._update_progress(
+                    i + 1,
+                    total_batches,
+                    processed_items,
+                    len(data_items),
+                    start_time,
+                    progress_callback,
+                )
+
+        else:
+            # Parallel processing
+            with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
+                # Submit all batches
+                future_to_batch = {}
+                for i, batch in enumerate(batches):
+                    if retry_failed_batches:
+                        future = executor.submit(
+                            self._process_batch_with_retry,
+                            batch,
+                            i,
+                            max_retries,
+                            retry_delay,
+                            processing_code,
+                            processing_function,
+                        )
+                    else:
+                        future = executor.submit(
+                            self._process_single_batch,
+                            batch,
+                            i,
+                            processing_code,
+                            processing_function,
+                        )
+                    future_to_batch[future] = (i, batch)
+
+                # Process completed batches
+                completed_batches = 0
+                for future in as_completed(future_to_batch):
+                    batch_index, batch = future_to_batch[future]
+
+                    try:
+                        result = future.result()
+
+                        if result["success"]:
+                            successful_results.append(result)
+                            processed_items += result["processed_count"]
+                        else:
+                            failed_results.append(result)
+
+                        completed_batches += 1
+
+                        # Update progress
+                        self._update_progress(
+                            completed_batches,
+                            total_batches,
+                            processed_items,
+                            len(data_items),
+                            start_time,
+                            progress_callback,
+                        )
+
+                        # Rate limiting for parallel processing
+                        if batch_delay > 0:
+                            time.sleep(batch_delay / parallel_workers)
+
+                    except Exception as e:
+                        failed_results.append(
+                            {
+                                "batch_index": batch_index,
+                                "success": False,
+                                "error": str(e),
+                                "processed_count": 0,
+                            }
+                        )
+                        completed_batches += 1
+
+        end_time = time.time()
+        total_processing_time = end_time - start_time
+
+        # Update final stats
+        self._processing_stats.update(
+            {
+                "processed_items": processed_items,
+                "failed_items": len(data_items) - processed_items,
+                "successful_batches": len(successful_results),
+                "failed_batches": len(failed_results),
+                "end_time": end_time,
+                "processing_rate": (
+                    processed_items / total_processing_time
+                    if total_processing_time > 0
+                    else 0
+                ),
+            }
+        )
+
+        # Compile all results
+        all_successful_results = []
+        for result in successful_results:
+            if isinstance(result["result"], list):
+                all_successful_results.extend(result["result"])
+            else:
+                all_successful_results.append(result["result"])
+
+        return {
+            "success": len(failed_results) == 0,
+            "processed_items": processed_items,
+            "failed_items": len(data_items) - processed_items,
+            "total_batches": total_batches,
+            "successful_batches": len(successful_results),
+            "failed_batches": len(failed_results),
+            "processing_time": total_processing_time,
+            "processing_rate": (
+                processed_items / total_processing_time
+                if total_processing_time > 0
+                else 0
+            ),
+            "batch_size_used": batch_size,
+            "results": all_successful_results,
+            "successful_batch_details": successful_results,
+            "failed_batch_details": failed_results,
+            "statistics": self._processing_stats,
+        }
+
+    def run(self, **kwargs) -> Dict[str, Any]:
+        """Execute batch processing operation."""
+        operation = kwargs.get("operation", "process_data")
+
+        if operation == "process_data":
+            data_items = kwargs.get("data_items", [])
+            if not data_items:
+                raise NodeConfigurationError(
+                    "data_items is required for process_data operation"
+                )
+
+            return self._process_data_batches(
+                data_items=data_items,
+                batch_size=kwargs.get("batch_size", 100),
+                processing_code=kwargs.get("processing_code"),
+                processing_function=kwargs.get("processing_function"),
+                rate_limit_per_second=kwargs.get("rate_limit_per_second", 10.0),
+                parallel_workers=kwargs.get("parallel_workers", 1),
+                retry_failed_batches=kwargs.get("retry_failed_batches", True),
+                max_retries=kwargs.get("max_retries", 3),
+                retry_delay=kwargs.get("retry_delay", 1.0),
+                progress_callback=kwargs.get("progress_callback"),
+                adaptive_batch_size=kwargs.get("adaptive_batch_size", True),
+                memory_limit_mb=kwargs.get("memory_limit_mb", 1024),
+            )
+
+        elif operation == "api_batch_calls":
+            # For API batch calls, we'd implement specific API handling
+            # This is a simplified version
+            data_items = kwargs.get("data_items", [])
+            api_endpoint = kwargs.get("api_endpoint")
+
+            if not api_endpoint:
+                raise NodeConfigurationError(
+                    "api_endpoint is required for api_batch_calls operation"
+                )
+
+            # Create processing code for API calls
+            api_processing_code = f"""
+import requests
+import json
+
+results = []
+for item in batch:
+    try:
+        response = requests.post('{api_endpoint}', json=item, headers={kwargs.get('api_headers', {})})
+        if response.status_code == 200:
+            results.append(response.json())
+        else:
+            results.append({{'error': f'HTTP {{response.status_code}}', 'item': item}})
+    except Exception as e:
+        results.append({{'error': str(e), 'item': item}})
+
+result = results
+"""
+
+            return self._process_data_batches(
+                data_items=data_items,
+                batch_size=kwargs.get(
+                    "batch_size", 10
+                ),  # Smaller batches for API calls
+                processing_code=api_processing_code,
+                rate_limit_per_second=kwargs.get(
+                    "rate_limit_per_second", 5.0
+                ),  # More conservative for APIs
+                parallel_workers=kwargs.get("parallel_workers", 2),
+                retry_failed_batches=kwargs.get("retry_failed_batches", True),
+                max_retries=kwargs.get("max_retries", 3),
+                retry_delay=kwargs.get("retry_delay", 2.0),
+                progress_callback=kwargs.get("progress_callback"),
+                adaptive_batch_size=kwargs.get("adaptive_batch_size", True),
+                memory_limit_mb=kwargs.get("memory_limit_mb", 512),
+            )
+
+        elif operation == "database_batch_operations":
+            # For database batch operations
+            data_items = kwargs.get("data_items", [])
+            database_config = kwargs.get("database_config", {})
+
+            if not database_config:
+                raise NodeConfigurationError(
+                    "database_config is required for database_batch_operations"
+                )
+
+            # Create processing code for database operations
+            db_processing_code = """
+# This would typically use actual database connections
+# For this example, we'll simulate database operations
+
+import time
+results = []
+
+for item in batch:
+    # Simulate database operation
+    time.sleep(0.01)  # Simulate processing time
+    results.append({
+        'id': item.get('id', 'unknown'),
+        'status': 'processed',
+        'timestamp': datetime.now().isoformat()
+    })
+
+result = results
+"""
+
+            return self._process_data_batches(
+                data_items=data_items,
+                batch_size=kwargs.get(
+                    "batch_size", 1000
+                ),  # Larger batches for DB operations
+                processing_code=db_processing_code,
+                rate_limit_per_second=kwargs.get("rate_limit_per_second", 50.0),
+                parallel_workers=kwargs.get("parallel_workers", 4),
+                retry_failed_batches=kwargs.get("retry_failed_batches", True),
+                max_retries=kwargs.get("max_retries", 2),
+                retry_delay=kwargs.get("retry_delay", 1.0),
+                progress_callback=kwargs.get("progress_callback"),
+                adaptive_batch_size=kwargs.get("adaptive_batch_size", True),
+                memory_limit_mb=kwargs.get("memory_limit_mb", 2048),
+            )
+
+        else:
+            raise NodeConfigurationError(f"Invalid operation: {operation}")
+
+    async def async_run(self, **kwargs) -> Dict[str, Any]:
+        """Async execution method for enterprise integration."""
+        return self.run(**kwargs)
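
For orientation, here is a minimal usage sketch of the new BatchProcessorNode, based on the parameters and return keys visible in the diff above. The sample records and the transformation inside processing_code are illustrative only, and execute() is assumed to dispatch to run() as in the class docstring's example.

from kailash.nodes.enterprise.batch_processor import BatchProcessorNode

# Illustrative input data; any list of items works.
records = [{"id": i, "value": i} for i in range(1000)]

processor = BatchProcessorNode()

# processing_code runs once per batch via exec(); it receives the current
# batch as `batch` and must assign its output to a `result` variable.
result = processor.execute(
    operation="process_data",
    data_items=records,
    batch_size=100,  # may be reduced further when adaptive_batch_size is True
    processing_code=(
        "result = [{'id': item['id'], 'value': item['value'] * 10} for item in batch]"
    ),
    rate_limit_per_second=20.0,
    parallel_workers=4,
    retry_failed_batches=True,
    max_retries=3,
)

print(result["success"], result["processed_items"], result["processing_rate"])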