kailash 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control/__init__.py +1 -1
  3. kailash/core/actors/adaptive_pool_controller.py +630 -0
  4. kailash/core/actors/connection_actor.py +3 -3
  5. kailash/core/ml/__init__.py +1 -0
  6. kailash/core/ml/query_patterns.py +544 -0
  7. kailash/core/monitoring/__init__.py +19 -0
  8. kailash/core/monitoring/connection_metrics.py +488 -0
  9. kailash/core/optimization/__init__.py +1 -0
  10. kailash/core/resilience/__init__.py +17 -0
  11. kailash/core/resilience/circuit_breaker.py +382 -0
  12. kailash/gateway/api.py +7 -5
  13. kailash/gateway/enhanced_gateway.py +1 -1
  14. kailash/middleware/auth/access_control.py +11 -11
  15. kailash/middleware/communication/ai_chat.py +7 -7
  16. kailash/middleware/communication/api_gateway.py +5 -15
  17. kailash/middleware/gateway/checkpoint_manager.py +45 -8
  18. kailash/middleware/gateway/event_store.py +66 -26
  19. kailash/middleware/mcp/enhanced_server.py +2 -2
  20. kailash/nodes/admin/permission_check.py +110 -30
  21. kailash/nodes/admin/schema.sql +387 -0
  22. kailash/nodes/admin/tenant_isolation.py +249 -0
  23. kailash/nodes/admin/transaction_utils.py +244 -0
  24. kailash/nodes/admin/user_management.py +37 -9
  25. kailash/nodes/ai/ai_providers.py +55 -3
  26. kailash/nodes/ai/llm_agent.py +115 -13
  27. kailash/nodes/data/query_pipeline.py +641 -0
  28. kailash/nodes/data/query_router.py +895 -0
  29. kailash/nodes/data/sql.py +24 -0
  30. kailash/nodes/data/workflow_connection_pool.py +451 -23
  31. kailash/nodes/monitoring/__init__.py +3 -5
  32. kailash/nodes/monitoring/connection_dashboard.py +822 -0
  33. kailash/nodes/rag/__init__.py +1 -3
  34. kailash/resources/registry.py +6 -0
  35. kailash/runtime/async_local.py +7 -0
  36. kailash/utils/export.py +152 -0
  37. kailash/workflow/builder.py +42 -0
  38. kailash/workflow/graph.py +86 -17
  39. kailash/workflow/templates.py +4 -9
  40. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/METADATA +14 -1
  41. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/RECORD +45 -31
  42. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/WHEEL +0 -0
  43. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/entry_points.txt +0 -0
  44. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/licenses/LICENSE +0 -0
  45. {kailash-0.6.0.dist-info → kailash-0.6.2.dist-info}/top_level.txt +0 -0
kailash/nodes/data/query_pipeline.py (new file)
@@ -0,0 +1,641 @@
+ """Query pipelining support for batch query execution.
+
+ This module implements query pipelining to batch multiple queries together,
+ reducing round-trip latency and improving throughput for bulk operations.
+ It maintains result ordering and handles partial failures gracefully.
+
+ Features:
+ - Automatic query batching with configurable size
+ - Pipeline optimization for related queries
+ - Result ordering preservation
+ - Partial failure handling with retry logic
+ - Transaction support for atomic operations
+
+ Example:
+     >>> pipeline = QueryPipelineNode(
+     ...     name="bulk_processor",
+     ...     connection_pool="main_pool",
+     ...     batch_size=100,
+     ...     flush_interval=0.1
+     ... )
+     >>>
+     >>> # Add queries to pipeline
+     >>> await pipeline.add_query("INSERT INTO users VALUES (?, ?)", [1, "Alice"])
+     >>> await pipeline.add_query("INSERT INTO users VALUES (?, ?)", [2, "Bob"])
+     >>>
+     >>> # Execute pipeline
+     >>> results = await pipeline.flush()
+ """
+
+ import asyncio
+ import logging
+ import time
+ from collections import deque
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from kailash.nodes.base import Node, NodeParameter, register_node
+
+ logger = logging.getLogger(__name__)
+
+
+ class PipelineStrategy(Enum):
+     """Strategy for pipeline execution."""
+
+     SEQUENTIAL = "sequential"  # Execute in order, stop on first failure
+     PARALLEL = "parallel"  # Execute in parallel where possible
+     TRANSACTIONAL = "transactional"  # All or nothing within transaction
+     BEST_EFFORT = "best_effort"  # Continue on failures
+
+
+ @dataclass
+ class PipelinedQuery:
+     """Single query in the pipeline."""
+
+     id: str
+     query: str
+     parameters: Optional[List[Any]]
+     callback_id: Optional[str] = None
+     retry_count: int = 0
+     max_retries: int = 3
+     created_at: float = field(default_factory=time.time)
+
+     def can_retry(self) -> bool:
+         """Check if query can be retried."""
+         return self.retry_count < self.max_retries
+
+
+ @dataclass
+ class PipelineResult:
+     """Result of pipeline execution."""
+
+     query_id: str
+     success: bool
+     result: Optional[Any] = None
+     error: Optional[Exception] = None
+     execution_time_ms: float = 0.0
+     retry_count: int = 0
+
+
+ @dataclass
+ class PipelineBatch:
+     """Batch of queries to execute together."""
+
+     id: str
+     queries: List[PipelinedQuery]
+     strategy: PipelineStrategy
+     created_at: float = field(default_factory=time.time)
+
+     def size(self) -> int:
+         """Get batch size."""
+         return len(self.queries)
+
+     def can_parallelize(self) -> bool:
+         """Check if batch can be parallelized."""
+         if self.strategy != PipelineStrategy.PARALLEL:
+             return False
+
+         # Simple heuristic: SELECTs can be parallel, writes should be sequential
+         for query in self.queries:
+             if not query.query.strip().upper().startswith("SELECT"):
+                 return False
+         return True
+
+
+ class QueryPipelineOptimizer:
+     """Optimizes query order and batching for better performance."""
+
+     @staticmethod
+     def optimize_batch(queries: List[PipelinedQuery]) -> List[PipelinedQuery]:
+         """Optimize query order within batch.
+
+         Strategies:
+         - Group similar queries together
+         - Put SELECTs before writes when possible
+         - Keep dependent queries in order
+         """
+         # Separate reads and writes
+         reads = []
+         writes = []
+
+         for query in queries:
+             query_upper = query.query.strip().upper()
+             if query_upper.startswith("SELECT"):
+                 reads.append(query)
+             else:
+                 writes.append(query)
+
+         # For now, simple optimization: reads first, then writes
+         # This allows better connection reuse and caching
+         return reads + writes
+
+     @staticmethod
+     def can_merge_queries(q1: PipelinedQuery, q2: PipelinedQuery) -> bool:
+         """Check if two queries can be merged into single statement."""
+         # Check if both are same type of INSERT into same table
+         q1_upper = q1.query.strip().upper()
+         q2_upper = q2.query.strip().upper()
+
+         if q1_upper.startswith("INSERT INTO") and q2_upper.startswith("INSERT INTO"):
+             # Extract table names (simple parsing)
+             try:
+                 table1 = q1_upper.split("INSERT INTO")[1].split()[0]
+                 table2 = q2_upper.split("INSERT INTO")[1].split()[0]
+                 return table1 == table2
+             except:
+                 return False
+
+         return False
+
+
+ @register_node()
+ class QueryPipelineNode(Node):
+     """Node for executing queries in pipeline/batch mode.
+
+     Batches multiple queries together to reduce round-trip latency
+     and improve throughput. Supports various execution strategies
+     and handles partial failures gracefully.
+     """
+
+     def __init__(self, **config):
+         """Initialize query pipeline node.
+
+         Args:
+             connection_pool: Name of connection pool to use
+             batch_size: Maximum queries per batch (default: 100)
+             flush_interval: Auto-flush interval in seconds (default: 0.1)
+             strategy: Execution strategy (default: best_effort)
+             enable_optimization: Enable query optimization (default: True)
+         """
+         self.connection_pool_name = config.get("connection_pool")
+         self.batch_size = config.get("batch_size", 100)
+         self.flush_interval = config.get("flush_interval", 0.1)
+         self.strategy = PipelineStrategy(config.get("strategy", "best_effort"))
+         self.enable_optimization = config.get("enable_optimization", True)
+
+         super().__init__(**config)
+
+         # Pipeline state
+         self._queue: deque[PipelinedQuery] = deque()
+         self._results: Dict[str, PipelineResult] = {}
+         self._batch_counter = 0
+         self._flush_task: Optional[asyncio.Task] = None
+         self._lock = asyncio.Lock()
+
+         # Metrics
+         self._total_queries = 0
+         self._total_batches = 0
+         self._total_failures = 0
+
+         # Auto-flush task will be started on first use
+         self._flush_task = None
+
+         # Direct pool reference
+         self._connection_pool = None
+
+     def set_connection_pool(self, pool):
+         """Set the connection pool directly.
+
+         Args:
+             pool: Connection pool instance
+         """
+         self._connection_pool = pool
+
+     def get_parameters(self) -> Dict[str, NodeParameter]:
+         """Get node parameters."""
+         return {
+             "connection_pool": NodeParameter(
+                 name="connection_pool",
+                 type=str,
+                 required=True,
+                 description="Name of connection pool to use",
+             ),
+             "batch_size": NodeParameter(
+                 name="batch_size",
+                 type=int,
+                 default=100,
+                 description="Maximum queries per batch",
+             ),
+             "flush_interval": NodeParameter(
+                 name="flush_interval",
+                 type=float,
+                 default=0.1,
+                 description="Auto-flush interval in seconds",
+             ),
+             "strategy": NodeParameter(
+                 name="strategy",
+                 type=str,
+                 default="best_effort",
+                 description="Execution strategy",
+                 choices=["sequential", "parallel", "transactional", "best_effort"],
+             ),
+             "enable_optimization": NodeParameter(
+                 name="enable_optimization",
+                 type=bool,
+                 default=True,
+                 description="Enable query optimization",
+             ),
+             "queries": NodeParameter(
+                 name="queries",
+                 type=list,
+                 required=False,
+                 description="List of queries to execute",
+             ),
+         }
+
+     async def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+         """Execute queries in pipeline mode.
+
+         Input can be:
+         1. Single query: {"query": "...", "parameters": [...]}
+         2. Multiple queries: {"queries": [{"query": "...", "parameters": [...]}, ...]}
+         3. Add to pipeline: {"action": "add", "query": "...", "parameters": [...]}
+         4. Flush pipeline: {"action": "flush"}
+         """
+         action = input_data.get("action", "execute")
+
+         if action == "add":
+             # Add query to pipeline
+             query_id = await self.add_query(
+                 input_data["query"],
+                 input_data.get("parameters"),
+                 input_data.get("callback_id"),
+             )
+             return {"query_id": query_id, "queued": True}
+
+         elif action == "flush":
+             # Flush pipeline
+             results = await self.flush()
+             return {"results": results, "count": len(results)}
+
+         elif action == "status":
+             # Get pipeline status
+             return self.get_status()
+
+         else:
+             # Execute queries immediately
+             queries_data = input_data.get("queries", [input_data])
+             if not isinstance(queries_data, list):
+                 queries_data = [queries_data]
+
+             # Add all queries
+             query_ids = []
+             for query_data in queries_data:
+                 query_id = await self.add_query(
+                     query_data["query"], query_data.get("parameters")
+                 )
+                 query_ids.append(query_id)
+
+             # Flush and get results
+             results = await self.flush()
+
+             # Map results back to query IDs
+             results_map = {r.query_id: r for r in results}
+             ordered_results = [results_map.get(qid) for qid in query_ids]
+
+             return {
+                 "results": ordered_results,
+                 "success": all(r.success for r in ordered_results if r),
+                 "count": len(ordered_results),
+             }
+
+     async def add_query(
+         self,
+         query: str,
+         parameters: Optional[List[Any]] = None,
+         callback_id: Optional[str] = None,
+     ) -> str:
+         """Add query to pipeline.
+
+         Args:
+             query: SQL query to execute
+             parameters: Query parameters
+             callback_id: Optional callback identifier
+
+         Returns:
+             Query ID for tracking
+         """
+         query_id = f"pq_{self._total_queries}_{int(time.time() * 1000)}"
+
+         pipelined_query = PipelinedQuery(
+             id=query_id, query=query, parameters=parameters, callback_id=callback_id
+         )
+
+         async with self._lock:
+             # Start auto-flush task if not started
+             if self._flush_task is None and self.flush_interval > 0:
+                 self._start_auto_flush()
+
+             self._queue.append(pipelined_query)
+             self._total_queries += 1
+
+             # Check if we should flush
+             if len(self._queue) >= self.batch_size:
+                 asyncio.create_task(self.flush())
+
+         return query_id
+
+     async def flush(self) -> List[PipelineResult]:
+         """Flush pipeline and execute all queued queries.
+
+         Returns:
+             List of results for all queries
+         """
+         async with self._lock:
+             if not self._queue:
+                 return []
+
+             # Create batch
+             batch_id = f"batch_{self._batch_counter}"
+             self._batch_counter += 1
+
+             queries = list(self._queue)
+             self._queue.clear()
+
+         batch = PipelineBatch(id=batch_id, queries=queries, strategy=self.strategy)
+
+         # Execute batch
+         results = await self._execute_batch(batch)
+
+         # Store results
+         for result in results:
+             self._results[result.query_id] = result
+
+         return results
+
+     async def _execute_batch(self, batch: PipelineBatch) -> List[PipelineResult]:
+         """Execute a batch of queries.
+
+         Args:
+             batch: Batch to execute
+
+         Returns:
+             List of results
+         """
+         # Get connection pool from various sources
+         pool = None
+
+         # 1. Check if pool was directly set
+         if hasattr(self, "_connection_pool") and self._connection_pool:
+             pool = self._connection_pool
+         # 2. Check context
+         elif hasattr(self, "context"):
+             if hasattr(self.context, "resource_registry"):
+                 pool = self.context.resource_registry.get(self.connection_pool_name)
+             elif (
+                 hasattr(self.context, "resources")
+                 and self.connection_pool_name in self.context.resources
+             ):
+                 pool = self.context.resources[self.connection_pool_name]
+         # 3. Check runtime
+         elif hasattr(self, "runtime"):
+             if hasattr(self.runtime, "resource_registry"):
+                 pool = self.runtime.resource_registry.get(self.connection_pool_name)
+             elif (
+                 hasattr(self.runtime, "resources")
+                 and self.connection_pool_name in self.runtime.resources
+             ):
+                 pool = self.runtime.resources[self.connection_pool_name]
+
+         if not pool:
+             logger.error(f"Connection pool '{self.connection_pool_name}' not found")
+             return [
+                 PipelineResult(
+                     query_id=q.id,
+                     success=False,
+                     error=ValueError("Connection pool not found"),
+                 )
+                 for q in batch.queries
+             ]
+
+         # Optimize batch if enabled
+         queries = batch.queries
+         if self.enable_optimization:
+             queries = QueryPipelineOptimizer.optimize_batch(queries)
+
+         # Execute based on strategy
+         if batch.strategy == PipelineStrategy.TRANSACTIONAL:
+             return await self._execute_transactional(pool, queries)
+         elif batch.strategy == PipelineStrategy.PARALLEL and batch.can_parallelize():
+             return await self._execute_parallel(pool, queries)
+         else:
+             return await self._execute_sequential(pool, queries, batch.strategy)
+
+     async def _execute_sequential(
+         self, pool, queries: List[PipelinedQuery], strategy: PipelineStrategy
+     ) -> List[PipelineResult]:
+         """Execute queries sequentially."""
+         results = []
+
+         async with pool.acquire() as connection:
+             for query in queries:
+                 start_time = time.time()
+
+                 try:
+                     # Execute query
+                     if query.parameters:
+                         result = await connection.execute(
+                             query.query, *query.parameters
+                         )
+                     else:
+                         result = await connection.execute(query.query)
+
+                     results.append(
+                         PipelineResult(
+                             query_id=query.id,
+                             success=True,
+                             result=result,
+                             execution_time_ms=(time.time() - start_time) * 1000,
+                             retry_count=query.retry_count,
+                         )
+                     )
+
+                 except Exception as e:
+                     logger.error(f"Pipeline query failed: {e}")
+                     self._total_failures += 1
+
+                     results.append(
+                         PipelineResult(
+                             query_id=query.id,
+                             success=False,
+                             error=e,
+                             execution_time_ms=(time.time() - start_time) * 1000,
+                             retry_count=query.retry_count,
+                         )
+                     )
+
+                     # Stop on first failure for sequential strategy
+                     if strategy == PipelineStrategy.SEQUENTIAL:
+                         # Add remaining queries as not executed
+                         for remaining in queries[len(results) :]:
+                             results.append(
+                                 PipelineResult(
+                                     query_id=remaining.id,
+                                     success=False,
+                                     error=Exception(
+                                         "Not executed due to previous failure"
+                                     ),
+                                 )
+                             )
+                         break
+
+         return results
+
+     async def _execute_parallel(
+         self, pool, queries: List[PipelinedQuery]
+     ) -> List[PipelineResult]:
+         """Execute queries in parallel."""
+         tasks = []
+
+         for query in queries:
+             task = asyncio.create_task(self._execute_single_query(pool, query))
+             tasks.append(task)
+
+         # Wait for all to complete
+         results = await asyncio.gather(*tasks, return_exceptions=True)
+
+         # Convert exceptions to results
+         final_results = []
+         for i, result in enumerate(results):
+             if isinstance(result, Exception):
+                 final_results.append(
+                     PipelineResult(
+                         query_id=queries[i].id,
+                         success=False,
+                         error=result,
+                         retry_count=queries[i].retry_count,
+                     )
+                 )
+                 self._total_failures += 1
+             else:
+                 final_results.append(result)
+
+         return final_results
+
+     async def _execute_transactional(
+         self, pool, queries: List[PipelinedQuery]
+     ) -> List[PipelineResult]:
+         """Execute queries within a transaction."""
+         results = []
+
+         async with pool.acquire() as connection:
+             try:
+                 # Start transaction
+                 await connection.execute("BEGIN")
+
+                 # Execute all queries
+                 for query in queries:
+                     start_time = time.time()
+
+                     if query.parameters:
+                         result = await connection.execute(
+                             query.query, *query.parameters
+                         )
+                     else:
+                         result = await connection.execute(query.query)
+
+                     results.append(
+                         PipelineResult(
+                             query_id=query.id,
+                             success=True,
+                             result=result,
+                             execution_time_ms=(time.time() - start_time) * 1000,
+                             retry_count=query.retry_count,
+                         )
+                     )
+
+                 # Commit transaction
+                 await connection.execute("COMMIT")
+
+             except Exception as e:
+                 # Rollback on any error
+                 try:
+                     await connection.execute("ROLLBACK")
+                 except:
+                     pass
+
+                 logger.error(f"Transaction failed: {e}")
+                 self._total_failures += len(queries)
+
+                 # All queries fail in transaction
+                 return [
+                     PipelineResult(
+                         query_id=q.id, success=False, error=e, retry_count=q.retry_count
+                     )
+                     for q in queries
+                 ]
+
+         return results
+
+     async def _execute_single_query(
+         self, pool, query: PipelinedQuery
+     ) -> PipelineResult:
+         """Execute a single query."""
+         start_time = time.time()
+
+         try:
+             async with pool.acquire() as connection:
+                 if query.parameters:
+                     result = await connection.execute(query.query, *query.parameters)
+                 else:
+                     result = await connection.execute(query.query)
+
+                 return PipelineResult(
+                     query_id=query.id,
+                     success=True,
+                     result=result,
+                     execution_time_ms=(time.time() - start_time) * 1000,
+                     retry_count=query.retry_count,
+                 )
+
+         except Exception as e:
+             return PipelineResult(
+                 query_id=query.id,
+                 success=False,
+                 error=e,
+                 execution_time_ms=(time.time() - start_time) * 1000,
+                 retry_count=query.retry_count,
+             )
+
+     def _start_auto_flush(self):
+         """Start auto-flush task."""
+
+         async def auto_flush():
+             while True:
+                 await asyncio.sleep(self.flush_interval)
+                 if self._queue:
+                     await self.flush()
+
+         self._flush_task = asyncio.create_task(auto_flush())
+
+     async def close(self):
+         """Close pipeline and cleanup."""
+         # Cancel auto-flush
+         if self._flush_task:
+             self._flush_task.cancel()
+             try:
+                 await self._flush_task
+             except asyncio.CancelledError:
+                 pass
+
+         # Flush any remaining queries
+         await self.flush()
+
+     def get_status(self) -> Dict[str, Any]:
+         """Get pipeline status."""
+         return {
+             "queued_queries": len(self._queue),
+             "total_queries": self._total_queries,
+             "total_batches": self._total_batches,
+             "total_failures": self._total_failures,
+             "batch_size": self.batch_size,
+             "flush_interval": self.flush_interval,
+             "strategy": self.strategy.value,
+             "success_rate": (self._total_queries - self._total_failures)
+             / max(1, self._total_queries),
+         }
+
+     def get_result(self, query_id: str) -> Optional[PipelineResult]:
+         """Get result for specific query ID."""
+         return self._results.get(query_id)
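
For orientation, a minimal usage sketch of the new node, following the execute() actions defined in the diff above. The `pool` object here is an assumption: any asynchronous pool whose `acquire()` works as an async context manager and whose connections support `await connection.execute(...)`, which is the interface the node calls; it is not a documented kailash API, and the literal values are illustrative only.

    >>> node = QueryPipelineNode(
    ...     name="bulk_writer",
    ...     connection_pool="main_pool",
    ...     batch_size=50,
    ...     strategy="transactional",
    ... )
    >>> node.set_connection_pool(pool)  # hypothetical pool, see interface note above
    >>> outcome = await node.execute({
    ...     "queries": [
    ...         {"query": "INSERT INTO users VALUES (?, ?)", "parameters": [1, "Alice"]},
    ...         {"query": "INSERT INTO users VALUES (?, ?)", "parameters": [2, "Bob"]},
    ...     ]
    ... })
    >>> outcome["success"], outcome["count"]  # per-query PipelineResults are in outcome["results"]
    >>> await node.execute({"action": "status"})  # queued/total counters, strategy, success_rate
    >>> await node.close()  # cancels auto-flush and flushes anything still queued

As _execute_batch() shows, the pool can also be resolved by the connection_pool name from the node's context or runtime resource registry, so calling set_connection_pool() directly is only one of the wiring options.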