kailash 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control.py +740 -0
  3. kailash/api/__main__.py +6 -0
  4. kailash/api/auth.py +668 -0
  5. kailash/api/custom_nodes.py +285 -0
  6. kailash/api/custom_nodes_secure.py +377 -0
  7. kailash/api/database.py +620 -0
  8. kailash/api/studio.py +915 -0
  9. kailash/api/studio_secure.py +893 -0
  10. kailash/mcp/__init__.py +53 -0
  11. kailash/mcp/__main__.py +13 -0
  12. kailash/mcp/ai_registry_server.py +712 -0
  13. kailash/mcp/client.py +447 -0
  14. kailash/mcp/client_new.py +334 -0
  15. kailash/mcp/server.py +293 -0
  16. kailash/mcp/server_new.py +336 -0
  17. kailash/mcp/servers/__init__.py +12 -0
  18. kailash/mcp/servers/ai_registry.py +289 -0
  19. kailash/nodes/__init__.py +4 -2
  20. kailash/nodes/ai/__init__.py +2 -0
  21. kailash/nodes/ai/a2a.py +714 -67
  22. kailash/nodes/ai/intelligent_agent_orchestrator.py +31 -37
  23. kailash/nodes/ai/iterative_llm_agent.py +1280 -0
  24. kailash/nodes/ai/llm_agent.py +324 -1
  25. kailash/nodes/ai/self_organizing.py +5 -6
  26. kailash/nodes/base.py +15 -2
  27. kailash/nodes/base_async.py +45 -0
  28. kailash/nodes/base_cycle_aware.py +374 -0
  29. kailash/nodes/base_with_acl.py +338 -0
  30. kailash/nodes/code/python.py +135 -27
  31. kailash/nodes/data/__init__.py +1 -2
  32. kailash/nodes/data/readers.py +16 -6
  33. kailash/nodes/data/sql.py +699 -256
  34. kailash/nodes/data/writers.py +16 -6
  35. kailash/nodes/logic/__init__.py +8 -0
  36. kailash/nodes/logic/convergence.py +642 -0
  37. kailash/nodes/logic/loop.py +153 -0
  38. kailash/nodes/logic/operations.py +187 -27
  39. kailash/nodes/mixins/__init__.py +11 -0
  40. kailash/nodes/mixins/mcp.py +228 -0
  41. kailash/nodes/mixins.py +387 -0
  42. kailash/runtime/__init__.py +2 -1
  43. kailash/runtime/access_controlled.py +458 -0
  44. kailash/runtime/local.py +106 -33
  45. kailash/runtime/parallel_cyclic.py +529 -0
  46. kailash/sdk_exceptions.py +90 -5
  47. kailash/security.py +845 -0
  48. kailash/tracking/manager.py +38 -15
  49. kailash/tracking/models.py +1 -1
  50. kailash/tracking/storage/filesystem.py +30 -2
  51. kailash/utils/__init__.py +8 -0
  52. kailash/workflow/__init__.py +18 -0
  53. kailash/workflow/convergence.py +270 -0
  54. kailash/workflow/cycle_analyzer.py +889 -0
  55. kailash/workflow/cycle_builder.py +579 -0
  56. kailash/workflow/cycle_config.py +725 -0
  57. kailash/workflow/cycle_debugger.py +860 -0
  58. kailash/workflow/cycle_exceptions.py +615 -0
  59. kailash/workflow/cycle_profiler.py +741 -0
  60. kailash/workflow/cycle_state.py +338 -0
  61. kailash/workflow/cyclic_runner.py +985 -0
  62. kailash/workflow/graph.py +500 -39
  63. kailash/workflow/migration.py +809 -0
  64. kailash/workflow/safety.py +365 -0
  65. kailash/workflow/templates.py +763 -0
  66. kailash/workflow/validation.py +751 -0
  67. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/METADATA +259 -12
  68. kailash-0.2.1.dist-info/RECORD +125 -0
  69. kailash/nodes/mcp/__init__.py +0 -11
  70. kailash/nodes/mcp/client.py +0 -554
  71. kailash/nodes/mcp/resource.py +0 -682
  72. kailash/nodes/mcp/server.py +0 -577
  73. kailash-0.1.5.dist-info/RECORD +0 -88
  74. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/WHEEL +0 -0
  75. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/entry_points.txt +0 -0
  76. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/licenses/LICENSE +0 -0
  77. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,751 @@
1
+ """
2
+ Comprehensive Validation and Linting for Cyclic Workflows.
3
+
4
+ This module provides extensive validation and linting capabilities to identify
5
+ common issues, performance anti-patterns, security vulnerabilities, and potential
6
+ problems in cyclic workflows before execution. It acts as a quality gate to
7
+ ensure workflow reliability and optimal performance.
8
+
9
+ Design Philosophy:
10
+ Provides proactive quality assurance through comprehensive static analysis
11
+ of workflow structures, configurations, and patterns. Identifies issues
12
+ early in the development cycle with specific, actionable recommendations
13
+ for resolution and optimization.
14
+
15
+ Key Features:
16
+ - Multi-category validation (safety, performance, compatibility)
17
+ - Severity-based issue classification (error, warning, info)
18
+ - Specific error codes with documentation links
19
+ - Actionable suggestions for issue resolution
20
+ - Comprehensive reporting with categorization
21
+ - Integration with development workflows
22
+
23
+ Validation Categories:
24
+ - Convergence: Cycle termination and convergence conditions
25
+ - Safety: Resource limits and infinite loop prevention
26
+ - Performance: Anti-patterns and optimization opportunities
27
+ - Parameter Mapping: Cycle parameter flow validation
28
+ - Node Compatibility: Cycle-aware node validation
29
+ - Resource Usage: Memory and file handle management
30
+
31
+ Issue Severity Levels:
32
+ - ERROR: Critical issues that prevent execution or cause failures
33
+ - WARNING: Potential issues that may impact performance or reliability
34
+ - INFO: Suggestions for improvement and best practices
35
+
36
+ Core Components:
37
+ - ValidationIssue: Structured issue representation with metadata
38
+ - IssueSeverity: Enumeration of severity levels
39
+ - CycleLinter: Main validation engine with comprehensive checks
40
+ - Reporting system with categorization and filtering
41
+
42
+ Validation Algorithms:
43
+ - Static analysis of cycle configurations
44
+ - Pattern recognition for common anti-patterns
45
+ - Resource usage analysis and leak detection
46
+ - Security validation for parameter access
47
+ - Performance bottleneck identification
48
+
49
+ Upstream Dependencies:
50
+ - Workflow graph structure and cycle detection
51
+ - Node implementations and configuration validation
52
+ - Cycle configuration and safety systems
53
+
54
+ Downstream Consumers:
55
+ - Development tools and IDEs for real-time validation
56
+ - CI/CD pipelines for automated quality gates
57
+ - Performance optimization tools
58
+ - Security analysis and compliance systems
59
+ - Educational and training materials
60
+
61
+ Examples:
62
+ Basic workflow validation:
63
+
64
+ >>> from kailash.workflow.validation import CycleLinter, IssueSeverity
65
+ >>> linter = CycleLinter(workflow)
66
+ >>> issues = linter.check_all()
67
+ >>> # Filter by severity
68
+ >>> errors = linter.get_issues_by_severity(IssueSeverity.ERROR)
69
+ >>> warnings = linter.get_issues_by_severity(IssueSeverity.WARNING)
70
+ >>> for error in errors:
71
+ ... print(f"ERROR {error.code}: {error.message}")
72
+ ... if error.suggestion:
73
+ ... print(f" Suggestion: {error.suggestion}")
74
+
75
+ Comprehensive reporting:
76
+
77
+ >>> report = linter.generate_report()
78
+ >>> print(f"Total issues: {report['summary']['total_issues']}")
79
+ >>> print(f"Critical errors: {report['summary']['errors']}")
80
+ >>> print(f"Affected cycles: {report['summary']['affected_cycles']}")
81
+ >>> # Category-specific analysis
82
+ >>> for category, issues in report['by_category'].items():
83
+ ... print(f"{category.upper()} ({len(issues)} issues):")
84
+ ... for issue in issues:
85
+ ... print(f" {issue.code}: {issue.message}")
86
+
87
+ Targeted validation:
88
+
89
+ >>> # Validate specific cycle
90
+ >>> cycle_issues = linter.get_issues_for_cycle("optimization_cycle")
91
+ >>> # Validate specific node
92
+ >>> node_issues = linter.get_issues_for_node("processor")
93
+ >>> # Get recommendations
94
+ >>> recommendations = report['recommendations']
95
+ >>> for rec in recommendations:
96
+ ... print(f" {rec}")
97
+
98
+ Validation Checks:
99
+ The linter performs comprehensive checks including:
100
+
101
+ - **CYC001-002**: Convergence condition validation
102
+ - **CYC003-004**: Infinite loop prevention
103
+ - **CYC005-006**: Safety limit configuration
104
+ - **CYC007-009**: Performance anti-pattern detection
105
+ - **CYC010-011**: Parameter mapping validation
106
+ - **CYC012-013**: Node compatibility checks
107
+ - **CYC014-015**: Convergence condition syntax validation
108
+ - **CYC016-017**: Resource usage and leak detection
109
+
110
+ See Also:
111
+ - :mod:`kailash.workflow.migration` for workflow optimization
112
+ - :mod:`kailash.workflow.safety` for safety mechanisms
113
+ - :doc:`/guides/validation` for validation best practices
114
+ """
115
+
116
+ import re
117
+ from dataclasses import dataclass
118
+ from enum import Enum
119
+ from typing import Any, Dict, List, Optional
120
+
121
+ from . import Workflow
122
+
123
+
124
class IssueSeverity(Enum):
    """Severity levels for validation issues."""

    ERROR = "error"  # Critical: prevents execution or causes runtime failure
    WARNING = "warning"  # Potential reliability or performance impact
    INFO = "info"  # Best-practice suggestion only; safe to ignore
130
+
131
+
132
@dataclass
class ValidationIssue:
    """Represents a validation issue found in a workflow."""

    # Severity classification (ERROR, WARNING, or INFO).
    severity: IssueSeverity
    # High-level grouping, e.g. "convergence", "safety", "performance".
    category: str
    # Stable issue code ("CYC001".."CYC017") used in reports and docs.
    code: str
    # Human-readable description of the problem.
    message: str
    # Node the issue applies to, when node-specific.
    node_id: Optional[str] = None
    # Cycle the issue applies to, when cycle-specific.
    cycle_id: Optional[str] = None
    # Actionable hint for resolving the issue.
    suggestion: Optional[str] = None
    # Relative path to detailed guidance for this issue code.
    documentation_link: Optional[str] = None
144
+
145
+
146
+ class CycleLinter:
147
+ """
148
+ Comprehensive linter for cyclic workflows.
149
+
150
+ Analyzes workflows for common issues, performance anti-patterns,
151
+ and potential problems specific to cyclic execution.
152
+ """
153
+
154
    def __init__(self, workflow: Workflow):
        """
        Initialize linter with target workflow.

        Args:
            workflow: The workflow to analyze
        """
        self.workflow = workflow
        # Underlying graph structure of the workflow (kept for structural checks).
        self.graph = workflow.graph
        # Accumulated findings; reset and repopulated by check_all().
        self.issues: List[ValidationIssue] = []
164
+
165
+ def check_all(self) -> List[ValidationIssue]:
166
+ """
167
+ Run all validation checks on the workflow.
168
+
169
+ Returns:
170
+ List of all validation issues found
171
+
172
+ Example:
173
+ >>> workflow = create_problematic_workflow()
174
+ >>> linter = CycleLinter(workflow)
175
+ >>> issues = linter.check_all()
176
+ >>> for issue in issues:
177
+ ... print(f"{issue.severity.value}: {issue.message}")
178
+ """
179
+ self.issues = []
180
+
181
+ # Run all checks
182
+ self._check_cycles_have_convergence()
183
+ self._check_for_infinite_loop_potential()
184
+ self._check_safety_limits()
185
+ self._check_performance_anti_patterns()
186
+ self._check_parameter_mapping()
187
+ self._check_node_compatibility()
188
+ self._check_convergence_conditions()
189
+ self._check_resource_usage()
190
+
191
+ return self.issues
192
+
193
    def _check_cycles_have_convergence(self):
        """Check that all cycles have appropriate convergence conditions.

        Emits CYC001 (error) when a cycle edge has neither a convergence_check
        nor a max_iterations bound, and CYC002 (warning) when it relies on
        max_iterations alone (no early termination possible).
        """
        # Workflows without cycle support have nothing to validate here.
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                for source, target, edge_data in cycle_edges:
                    # No termination mechanism at all -> hard error.
                    if not edge_data.get("convergence_check") and not edge_data.get(
                        "max_iterations"
                    ):
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.ERROR,
                                category="convergence",
                                code="CYC001",
                                message=f"Cycle {cycle_id} lacks convergence condition and max_iterations",
                                cycle_id=cycle_id,
                                suggestion="Add convergence_check parameter or set max_iterations",
                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                            )
                        )

                    # Iteration cap only -> may run longer than necessary.
                    elif not edge_data.get("convergence_check"):
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.WARNING,
                                category="convergence",
                                code="CYC002",
                                message=f"Cycle {cycle_id} relies only on max_iterations without convergence check",
                                cycle_id=cycle_id,
                                suggestion="Consider adding convergence_check for early termination",
                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                            )
                        )
227
+
228
    def _check_for_infinite_loop_potential(self):
        """Check for patterns that could lead to infinite loops.

        Emits CYC003 when max_iterations is missing or above 10000, and
        CYC004 when the convergence condition matches a heuristic for
        unreachable expressions (see _is_potentially_unreachable_condition).
        """
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                for source, target, edge_data in cycle_edges:
                    max_iter = edge_data.get("max_iterations")
                    convergence = edge_data.get("convergence_check")

                    # Check for very high or missing max_iterations
                    if max_iter is None or max_iter > 10000:
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.WARNING,
                                category="safety",
                                code="CYC003",
                                message=f"Cycle {cycle_id} has very high or no max_iterations limit",
                                cycle_id=cycle_id,
                                suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
                                documentation_link="guide/mistakes/066-infinite-cycles.md",
                            )
                        )

                    # Check for potentially unreachable convergence conditions
                    if convergence:
                        if self._is_potentially_unreachable_condition(convergence):
                            self.issues.append(
                                ValidationIssue(
                                    severity=IssueSeverity.WARNING,
                                    category="convergence",
                                    code="CYC004",
                                    message=f"Convergence condition '{convergence}' may be unreachable",
                                    cycle_id=cycle_id,
                                    suggestion="Verify convergence condition is achievable",
                                    documentation_link="guide/mistakes/066-infinite-cycles.md",
                                )
                            )
266
+
267
+ def _check_safety_limits(self):
268
+ """Check for appropriate safety limits on cycles."""
269
+ if hasattr(self.workflow, "get_cycle_groups"):
270
+ cycle_groups = self.workflow.get_cycle_groups()
271
+
272
+ for cycle_id, cycle_edges in cycle_groups.items():
273
+ for source, target, edge_data in cycle_edges:
274
+ # Check timeout
275
+ if not edge_data.get("timeout"):
276
+ self.issues.append(
277
+ ValidationIssue(
278
+ severity=IssueSeverity.INFO,
279
+ category="safety",
280
+ code="CYC005",
281
+ message=f"Cycle {cycle_id} has no timeout limit",
282
+ cycle_id=cycle_id,
283
+ suggestion="Consider adding timeout parameter for safety",
284
+ documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
285
+ )
286
+ )
287
+
288
+ # Check memory limit
289
+ if not edge_data.get("memory_limit"):
290
+ self.issues.append(
291
+ ValidationIssue(
292
+ severity=IssueSeverity.INFO,
293
+ category="safety",
294
+ code="CYC006",
295
+ message=f"Cycle {cycle_id} has no memory limit",
296
+ cycle_id=cycle_id,
297
+ suggestion="Consider adding memory_limit parameter for safety",
298
+ documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
299
+ )
300
+ )
301
+
302
    def _check_performance_anti_patterns(self):
        """Check for performance anti-patterns.

        Emits CYC007 for single-node cycles (per-iteration overhead may
        dominate), CYC008 for cycles spanning more than 10 nodes (hard to
        debug), and CYC009 when a node name suggests an expensive operation
        running inside the cycle.
        """
        # Use the workflow's cycle detection
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                # Get unique nodes in the cycle
                cycle_nodes = set()
                for source, target, _ in cycle_edges:
                    cycle_nodes.add(source)
                    cycle_nodes.add(target)
                # List form needed below for len() checks and indexing.
                cycle_nodes = list(cycle_nodes)

                # Check for very small cycles (may have high overhead)
                if len(cycle_nodes) == 1:
                    node_id = cycle_nodes[0]
                    self.issues.append(
                        ValidationIssue(
                            severity=IssueSeverity.INFO,
                            category="performance",
                            code="CYC007",
                            message=f"Single-node cycle {cycle_id} may have high overhead",
                            node_id=node_id,
                            cycle_id=cycle_id,
                            suggestion="Consider if cycle is necessary or if logic can be internal to node",
                            documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
                        )
                    )

                # Check for very large cycles (may be hard to debug)
                elif len(cycle_nodes) > 10:
                    self.issues.append(
                        ValidationIssue(
                            severity=IssueSeverity.WARNING,
                            category="complexity",
                            code="CYC008",
                            message=f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
                            cycle_id=cycle_id,
                            suggestion="Consider breaking into smaller cycles or using nested workflows",
                            documentation_link="guide/reference/pattern-library/04-complex-patterns.md",
                        )
                    )

                # Check for cycles with expensive operations
                for node_id in cycle_nodes:
                    if self._is_expensive_operation(node_id):
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.WARNING,
                                category="performance",
                                code="CYC009",
                                message=f"Expensive operation '{node_id}' in cycle {cycle_id}",
                                node_id=node_id,
                                cycle_id=cycle_id,
                                suggestion="Consider caching, optimization, or moving outside cycle",
                                documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
                            )
                        )
361
+
362
    def _check_parameter_mapping(self):
        """Check for parameter mapping issues in cycles.

        Emits CYC010 (warning) for identity mappings where the source and
        target parameter names are identical (a common mistake), and CYC011
        (info) for multi-node cycles that define no mapping at all.
        """
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                # Get cycle nodes for checking
                cycle_nodes = set()
                for s, t, _ in cycle_edges:
                    cycle_nodes.add(s)
                    cycle_nodes.add(t)

                # Check each edge for issues
                for source, target, edge_data in cycle_edges:
                    mapping = edge_data.get("mapping", {})

                    # Check for identity mappings (common mistake)
                    for source_param, target_param in mapping.items():
                        if source_param == target_param:
                            self.issues.append(
                                ValidationIssue(
                                    severity=IssueSeverity.WARNING,
                                    category="parameter_mapping",
                                    code="CYC010",
                                    message=f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
                                    cycle_id=cycle_id,
                                    suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
                                    documentation_link="guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md",
                                )
                            )

                    # Check for missing parameter propagation
                    if not mapping and len(cycle_nodes) > 1:
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.INFO,
                                category="parameter_mapping",
                                code="CYC011",
                                message=f"Cycle {cycle_id} has no parameter mapping",
                                cycle_id=cycle_id,
                                suggestion="Consider if parameters need to propagate between iterations",
                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                            )
                        )
406
+
407
    def _check_node_compatibility(self):
        """Check for node compatibility issues with cycles.

        Emits CYC012 (error) when a node accesses cycle context unsafely
        (note: the underlying detector is currently a placeholder), and
        CYC013 (warning) when a node carrying a `code` attribute (e.g. a
        PythonCodeNode) appears to read cycle parameters without a guard.
        """
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                # Get unique nodes in the cycle
                cycle_nodes = set()
                for source, target, _ in cycle_edges:
                    cycle_nodes.add(source)
                    cycle_nodes.add(target)

                for node_id in cycle_nodes:
                    node = self.workflow.nodes.get(node_id)
                    if not node:
                        # Node id appears in edges but is not registered; skip.
                        continue

                    # Check if node supports cycle context
                    if hasattr(node, "run"):
                        # Check if node accesses cycle context safely
                        if self._uses_unsafe_cycle_access(node):
                            self.issues.append(
                                ValidationIssue(
                                    severity=IssueSeverity.ERROR,
                                    category="node_compatibility",
                                    code="CYC012",
                                    message=f"Node '{node_id}' uses unsafe cycle context access",
                                    node_id=node_id,
                                    cycle_id=cycle_id,
                                    suggestion="Use context.get('cycle', {}) instead of direct access",
                                    documentation_link="guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md",
                                )
                            )

                    # Check for PythonCodeNode parameter access
                    if hasattr(node, "code") and node.code:
                        if self._has_unsafe_parameter_access(node.code):
                            self.issues.append(
                                ValidationIssue(
                                    severity=IssueSeverity.WARNING,
                                    category="node_compatibility",
                                    code="CYC013",
                                    message=f"PythonCodeNode '{node_id}' may have unsafe parameter access",
                                    node_id=node_id,
                                    cycle_id=cycle_id,
                                    suggestion="Use try/except pattern for cycle parameter access",
                                    documentation_link="guide/mistakes/064-pythoncodenode-none-input-validation-error.md",
                                )
                            )
456
+
457
    def _check_convergence_conditions(self):
        """Check convergence conditions for validity.

        Emits CYC014 (error) when a condition does not compile as a Python
        expression and CYC015 (warning) when it references field names that
        are frequently undefined at runtime.
        """
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                for source, target, edge_data in cycle_edges:
                    convergence = edge_data.get("convergence_check")

                    if convergence:
                        # Check for valid Python syntax
                        if not self._is_valid_condition_syntax(convergence):
                            self.issues.append(
                                ValidationIssue(
                                    severity=IssueSeverity.ERROR,
                                    category="convergence",
                                    code="CYC014",
                                    message=f"Invalid convergence condition syntax: '{convergence}'",
                                    cycle_id=cycle_id,
                                    suggestion="Ensure condition is valid Python expression",
                                    documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                                )
                            )

                        # Check for common mistakes
                        if self._has_convergence_condition_issues(convergence):
                            self.issues.append(
                                ValidationIssue(
                                    severity=IssueSeverity.WARNING,
                                    category="convergence",
                                    code="CYC015",
                                    message=f"Potential issue in convergence condition: '{convergence}'",
                                    cycle_id=cycle_id,
                                    suggestion="Verify field names and comparison operators",
                                    documentation_link="guide/mistakes/066-infinite-cycles.md",
                                )
                            )
494
+
495
    def _check_resource_usage(self):
        """Check for potential resource usage issues.

        Emits CYC016 (warning) when a node name suggests unbounded
        accumulation of state across iterations, and CYC017 (warning) when
        it suggests file I/O that could leak handles in a long-running cycle.
        Both checks are purely name-based heuristics.
        """
        if hasattr(self.workflow, "get_cycle_groups"):
            cycle_groups = self.workflow.get_cycle_groups()

            for cycle_id, cycle_edges in cycle_groups.items():
                # Get unique nodes in the cycle
                cycle_nodes = set()
                for source, target, _ in cycle_edges:
                    cycle_nodes.add(source)
                    cycle_nodes.add(target)

                # Check for potential memory leaks
                for node_id in cycle_nodes:
                    if self._may_have_memory_leak(node_id):
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.WARNING,
                                category="resource_usage",
                                code="CYC016",
                                message=f"Node '{node_id}' may have memory leak in cycle",
                                node_id=node_id,
                                cycle_id=cycle_id,
                                suggestion="Ensure proper cleanup of resources in cyclic execution",
                                documentation_link="guide/mistakes/016-memory-leaks-in-long-running-processes.md",
                            )
                        )

                # Check for file handle management
                for node_id in cycle_nodes:
                    if self._may_leak_file_handles(node_id):
                        self.issues.append(
                            ValidationIssue(
                                severity=IssueSeverity.WARNING,
                                category="resource_usage",
                                code="CYC017",
                                message=f"Node '{node_id}' may leak file handles in cycle",
                                node_id=node_id,
                                cycle_id=cycle_id,
                                suggestion="Use context managers (with statements) for file operations",
                                documentation_link="guide/mistakes/022-resource-cleanup-issues.md",
                            )
                        )
538
+
539
+ def _get_cycle_id(self, cycle_nodes: List[str]) -> str:
540
+ """Generate a cycle identifier from cycle nodes."""
541
+ return f"cycle_{'-'.join(sorted(cycle_nodes))}"
542
+
543
+ def _is_potentially_unreachable_condition(self, condition: str) -> bool:
544
+ """Check if convergence condition might be unreachable."""
545
+ # Simple heuristics for potentially problematic conditions
546
+ problematic_patterns = [
547
+ r".*==\s*True\s*$", # exact boolean match
548
+ r".*==\s*1\.0\s*$", # exact float match
549
+ r".*>\s*1\.0\s*$", # probability > 1.0
550
+ r".*<\s*0\.0\s*$", # probability < 0.0
551
+ ]
552
+
553
+ for pattern in problematic_patterns:
554
+ if re.search(pattern, condition):
555
+ return True
556
+
557
+ return False
558
+
559
+ def _is_expensive_operation(self, node_id: str) -> bool:
560
+ """Check if node represents an expensive operation."""
561
+ expensive_keywords = [
562
+ "train",
563
+ "model",
564
+ "neural",
565
+ "deep",
566
+ "learning",
567
+ "api",
568
+ "request",
569
+ "http",
570
+ "download",
571
+ "upload",
572
+ "database",
573
+ "query",
574
+ "sql",
575
+ "file",
576
+ "io",
577
+ "read",
578
+ "write",
579
+ ]
580
+
581
+ node_id_lower = node_id.lower()
582
+ return any(keyword in node_id_lower for keyword in expensive_keywords)
583
+
584
    def _uses_unsafe_cycle_access(self, node) -> bool:
        """Check if node uses unsafe cycle context access.

        Placeholder: real detection would require static analysis of the
        node's run() implementation. Currently always returns False, so
        issue CYC012 is never emitted in practice.
        """
        # This would require more sophisticated code analysis
        # For now, return False as a placeholder
        return False
589
+
590
    def _has_unsafe_parameter_access(self, code: str) -> bool:
        """Check if PythonCodeNode has unsafe parameter access.

        Heuristic: scans assignment targets and reports True when a name
        appears to be referenced before it is assigned, which suggests the
        code expects an externally injected cycle parameter without a
        try/except guard.
        """
        # Look for direct parameter access without try/except
        lines = code.split("\n")

        for line in lines:
            line = line.strip()
            if line and not line.startswith("#"):
                # Check for variable access that might be parameters
                if re.match(r"^[a-zA-Z_]\w*\s*=", line):
                    var_name = line.split("=")[0].strip()
                    # If variable is used before definition, might be parameter
                    if not self._is_defined_before_use(var_name, code):
                        return True

        return False
606
+
607
+ def _is_defined_before_use(self, var_name: str, code: str) -> bool:
608
+ """Check if variable is defined before use in code."""
609
+ lines = code.split("\n")
610
+ defined = False
611
+
612
+ for line in lines:
613
+ line = line.strip()
614
+ if line.startswith(f"{var_name} =") or line.startswith(f"{var_name}="):
615
+ defined = True
616
+ elif var_name in line and not defined:
617
+ # Used before definition
618
+ return False
619
+
620
+ return True
621
+
622
+ def _is_valid_condition_syntax(self, condition: str) -> bool:
623
+ """Check if convergence condition has valid Python syntax."""
624
+ try:
625
+ compile(condition, "<string>", "eval")
626
+ return True
627
+ except SyntaxError:
628
+ return False
629
+
630
+ def _has_convergence_condition_issues(self, condition: str) -> bool:
631
+ """Check for common issues in convergence conditions."""
632
+ # Check for undefined variables (common field names)
633
+ undefined_vars = [
634
+ "done",
635
+ "converged",
636
+ "finished",
637
+ "complete",
638
+ "quality",
639
+ "error",
640
+ ]
641
+
642
+ for var in undefined_vars:
643
+ if var in condition:
644
+ # Might be using undefined variable
645
+ return True
646
+
647
+ return False
648
+
649
+ def _may_have_memory_leak(self, node_id: str) -> bool:
650
+ """Check if node might have memory leaks."""
651
+ leak_keywords = ["accumulate", "collect", "gather", "cache", "store"]
652
+ node_id_lower = node_id.lower()
653
+ return any(keyword in node_id_lower for keyword in leak_keywords)
654
+
655
+ def _may_leak_file_handles(self, node_id: str) -> bool:
656
+ """Check if node might leak file handles."""
657
+ file_keywords = ["file", "read", "write", "open", "csv", "json", "log"]
658
+ node_id_lower = node_id.lower()
659
+ return any(keyword in node_id_lower for keyword in file_keywords)
660
+
661
+ def get_issues_by_severity(self, severity: IssueSeverity) -> List[ValidationIssue]:
662
+ """Get all issues of a specific severity level."""
663
+ return [issue for issue in self.issues if issue.severity == severity]
664
+
665
+ def get_issues_by_category(self, category: str) -> List[ValidationIssue]:
666
+ """Get all issues of a specific category."""
667
+ return [issue for issue in self.issues if issue.category == category]
668
+
669
+ def get_issues_for_cycle(self, cycle_id: str) -> List[ValidationIssue]:
670
+ """Get all issues for a specific cycle."""
671
+ return [issue for issue in self.issues if issue.cycle_id == cycle_id]
672
+
673
+ def get_issues_for_node(self, node_id: str) -> List[ValidationIssue]:
674
+ """Get all issues for a specific node."""
675
+ return [issue for issue in self.issues if issue.node_id == node_id]
676
+
677
+ def generate_report(self) -> Dict[str, Any]:
678
+ """
679
+ Generate comprehensive validation report.
680
+
681
+ Returns:
682
+ Dict containing validation report with summary and details
683
+
684
+ Example:
685
+ >>> from kailash import Workflow
686
+ >>> workflow = Workflow("test", "Test Workflow")
687
+ >>> linter = CycleLinter(workflow)
688
+ >>> linter.check_all()
689
+ >>> report = linter.generate_report()
690
+ >>> print(f"Found {report['summary']['total_issues']} issues")
691
+ """
692
+ errors = self.get_issues_by_severity(IssueSeverity.ERROR)
693
+ warnings = self.get_issues_by_severity(IssueSeverity.WARNING)
694
+ info = self.get_issues_by_severity(IssueSeverity.INFO)
695
+
696
+ # Group by category
697
+ by_category = {}
698
+ for issue in self.issues:
699
+ if issue.category not in by_category:
700
+ by_category[issue.category] = []
701
+ by_category[issue.category].append(issue)
702
+
703
+ # Group by cycle
704
+ by_cycle = {}
705
+ for issue in self.issues:
706
+ if issue.cycle_id:
707
+ if issue.cycle_id not in by_cycle:
708
+ by_cycle[issue.cycle_id] = []
709
+ by_cycle[issue.cycle_id].append(issue)
710
+
711
+ return {
712
+ "summary": {
713
+ "total_issues": len(self.issues),
714
+ "errors": len(errors),
715
+ "warnings": len(warnings),
716
+ "info": len(info),
717
+ "categories": list(by_category.keys()),
718
+ "affected_cycles": len(by_cycle),
719
+ },
720
+ "issues": self.issues,
721
+ "by_severity": {"errors": errors, "warnings": warnings, "info": info},
722
+ "by_category": by_category,
723
+ "by_cycle": by_cycle,
724
+ "recommendations": self._generate_recommendations(),
725
+ }
726
+
727
+ def _generate_recommendations(self) -> List[str]:
728
+ """Generate high-level recommendations based on found issues."""
729
+ recommendations = []
730
+
731
+ errors = self.get_issues_by_severity(IssueSeverity.ERROR)
732
+ if errors:
733
+ recommendations.append(
734
+ f"Fix {len(errors)} critical errors before deployment"
735
+ )
736
+
737
+ convergence_issues = self.get_issues_by_category("convergence")
738
+ if convergence_issues:
739
+ recommendations.append("Review convergence conditions for all cycles")
740
+
741
+ performance_issues = self.get_issues_by_category("performance")
742
+ if performance_issues:
743
+ recommendations.append("Optimize cycles to improve performance")
744
+
745
+ safety_issues = self.get_issues_by_category("safety")
746
+ if safety_issues:
747
+ recommendations.append(
748
+ "Add safety limits (timeout, max_iterations) to cycles"
749
+ )
750
+
751
+ return recommendations