kailash 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. kailash/__init__.py +1 -1
  2. kailash/access_control.py +740 -0
  3. kailash/api/__main__.py +6 -0
  4. kailash/api/auth.py +668 -0
  5. kailash/api/custom_nodes.py +285 -0
  6. kailash/api/custom_nodes_secure.py +377 -0
  7. kailash/api/database.py +620 -0
  8. kailash/api/studio.py +915 -0
  9. kailash/api/studio_secure.py +893 -0
  10. kailash/mcp/__init__.py +53 -0
  11. kailash/mcp/__main__.py +13 -0
  12. kailash/mcp/ai_registry_server.py +712 -0
  13. kailash/mcp/client.py +447 -0
  14. kailash/mcp/client_new.py +334 -0
  15. kailash/mcp/server.py +293 -0
  16. kailash/mcp/server_new.py +336 -0
  17. kailash/mcp/servers/__init__.py +12 -0
  18. kailash/mcp/servers/ai_registry.py +289 -0
  19. kailash/nodes/__init__.py +4 -2
  20. kailash/nodes/ai/__init__.py +38 -0
  21. kailash/nodes/ai/a2a.py +1790 -0
  22. kailash/nodes/ai/agents.py +116 -2
  23. kailash/nodes/ai/ai_providers.py +206 -8
  24. kailash/nodes/ai/intelligent_agent_orchestrator.py +2108 -0
  25. kailash/nodes/ai/iterative_llm_agent.py +1280 -0
  26. kailash/nodes/ai/llm_agent.py +324 -1
  27. kailash/nodes/ai/self_organizing.py +1623 -0
  28. kailash/nodes/api/http.py +106 -25
  29. kailash/nodes/api/rest.py +116 -21
  30. kailash/nodes/base.py +15 -2
  31. kailash/nodes/base_async.py +45 -0
  32. kailash/nodes/base_cycle_aware.py +374 -0
  33. kailash/nodes/base_with_acl.py +338 -0
  34. kailash/nodes/code/python.py +135 -27
  35. kailash/nodes/data/readers.py +116 -53
  36. kailash/nodes/data/writers.py +16 -6
  37. kailash/nodes/logic/__init__.py +8 -0
  38. kailash/nodes/logic/async_operations.py +48 -9
  39. kailash/nodes/logic/convergence.py +642 -0
  40. kailash/nodes/logic/loop.py +153 -0
  41. kailash/nodes/logic/operations.py +212 -27
  42. kailash/nodes/logic/workflow.py +26 -18
  43. kailash/nodes/mixins/__init__.py +11 -0
  44. kailash/nodes/mixins/mcp.py +228 -0
  45. kailash/nodes/mixins.py +387 -0
  46. kailash/nodes/transform/__init__.py +8 -1
  47. kailash/nodes/transform/processors.py +119 -4
  48. kailash/runtime/__init__.py +2 -1
  49. kailash/runtime/access_controlled.py +458 -0
  50. kailash/runtime/local.py +106 -33
  51. kailash/runtime/parallel_cyclic.py +529 -0
  52. kailash/sdk_exceptions.py +90 -5
  53. kailash/security.py +845 -0
  54. kailash/tracking/manager.py +38 -15
  55. kailash/tracking/models.py +1 -1
  56. kailash/tracking/storage/filesystem.py +30 -2
  57. kailash/utils/__init__.py +8 -0
  58. kailash/workflow/__init__.py +18 -0
  59. kailash/workflow/convergence.py +270 -0
  60. kailash/workflow/cycle_analyzer.py +768 -0
  61. kailash/workflow/cycle_builder.py +573 -0
  62. kailash/workflow/cycle_config.py +709 -0
  63. kailash/workflow/cycle_debugger.py +760 -0
  64. kailash/workflow/cycle_exceptions.py +601 -0
  65. kailash/workflow/cycle_profiler.py +671 -0
  66. kailash/workflow/cycle_state.py +338 -0
  67. kailash/workflow/cyclic_runner.py +985 -0
  68. kailash/workflow/graph.py +500 -39
  69. kailash/workflow/migration.py +768 -0
  70. kailash/workflow/safety.py +365 -0
  71. kailash/workflow/templates.py +744 -0
  72. kailash/workflow/validation.py +693 -0
  73. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/METADATA +446 -13
  74. kailash-0.2.0.dist-info/RECORD +125 -0
  75. kailash/nodes/mcp/__init__.py +0 -11
  76. kailash/nodes/mcp/client.py +0 -554
  77. kailash/nodes/mcp/resource.py +0 -682
  78. kailash/nodes/mcp/server.py +0 -577
  79. kailash-0.1.4.dist-info/RECORD +0 -85
  80. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
  81. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
  82. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
  83. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,693 @@
1
+ """
2
+ Comprehensive Validation and Linting for Cyclic Workflows.
3
+
4
+ This module provides extensive validation and linting capabilities to identify
5
+ common issues, performance anti-patterns, security vulnerabilities, and potential
6
+ problems in cyclic workflows before execution. It acts as a quality gate to
7
+ ensure workflow reliability and optimal performance.
8
+
9
+ Design Philosophy:
10
+ Provides proactive quality assurance through comprehensive static analysis
11
+ of workflow structures, configurations, and patterns. Identifies issues
12
+ early in the development cycle with specific, actionable recommendations
13
+ for resolution and optimization.
14
+
15
+ Key Features:
16
+ - Multi-category validation (safety, performance, compatibility)
17
+ - Severity-based issue classification (error, warning, info)
18
+ - Specific error codes with documentation links
19
+ - Actionable suggestions for issue resolution
20
+ - Comprehensive reporting with categorization
21
+ - Integration with development workflows
22
+
23
+ Validation Categories:
24
+ - Convergence: Cycle termination and convergence conditions
25
+ - Safety: Resource limits and infinite loop prevention
26
+ - Performance: Anti-patterns and optimization opportunities
27
+ - Parameter Mapping: Cycle parameter flow validation
28
+ - Node Compatibility: Cycle-aware node validation
29
+ - Resource Usage: Memory and file handle management
30
+
31
+ Issue Severity Levels:
32
+ - ERROR: Critical issues that prevent execution or cause failures
33
+ - WARNING: Potential issues that may impact performance or reliability
34
+ - INFO: Suggestions for improvement and best practices
35
+
36
+ Core Components:
37
+ - ValidationIssue: Structured issue representation with metadata
38
+ - IssueSeverity: Enumeration of severity levels
39
+ - CycleLinter: Main validation engine with comprehensive checks
40
+ - Reporting system with categorization and filtering
41
+
42
+ Validation Algorithms:
43
+ - Static analysis of cycle configurations
44
+ - Pattern recognition for common anti-patterns
45
+ - Resource usage analysis and leak detection
46
+ - Security validation for parameter access
47
+ - Performance bottleneck identification
48
+
49
+ Upstream Dependencies:
50
+ - Workflow graph structure and cycle detection
51
+ - Node implementations and configuration validation
52
+ - Cycle configuration and safety systems
53
+
54
+ Downstream Consumers:
55
+ - Development tools and IDEs for real-time validation
56
+ - CI/CD pipelines for automated quality gates
57
+ - Performance optimization tools
58
+ - Security analysis and compliance systems
59
+ - Educational and training materials
60
+
61
+ Examples:
62
+ Basic workflow validation:
63
+
64
+ >>> from kailash.workflow.validation import CycleLinter, IssueSeverity
65
+ >>> linter = CycleLinter(workflow)
66
+ >>> issues = linter.check_all()
67
+ >>> # Filter by severity
68
+ >>> errors = linter.get_issues_by_severity(IssueSeverity.ERROR)
69
+ >>> warnings = linter.get_issues_by_severity(IssueSeverity.WARNING)
70
+ >>> for error in errors:
71
+ ... print(f"ERROR {error.code}: {error.message}")
72
+ ... if error.suggestion:
73
+ ... print(f" Suggestion: {error.suggestion}")
74
+
75
+ Comprehensive reporting:
76
+
77
+ >>> report = linter.generate_report()
78
+ >>> print(f"Total issues: {report['summary']['total_issues']}")
79
+ >>> print(f"Critical errors: {report['summary']['errors']}")
80
+ >>> print(f"Affected cycles: {report['summary']['affected_cycles']}")
81
+ >>> # Category-specific analysis
82
+ >>> for category, issues in report['by_category'].items():
83
+ ... print(f"{category.upper()} ({len(issues)} issues):")
84
+ ... for issue in issues:
85
+ ... print(f" {issue.code}: {issue.message}")
86
+
87
+ Targeted validation:
88
+
89
+ >>> # Validate specific cycle
90
+ >>> cycle_issues = linter.get_issues_for_cycle("optimization_cycle")
91
+ >>> # Validate specific node
92
+ >>> node_issues = linter.get_issues_for_node("processor")
93
+ >>> # Get recommendations
94
+ >>> recommendations = report['recommendations']
95
+ >>> for rec in recommendations:
96
+ ... print(f" {rec}")
97
+
98
+ Validation Checks:
99
+ The linter performs comprehensive checks including:
100
+
101
+ - **CYC001-002**: Convergence condition validation
102
+ - **CYC003-004**: Infinite loop prevention
103
+ - **CYC005-006**: Safety limit configuration
104
+ - **CYC007-009**: Performance anti-pattern detection
105
+ - **CYC010-011**: Parameter mapping validation
106
+ - **CYC012-013**: Node compatibility checks
107
+ - **CYC014-015**: Convergence condition syntax validation
108
+ - **CYC016-017**: Resource usage and leak detection
109
+
110
+ See Also:
111
+ - :mod:`kailash.workflow.migration` for workflow optimization
112
+ - :mod:`kailash.workflow.safety` for safety mechanisms
113
+ - :doc:`/guides/validation` for validation best practices
114
+ """
115
+
116
+ from typing import Dict, Any, List, Optional
117
+ from dataclasses import dataclass
118
+ from enum import Enum
119
+ import re
120
+
121
+ from . import Workflow
122
+
123
+
124
class IssueSeverity(Enum):
    """Severity classification for :class:`ValidationIssue` records.

    Ordered from most to least critical; the string values are used
    directly in reports and log output.
    """

    ERROR = "error"      # critical: prevents execution or causes failures
    WARNING = "warning"  # potential reliability/performance impact
    INFO = "info"        # best-practice suggestion only
129
+
130
+
131
@dataclass
class ValidationIssue:
    """A single validation finding produced by :class:`CycleLinter`.

    Carries everything a report needs to display, group, and act on the
    finding: severity, a category label, a stable error code, and the
    human-readable message, plus optional context and remediation hints.
    """

    severity: IssueSeverity            # how critical the finding is
    category: str                      # grouping key, e.g. "convergence", "safety"
    code: str                          # stable identifier, e.g. "CYC001"
    message: str                       # human-readable description
    node_id: Optional[str] = None      # affected node, when applicable
    cycle_id: Optional[str] = None     # affected cycle, when applicable
    suggestion: Optional[str] = None   # actionable remediation advice
    documentation_link: Optional[str] = None  # pointer to relevant docs
142
+
143
+
144
class CycleLinter:
    """
    Comprehensive linter for cyclic workflows.

    Analyzes workflows for common issues, performance anti-patterns,
    and potential problems specific to cyclic execution. Findings are
    collected as :class:`ValidationIssue` records with stable error
    codes (CYC001-CYC017), severity levels, and actionable suggestions.
    """

    def __init__(self, workflow: Workflow):
        """
        Initialize linter with target workflow.

        Args:
            workflow: The workflow to analyze
        """
        self.workflow = workflow
        self.graph = workflow.graph
        self.issues: List[ValidationIssue] = []

    def check_all(self) -> List[ValidationIssue]:
        """
        Run all validation checks on the workflow.

        Returns:
            List of all validation issues found

        Example:
            >>> workflow = create_problematic_workflow()
            >>> linter = CycleLinter(workflow)
            >>> issues = linter.check_all()
            >>> for issue in issues:
            ...     print(f"{issue.severity.value}: {issue.message}")
        """
        self.issues = []

        # Run all checks; each appends its findings to self.issues.
        self._check_cycles_have_convergence()
        self._check_for_infinite_loop_potential()
        self._check_safety_limits()
        self._check_performance_anti_patterns()
        self._check_parameter_mapping()
        self._check_node_compatibility()
        self._check_convergence_conditions()
        self._check_resource_usage()

        return self.issues

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _get_cycle_groups(self) -> Dict[str, List]:
        """Return the workflow's cycle groups, or an empty dict when the
        workflow does not support cycle detection (no get_cycle_groups)."""
        if hasattr(self.workflow, 'get_cycle_groups'):
            return self.workflow.get_cycle_groups()
        return {}

    @staticmethod
    def _nodes_in_cycle(cycle_edges) -> List[str]:
        """Collect the unique node IDs appearing in a cycle's edge list."""
        cycle_nodes = set()
        for source, target, _ in cycle_edges:
            cycle_nodes.add(source)
            cycle_nodes.add(target)
        return list(cycle_nodes)

    # ------------------------------------------------------------------
    # Checks
    # ------------------------------------------------------------------

    def _check_cycles_have_convergence(self):
        """Check that all cycles have appropriate convergence conditions (CYC001-CYC002)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                if not edge_data.get('convergence_check') and not edge_data.get('max_iterations'):
                    # Neither termination mechanism present: this cycle can never stop.
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.ERROR,
                        category="convergence",
                        code="CYC001",
                        message=f"Cycle {cycle_id} lacks convergence condition and max_iterations",
                        cycle_id=cycle_id,
                        suggestion="Add convergence_check parameter or set max_iterations",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
                    ))
                elif not edge_data.get('convergence_check'):
                    # max_iterations alone works, but forfeits early termination.
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="convergence",
                        code="CYC002",
                        message=f"Cycle {cycle_id} relies only on max_iterations without convergence check",
                        cycle_id=cycle_id,
                        suggestion="Consider adding convergence_check for early termination",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
                    ))

    def _check_for_infinite_loop_potential(self):
        """Check for patterns that could lead to infinite loops (CYC003-CYC004)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                max_iter = edge_data.get('max_iterations')
                convergence = edge_data.get('convergence_check')

                # Check for very high or missing max_iterations
                if max_iter is None or max_iter > 10000:
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="safety",
                        code="CYC003",
                        message=f"Cycle {cycle_id} has very high or no max_iterations limit",
                        cycle_id=cycle_id,
                        suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
                        documentation_link="guide/mistakes/066-infinite-cycles.md"
                    ))

                # Check for potentially unreachable convergence conditions
                if convergence and self._is_potentially_unreachable_condition(convergence):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="convergence",
                        code="CYC004",
                        message=f"Convergence condition '{convergence}' may be unreachable",
                        cycle_id=cycle_id,
                        suggestion="Verify convergence condition is achievable",
                        documentation_link="guide/mistakes/066-infinite-cycles.md"
                    ))

    def _check_safety_limits(self):
        """Check for appropriate safety limits on cycles (CYC005-CYC006)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                # Check timeout
                if not edge_data.get('timeout'):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.INFO,
                        category="safety",
                        code="CYC005",
                        message=f"Cycle {cycle_id} has no timeout limit",
                        cycle_id=cycle_id,
                        suggestion="Consider adding timeout parameter for safety",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
                    ))

                # Check memory limit
                if not edge_data.get('memory_limit'):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.INFO,
                        category="safety",
                        code="CYC006",
                        message=f"Cycle {cycle_id} has no memory limit",
                        cycle_id=cycle_id,
                        suggestion="Consider adding memory_limit parameter for safety",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
                    ))

    def _check_performance_anti_patterns(self):
        """Check for performance anti-patterns (CYC007-CYC009)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            cycle_nodes = self._nodes_in_cycle(cycle_edges)

            # Check for very small cycles (may have high overhead)
            if len(cycle_nodes) == 1:
                node_id = cycle_nodes[0]
                self.issues.append(ValidationIssue(
                    severity=IssueSeverity.INFO,
                    category="performance",
                    code="CYC007",
                    message=f"Single-node cycle {cycle_id} may have high overhead",
                    node_id=node_id,
                    cycle_id=cycle_id,
                    suggestion="Consider if cycle is necessary or if logic can be internal to node",
                    documentation_link="guide/reference/pattern-library/06-performance-patterns.md"
                ))

            # Check for very large cycles (may be hard to debug)
            elif len(cycle_nodes) > 10:
                self.issues.append(ValidationIssue(
                    severity=IssueSeverity.WARNING,
                    category="complexity",
                    code="CYC008",
                    message=f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
                    cycle_id=cycle_id,
                    suggestion="Consider breaking into smaller cycles or using nested workflows",
                    documentation_link="guide/reference/pattern-library/04-complex-patterns.md"
                ))

            # Check for cycles with expensive operations
            for node_id in cycle_nodes:
                if self._is_expensive_operation(node_id):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="performance",
                        code="CYC009",
                        message=f"Expensive operation '{node_id}' in cycle {cycle_id}",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Consider caching, optimization, or moving outside cycle",
                        documentation_link="guide/reference/pattern-library/06-performance-patterns.md"
                    ))

    def _check_parameter_mapping(self):
        """Check for parameter mapping issues in cycles (CYC010-CYC011)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            cycle_nodes = self._nodes_in_cycle(cycle_edges)

            # Check each edge for issues
            for source, target, edge_data in cycle_edges:
                mapping = edge_data.get('mapping', {})

                # Check for identity mappings (common mistake)
                for source_param, target_param in mapping.items():
                    if source_param == target_param:
                        self.issues.append(ValidationIssue(
                            severity=IssueSeverity.WARNING,
                            category="parameter_mapping",
                            code="CYC010",
                            message=f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
                            cycle_id=cycle_id,
                            suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
                            documentation_link="guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md"
                        ))

                # Check for missing parameter propagation
                if not mapping and len(cycle_nodes) > 1:
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.INFO,
                        category="parameter_mapping",
                        code="CYC011",
                        message=f"Cycle {cycle_id} has no parameter mapping",
                        cycle_id=cycle_id,
                        suggestion="Consider if parameters need to propagate between iterations",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
                    ))

    def _check_node_compatibility(self):
        """Check for node compatibility issues with cycles (CYC012-CYC013)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            for node_id in self._nodes_in_cycle(cycle_edges):
                node = self.workflow.nodes.get(node_id)
                if not node:
                    continue

                # Check if node accesses cycle context safely
                if hasattr(node, 'run') and self._uses_unsafe_cycle_access(node):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.ERROR,
                        category="node_compatibility",
                        code="CYC012",
                        message=f"Node '{node_id}' uses unsafe cycle context access",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Use context.get('cycle', {}) instead of direct access",
                        documentation_link="guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md"
                    ))

                # Check for PythonCodeNode parameter access
                if hasattr(node, 'code') and node.code:
                    if self._has_unsafe_parameter_access(node.code):
                        self.issues.append(ValidationIssue(
                            severity=IssueSeverity.WARNING,
                            category="node_compatibility",
                            code="CYC013",
                            message=f"PythonCodeNode '{node_id}' may have unsafe parameter access",
                            node_id=node_id,
                            cycle_id=cycle_id,
                            suggestion="Use try/except pattern for cycle parameter access",
                            documentation_link="guide/mistakes/064-pythoncodenode-none-input-validation-error.md"
                        ))

    def _check_convergence_conditions(self):
        """Check convergence conditions for validity (CYC014-CYC015)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                convergence = edge_data.get('convergence_check')
                if not convergence:
                    continue

                # Check for valid Python syntax
                if not self._is_valid_condition_syntax(convergence):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.ERROR,
                        category="convergence",
                        code="CYC014",
                        message=f"Invalid convergence condition syntax: '{convergence}'",
                        cycle_id=cycle_id,
                        suggestion="Ensure condition is valid Python expression",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
                    ))

                # Check for common mistakes
                if self._has_convergence_condition_issues(convergence):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="convergence",
                        code="CYC015",
                        message=f"Potential issue in convergence condition: '{convergence}'",
                        cycle_id=cycle_id,
                        suggestion="Verify field names and comparison operators",
                        documentation_link="guide/mistakes/066-infinite-cycles.md"
                    ))

    def _check_resource_usage(self):
        """Check for potential resource usage issues (CYC016-CYC017)."""
        for cycle_id, cycle_edges in self._get_cycle_groups().items():
            cycle_nodes = self._nodes_in_cycle(cycle_edges)

            # Check for potential memory leaks
            for node_id in cycle_nodes:
                if self._may_have_memory_leak(node_id):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="resource_usage",
                        code="CYC016",
                        message=f"Node '{node_id}' may have memory leak in cycle",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Ensure proper cleanup of resources in cyclic execution",
                        documentation_link="guide/mistakes/016-memory-leaks-in-long-running-processes.md"
                    ))

            # Check for file handle management
            for node_id in cycle_nodes:
                if self._may_leak_file_handles(node_id):
                    self.issues.append(ValidationIssue(
                        severity=IssueSeverity.WARNING,
                        category="resource_usage",
                        code="CYC017",
                        message=f"Node '{node_id}' may leak file handles in cycle",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Use context managers (with statements) for file operations",
                        documentation_link="guide/mistakes/022-resource-cleanup-issues.md"
                    ))

    # ------------------------------------------------------------------
    # Heuristics
    # ------------------------------------------------------------------

    def _get_cycle_id(self, cycle_nodes: List[str]) -> str:
        """Generate a cycle identifier from cycle nodes."""
        return f"cycle_{'-'.join(sorted(cycle_nodes))}"

    def _is_potentially_unreachable_condition(self, condition: str) -> bool:
        """Check if convergence condition might be unreachable."""
        # Simple heuristics for potentially problematic conditions
        problematic_patterns = [
            r'.*==\s*True\s*$',   # exact boolean match
            r'.*==\s*1\.0\s*$',   # exact float match
            r'.*>\s*1\.0\s*$',    # probability > 1.0
            r'.*<\s*0\.0\s*$',    # probability < 0.0
        ]

        return any(re.search(pattern, condition) for pattern in problematic_patterns)

    def _is_expensive_operation(self, node_id: str) -> bool:
        """Check if node represents an expensive operation (by name heuristic)."""
        expensive_keywords = [
            'train', 'model', 'neural', 'deep', 'learning',
            'api', 'request', 'http', 'download', 'upload',
            'database', 'query', 'sql',
            'file', 'io', 'read', 'write'
        ]

        node_id_lower = node_id.lower()
        return any(keyword in node_id_lower for keyword in expensive_keywords)

    def _uses_unsafe_cycle_access(self, node) -> bool:
        """Check if node uses unsafe cycle context access."""
        # This would require more sophisticated code analysis.
        # For now, return False as a placeholder.
        return False

    def _has_unsafe_parameter_access(self, code: str) -> bool:
        """Check if PythonCodeNode has unsafe parameter access."""
        # Look for direct parameter access without try/except
        for line in code.split('\n'):
            line = line.strip()
            if line and not line.startswith('#'):
                # Check for variable access that might be parameters
                if re.match(r'^[a-zA-Z_]\w*\s*=', line):
                    var_name = line.split('=')[0].strip()
                    # If variable is used before definition, might be parameter
                    if not self._is_defined_before_use(var_name, code):
                        return True

        return False

    def _is_defined_before_use(self, var_name: str, code: str) -> bool:
        """Check if variable is assigned before it is referenced in code.

        Uses word-boundary matching so short names (e.g. ``i``) are not
        falsely found inside other identifiers, and ``=(?!=)`` so a
        comparison like ``x == 1`` is not mistaken for an assignment.
        """
        escaped = re.escape(var_name)
        assignment = re.compile(rf'^\s*{escaped}\s*=(?!=)')
        usage = re.compile(rf'\b{escaped}\b')
        defined = False

        for line in code.split('\n'):
            if assignment.match(line):
                defined = True
            elif not defined and usage.search(line):
                # Used before definition
                return False

        return True

    def _is_valid_condition_syntax(self, condition: str) -> bool:
        """Check if convergence condition has valid Python syntax."""
        try:
            compile(condition, '<string>', 'eval')
            return True
        except (SyntaxError, ValueError):
            # compile() raises ValueError for sources with null bytes;
            # treat those as invalid rather than letting the linter crash.
            return False

    def _has_convergence_condition_issues(self, condition: str) -> bool:
        """Check for common issues in convergence conditions."""
        # Check for undefined variables (common field names)
        undefined_vars = ['done', 'converged', 'finished', 'complete', 'quality', 'error']

        # Might be using an undefined variable
        return any(var in condition for var in undefined_vars)

    def _may_have_memory_leak(self, node_id: str) -> bool:
        """Check if node might have memory leaks (by name heuristic)."""
        leak_keywords = ['accumulate', 'collect', 'gather', 'cache', 'store']
        node_id_lower = node_id.lower()
        return any(keyword in node_id_lower for keyword in leak_keywords)

    def _may_leak_file_handles(self, node_id: str) -> bool:
        """Check if node might leak file handles (by name heuristic)."""
        file_keywords = ['file', 'read', 'write', 'open', 'csv', 'json', 'log']
        node_id_lower = node_id.lower()
        return any(keyword in node_id_lower for keyword in file_keywords)

    # ------------------------------------------------------------------
    # Query and reporting API
    # ------------------------------------------------------------------

    def get_issues_by_severity(self, severity: IssueSeverity) -> List[ValidationIssue]:
        """Get all issues of a specific severity level."""
        return [issue for issue in self.issues if issue.severity == severity]

    def get_issues_by_category(self, category: str) -> List[ValidationIssue]:
        """Get all issues of a specific category."""
        return [issue for issue in self.issues if issue.category == category]

    def get_issues_for_cycle(self, cycle_id: str) -> List[ValidationIssue]:
        """Get all issues for a specific cycle."""
        return [issue for issue in self.issues if issue.cycle_id == cycle_id]

    def get_issues_for_node(self, node_id: str) -> List[ValidationIssue]:
        """Get all issues for a specific node."""
        return [issue for issue in self.issues if issue.node_id == node_id]

    def generate_report(self) -> Dict[str, Any]:
        """
        Generate comprehensive validation report.

        Returns:
            Dict containing validation report with summary and details

        Example:
            >>> from kailash import Workflow
            >>> workflow = Workflow("test", "Test Workflow")
            >>> linter = CycleLinter(workflow)
            >>> linter.check_all()
            >>> report = linter.generate_report()
            >>> print(f"Found {report['summary']['total_issues']} issues")
        """
        errors = self.get_issues_by_severity(IssueSeverity.ERROR)
        warnings = self.get_issues_by_severity(IssueSeverity.WARNING)
        info = self.get_issues_by_severity(IssueSeverity.INFO)

        # Group by category
        by_category: Dict[str, List[ValidationIssue]] = {}
        for issue in self.issues:
            by_category.setdefault(issue.category, []).append(issue)

        # Group by cycle (issues without a cycle_id are omitted here)
        by_cycle: Dict[str, List[ValidationIssue]] = {}
        for issue in self.issues:
            if issue.cycle_id:
                by_cycle.setdefault(issue.cycle_id, []).append(issue)

        return {
            'summary': {
                'total_issues': len(self.issues),
                'errors': len(errors),
                'warnings': len(warnings),
                'info': len(info),
                'categories': list(by_category.keys()),
                'affected_cycles': len(by_cycle)
            },
            'issues': self.issues,
            'by_severity': {
                'errors': errors,
                'warnings': warnings,
                'info': info
            },
            'by_category': by_category,
            'by_cycle': by_cycle,
            'recommendations': self._generate_recommendations()
        }

    def _generate_recommendations(self) -> List[str]:
        """Generate high-level recommendations based on found issues."""
        recommendations = []

        errors = self.get_issues_by_severity(IssueSeverity.ERROR)
        if errors:
            recommendations.append(f"Fix {len(errors)} critical errors before deployment")

        if self.get_issues_by_category("convergence"):
            recommendations.append("Review convergence conditions for all cycles")

        if self.get_issues_by_category("performance"):
            recommendations.append("Optimize cycles to improve performance")

        if self.get_issues_by_category("safety"):
            recommendations.append("Add safety limits (timeout, max_iterations) to cycles")

        return recommendations
+ return recommendations