kailash 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/access_control.py +740 -0
- kailash/api/__main__.py +6 -0
- kailash/api/auth.py +668 -0
- kailash/api/custom_nodes.py +285 -0
- kailash/api/custom_nodes_secure.py +377 -0
- kailash/api/database.py +620 -0
- kailash/api/studio.py +915 -0
- kailash/api/studio_secure.py +893 -0
- kailash/mcp/__init__.py +53 -0
- kailash/mcp/__main__.py +13 -0
- kailash/mcp/ai_registry_server.py +712 -0
- kailash/mcp/client.py +447 -0
- kailash/mcp/client_new.py +334 -0
- kailash/mcp/server.py +293 -0
- kailash/mcp/server_new.py +336 -0
- kailash/mcp/servers/__init__.py +12 -0
- kailash/mcp/servers/ai_registry.py +289 -0
- kailash/nodes/__init__.py +4 -2
- kailash/nodes/ai/__init__.py +38 -0
- kailash/nodes/ai/a2a.py +1790 -0
- kailash/nodes/ai/agents.py +116 -2
- kailash/nodes/ai/ai_providers.py +206 -8
- kailash/nodes/ai/intelligent_agent_orchestrator.py +2108 -0
- kailash/nodes/ai/iterative_llm_agent.py +1280 -0
- kailash/nodes/ai/llm_agent.py +324 -1
- kailash/nodes/ai/self_organizing.py +1623 -0
- kailash/nodes/api/http.py +106 -25
- kailash/nodes/api/rest.py +116 -21
- kailash/nodes/base.py +15 -2
- kailash/nodes/base_async.py +45 -0
- kailash/nodes/base_cycle_aware.py +374 -0
- kailash/nodes/base_with_acl.py +338 -0
- kailash/nodes/code/python.py +135 -27
- kailash/nodes/data/readers.py +116 -53
- kailash/nodes/data/writers.py +16 -6
- kailash/nodes/logic/__init__.py +8 -0
- kailash/nodes/logic/async_operations.py +48 -9
- kailash/nodes/logic/convergence.py +642 -0
- kailash/nodes/logic/loop.py +153 -0
- kailash/nodes/logic/operations.py +212 -27
- kailash/nodes/logic/workflow.py +26 -18
- kailash/nodes/mixins/__init__.py +11 -0
- kailash/nodes/mixins/mcp.py +228 -0
- kailash/nodes/mixins.py +387 -0
- kailash/nodes/transform/__init__.py +8 -1
- kailash/nodes/transform/processors.py +119 -4
- kailash/runtime/__init__.py +2 -1
- kailash/runtime/access_controlled.py +458 -0
- kailash/runtime/local.py +106 -33
- kailash/runtime/parallel_cyclic.py +529 -0
- kailash/sdk_exceptions.py +90 -5
- kailash/security.py +845 -0
- kailash/tracking/manager.py +38 -15
- kailash/tracking/models.py +1 -1
- kailash/tracking/storage/filesystem.py +30 -2
- kailash/utils/__init__.py +8 -0
- kailash/workflow/__init__.py +18 -0
- kailash/workflow/convergence.py +270 -0
- kailash/workflow/cycle_analyzer.py +768 -0
- kailash/workflow/cycle_builder.py +573 -0
- kailash/workflow/cycle_config.py +709 -0
- kailash/workflow/cycle_debugger.py +760 -0
- kailash/workflow/cycle_exceptions.py +601 -0
- kailash/workflow/cycle_profiler.py +671 -0
- kailash/workflow/cycle_state.py +338 -0
- kailash/workflow/cyclic_runner.py +985 -0
- kailash/workflow/graph.py +500 -39
- kailash/workflow/migration.py +768 -0
- kailash/workflow/safety.py +365 -0
- kailash/workflow/templates.py +744 -0
- kailash/workflow/validation.py +693 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/METADATA +446 -13
- kailash-0.2.0.dist-info/RECORD +125 -0
- kailash/nodes/mcp/__init__.py +0 -11
- kailash/nodes/mcp/client.py +0 -554
- kailash/nodes/mcp/resource.py +0 -682
- kailash/nodes/mcp/server.py +0 -577
- kailash-0.1.4.dist-info/RECORD +0 -85
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,693 @@
|
|
1
|
+
"""
|
2
|
+
Comprehensive Validation and Linting for Cyclic Workflows.
|
3
|
+
|
4
|
+
This module provides extensive validation and linting capabilities to identify
|
5
|
+
common issues, performance anti-patterns, security vulnerabilities, and potential
|
6
|
+
problems in cyclic workflows before execution. It acts as a quality gate to
|
7
|
+
ensure workflow reliability and optimal performance.
|
8
|
+
|
9
|
+
Design Philosophy:
|
10
|
+
Provides proactive quality assurance through comprehensive static analysis
|
11
|
+
of workflow structures, configurations, and patterns. Identifies issues
|
12
|
+
early in the development cycle with specific, actionable recommendations
|
13
|
+
for resolution and optimization.
|
14
|
+
|
15
|
+
Key Features:
|
16
|
+
- Multi-category validation (safety, performance, compatibility)
|
17
|
+
- Severity-based issue classification (error, warning, info)
|
18
|
+
- Specific error codes with documentation links
|
19
|
+
- Actionable suggestions for issue resolution
|
20
|
+
- Comprehensive reporting with categorization
|
21
|
+
- Integration with development workflows
|
22
|
+
|
23
|
+
Validation Categories:
|
24
|
+
- Convergence: Cycle termination and convergence conditions
|
25
|
+
- Safety: Resource limits and infinite loop prevention
|
26
|
+
- Performance: Anti-patterns and optimization opportunities
|
27
|
+
- Parameter Mapping: Cycle parameter flow validation
|
28
|
+
- Node Compatibility: Cycle-aware node validation
|
29
|
+
- Resource Usage: Memory and file handle management
|
30
|
+
|
31
|
+
Issue Severity Levels:
|
32
|
+
- ERROR: Critical issues that prevent execution or cause failures
|
33
|
+
- WARNING: Potential issues that may impact performance or reliability
|
34
|
+
- INFO: Suggestions for improvement and best practices
|
35
|
+
|
36
|
+
Core Components:
|
37
|
+
- ValidationIssue: Structured issue representation with metadata
|
38
|
+
- IssueSeverity: Enumeration of severity levels
|
39
|
+
- CycleLinter: Main validation engine with comprehensive checks
|
40
|
+
- Reporting system with categorization and filtering
|
41
|
+
|
42
|
+
Validation Algorithms:
|
43
|
+
- Static analysis of cycle configurations
|
44
|
+
- Pattern recognition for common anti-patterns
|
45
|
+
- Resource usage analysis and leak detection
|
46
|
+
- Security validation for parameter access
|
47
|
+
- Performance bottleneck identification
|
48
|
+
|
49
|
+
Upstream Dependencies:
|
50
|
+
- Workflow graph structure and cycle detection
|
51
|
+
- Node implementations and configuration validation
|
52
|
+
- Cycle configuration and safety systems
|
53
|
+
|
54
|
+
Downstream Consumers:
|
55
|
+
- Development tools and IDEs for real-time validation
|
56
|
+
- CI/CD pipelines for automated quality gates
|
57
|
+
- Performance optimization tools
|
58
|
+
- Security analysis and compliance systems
|
59
|
+
- Educational and training materials
|
60
|
+
|
61
|
+
Examples:
|
62
|
+
Basic workflow validation:
|
63
|
+
|
64
|
+
>>> from kailash.workflow.validation import CycleLinter, IssueSeverity
|
65
|
+
>>> linter = CycleLinter(workflow)
|
66
|
+
>>> issues = linter.check_all()
|
67
|
+
>>> # Filter by severity
|
68
|
+
>>> errors = linter.get_issues_by_severity(IssueSeverity.ERROR)
|
69
|
+
>>> warnings = linter.get_issues_by_severity(IssueSeverity.WARNING)
|
70
|
+
>>> for error in errors:
|
71
|
+
... print(f"ERROR {error.code}: {error.message}")
|
72
|
+
... if error.suggestion:
|
73
|
+
... print(f" Suggestion: {error.suggestion}")
|
74
|
+
|
75
|
+
Comprehensive reporting:
|
76
|
+
|
77
|
+
>>> report = linter.generate_report()
|
78
|
+
>>> print(f"Total issues: {report['summary']['total_issues']}")
|
79
|
+
>>> print(f"Critical errors: {report['summary']['errors']}")
|
80
|
+
>>> print(f"Affected cycles: {report['summary']['affected_cycles']}")
|
81
|
+
>>> # Category-specific analysis
|
82
|
+
>>> for category, issues in report['by_category'].items():
|
83
|
+
... print(f"{category.upper()} ({len(issues)} issues):")
|
84
|
+
... for issue in issues:
|
85
|
+
... print(f" {issue.code}: {issue.message}")
|
86
|
+
|
87
|
+
Targeted validation:
|
88
|
+
|
89
|
+
>>> # Validate specific cycle
|
90
|
+
>>> cycle_issues = linter.get_issues_for_cycle("optimization_cycle")
|
91
|
+
>>> # Validate specific node
|
92
|
+
>>> node_issues = linter.get_issues_for_node("processor")
|
93
|
+
>>> # Get recommendations
|
94
|
+
>>> recommendations = report['recommendations']
|
95
|
+
>>> for rec in recommendations:
|
96
|
+
... print(f" {rec}")
|
97
|
+
|
98
|
+
Validation Checks:
|
99
|
+
The linter performs comprehensive checks including:
|
100
|
+
|
101
|
+
- **CYC001-002**: Convergence condition validation
|
102
|
+
- **CYC003-004**: Infinite loop prevention
|
103
|
+
- **CYC005-006**: Safety limit configuration
|
104
|
+
- **CYC007-009**: Performance anti-pattern detection
|
105
|
+
- **CYC010-011**: Parameter mapping validation
|
106
|
+
- **CYC012-013**: Node compatibility checks
|
107
|
+
- **CYC014-015**: Convergence condition syntax validation
|
108
|
+
- **CYC016-017**: Resource usage and leak detection
|
109
|
+
|
110
|
+
See Also:
|
111
|
+
- :mod:`kailash.workflow.migration` for workflow optimization
|
112
|
+
- :mod:`kailash.workflow.safety` for safety mechanisms
|
113
|
+
- :doc:`/guides/validation` for validation best practices
|
114
|
+
"""
|
115
|
+
|
116
|
+
from typing import Dict, Any, List, Optional
|
117
|
+
from dataclasses import dataclass
|
118
|
+
from enum import Enum
|
119
|
+
import re
|
120
|
+
|
121
|
+
from . import Workflow
|
122
|
+
|
123
|
+
|
124
|
+
class IssueSeverity(Enum):
    """Severity levels assigned to workflow validation issues.

    The string values are stable identifiers used in reports and
    filtering (e.g. ``get_issues_by_severity``).
    """

    ERROR = "error"      # critical: prevents execution or causes failures
    WARNING = "warning"  # potential problem: may hurt reliability/performance
    INFO = "info"        # advisory: best-practice suggestion only
|
129
|
+
|
130
|
+
|
131
|
+
@dataclass
class ValidationIssue:
    """A single finding produced by workflow validation.

    Groups three kinds of information: what was found (severity,
    category, code, message), where it was found (node_id, cycle_id),
    and how to act on it (suggestion, documentation_link).
    """

    # What was found.
    severity: IssueSeverity
    category: str
    code: str
    message: str
    # Where it was found; either locator may be absent.
    node_id: Optional[str] = None
    cycle_id: Optional[str] = None
    # How to act on it; both are optional hints.
    suggestion: Optional[str] = None
    documentation_link: Optional[str] = None
|
142
|
+
|
143
|
+
|
144
|
+
class CycleLinter:
    """
    Comprehensive linter for cyclic workflows.

    Analyzes workflows for common issues, performance anti-patterns,
    and potential problems specific to cyclic execution. Each finding
    is recorded as a :class:`ValidationIssue` with a stable error code
    (CYC001-CYC017), a severity, and an actionable suggestion.
    """

    def __init__(self, workflow: Workflow):
        """
        Initialize linter with target workflow.

        Args:
            workflow: The workflow to analyze
        """
        self.workflow = workflow
        self.graph = workflow.graph
        self.issues: List[ValidationIssue] = []

    # ------------------------------------------------------------------
    # Shared helpers (used by all individual checks)
    # ------------------------------------------------------------------

    def _cycle_groups(self) -> Dict[str, Any]:
        """Return the workflow's cycle groups, or {} if unsupported.

        Workflows that do not implement ``get_cycle_groups`` simply
        produce no cycle-related issues.
        """
        if hasattr(self.workflow, 'get_cycle_groups'):
            return self.workflow.get_cycle_groups()
        return {}

    @staticmethod
    def _cycle_node_ids(cycle_edges) -> List[str]:
        """Return the unique node ids participating in the given cycle edges."""
        nodes = set()
        for source, target, _ in cycle_edges:
            nodes.add(source)
            nodes.add(target)
        return list(nodes)

    def _add_issue(
        self,
        severity: IssueSeverity,
        category: str,
        code: str,
        message: str,
        *,
        node_id: Optional[str] = None,
        cycle_id: Optional[str] = None,
        suggestion: Optional[str] = None,
        documentation_link: Optional[str] = None,
    ) -> None:
        """Record a single validation issue."""
        self.issues.append(ValidationIssue(
            severity=severity,
            category=category,
            code=code,
            message=message,
            node_id=node_id,
            cycle_id=cycle_id,
            suggestion=suggestion,
            documentation_link=documentation_link,
        ))

    # ------------------------------------------------------------------
    # Top-level entry point
    # ------------------------------------------------------------------

    def check_all(self) -> List[ValidationIssue]:
        """
        Run all validation checks on the workflow.

        Returns:
            List of all validation issues found

        Example:
            >>> linter = CycleLinter(workflow)
            >>> issues = linter.check_all()
            >>> for issue in issues:
            ...     print(f"{issue.severity.value}: {issue.message}")
        """
        self.issues = []

        # Each check appends to self.issues independently.
        self._check_cycles_have_convergence()
        self._check_for_infinite_loop_potential()
        self._check_safety_limits()
        self._check_performance_anti_patterns()
        self._check_parameter_mapping()
        self._check_node_compatibility()
        self._check_convergence_conditions()
        self._check_resource_usage()

        return self.issues

    # ------------------------------------------------------------------
    # Individual checks (CYC001-CYC017)
    # ------------------------------------------------------------------

    def _check_cycles_have_convergence(self):
        """CYC001/CYC002: every cycle needs a termination condition."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                has_convergence = bool(edge_data.get('convergence_check'))
                has_max_iter = bool(edge_data.get('max_iterations'))

                if not has_convergence and not has_max_iter:
                    # No termination mechanism at all -> guaranteed problem.
                    self._add_issue(
                        IssueSeverity.ERROR, "convergence", "CYC001",
                        f"Cycle {cycle_id} lacks convergence condition and max_iterations",
                        cycle_id=cycle_id,
                        suggestion="Add convergence_check parameter or set max_iterations",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                    )
                elif not has_convergence:
                    # max_iterations alone works, but wastes iterations.
                    self._add_issue(
                        IssueSeverity.WARNING, "convergence", "CYC002",
                        f"Cycle {cycle_id} relies only on max_iterations without convergence check",
                        cycle_id=cycle_id,
                        suggestion="Consider adding convergence_check for early termination",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                    )

    def _check_for_infinite_loop_potential(self):
        """CYC003/CYC004: patterns that could lead to infinite loops."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                max_iter = edge_data.get('max_iterations')
                convergence = edge_data.get('convergence_check')

                # Missing or extremely high iteration cap.
                if max_iter is None or max_iter > 10000:
                    self._add_issue(
                        IssueSeverity.WARNING, "safety", "CYC003",
                        f"Cycle {cycle_id} has very high or no max_iterations limit",
                        cycle_id=cycle_id,
                        suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
                        documentation_link="guide/mistakes/066-infinite-cycles.md",
                    )

                # Convergence condition that may never become true.
                if convergence and self._is_potentially_unreachable_condition(convergence):
                    self._add_issue(
                        IssueSeverity.WARNING, "convergence", "CYC004",
                        f"Convergence condition '{convergence}' may be unreachable",
                        cycle_id=cycle_id,
                        suggestion="Verify convergence condition is achievable",
                        documentation_link="guide/mistakes/066-infinite-cycles.md",
                    )

    def _check_safety_limits(self):
        """CYC005/CYC006: advisory checks for timeout and memory limits."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                if not edge_data.get('timeout'):
                    self._add_issue(
                        IssueSeverity.INFO, "safety", "CYC005",
                        f"Cycle {cycle_id} has no timeout limit",
                        cycle_id=cycle_id,
                        suggestion="Consider adding timeout parameter for safety",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                    )

                if not edge_data.get('memory_limit'):
                    self._add_issue(
                        IssueSeverity.INFO, "safety", "CYC006",
                        f"Cycle {cycle_id} has no memory limit",
                        cycle_id=cycle_id,
                        suggestion="Consider adding memory_limit parameter for safety",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                    )

    def _check_performance_anti_patterns(self):
        """CYC007-CYC009: cycle size and expensive-operation heuristics."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            cycle_nodes = self._cycle_node_ids(cycle_edges)

            if len(cycle_nodes) == 1:
                # Single-node cycles pay full cycle overhead per iteration.
                self._add_issue(
                    IssueSeverity.INFO, "performance", "CYC007",
                    f"Single-node cycle {cycle_id} may have high overhead",
                    node_id=cycle_nodes[0],
                    cycle_id=cycle_id,
                    suggestion="Consider if cycle is necessary or if logic can be internal to node",
                    documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
                )
            elif len(cycle_nodes) > 10:
                # Very large cycles are hard to reason about and debug.
                self._add_issue(
                    IssueSeverity.WARNING, "complexity", "CYC008",
                    f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
                    cycle_id=cycle_id,
                    suggestion="Consider breaking into smaller cycles or using nested workflows",
                    documentation_link="guide/reference/pattern-library/04-complex-patterns.md",
                )

            for node_id in cycle_nodes:
                if self._is_expensive_operation(node_id):
                    self._add_issue(
                        IssueSeverity.WARNING, "performance", "CYC009",
                        f"Expensive operation '{node_id}' in cycle {cycle_id}",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Consider caching, optimization, or moving outside cycle",
                        documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
                    )

    def _check_parameter_mapping(self):
        """CYC010/CYC011: parameter flow between cycle iterations."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            cycle_nodes = self._cycle_node_ids(cycle_edges)

            for source, target, edge_data in cycle_edges:
                mapping = edge_data.get('mapping', {})

                # Identity mappings ('x' -> 'x') usually indicate the
                # author forgot the 'result.field' -> 'field' pattern.
                for source_param, target_param in mapping.items():
                    if source_param == target_param:
                        self._add_issue(
                            IssueSeverity.WARNING, "parameter_mapping", "CYC010",
                            f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
                            cycle_id=cycle_id,
                            suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
                            documentation_link="guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md",
                        )

                # Multi-node cycle with no mapping at all: state may not
                # propagate between iterations.
                if not mapping and len(cycle_nodes) > 1:
                    self._add_issue(
                        IssueSeverity.INFO, "parameter_mapping", "CYC011",
                        f"Cycle {cycle_id} has no parameter mapping",
                        cycle_id=cycle_id,
                        suggestion="Consider if parameters need to propagate between iterations",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                    )

    def _check_node_compatibility(self):
        """CYC012/CYC013: nodes that may not be cycle-safe."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            for node_id in self._cycle_node_ids(cycle_edges):
                node = self.workflow.nodes.get(node_id)
                if not node:
                    # Node id present in edges but not registered; skip.
                    continue

                # Runnable nodes: flag unsafe cycle-context access.
                if hasattr(node, 'run') and self._uses_unsafe_cycle_access(node):
                    self._add_issue(
                        IssueSeverity.ERROR, "node_compatibility", "CYC012",
                        f"Node '{node_id}' uses unsafe cycle context access",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Use context.get('cycle', {}) instead of direct access",
                        documentation_link="guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md",
                    )

                # PythonCodeNode-style nodes: inspect their source code.
                if hasattr(node, 'code') and node.code:
                    if self._has_unsafe_parameter_access(node.code):
                        self._add_issue(
                            IssueSeverity.WARNING, "node_compatibility", "CYC013",
                            f"PythonCodeNode '{node_id}' may have unsafe parameter access",
                            node_id=node_id,
                            cycle_id=cycle_id,
                            suggestion="Use try/except pattern for cycle parameter access",
                            documentation_link="guide/mistakes/064-pythoncodenode-none-input-validation-error.md",
                        )

    def _check_convergence_conditions(self):
        """CYC014/CYC015: syntax and plausibility of convergence conditions."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            for source, target, edge_data in cycle_edges:
                convergence = edge_data.get('convergence_check')
                if not convergence:
                    continue

                if not self._is_valid_condition_syntax(convergence):
                    self._add_issue(
                        IssueSeverity.ERROR, "convergence", "CYC014",
                        f"Invalid convergence condition syntax: '{convergence}'",
                        cycle_id=cycle_id,
                        suggestion="Ensure condition is valid Python expression",
                        documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
                    )

                if self._has_convergence_condition_issues(convergence):
                    self._add_issue(
                        IssueSeverity.WARNING, "convergence", "CYC015",
                        f"Potential issue in convergence condition: '{convergence}'",
                        cycle_id=cycle_id,
                        suggestion="Verify field names and comparison operators",
                        documentation_link="guide/mistakes/066-infinite-cycles.md",
                    )

    def _check_resource_usage(self):
        """CYC016/CYC017: memory-leak and file-handle heuristics."""
        for cycle_id, cycle_edges in self._cycle_groups().items():
            cycle_nodes = self._cycle_node_ids(cycle_edges)

            for node_id in cycle_nodes:
                if self._may_have_memory_leak(node_id):
                    self._add_issue(
                        IssueSeverity.WARNING, "resource_usage", "CYC016",
                        f"Node '{node_id}' may have memory leak in cycle",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Ensure proper cleanup of resources in cyclic execution",
                        documentation_link="guide/mistakes/016-memory-leaks-in-long-running-processes.md",
                    )

            for node_id in cycle_nodes:
                if self._may_leak_file_handles(node_id):
                    self._add_issue(
                        IssueSeverity.WARNING, "resource_usage", "CYC017",
                        f"Node '{node_id}' may leak file handles in cycle",
                        node_id=node_id,
                        cycle_id=cycle_id,
                        suggestion="Use context managers (with statements) for file operations",
                        documentation_link="guide/mistakes/022-resource-cleanup-issues.md",
                    )

    # ------------------------------------------------------------------
    # Heuristic predicates
    # ------------------------------------------------------------------

    def _get_cycle_id(self, cycle_nodes: List[str]) -> str:
        """Generate a deterministic cycle identifier from cycle nodes."""
        return f"cycle_{'-'.join(sorted(cycle_nodes))}"

    def _is_potentially_unreachable_condition(self, condition: str) -> bool:
        """Check if convergence condition might be unreachable.

        Uses simple regex heuristics: exact equality against booleans or
        floats, and comparisons outside the [0.0, 1.0] probability range.
        """
        problematic_patterns = [
            r'.*==\s*True\s*$',   # exact boolean match
            r'.*==\s*1\.0\s*$',   # exact float match
            r'.*>\s*1\.0\s*$',    # probability > 1.0
            r'.*<\s*0\.0\s*$',    # probability < 0.0
        ]
        return any(re.search(pattern, condition) for pattern in problematic_patterns)

    def _is_expensive_operation(self, node_id: str) -> bool:
        """Heuristic: does the node id suggest an expensive operation?

        Matches on keywords in the node id only; it does not inspect the
        node implementation.
        """
        expensive_keywords = [
            'train', 'model', 'neural', 'deep', 'learning',
            'api', 'request', 'http', 'download', 'upload',
            'database', 'query', 'sql',
            'file', 'io', 'read', 'write'
        ]
        node_id_lower = node_id.lower()
        return any(keyword in node_id_lower for keyword in expensive_keywords)

    def _uses_unsafe_cycle_access(self, node) -> bool:
        """Check if node uses unsafe cycle context access.

        Placeholder: proper detection requires static analysis of the
        node implementation, which is not available here.
        """
        return False

    def _has_unsafe_parameter_access(self, code: str) -> bool:
        """Check if PythonCodeNode source has unsafe parameter access.

        Heuristic: an assigned variable that also appears earlier in the
        code (before its first assignment) may be an injected cycle
        parameter accessed without a try/except guard.
        """
        # (?!=) excludes '==' so comparisons are not treated as assignments.
        assignment = re.compile(r'^[a-zA-Z_]\w*\s*=(?!=)')

        for line in code.split('\n'):
            line = line.strip()
            if line and not line.startswith('#') and assignment.match(line):
                var_name = line.split('=')[0].strip()
                if not self._is_defined_before_use(var_name, code):
                    return True

        return False

    def _is_defined_before_use(self, var_name: str, code: str) -> bool:
        """Check if variable is assigned before it is first referenced."""
        # Word-boundary match so e.g. 'x' does not match inside 'max'.
        use_pattern = re.compile(rf'\b{re.escape(var_name)}\b')
        # (?!=) excludes '==' comparisons from counting as assignments.
        def_pattern = re.compile(rf'^{re.escape(var_name)}\s*=(?!=)')
        defined = False

        for line in code.split('\n'):
            line = line.strip()
            if def_pattern.match(line):
                defined = True
            elif use_pattern.search(line) and not defined:
                return False  # referenced before any assignment

        return True

    def _is_valid_condition_syntax(self, condition: str) -> bool:
        """Check if convergence condition is a valid Python expression."""
        try:
            compile(condition, '<string>', 'eval')
            return True
        except (SyntaxError, ValueError):
            # compile() raises ValueError for e.g. null bytes in the source.
            return False

    def _has_convergence_condition_issues(self, condition: str) -> bool:
        """Check for common issues in convergence conditions.

        Heuristic: these common field names frequently reference values
        that were never produced by the cycle's nodes.
        """
        undefined_vars = ['done', 'converged', 'finished', 'complete', 'quality', 'error']
        return any(var in condition for var in undefined_vars)

    def _may_have_memory_leak(self, node_id: str) -> bool:
        """Heuristic: does the node id suggest unbounded accumulation?"""
        leak_keywords = ['accumulate', 'collect', 'gather', 'cache', 'store']
        node_id_lower = node_id.lower()
        return any(keyword in node_id_lower for keyword in leak_keywords)

    def _may_leak_file_handles(self, node_id: str) -> bool:
        """Heuristic: does the node id suggest file I/O?"""
        file_keywords = ['file', 'read', 'write', 'open', 'csv', 'json', 'log']
        node_id_lower = node_id.lower()
        return any(keyword in node_id_lower for keyword in file_keywords)

    # ------------------------------------------------------------------
    # Querying and reporting
    # ------------------------------------------------------------------

    def get_issues_by_severity(self, severity: IssueSeverity) -> List[ValidationIssue]:
        """Get all issues of a specific severity level."""
        return [issue for issue in self.issues if issue.severity == severity]

    def get_issues_by_category(self, category: str) -> List[ValidationIssue]:
        """Get all issues of a specific category."""
        return [issue for issue in self.issues if issue.category == category]

    def get_issues_for_cycle(self, cycle_id: str) -> List[ValidationIssue]:
        """Get all issues for a specific cycle."""
        return [issue for issue in self.issues if issue.cycle_id == cycle_id]

    def get_issues_for_node(self, node_id: str) -> List[ValidationIssue]:
        """Get all issues for a specific node."""
        return [issue for issue in self.issues if issue.node_id == node_id]

    def generate_report(self) -> Dict[str, Any]:
        """
        Generate comprehensive validation report.

        Returns:
            Dict containing validation report with summary and details
            (counts, issues grouped by severity/category/cycle, and
            high-level recommendations).

        Example:
            >>> linter = CycleLinter(workflow)
            >>> linter.check_all()
            >>> report = linter.generate_report()
            >>> print(f"Found {report['summary']['total_issues']} issues")
        """
        errors = self.get_issues_by_severity(IssueSeverity.ERROR)
        warnings = self.get_issues_by_severity(IssueSeverity.WARNING)
        info = self.get_issues_by_severity(IssueSeverity.INFO)

        by_category: Dict[str, List[ValidationIssue]] = {}
        for issue in self.issues:
            by_category.setdefault(issue.category, []).append(issue)

        by_cycle: Dict[str, List[ValidationIssue]] = {}
        for issue in self.issues:
            if issue.cycle_id:
                by_cycle.setdefault(issue.cycle_id, []).append(issue)

        return {
            'summary': {
                'total_issues': len(self.issues),
                'errors': len(errors),
                'warnings': len(warnings),
                'info': len(info),
                'categories': list(by_category.keys()),
                'affected_cycles': len(by_cycle)
            },
            'issues': self.issues,
            'by_severity': {
                'errors': errors,
                'warnings': warnings,
                'info': info
            },
            'by_category': by_category,
            'by_cycle': by_cycle,
            'recommendations': self._generate_recommendations()
        }

    def _generate_recommendations(self) -> List[str]:
        """Generate high-level recommendations based on found issues."""
        recommendations = []

        errors = self.get_issues_by_severity(IssueSeverity.ERROR)
        if errors:
            recommendations.append(f"Fix {len(errors)} critical errors before deployment")

        if self.get_issues_by_category("convergence"):
            recommendations.append("Review convergence conditions for all cycles")

        if self.get_issues_by_category("performance"):
            recommendations.append("Optimize cycles to improve performance")

        if self.get_issues_by_category("safety"):
            recommendations.append("Add safety limits (timeout, max_iterations) to cycles")

        return recommendations
|