kailash 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the changes between those published versions.
- kailash/mcp/server_new.py +6 -6
- kailash/nodes/data/__init__.py +1 -2
- kailash/nodes/data/sql.py +699 -256
- kailash/workflow/cycle_analyzer.py +346 -225
- kailash/workflow/cycle_builder.py +75 -69
- kailash/workflow/cycle_config.py +62 -46
- kailash/workflow/cycle_debugger.py +284 -184
- kailash/workflow/cycle_exceptions.py +111 -97
- kailash/workflow/cycle_profiler.py +272 -202
- kailash/workflow/migration.py +238 -197
- kailash/workflow/templates.py +124 -105
- kailash/workflow/validation.py +356 -298
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/METADATA +4 -1
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/RECORD +18 -18
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/WHEEL +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/top_level.txt +0 -0
kailash/workflow/validation.py
CHANGED

The changes to this file appear to be style-only, consistent with a Black/isort pass: strings normalized to double quotes, long `self.issues.append(ValidationIssue(...))` calls re-wrapped one argument per line, imports reordered, blank lines added after class docstrings, and trailing whitespace stripped. Shown below as a unified diff; removed lines are reconstructed from the surviving fragments where the original viewer truncated them.
@@ -60,7 +60,7 @@ Downstream Consumers:
 
 Examples:
     Basic workflow validation:
-
+
     >>> from kailash.workflow.validation import CycleLinter, IssueSeverity
     >>> linter = CycleLinter(workflow)
     >>> issues = linter.check_all()
@@ -71,9 +71,9 @@ Examples:
     ...     print(f"ERROR {error.code}: {error.message}")
     ...     if error.suggestion:
     ...         print(f"  Suggestion: {error.suggestion}")
-
+
    Comprehensive reporting:
-
+
    >>> report = linter.generate_report()
    >>> print(f"Total issues: {report['summary']['total_issues']}")
    >>> print(f"Critical errors: {report['summary']['errors']}")
@@ -83,9 +83,9 @@ Examples:
     ...     print(f"{category.upper()} ({len(issues)} issues):")
     ...     for issue in issues:
     ...         print(f"  {issue.code}: {issue.message}")
-
+
    Targeted validation:
-
+
    >>> # Validate specific cycle
    >>> cycle_issues = linter.get_issues_for_cycle("optimization_cycle")
    >>> # Validate specific node
@@ -97,7 +97,7 @@ Examples:
 
 Validation Checks:
     The linter performs comprehensive checks including:
-
+
    - **CYC001-002**: Convergence condition validation
    - **CYC003-004**: Infinite loop prevention
    - **CYC005-006**: Safety limit configuration
@@ -113,16 +113,17 @@ See Also:
     - :doc:`/guides/validation` for validation best practices
 """
 
-
+import re
 from dataclasses import dataclass
 from enum import Enum
-import re
+from typing import Any, Dict, List, Optional
 
 from . import Workflow
 
 
 class IssueSeverity(Enum):
     """Severity levels for validation issues."""
+
     ERROR = "error"
     WARNING = "warning"
     INFO = "info"
@@ -131,6 +132,7 @@ class IssueSeverity(Enum):
 @dataclass
 class ValidationIssue:
     """Represents a validation issue found in a workflow."""
+
     severity: IssueSeverity
     category: str
     code: str
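For orientation, a ValidationIssue can also be constructed directly. The fields beyond the three shown in this hunk (message, cycle_id, suggestion, documentation_link, plus an optional node_id) are inferred from the `ValidationIssue(...)` calls later in this diff; the cycle name here is hypothetical:

```python
from kailash.workflow.validation import IssueSeverity, ValidationIssue

# Mirrors the CYC003 call later in this file; "opt_loop" is a made-up cycle id.
issue = ValidationIssue(
    severity=IssueSeverity.WARNING,
    category="safety",
    code="CYC003",
    message="Cycle opt_loop has very high or no max_iterations limit",
    cycle_id="opt_loop",
    suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
    documentation_link="guide/mistakes/066-infinite-cycles.md",
)
print(f"{issue.severity.value}: {issue.code} - {issue.message}")
```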
@@ -144,29 +146,29 @@ class ValidationIssue:
 class CycleLinter:
     """
     Comprehensive linter for cyclic workflows.
-
+
     Analyzes workflows for common issues, performance anti-patterns,
     and potential problems specific to cyclic execution.
     """
-
+
     def __init__(self, workflow: Workflow):
         """
         Initialize linter with target workflow.
-
+
         Args:
             workflow: The workflow to analyze
         """
         self.workflow = workflow
         self.graph = workflow.graph
         self.issues: List[ValidationIssue] = []
-
+
     def check_all(self) -> List[ValidationIssue]:
         """
         Run all validation checks on the workflow.
-
+
         Returns:
             List of all validation issues found
-
+
         Example:
             >>> workflow = create_problematic_workflow()
             >>> linter = CycleLinter(workflow)
@@ -175,7 +177,7 @@ class CycleLinter:
             ...     print(f"{issue.severity.value}: {issue.message}")
         """
         self.issues = []
-
+
         # Run all checks
         self._check_cycles_have_convergence()
         self._check_for_infinite_loop_potential()
@@ -185,110 +187,124 @@ class CycleLinter:
         self._check_node_compatibility()
         self._check_convergence_conditions()
         self._check_resource_usage()
-
+
         return self.issues
-
+
     def _check_cycles_have_convergence(self):
         """Check that all cycles have appropriate convergence conditions."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
-                    if not edge_data.get('convergence_check') and not edge_data.get('max_iterations'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.ERROR,
-                            category='convergence',
-                            code='CYC001',
-                            message=f"Cycle {cycle_id} lacks convergence condition and max_iterations",
-                            cycle_id=cycle_id,
-                            suggestion='Add convergence_check parameter or set max_iterations',
-                            documentation_link='guide/reference/cheatsheet/019-cyclic-workflows-basics.md'
-                        ))
-
-                    elif not edge_data.get('convergence_check'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category='convergence',
-                            code='CYC002',
-                            message=f"Cycle {cycle_id} relies only on max_iterations without convergence check",
-                            cycle_id=cycle_id,
-                            suggestion='Consider adding convergence_check for early termination',
-                            documentation_link='guide/reference/cheatsheet/019-cyclic-workflows-basics.md'
-                        ))
-
+                    if not edge_data.get("convergence_check") and not edge_data.get(
+                        "max_iterations"
+                    ):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.ERROR,
+                                category="convergence",
+                                code="CYC001",
+                                message=f"Cycle {cycle_id} lacks convergence condition and max_iterations",
+                                cycle_id=cycle_id,
+                                suggestion="Add convergence_check parameter or set max_iterations",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
+                    elif not edge_data.get("convergence_check"):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="convergence",
+                                code="CYC002",
+                                message=f"Cycle {cycle_id} relies only on max_iterations without convergence check",
+                                cycle_id=cycle_id,
+                                suggestion="Consider adding convergence_check for early termination",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
     def _check_for_infinite_loop_potential(self):
         """Check for patterns that could lead to infinite loops."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
-                    max_iter = edge_data.get('max_iterations')
-                    convergence = edge_data.get('convergence_check')
-
+                    max_iter = edge_data.get("max_iterations")
+                    convergence = edge_data.get("convergence_check")
+
                     # Check for very high or missing max_iterations
                     if max_iter is None or max_iter > 10000:
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category='safety',
-                            code='CYC003',
-                            message=f"Cycle {cycle_id} has very high or no max_iterations limit",
-                            cycle_id=cycle_id,
-                            suggestion='Set reasonable max_iterations (e.g., 100-1000) as safety limit',
-                            documentation_link='guide/mistakes/066-infinite-cycles.md'
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="safety",
+                                code="CYC003",
+                                message=f"Cycle {cycle_id} has very high or no max_iterations limit",
+                                cycle_id=cycle_id,
+                                suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
+                                documentation_link="guide/mistakes/066-infinite-cycles.md",
+                            )
+                        )
+
                     # Check for potentially unreachable convergence conditions
                     if convergence:
                         if self._is_potentially_unreachable_condition(convergence):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category='convergence',
-                                code='CYC004',
-                                message=f"Convergence condition '{convergence}' may be unreachable",
-                                cycle_id=cycle_id,
-                                suggestion='Verify convergence condition is achievable',
-                                documentation_link='guide/mistakes/066-infinite-cycles.md'
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="convergence",
+                                    code="CYC004",
+                                    message=f"Convergence condition '{convergence}' may be unreachable",
+                                    cycle_id=cycle_id,
+                                    suggestion="Verify convergence condition is achievable",
+                                    documentation_link="guide/mistakes/066-infinite-cycles.md",
+                                )
+                            )
+
     def _check_safety_limits(self):
         """Check for appropriate safety limits on cycles."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
                     # Check timeout
-                    if not edge_data.get('timeout'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.INFO,
-                            category='safety',
-                            code='CYC005',
-                            message=f"Cycle {cycle_id} has no timeout limit",
-                            cycle_id=cycle_id,
-                            suggestion='Consider adding timeout parameter for safety',
-                            documentation_link='guide/reference/cheatsheet/019-cyclic-workflows-basics.md'
-                        ))
-
+                    if not edge_data.get("timeout"):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.INFO,
+                                category="safety",
+                                code="CYC005",
+                                message=f"Cycle {cycle_id} has no timeout limit",
+                                cycle_id=cycle_id,
+                                suggestion="Consider adding timeout parameter for safety",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
                     # Check memory limit
-                    if not edge_data.get('memory_limit'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.INFO,
-                            category='safety',
-                            code='CYC006',
-                            message=f"Cycle {cycle_id} has no memory limit",
-                            cycle_id=cycle_id,
-                            suggestion='Consider adding memory_limit parameter for safety',
-                            documentation_link='guide/reference/cheatsheet/019-cyclic-workflows-basics.md'
-                        ))
-
+                    if not edge_data.get("memory_limit"):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.INFO,
+                                category="safety",
+                                code="CYC006",
+                                message=f"Cycle {cycle_id} has no memory limit",
+                                cycle_id=cycle_id,
+                                suggestion="Consider adding memory_limit parameter for safety",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
     def _check_performance_anti_patterns(self):
         """Check for performance anti-patterns."""
         # Use the workflow's cycle detection
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get unique nodes in the cycle
                 cycle_nodes = set()
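A minimal sketch of edge metadata that satisfies CYC001-CYC006, assuming kailash 0.2.1 is installed. CycleLinter only duck-types its input (it reads `graph`, `nodes`, and `get_cycle_groups()`), so a hypothetical stub stands in for a real Workflow here; the actual API for declaring cycles may differ, and the node and cycle names are made up:

```python
from kailash.workflow.validation import CycleLinter


class StubWorkflow:
    """Hypothetical stand-in exposing only what CycleLinter reads."""

    graph = None  # stored by CycleLinter.__init__ but unused by the checks
    nodes = {}    # node lookup used by _check_node_compatibility

    def get_cycle_groups(self):
        edge_data = {
            "convergence_check": "loss < 0.01",  # satisfies CYC001/CYC002
            "max_iterations": 500,               # reasonable limit, no CYC003
            "timeout": 60,                       # no CYC005
            "memory_limit": 512,                 # no CYC006
            "mapping": {"result.loss": "loss"},  # non-identity, non-empty mapping
        }
        return {"optimization_cycle": [("evaluate", "adjust", edge_data)]}


issues = CycleLinter(StubWorkflow()).check_all()
print([issue.code for issue in issues])  # expected: [] for this configuration
```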
@@ -296,333 +312,375 @@ class CycleLinter:
                 cycle_nodes.add(source)
                 cycle_nodes.add(target)
             cycle_nodes = list(cycle_nodes)
-
+
             # Check for very small cycles (may have high overhead)
             if len(cycle_nodes) == 1:
                 node_id = cycle_nodes[0]
-                self.issues.append(ValidationIssue(
-                    severity=IssueSeverity.INFO,
-                    category='performance',
-                    code='CYC007',
-                    message=f"Single-node cycle {cycle_id} may have high overhead",
-                    node_id=node_id,
-                    cycle_id=cycle_id,
-                    suggestion='Consider if cycle is necessary or if logic can be internal to node',
-                    documentation_link='guide/reference/pattern-library/06-performance-patterns.md'
-                ))
-
+                self.issues.append(
+                    ValidationIssue(
+                        severity=IssueSeverity.INFO,
+                        category="performance",
+                        code="CYC007",
+                        message=f"Single-node cycle {cycle_id} may have high overhead",
+                        node_id=node_id,
+                        cycle_id=cycle_id,
+                        suggestion="Consider if cycle is necessary or if logic can be internal to node",
+                        documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
+                    )
+                )
+
             # Check for very large cycles (may be hard to debug)
             elif len(cycle_nodes) > 10:
-                self.issues.append(ValidationIssue(
-                    severity=IssueSeverity.WARNING,
-                    category='complexity',
-                    code='CYC008',
-                    message=f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
-                    cycle_id=cycle_id,
-                    suggestion='Consider breaking into smaller cycles or using nested workflows',
-                    documentation_link='guide/reference/pattern-library/04-complex-patterns.md'
-                ))
-
+                self.issues.append(
+                    ValidationIssue(
+                        severity=IssueSeverity.WARNING,
+                        category="complexity",
+                        code="CYC008",
+                        message=f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
+                        cycle_id=cycle_id,
+                        suggestion="Consider breaking into smaller cycles or using nested workflows",
+                        documentation_link="guide/reference/pattern-library/04-complex-patterns.md",
+                    )
+                )
+
             # Check for cycles with expensive operations
             for node_id in cycle_nodes:
                 if self._is_expensive_operation(node_id):
-                    self.issues.append(ValidationIssue(
-                        severity=IssueSeverity.WARNING,
-                        category='performance',
-                        code='CYC009',
-                        message=f"Expensive operation '{node_id}' in cycle {cycle_id}",
-                        node_id=node_id,
-                        cycle_id=cycle_id,
-                        suggestion='Consider caching, optimization, or moving outside cycle',
-                        documentation_link='guide/reference/pattern-library/06-performance-patterns.md'
-                    ))
-
+                    self.issues.append(
+                        ValidationIssue(
+                            severity=IssueSeverity.WARNING,
+                            category="performance",
+                            code="CYC009",
+                            message=f"Expensive operation '{node_id}' in cycle {cycle_id}",
+                            node_id=node_id,
+                            cycle_id=cycle_id,
+                            suggestion="Consider caching, optimization, or moving outside cycle",
+                            documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
+                        )
+                    )
+
     def _check_parameter_mapping(self):
         """Check for parameter mapping issues in cycles."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get cycle nodes for checking
                 cycle_nodes = set()
                 for s, t, _ in cycle_edges:
                     cycle_nodes.add(s)
                     cycle_nodes.add(t)
-
+
                 # Check each edge for issues
                 for source, target, edge_data in cycle_edges:
-                    mapping = edge_data.get('mapping', {})
-
+                    mapping = edge_data.get("mapping", {})
+
                     # Check for identity mappings (common mistake)
                     for source_param, target_param in mapping.items():
                         if source_param == target_param:
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category='parameter_mapping',
-                                code='CYC010',
-                                message=f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
-                                cycle_id=cycle_id,
-                                suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
-                                documentation_link='guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md'
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="parameter_mapping",
+                                    code="CYC010",
+                                    message=f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
+                                    cycle_id=cycle_id,
+                                    suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
+                                    documentation_link="guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md",
+                                )
+                            )
+
                     # Check for missing parameter propagation
                     if not mapping and len(cycle_nodes) > 1:
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.INFO,
-                            category='parameter_mapping',
-                            code='CYC011',
-                            message=f"Cycle {cycle_id} has no parameter mapping",
-                            cycle_id=cycle_id,
-                            suggestion='Consider if parameters need to propagate between iterations',
-                            documentation_link='guide/reference/cheatsheet/019-cyclic-workflows-basics.md'
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.INFO,
+                                category="parameter_mapping",
+                                code="CYC011",
+                                message=f"Cycle {cycle_id} has no parameter mapping",
+                                cycle_id=cycle_id,
+                                suggestion="Consider if parameters need to propagate between iterations",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
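The identity-mapping warning (CYC010) and the missing-mapping note (CYC011) are easy to see in isolation with the same stub approach; only the edge's `mapping` value varies below, and the node names and convergence expression are again hypothetical:

```python
from kailash.workflow.validation import CycleLinter


class MappingStub:
    """Hypothetical workflow stub; only the edge 'mapping' varies."""

    graph = None
    nodes = {}

    def __init__(self, mapping):
        self._edge = {
            "convergence_check": "loss < 0.01",
            "max_iterations": 100,
            "timeout": 30,
            "memory_limit": 256,
            "mapping": mapping,
        }

    def get_cycle_groups(self):
        return {"loop": [("a", "b", self._edge)]}


for mapping in ({"count": "count"}, {"result.count": "count"}, {}):
    codes = [issue.code for issue in CycleLinter(MappingStub(mapping)).check_all()]
    print(mapping, codes)
# expected: ['CYC010'] (identity), [] (suggested pattern),
# then ['CYC011'] (empty mapping in a multi-node cycle)
```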
     def _check_node_compatibility(self):
         """Check for node compatibility issues with cycles."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get unique nodes in the cycle
                 cycle_nodes = set()
                 for source, target, _ in cycle_edges:
                     cycle_nodes.add(source)
                     cycle_nodes.add(target)
-
+
                 for node_id in cycle_nodes:
                     node = self.workflow.nodes.get(node_id)
                     if not node:
                         continue
-
+
                     # Check if node supports cycle context
-                    if hasattr(node, 'run'):
+                    if hasattr(node, "run"):
                         # Check if node accesses cycle context safely
                         if self._uses_unsafe_cycle_access(node):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.ERROR,
-                                category='node_compatibility',
-                                code='CYC012',
-                                message=f"Node '{node_id}' uses unsafe cycle context access",
-                                node_id=node_id,
-                                cycle_id=cycle_id,
-                                suggestion="Use context.get('cycle', {}) instead of direct access",
-                                documentation_link='guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md'
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.ERROR,
+                                    category="node_compatibility",
+                                    code="CYC012",
+                                    message=f"Node '{node_id}' uses unsafe cycle context access",
+                                    node_id=node_id,
+                                    cycle_id=cycle_id,
+                                    suggestion="Use context.get('cycle', {}) instead of direct access",
+                                    documentation_link="guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md",
+                                )
+                            )
+
                     # Check for PythonCodeNode parameter access
-                    if hasattr(node, 'code') and node.code:
+                    if hasattr(node, "code") and node.code:
                         if self._has_unsafe_parameter_access(node.code):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category='node_compatibility',
-                                code='CYC013',
-                                message=f"PythonCodeNode '{node_id}' may have unsafe parameter access",
-                                node_id=node_id,
-                                cycle_id=cycle_id,
-                                suggestion='Use try/except pattern for cycle parameter access',
-                                documentation_link='guide/mistakes/064-pythoncodenode-none-input-validation-error.md'
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="node_compatibility",
+                                    code="CYC013",
+                                    message=f"PythonCodeNode '{node_id}' may have unsafe parameter access",
+                                    node_id=node_id,
+                                    cycle_id=cycle_id,
+                                    suggestion="Use try/except pattern for cycle parameter access",
+                                    documentation_link="guide/mistakes/064-pythoncodenode-none-input-validation-error.md",
+                                )
+                            )
+
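CYC012 and CYC013 push PythonCodeNode code toward defensive access. A sketch of the two suggested patterns as they might appear inside a node's code string; the `context` object's contract and the first-iteration default are assumptions, not shown in this diff:

```python
# Hypothetical PythonCodeNode body illustrating the linter's suggestions.
node_code = '''
# CYC012 suggestion: default instead of KeyError when no cycle context exists yet
cycle_info = context.get("cycle", {})
iteration = cycle_info.get("iteration", 0)

# CYC013 suggestion: try/except for parameters that only exist after the
# first iteration's mapping has run
try:
    loss = loss
except NameError:
    loss = 1.0  # first-iteration default
'''
```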
     def _check_convergence_conditions(self):
         """Check convergence conditions for validity."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
-                    convergence = edge_data.get('convergence_check')
-
+                    convergence = edge_data.get("convergence_check")
+
                     if convergence:
                         # Check for valid Python syntax
                         if not self._is_valid_condition_syntax(convergence):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.ERROR,
-                                category='convergence',
-                                code='CYC014',
-                                message=f"Invalid convergence condition syntax: '{convergence}'",
-                                cycle_id=cycle_id,
-                                suggestion='Ensure condition is valid Python expression',
-                                documentation_link='guide/reference/cheatsheet/019-cyclic-workflows-basics.md'
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.ERROR,
+                                    category="convergence",
+                                    code="CYC014",
+                                    message=f"Invalid convergence condition syntax: '{convergence}'",
+                                    cycle_id=cycle_id,
+                                    suggestion="Ensure condition is valid Python expression",
+                                    documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                                )
+                            )
+
                         # Check for common mistakes
                         if self._has_convergence_condition_issues(convergence):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category='convergence',
-                                code='CYC015',
-                                message=f"Potential issue in convergence condition: '{convergence}'",
-                                cycle_id=cycle_id,
-                                suggestion='Verify field names and comparison operators',
-                                documentation_link='guide/mistakes/066-infinite-cycles.md'
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="convergence",
+                                    code="CYC015",
+                                    message=f"Potential issue in convergence condition: '{convergence}'",
+                                    cycle_id=cycle_id,
+                                    suggestion="Verify field names and comparison operators",
+                                    documentation_link="guide/mistakes/066-infinite-cycles.md",
+                                )
+                            )
+
     def _check_resource_usage(self):
         """Check for potential resource usage issues."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get unique nodes in the cycle
                 cycle_nodes = set()
                 for source, target, _ in cycle_edges:
                     cycle_nodes.add(source)
                     cycle_nodes.add(target)
-
+
                 # Check for potential memory leaks
                 for node_id in cycle_nodes:
                     if self._may_have_memory_leak(node_id):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category='resource_usage',
-                            code='CYC016',
-                            message=f"Node '{node_id}' may have memory leak in cycle",
-                            node_id=node_id,
-                            cycle_id=cycle_id,
-                            suggestion='Ensure proper cleanup of resources in cyclic execution',
-                            documentation_link='guide/mistakes/016-memory-leaks-in-long-running-processes.md'
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="resource_usage",
+                                code="CYC016",
+                                message=f"Node '{node_id}' may have memory leak in cycle",
+                                node_id=node_id,
+                                cycle_id=cycle_id,
+                                suggestion="Ensure proper cleanup of resources in cyclic execution",
+                                documentation_link="guide/mistakes/016-memory-leaks-in-long-running-processes.md",
+                            )
+                        )
+
                 # Check for file handle management
                 for node_id in cycle_nodes:
                     if self._may_leak_file_handles(node_id):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category='resource_usage',
-                            code='CYC017',
-                            message=f"Node '{node_id}' may leak file handles in cycle",
-                            node_id=node_id,
-                            cycle_id=cycle_id,
-                            suggestion='Use context managers (with statements) for file operations',
-                            documentation_link='guide/mistakes/022-resource-cleanup-issues.md'
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="resource_usage",
+                                code="CYC017",
+                                message=f"Node '{node_id}' may leak file handles in cycle",
+                                node_id=node_id,
+                                cycle_id=cycle_id,
+                                suggestion="Use context managers (with statements) for file operations",
+                                documentation_link="guide/mistakes/022-resource-cleanup-issues.md",
+                            )
+                        )
+
     def _get_cycle_id(self, cycle_nodes: List[str]) -> str:
         """Generate a cycle identifier from cycle nodes."""
         return f"cycle_{'-'.join(sorted(cycle_nodes))}"
-
+
     def _is_potentially_unreachable_condition(self, condition: str) -> bool:
         """Check if convergence condition might be unreachable."""
         # Simple heuristics for potentially problematic conditions
         problematic_patterns = [
-            r'.*==\s*True\s*$',  # exact boolean match
-            r'.*==\s*1\.0\s*$',  # exact float match
-            r'.*>\s*1\.0\s*$',  # probability > 1.0
-            r'.*<\s*0\.0\s*$',  # probability < 0.0
+            r".*==\s*True\s*$",  # exact boolean match
+            r".*==\s*1\.0\s*$",  # exact float match
+            r".*>\s*1\.0\s*$",  # probability > 1.0
+            r".*<\s*0\.0\s*$",  # probability < 0.0
         ]
-
+
         for pattern in problematic_patterns:
             if re.search(pattern, condition):
                 return True
-
+
         return False
-
+
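These four heuristics are easy to sanity-check standalone with nothing but `re`; exact float equality like `quality == 1.0` is the classic case that rarely becomes true under floating-point drift:

```python
import re

# Same patterns as _is_potentially_unreachable_condition above.
patterns = [
    r".*==\s*True\s*$",  # exact boolean match
    r".*==\s*1\.0\s*$",  # exact float match
    r".*>\s*1\.0\s*$",   # probability > 1.0
    r".*<\s*0\.0\s*$",   # probability < 0.0
]

for condition in ("quality == 1.0", "confidence > 1.0", "loss < 0.01"):
    flagged = any(re.search(pattern, condition) for pattern in patterns)
    print(f"{condition!r}: {'CYC004 candidate' if flagged else 'ok'}")
```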
     def _is_expensive_operation(self, node_id: str) -> bool:
         """Check if node represents an expensive operation."""
         expensive_keywords = [
-            'train', 'model', 'neural', 'deep', 'learning',
-            'api', 'request', 'http', 'download', 'upload',
-            'database', 'query', 'sql',
-            'file', 'io', 'read', 'write'
+            "train",
+            "model",
+            "neural",
+            "deep",
+            "learning",
+            "api",
+            "request",
+            "http",
+            "download",
+            "upload",
+            "database",
+            "query",
+            "sql",
+            "file",
+            "io",
+            "read",
+            "write",
         ]
-
+
         node_id_lower = node_id.lower()
         return any(keyword in node_id_lower for keyword in expensive_keywords)
-
+
     def _uses_unsafe_cycle_access(self, node) -> bool:
         """Check if node uses unsafe cycle context access."""
         # This would require more sophisticated code analysis
         # For now, return False as a placeholder
         return False
-
+
     def _has_unsafe_parameter_access(self, code: str) -> bool:
         """Check if PythonCodeNode has unsafe parameter access."""
         # Look for direct parameter access without try/except
-        lines = code.split('\n')
-
+        lines = code.split("\n")
+
         for line in lines:
             line = line.strip()
-            if line and not line.startswith('#'):
+            if line and not line.startswith("#"):
                 # Check for variable access that might be parameters
-                if re.match(r'^[a-zA-Z_]\w*\s*=', line):
-                    var_name = line.split('=')[0].strip()
+                if re.match(r"^[a-zA-Z_]\w*\s*=", line):
+                    var_name = line.split("=")[0].strip()
                     # If variable is used before definition, might be parameter
                     if not self._is_defined_before_use(var_name, code):
                         return True
-
+
         return False
-
+
     def _is_defined_before_use(self, var_name: str, code: str) -> bool:
         """Check if variable is defined before use in code."""
-        lines = code.split('\n')
+        lines = code.split("\n")
         defined = False
-
+
         for line in lines:
             line = line.strip()
-            if line.startswith(f'{var_name} =') or line.startswith(f'{var_name}='):
+            if line.startswith(f"{var_name} =") or line.startswith(f"{var_name}="):
                 defined = True
             elif var_name in line and not defined:
                 # Used before definition
                 return False
-
+
         return True
-
+
     def _is_valid_condition_syntax(self, condition: str) -> bool:
         """Check if convergence condition has valid Python syntax."""
         try:
-            compile(condition, '<string>', 'eval')
+            compile(condition, "<string>", "eval")
             return True
         except SyntaxError:
             return False
-
+
     def _has_convergence_condition_issues(self, condition: str) -> bool:
         """Check for common issues in convergence conditions."""
         # Check for undefined variables (common field names)
-        undefined_vars = ['done', 'converged', 'finished', 'complete', 'quality', 'error']
-
+        undefined_vars = [
+            "done",
+            "converged",
+            "finished",
+            "complete",
+            "quality",
+            "error",
+        ]
+
         for var in undefined_vars:
             if var in condition:
                 # Might be using undefined variable
                 return True
-
+
         return False
-
+
     def _may_have_memory_leak(self, node_id: str) -> bool:
         """Check if node might have memory leaks."""
-        leak_keywords = ['accumulate', 'collect', 'gather', 'cache', 'store']
+        leak_keywords = ["accumulate", "collect", "gather", "cache", "store"]
         node_id_lower = node_id.lower()
         return any(keyword in node_id_lower for keyword in leak_keywords)
-
+
     def _may_leak_file_handles(self, node_id: str) -> bool:
         """Check if node might leak file handles."""
-        file_keywords = ['file', 'read', 'write', 'open', 'csv', 'json', 'log']
+        file_keywords = ["file", "read", "write", "open", "csv", "json", "log"]
         node_id_lower = node_id.lower()
         return any(keyword in node_id_lower for keyword in file_keywords)
-
+
     def get_issues_by_severity(self, severity: IssueSeverity) -> List[ValidationIssue]:
         """Get all issues of a specific severity level."""
         return [issue for issue in self.issues if issue.severity == severity]
-
+
     def get_issues_by_category(self, category: str) -> List[ValidationIssue]:
         """Get all issues of a specific category."""
         return [issue for issue in self.issues if issue.category == category]
-
+
     def get_issues_for_cycle(self, cycle_id: str) -> List[ValidationIssue]:
         """Get all issues for a specific cycle."""
         return [issue for issue in self.issues if issue.cycle_id == cycle_id]
-
+
     def get_issues_for_node(self, node_id: str) -> List[ValidationIssue]:
         """Get all issues for a specific node."""
         return [issue for issue in self.issues if issue.node_id == node_id]
-
+
     def generate_report(self) -> Dict[str, Any]:
         """
         Generate comprehensive validation report.
-
+
         Returns:
             Dict containing validation report with summary and details
-
+
         Example:
             >>> from kailash import Workflow
             >>> workflow = Workflow("test", "Test Workflow")
@@ -634,14 +692,14 @@ class CycleLinter:
         errors = self.get_issues_by_severity(IssueSeverity.ERROR)
         warnings = self.get_issues_by_severity(IssueSeverity.WARNING)
         info = self.get_issues_by_severity(IssueSeverity.INFO)
-
+
         # Group by category
         by_category = {}
         for issue in self.issues:
             if issue.category not in by_category:
                 by_category[issue.category] = []
             by_category[issue.category].append(issue)
-
+
         # Group by cycle
         by_cycle = {}
         for issue in self.issues:
@@ -649,45 +707,45 @@ class CycleLinter:
             if issue.cycle_id not in by_cycle:
                 by_cycle[issue.cycle_id] = []
             by_cycle[issue.cycle_id].append(issue)
-
+
         return {
-            'summary': {
-                'total_issues': len(self.issues),
-                'errors': len(errors),
-                'warnings': len(warnings),
-                'info': len(info),
-                'categories': list(by_category.keys()),
-                'affected_cycles': len(by_cycle)
-            },
-            'issues': self.issues,
-            'by_severity': {
-                'errors': errors,
-                'warnings': warnings,
-                'info': info
+            "summary": {
+                "total_issues": len(self.issues),
+                "errors": len(errors),
+                "warnings": len(warnings),
+                "info": len(info),
+                "categories": list(by_category.keys()),
+                "affected_cycles": len(by_cycle),
             },
-            'by_category': by_category,
-            'by_cycle': by_cycle,
-            'recommendations': self._generate_recommendations()
+            "issues": self.issues,
+            "by_severity": {"errors": errors, "warnings": warnings, "info": info},
+            "by_category": by_category,
+            "by_cycle": by_cycle,
+            "recommendations": self._generate_recommendations(),
         }
-
+
     def _generate_recommendations(self) -> List[str]:
         """Generate high-level recommendations based on found issues."""
         recommendations = []
-
+
         errors = self.get_issues_by_severity(IssueSeverity.ERROR)
         if errors:
-            recommendations.append(f"Fix {len(errors)} critical errors before deployment")
-
+            recommendations.append(
+                f"Fix {len(errors)} critical errors before deployment"
+            )
+
         convergence_issues = self.get_issues_by_category("convergence")
         if convergence_issues:
             recommendations.append("Review convergence conditions for all cycles")
-
+
         performance_issues = self.get_issues_by_category("performance")
         if performance_issues:
             recommendations.append("Optimize cycles to improve performance")
-
+
         safety_issues = self.get_issues_by_category("safety")
         if safety_issues:
-            recommendations.append('Add safety limits (timeout, max_iterations) to cycles')
-
-        return recommendations
+            recommendations.append(
+                "Add safety limits (timeout, max_iterations) to cycles"
+            )
+
+        return recommendations
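A usage sketch for the report assembled above; the dictionary keys come from this diff, while `workflow` is assumed to be an already-built kailash Workflow containing cycles:

```python
from kailash.workflow.validation import CycleLinter

linter = CycleLinter(workflow)  # assumes an existing Workflow instance
linter.check_all()              # generate_report() reads linter.issues
report = linter.generate_report()

summary = report["summary"]
print(
    f"{summary['total_issues']} issues: "
    f"{summary['errors']} errors, {summary['warnings']} warnings, {summary['info']} info"
)
for category, issues in report["by_category"].items():
    print(f"  {category}: {[issue.code for issue in issues]}")
for recommendation in report["recommendations"]:
    print(f"  -> {recommendation}")
```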