kailash 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -60,7 +60,7 @@ Downstream Consumers:
 
 Examples:
     Basic workflow validation:
-
+
     >>> from kailash.workflow.validation import CycleLinter, IssueSeverity
     >>> linter = CycleLinter(workflow)
     >>> issues = linter.check_all()
@@ -71,9 +71,9 @@ Examples:
     ...     print(f"ERROR {error.code}: {error.message}")
     ...     if error.suggestion:
     ...         print(f" Suggestion: {error.suggestion}")
-
+
     Comprehensive reporting:
-
+
     >>> report = linter.generate_report()
     >>> print(f"Total issues: {report['summary']['total_issues']}")
     >>> print(f"Critical errors: {report['summary']['errors']}")
@@ -83,9 +83,9 @@ Examples:
     ...     print(f"{category.upper()} ({len(issues)} issues):")
     ...     for issue in issues:
     ...         print(f" {issue.code}: {issue.message}")
-
+
     Targeted validation:
-
+
     >>> # Validate specific cycle
     >>> cycle_issues = linter.get_issues_for_cycle("optimization_cycle")
     >>> # Validate specific node
@@ -97,7 +97,7 @@ Examples:
 
 Validation Checks:
     The linter performs comprehensive checks including:
-
+
     - **CYC001-002**: Convergence condition validation
     - **CYC003-004**: Infinite loop prevention
     - **CYC005-006**: Safety limit configuration
@@ -113,16 +113,17 @@ See Also:
     - :doc:`/guides/validation` for validation best practices
 """
 
-from typing import Dict, Any, List, Optional
+import re
 from dataclasses import dataclass
 from enum import Enum
-import re
+from typing import Any, Dict, List, Optional
 
 from . import Workflow
 
 
 class IssueSeverity(Enum):
     """Severity levels for validation issues."""
+
     ERROR = "error"
     WARNING = "warning"
     INFO = "info"
@@ -131,6 +132,7 @@ class IssueSeverity(Enum):
 @dataclass
 class ValidationIssue:
     """Represents a validation issue found in a workflow."""
+
     severity: IssueSeverity
     category: str
     code: str
@@ -144,29 +146,29 @@ class ValidationIssue:
 class CycleLinter:
     """
     Comprehensive linter for cyclic workflows.
-
+
     Analyzes workflows for common issues, performance anti-patterns,
     and potential problems specific to cyclic execution.
     """
-
+
     def __init__(self, workflow: Workflow):
         """
         Initialize linter with target workflow.
-
+
         Args:
             workflow: The workflow to analyze
         """
         self.workflow = workflow
         self.graph = workflow.graph
         self.issues: List[ValidationIssue] = []
-
+
     def check_all(self) -> List[ValidationIssue]:
         """
         Run all validation checks on the workflow.
-
+
         Returns:
             List of all validation issues found
-
+
         Example:
             >>> workflow = create_problematic_workflow()
             >>> linter = CycleLinter(workflow)
@@ -175,7 +177,7 @@ class CycleLinter:
             ...     print(f"{issue.severity.value}: {issue.message}")
         """
         self.issues = []
-
+
         # Run all checks
         self._check_cycles_have_convergence()
         self._check_for_infinite_loop_potential()
@@ -185,110 +187,124 @@ class CycleLinter:
         self._check_node_compatibility()
         self._check_convergence_conditions()
         self._check_resource_usage()
-
+
         return self.issues
-
+
     def _check_cycles_have_convergence(self):
         """Check that all cycles have appropriate convergence conditions."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
-                    if not edge_data.get('convergence_check') and not edge_data.get('max_iterations'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.ERROR,
-                            category="convergence",
-                            code="CYC001",
-                            message=f"Cycle {cycle_id} lacks convergence condition and max_iterations",
-                            cycle_id=cycle_id,
-                            suggestion="Add convergence_check parameter or set max_iterations",
-                            documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
-                        ))
-
-                    elif not edge_data.get('convergence_check'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category="convergence",
-                            code="CYC002",
-                            message=f"Cycle {cycle_id} relies only on max_iterations without convergence check",
-                            cycle_id=cycle_id,
-                            suggestion="Consider adding convergence_check for early termination",
-                            documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
-                        ))
-
+                    if not edge_data.get("convergence_check") and not edge_data.get(
+                        "max_iterations"
+                    ):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.ERROR,
+                                category="convergence",
+                                code="CYC001",
+                                message=f"Cycle {cycle_id} lacks convergence condition and max_iterations",
+                                cycle_id=cycle_id,
+                                suggestion="Add convergence_check parameter or set max_iterations",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
+                    elif not edge_data.get("convergence_check"):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="convergence",
+                                code="CYC002",
+                                message=f"Cycle {cycle_id} relies only on max_iterations without convergence check",
+                                cycle_id=cycle_id,
+                                suggestion="Consider adding convergence_check for early termination",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
     def _check_for_infinite_loop_potential(self):
         """Check for patterns that could lead to infinite loops."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
-                    max_iter = edge_data.get('max_iterations')
-                    convergence = edge_data.get('convergence_check')
-
+                    max_iter = edge_data.get("max_iterations")
+                    convergence = edge_data.get("convergence_check")
+
                     # Check for very high or missing max_iterations
                     if max_iter is None or max_iter > 10000:
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category="safety",
-                            code="CYC003",
-                            message=f"Cycle {cycle_id} has very high or no max_iterations limit",
-                            cycle_id=cycle_id,
-                            suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
-                            documentation_link="guide/mistakes/066-infinite-cycles.md"
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="safety",
+                                code="CYC003",
+                                message=f"Cycle {cycle_id} has very high or no max_iterations limit",
+                                cycle_id=cycle_id,
+                                suggestion="Set reasonable max_iterations (e.g., 100-1000) as safety limit",
+                                documentation_link="guide/mistakes/066-infinite-cycles.md",
+                            )
+                        )
+
                     # Check for potentially unreachable convergence conditions
                     if convergence:
                         if self._is_potentially_unreachable_condition(convergence):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category="convergence",
-                                code="CYC004",
-                                message=f"Convergence condition '{convergence}' may be unreachable",
-                                cycle_id=cycle_id,
-                                suggestion="Verify convergence condition is achievable",
-                                documentation_link="guide/mistakes/066-infinite-cycles.md"
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="convergence",
+                                    code="CYC004",
+                                    message=f"Convergence condition '{convergence}' may be unreachable",
+                                    cycle_id=cycle_id,
+                                    suggestion="Verify convergence condition is achievable",
+                                    documentation_link="guide/mistakes/066-infinite-cycles.md",
+                                )
+                            )
+
     def _check_safety_limits(self):
         """Check for appropriate safety limits on cycles."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
                     # Check timeout
-                    if not edge_data.get('timeout'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.INFO,
-                            category="safety",
-                            code="CYC005",
-                            message=f"Cycle {cycle_id} has no timeout limit",
-                            cycle_id=cycle_id,
-                            suggestion="Consider adding timeout parameter for safety",
-                            documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
-                        ))
-
+                    if not edge_data.get("timeout"):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.INFO,
+                                category="safety",
+                                code="CYC005",
+                                message=f"Cycle {cycle_id} has no timeout limit",
+                                cycle_id=cycle_id,
+                                suggestion="Consider adding timeout parameter for safety",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
                     # Check memory limit
-                    if not edge_data.get('memory_limit'):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.INFO,
-                            category="safety",
-                            code="CYC006",
-                            message=f"Cycle {cycle_id} has no memory limit",
-                            cycle_id=cycle_id,
-                            suggestion="Consider adding memory_limit parameter for safety",
-                            documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
-                        ))
-
+                    if not edge_data.get("memory_limit"):
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.INFO,
+                                category="safety",
+                                code="CYC006",
+                                message=f"Cycle {cycle_id} has no memory limit",
+                                cycle_id=cycle_id,
+                                suggestion="Consider adding memory_limit parameter for safety",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
     def _check_performance_anti_patterns(self):
         """Check for performance anti-patterns."""
         # Use the workflow's cycle detection
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get unique nodes in the cycle
                 cycle_nodes = set()
@@ -296,333 +312,375 @@ class CycleLinter:
                     cycle_nodes.add(source)
                     cycle_nodes.add(target)
                 cycle_nodes = list(cycle_nodes)
-
+
                 # Check for very small cycles (may have high overhead)
                 if len(cycle_nodes) == 1:
                     node_id = cycle_nodes[0]
-                    self.issues.append(ValidationIssue(
-                        severity=IssueSeverity.INFO,
-                        category="performance",
-                        code="CYC007",
-                        message=f"Single-node cycle {cycle_id} may have high overhead",
-                        node_id=node_id,
-                        cycle_id=cycle_id,
-                        suggestion="Consider if cycle is necessary or if logic can be internal to node",
-                        documentation_link="guide/reference/pattern-library/06-performance-patterns.md"
-                    ))
-
+                    self.issues.append(
+                        ValidationIssue(
+                            severity=IssueSeverity.INFO,
+                            category="performance",
+                            code="CYC007",
+                            message=f"Single-node cycle {cycle_id} may have high overhead",
+                            node_id=node_id,
+                            cycle_id=cycle_id,
+                            suggestion="Consider if cycle is necessary or if logic can be internal to node",
+                            documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
+                        )
+                    )
+
                 # Check for very large cycles (may be hard to debug)
                 elif len(cycle_nodes) > 10:
-                    self.issues.append(ValidationIssue(
-                        severity=IssueSeverity.WARNING,
-                        category="complexity",
-                        code="CYC008",
-                        message=f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
-                        cycle_id=cycle_id,
-                        suggestion="Consider breaking into smaller cycles or using nested workflows",
-                        documentation_link="guide/reference/pattern-library/04-complex-patterns.md"
-                    ))
-
+                    self.issues.append(
+                        ValidationIssue(
+                            severity=IssueSeverity.WARNING,
+                            category="complexity",
+                            code="CYC008",
+                            message=f"Large cycle {cycle_id} with {len(cycle_nodes)} nodes may be hard to debug",
+                            cycle_id=cycle_id,
+                            suggestion="Consider breaking into smaller cycles or using nested workflows",
+                            documentation_link="guide/reference/pattern-library/04-complex-patterns.md",
+                        )
+                    )
+
                 # Check for cycles with expensive operations
                 for node_id in cycle_nodes:
                     if self._is_expensive_operation(node_id):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category="performance",
-                            code="CYC009",
-                            message=f"Expensive operation '{node_id}' in cycle {cycle_id}",
-                            node_id=node_id,
-                            cycle_id=cycle_id,
-                            suggestion="Consider caching, optimization, or moving outside cycle",
-                            documentation_link="guide/reference/pattern-library/06-performance-patterns.md"
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="performance",
+                                code="CYC009",
+                                message=f"Expensive operation '{node_id}' in cycle {cycle_id}",
+                                node_id=node_id,
+                                cycle_id=cycle_id,
+                                suggestion="Consider caching, optimization, or moving outside cycle",
+                                documentation_link="guide/reference/pattern-library/06-performance-patterns.md",
+                            )
+                        )
+
     def _check_parameter_mapping(self):
         """Check for parameter mapping issues in cycles."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get cycle nodes for checking
                 cycle_nodes = set()
                 for s, t, _ in cycle_edges:
                     cycle_nodes.add(s)
                     cycle_nodes.add(t)
-
+
                 # Check each edge for issues
                 for source, target, edge_data in cycle_edges:
-                    mapping = edge_data.get('mapping', {})
-
+                    mapping = edge_data.get("mapping", {})
+
                     # Check for identity mappings (common mistake)
                     for source_param, target_param in mapping.items():
                         if source_param == target_param:
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category="parameter_mapping",
-                                code="CYC010",
-                                message=f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
-                                cycle_id=cycle_id,
-                                suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
-                                documentation_link="guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md"
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="parameter_mapping",
+                                    code="CYC010",
+                                    message=f"Identity mapping '{source_param}' -> '{target_param}' in cycle {cycle_id}",
+                                    cycle_id=cycle_id,
+                                    suggestion="Use 'result.field' -> 'field' pattern for cycle parameter propagation",
+                                    documentation_link="guide/mistakes/063-cyclic-parameter-propagation-multi-fix.md",
+                                )
+                            )
+
                     # Check for missing parameter propagation
                     if not mapping and len(cycle_nodes) > 1:
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.INFO,
-                            category="parameter_mapping",
-                            code="CYC011",
-                            message=f"Cycle {cycle_id} has no parameter mapping",
-                            cycle_id=cycle_id,
-                            suggestion="Consider if parameters need to propagate between iterations",
-                            documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.INFO,
+                                category="parameter_mapping",
+                                code="CYC011",
+                                message=f"Cycle {cycle_id} has no parameter mapping",
+                                cycle_id=cycle_id,
+                                suggestion="Consider if parameters need to propagate between iterations",
+                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                            )
+                        )
+
     def _check_node_compatibility(self):
         """Check for node compatibility issues with cycles."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get unique nodes in the cycle
                 cycle_nodes = set()
                 for source, target, _ in cycle_edges:
                     cycle_nodes.add(source)
                     cycle_nodes.add(target)
-
+
                 for node_id in cycle_nodes:
                     node = self.workflow.nodes.get(node_id)
                     if not node:
                         continue
-
+
                     # Check if node supports cycle context
-                    if hasattr(node, 'run'):
+                    if hasattr(node, "run"):
                         # Check if node accesses cycle context safely
                         if self._uses_unsafe_cycle_access(node):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.ERROR,
-                                category="node_compatibility",
-                                code="CYC012",
-                                message=f"Node '{node_id}' uses unsafe cycle context access",
-                                node_id=node_id,
-                                cycle_id=cycle_id,
-                                suggestion="Use context.get('cycle', {}) instead of direct access",
-                                documentation_link="guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md"
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.ERROR,
+                                    category="node_compatibility",
+                                    code="CYC012",
+                                    message=f"Node '{node_id}' uses unsafe cycle context access",
+                                    node_id=node_id,
+                                    cycle_id=cycle_id,
+                                    suggestion="Use context.get('cycle', {}) instead of direct access",
+                                    documentation_link="guide/reference/cheatsheet/022-cycle-debugging-troubleshooting.md",
+                                )
+                            )
+
                     # Check for PythonCodeNode parameter access
-                    if hasattr(node, 'code') and node.code:
+                    if hasattr(node, "code") and node.code:
                         if self._has_unsafe_parameter_access(node.code):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category="node_compatibility",
-                                code="CYC013",
-                                message=f"PythonCodeNode '{node_id}' may have unsafe parameter access",
-                                node_id=node_id,
-                                cycle_id=cycle_id,
-                                suggestion="Use try/except pattern for cycle parameter access",
-                                documentation_link="guide/mistakes/064-pythoncodenode-none-input-validation-error.md"
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="node_compatibility",
+                                    code="CYC013",
+                                    message=f"PythonCodeNode '{node_id}' may have unsafe parameter access",
+                                    node_id=node_id,
+                                    cycle_id=cycle_id,
+                                    suggestion="Use try/except pattern for cycle parameter access",
+                                    documentation_link="guide/mistakes/064-pythoncodenode-none-input-validation-error.md",
+                                )
+                            )
+
     def _check_convergence_conditions(self):
         """Check convergence conditions for validity."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 for source, target, edge_data in cycle_edges:
-                    convergence = edge_data.get('convergence_check')
-
+                    convergence = edge_data.get("convergence_check")
+
                     if convergence:
                         # Check for valid Python syntax
                         if not self._is_valid_condition_syntax(convergence):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.ERROR,
-                                category="convergence",
-                                code="CYC014",
-                                message=f"Invalid convergence condition syntax: '{convergence}'",
-                                cycle_id=cycle_id,
-                                suggestion="Ensure condition is valid Python expression",
-                                documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md"
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.ERROR,
+                                    category="convergence",
+                                    code="CYC014",
+                                    message=f"Invalid convergence condition syntax: '{convergence}'",
+                                    cycle_id=cycle_id,
+                                    suggestion="Ensure condition is valid Python expression",
+                                    documentation_link="guide/reference/cheatsheet/019-cyclic-workflows-basics.md",
+                                )
+                            )
+
                         # Check for common mistakes
                         if self._has_convergence_condition_issues(convergence):
-                            self.issues.append(ValidationIssue(
-                                severity=IssueSeverity.WARNING,
-                                category="convergence",
-                                code="CYC015",
-                                message=f"Potential issue in convergence condition: '{convergence}'",
-                                cycle_id=cycle_id,
-                                suggestion="Verify field names and comparison operators",
-                                documentation_link="guide/mistakes/066-infinite-cycles.md"
-                            ))
-
+                            self.issues.append(
+                                ValidationIssue(
+                                    severity=IssueSeverity.WARNING,
+                                    category="convergence",
+                                    code="CYC015",
+                                    message=f"Potential issue in convergence condition: '{convergence}'",
+                                    cycle_id=cycle_id,
+                                    suggestion="Verify field names and comparison operators",
+                                    documentation_link="guide/mistakes/066-infinite-cycles.md",
+                                )
+                            )
+
     def _check_resource_usage(self):
         """Check for potential resource usage issues."""
-        if hasattr(self.workflow, 'get_cycle_groups'):
+        if hasattr(self.workflow, "get_cycle_groups"):
             cycle_groups = self.workflow.get_cycle_groups()
-
+
             for cycle_id, cycle_edges in cycle_groups.items():
                 # Get unique nodes in the cycle
                 cycle_nodes = set()
                 for source, target, _ in cycle_edges:
                     cycle_nodes.add(source)
                     cycle_nodes.add(target)
-
+
                 # Check for potential memory leaks
                 for node_id in cycle_nodes:
                     if self._may_have_memory_leak(node_id):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category="resource_usage",
-                            code="CYC016",
-                            message=f"Node '{node_id}' may have memory leak in cycle",
-                            node_id=node_id,
-                            cycle_id=cycle_id,
-                            suggestion="Ensure proper cleanup of resources in cyclic execution",
-                            documentation_link="guide/mistakes/016-memory-leaks-in-long-running-processes.md"
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="resource_usage",
+                                code="CYC016",
+                                message=f"Node '{node_id}' may have memory leak in cycle",
+                                node_id=node_id,
+                                cycle_id=cycle_id,
+                                suggestion="Ensure proper cleanup of resources in cyclic execution",
+                                documentation_link="guide/mistakes/016-memory-leaks-in-long-running-processes.md",
+                            )
+                        )
+
                 # Check for file handle management
                 for node_id in cycle_nodes:
                     if self._may_leak_file_handles(node_id):
-                        self.issues.append(ValidationIssue(
-                            severity=IssueSeverity.WARNING,
-                            category="resource_usage",
-                            code="CYC017",
-                            message=f"Node '{node_id}' may leak file handles in cycle",
-                            node_id=node_id,
-                            cycle_id=cycle_id,
-                            suggestion="Use context managers (with statements) for file operations",
-                            documentation_link="guide/mistakes/022-resource-cleanup-issues.md"
-                        ))
-
+                        self.issues.append(
+                            ValidationIssue(
+                                severity=IssueSeverity.WARNING,
+                                category="resource_usage",
+                                code="CYC017",
+                                message=f"Node '{node_id}' may leak file handles in cycle",
+                                node_id=node_id,
+                                cycle_id=cycle_id,
+                                suggestion="Use context managers (with statements) for file operations",
+                                documentation_link="guide/mistakes/022-resource-cleanup-issues.md",
+                            )
+                        )
+
     def _get_cycle_id(self, cycle_nodes: List[str]) -> str:
         """Generate a cycle identifier from cycle nodes."""
         return f"cycle_{'-'.join(sorted(cycle_nodes))}"
-
+
     def _is_potentially_unreachable_condition(self, condition: str) -> bool:
         """Check if convergence condition might be unreachable."""
         # Simple heuristics for potentially problematic conditions
         problematic_patterns = [
-            r'.*==\s*True\s*$',  # exact boolean match
-            r'.*==\s*1\.0\s*$',  # exact float match
-            r'.*>\s*1\.0\s*$',  # probability > 1.0
-            r'.*<\s*0\.0\s*$',  # probability < 0.0
+            r".*==\s*True\s*$",  # exact boolean match
+            r".*==\s*1\.0\s*$",  # exact float match
+            r".*>\s*1\.0\s*$",  # probability > 1.0
+            r".*<\s*0\.0\s*$",  # probability < 0.0
         ]
-
+
         for pattern in problematic_patterns:
             if re.search(pattern, condition):
                 return True
-
+
         return False
-
+
     def _is_expensive_operation(self, node_id: str) -> bool:
         """Check if node represents an expensive operation."""
         expensive_keywords = [
-            'train', 'model', 'neural', 'deep', 'learning',
-            'api', 'request', 'http', 'download', 'upload',
-            'database', 'query', 'sql',
-            'file', 'io', 'read', 'write'
+            "train",
+            "model",
+            "neural",
+            "deep",
+            "learning",
+            "api",
+            "request",
+            "http",
+            "download",
+            "upload",
+            "database",
+            "query",
+            "sql",
+            "file",
+            "io",
+            "read",
+            "write",
         ]
-
+
         node_id_lower = node_id.lower()
         return any(keyword in node_id_lower for keyword in expensive_keywords)
-
+
     def _uses_unsafe_cycle_access(self, node) -> bool:
         """Check if node uses unsafe cycle context access."""
         # This would require more sophisticated code analysis
         # For now, return False as a placeholder
         return False
-
+
     def _has_unsafe_parameter_access(self, code: str) -> bool:
         """Check if PythonCodeNode has unsafe parameter access."""
         # Look for direct parameter access without try/except
-        lines = code.split('\n')
-
+        lines = code.split("\n")
+
         for line in lines:
             line = line.strip()
-            if line and not line.startswith('#'):
+            if line and not line.startswith("#"):
                 # Check for variable access that might be parameters
-                if re.match(r'^[a-zA-Z_]\w*\s*=', line):
-                    var_name = line.split('=')[0].strip()
+                if re.match(r"^[a-zA-Z_]\w*\s*=", line):
+                    var_name = line.split("=")[0].strip()
                     # If variable is used before definition, might be parameter
                     if not self._is_defined_before_use(var_name, code):
                         return True
-
+
         return False
-
+
     def _is_defined_before_use(self, var_name: str, code: str) -> bool:
         """Check if variable is defined before use in code."""
-        lines = code.split('\n')
+        lines = code.split("\n")
         defined = False
-
+
         for line in lines:
             line = line.strip()
-            if line.startswith(f'{var_name} =') or line.startswith(f'{var_name}='):
+            if line.startswith(f"{var_name} =") or line.startswith(f"{var_name}="):
                 defined = True
             elif var_name in line and not defined:
                 # Used before definition
                 return False
-
+
         return True
-
+
     def _is_valid_condition_syntax(self, condition: str) -> bool:
         """Check if convergence condition has valid Python syntax."""
         try:
-            compile(condition, '<string>', 'eval')
+            compile(condition, "<string>", "eval")
             return True
         except SyntaxError:
             return False
-
+
     def _has_convergence_condition_issues(self, condition: str) -> bool:
         """Check for common issues in convergence conditions."""
         # Check for undefined variables (common field names)
-        undefined_vars = ['done', 'converged', 'finished', 'complete', 'quality', 'error']
-
+        undefined_vars = [
+            "done",
+            "converged",
+            "finished",
+            "complete",
+            "quality",
+            "error",
+        ]
+
         for var in undefined_vars:
             if var in condition:
                 # Might be using undefined variable
                 return True
-
+
         return False
-
+
     def _may_have_memory_leak(self, node_id: str) -> bool:
         """Check if node might have memory leaks."""
-        leak_keywords = ['accumulate', 'collect', 'gather', 'cache', 'store']
+        leak_keywords = ["accumulate", "collect", "gather", "cache", "store"]
         node_id_lower = node_id.lower()
         return any(keyword in node_id_lower for keyword in leak_keywords)
-
+
     def _may_leak_file_handles(self, node_id: str) -> bool:
         """Check if node might leak file handles."""
-        file_keywords = ['file', 'read', 'write', 'open', 'csv', 'json', 'log']
+        file_keywords = ["file", "read", "write", "open", "csv", "json", "log"]
         node_id_lower = node_id.lower()
         return any(keyword in node_id_lower for keyword in file_keywords)
-
+
     def get_issues_by_severity(self, severity: IssueSeverity) -> List[ValidationIssue]:
         """Get all issues of a specific severity level."""
         return [issue for issue in self.issues if issue.severity == severity]
-
+
     def get_issues_by_category(self, category: str) -> List[ValidationIssue]:
         """Get all issues of a specific category."""
         return [issue for issue in self.issues if issue.category == category]
-
+
     def get_issues_for_cycle(self, cycle_id: str) -> List[ValidationIssue]:
         """Get all issues for a specific cycle."""
         return [issue for issue in self.issues if issue.cycle_id == cycle_id]
-
+
     def get_issues_for_node(self, node_id: str) -> List[ValidationIssue]:
         """Get all issues for a specific node."""
         return [issue for issue in self.issues if issue.node_id == node_id]
-
+
     def generate_report(self) -> Dict[str, Any]:
         """
         Generate comprehensive validation report.
-
+
         Returns:
             Dict containing validation report with summary and details
-
+
         Example:
             >>> from kailash import Workflow
             >>> workflow = Workflow("test", "Test Workflow")
@@ -634,14 +692,14 @@ class CycleLinter:
         errors = self.get_issues_by_severity(IssueSeverity.ERROR)
         warnings = self.get_issues_by_severity(IssueSeverity.WARNING)
         info = self.get_issues_by_severity(IssueSeverity.INFO)
-
+
         # Group by category
         by_category = {}
         for issue in self.issues:
             if issue.category not in by_category:
                 by_category[issue.category] = []
             by_category[issue.category].append(issue)
-
+
         # Group by cycle
         by_cycle = {}
         for issue in self.issues:
@@ -649,45 +707,45 @@ class CycleLinter:
                 if issue.cycle_id not in by_cycle:
                     by_cycle[issue.cycle_id] = []
                 by_cycle[issue.cycle_id].append(issue)
-
+
         return {
-            'summary': {
-                'total_issues': len(self.issues),
-                'errors': len(errors),
-                'warnings': len(warnings),
-                'info': len(info),
-                'categories': list(by_category.keys()),
-                'affected_cycles': len(by_cycle)
-            },
-            'issues': self.issues,
-            'by_severity': {
-                'errors': errors,
-                'warnings': warnings,
-                'info': info
+            "summary": {
+                "total_issues": len(self.issues),
+                "errors": len(errors),
+                "warnings": len(warnings),
+                "info": len(info),
+                "categories": list(by_category.keys()),
+                "affected_cycles": len(by_cycle),
             },
-            'by_category': by_category,
-            'by_cycle': by_cycle,
-            'recommendations': self._generate_recommendations()
+            "issues": self.issues,
+            "by_severity": {"errors": errors, "warnings": warnings, "info": info},
+            "by_category": by_category,
+            "by_cycle": by_cycle,
+            "recommendations": self._generate_recommendations(),
         }
-
+
     def _generate_recommendations(self) -> List[str]:
         """Generate high-level recommendations based on found issues."""
         recommendations = []
-
+
         errors = self.get_issues_by_severity(IssueSeverity.ERROR)
         if errors:
-            recommendations.append(f"Fix {len(errors)} critical errors before deployment")
-
+            recommendations.append(
+                f"Fix {len(errors)} critical errors before deployment"
+            )
+
         convergence_issues = self.get_issues_by_category("convergence")
         if convergence_issues:
             recommendations.append("Review convergence conditions for all cycles")
-
+
         performance_issues = self.get_issues_by_category("performance")
         if performance_issues:
             recommendations.append("Optimize cycles to improve performance")
-
+
         safety_issues = self.get_issues_by_category("safety")
         if safety_issues:
-            recommendations.append("Add safety limits (timeout, max_iterations) to cycles")
-
-        return recommendations
+            recommendations.append(
+                "Add safety limits (timeout, max_iterations) to cycles"
+            )
+
+        return recommendations
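
Every hunk above is a formatting-only change (isort-style import ordering, single quotes normalized to double quotes, black-style call reflow with trailing commas, trailing whitespace stripped), so the module's public API is the same in 0.2.0 and 0.2.2. For reference, a minimal usage sketch of that API, assembled from the docstring examples in the diff; the bare Workflow("test", "Test Workflow") constructor call is the docstring's own illustration, and the linter only reports issues on workflows that actually define cycle edges:

    from kailash import Workflow
    from kailash.workflow.validation import CycleLinter, IssueSeverity

    # Illustrative workflow; a real one would have nodes and cycle connections.
    workflow = Workflow("test", "Test Workflow")

    linter = CycleLinter(workflow)
    issues = linter.check_all()

    # ERROR-level issues (e.g., CYC001, CYC012, CYC014) should block deployment.
    for issue in linter.get_issues_by_severity(IssueSeverity.ERROR):
        print(f"{issue.code}: {issue.message}")
        if issue.suggestion:
            print(f"  Suggestion: {issue.suggestion}")

    # Aggregate view: counts plus grouping by category/cycle and recommendations.
    report = linter.generate_report()
    print(f"Total issues: {report['summary']['total_issues']}")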