kailash 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control.py +740 -0
  3. kailash/api/__main__.py +6 -0
  4. kailash/api/auth.py +668 -0
  5. kailash/api/custom_nodes.py +285 -0
  6. kailash/api/custom_nodes_secure.py +377 -0
  7. kailash/api/database.py +620 -0
  8. kailash/api/studio.py +915 -0
  9. kailash/api/studio_secure.py +893 -0
  10. kailash/mcp/__init__.py +53 -0
  11. kailash/mcp/__main__.py +13 -0
  12. kailash/mcp/ai_registry_server.py +712 -0
  13. kailash/mcp/client.py +447 -0
  14. kailash/mcp/client_new.py +334 -0
  15. kailash/mcp/server.py +293 -0
  16. kailash/mcp/server_new.py +336 -0
  17. kailash/mcp/servers/__init__.py +12 -0
  18. kailash/mcp/servers/ai_registry.py +289 -0
  19. kailash/nodes/__init__.py +4 -2
  20. kailash/nodes/ai/__init__.py +38 -0
  21. kailash/nodes/ai/a2a.py +1790 -0
  22. kailash/nodes/ai/agents.py +116 -2
  23. kailash/nodes/ai/ai_providers.py +206 -8
  24. kailash/nodes/ai/intelligent_agent_orchestrator.py +2108 -0
  25. kailash/nodes/ai/iterative_llm_agent.py +1280 -0
  26. kailash/nodes/ai/llm_agent.py +324 -1
  27. kailash/nodes/ai/self_organizing.py +1623 -0
  28. kailash/nodes/api/http.py +106 -25
  29. kailash/nodes/api/rest.py +116 -21
  30. kailash/nodes/base.py +15 -2
  31. kailash/nodes/base_async.py +45 -0
  32. kailash/nodes/base_cycle_aware.py +374 -0
  33. kailash/nodes/base_with_acl.py +338 -0
  34. kailash/nodes/code/python.py +135 -27
  35. kailash/nodes/data/readers.py +116 -53
  36. kailash/nodes/data/writers.py +16 -6
  37. kailash/nodes/logic/__init__.py +8 -0
  38. kailash/nodes/logic/async_operations.py +48 -9
  39. kailash/nodes/logic/convergence.py +642 -0
  40. kailash/nodes/logic/loop.py +153 -0
  41. kailash/nodes/logic/operations.py +212 -27
  42. kailash/nodes/logic/workflow.py +26 -18
  43. kailash/nodes/mixins/__init__.py +11 -0
  44. kailash/nodes/mixins/mcp.py +228 -0
  45. kailash/nodes/mixins.py +387 -0
  46. kailash/nodes/transform/__init__.py +8 -1
  47. kailash/nodes/transform/processors.py +119 -4
  48. kailash/runtime/__init__.py +2 -1
  49. kailash/runtime/access_controlled.py +458 -0
  50. kailash/runtime/local.py +106 -33
  51. kailash/runtime/parallel_cyclic.py +529 -0
  52. kailash/sdk_exceptions.py +90 -5
  53. kailash/security.py +845 -0
  54. kailash/tracking/manager.py +38 -15
  55. kailash/tracking/models.py +1 -1
  56. kailash/tracking/storage/filesystem.py +30 -2
  57. kailash/utils/__init__.py +8 -0
  58. kailash/workflow/__init__.py +18 -0
  59. kailash/workflow/convergence.py +270 -0
  60. kailash/workflow/cycle_analyzer.py +768 -0
  61. kailash/workflow/cycle_builder.py +573 -0
  62. kailash/workflow/cycle_config.py +709 -0
  63. kailash/workflow/cycle_debugger.py +760 -0
  64. kailash/workflow/cycle_exceptions.py +601 -0
  65. kailash/workflow/cycle_profiler.py +671 -0
  66. kailash/workflow/cycle_state.py +338 -0
  67. kailash/workflow/cyclic_runner.py +985 -0
  68. kailash/workflow/graph.py +500 -39
  69. kailash/workflow/migration.py +768 -0
  70. kailash/workflow/safety.py +365 -0
  71. kailash/workflow/templates.py +744 -0
  72. kailash/workflow/validation.py +693 -0
  73. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/METADATA +446 -13
  74. kailash-0.2.0.dist-info/RECORD +125 -0
  75. kailash/nodes/mcp/__init__.py +0 -11
  76. kailash/nodes/mcp/client.py +0 -554
  77. kailash/nodes/mcp/resource.py +0 -682
  78. kailash/nodes/mcp/server.py +0 -577
  79. kailash-0.1.4.dist-info/RECORD +0 -85
  80. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/WHEEL +0 -0
  81. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/entry_points.txt +0 -0
  82. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/licenses/LICENSE +0 -0
  83. {kailash-0.1.4.dist-info → kailash-0.2.0.dist-info}/top_level.txt +0 -0
kailash/workflow/migration.py (new file)
@@ -0,0 +1,768 @@
+ """
+ Intelligent Migration System for DAG to Cyclic Workflow Conversion.
+
+ This module provides comprehensive tools to analyze existing DAG workflows
+ and intelligently suggest or automatically convert them to use cyclic patterns
+ where appropriate. It identifies optimization opportunities, provides detailed
+ implementation guidance, and automates the conversion process.
+
+ Design Philosophy:
+     Provides intelligent analysis of existing workflows to identify patterns
+     that would benefit from cyclification, offering both automated conversion
+     and detailed guidance for manual implementation. Focuses on preserving
+     workflow semantics while optimizing for performance and maintainability.
+
+ Key Features:
+     - Pattern recognition for cyclification opportunities
+     - Confidence scoring for conversion recommendations
+     - Automated conversion with safety validation
+     - Detailed implementation guidance with code examples
+     - Risk assessment and migration planning
+     - Template-based conversion for common patterns
+
+ Analysis Capabilities:
+     - Retry pattern detection in manual implementations
+     - Iterative improvement pattern identification
+     - Data validation and cleaning pattern recognition
+     - Batch processing pattern analysis
+     - Numerical convergence pattern detection
+     - Performance anti-pattern identification
+
+ Core Components:
+     - CyclificationOpportunity: Identified conversion opportunity
+     - CyclificationSuggestion: Detailed implementation guidance
+     - DAGToCycleConverter: Main analysis and conversion engine
+     - Pattern detection algorithms for common workflows
+
+ Conversion Strategy:
+     - Non-destructive analysis preserving original workflows
+     - Confidence-based prioritization of opportunities
+     - Template-based conversion for reliability
+     - Comprehensive validation of converted workflows
+     - Rollback capabilities for failed conversions
+
+ Upstream Dependencies:
+     - Existing workflow structures and node implementations
+     - CycleTemplates for automated conversion patterns
+     - Workflow validation and safety systems
+
+ Downstream Consumers:
+     - Workflow development tools and IDEs
+     - Automated workflow optimization systems
+     - Migration planning and execution tools
+     - Performance optimization recommendations
+     - Educational and training systems
+
+ Examples:
+     Analyze workflow for opportunities:
+
+     >>> from kailash.workflow.migration import DAGToCycleConverter
+     >>> converter = DAGToCycleConverter(existing_workflow)
+     >>> opportunities = converter.analyze_cyclification_opportunities()
+     >>> for opp in opportunities:
+     ...     print(f"Found {opp.pattern_type}: {opp.description}")
+     ...     print(f"Confidence: {opp.confidence:.2f}")
+     ...     print(f"Expected benefit: {opp.estimated_benefit}")
+
+     Generate detailed migration guidance:
+
+     >>> suggestions = converter.generate_detailed_suggestions()
+     >>> for suggestion in suggestions:
+     ...     print(f"Found {suggestion.opportunity.pattern_type}")
+     ...     print(f"Implementation steps:")
+     ...     for step in suggestion.implementation_steps:
+     ...         print(f" {step}")
+     ...     print(f"Code example: {suggestion.code_example}")
+     ...     print(f"Expected outcome: {suggestion.expected_outcome}")
+
+     Automated conversion:
+
+     >>> # Convert specific nodes to cycle
+     >>> cycle_id = converter.convert_to_cycle(
+     ...     nodes=["processor", "evaluator"],
+     ...     convergence_strategy="quality_improvement",
+     ...     max_iterations=50
+     ... )
+     >>> print(f"Created cycle: {cycle_id}")
+
+     Comprehensive migration report:
+
+     >>> report = converter.generate_migration_report()
+     >>> print(f"Total opportunities: {report['summary']['total_opportunities']}")
+     >>> print(f"High confidence: {report['summary']['high_confidence']}")
+     >>> # Implementation priority order
+     >>> for item in report['implementation_order']:
+     ...     print(f"{item['priority']}: {item['justification']}")
+
+ See Also:
+     - :mod:`kailash.workflow.templates` for conversion patterns
+     - :mod:`kailash.workflow.validation` for workflow analysis
+     - :doc:`/guides/migration` for migration best practices
+ """
+
+ from typing import Dict, Any, List, Optional
+ from dataclasses import dataclass
+ from collections import defaultdict
+ import re
+
+ from . import Workflow
+ from .templates import CycleTemplates
+
+
+ @dataclass
+ class CyclificationOpportunity:
+     """Represents an opportunity to convert a DAG pattern to a cycle."""
+     nodes: List[str]
+     pattern_type: str
+     confidence: float
+     description: str
+     suggested_convergence: Optional[str] = None
+     estimated_benefit: str = "unknown"
+     implementation_complexity: str = "medium"
+
+
+ @dataclass
+ class CyclificationSuggestion:
+     """Detailed suggestion for converting nodes to a cycle."""
+     opportunity: CyclificationOpportunity
+     implementation_steps: List[str]
+     code_example: str
+     expected_outcome: str
+     risks: List[str]
+
+
+ class DAGToCycleConverter:
+     """
+     Analyzer and converter for transforming DAG workflows into cyclic workflows.
+
+     This class helps identify patterns in existing workflows that could benefit
+     from cyclic execution and provides tools to convert them.
+     """
+
+     def __init__(self, workflow: Workflow):
+         """
+         Initialize converter with target workflow.
+
+         Args:
+             workflow: The workflow to analyze and potentially convert
+         """
+         self.workflow = workflow
+         self.graph = workflow.graph
+         self.opportunities: List[CyclificationOpportunity] = []
+
+     def analyze_cyclification_opportunities(self) -> List[CyclificationOpportunity]:
+         """
+         Analyze workflow for patterns that could benefit from cyclification.
+
+         Returns:
+             List of identified cyclification opportunities
+
+         Example:
+             >>> workflow = create_example_workflow()
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> opportunities = converter.analyze_cyclification_opportunities()
+             >>> for opp in opportunities:
+             ...     print(f"{opp.pattern_type}: {opp.description}")
+         """
+         self.opportunities = []
+
+         # Analyze different patterns
+         self._detect_retry_patterns()
+         self._detect_iterative_improvement_patterns()
+         self._detect_validation_patterns()
+         self._detect_batch_processing_patterns()
+         self._detect_convergence_patterns()
+
+         # Sort by confidence and potential benefit
+         self.opportunities.sort(key=lambda x: x.confidence, reverse=True)
+
+         return self.opportunities
+
+     def _detect_retry_patterns(self):
+         """Detect patterns that look like manual retry logic."""
+         nodes = self.workflow.nodes
+
+         # Look for nodes with similar names suggesting retry logic
+         retry_patterns = [
+             r'.*[_\-]retry[_\-]?.*',
+             r'.*[_\-]attempt[_\-]?[0-9]*',
+             r'.*[_\-]backup[_\-]?.*',
+             r'.*[_\-]fallback[_\-]?.*',
+             r'.*[_\-]redundant[_\-]?.*',
+             r'.*[_\-]failover[_\-]?.*'
+         ]
+
+         for node_id, node in nodes.items():
+             for pattern in retry_patterns:
+                 if re.match(pattern, node_id, re.IGNORECASE):
+                     # Found potential retry pattern
+                     related_nodes = self._find_related_nodes(node_id)
+
+                     opportunity = CyclificationOpportunity(
+                         nodes=[node_id] + related_nodes,
+                         pattern_type="retry_cycle",
+                         confidence=0.7,
+                         description=f"Node '{node_id}' appears to implement retry logic manually",
+                         suggested_convergence="success == True",
+                         estimated_benefit="improved_reliability",
+                         implementation_complexity="low"
+                     )
+                     self.opportunities.append(opportunity)
+
+     def _detect_iterative_improvement_patterns(self):
+         """Detect patterns that perform iterative improvement."""
+         nodes = self.workflow.nodes
+
+         # Look for processor-evaluator pairs
+         improvement_keywords = ['process', 'improve', 'optimize', 'refine', 'enhance']
+         evaluation_keywords = ['evaluate', 'assess', 'validate', 'check', 'score']
+
+         processors = []
+         evaluators = []
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in improvement_keywords):
+                 processors.append(node_id)
+             if any(keyword in node_id_lower for keyword in evaluation_keywords):
+                 evaluators.append(node_id)
+
+         # Look for processor-evaluator pairs that are connected
+         for processor in processors:
+             for evaluator in evaluators:
+                 if self._are_connected(processor, evaluator):
+                     opportunity = CyclificationOpportunity(
+                         nodes=[processor, evaluator],
+                         pattern_type="optimization_cycle",
+                         confidence=0.8,
+                         description=f"'{processor}' and '{evaluator}' form iterative improvement pattern",
+                         suggested_convergence="quality > 0.9",
+                         estimated_benefit="automatic_convergence",
+                         implementation_complexity="medium"
+                     )
+                     self.opportunities.append(opportunity)
+
+     def _detect_validation_patterns(self):
+         """Detect data validation and cleaning patterns."""
+         nodes = self.workflow.nodes
+
+         cleaning_keywords = ['clean', 'sanitize', 'normalize', 'transform']
+         validation_keywords = ['validate', 'verify', 'check', 'audit']
+
+         cleaners = []
+         validators = []
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in cleaning_keywords):
+                 cleaners.append(node_id)
+             if any(keyword in node_id_lower for keyword in validation_keywords):
+                 validators.append(node_id)
+
+         # Look for cleaner-validator pairs
+         for cleaner in cleaners:
+             for validator in validators:
+                 if self._are_connected(cleaner, validator):
+                     opportunity = CyclificationOpportunity(
+                         nodes=[cleaner, validator],
+                         pattern_type="data_quality_cycle",
+                         confidence=0.75,
+                         description=f"'{cleaner}' and '{validator}' form data quality improvement pattern",
+                         suggested_convergence="quality_score >= 0.95",
+                         estimated_benefit="improved_data_quality",
+                         implementation_complexity="low"
+                     )
+                     self.opportunities.append(opportunity)
+
+     def _detect_batch_processing_patterns(self):
+         """Detect patterns that process data in chunks."""
+         nodes = self.workflow.nodes
+
+         batch_keywords = ['batch', 'chunk', 'segment', 'partition', 'split']
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in batch_keywords):
+                 opportunity = CyclificationOpportunity(
+                     nodes=[node_id],
+                     pattern_type="batch_processing_cycle",
+                     confidence=0.6,
+                     description=f"'{node_id}' appears to process data in batches",
+                     suggested_convergence="all_batches_processed == True",
+                     estimated_benefit="memory_efficiency",
+                     implementation_complexity="medium"
+                 )
+                 self.opportunities.append(opportunity)
+
+     def _detect_convergence_patterns(self):
+         """Detect numerical convergence patterns."""
+         nodes = self.workflow.nodes
+
+         convergence_keywords = ['converge', 'iterate', 'approximate', 'solve', 'calculate']
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in convergence_keywords):
+                 opportunity = CyclificationOpportunity(
+                     nodes=[node_id],
+                     pattern_type="convergence_cycle",
+                     confidence=0.5,
+                     description=f"'{node_id}' may perform iterative calculations",
+                     suggested_convergence="difference < 0.001",
+                     estimated_benefit="numerical_stability",
+                     implementation_complexity="high"
+                 )
+                 self.opportunities.append(opportunity)
+
+     def _find_related_nodes(self, node_id: str) -> List[str]:
+         """Find nodes that are closely related to the given node."""
+         related = []
+
+         # Find direct connections from NetworkX graph
+         graph = self.workflow.graph
+
+         # Find predecessors and successors
+         if node_id in graph:
+             related.extend(graph.predecessors(node_id))
+             related.extend(graph.successors(node_id))
+
+         return list(set(related))
+
+     def _are_connected(self, node1: str, node2: str) -> bool:
+         """Check if two nodes are directly connected."""
+         graph = self.workflow.graph
+
+         # Check if there's an edge between the nodes in either direction
+         return graph.has_edge(node1, node2) or graph.has_edge(node2, node1)
+
+     def generate_detailed_suggestions(self) -> List[CyclificationSuggestion]:
+         """
+         Generate detailed suggestions with implementation guidance.
+
+         Returns:
+             List of detailed suggestions for cyclification
+
+         Example:
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> converter.analyze_cyclification_opportunities()
+             >>> suggestions = converter.generate_detailed_suggestions()
+             >>> for suggestion in suggestions:
+             ...     print(suggestion.code_example)
+         """
+         suggestions = []
+
+         for opportunity in self.opportunities:
+             suggestion = self._create_detailed_suggestion(opportunity)
+             suggestions.append(suggestion)
+
+         return suggestions
+
+     def _create_detailed_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create detailed implementation suggestion for an opportunity."""
+
+         if opportunity.pattern_type == "retry_cycle":
+             return self._create_retry_suggestion(opportunity)
+         elif opportunity.pattern_type == "optimization_cycle":
+             return self._create_optimization_suggestion(opportunity)
+         elif opportunity.pattern_type == "data_quality_cycle":
+             return self._create_data_quality_suggestion(opportunity)
+         elif opportunity.pattern_type == "batch_processing_cycle":
+             return self._create_batch_processing_suggestion(opportunity)
+         elif opportunity.pattern_type == "convergence_cycle":
+             return self._create_convergence_suggestion(opportunity)
+         else:
+             return self._create_generic_suggestion(opportunity)
+
+     def _create_retry_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create suggestion for retry cycle conversion."""
+         main_node = opportunity.nodes[0]
+
+         code_example = f'''
+ # Before: Manual retry logic (complex, error-prone)
+ # Multiple nodes handling retries manually
+
+ # After: Using retry cycle template
+ cycle_id = workflow.add_retry_cycle(
+     target_node="{main_node}",
+     max_retries=3,
+     backoff_strategy="exponential",
+     success_condition="success == True"
+ )
+
+ print(f"Created retry cycle: {{cycle_id}}")
+ '''
+
+         implementation_steps = [
+             f"Identify the main node that needs retry logic: '{main_node}'",
+             "Remove manual retry handling from existing nodes",
+             "Apply retry cycle template with appropriate parameters",
+             "Test with failure scenarios to ensure proper retry behavior",
+             "Monitor retry patterns in production"
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Simplified retry logic with exponential backoff and better error handling",
+             risks=["May change timing of operations", "Retry behavior might differ from manual implementation"]
+         )
+
+     def _create_optimization_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create suggestion for optimization cycle conversion."""
+         nodes = opportunity.nodes
+         processor = nodes[0] if nodes else "processor"
+         evaluator = nodes[1] if len(nodes) > 1 else "evaluator"
+
+         code_example = f'''
+ # Before: Manual iterative improvement (fixed iterations, no early stopping)
+ # Complex logic to manage improvement loops
+
+ # After: Using optimization cycle template
+ cycle_id = workflow.add_optimization_cycle(
+     processor_node="{processor}",
+     evaluator_node="{evaluator}",
+     convergence="quality > 0.95",
+     max_iterations=100
+ )
+
+ print(f"Created optimization cycle: {{cycle_id}}")
+ '''
+
+         implementation_steps = [
+             f"Ensure '{processor}' generates/improves solutions",
+             f"Ensure '{evaluator}' produces quality metrics",
+             "Define appropriate convergence criteria",
+             "Apply optimization cycle template",
+             "Fine-tune convergence thresholds based on testing"
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Automatic convergence detection with early stopping for better performance",
+             risks=["Convergence criteria may need tuning", "May require more iterations than fixed approach"]
+         )
+
+     def _create_data_quality_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create suggestion for data quality cycle conversion."""
+         nodes = opportunity.nodes
+         cleaner = nodes[0] if nodes else "cleaner"
+         validator = nodes[1] if len(nodes) > 1 else "validator"
+
+         code_example = f'''
+ # Before: Single-pass cleaning (may miss quality issues)
+ # Fixed cleaning pipeline without quality feedback
+
+ # After: Using data quality cycle template
+ cycle_id = workflow.add_data_quality_cycle(
+     cleaner_node="{cleaner}",
+     validator_node="{validator}",
+     quality_threshold=0.98,
+     max_iterations=5
+ )
+
+ print(f"Created data quality cycle: {{cycle_id}}")
+ '''
+
+         implementation_steps = [
+             f"Ensure '{cleaner}' can improve data quality iteratively",
+             f"Ensure '{validator}' produces numeric quality scores",
+             "Define appropriate quality threshold",
+             "Apply data quality cycle template",
+             "Monitor quality improvements over iterations"
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Higher data quality through iterative improvement with automatic stopping",
+             risks=["May increase processing time", "Quality metrics need to be meaningful"]
+         )
+
+     def _create_batch_processing_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create suggestion for batch processing cycle conversion."""
+         node = opportunity.nodes[0] if opportunity.nodes else "processor"
+
+         code_example = f'''
+ # Before: Manual batch handling (complex state management)
+ # Custom logic for batch iteration and completion
+
+ # After: Using batch processing cycle template
+ cycle_id = workflow.add_batch_processing_cycle(
+     processor_node="{node}",
+     batch_size=100,
+     total_items=10000  # If known
+ )
+
+ print(f"Created batch processing cycle: {{cycle_id}}")
+ '''
+
+         implementation_steps = [
+             f"Modify '{node}' to process batches instead of full dataset",
+             "Determine appropriate batch size for memory constraints",
+             "Apply batch processing cycle template",
+             "Test with various dataset sizes",
+             "Monitor memory usage and processing time"
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Memory-efficient processing of large datasets with automatic batch management",
+             risks=["Batch size may need tuning", "May change processing order/behavior"]
+         )
+
+     def _create_convergence_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create suggestion for convergence cycle conversion."""
+         node = opportunity.nodes[0] if opportunity.nodes else "processor"
+
+         code_example = f'''
+ # Before: Fixed iterations (may over/under-compute)
+ # Manual convergence checking
+
+ # After: Using convergence cycle template
+ cycle_id = workflow.add_convergence_cycle(
+     processor_node="{node}",
+     tolerance=0.001,
+     max_iterations=1000
+ )
+
+ print(f"Created convergence cycle: {{cycle_id}}")
+ '''
+
+         implementation_steps = [
+             f"Ensure '{node}' produces numeric values for convergence checking",
+             "Determine appropriate tolerance for convergence",
+             "Apply convergence cycle template",
+             "Test with various starting conditions",
+             "Validate convergence behavior"
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Automatic convergence detection with optimal iteration count",
+             risks=["Tolerance may need adjustment", "Convergence behavior may differ from fixed iterations"]
+         )
+
+     def _create_generic_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+         """Create generic suggestion for unknown pattern types."""
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=["Analyze pattern manually", "Choose appropriate cycle template"],
+             code_example="# Manual analysis required",
+             expected_outcome="Pattern-specific benefits",
+             risks=["Requires manual analysis"]
+         )
+
+     def convert_to_cycle(
+         self,
+         nodes: List[str],
+         convergence_strategy: str = "error_reduction",
+         cycle_type: Optional[str] = None,
+         **kwargs
+     ) -> str:
+         """
+         Convert specific nodes to a cycle using the specified strategy.
+
+         Args:
+             nodes: List of node IDs to include in the cycle
+             convergence_strategy: Strategy for convergence ("error_reduction", "quality_improvement", etc.)
+             cycle_type: Specific cycle type to use, or auto-detect if None
+             **kwargs: Additional parameters for cycle creation
+
+         Returns:
+             str: The created cycle identifier
+
+         Example:
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> cycle_id = converter.convert_to_cycle(
+             ...     nodes=["processor", "evaluator"],
+             ...     convergence_strategy="quality_improvement",
+             ...     max_iterations=50
+             ... )
+         """
+         if cycle_type is None:
+             cycle_type = self._detect_cycle_type(nodes, convergence_strategy)
+
+         if cycle_type == "optimization":
+             return self._convert_to_optimization_cycle(nodes, **kwargs)
+         elif cycle_type == "retry":
+             return self._convert_to_retry_cycle(nodes, **kwargs)
+         elif cycle_type == "data_quality":
+             return self._convert_to_data_quality_cycle(nodes, **kwargs)
+         elif cycle_type == "batch_processing":
+             return self._convert_to_batch_processing_cycle(nodes, **kwargs)
+         elif cycle_type == "convergence":
+             return self._convert_to_convergence_cycle(nodes, **kwargs)
+         else:
+             raise ValueError(f"Unknown cycle type: {cycle_type}")
+
+     def _detect_cycle_type(self, nodes: List[str], strategy: str) -> str:
+         """Detect the most appropriate cycle type for given nodes and strategy."""
+         if strategy == "error_reduction" or strategy == "quality_improvement":
+             return "optimization"
+         elif strategy == "retry_logic":
+             return "retry"
+         elif strategy == "data_cleaning":
+             return "data_quality"
+         elif strategy == "batch_processing":
+             return "batch_processing"
+         elif strategy == "numerical_convergence":
+             return "convergence"
+         else:
+             # Default to optimization for unknown strategies
+             return "optimization"
+
+     def _convert_to_optimization_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to optimization cycle."""
+         if len(nodes) < 2:
+             raise ValueError("Optimization cycle requires at least 2 nodes")
+
+         return CycleTemplates.optimization_cycle(
+             self.workflow,
+             processor_node=nodes[0],
+             evaluator_node=nodes[1],
+             **kwargs
+         )
+
+     def _convert_to_retry_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to retry cycle."""
+         if len(nodes) < 1:
+             raise ValueError("Retry cycle requires at least 1 node")
+
+         return CycleTemplates.retry_cycle(
+             self.workflow,
+             target_node=nodes[0],
+             **kwargs
+         )
+
+     def _convert_to_data_quality_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to data quality cycle."""
+         if len(nodes) < 2:
+             raise ValueError("Data quality cycle requires at least 2 nodes")
+
+         return CycleTemplates.data_quality_cycle(
+             self.workflow,
+             cleaner_node=nodes[0],
+             validator_node=nodes[1],
+             **kwargs
+         )
+
+     def _convert_to_batch_processing_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to batch processing cycle."""
+         if len(nodes) < 1:
+             raise ValueError("Batch processing cycle requires at least 1 node")
+
+         return CycleTemplates.batch_processing_cycle(
+             self.workflow,
+             processor_node=nodes[0],
+             **kwargs
+         )
+
+     def _convert_to_convergence_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to convergence cycle."""
+         if len(nodes) < 1:
+             raise ValueError("Convergence cycle requires at least 1 node")
+
+         return CycleTemplates.convergence_cycle(
+             self.workflow,
+             processor_node=nodes[0],
+             **kwargs
+         )
+
+     def generate_migration_report(self) -> Dict[str, Any]:
+         """
+         Generate comprehensive migration report with analysis and recommendations.
+
+         Returns:
+             Dict containing migration analysis and recommendations
+
+         Example:
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> converter.analyze_cyclification_opportunities()
+             >>> report = converter.generate_migration_report()
+             >>> print(report['summary']['total_opportunities'])
+         """
+         opportunities = self.analyze_cyclification_opportunities()
+         suggestions = self.generate_detailed_suggestions()
+
+         # Categorize by pattern type
+         by_pattern = defaultdict(list)
+         for opp in opportunities:
+             by_pattern[opp.pattern_type].append(opp)
+
+         # Calculate potential benefits
+         high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
+         medium_confidence = [opp for opp in opportunities if 0.4 <= opp.confidence < 0.7]
+         low_confidence = [opp for opp in opportunities if opp.confidence < 0.4]
+
+         return {
+             'summary': {
+                 'total_opportunities': len(opportunities),
+                 'high_confidence': len(high_confidence),
+                 'medium_confidence': len(medium_confidence),
+                 'low_confidence': len(low_confidence),
+                 'pattern_distribution': {k: len(v) for k, v in by_pattern.items()}
+             },
+             'opportunities': opportunities,
+             'detailed_suggestions': suggestions,
+             'recommendations': self._generate_migration_recommendations(opportunities),
+             'implementation_order': self._suggest_implementation_order(opportunities)
+         }
+
+     def _generate_migration_recommendations(self, opportunities: List[CyclificationOpportunity]) -> List[str]:
+         """Generate high-level recommendations for migration."""
+         recommendations = []
+
+         high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
+         if high_confidence:
+             recommendations.append(
+                 f"Start with {len(high_confidence)} high-confidence opportunities for immediate benefits"
+             )
+
+         pattern_counts = defaultdict(int)
+         for opp in opportunities:
+             pattern_counts[opp.pattern_type] += 1
+
+         most_common = max(pattern_counts.items(), key=lambda x: x[1]) if pattern_counts else None
+         if most_common:
+             recommendations.append(
+                 f"Focus on {most_common[0]} patterns ({most_common[1]} opportunities) for consistency"
+             )
+
+         low_complexity = [opp for opp in opportunities if opp.implementation_complexity == "low"]
+         if low_complexity:
+             recommendations.append(
+                 f"Begin with {len(low_complexity)} low-complexity conversions to build confidence"
+             )
+
+         return recommendations
+
+     def _suggest_implementation_order(self, opportunities: List[CyclificationOpportunity]) -> List[Dict[str, Any]]:
+         """Suggest order for implementing cyclification opportunities."""
+         # Sort by: confidence desc, complexity asc (low=1, medium=2, high=3)
+         complexity_score = {"low": 1, "medium": 2, "high": 3}
+
+         def sort_key(opp):
+             return (-opp.confidence, complexity_score.get(opp.implementation_complexity, 2))
+
+         sorted_opportunities = sorted(opportunities, key=sort_key)
+
+         implementation_order = []
+         for i, opp in enumerate(sorted_opportunities, 1):
+             implementation_order.append({
+                 'priority': i,
+                 'pattern_type': opp.pattern_type,
+                 'nodes': opp.nodes,
+                 'confidence': opp.confidence,
+                 'complexity': opp.implementation_complexity,
+                 'justification': f"Priority {i}: {opp.description}"
+             })
+
+         return implementation_order
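
Putting the new API together, a minimal end-to-end sketch based on the docstring examples in the file above. The `existing_workflow` object and the node IDs "process_data" and "evaluate_quality" are assumptions for illustration; the calls themselves follow the methods defined in this diff.

    from kailash.workflow.migration import DAGToCycleConverter

    converter = DAGToCycleConverter(existing_workflow)

    # Non-destructive analysis: the detectors match node-ID naming patterns
    # and return opportunities sorted by confidence (highest first).
    for opp in converter.analyze_cyclification_opportunities():
        print(f"{opp.pattern_type} ({opp.confidence:.2f}): {opp.description}")

    # Aggregate report: summary counts, per-pattern distribution, detailed
    # suggestions, and an implementation order sorted by confidence
    # (descending) then complexity (ascending).
    report = converter.generate_migration_report()
    print(report["summary"]["pattern_distribution"])

    # Automated conversion: "quality_improvement" maps to the optimization
    # cycle type, which requires a [processor, evaluator] node pair and
    # delegates to CycleTemplates.optimization_cycle.
    cycle_id = converter.convert_to_cycle(
        nodes=["process_data", "evaluate_quality"],  # assumed node IDs
        convergence_strategy="quality_improvement",
    )
    print(f"Created cycle: {cycle_id}")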