kailash 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. kailash/__init__.py +1 -1
  2. kailash/access_control.py +740 -0
  3. kailash/api/__main__.py +6 -0
  4. kailash/api/auth.py +668 -0
  5. kailash/api/custom_nodes.py +285 -0
  6. kailash/api/custom_nodes_secure.py +377 -0
  7. kailash/api/database.py +620 -0
  8. kailash/api/studio.py +915 -0
  9. kailash/api/studio_secure.py +893 -0
  10. kailash/mcp/__init__.py +53 -0
  11. kailash/mcp/__main__.py +13 -0
  12. kailash/mcp/ai_registry_server.py +712 -0
  13. kailash/mcp/client.py +447 -0
  14. kailash/mcp/client_new.py +334 -0
  15. kailash/mcp/server.py +293 -0
  16. kailash/mcp/server_new.py +336 -0
  17. kailash/mcp/servers/__init__.py +12 -0
  18. kailash/mcp/servers/ai_registry.py +289 -0
  19. kailash/nodes/__init__.py +4 -2
  20. kailash/nodes/ai/__init__.py +2 -0
  21. kailash/nodes/ai/a2a.py +714 -67
  22. kailash/nodes/ai/intelligent_agent_orchestrator.py +31 -37
  23. kailash/nodes/ai/iterative_llm_agent.py +1280 -0
  24. kailash/nodes/ai/llm_agent.py +324 -1
  25. kailash/nodes/ai/self_organizing.py +5 -6
  26. kailash/nodes/base.py +15 -2
  27. kailash/nodes/base_async.py +45 -0
  28. kailash/nodes/base_cycle_aware.py +374 -0
  29. kailash/nodes/base_with_acl.py +338 -0
  30. kailash/nodes/code/python.py +135 -27
  31. kailash/nodes/data/__init__.py +1 -2
  32. kailash/nodes/data/readers.py +16 -6
  33. kailash/nodes/data/sql.py +699 -256
  34. kailash/nodes/data/writers.py +16 -6
  35. kailash/nodes/logic/__init__.py +8 -0
  36. kailash/nodes/logic/convergence.py +642 -0
  37. kailash/nodes/logic/loop.py +153 -0
  38. kailash/nodes/logic/operations.py +187 -27
  39. kailash/nodes/mixins/__init__.py +11 -0
  40. kailash/nodes/mixins/mcp.py +228 -0
  41. kailash/nodes/mixins.py +387 -0
  42. kailash/runtime/__init__.py +2 -1
  43. kailash/runtime/access_controlled.py +458 -0
  44. kailash/runtime/local.py +106 -33
  45. kailash/runtime/parallel_cyclic.py +529 -0
  46. kailash/sdk_exceptions.py +90 -5
  47. kailash/security.py +845 -0
  48. kailash/tracking/manager.py +38 -15
  49. kailash/tracking/models.py +1 -1
  50. kailash/tracking/storage/filesystem.py +30 -2
  51. kailash/utils/__init__.py +8 -0
  52. kailash/workflow/__init__.py +18 -0
  53. kailash/workflow/convergence.py +270 -0
  54. kailash/workflow/cycle_analyzer.py +889 -0
  55. kailash/workflow/cycle_builder.py +579 -0
  56. kailash/workflow/cycle_config.py +725 -0
  57. kailash/workflow/cycle_debugger.py +860 -0
  58. kailash/workflow/cycle_exceptions.py +615 -0
  59. kailash/workflow/cycle_profiler.py +741 -0
  60. kailash/workflow/cycle_state.py +338 -0
  61. kailash/workflow/cyclic_runner.py +985 -0
  62. kailash/workflow/graph.py +500 -39
  63. kailash/workflow/migration.py +809 -0
  64. kailash/workflow/safety.py +365 -0
  65. kailash/workflow/templates.py +763 -0
  66. kailash/workflow/validation.py +751 -0
  67. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/METADATA +259 -12
  68. kailash-0.2.1.dist-info/RECORD +125 -0
  69. kailash/nodes/mcp/__init__.py +0 -11
  70. kailash/nodes/mcp/client.py +0 -554
  71. kailash/nodes/mcp/resource.py +0 -682
  72. kailash/nodes/mcp/server.py +0 -577
  73. kailash-0.1.5.dist-info/RECORD +0 -88
  74. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/WHEEL +0 -0
  75. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/entry_points.txt +0 -0
  76. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/licenses/LICENSE +0 -0
  77. {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,809 @@
+ """
+ Intelligent Migration System for DAG to Cyclic Workflow Conversion.
+
+ This module provides comprehensive tools to analyze existing DAG workflows
+ and intelligently suggest or automatically convert them to use cyclic patterns
+ where appropriate. It identifies optimization opportunities, provides detailed
+ implementation guidance, and automates the conversion process.
+
+ Design Philosophy:
+     Provides intelligent analysis of existing workflows to identify patterns
+     that would benefit from cyclification, offering both automated conversion
+     and detailed guidance for manual implementation. Focuses on preserving
+     workflow semantics while optimizing for performance and maintainability.
+
+ Key Features:
+     - Pattern recognition for cyclification opportunities
+     - Confidence scoring for conversion recommendations
+     - Automated conversion with safety validation
+     - Detailed implementation guidance with code examples
+     - Risk assessment and migration planning
+     - Template-based conversion for common patterns
+
+ Analysis Capabilities:
+     - Retry pattern detection in manual implementations
+     - Iterative improvement pattern identification
+     - Data validation and cleaning pattern recognition
+     - Batch processing pattern analysis
+     - Numerical convergence pattern detection
+     - Performance anti-pattern identification
+
+ Core Components:
+     - CyclificationOpportunity: Identified conversion opportunity
+     - CyclificationSuggestion: Detailed implementation guidance
+     - DAGToCycleConverter: Main analysis and conversion engine
+     - Pattern detection algorithms for common workflows
+
+ Conversion Strategy:
+     - Non-destructive analysis preserving original workflows
+     - Confidence-based prioritization of opportunities
+     - Template-based conversion for reliability
+     - Comprehensive validation of converted workflows
+     - Rollback capabilities for failed conversions
+
+ Upstream Dependencies:
+     - Existing workflow structures and node implementations
+     - CycleTemplates for automated conversion patterns
+     - Workflow validation and safety systems
+
+ Downstream Consumers:
+     - Workflow development tools and IDEs
+     - Automated workflow optimization systems
+     - Migration planning and execution tools
+     - Performance optimization recommendations
+     - Educational and training systems
+
+ Examples:
+     Analyze workflow for opportunities:
+
+     >>> from kailash.workflow.migration import DAGToCycleConverter
+     >>> converter = DAGToCycleConverter(existing_workflow)
+     >>> opportunities = converter.analyze_cyclification_opportunities()
+     >>> for opp in opportunities:
+     ...     print(f"Found {opp.pattern_type}: {opp.description}")
+     ...     print(f"Confidence: {opp.confidence:.2f}")
+     ...     print(f"Expected benefit: {opp.estimated_benefit}")
+
+     Generate detailed migration guidance:
+
+     >>> suggestions = converter.generate_detailed_suggestions()
+     >>> for suggestion in suggestions:
+     ...     print(f"Found {suggestion.opportunity.pattern_type}")
+     ...     print("Implementation steps:")
+     ...     for step in suggestion.implementation_steps:
+     ...         print(f" {step}")
+     ...     print(f"Code example: {suggestion.code_example}")
+     ...     print(f"Expected outcome: {suggestion.expected_outcome}")
+
+     Automated conversion:
+
+     >>> # Convert specific nodes to a cycle
+     >>> cycle_id = converter.convert_to_cycle(
+     ...     nodes=["processor", "evaluator"],
+     ...     convergence_strategy="quality_improvement",
+     ...     max_iterations=50
+     ... )
+     >>> print(f"Created cycle: {cycle_id}")
+
+     Comprehensive migration report:
+
+     >>> report = converter.generate_migration_report()
+     >>> print(f"Total opportunities: {report['summary']['total_opportunities']}")
+     >>> print(f"High confidence: {report['summary']['high_confidence']}")
+     >>> # Implementation priority order
+     >>> for item in report['implementation_order']:
+     ...     print(f"{item['priority']}: {item['justification']}")
+
+ See Also:
+     - :mod:`kailash.workflow.templates` for conversion patterns
+     - :mod:`kailash.workflow.validation` for workflow analysis
+     - :doc:`/guides/migration` for migration best practices
+ """
+
+ import re
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from typing import Any, Dict, List, Optional
+
+ from . import Workflow
+ from .templates import CycleTemplates
+
+
+ @dataclass
+ class CyclificationOpportunity:
+     """Represents an opportunity to convert a DAG pattern to a cycle."""
+
+     nodes: List[str]
+     pattern_type: str
+     confidence: float
+     description: str
+     suggested_convergence: Optional[str] = None
+     estimated_benefit: str = "unknown"
+     implementation_complexity: str = "medium"
+
+
+ @dataclass
+ class CyclificationSuggestion:
+     """Detailed suggestion for converting nodes to a cycle."""
+
+     opportunity: CyclificationOpportunity
+     implementation_steps: List[str]
+     code_example: str
+     expected_outcome: str
+     risks: List[str]
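+
+ # Illustrative example (hypothetical values): a hand-built opportunity for a
+ # quick test might look like
+ #     CyclificationOpportunity(
+ #         nodes=["fetch_data_retry"],
+ #         pattern_type="retry_cycle",
+ #         confidence=0.7,
+ #         description="manual retry logic",
+ #     )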
+
+
+ class DAGToCycleConverter:
+     """
+     Analyzer and converter for transforming DAG workflows into cyclic workflows.
+
+     This class helps identify patterns in existing workflows that could benefit
+     from cyclic execution and provides tools to convert them.
+     """
+
+     def __init__(self, workflow: Workflow):
+         """
+         Initialize converter with target workflow.
+
+         Args:
+             workflow: The workflow to analyze and potentially convert
+         """
+         self.workflow = workflow
+         self.graph = workflow.graph
+         self.opportunities: List[CyclificationOpportunity] = []
+
+     def analyze_cyclification_opportunities(self) -> List[CyclificationOpportunity]:
+         """
+         Analyze workflow for patterns that could benefit from cyclification.
+
+         Returns:
+             List of identified cyclification opportunities
+
+         Example:
+             >>> workflow = create_example_workflow()
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> opportunities = converter.analyze_cyclification_opportunities()
+             >>> for opp in opportunities:
+             ...     print(f"{opp.pattern_type}: {opp.description}")
+         """
+         self.opportunities = []
+
+         # Analyze different patterns
+         self._detect_retry_patterns()
+         self._detect_iterative_improvement_patterns()
+         self._detect_validation_patterns()
+         self._detect_batch_processing_patterns()
+         self._detect_convergence_patterns()
+
+         # Sort by confidence and potential benefit
+         self.opportunities.sort(key=lambda x: x.confidence, reverse=True)
+
+         return self.opportunities
+
+     def _detect_retry_patterns(self):
+         """Detect patterns that look like manual retry logic."""
+         nodes = self.workflow.nodes
+
+         # Look for nodes with similar names suggesting retry logic
+         retry_patterns = [
+             r".*[_\-]retry[_\-]?.*",
+             r".*[_\-]attempt[_\-]?[0-9]*",
+             r".*[_\-]backup[_\-]?.*",
+             r".*[_\-]fallback[_\-]?.*",
+             r".*[_\-]redundant[_\-]?.*",
+             r".*[_\-]failover[_\-]?.*",
+         ]
+
+         for node_id in nodes:
+             for pattern in retry_patterns:
+                 if re.match(pattern, node_id, re.IGNORECASE):
+                     # Found potential retry pattern
+                     related_nodes = self._find_related_nodes(node_id)
+
+                     opportunity = CyclificationOpportunity(
+                         nodes=[node_id] + related_nodes,
+                         pattern_type="retry_cycle",
+                         confidence=0.7,
+                         description=f"Node '{node_id}' appears to implement retry logic manually",
+                         suggested_convergence="success == True",
+                         estimated_benefit="improved_reliability",
+                         implementation_complexity="low",
+                     )
+                     self.opportunities.append(opportunity)
+                     # Record each node at most once, even if several patterns match
+                     break
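+     # Illustrative example (hypothetical node IDs): "fetch_data_retry",
+     # "upload_attempt_2", and "db_failover" would each match one of the
+     # patterns above and be recorded as a retry_cycle opportunity.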
+
+     def _detect_iterative_improvement_patterns(self):
+         """Detect patterns that perform iterative improvement."""
+         nodes = self.workflow.nodes
+
+         # Look for processor-evaluator pairs
+         improvement_keywords = ["process", "improve", "optimize", "refine", "enhance"]
+         evaluation_keywords = ["evaluate", "assess", "validate", "check", "score"]
+
+         processors = []
+         evaluators = []
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in improvement_keywords):
+                 processors.append(node_id)
+             if any(keyword in node_id_lower for keyword in evaluation_keywords):
+                 evaluators.append(node_id)
+
+         # Look for processor-evaluator pairs that are connected
+         for processor in processors:
+             for evaluator in evaluators:
+                 if self._are_connected(processor, evaluator):
+                     opportunity = CyclificationOpportunity(
+                         nodes=[processor, evaluator],
+                         pattern_type="optimization_cycle",
+                         confidence=0.8,
+                         description=f"'{processor}' and '{evaluator}' form an iterative improvement pattern",
+                         suggested_convergence="quality > 0.9",
+                         estimated_benefit="automatic_convergence",
+                         implementation_complexity="medium",
+                     )
+                     self.opportunities.append(opportunity)
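+     # Illustrative example (hypothetical node IDs): connected nodes named
+     # "improve_draft" and "evaluate_draft" would be flagged as an
+     # optimization_cycle candidate.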
+
+     def _detect_validation_patterns(self):
+         """Detect data validation and cleaning patterns."""
+         nodes = self.workflow.nodes
+
+         cleaning_keywords = ["clean", "sanitize", "normalize", "transform"]
+         validation_keywords = ["validate", "verify", "check", "audit"]
+
+         cleaners = []
+         validators = []
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in cleaning_keywords):
+                 cleaners.append(node_id)
+             if any(keyword in node_id_lower for keyword in validation_keywords):
+                 validators.append(node_id)
+
+         # Look for cleaner-validator pairs
+         for cleaner in cleaners:
+             for validator in validators:
+                 if self._are_connected(cleaner, validator):
+                     opportunity = CyclificationOpportunity(
+                         nodes=[cleaner, validator],
+                         pattern_type="data_quality_cycle",
+                         confidence=0.75,
+                         description=f"'{cleaner}' and '{validator}' form a data quality improvement pattern",
+                         suggested_convergence="quality_score >= 0.95",
+                         estimated_benefit="improved_data_quality",
+                         implementation_complexity="low",
+                     )
+                     self.opportunities.append(opportunity)
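+     # Illustrative example (hypothetical node IDs): a "clean_records" node
+     # feeding a "verify_records" node would be reported as a
+     # data_quality_cycle candidate.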
+
+     def _detect_batch_processing_patterns(self):
+         """Detect patterns that process data in chunks."""
+         nodes = self.workflow.nodes
+
+         batch_keywords = ["batch", "chunk", "segment", "partition", "split"]
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in batch_keywords):
+                 opportunity = CyclificationOpportunity(
+                     nodes=[node_id],
+                     pattern_type="batch_processing_cycle",
+                     confidence=0.6,
+                     description=f"'{node_id}' appears to process data in batches",
+                     suggested_convergence="all_batches_processed == True",
+                     estimated_benefit="memory_efficiency",
+                     implementation_complexity="medium",
+                 )
+                 self.opportunities.append(opportunity)
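+     # Illustrative example (hypothetical node ID): "split_into_chunks" matches
+     # both "chunk" and "split", but any() ensures it is recorded only once.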
+
+     def _detect_convergence_patterns(self):
+         """Detect numerical convergence patterns."""
+         nodes = self.workflow.nodes
+
+         convergence_keywords = [
+             "converge",
+             "iterate",
+             "approximate",
+             "solve",
+             "calculate",
+         ]
+
+         for node_id in nodes:
+             node_id_lower = node_id.lower()
+             if any(keyword in node_id_lower for keyword in convergence_keywords):
+                 opportunity = CyclificationOpportunity(
+                     nodes=[node_id],
+                     pattern_type="convergence_cycle",
+                     confidence=0.5,
+                     description=f"'{node_id}' may perform iterative calculations",
+                     suggested_convergence="difference < 0.001",
+                     estimated_benefit="numerical_stability",
+                     implementation_complexity="high",
+                 )
+                 self.opportunities.append(opportunity)
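+     # Illustrative example (hypothetical node IDs): "solve_linear_system" or
+     # "iterate_pagerank" would be flagged here; the 0.5 confidence reflects
+     # how weak a name-only signal is for numerical convergence.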
+
+     def _find_related_nodes(self, node_id: str) -> List[str]:
+         """Find nodes that are closely related to the given node."""
+         related = []
+
+         # Find direct connections from NetworkX graph
+         graph = self.workflow.graph
+
+         # Find predecessors and successors
+         if node_id in graph:
+             related.extend(graph.predecessors(node_id))
+             related.extend(graph.successors(node_id))
+
+         return list(set(related))
+
+     def _are_connected(self, node1: str, node2: str) -> bool:
+         """Check if two nodes are directly connected."""
+         graph = self.workflow.graph
+
+         # Check if there's an edge between the nodes in either direction
+         return graph.has_edge(node1, node2) or graph.has_edge(node2, node1)
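+     # Illustrative example (hypothetical node IDs): _are_connected("clean_data",
+     # "validate_data") is True whenever the graph has an edge between them in
+     # either direction.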
+
+     def generate_detailed_suggestions(self) -> List[CyclificationSuggestion]:
+         """
+         Generate detailed suggestions with implementation guidance.
+
+         Returns:
+             List of detailed suggestions for cyclification
+
+         Example:
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> converter.analyze_cyclification_opportunities()
+             >>> suggestions = converter.generate_detailed_suggestions()
+             >>> for suggestion in suggestions:
+             ...     print(suggestion.code_example)
+         """
+         suggestions = []
+
+         for opportunity in self.opportunities:
+             suggestion = self._create_detailed_suggestion(opportunity)
+             suggestions.append(suggestion)
+
+         return suggestions
+
+     def _create_detailed_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create a detailed implementation suggestion for an opportunity."""
+
+         if opportunity.pattern_type == "retry_cycle":
+             return self._create_retry_suggestion(opportunity)
+         elif opportunity.pattern_type == "optimization_cycle":
+             return self._create_optimization_suggestion(opportunity)
+         elif opportunity.pattern_type == "data_quality_cycle":
+             return self._create_data_quality_suggestion(opportunity)
+         elif opportunity.pattern_type == "batch_processing_cycle":
+             return self._create_batch_processing_suggestion(opportunity)
+         elif opportunity.pattern_type == "convergence_cycle":
+             return self._create_convergence_suggestion(opportunity)
+         else:
+             return self._create_generic_suggestion(opportunity)
+
+     def _create_retry_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create suggestion for retry cycle conversion."""
+         main_node = opportunity.nodes[0]
+
+         code_example = f"""
+ # Before: Manual retry logic (complex, error-prone)
+ # Multiple nodes handling retries manually
+
+ # After: Using retry cycle template
+ cycle_id = workflow.add_retry_cycle(
+     target_node="{main_node}",
+     max_retries=3,
+     backoff_strategy="exponential",
+     success_condition="success == True"
+ )
+
+ print(f"Created retry cycle: {{cycle_id}}")
+ """
+
+         implementation_steps = [
+             f"Identify the main node that needs retry logic: '{main_node}'",
+             "Remove manual retry handling from existing nodes",
+             "Apply retry cycle template with appropriate parameters",
+             "Test with failure scenarios to ensure proper retry behavior",
+             "Monitor retry patterns in production",
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Simplified retry logic with exponential backoff and better error handling",
+             risks=[
+                 "May change timing of operations",
+                 "Retry behavior might differ from manual implementation",
+             ],
+         )
+
+     def _create_optimization_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create suggestion for optimization cycle conversion."""
+         nodes = opportunity.nodes
+         processor = nodes[0] if nodes else "processor"
+         evaluator = nodes[1] if len(nodes) > 1 else "evaluator"
+
+         code_example = f"""
+ # Before: Manual iterative improvement (fixed iterations, no early stopping)
+ # Complex logic to manage improvement loops
+
+ # After: Using optimization cycle template
+ cycle_id = workflow.add_optimization_cycle(
+     processor_node="{processor}",
+     evaluator_node="{evaluator}",
+     convergence="quality > 0.95",
+     max_iterations=100
+ )
+
+ print(f"Created optimization cycle: {{cycle_id}}")
+ """
+
+         implementation_steps = [
+             f"Ensure '{processor}' generates/improves solutions",
+             f"Ensure '{evaluator}' produces quality metrics",
+             "Define appropriate convergence criteria",
+             "Apply optimization cycle template",
+             "Fine-tune convergence thresholds based on testing",
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Automatic convergence detection with early stopping for better performance",
+             risks=[
+                 "Convergence criteria may need tuning",
+                 "May require more iterations than fixed approach",
+             ],
+         )
+
+     def _create_data_quality_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create suggestion for data quality cycle conversion."""
+         nodes = opportunity.nodes
+         cleaner = nodes[0] if nodes else "cleaner"
+         validator = nodes[1] if len(nodes) > 1 else "validator"
+
+         code_example = f"""
+ # Before: Single-pass cleaning (may miss quality issues)
+ # Fixed cleaning pipeline without quality feedback
+
+ # After: Using data quality cycle template
+ cycle_id = workflow.add_data_quality_cycle(
+     cleaner_node="{cleaner}",
+     validator_node="{validator}",
+     quality_threshold=0.98,
+     max_iterations=5
+ )
+
+ print(f"Created data quality cycle: {{cycle_id}}")
+ """
+
+         implementation_steps = [
+             f"Ensure '{cleaner}' can improve data quality iteratively",
+             f"Ensure '{validator}' produces numeric quality scores",
+             "Define appropriate quality threshold",
+             "Apply data quality cycle template",
+             "Monitor quality improvements over iterations",
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Higher data quality through iterative improvement with automatic stopping",
+             risks=[
+                 "May increase processing time",
+                 "Quality metrics need to be meaningful",
+             ],
+         )
+
+     def _create_batch_processing_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create suggestion for batch processing cycle conversion."""
+         node = opportunity.nodes[0] if opportunity.nodes else "processor"
+
+         code_example = f"""
+ # Before: Manual batch handling (complex state management)
+ # Custom logic for batch iteration and completion
+
+ # After: Using batch processing cycle template
+ cycle_id = workflow.add_batch_processing_cycle(
+     processor_node="{node}",
+     batch_size=100,
+     total_items=10000  # If known
+ )
+
+ print(f"Created batch processing cycle: {{cycle_id}}")
+ """
+
+         implementation_steps = [
+             f"Modify '{node}' to process batches instead of full dataset",
+             "Determine appropriate batch size for memory constraints",
+             "Apply batch processing cycle template",
+             "Test with various dataset sizes",
+             "Monitor memory usage and processing time",
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Memory-efficient processing of large datasets with automatic batch management",
+             risks=[
+                 "Batch size may need tuning",
+                 "May change processing order/behavior",
+             ],
+         )
+
+     def _create_convergence_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create suggestion for convergence cycle conversion."""
+         node = opportunity.nodes[0] if opportunity.nodes else "processor"
+
+         code_example = f"""
+ # Before: Fixed iterations (may over/under-compute)
+ # Manual convergence checking
+
+ # After: Using convergence cycle template
+ cycle_id = workflow.add_convergence_cycle(
+     processor_node="{node}",
+     tolerance=0.001,
+     max_iterations=1000
+ )
+
+ print(f"Created convergence cycle: {{cycle_id}}")
+ """
+
+         implementation_steps = [
+             f"Ensure '{node}' produces numeric values for convergence checking",
+             "Determine appropriate tolerance for convergence",
+             "Apply convergence cycle template",
+             "Test with various starting conditions",
+             "Validate convergence behavior",
+         ]
+
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=implementation_steps,
+             code_example=code_example,
+             expected_outcome="Automatic convergence detection with optimal iteration count",
+             risks=[
+                 "Tolerance may need adjustment",
+                 "Convergence behavior may differ from fixed iterations",
+             ],
+         )
+
+     def _create_generic_suggestion(
+         self, opportunity: CyclificationOpportunity
+     ) -> CyclificationSuggestion:
+         """Create generic suggestion for unknown pattern types."""
+         return CyclificationSuggestion(
+             opportunity=opportunity,
+             implementation_steps=[
+                 "Analyze pattern manually",
+                 "Choose appropriate cycle template",
+             ],
+             code_example="# Manual analysis required",
+             expected_outcome="Pattern-specific benefits",
+             risks=["Requires manual analysis"],
+         )
+
+     def convert_to_cycle(
+         self,
+         nodes: List[str],
+         convergence_strategy: str = "error_reduction",
+         cycle_type: Optional[str] = None,
+         **kwargs,
+     ) -> str:
+         """
+         Convert specific nodes to a cycle using the specified strategy.
+
+         Args:
+             nodes: List of node IDs to include in the cycle
+             convergence_strategy: Strategy for convergence ("error_reduction", "quality_improvement", etc.)
+             cycle_type: Specific cycle type to use, or auto-detect if None
+             **kwargs: Additional parameters for cycle creation
+
+         Returns:
+             str: The created cycle identifier
+
+         Example:
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> cycle_id = converter.convert_to_cycle(
+             ...     nodes=["processor", "evaluator"],
+             ...     convergence_strategy="quality_improvement",
+             ...     max_iterations=50
+             ... )
+         """
+         if cycle_type is None:
+             cycle_type = self._detect_cycle_type(nodes, convergence_strategy)
+
+         if cycle_type == "optimization":
+             return self._convert_to_optimization_cycle(nodes, **kwargs)
+         elif cycle_type == "retry":
+             return self._convert_to_retry_cycle(nodes, **kwargs)
+         elif cycle_type == "data_quality":
+             return self._convert_to_data_quality_cycle(nodes, **kwargs)
+         elif cycle_type == "batch_processing":
+             return self._convert_to_batch_processing_cycle(nodes, **kwargs)
+         elif cycle_type == "convergence":
+             return self._convert_to_convergence_cycle(nodes, **kwargs)
+         else:
+             raise ValueError(f"Unknown cycle type: {cycle_type}")
+
+     def _detect_cycle_type(self, nodes: List[str], strategy: str) -> str:
+         """Detect the most appropriate cycle type for given nodes and strategy."""
+         if strategy in ("error_reduction", "quality_improvement"):
+             return "optimization"
+         elif strategy == "retry_logic":
+             return "retry"
+         elif strategy == "data_cleaning":
+             return "data_quality"
+         elif strategy == "batch_processing":
+             return "batch_processing"
+         elif strategy == "numerical_convergence":
+             return "convergence"
+         else:
+             # Default to optimization for unknown strategies
+             return "optimization"
+
+     def _convert_to_optimization_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to an optimization cycle."""
+         if len(nodes) < 2:
+             raise ValueError("Optimization cycle requires at least 2 nodes")
+
+         return CycleTemplates.optimization_cycle(
+             self.workflow, processor_node=nodes[0], evaluator_node=nodes[1], **kwargs
+         )
+
+     def _convert_to_retry_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to a retry cycle."""
+         if len(nodes) < 1:
+             raise ValueError("Retry cycle requires at least 1 node")
+
+         return CycleTemplates.retry_cycle(self.workflow, target_node=nodes[0], **kwargs)
+
+     def _convert_to_data_quality_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to a data quality cycle."""
+         if len(nodes) < 2:
+             raise ValueError("Data quality cycle requires at least 2 nodes")
+
+         return CycleTemplates.data_quality_cycle(
+             self.workflow, cleaner_node=nodes[0], validator_node=nodes[1], **kwargs
+         )
+
+     def _convert_to_batch_processing_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to a batch processing cycle."""
+         if len(nodes) < 1:
+             raise ValueError("Batch processing cycle requires at least 1 node")
+
+         return CycleTemplates.batch_processing_cycle(
+             self.workflow, processor_node=nodes[0], **kwargs
+         )
+
+     def _convert_to_convergence_cycle(self, nodes: List[str], **kwargs) -> str:
+         """Convert nodes to a convergence cycle."""
+         if len(nodes) < 1:
+             raise ValueError("Convergence cycle requires at least 1 node")
+
+         return CycleTemplates.convergence_cycle(
+             self.workflow, processor_node=nodes[0], **kwargs
+         )
+
+     def generate_migration_report(self) -> Dict[str, Any]:
+         """
+         Generate a comprehensive migration report with analysis and recommendations.
+
+         Returns:
+             Dict containing migration analysis and recommendations
+
+         Example:
+             >>> converter = DAGToCycleConverter(workflow)
+             >>> converter.analyze_cyclification_opportunities()
+             >>> report = converter.generate_migration_report()
+             >>> print(report['summary']['total_opportunities'])
+         """
+         opportunities = self.analyze_cyclification_opportunities()
+         suggestions = self.generate_detailed_suggestions()
+
+         # Categorize by pattern type
+         by_pattern = defaultdict(list)
+         for opp in opportunities:
+             by_pattern[opp.pattern_type].append(opp)
+
+         # Bucket opportunities by confidence level
+         high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
+         medium_confidence = [
+             opp for opp in opportunities if 0.4 <= opp.confidence < 0.7
+         ]
+         low_confidence = [opp for opp in opportunities if opp.confidence < 0.4]
+
+         return {
+             "summary": {
+                 "total_opportunities": len(opportunities),
+                 "high_confidence": len(high_confidence),
+                 "medium_confidence": len(medium_confidence),
+                 "low_confidence": len(low_confidence),
+                 "pattern_distribution": {k: len(v) for k, v in by_pattern.items()},
+             },
+             "opportunities": opportunities,
+             "detailed_suggestions": suggestions,
+             "recommendations": self._generate_migration_recommendations(opportunities),
+             "implementation_order": self._suggest_implementation_order(opportunities),
+         }
+
+     def _generate_migration_recommendations(
+         self, opportunities: List[CyclificationOpportunity]
+     ) -> List[str]:
+         """Generate high-level recommendations for migration."""
+         recommendations = []
+
+         high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
+         if high_confidence:
+             recommendations.append(
+                 f"Start with {len(high_confidence)} high-confidence opportunities for immediate benefits"
+             )
+
+         pattern_counts = defaultdict(int)
+         for opp in opportunities:
+             pattern_counts[opp.pattern_type] += 1
+
+         most_common = (
+             max(pattern_counts.items(), key=lambda x: x[1]) if pattern_counts else None
+         )
+         if most_common:
+             recommendations.append(
+                 f"Focus on {most_common[0]} patterns ({most_common[1]} opportunities) for consistency"
+             )
+
+         low_complexity = [
+             opp for opp in opportunities if opp.implementation_complexity == "low"
+         ]
+         if low_complexity:
+             recommendations.append(
+                 f"Begin with {len(low_complexity)} low-complexity conversions to build confidence"
+             )
+
+         return recommendations
+
+     def _suggest_implementation_order(
+         self, opportunities: List[CyclificationOpportunity]
+     ) -> List[Dict[str, Any]]:
+         """Suggest an order for implementing cyclification opportunities."""
+         # Sort by confidence (descending), then complexity (low=1, medium=2, high=3)
+         complexity_score = {"low": 1, "medium": 2, "high": 3}
+
+         def sort_key(opp):
+             return (
+                 -opp.confidence,
+                 complexity_score.get(opp.implementation_complexity, 2),
+             )
+
+         sorted_opportunities = sorted(opportunities, key=sort_key)
+
+         implementation_order = []
+         for i, opp in enumerate(sorted_opportunities, 1):
+             implementation_order.append(
+                 {
+                     "priority": i,
+                     "pattern_type": opp.pattern_type,
+                     "nodes": opp.nodes,
+                     "confidence": opp.confidence,
+                     "complexity": opp.implementation_complexity,
+                     "justification": f"Priority {i}: {opp.description}",
+                 }
+             )
+
+         return implementation_order