kailash 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +1 -1
- kailash/access_control.py +740 -0
- kailash/api/__main__.py +6 -0
- kailash/api/auth.py +668 -0
- kailash/api/custom_nodes.py +285 -0
- kailash/api/custom_nodes_secure.py +377 -0
- kailash/api/database.py +620 -0
- kailash/api/studio.py +915 -0
- kailash/api/studio_secure.py +893 -0
- kailash/mcp/__init__.py +53 -0
- kailash/mcp/__main__.py +13 -0
- kailash/mcp/ai_registry_server.py +712 -0
- kailash/mcp/client.py +447 -0
- kailash/mcp/client_new.py +334 -0
- kailash/mcp/server.py +293 -0
- kailash/mcp/server_new.py +336 -0
- kailash/mcp/servers/__init__.py +12 -0
- kailash/mcp/servers/ai_registry.py +289 -0
- kailash/nodes/__init__.py +4 -2
- kailash/nodes/ai/__init__.py +2 -0
- kailash/nodes/ai/a2a.py +714 -67
- kailash/nodes/ai/intelligent_agent_orchestrator.py +31 -37
- kailash/nodes/ai/iterative_llm_agent.py +1280 -0
- kailash/nodes/ai/llm_agent.py +324 -1
- kailash/nodes/ai/self_organizing.py +5 -6
- kailash/nodes/base.py +15 -2
- kailash/nodes/base_async.py +45 -0
- kailash/nodes/base_cycle_aware.py +374 -0
- kailash/nodes/base_with_acl.py +338 -0
- kailash/nodes/code/python.py +135 -27
- kailash/nodes/data/__init__.py +1 -2
- kailash/nodes/data/readers.py +16 -6
- kailash/nodes/data/sql.py +699 -256
- kailash/nodes/data/writers.py +16 -6
- kailash/nodes/logic/__init__.py +8 -0
- kailash/nodes/logic/convergence.py +642 -0
- kailash/nodes/logic/loop.py +153 -0
- kailash/nodes/logic/operations.py +187 -27
- kailash/nodes/mixins/__init__.py +11 -0
- kailash/nodes/mixins/mcp.py +228 -0
- kailash/nodes/mixins.py +387 -0
- kailash/runtime/__init__.py +2 -1
- kailash/runtime/access_controlled.py +458 -0
- kailash/runtime/local.py +106 -33
- kailash/runtime/parallel_cyclic.py +529 -0
- kailash/sdk_exceptions.py +90 -5
- kailash/security.py +845 -0
- kailash/tracking/manager.py +38 -15
- kailash/tracking/models.py +1 -1
- kailash/tracking/storage/filesystem.py +30 -2
- kailash/utils/__init__.py +8 -0
- kailash/workflow/__init__.py +18 -0
- kailash/workflow/convergence.py +270 -0
- kailash/workflow/cycle_analyzer.py +889 -0
- kailash/workflow/cycle_builder.py +579 -0
- kailash/workflow/cycle_config.py +725 -0
- kailash/workflow/cycle_debugger.py +860 -0
- kailash/workflow/cycle_exceptions.py +615 -0
- kailash/workflow/cycle_profiler.py +741 -0
- kailash/workflow/cycle_state.py +338 -0
- kailash/workflow/cyclic_runner.py +985 -0
- kailash/workflow/graph.py +500 -39
- kailash/workflow/migration.py +809 -0
- kailash/workflow/safety.py +365 -0
- kailash/workflow/templates.py +763 -0
- kailash/workflow/validation.py +751 -0
- {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/METADATA +259 -12
- kailash-0.2.1.dist-info/RECORD +125 -0
- kailash/nodes/mcp/__init__.py +0 -11
- kailash/nodes/mcp/client.py +0 -554
- kailash/nodes/mcp/resource.py +0 -682
- kailash/nodes/mcp/server.py +0 -577
- kailash-0.1.5.dist-info/RECORD +0 -88
- {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/WHEEL +0 -0
- {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.5.dist-info → kailash-0.2.1.dist-info}/top_level.txt +0 -0
kailash/workflow/migration.py (new file)
@@ -0,0 +1,809 @@
"""
Intelligent Migration System for DAG to Cyclic Workflow Conversion.

This module provides comprehensive tools to analyze existing DAG workflows
and intelligently suggest or automatically convert them to use cyclic patterns
where appropriate. It identifies optimization opportunities, provides detailed
implementation guidance, and automates the conversion process.

Design Philosophy:
    Provides intelligent analysis of existing workflows to identify patterns
    that would benefit from cyclification, offering both automated conversion
    and detailed guidance for manual implementation. Focuses on preserving
    workflow semantics while optimizing for performance and maintainability.

Key Features:
    - Pattern recognition for cyclification opportunities
    - Confidence scoring for conversion recommendations
    - Automated conversion with safety validation
    - Detailed implementation guidance with code examples
    - Risk assessment and migration planning
    - Template-based conversion for common patterns

Analysis Capabilities:
    - Retry pattern detection in manual implementations
    - Iterative improvement pattern identification
    - Data validation and cleaning pattern recognition
    - Batch processing pattern analysis
    - Numerical convergence pattern detection
    - Performance anti-pattern identification

Core Components:
    - CyclificationOpportunity: Identified conversion opportunity
    - CyclificationSuggestion: Detailed implementation guidance
    - DAGToCycleConverter: Main analysis and conversion engine
    - Pattern detection algorithms for common workflows

Conversion Strategy:
    - Non-destructive analysis preserving original workflows
    - Confidence-based prioritization of opportunities
    - Template-based conversion for reliability
    - Comprehensive validation of converted workflows
    - Rollback capabilities for failed conversions

Upstream Dependencies:
    - Existing workflow structures and node implementations
    - CycleTemplates for automated conversion patterns
    - Workflow validation and safety systems

Downstream Consumers:
    - Workflow development tools and IDEs
    - Automated workflow optimization systems
    - Migration planning and execution tools
    - Performance optimization recommendations
    - Educational and training systems

Examples:
    Analyze workflow for opportunities:

    >>> from kailash.workflow.migration import DAGToCycleConverter
    >>> converter = DAGToCycleConverter(existing_workflow)
    >>> opportunities = converter.analyze_cyclification_opportunities()
    >>> for opp in opportunities:
    ...     print(f"Found {opp.pattern_type}: {opp.description}")
    ...     print(f"Confidence: {opp.confidence:.2f}")
    ...     print(f"Expected benefit: {opp.estimated_benefit}")

    Generate detailed migration guidance:

    >>> suggestions = converter.generate_detailed_suggestions()
    >>> for suggestion in suggestions:
    ...     print(f"Found {suggestion.opportunity.pattern_type}")
    ...     print(f"Implementation steps:")
    ...     for step in suggestion.implementation_steps:
    ...         print(f"  {step}")
    ...     print(f"Code example: {suggestion.code_example}")
    ...     print(f"Expected outcome: {suggestion.expected_outcome}")

    Automated conversion:

    >>> # Convert specific nodes to cycle
    >>> cycle_id = converter.convert_to_cycle(
    ...     nodes=["processor", "evaluator"],
    ...     convergence_strategy="quality_improvement",
    ...     max_iterations=50
    ... )
    >>> print(f"Created cycle: {cycle_id}")

    Comprehensive migration report:

    >>> report = converter.generate_migration_report()
    >>> print(f"Total opportunities: {report['summary']['total_opportunities']}")
    >>> print(f"High confidence: {report['summary']['high_confidence']}")
    >>> # Implementation priority order
    >>> for item in report['implementation_order']:
    ...     print(f"{item['priority']}: {item['justification']}")

See Also:
    - :mod:`kailash.workflow.templates` for conversion patterns
    - :mod:`kailash.workflow.validation` for workflow analysis
    - :doc:`/guides/migration` for migration best practices
"""

import re
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from . import Workflow
from .templates import CycleTemplates


@dataclass
class CyclificationOpportunity:
    """Represents an opportunity to convert a DAG pattern to a cycle."""

    nodes: List[str]
    pattern_type: str
    confidence: float
    description: str
    suggested_convergence: Optional[str] = None
    estimated_benefit: str = "unknown"
    implementation_complexity: str = "medium"


@dataclass
class CyclificationSuggestion:
    """Detailed suggestion for converting nodes to a cycle."""

    opportunity: CyclificationOpportunity
    implementation_steps: List[str]
    code_example: str
    expected_outcome: str
    risks: List[str]


class DAGToCycleConverter:
    """
    Analyzer and converter for transforming DAG workflows into cyclic workflows.

    This class helps identify patterns in existing workflows that could benefit
    from cyclic execution and provides tools to convert them.
    """

    def __init__(self, workflow: Workflow):
        """
        Initialize converter with target workflow.

        Args:
            workflow: The workflow to analyze and potentially convert
        """
        self.workflow = workflow
        self.graph = workflow.graph
        self.opportunities: List[CyclificationOpportunity] = []

    def analyze_cyclification_opportunities(self) -> List[CyclificationOpportunity]:
        """
        Analyze workflow for patterns that could benefit from cyclification.

        Returns:
            List of identified cyclification opportunities

        Example:
            >>> workflow = create_example_workflow()
            >>> converter = DAGToCycleConverter(workflow)
            >>> opportunities = converter.analyze_cyclification_opportunities()
            >>> for opp in opportunities:
            ...     print(f"{opp.pattern_type}: {opp.description}")
        """
        self.opportunities = []

        # Analyze different patterns
        self._detect_retry_patterns()
        self._detect_iterative_improvement_patterns()
        self._detect_validation_patterns()
        self._detect_batch_processing_patterns()
        self._detect_convergence_patterns()

        # Sort by confidence and potential benefit
        self.opportunities.sort(key=lambda x: x.confidence, reverse=True)

        return self.opportunities

    def _detect_retry_patterns(self):
        """Detect patterns that look like manual retry logic."""
        nodes = self.workflow.nodes

        # Look for nodes with similar names suggesting retry logic
        retry_patterns = [
            r".*[_\-]retry[_\-]?.*",
            r".*[_\-]attempt[_\-]?[0-9]*",
            r".*[_\-]backup[_\-]?.*",
            r".*[_\-]fallback[_\-]?.*",
            r".*[_\-]redundant[_\-]?.*",
            r".*[_\-]failover[_\-]?.*",
        ]

        for node_id, node in nodes.items():
            for pattern in retry_patterns:
                if re.match(pattern, node_id, re.IGNORECASE):
                    # Found potential retry pattern
                    related_nodes = self._find_related_nodes(node_id)

                    opportunity = CyclificationOpportunity(
                        nodes=[node_id] + related_nodes,
                        pattern_type="retry_cycle",
                        confidence=0.7,
                        description=f"Node '{node_id}' appears to implement retry logic manually",
                        suggested_convergence="success == True",
                        estimated_benefit="improved_reliability",
                        implementation_complexity="low",
                    )
                    self.opportunities.append(opportunity)

    def _detect_iterative_improvement_patterns(self):
        """Detect patterns that perform iterative improvement."""
        nodes = self.workflow.nodes

        # Look for processor-evaluator pairs
        improvement_keywords = ["process", "improve", "optimize", "refine", "enhance"]
        evaluation_keywords = ["evaluate", "assess", "validate", "check", "score"]

        processors = []
        evaluators = []

        for node_id in nodes:
            node_id_lower = node_id.lower()
            if any(keyword in node_id_lower for keyword in improvement_keywords):
                processors.append(node_id)
            if any(keyword in node_id_lower for keyword in evaluation_keywords):
                evaluators.append(node_id)

        # Look for processor-evaluator pairs that are connected
        for processor in processors:
            for evaluator in evaluators:
                if self._are_connected(processor, evaluator):
                    opportunity = CyclificationOpportunity(
                        nodes=[processor, evaluator],
                        pattern_type="optimization_cycle",
                        confidence=0.8,
                        description=f"'{processor}' and '{evaluator}' form iterative improvement pattern",
                        suggested_convergence="quality > 0.9",
                        estimated_benefit="automatic_convergence",
                        implementation_complexity="medium",
                    )
                    self.opportunities.append(opportunity)

    def _detect_validation_patterns(self):
        """Detect data validation and cleaning patterns."""
        nodes = self.workflow.nodes

        cleaning_keywords = ["clean", "sanitize", "normalize", "transform"]
        validation_keywords = ["validate", "verify", "check", "audit"]

        cleaners = []
        validators = []

        for node_id in nodes:
            node_id_lower = node_id.lower()
            if any(keyword in node_id_lower for keyword in cleaning_keywords):
                cleaners.append(node_id)
            if any(keyword in node_id_lower for keyword in validation_keywords):
                validators.append(node_id)

        # Look for cleaner-validator pairs
        for cleaner in cleaners:
            for validator in validators:
                if self._are_connected(cleaner, validator):
                    opportunity = CyclificationOpportunity(
                        nodes=[cleaner, validator],
                        pattern_type="data_quality_cycle",
                        confidence=0.75,
                        description=f"'{cleaner}' and '{validator}' form data quality improvement pattern",
                        suggested_convergence="quality_score >= 0.95",
                        estimated_benefit="improved_data_quality",
                        implementation_complexity="low",
                    )
                    self.opportunities.append(opportunity)

    def _detect_batch_processing_patterns(self):
        """Detect patterns that process data in chunks."""
        nodes = self.workflow.nodes

        batch_keywords = ["batch", "chunk", "segment", "partition", "split"]

        for node_id in nodes:
            node_id_lower = node_id.lower()
            if any(keyword in node_id_lower for keyword in batch_keywords):
                opportunity = CyclificationOpportunity(
                    nodes=[node_id],
                    pattern_type="batch_processing_cycle",
                    confidence=0.6,
                    description=f"'{node_id}' appears to process data in batches",
                    suggested_convergence="all_batches_processed == True",
                    estimated_benefit="memory_efficiency",
                    implementation_complexity="medium",
                )
                self.opportunities.append(opportunity)

    def _detect_convergence_patterns(self):
        """Detect numerical convergence patterns."""
        nodes = self.workflow.nodes

        convergence_keywords = [
            "converge",
            "iterate",
            "approximate",
            "solve",
            "calculate",
        ]

        for node_id in nodes:
            node_id_lower = node_id.lower()
            if any(keyword in node_id_lower for keyword in convergence_keywords):
                opportunity = CyclificationOpportunity(
                    nodes=[node_id],
                    pattern_type="convergence_cycle",
                    confidence=0.5,
                    description=f"'{node_id}' may perform iterative calculations",
                    suggested_convergence="difference < 0.001",
                    estimated_benefit="numerical_stability",
                    implementation_complexity="high",
                )
                self.opportunities.append(opportunity)

    def _find_related_nodes(self, node_id: str) -> List[str]:
        """Find nodes that are closely related to the given node."""
        related = []

        # Find direct connections from NetworkX graph
        graph = self.workflow.graph

        # Find predecessors and successors
        if node_id in graph:
            related.extend(graph.predecessors(node_id))
            related.extend(graph.successors(node_id))

        return list(set(related))

    def _are_connected(self, node1: str, node2: str) -> bool:
        """Check if two nodes are directly connected."""
        graph = self.workflow.graph

        # Check if there's an edge between the nodes in either direction
        return graph.has_edge(node1, node2) or graph.has_edge(node2, node1)

    def generate_detailed_suggestions(self) -> List[CyclificationSuggestion]:
        """
        Generate detailed suggestions with implementation guidance.

        Returns:
            List of detailed suggestions for cyclification

        Example:
            >>> converter = DAGToCycleConverter(workflow)
            >>> converter.analyze_cyclification_opportunities()
            >>> suggestions = converter.generate_detailed_suggestions()
            >>> for suggestion in suggestions:
            ...     print(suggestion.code_example)
        """
        suggestions = []

        for opportunity in self.opportunities:
            suggestion = self._create_detailed_suggestion(opportunity)
            suggestions.append(suggestion)

        return suggestions

    def _create_detailed_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create detailed implementation suggestion for an opportunity."""

        if opportunity.pattern_type == "retry_cycle":
            return self._create_retry_suggestion(opportunity)
        elif opportunity.pattern_type == "optimization_cycle":
            return self._create_optimization_suggestion(opportunity)
        elif opportunity.pattern_type == "data_quality_cycle":
            return self._create_data_quality_suggestion(opportunity)
        elif opportunity.pattern_type == "batch_processing_cycle":
            return self._create_batch_processing_suggestion(opportunity)
        elif opportunity.pattern_type == "convergence_cycle":
            return self._create_convergence_suggestion(opportunity)
        else:
            return self._create_generic_suggestion(opportunity)

    def _create_retry_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create suggestion for retry cycle conversion."""
        main_node = opportunity.nodes[0]

        code_example = f"""
# Before: Manual retry logic (complex, error-prone)
# Multiple nodes handling retries manually

# After: Using retry cycle template
cycle_id = workflow.add_retry_cycle(
    target_node="{main_node}",
    max_retries=3,
    backoff_strategy="exponential",
    success_condition="success == True"
)

print(f"Created retry cycle: {{cycle_id}}")
"""

        implementation_steps = [
            f"Identify the main node that needs retry logic: '{main_node}'",
            "Remove manual retry handling from existing nodes",
            "Apply retry cycle template with appropriate parameters",
            "Test with failure scenarios to ensure proper retry behavior",
            "Monitor retry patterns in production",
        ]

        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=implementation_steps,
            code_example=code_example,
            expected_outcome="Simplified retry logic with exponential backoff and better error handling",
            risks=[
                "May change timing of operations",
                "Retry behavior might differ from manual implementation",
            ],
        )

    def _create_optimization_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create suggestion for optimization cycle conversion."""
        nodes = opportunity.nodes
        processor = nodes[0] if nodes else "processor"
        evaluator = nodes[1] if len(nodes) > 1 else "evaluator"

        code_example = f"""
# Before: Manual iterative improvement (fixed iterations, no early stopping)
# Complex logic to manage improvement loops

# After: Using optimization cycle template
cycle_id = workflow.add_optimization_cycle(
    processor_node="{processor}",
    evaluator_node="{evaluator}",
    convergence="quality > 0.95",
    max_iterations=100
)

print(f"Created optimization cycle: {{cycle_id}}")
"""

        implementation_steps = [
            f"Ensure '{processor}' generates/improves solutions",
            f"Ensure '{evaluator}' produces quality metrics",
            "Define appropriate convergence criteria",
            "Apply optimization cycle template",
            "Fine-tune convergence thresholds based on testing",
        ]

        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=implementation_steps,
            code_example=code_example,
            expected_outcome="Automatic convergence detection with early stopping for better performance",
            risks=[
                "Convergence criteria may need tuning",
                "May require more iterations than fixed approach",
            ],
        )

    def _create_data_quality_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create suggestion for data quality cycle conversion."""
        nodes = opportunity.nodes
        cleaner = nodes[0] if nodes else "cleaner"
        validator = nodes[1] if len(nodes) > 1 else "validator"

        code_example = f"""
# Before: Single-pass cleaning (may miss quality issues)
# Fixed cleaning pipeline without quality feedback

# After: Using data quality cycle template
cycle_id = workflow.add_data_quality_cycle(
    cleaner_node="{cleaner}",
    validator_node="{validator}",
    quality_threshold=0.98,
    max_iterations=5
)

print(f"Created data quality cycle: {{cycle_id}}")
"""

        implementation_steps = [
            f"Ensure '{cleaner}' can improve data quality iteratively",
            f"Ensure '{validator}' produces numeric quality scores",
            "Define appropriate quality threshold",
            "Apply data quality cycle template",
            "Monitor quality improvements over iterations",
        ]

        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=implementation_steps,
            code_example=code_example,
            expected_outcome="Higher data quality through iterative improvement with automatic stopping",
            risks=[
                "May increase processing time",
                "Quality metrics need to be meaningful",
            ],
        )

    def _create_batch_processing_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create suggestion for batch processing cycle conversion."""
        node = opportunity.nodes[0] if opportunity.nodes else "processor"

        code_example = f"""
# Before: Manual batch handling (complex state management)
# Custom logic for batch iteration and completion

# After: Using batch processing cycle template
cycle_id = workflow.add_batch_processing_cycle(
    processor_node="{node}",
    batch_size=100,
    total_items=10000  # If known
)

print(f"Created batch processing cycle: {{cycle_id}}")
"""

        implementation_steps = [
            f"Modify '{node}' to process batches instead of full dataset",
            "Determine appropriate batch size for memory constraints",
            "Apply batch processing cycle template",
            "Test with various dataset sizes",
            "Monitor memory usage and processing time",
        ]

        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=implementation_steps,
            code_example=code_example,
            expected_outcome="Memory-efficient processing of large datasets with automatic batch management",
            risks=[
                "Batch size may need tuning",
                "May change processing order/behavior",
            ],
        )

    def _create_convergence_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create suggestion for convergence cycle conversion."""
        node = opportunity.nodes[0] if opportunity.nodes else "processor"

        code_example = f"""
# Before: Fixed iterations (may over/under-compute)
# Manual convergence checking

# After: Using convergence cycle template
cycle_id = workflow.add_convergence_cycle(
    processor_node="{node}",
    tolerance=0.001,
    max_iterations=1000
)

print(f"Created convergence cycle: {{cycle_id}}")
"""

        implementation_steps = [
            f"Ensure '{node}' produces numeric values for convergence checking",
            "Determine appropriate tolerance for convergence",
            "Apply convergence cycle template",
            "Test with various starting conditions",
            "Validate convergence behavior",
        ]

        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=implementation_steps,
            code_example=code_example,
            expected_outcome="Automatic convergence detection with optimal iteration count",
            risks=[
                "Tolerance may need adjustment",
                "Convergence behavior may differ from fixed iterations",
            ],
        )

    def _create_generic_suggestion(
        self, opportunity: CyclificationOpportunity
    ) -> CyclificationSuggestion:
        """Create generic suggestion for unknown pattern types."""
        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=[
                "Analyze pattern manually",
                "Choose appropriate cycle template",
            ],
            code_example="# Manual analysis required",
            expected_outcome="Pattern-specific benefits",
            risks=["Requires manual analysis"],
        )

    def convert_to_cycle(
        self,
        nodes: List[str],
        convergence_strategy: str = "error_reduction",
        cycle_type: Optional[str] = None,
        **kwargs,
    ) -> str:
        """
        Convert specific nodes to a cycle using the specified strategy.

        Args:
            nodes: List of node IDs to include in the cycle
            convergence_strategy: Strategy for convergence ("error_reduction", "quality_improvement", etc.)
            cycle_type: Specific cycle type to use, or auto-detect if None
            **kwargs: Additional parameters for cycle creation

        Returns:
            str: The created cycle identifier

        Example:
            >>> converter = DAGToCycleConverter(workflow)
            >>> cycle_id = converter.convert_to_cycle(
            ...     nodes=["processor", "evaluator"],
            ...     convergence_strategy="quality_improvement",
            ...     max_iterations=50
            ... )
        """
        if cycle_type is None:
            cycle_type = self._detect_cycle_type(nodes, convergence_strategy)

        if cycle_type == "optimization":
            return self._convert_to_optimization_cycle(nodes, **kwargs)
        elif cycle_type == "retry":
            return self._convert_to_retry_cycle(nodes, **kwargs)
        elif cycle_type == "data_quality":
            return self._convert_to_data_quality_cycle(nodes, **kwargs)
        elif cycle_type == "batch_processing":
            return self._convert_to_batch_processing_cycle(nodes, **kwargs)
        elif cycle_type == "convergence":
            return self._convert_to_convergence_cycle(nodes, **kwargs)
        else:
            raise ValueError(f"Unknown cycle type: {cycle_type}")

    def _detect_cycle_type(self, nodes: List[str], strategy: str) -> str:
        """Detect the most appropriate cycle type for given nodes and strategy."""
        if strategy == "error_reduction" or strategy == "quality_improvement":
            return "optimization"
        elif strategy == "retry_logic":
            return "retry"
        elif strategy == "data_cleaning":
            return "data_quality"
        elif strategy == "batch_processing":
            return "batch_processing"
        elif strategy == "numerical_convergence":
            return "convergence"
        else:
            # Default to optimization for unknown strategies
            return "optimization"

    def _convert_to_optimization_cycle(self, nodes: List[str], **kwargs) -> str:
        """Convert nodes to optimization cycle."""
        if len(nodes) < 2:
            raise ValueError("Optimization cycle requires at least 2 nodes")

        return CycleTemplates.optimization_cycle(
            self.workflow, processor_node=nodes[0], evaluator_node=nodes[1], **kwargs
        )

    def _convert_to_retry_cycle(self, nodes: List[str], **kwargs) -> str:
        """Convert nodes to retry cycle."""
        if len(nodes) < 1:
            raise ValueError("Retry cycle requires at least 1 node")

        return CycleTemplates.retry_cycle(self.workflow, target_node=nodes[0], **kwargs)

    def _convert_to_data_quality_cycle(self, nodes: List[str], **kwargs) -> str:
        """Convert nodes to data quality cycle."""
        if len(nodes) < 2:
            raise ValueError("Data quality cycle requires at least 2 nodes")

        return CycleTemplates.data_quality_cycle(
            self.workflow, cleaner_node=nodes[0], validator_node=nodes[1], **kwargs
        )

    def _convert_to_batch_processing_cycle(self, nodes: List[str], **kwargs) -> str:
        """Convert nodes to batch processing cycle."""
        if len(nodes) < 1:
            raise ValueError("Batch processing cycle requires at least 1 node")

        return CycleTemplates.batch_processing_cycle(
            self.workflow, processor_node=nodes[0], **kwargs
        )

    def _convert_to_convergence_cycle(self, nodes: List[str], **kwargs) -> str:
        """Convert nodes to convergence cycle."""
        if len(nodes) < 1:
            raise ValueError("Convergence cycle requires at least 1 node")

        return CycleTemplates.convergence_cycle(
            self.workflow, processor_node=nodes[0], **kwargs
        )

    def generate_migration_report(self) -> Dict[str, Any]:
        """
        Generate comprehensive migration report with analysis and recommendations.

        Returns:
            Dict containing migration analysis and recommendations

        Example:
            >>> converter = DAGToCycleConverter(workflow)
            >>> converter.analyze_cyclification_opportunities()
            >>> report = converter.generate_migration_report()
            >>> print(report['summary']['total_opportunities'])
        """
        opportunities = self.analyze_cyclification_opportunities()
        suggestions = self.generate_detailed_suggestions()

        # Categorize by pattern type
        by_pattern = defaultdict(list)
        for opp in opportunities:
            by_pattern[opp.pattern_type].append(opp)

        # Calculate potential benefits
        high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
        medium_confidence = [
            opp for opp in opportunities if 0.4 <= opp.confidence < 0.7
        ]
        low_confidence = [opp for opp in opportunities if opp.confidence < 0.4]

        return {
            "summary": {
                "total_opportunities": len(opportunities),
                "high_confidence": len(high_confidence),
                "medium_confidence": len(medium_confidence),
                "low_confidence": len(low_confidence),
                "pattern_distribution": {k: len(v) for k, v in by_pattern.items()},
            },
            "opportunities": opportunities,
            "detailed_suggestions": suggestions,
            "recommendations": self._generate_migration_recommendations(opportunities),
            "implementation_order": self._suggest_implementation_order(opportunities),
        }

    def _generate_migration_recommendations(
        self, opportunities: List[CyclificationOpportunity]
    ) -> List[str]:
        """Generate high-level recommendations for migration."""
        recommendations = []

        high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
        if high_confidence:
            recommendations.append(
                f"Start with {len(high_confidence)} high-confidence opportunities for immediate benefits"
            )

        pattern_counts = defaultdict(int)
        for opp in opportunities:
            pattern_counts[opp.pattern_type] += 1

        most_common = (
            max(pattern_counts.items(), key=lambda x: x[1]) if pattern_counts else None
        )
        if most_common:
            recommendations.append(
                f"Focus on {most_common[0]} patterns ({most_common[1]} opportunities) for consistency"
            )

        low_complexity = [
            opp for opp in opportunities if opp.implementation_complexity == "low"
        ]
        if low_complexity:
            recommendations.append(
                f"Begin with {len(low_complexity)} low-complexity conversions to build confidence"
            )

        return recommendations

    def _suggest_implementation_order(
        self, opportunities: List[CyclificationOpportunity]
    ) -> List[Dict[str, Any]]:
        """Suggest order for implementing cyclification opportunities."""
        # Sort by: confidence desc, complexity asc (low=1, medium=2, high=3)
        complexity_score = {"low": 1, "medium": 2, "high": 3}

        def sort_key(opp):
            return (
                -opp.confidence,
                complexity_score.get(opp.implementation_complexity, 2),
            )

        sorted_opportunities = sorted(opportunities, key=sort_key)

        implementation_order = []
        for i, opp in enumerate(sorted_opportunities, 1):
            implementation_order.append(
                {
                    "priority": i,
                    "pattern_type": opp.pattern_type,
                    "nodes": opp.nodes,
                    "confidence": opp.confidence,
                    "complexity": opp.implementation_complexity,
                    "justification": f"Priority {i}: {opp.description}",
                }
            )

        return implementation_order