kailash 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/mcp/server_new.py +6 -6
- kailash/nodes/data/__init__.py +1 -2
- kailash/nodes/data/sql.py +699 -256
- kailash/workflow/cycle_analyzer.py +346 -225
- kailash/workflow/cycle_builder.py +75 -69
- kailash/workflow/cycle_config.py +62 -46
- kailash/workflow/cycle_debugger.py +284 -184
- kailash/workflow/cycle_exceptions.py +111 -97
- kailash/workflow/cycle_profiler.py +272 -202
- kailash/workflow/migration.py +238 -197
- kailash/workflow/templates.py +124 -105
- kailash/workflow/validation.py +356 -298
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/METADATA +4 -1
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/RECORD +18 -18
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/WHEEL +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/entry_points.txt +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.2.0.dist-info → kailash-0.2.1.dist-info}/top_level.txt +0 -0
kailash/workflow/migration.py
CHANGED
@@ -55,7 +55,7 @@ Downstream Consumers:
 
 Examples:
     Analyze workflow for opportunities:
-
+
    >>> from kailash.workflow.migration import DAGToCycleConverter
    >>> converter = DAGToCycleConverter(existing_workflow)
    >>> opportunities = converter.analyze_cyclification_opportunities()
@@ -63,9 +63,9 @@ Examples:
     ...     print(f"Found {opp.pattern_type}: {opp.description}")
     ...     print(f"Confidence: {opp.confidence:.2f}")
     ...     print(f"Expected benefit: {opp.estimated_benefit}")
-
+
    Generate detailed migration guidance:
-
+
    >>> suggestions = converter.generate_detailed_suggestions()
    >>> for suggestion in suggestions:
    ...     print(f"Found {suggestion.opportunity.pattern_type}")
@@ -74,9 +74,9 @@ Examples:
     ...     print(f"  {step}")
     ...     print(f"Code example: {suggestion.code_example}")
     ...     print(f"Expected outcome: {suggestion.expected_outcome}")
-
+
    Automated conversion:
-
+
    >>> # Convert specific nodes to cycle
    >>> cycle_id = converter.convert_to_cycle(
    ...     nodes=["processor", "evaluator"],
@@ -84,9 +84,9 @@ Examples:
     ...     max_iterations=50
     ... )
     >>> print(f"Created cycle: {cycle_id}")
-
+
    Comprehensive migration report:
-
+
    >>> report = converter.generate_migration_report()
    >>> print(f"Total opportunities: {report['summary']['total_opportunities']}")
    >>> print(f"High confidence: {report['summary']['high_confidence']}")
@@ -100,10 +100,10 @@ See Also:
     - :doc:`/guides/migration` for migration best practices
 """
 
-from typing import Dict, Any, List, Optional
-from dataclasses import dataclass
-from collections import defaultdict
 import re
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
 
 from . import Workflow
 from .templates import CycleTemplates
@@ -112,6 +112,7 @@ from .templates import CycleTemplates
 @dataclass
 class CyclificationOpportunity:
     """Represents an opportunity to convert a DAG pattern to a cycle."""
+
     nodes: List[str]
     pattern_type: str
     confidence: float
@@ -124,6 +125,7 @@ class CyclificationOpportunity:
 @dataclass
 class CyclificationSuggestion:
     """Detailed suggestion for converting nodes to a cycle."""
+
     opportunity: CyclificationOpportunity
     implementation_steps: List[str]
     code_example: str
@@ -134,29 +136,29 @@ class CyclificationSuggestion:
 class DAGToCycleConverter:
     """
     Analyzer and converter for transforming DAG workflows into cyclic workflows.
-
+
     This class helps identify patterns in existing workflows that could benefit
     from cyclic execution and provides tools to convert them.
     """
-
+
     def __init__(self, workflow: Workflow):
         """
         Initialize converter with target workflow.
-
+
         Args:
             workflow: The workflow to analyze and potentially convert
         """
         self.workflow = workflow
         self.graph = workflow.graph
         self.opportunities: List[CyclificationOpportunity] = []
-
+
     def analyze_cyclification_opportunities(self) -> List[CyclificationOpportunity]:
         """
         Analyze workflow for patterns that could benefit from cyclification.
-
+
         Returns:
             List of identified cyclification opportunities
-
+
         Example:
             >>> workflow = create_example_workflow()
             >>> converter = DAGToCycleConverter(workflow)
@@ -165,39 +167,39 @@ class DAGToCycleConverter:
             ...     print(f"{opp.pattern_type}: {opp.description}")
         """
         self.opportunities = []
-
+
         # Analyze different patterns
         self._detect_retry_patterns()
         self._detect_iterative_improvement_patterns()
         self._detect_validation_patterns()
         self._detect_batch_processing_patterns()
         self._detect_convergence_patterns()
-
+
         # Sort by confidence and potential benefit
         self.opportunities.sort(key=lambda x: x.confidence, reverse=True)
-
+
         return self.opportunities
-
+
     def _detect_retry_patterns(self):
         """Detect patterns that look like manual retry logic."""
         nodes = self.workflow.nodes
-
+
         # Look for nodes with similar names suggesting retry logic
         retry_patterns = [
-            r'.*[_\-]retry[_\-]?.*',
-            r'.*[_\-]attempt[_\-]?[0-9]*',
-            r'.*[_\-]backup[_\-]?.*',
-            r'.*[_\-]fallback[_\-]?.*',
-            r'.*[_\-]redundant[_\-]?.*',
-            r'.*[_\-]failover[_\-]?.*',
+            r".*[_\-]retry[_\-]?.*",
+            r".*[_\-]attempt[_\-]?[0-9]*",
+            r".*[_\-]backup[_\-]?.*",
+            r".*[_\-]fallback[_\-]?.*",
+            r".*[_\-]redundant[_\-]?.*",
+            r".*[_\-]failover[_\-]?.*",
         ]
-
+
         for node_id, node in nodes.items():
             for pattern in retry_patterns:
                 if re.match(pattern, node_id, re.IGNORECASE):
                     # Found potential retry pattern
                     related_nodes = self._find_related_nodes(node_id)
-
+
                     opportunity = CyclificationOpportunity(
                         nodes=[node_id] + related_nodes,
                         pattern_type="retry_cycle",
@@ -205,28 +207,28 @@ class DAGToCycleConverter:
                         description=f"Node '{node_id}' appears to implement retry logic manually",
                         suggested_convergence="success == True",
                         estimated_benefit="improved_reliability",
-                        implementation_complexity="low"
+                        implementation_complexity="low",
                     )
                     self.opportunities.append(opportunity)
-
+
     def _detect_iterative_improvement_patterns(self):
         """Detect patterns that perform iterative improvement."""
         nodes = self.workflow.nodes
-
+
         # Look for processor-evaluator pairs
-        improvement_keywords = ['process', 'improve', 'optimize', 'refine', 'enhance']
-        evaluation_keywords = ['evaluate', 'assess', 'validate', 'check', 'score']
-
+        improvement_keywords = ["process", "improve", "optimize", "refine", "enhance"]
+        evaluation_keywords = ["evaluate", "assess", "validate", "check", "score"]
+
         processors = []
         evaluators = []
-
+
         for node_id in nodes:
             node_id_lower = node_id.lower()
             if any(keyword in node_id_lower for keyword in improvement_keywords):
                 processors.append(node_id)
             if any(keyword in node_id_lower for keyword in evaluation_keywords):
                 evaluators.append(node_id)
-
+
         # Look for processor-evaluator pairs that are connected
         for processor in processors:
             for evaluator in evaluators:
@@ -238,27 +240,27 @@ class DAGToCycleConverter:
                     description=f"'{processor}' and '{evaluator}' form iterative improvement pattern",
                     suggested_convergence="quality > 0.9",
                     estimated_benefit="automatic_convergence",
-                    implementation_complexity="medium"
+                    implementation_complexity="medium",
                 )
                 self.opportunities.append(opportunity)
-
+
     def _detect_validation_patterns(self):
         """Detect data validation and cleaning patterns."""
         nodes = self.workflow.nodes
-
-        cleaning_keywords = ['clean', 'sanitize', 'normalize', 'transform']
-        validation_keywords = ['validate', 'verify', 'check', 'audit']
-
+
+        cleaning_keywords = ["clean", "sanitize", "normalize", "transform"]
+        validation_keywords = ["validate", "verify", "check", "audit"]
+
         cleaners = []
         validators = []
-
+
         for node_id in nodes:
             node_id_lower = node_id.lower()
             if any(keyword in node_id_lower for keyword in cleaning_keywords):
                 cleaners.append(node_id)
             if any(keyword in node_id_lower for keyword in validation_keywords):
                 validators.append(node_id)
-
+
         # Look for cleaner-validator pairs
         for cleaner in cleaners:
             for validator in validators:
@@ -270,16 +272,16 @@ class DAGToCycleConverter:
                     description=f"'{cleaner}' and '{validator}' form data quality improvement pattern",
                     suggested_convergence="quality_score >= 0.95",
                     estimated_benefit="improved_data_quality",
-                    implementation_complexity="low"
+                    implementation_complexity="low",
                 )
                 self.opportunities.append(opportunity)
-
+
     def _detect_batch_processing_patterns(self):
         """Detect patterns that process data in chunks."""
         nodes = self.workflow.nodes
-
-        batch_keywords = ['batch', 'chunk', 'segment', 'partition', 'split']
-
+
+        batch_keywords = ["batch", "chunk", "segment", "partition", "split"]
+
         for node_id in nodes:
             node_id_lower = node_id.lower()
             if any(keyword in node_id_lower for keyword in batch_keywords):
@@ -290,16 +292,22 @@ class DAGToCycleConverter:
                 description=f"'{node_id}' appears to process data in batches",
                 suggested_convergence="all_batches_processed == True",
                 estimated_benefit="memory_efficiency",
-                implementation_complexity="medium"
+                implementation_complexity="medium",
             )
             self.opportunities.append(opportunity)
-
+
     def _detect_convergence_patterns(self):
         """Detect numerical convergence patterns."""
         nodes = self.workflow.nodes
-
-        convergence_keywords = ['converge', 'iterate', 'approximate', 'solve', 'calculate']
-
+
+        convergence_keywords = [
+            "converge",
+            "iterate",
+            "approximate",
+            "solve",
+            "calculate",
+        ]
+
         for node_id in nodes:
             node_id_lower = node_id.lower()
             if any(keyword in node_id_lower for keyword in convergence_keywords):
@@ -310,38 +318,38 @@ class DAGToCycleConverter:
                 description=f"'{node_id}' may perform iterative calculations",
                 suggested_convergence="difference < 0.001",
                 estimated_benefit="numerical_stability",
-                implementation_complexity="high"
+                implementation_complexity="high",
             )
             self.opportunities.append(opportunity)
-
+
     def _find_related_nodes(self, node_id: str) -> List[str]:
         """Find nodes that are closely related to the given node."""
         related = []
-
+
         # Find direct connections from NetworkX graph
         graph = self.workflow.graph
-
+
         # Find predecessors and successors
         if node_id in graph:
             related.extend(graph.predecessors(node_id))
             related.extend(graph.successors(node_id))
-
+
         return list(set(related))
-
+
     def _are_connected(self, node1: str, node2: str) -> bool:
         """Check if two nodes are directly connected."""
         graph = self.workflow.graph
-
+
         # Check if there's an edge between the nodes in either direction
         return graph.has_edge(node1, node2) or graph.has_edge(node2, node1)
-
+
     def generate_detailed_suggestions(self) -> List[CyclificationSuggestion]:
         """
         Generate detailed suggestions with implementation guidance.
-
+
         Returns:
             List of detailed suggestions for cyclification
-
+
         Example:
             >>> converter = DAGToCycleConverter(workflow)
             >>> converter.analyze_cyclification_opportunities()
@@ -350,16 +358,18 @@ class DAGToCycleConverter:
             ...     print(suggestion.code_example)
         """
         suggestions = []
-
+
         for opportunity in self.opportunities:
             suggestion = self._create_detailed_suggestion(opportunity)
             suggestions.append(suggestion)
-
+
         return suggestions
-
-    def _create_detailed_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_detailed_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
         """Create detailed implementation suggestion for an opportunity."""
-
+
         if opportunity.pattern_type == "retry_cycle":
             return self._create_retry_suggestion(opportunity)
         elif opportunity.pattern_type == "optimization_cycle":
@@ -372,12 +382,14 @@ class DAGToCycleConverter:
             return self._create_convergence_suggestion(opportunity)
         else:
             return self._create_generic_suggestion(opportunity)
-
-    def _create_retry_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_retry_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
         """Create suggestion for retry cycle conversion."""
         main_node = opportunity.nodes[0]
-
-        code_example = f'''
+
+        code_example = f"""
 # Before: Manual retry logic (complex, error-prone)
 # Multiple nodes handling retries manually
 
@@ -390,31 +402,36 @@ cycle_id = workflow.add_retry_cycle(
 )
 
 print(f"Created retry cycle: {{cycle_id}}")
-'''
-
+"""
+
         implementation_steps = [
             f"Identify the main node that needs retry logic: '{main_node}'",
             "Remove manual retry handling from existing nodes",
             "Apply retry cycle template with appropriate parameters",
             "Test with failure scenarios to ensure proper retry behavior",
-            "Monitor retry patterns in production"
+            "Monitor retry patterns in production",
         ]
-
+
         return CyclificationSuggestion(
             opportunity=opportunity,
             implementation_steps=implementation_steps,
             code_example=code_example,
             expected_outcome="Simplified retry logic with exponential backoff and better error handling",
-            risks=["May change timing of operations", "Retry behavior might differ from manual implementation"]
+            risks=[
+                "May change timing of operations",
+                "Retry behavior might differ from manual implementation",
+            ],
         )
-
-    def _create_optimization_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_optimization_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
         """Create suggestion for optimization cycle conversion."""
         nodes = opportunity.nodes
         processor = nodes[0] if nodes else "processor"
         evaluator = nodes[1] if len(nodes) > 1 else "evaluator"
-
-        code_example = f'''
+
+        code_example = f"""
 # Before: Manual iterative improvement (fixed iterations, no early stopping)
 # Complex logic to manage improvement loops
 
@@ -427,31 +444,36 @@ cycle_id = workflow.add_optimization_cycle(
 )
 
 print(f"Created optimization cycle: {{cycle_id}}")
-'''
-
+"""
+
         implementation_steps = [
             f"Ensure '{processor}' generates/improves solutions",
             f"Ensure '{evaluator}' produces quality metrics",
             "Define appropriate convergence criteria",
             "Apply optimization cycle template",
-            "Fine-tune convergence thresholds based on testing"
+            "Fine-tune convergence thresholds based on testing",
         ]
-
+
         return CyclificationSuggestion(
             opportunity=opportunity,
             implementation_steps=implementation_steps,
             code_example=code_example,
             expected_outcome="Automatic convergence detection with early stopping for better performance",
-            risks=["Convergence criteria may need tuning", "May require more iterations than fixed approach"]
+            risks=[
+                "Convergence criteria may need tuning",
+                "May require more iterations than fixed approach",
+            ],
         )
-
-    def _create_data_quality_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_data_quality_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
         """Create suggestion for data quality cycle conversion."""
         nodes = opportunity.nodes
         cleaner = nodes[0] if nodes else "cleaner"
         validator = nodes[1] if len(nodes) > 1 else "validator"
-
-        code_example = f'''
+
+        code_example = f"""
 # Before: Single-pass cleaning (may miss quality issues)
 # Fixed cleaning pipeline without quality feedback
 
@@ -464,29 +486,34 @@ cycle_id = workflow.add_data_quality_cycle(
 )
 
 print(f"Created data quality cycle: {{cycle_id}}")
-'''
-
+"""
+
         implementation_steps = [
             f"Ensure '{cleaner}' can improve data quality iteratively",
             f"Ensure '{validator}' produces numeric quality scores",
             "Define appropriate quality threshold",
             "Apply data quality cycle template",
-            "Monitor quality improvements over iterations"
+            "Monitor quality improvements over iterations",
        ]
-
+
        return CyclificationSuggestion(
            opportunity=opportunity,
            implementation_steps=implementation_steps,
            code_example=code_example,
            expected_outcome="Higher data quality through iterative improvement with automatic stopping",
-            risks=["May increase processing time", "Quality metrics need to be meaningful"]
+            risks=[
+                "May increase processing time",
+                "Quality metrics need to be meaningful",
+            ],
        )
-
-    def _create_batch_processing_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_batch_processing_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
        """Create suggestion for batch processing cycle conversion."""
        node = opportunity.nodes[0] if opportunity.nodes else "processor"
-
-        code_example = f'''
+
+        code_example = f"""
 # Before: Manual batch handling (complex state management)
 # Custom logic for batch iteration and completion
 
@@ -498,29 +525,34 @@ cycle_id = workflow.add_batch_processing_cycle(
 )
 
 print(f"Created batch processing cycle: {{cycle_id}}")
-'''
-
+"""
+
         implementation_steps = [
             f"Modify '{node}' to process batches instead of full dataset",
             "Determine appropriate batch size for memory constraints",
             "Apply batch processing cycle template",
             "Test with various dataset sizes",
-            "Monitor memory usage and processing time"
+            "Monitor memory usage and processing time",
         ]
-
+
         return CyclificationSuggestion(
             opportunity=opportunity,
             implementation_steps=implementation_steps,
             code_example=code_example,
             expected_outcome="Memory-efficient processing of large datasets with automatic batch management",
-            risks=["Batch size may need tuning", "May change processing order/behavior"]
+            risks=[
+                "Batch size may need tuning",
+                "May change processing order/behavior",
+            ],
         )
-
-    def _create_convergence_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_convergence_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
         """Create suggestion for convergence cycle conversion."""
         node = opportunity.nodes[0] if opportunity.nodes else "processor"
-
-        code_example = f'''
+
+        code_example = f"""
 # Before: Fixed iterations (may over/under-compute)
 # Manual convergence checking
 
@@ -532,53 +564,61 @@ cycle_id = workflow.add_convergence_cycle(
 )
 
 print(f"Created convergence cycle: {{cycle_id}}")
-'''
-
+"""
+
         implementation_steps = [
             f"Ensure '{node}' produces numeric values for convergence checking",
             "Determine appropriate tolerance for convergence",
             "Apply convergence cycle template",
             "Test with various starting conditions",
-            "Validate convergence behavior"
+            "Validate convergence behavior",
         ]
-
+
         return CyclificationSuggestion(
             opportunity=opportunity,
             implementation_steps=implementation_steps,
             code_example=code_example,
             expected_outcome="Automatic convergence detection with optimal iteration count",
-            risks=["Tolerance may need adjustment", "Convergence behavior may differ from fixed iterations"]
+            risks=[
+                "Tolerance may need adjustment",
+                "Convergence behavior may differ from fixed iterations",
+            ],
         )
-
-    def _create_generic_suggestion(self, opportunity: CyclificationOpportunity) -> CyclificationSuggestion:
+
+    def _create_generic_suggestion(
+        self, opportunity: CyclificationOpportunity
+    ) -> CyclificationSuggestion:
         """Create generic suggestion for unknown pattern types."""
         return CyclificationSuggestion(
             opportunity=opportunity,
-            implementation_steps=["Analyze pattern manually", "Choose appropriate cycle template"],
+            implementation_steps=[
+                "Analyze pattern manually",
+                "Choose appropriate cycle template",
+            ],
             code_example="# Manual analysis required",
             expected_outcome="Pattern-specific benefits",
-            risks=["Requires manual analysis"]
+            risks=["Requires manual analysis"],
         )
-
+
     def convert_to_cycle(
         self,
         nodes: List[str],
         convergence_strategy: str = "error_reduction",
         cycle_type: Optional[str] = None,
-        **kwargs
+        **kwargs,
     ) -> str:
         """
         Convert specific nodes to a cycle using the specified strategy.
-
+
         Args:
             nodes: List of node IDs to include in the cycle
             convergence_strategy: Strategy for convergence ("error_reduction", "quality_improvement", etc.)
             cycle_type: Specific cycle type to use, or auto-detect if None
             **kwargs: Additional parameters for cycle creation
-
+
         Returns:
             str: The created cycle identifier
-
+
         Example:
             >>> converter = DAGToCycleConverter(workflow)
             >>> cycle_id = converter.convert_to_cycle(
@@ -589,7 +629,7 @@ print(f"Created convergence cycle: {{cycle_id}}")
         """
         if cycle_type is None:
             cycle_type = self._detect_cycle_type(nodes, convergence_strategy)
-
+
         if cycle_type == "optimization":
             return self._convert_to_optimization_cycle(nodes, **kwargs)
         elif cycle_type == "retry":
@@ -602,7 +642,7 @@ print(f"Created convergence cycle: {{cycle_id}}")
             return self._convert_to_convergence_cycle(nodes, **kwargs)
         else:
             raise ValueError(f"Unknown cycle type: {cycle_type}")
-
+
     def _detect_cycle_type(self, nodes: List[str], strategy: str) -> str:
         """Detect the most appropriate cycle type for given nodes and strategy."""
         if strategy == "error_reduction" or strategy == "quality_improvement":
@@ -618,71 +658,57 @@ print(f"Created convergence cycle: {{cycle_id}}")
         else:
             # Default to optimization for unknown strategies
             return "optimization"
-
+
     def _convert_to_optimization_cycle(self, nodes: List[str], **kwargs) -> str:
         """Convert nodes to optimization cycle."""
         if len(nodes) < 2:
             raise ValueError("Optimization cycle requires at least 2 nodes")
-
+
         return CycleTemplates.optimization_cycle(
-            self.workflow,
-            processor_node=nodes[0],
-            evaluator_node=nodes[1],
-            **kwargs
+            self.workflow, processor_node=nodes[0], evaluator_node=nodes[1], **kwargs
         )
-
+
     def _convert_to_retry_cycle(self, nodes: List[str], **kwargs) -> str:
         """Convert nodes to retry cycle."""
         if len(nodes) < 1:
             raise ValueError("Retry cycle requires at least 1 node")
-
-        return CycleTemplates.retry_cycle(
-            self.workflow,
-            target_node=nodes[0],
-            **kwargs
-        )
-
+
+        return CycleTemplates.retry_cycle(self.workflow, target_node=nodes[0], **kwargs)
+
     def _convert_to_data_quality_cycle(self, nodes: List[str], **kwargs) -> str:
         """Convert nodes to data quality cycle."""
         if len(nodes) < 2:
             raise ValueError("Data quality cycle requires at least 2 nodes")
-
+
         return CycleTemplates.data_quality_cycle(
-            self.workflow,
-            cleaner_node=nodes[0],
-            validator_node=nodes[1],
-            **kwargs
+            self.workflow, cleaner_node=nodes[0], validator_node=nodes[1], **kwargs
         )
-
+
     def _convert_to_batch_processing_cycle(self, nodes: List[str], **kwargs) -> str:
         """Convert nodes to batch processing cycle."""
         if len(nodes) < 1:
             raise ValueError("Batch processing cycle requires at least 1 node")
-
+
         return CycleTemplates.batch_processing_cycle(
-            self.workflow,
-            processor_node=nodes[0],
-            **kwargs
+            self.workflow, processor_node=nodes[0], **kwargs
         )
-
+
     def _convert_to_convergence_cycle(self, nodes: List[str], **kwargs) -> str:
         """Convert nodes to convergence cycle."""
         if len(nodes) < 1:
             raise ValueError("Convergence cycle requires at least 1 node")
-
+
         return CycleTemplates.convergence_cycle(
-            self.workflow,
-            processor_node=nodes[0],
-            **kwargs
+            self.workflow, processor_node=nodes[0], **kwargs
         )
-
+
     def generate_migration_report(self) -> Dict[str, Any]:
         """
         Generate comprehensive migration report with analysis and recommendations.
-
+
         Returns:
             Dict containing migration analysis and recommendations
-
+
         Example:
             >>> converter = DAGToCycleConverter(workflow)
             >>> converter.analyze_cyclification_opportunities()
@@ -691,78 +717,93 @@ print(f"Created convergence cycle: {{cycle_id}}")
         """
         opportunities = self.analyze_cyclification_opportunities()
         suggestions = self.generate_detailed_suggestions()
-
+
         # Categorize by pattern type
         by_pattern = defaultdict(list)
         for opp in opportunities:
             by_pattern[opp.pattern_type].append(opp)
-
+
         # Calculate potential benefits
         high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
-        medium_confidence = [opp for opp in opportunities if 0.4 <= opp.confidence < 0.7]
+        medium_confidence = [
+            opp for opp in opportunities if 0.4 <= opp.confidence < 0.7
+        ]
         low_confidence = [opp for opp in opportunities if opp.confidence < 0.4]
-
+
         return {
-            'summary': {
-                'total_opportunities': len(opportunities),
-                'high_confidence': len(high_confidence),
-                'medium_confidence': len(medium_confidence),
-                'low_confidence': len(low_confidence),
-                'pattern_distribution': {k: len(v) for k, v in by_pattern.items()}
+            "summary": {
+                "total_opportunities": len(opportunities),
+                "high_confidence": len(high_confidence),
+                "medium_confidence": len(medium_confidence),
+                "low_confidence": len(low_confidence),
+                "pattern_distribution": {k: len(v) for k, v in by_pattern.items()},
             },
-            'opportunities': opportunities,
-            'detailed_suggestions': suggestions,
-            'recommendations': self._generate_migration_recommendations(opportunities),
-            'implementation_order': self._suggest_implementation_order(opportunities)
+            "opportunities": opportunities,
+            "detailed_suggestions": suggestions,
+            "recommendations": self._generate_migration_recommendations(opportunities),
+            "implementation_order": self._suggest_implementation_order(opportunities),
         }
-
-    def _generate_migration_recommendations(self, opportunities: List[CyclificationOpportunity]) -> List[str]:
+
+    def _generate_migration_recommendations(
+        self, opportunities: List[CyclificationOpportunity]
+    ) -> List[str]:
         """Generate high-level recommendations for migration."""
         recommendations = []
-
+
         high_confidence = [opp for opp in opportunities if opp.confidence >= 0.7]
         if high_confidence:
             recommendations.append(
                 f"Start with {len(high_confidence)} high-confidence opportunities for immediate benefits"
             )
-
+
         pattern_counts = defaultdict(int)
         for opp in opportunities:
             pattern_counts[opp.pattern_type] += 1
-
-        most_common = max(pattern_counts.items(), key=lambda x: x[1]) if pattern_counts else None
+
+        most_common = (
+            max(pattern_counts.items(), key=lambda x: x[1]) if pattern_counts else None
+        )
         if most_common:
             recommendations.append(
                 f"Focus on {most_common[0]} patterns ({most_common[1]} opportunities) for consistency"
             )
-
-        low_complexity = [opp for opp in opportunities if opp.implementation_complexity == 'low']
+
+        low_complexity = [
+            opp for opp in opportunities if opp.implementation_complexity == "low"
+        ]
         if low_complexity:
             recommendations.append(
                 f"Begin with {len(low_complexity)} low-complexity conversions to build confidence"
             )
-
+
         return recommendations
-
-    def _suggest_implementation_order(self, opportunities: List[CyclificationOpportunity]) -> List[Dict[str, Any]]:
+
+    def _suggest_implementation_order(
+        self, opportunities: List[CyclificationOpportunity]
+    ) -> List[Dict[str, Any]]:
         """Suggest order for implementing cyclification opportunities."""
         # Sort by: confidence desc, complexity asc (low=1, medium=2, high=3)
         complexity_score = {"low": 1, "medium": 2, "high": 3}
-
+
         def sort_key(opp):
-            return (-opp.confidence, complexity_score.get(opp.implementation_complexity, 2))
-
+            return (
+                -opp.confidence,
+                complexity_score.get(opp.implementation_complexity, 2),
+            )
+
         sorted_opportunities = sorted(opportunities, key=sort_key)
-
+
         implementation_order = []
         for i, opp in enumerate(sorted_opportunities, 1):
-            implementation_order.append({
-                'priority': i,
-                'pattern_type': opp.pattern_type,
-                'nodes': opp.nodes,
-                'confidence': opp.confidence,
-                'complexity': opp.implementation_complexity,
-                'justification': f'Priority {i}: {opp.description}'
-            })
-
-        return implementation_order
+            implementation_order.append(
+                {
+                    "priority": i,
+                    "pattern_type": opp.pattern_type,
+                    "nodes": opp.nodes,
+                    "confidence": opp.confidence,
+                    "complexity": opp.implementation_complexity,
+                    "justification": f"Priority {i}: {opp.description}",
+                }
+            )
+
+        return implementation_order