claude-flow-novice 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/cli/commands/guidance.js +487 -668
- package/dist/src/cli/commands/index-validate.js +18 -29
- package/dist/src/cli/commands/mcp-troubleshoot.js +230 -282
- package/dist/src/cli/commands/neural-goal-init.js +92 -125
- package/dist/src/cli/commands/swarm-exec.js +317 -393
- package/dist/src/cli/commands/swarm.js +1 -1
- package/dist/src/cli/commands/validate-framework.js +983 -1100
- package/dist/src/cli/commands/validate.js +144 -223
- package/dist/src/cli/simple-commands/__tests__/agent.test.js +265 -277
- package/dist/src/cli/simple-commands/__tests__/memory.test.js +6 -7
- package/dist/src/cli/simple-commands/__tests__/swarm.test.js +373 -356
- package/dist/src/cli/simple-commands/__tests__/task.test.js +6 -7
- package/dist/src/cli/simple-commands/agent.js +157 -193
- package/dist/src/cli/simple-commands/analysis.js +336 -446
- package/dist/src/cli/simple-commands/automation-executor.js +1095 -1339
- package/dist/src/cli/simple-commands/automation.js +481 -469
- package/dist/src/cli/simple-commands/batch-manager.js +261 -313
- package/dist/src/cli/simple-commands/claude-telemetry.js +241 -267
- package/dist/src/cli/simple-commands/claude-track.js +68 -90
- package/dist/src/cli/simple-commands/concurrent-display.js +266 -320
- package/dist/src/cli/simple-commands/config.js +245 -290
- package/dist/src/cli/simple-commands/coordination.js +182 -234
- package/dist/src/cli/simple-commands/enhanced-ui-views.js +812 -615
- package/dist/src/cli/simple-commands/enhanced-webui-complete.js +922 -981
- package/dist/src/cli/simple-commands/fix-hook-variables.js +274 -294
- package/dist/src/cli/simple-commands/github/gh-coordinator.js +378 -457
- package/dist/src/cli/simple-commands/github/github-api.js +535 -574
- package/dist/src/cli/simple-commands/github/init.js +276 -303
- package/dist/src/cli/simple-commands/github.js +222 -247
- package/dist/src/cli/simple-commands/goal.js +51 -63
- package/dist/src/cli/simple-commands/hive-mind/auto-save-middleware.js +208 -278
- package/dist/src/cli/simple-commands/hive-mind/communication.js +601 -696
- package/dist/src/cli/simple-commands/hive-mind/core.js +907 -979
- package/dist/src/cli/simple-commands/hive-mind/db-optimizer.js +406 -655
- package/dist/src/cli/simple-commands/hive-mind/mcp-wrapper.js +1125 -1245
- package/dist/src/cli/simple-commands/hive-mind/memory.js +854 -1090
- package/dist/src/cli/simple-commands/hive-mind/performance-optimizer.js +459 -574
- package/dist/src/cli/simple-commands/hive-mind/performance-test.js +263 -347
- package/dist/src/cli/simple-commands/hive-mind/queen.js +727 -768
- package/dist/src/cli/simple-commands/hive-mind/session-manager.js +745 -1049
- package/dist/src/cli/simple-commands/hive-mind-optimize.js +227 -283
- package/dist/src/cli/simple-commands/hive-mind-wizard.js +174 -217
- package/dist/src/cli/simple-commands/hive-mind.js +1842 -2283
- package/dist/src/cli/simple-commands/hive.js +90 -79
- package/dist/src/cli/simple-commands/hook-safety.js +431 -521
- package/dist/src/cli/simple-commands/hooks/session-start-soul.js +203 -254
- package/dist/src/cli/simple-commands/hooks.js +1064 -1204
- package/dist/src/cli/simple-commands/init/agent-copier.js +294 -319
- package/dist/src/cli/simple-commands/init/batch-init.js +496 -562
- package/dist/src/cli/simple-commands/init/claude-commands/claude-flow-commands.js +13 -19
- package/dist/src/cli/simple-commands/init/claude-commands/optimized-claude-flow-commands.js +13 -19
- package/dist/src/cli/simple-commands/init/claude-commands/optimized-slash-commands.js +61 -88
- package/dist/src/cli/simple-commands/init/claude-commands/optimized-sparc-commands.js +125 -150
- package/dist/src/cli/simple-commands/init/claude-commands/slash-commands.js +42 -49
- package/dist/src/cli/simple-commands/init/claude-commands/sparc-commands.js +43 -61
- package/dist/src/cli/simple-commands/init/copy-revised-templates.js +141 -147
- package/dist/src/cli/simple-commands/init/executable-wrapper.js +31 -44
- package/dist/src/cli/simple-commands/init/gitignore-updater.js +64 -90
- package/dist/src/cli/simple-commands/init/help.js +104 -107
- package/dist/src/cli/simple-commands/init/hive-mind-init.js +509 -528
- package/dist/src/cli/simple-commands/init/index.js +1510 -1759
- package/dist/src/cli/simple-commands/init/performance-monitor.js +234 -317
- package/dist/src/cli/simple-commands/init/rollback/backup-manager.js +441 -504
- package/dist/src/cli/simple-commands/init/rollback/index.js +289 -364
- package/dist/src/cli/simple-commands/init/rollback/recovery-manager.js +652 -728
- package/dist/src/cli/simple-commands/init/rollback/rollback-executor.js +416 -481
- package/dist/src/cli/simple-commands/init/rollback/state-tracker.js +369 -448
- package/dist/src/cli/simple-commands/init/sparc/roo-readme.js +1 -2
- package/dist/src/cli/simple-commands/init/sparc/roomodes-config.js +122 -99
- package/dist/src/cli/simple-commands/init/sparc/workflows.js +32 -37
- package/dist/src/cli/simple-commands/init/sparc-structure.js +55 -62
- package/dist/src/cli/simple-commands/init/template-copier.js +421 -533
- package/dist/src/cli/simple-commands/init/templates/coordination-md.js +3 -6
- package/dist/src/cli/simple-commands/init/templates/enhanced-templates.js +344 -318
- package/dist/src/cli/simple-commands/init/templates/github-safe-enhanced.js +173 -218
- package/dist/src/cli/simple-commands/init/templates/github-safe.js +65 -75
- package/dist/src/cli/simple-commands/init/templates/memory-bank-md.js +3 -6
- package/dist/src/cli/simple-commands/init/templates/readme-files.js +2 -4
- package/dist/src/cli/simple-commands/init/templates/safe-hook-patterns.js +187 -230
- package/dist/src/cli/simple-commands/init/templates/sparc-modes.js +53 -80
- package/dist/src/cli/simple-commands/init/templates/verification-claude-md.js +101 -85
- package/dist/src/cli/simple-commands/init/validation/config-validator.js +283 -330
- package/dist/src/cli/simple-commands/init/validation/health-checker.js +495 -561
- package/dist/src/cli/simple-commands/init/validation/index.js +302 -358
- package/dist/src/cli/simple-commands/init/validation/mode-validator.js +308 -359
- package/dist/src/cli/simple-commands/init/validation/post-init-validator.js +389 -366
- package/dist/src/cli/simple-commands/init/validation/pre-init-validator.js +270 -268
- package/dist/src/cli/simple-commands/init/validation/test-runner.js +427 -447
- package/dist/src/cli/simple-commands/init.js +1 -2
- package/dist/src/cli/simple-commands/mcp-health.js +131 -158
- package/dist/src/cli/simple-commands/mcp-integration-layer.js +533 -634
- package/dist/src/cli/simple-commands/mcp.js +345 -400
- package/dist/src/cli/simple-commands/memory-consolidation.js +426 -537
- package/dist/src/cli/simple-commands/memory.js +247 -311
- package/dist/src/cli/simple-commands/migrate-hooks.js +39 -46
- package/dist/src/cli/simple-commands/monitor.js +294 -363
- package/dist/src/cli/simple-commands/neural.js +51 -65
- package/dist/src/cli/simple-commands/pair-autofix-only.js +538 -662
- package/dist/src/cli/simple-commands/pair-basic.js +528 -656
- package/dist/src/cli/simple-commands/pair-old.js +430 -543
- package/dist/src/cli/simple-commands/pair-working.js +615 -751
- package/dist/src/cli/simple-commands/pair.js +615 -751
- package/dist/src/cli/simple-commands/performance-hooks.js +83 -111
- package/dist/src/cli/simple-commands/performance-metrics.js +348 -433
- package/dist/src/cli/simple-commands/process-ui-enhanced.js +708 -787
- package/dist/src/cli/simple-commands/process-ui.js +230 -254
- package/dist/src/cli/simple-commands/realtime-update-system.js +525 -611
- package/dist/src/cli/simple-commands/sparc/architecture.js +1704 -1530
- package/dist/src/cli/simple-commands/sparc/commands.js +438 -516
- package/dist/src/cli/simple-commands/sparc/completion.js +1224 -1481
- package/dist/src/cli/simple-commands/sparc/coordinator.js +913 -978
- package/dist/src/cli/simple-commands/sparc/index.js +241 -298
- package/dist/src/cli/simple-commands/sparc/phase-base.js +314 -390
- package/dist/src/cli/simple-commands/sparc/pseudocode.js +965 -869
- package/dist/src/cli/simple-commands/sparc/refinement.js +980 -1273
- package/dist/src/cli/simple-commands/sparc/specification.js +559 -645
- package/dist/src/cli/simple-commands/sparc-modes/architect.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/ask.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/code.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/debug.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/devops.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/docs-writer.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/generic.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/index.js +47 -55
- package/dist/src/cli/simple-commands/sparc-modes/integration.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/mcp.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/monitoring.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/optimization.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/security-review.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/sparc-orchestrator.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/spec-pseudocode.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/supabase-admin.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/swarm.js +101 -87
- package/dist/src/cli/simple-commands/sparc-modes/tdd.js +1 -1
- package/dist/src/cli/simple-commands/sparc-modes/tutorial.js +1 -1
- package/dist/src/cli/simple-commands/sparc.js +465 -493
- package/dist/src/cli/simple-commands/start-ui.js +108 -132
- package/dist/src/cli/simple-commands/start-wrapper.js +240 -268
- package/dist/src/cli/simple-commands/start.js +1 -1
- package/dist/src/cli/simple-commands/status.js +254 -275
- package/dist/src/cli/simple-commands/stream-chain-clean.js +128 -171
- package/dist/src/cli/simple-commands/stream-chain-fixed.js +61 -82
- package/dist/src/cli/simple-commands/stream-chain-real.js +267 -331
- package/dist/src/cli/simple-commands/stream-chain-working.js +211 -263
- package/dist/src/cli/simple-commands/stream-chain.js +260 -318
- package/dist/src/cli/simple-commands/stream-processor.js +290 -315
- package/dist/src/cli/simple-commands/swarm-executor.js +189 -222
- package/dist/src/cli/simple-commands/swarm-metrics-integration.js +208 -300
- package/dist/src/cli/simple-commands/swarm-ui.js +623 -703
- package/dist/src/cli/simple-commands/swarm-webui-integration.js +258 -286
- package/dist/src/cli/simple-commands/swarm.js +887 -1082
- package/dist/src/cli/simple-commands/task.js +161 -206
- package/dist/src/cli/simple-commands/timestamp-fix.js +59 -89
- package/dist/src/cli/simple-commands/token-tracker.js +258 -316
- package/dist/src/cli/simple-commands/tool-execution-framework.js +433 -519
- package/dist/src/cli/simple-commands/train-and-stream.js +275 -331
- package/dist/src/cli/simple-commands/training-pipeline.js +619 -725
- package/dist/src/cli/simple-commands/training.js +170 -227
- package/dist/src/cli/simple-commands/verification-hooks.js +261 -284
- package/dist/src/cli/simple-commands/verification-integration.js +389 -417
- package/dist/src/cli/simple-commands/verification-training-integration.js +486 -606
- package/dist/src/cli/simple-commands/verification.js +493 -513
- package/dist/src/cli/simple-commands/web-server.js +766 -836
- package/dist/src/cli/simple-commands/webui-validator.js +106 -124
- package/dist/src/coordination/event-bus/demo-wasm-integration.js +212 -251
- package/dist/src/coordination/event-bus/qe-event-bus.js +608 -748
- package/dist/src/coordination/event-bus/qe-event-bus.test.js +379 -454
- package/dist/src/coordination/iteration-tracker.js +363 -454
- package/dist/src/enterprise/analytics-manager.js +1135 -0
- package/dist/src/enterprise/audit-manager.js +1115 -0
- package/dist/src/enterprise/cloud-manager.js +891 -0
- package/dist/src/enterprise/deployment-manager.js +966 -0
- package/dist/src/enterprise/index.js +6 -0
- package/dist/src/enterprise/project-manager.js +584 -0
- package/dist/src/enterprise/security-manager.js +991 -0
- package/dist/src/index.js +1 -1
- package/dist/src/mcp/DEPRECATED.js +46 -60
- package/dist/src/mcp/fixes/mcp-error-fixes.js +115 -134
- package/dist/src/mcp/implementations/agent-tracker.js +114 -128
- package/dist/src/mcp/implementations/daa-tools.js +292 -350
- package/dist/src/mcp/implementations/workflow-tools.js +329 -361
- package/dist/src/mcp/mcp-config-manager.js +1183 -1331
- package/dist/src/mcp/mcp-server-novice-simplified.js +11 -17
- package/dist/src/mcp/mcp-server-novice.js +11 -17
- package/dist/src/mcp/mcp-server-sdk.js +11 -17
- package/dist/src/mcp/mcp-server.js +1620 -1484
- package/dist/src/mcp/ruv-swarm-wrapper.js +209 -239
- package/dist/src/memory/advanced-serializer.js +609 -589
- package/dist/src/memory/enhanced-examples.js +220 -305
- package/dist/src/memory/enhanced-memory.js +295 -336
- package/dist/src/memory/enhanced-session-serializer.js +408 -492
- package/dist/src/memory/fallback-memory-system.js +900 -1021
- package/dist/src/memory/fallback-store.js +93 -131
- package/dist/src/memory/high-performance-serialization.js +592 -730
- package/dist/src/memory/in-memory-store.js +161 -213
- package/dist/src/memory/index.js +123 -157
- package/dist/src/memory/lock-free-structures.js +578 -764
- package/dist/src/memory/memory-mapped-persistence.js +585 -766
- package/dist/src/memory/memory-pressure-manager.js +569 -707
- package/dist/src/memory/migration.js +358 -445
- package/dist/src/memory/shared-memory.js +641 -768
- package/dist/src/memory/sqlite-store.js +245 -325
- package/dist/src/memory/sqlite-wrapper.js +122 -151
- package/dist/src/memory/swarm-memory.js +470 -603
- package/dist/src/memory/test-example.js +126 -134
- package/dist/src/memory/ultra-fast-memory-store.js +622 -821
- package/dist/src/memory/unified-memory-manager.js +356 -437
- package/dist/src/migration/index.js +92 -0
- package/dist/src/migration/logger.js +121 -0
- package/dist/src/migration/migration-analyzer.js +268 -0
- package/dist/src/migration/migration-runner.js +522 -0
- package/dist/src/migration/migration-validator.js +285 -0
- package/dist/src/migration/progress-reporter.js +150 -0
- package/dist/src/migration/rollback-manager.js +321 -0
- package/dist/src/migration/tests/migration-system.test.js +7 -0
- package/dist/src/migration/types.js +3 -0
- package/dist/src/swarm/CodeRefactoringSwarm.js +777 -952
- package/dist/src/swarm/__tests__/integration.test.js +227 -0
- package/dist/src/swarm/__tests__/prompt-copier.test.js +344 -0
- package/dist/src/swarm/advanced-orchestrator.js +1095 -0
- package/dist/src/swarm/claude-code-interface.js +961 -0
- package/dist/src/swarm/claude-flow-executor.js +229 -0
- package/dist/src/swarm/consensus-coordinator.js +475 -0
- package/dist/src/swarm/coordinator.js +2993 -0
- package/dist/src/swarm/direct-executor.js +1180 -0
- package/dist/src/swarm/error-recovery/advanced-error-detection.js +691 -0
- package/dist/src/swarm/error-recovery/automated-recovery-workflows.js +998 -0
- package/dist/src/swarm/error-recovery/error-recovery-coordinator.js +1197 -0
- package/dist/src/swarm/error-recovery/recovery-monitoring.js +772 -0
- package/dist/src/swarm/error-recovery/resilience-architecture.js +714 -0
- package/dist/src/swarm/error-recovery/self-healing-mechanisms.js +1319 -0
- package/dist/src/swarm/error-recovery/test-error-recovery-effectiveness.js +808 -0
- package/dist/src/swarm/executor-v2.js +322 -0
- package/dist/src/swarm/executor.js +815 -0
- package/dist/src/swarm/hive-mind-integration.js +703 -0
- package/dist/src/swarm/index.js +41 -0
- package/dist/src/swarm/json-output-aggregator.js +267 -0
- package/dist/src/swarm/large-scale-coordinator.js +542 -0
- package/dist/src/swarm/mcp-integration-wrapper.js +628 -0
- package/dist/src/swarm/memory.js +1117 -0
- package/dist/src/swarm/optimizations/__tests__/optimization.test.js +348 -0
- package/dist/src/swarm/optimizations/async-file-manager.js +285 -0
- package/dist/src/swarm/optimizations/circular-buffer.js +162 -0
- package/dist/src/swarm/optimizations/connection-pool.js +244 -0
- package/dist/src/swarm/optimizations/index.js +28 -0
- package/dist/src/swarm/optimizations/optimized-executor.js +320 -0
- package/dist/src/swarm/optimizations/ttl-map.js +234 -0
- package/dist/src/swarm/prompt-cli.js +200 -0
- package/dist/src/swarm/prompt-copier-enhanced.js +202 -0
- package/dist/src/swarm/prompt-copier.js +381 -0
- package/dist/src/swarm/prompt-manager.js +295 -0
- package/dist/src/swarm/prompt-utils.js +310 -0
- package/dist/src/swarm/result-aggregator.js +718 -0
- package/dist/src/swarm/sparc-executor.js +1568 -0
- package/dist/src/swarm/strategies/auto.js +758 -0
- package/dist/src/swarm/strategies/base.js +128 -0
- package/dist/src/swarm/strategies/research.js +914 -0
- package/dist/src/swarm/strategies/strategy-metrics-patch.js +2 -0
- package/dist/src/swarm/types.js +52 -0
- package/dist/src/swarm/workers/copy-worker.js +56 -0
- package/dist/src/utils/__tests__/github-cli-safety-wrapper.test.js +332 -400
- package/dist/src/utils/github-cli-safe.js +56 -64
- package/dist/src/utils/github-cli-safety-wrapper.js +451 -546
- package/dist/src/utils/npx-isolated-cache.js +104 -119
- package/dist/src/utils/preference-manager.js +622 -652
- package/dist/src/utils/timezone-utils.js +86 -105
- package/dist/src/validators/epic-config-schema.js +214 -0
- package/dist/src/validators/index.js +10 -0
- package/dist/src/validators/swarm-init-validator.js +259 -0
- package/dist/src/validators/todowrite-batching-validator.js +215 -0
- package/dist/src/validators/todowrite-integration.js +187 -0
- package/package.json +2 -2
|
@@ -0,0 +1,998 @@
|
|
|
1
|
+
/**
 * Automated Recovery Workflows
 * Multi-strategy recovery approaches with context-aware decision making
 */

/**
 * Compiler helper used to initialise class fields.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined directly on `obj` as an enumerable, configurable, writable data
 * property; otherwise a plain assignment is performed.
 *
 * @param {object} obj - Object receiving the property.
 * @param {string|symbol} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
|
|
17
|
+
import { EventEmitter } from "node:events";
|
|
18
|
+
import { createClient } from "redis";
|
|
19
|
+
export class AutomatedRecoveryWorkflows extends EventEmitter {
|
|
20
|
+
async start() {
|
|
21
|
+
if (this.isRunning) {
|
|
22
|
+
return;
|
|
23
|
+
}
|
|
24
|
+
try {
|
|
25
|
+
await this.redis.connect();
|
|
26
|
+
await this.loadHistoricalData();
|
|
27
|
+
this.logger.info('Automated recovery workflows started', {
|
|
28
|
+
strategies: this.strategies.size,
|
|
29
|
+
maxConcurrent: this.config.global.maxConcurrentRecoveries
|
|
30
|
+
});
|
|
31
|
+
this.isRunning = true;
|
|
32
|
+
this.startMonitoring();
|
|
33
|
+
this.emit('started');
|
|
34
|
+
} catch (error) {
|
|
35
|
+
this.logger.error('Failed to start automated recovery workflows', {
|
|
36
|
+
error
|
|
37
|
+
});
|
|
38
|
+
throw error;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
async stop() {
|
|
42
|
+
if (!this.isRunning) {
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
this.isRunning = false;
|
|
46
|
+
// Wait for active workflows to complete or timeout
|
|
47
|
+
const timeoutMs = this.config.global.defaultTimeoutMs;
|
|
48
|
+
const startTime = Date.now();
|
|
49
|
+
while(this.activeWorkflows.size > 0 && Date.now() - startTime < timeoutMs){
|
|
50
|
+
await new Promise((resolve)=>setTimeout(resolve, 1000));
|
|
51
|
+
}
|
|
52
|
+
// Force stop any remaining workflows
|
|
53
|
+
for (const workflow of this.activeWorkflows.values()){
|
|
54
|
+
await this.stopWorkflow(workflow.id, 'System shutdown');
|
|
55
|
+
}
|
|
56
|
+
await this.saveHistoricalData();
|
|
57
|
+
await this.redis.disconnect();
|
|
58
|
+
this.emit('stopped');
|
|
59
|
+
this.logger.info('Automated recovery workflows stopped');
|
|
60
|
+
}
|
|
61
|
+
initializeStrategies() {
|
|
62
|
+
// Load configured strategies
|
|
63
|
+
this.config.strategies.forEach((strategy)=>{
|
|
64
|
+
this.strategies.set(strategy.id, strategy);
|
|
65
|
+
});
|
|
66
|
+
// Add default strategies if none provided
|
|
67
|
+
if (this.strategies.size === 0) {
|
|
68
|
+
this.addDefaultStrategies();
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
addDefaultStrategies() {
|
|
72
|
+
const defaultStrategies = [
|
|
73
|
+
{
|
|
74
|
+
id: 'agent-restart',
|
|
75
|
+
name: 'Agent Restart Strategy',
|
|
76
|
+
description: 'Restarts failed or unhealthy agents',
|
|
77
|
+
type: 'standard',
|
|
78
|
+
conditions: {
|
|
79
|
+
errorTypes: [
|
|
80
|
+
'agent_timeout',
|
|
81
|
+
'agent_failure',
|
|
82
|
+
'agent_unresponsive'
|
|
83
|
+
],
|
|
84
|
+
severity: [
|
|
85
|
+
'low',
|
|
86
|
+
'medium'
|
|
87
|
+
],
|
|
88
|
+
categories: [
|
|
89
|
+
'agent'
|
|
90
|
+
]
|
|
91
|
+
},
|
|
92
|
+
actions: [
|
|
93
|
+
{
|
|
94
|
+
id: 'stop-agent',
|
|
95
|
+
type: 'restart',
|
|
96
|
+
target: 'agent',
|
|
97
|
+
parameters: {
|
|
98
|
+
graceful: true
|
|
99
|
+
},
|
|
100
|
+
timeoutMs: 30000,
|
|
101
|
+
rollbackAction: {
|
|
102
|
+
id: 'keep-agent-stopped',
|
|
103
|
+
type: 'custom',
|
|
104
|
+
target: 'agent',
|
|
105
|
+
parameters: {
|
|
106
|
+
action: 'no_op'
|
|
107
|
+
},
|
|
108
|
+
timeoutMs: 1000
|
|
109
|
+
}
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
id: 'verify-agent-health',
|
|
113
|
+
type: 'custom',
|
|
114
|
+
target: 'agent',
|
|
115
|
+
parameters: {
|
|
116
|
+
action: 'health_check'
|
|
117
|
+
},
|
|
118
|
+
timeoutMs: 10000
|
|
119
|
+
}
|
|
120
|
+
],
|
|
121
|
+
priority: 1,
|
|
122
|
+
maxRetries: 3,
|
|
123
|
+
cooldownMs: 60000,
|
|
124
|
+
successThreshold: 0.8
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
id: 'task-retry',
|
|
128
|
+
name: 'Task Retry Strategy',
|
|
129
|
+
description: 'Retries failed tasks with exponential backoff',
|
|
130
|
+
type: 'standard',
|
|
131
|
+
conditions: {
|
|
132
|
+
errorTypes: [
|
|
133
|
+
'task_failure',
|
|
134
|
+
'task_timeout'
|
|
135
|
+
],
|
|
136
|
+
severity: [
|
|
137
|
+
'low',
|
|
138
|
+
'medium'
|
|
139
|
+
],
|
|
140
|
+
categories: [
|
|
141
|
+
'task'
|
|
142
|
+
]
|
|
143
|
+
},
|
|
144
|
+
actions: [
|
|
145
|
+
{
|
|
146
|
+
id: 'retry-task',
|
|
147
|
+
type: 'retry',
|
|
148
|
+
target: 'task',
|
|
149
|
+
parameters: {
|
|
150
|
+
backoffMultiplier: 2,
|
|
151
|
+
maxDelay: 30000,
|
|
152
|
+
jitter: true
|
|
153
|
+
},
|
|
154
|
+
timeoutMs: 60000
|
|
155
|
+
}
|
|
156
|
+
],
|
|
157
|
+
priority: 2,
|
|
158
|
+
maxRetries: 5,
|
|
159
|
+
cooldownMs: 5000,
|
|
160
|
+
successThreshold: 0.7
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
id: 'memory-cleanup',
|
|
164
|
+
name: 'Memory Cleanup Strategy',
|
|
165
|
+
description: 'Performs memory cleanup and optimization',
|
|
166
|
+
type: 'conservative',
|
|
167
|
+
conditions: {
|
|
168
|
+
errorTypes: [
|
|
169
|
+
'memory_exhaustion',
|
|
170
|
+
'memory_leak'
|
|
171
|
+
],
|
|
172
|
+
severity: [
|
|
173
|
+
'medium',
|
|
174
|
+
'high'
|
|
175
|
+
],
|
|
176
|
+
categories: [
|
|
177
|
+
'memory',
|
|
178
|
+
'system'
|
|
179
|
+
]
|
|
180
|
+
},
|
|
181
|
+
actions: [
|
|
182
|
+
{
|
|
183
|
+
id: 'garbage-collect',
|
|
184
|
+
type: 'custom',
|
|
185
|
+
target: 'system',
|
|
186
|
+
parameters: {
|
|
187
|
+
action: 'force_gc'
|
|
188
|
+
},
|
|
189
|
+
timeoutMs: 10000
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
id: 'clear-cache',
|
|
193
|
+
type: 'custom',
|
|
194
|
+
target: 'system',
|
|
195
|
+
parameters: {
|
|
196
|
+
action: 'clear_caches'
|
|
197
|
+
},
|
|
198
|
+
timeoutMs: 5000
|
|
199
|
+
},
|
|
200
|
+
{
|
|
201
|
+
id: 'restart-memory-intensive-agents',
|
|
202
|
+
type: 'restart',
|
|
203
|
+
target: 'memory_intensive_agents',
|
|
204
|
+
parameters: {
|
|
205
|
+
graceful: false
|
|
206
|
+
},
|
|
207
|
+
timeoutMs: 30000
|
|
208
|
+
}
|
|
209
|
+
],
|
|
210
|
+
priority: 3,
|
|
211
|
+
maxRetries: 2,
|
|
212
|
+
cooldownMs: 120000,
|
|
213
|
+
successThreshold: 0.9
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
id: 'circuit-breaker',
|
|
217
|
+
name: 'Circuit Breaker Strategy',
|
|
218
|
+
description: 'Activates circuit breaker to prevent cascading failures',
|
|
219
|
+
type: 'aggressive',
|
|
220
|
+
conditions: {
|
|
221
|
+
errorTypes: [
|
|
222
|
+
'cascading_failure',
|
|
223
|
+
'network_partition'
|
|
224
|
+
],
|
|
225
|
+
severity: [
|
|
226
|
+
'high',
|
|
227
|
+
'critical'
|
|
228
|
+
],
|
|
229
|
+
categories: [
|
|
230
|
+
'network',
|
|
231
|
+
'system'
|
|
232
|
+
]
|
|
233
|
+
},
|
|
234
|
+
actions: [
|
|
235
|
+
{
|
|
236
|
+
id: 'activate-circuit-breaker',
|
|
237
|
+
type: 'failover',
|
|
238
|
+
target: 'system',
|
|
239
|
+
parameters: {
|
|
240
|
+
mode: 'open',
|
|
241
|
+
timeout: 60000,
|
|
242
|
+
halfOpenRetries: 3
|
|
243
|
+
},
|
|
244
|
+
timeoutMs: 5000
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
id: 'enable-fallback-mode',
|
|
248
|
+
type: 'failover',
|
|
249
|
+
target: 'system',
|
|
250
|
+
parameters: {
|
|
251
|
+
mode: 'degraded'
|
|
252
|
+
},
|
|
253
|
+
timeoutMs: 10000
|
|
254
|
+
}
|
|
255
|
+
],
|
|
256
|
+
priority: 4,
|
|
257
|
+
maxRetries: 1,
|
|
258
|
+
cooldownMs: 300000,
|
|
259
|
+
successThreshold: 0.95
|
|
260
|
+
},
|
|
261
|
+
{
|
|
262
|
+
id: 'emergency-shutdown',
|
|
263
|
+
name: 'Emergency Shutdown Strategy',
|
|
264
|
+
description: 'Performs emergency shutdown to prevent data loss',
|
|
265
|
+
type: 'emergency',
|
|
266
|
+
conditions: {
|
|
267
|
+
errorTypes: [
|
|
268
|
+
'critical_system_failure',
|
|
269
|
+
'data_corruption'
|
|
270
|
+
],
|
|
271
|
+
severity: [
|
|
272
|
+
'critical'
|
|
273
|
+
],
|
|
274
|
+
categories: [
|
|
275
|
+
'system'
|
|
276
|
+
]
|
|
277
|
+
},
|
|
278
|
+
actions: [
|
|
279
|
+
{
|
|
280
|
+
id: 'graceful-shutdown',
|
|
281
|
+
type: 'custom',
|
|
282
|
+
target: 'system',
|
|
283
|
+
parameters: {
|
|
284
|
+
saveState: true,
|
|
285
|
+
timeout: 30000,
|
|
286
|
+
forceAfterTimeout: true
|
|
287
|
+
},
|
|
288
|
+
timeoutMs: 45000
|
|
289
|
+
},
|
|
290
|
+
{
|
|
291
|
+
id: 'activate-emergency-procedures',
|
|
292
|
+
type: 'custom',
|
|
293
|
+
target: 'system',
|
|
294
|
+
parameters: {
|
|
295
|
+
notifyAdmin: true,
|
|
296
|
+
createBackup: true,
|
|
297
|
+
lockSystem: true
|
|
298
|
+
},
|
|
299
|
+
timeoutMs: 60000
|
|
300
|
+
}
|
|
301
|
+
],
|
|
302
|
+
priority: 5,
|
|
303
|
+
maxRetries: 0,
|
|
304
|
+
cooldownMs: 0,
|
|
305
|
+
successThreshold: 1.0
|
|
306
|
+
}
|
|
307
|
+
];
|
|
308
|
+
defaultStrategies.forEach((strategy)=>{
|
|
309
|
+
this.strategies.set(strategy.id, strategy);
|
|
310
|
+
});
|
|
311
|
+
}
|
|
312
|
+
/**
 * Run a recovery workflow for the given error.
 *
 * Builds a recovery context, selects a strategy for it, creates a workflow
 * from that strategy and executes it.
 *
 * @param {object} error - Error record; `id` and `type` are read here.
 * @returns {Promise<string>} Id of the workflow that was executed.
 * @throws {Error} When no strategy could be selected for the error.
 */
async handleError(error) {
    const recoveryContext = await this.buildRecoveryContext(error);
    const chosenStrategy = await this.selectBestStrategy(recoveryContext);
    if (chosenStrategy) {
        const workflow = await this.createRecoveryWorkflow(chosenStrategy, recoveryContext);
        await this.executeWorkflow(workflow);
        return workflow.id;
    }
    const { id: errorId, type } = error;
    this.logger.warn('No recovery strategy found for error', { errorId, type });
    throw new Error('No suitable recovery strategy available');
}
|
|
326
|
+
/**
 * Assemble the context object used to choose and run a recovery strategy.
 *
 * Gathers previous attempts, system load, swarm state and time constraints
 * (one after another, in that order) and combines them with fields copied
 * from the error record.
 *
 * @param {object} error - Error record; `id`, `type`, `severity`,
 *   `category` and `source` are copied into the context.
 * @returns {Promise<object>} The recovery context.
 */
async buildRecoveryContext(error) {
    const previousAttempts = await this.getPreviousAttempts(error);
    const systemLoad = await this.getSystemLoad();
    const swarmState = await this.getSwarmState();
    const {
        id: errorId,
        type: errorType,
        severity,
        category,
        source
    } = error;
    const affectedComponents = this.extractAffectedComponents(error);
    const timeConstraints = await this.getTimeConstraints();
    return {
        errorId,
        errorType,
        severity,
        category,
        source,
        affectedComponents,
        swarmState,
        previousAttempts,
        systemLoad,
        timeConstraints
    };
}
|
|
343
|
+
async selectBestStrategy(context) {
|
|
344
|
+
const candidates = [];
|
|
345
|
+
for (const strategy of this.strategies.values()){
|
|
346
|
+
const score = await this.scoreStrategy(strategy, context);
|
|
347
|
+
if (score > 0) {
|
|
348
|
+
candidates.push({
|
|
349
|
+
strategy,
|
|
350
|
+
score
|
|
351
|
+
});
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
if (candidates.length === 0) {
|
|
355
|
+
return null;
|
|
356
|
+
}
|
|
357
|
+
// Sort by score and apply safety checks
|
|
358
|
+
candidates.sort((a, b)=>b.score - a.score);
|
|
359
|
+
for (const { strategy } of candidates){
|
|
360
|
+
if (await this.safetyMonitor.isSafeToExecute(strategy, context)) {
|
|
361
|
+
return strategy;
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
return null; // No strategy is safe to execute
|
|
365
|
+
}
|
|
366
|
+
async scoreStrategy(strategy, context) {
|
|
367
|
+
let score = 0;
|
|
368
|
+
// Check basic conditions
|
|
369
|
+
if (!this.matchesConditions(strategy, context)) {
|
|
370
|
+
return 0;
|
|
371
|
+
}
|
|
372
|
+
// Base score from conditions match
|
|
373
|
+
score += 50;
|
|
374
|
+
// Adjust for severity appropriateness
|
|
375
|
+
const severityScore = this.getSeverityScore(strategy.type, context.severity);
|
|
376
|
+
score += severityScore;
|
|
377
|
+
// Adjust for previous success rate
|
|
378
|
+
const successRate = await this.learningEngine.getStrategySuccessRate(strategy.id);
|
|
379
|
+
score += successRate * 20;
|
|
380
|
+
// Adjust for system load
|
|
381
|
+
const loadScore = this.getLoadScore(strategy, context.systemLoad);
|
|
382
|
+
score += loadScore;
|
|
383
|
+
// Adjust for timing constraints
|
|
384
|
+
const timingScore = this.getTimingScore(strategy, context.timeConstraints);
|
|
385
|
+
score += timingScore;
|
|
386
|
+
// Apply confidence factors
|
|
387
|
+
score *= this.getConfidenceFactor(strategy, context);
|
|
388
|
+
return Math.min(score, 100);
|
|
389
|
+
}
|
|
390
|
+
matchesConditions(strategy, context) {
|
|
391
|
+
const { conditions } = strategy;
|
|
392
|
+
// Check error types
|
|
393
|
+
if (conditions.errorTypes.length > 0 && !conditions.errorTypes.includes(context.errorType)) {
|
|
394
|
+
return false;
|
|
395
|
+
}
|
|
396
|
+
// Check severity
|
|
397
|
+
if (conditions.severity.length > 0 && !conditions.severity.includes(context.severity)) {
|
|
398
|
+
return false;
|
|
399
|
+
}
|
|
400
|
+
// Check categories
|
|
401
|
+
if (conditions.categories.length > 0 && !conditions.categories.includes(context.category)) {
|
|
402
|
+
return false;
|
|
403
|
+
}
|
|
404
|
+
return true;
|
|
405
|
+
}
|
|
406
|
+
getSeverityScore(strategyType, severity) {
|
|
407
|
+
const mapping = {
|
|
408
|
+
conservative: {
|
|
409
|
+
low: 20,
|
|
410
|
+
medium: 15,
|
|
411
|
+
high: 5,
|
|
412
|
+
critical: 0
|
|
413
|
+
},
|
|
414
|
+
standard: {
|
|
415
|
+
low: 15,
|
|
416
|
+
medium: 20,
|
|
417
|
+
high: 15,
|
|
418
|
+
critical: 5
|
|
419
|
+
},
|
|
420
|
+
aggressive: {
|
|
421
|
+
low: 5,
|
|
422
|
+
medium: 15,
|
|
423
|
+
high: 20,
|
|
424
|
+
critical: 15
|
|
425
|
+
},
|
|
426
|
+
emergency: {
|
|
427
|
+
low: 0,
|
|
428
|
+
medium: 5,
|
|
429
|
+
high: 15,
|
|
430
|
+
critical: 25
|
|
431
|
+
}
|
|
432
|
+
};
|
|
433
|
+
return mapping[strategyType]?.[severity] || 0;
|
|
434
|
+
}
|
|
435
|
+
getLoadScore(strategy, systemLoad) {
|
|
436
|
+
// Heavy strategies get lower scores under high load
|
|
437
|
+
if (strategy.type === 'aggressive' || strategy.type === 'emergency') {
|
|
438
|
+
if (systemLoad.cpu > 80 || systemLoad.memory > 80) {
|
|
439
|
+
return -10;
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
// Conservative strategies preferred under high load
|
|
443
|
+
if (strategy.type === 'conservative' && (systemLoad.cpu > 70 || systemLoad.memory > 70)) {
|
|
444
|
+
return 10;
|
|
445
|
+
}
|
|
446
|
+
return 0;
|
|
447
|
+
}
|
|
448
|
+
getTimingScore(strategy, timeConstraints) {
|
|
449
|
+
// Emergency strategies get higher score during critical times
|
|
450
|
+
if (strategy.type === 'emergency' && timeConstraints.maxDowntime < 60000) {
|
|
451
|
+
return 15;
|
|
452
|
+
}
|
|
453
|
+
// Conservative strategies preferred during business hours
|
|
454
|
+
if (strategy.type === 'conservative' && timeConstraints.businessHours) {
|
|
455
|
+
return 10;
|
|
456
|
+
}
|
|
457
|
+
return 0;
|
|
458
|
+
}
|
|
459
|
+
getConfidenceFactor(strategy, context) {
|
|
460
|
+
let factor = 1.0;
|
|
461
|
+
// Reduce confidence for previous failed attempts
|
|
462
|
+
const recentFailures = context.previousAttempts.filter((attempt)=>attempt.strategyId === strategy.id && Date.now() - attempt.timestamp.getTime() < 300000);
|
|
463
|
+
factor *= Math.pow(0.5, recentFailures.length);
|
|
464
|
+
// Adjust for cooldown period
|
|
465
|
+
const lastAttempt = context.previousAttempts.filter((attempt)=>attempt.strategyId === strategy.id).sort((a, b)=>b.timestamp.getTime() - a.timestamp.getTime())[0];
|
|
466
|
+
if (lastAttempt && Date.now() - lastAttempt.timestamp.getTime() < strategy.cooldownMs) {
|
|
467
|
+
factor *= 0.3;
|
|
468
|
+
}
|
|
469
|
+
return Math.max(factor, 0.1);
|
|
470
|
+
}
|
|
471
|
+
async createRecoveryWorkflow(strategy, context) {
|
|
472
|
+
const workflow = {
|
|
473
|
+
id: `workflow-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
|
474
|
+
name: strategy.name,
|
|
475
|
+
description: strategy.description,
|
|
476
|
+
strategy,
|
|
477
|
+
context,
|
|
478
|
+
attempts: [],
|
|
479
|
+
status: 'created',
|
|
480
|
+
createdAt: new Date(),
|
|
481
|
+
updatedAt: new Date(),
|
|
482
|
+
estimatedDuration: this.estimateWorkflowDuration(strategy),
|
|
483
|
+
success: false
|
|
484
|
+
};
|
|
485
|
+
this.activeWorkflows.set(workflow.id, workflow);
|
|
486
|
+
// Store in Redis
|
|
487
|
+
await this.redis.setEx(`swarm:error-recovery-final:workflows:${workflow.id}`, 3600, JSON.stringify(workflow));
|
|
488
|
+
this.emit('workflowCreated', workflow);
|
|
489
|
+
return workflow;
|
|
490
|
+
}
|
|
491
|
+
async executeWorkflow(workflow) {
|
|
492
|
+
workflow.status = 'running';
|
|
493
|
+
workflow.updatedAt = new Date();
|
|
494
|
+
this.logger.info('Executing recovery workflow', {
|
|
495
|
+
workflowId: workflow.id,
|
|
496
|
+
strategy: workflow.strategy.id,
|
|
497
|
+
errorId: workflow.context.errorId
|
|
498
|
+
});
|
|
499
|
+
this.emit('workflowStarted', workflow);
|
|
500
|
+
try {
|
|
501
|
+
const attempt = await this.createAttempt(workflow);
|
|
502
|
+
const success = await this.executeAttempt(workflow, attempt);
|
|
503
|
+
workflow.attempts.push(attempt);
|
|
504
|
+
workflow.success = success;
|
|
505
|
+
workflow.status = success ? 'completed' : 'failed';
|
|
506
|
+
workflow.actualDuration = Date.now() - workflow.createdAt.getTime();
|
|
507
|
+
workflow.updatedAt = new Date();
|
|
508
|
+
// Update in Redis
|
|
509
|
+
await this.redis.setEx(`swarm:error-recovery-final:workflows:${workflow.id}`, 3600, JSON.stringify(workflow));
|
|
510
|
+
// Publish result
|
|
511
|
+
await this.redis.publish('swarm:error-recovery-final', JSON.stringify({
|
|
512
|
+
type: success ? 'RECOVERY_SUCCESS' : 'RECOVERY_FAILED',
|
|
513
|
+
workflowId: workflow.id,
|
|
514
|
+
timestamp: new Date().toISOString()
|
|
515
|
+
}));
|
|
516
|
+
this.emit(success ? 'workflowCompleted' : 'workflowFailed', workflow);
|
|
517
|
+
// Move to completed workflows
|
|
518
|
+
this.activeWorkflows.delete(workflow.id);
|
|
519
|
+
this.completedWorkflows.push(workflow);
|
|
520
|
+
// Learn from the result
|
|
521
|
+
await this.learningEngine.recordWorkflowResult(workflow);
|
|
522
|
+
} catch (error) {
|
|
523
|
+
workflow.status = 'failed';
|
|
524
|
+
workflow.updatedAt = new Date();
|
|
525
|
+
this.logger.error('Workflow execution failed', {
|
|
526
|
+
workflowId: workflow.id,
|
|
527
|
+
error: error instanceof Error ? error.message : String(error)
|
|
528
|
+
});
|
|
529
|
+
this.emit('workflowFailed', workflow);
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
async createAttempt(workflow) {
|
|
533
|
+
const attempt = {
|
|
534
|
+
id: `attempt-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
|
535
|
+
strategyId: workflow.strategy.id,
|
|
536
|
+
timestamp: new Date(),
|
|
537
|
+
actions: [
|
|
538
|
+
...workflow.strategy.actions
|
|
539
|
+
],
|
|
540
|
+
status: 'pending',
|
|
541
|
+
duration: 0,
|
|
542
|
+
confidence: await this.calculateAttemptConfidence(workflow)
|
|
543
|
+
};
|
|
544
|
+
return attempt;
|
|
545
|
+
}
|
|
546
|
+
async executeAttempt(workflow, attempt) {
|
|
547
|
+
attempt.status = 'running';
|
|
548
|
+
const startTime = Date.now();
|
|
549
|
+
this.logger.info('Executing recovery attempt', {
|
|
550
|
+
workflowId: workflow.id,
|
|
551
|
+
attemptId: attempt.id,
|
|
552
|
+
actions: attempt.actions.length
|
|
553
|
+
});
|
|
554
|
+
try {
|
|
555
|
+
for (const action of attempt.actions){
|
|
556
|
+
const actionSuccess = await this.executeAction(workflow, action);
|
|
557
|
+
if (!actionSuccess) {
|
|
558
|
+
await this.rollbackAction(workflow, action);
|
|
559
|
+
attempt.status = 'failed';
|
|
560
|
+
attempt.error = `Action ${action.id} failed`;
|
|
561
|
+
return false;
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
attempt.status = 'completed';
|
|
565
|
+
attempt.duration = Date.now() - startTime;
|
|
566
|
+
// Verify recovery success
|
|
567
|
+
const verificationResult = await this.verifyRecovery(workflow);
|
|
568
|
+
attempt.result = verificationResult;
|
|
569
|
+
return verificationResult.success;
|
|
570
|
+
} catch (error) {
|
|
571
|
+
attempt.status = 'failed';
|
|
572
|
+
attempt.error = error instanceof Error ? error.message : String(error);
|
|
573
|
+
attempt.duration = Date.now() - startTime;
|
|
574
|
+
return false;
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
async executeAction(workflow, action) {
|
|
578
|
+
this.logger.info('Executing recovery action', {
|
|
579
|
+
workflowId: workflow.id,
|
|
580
|
+
actionId: action.id,
|
|
581
|
+
type: action.type,
|
|
582
|
+
target: action.target
|
|
583
|
+
});
|
|
584
|
+
try {
|
|
585
|
+
switch(action.type){
|
|
586
|
+
case 'restart':
|
|
587
|
+
return await this.executeRestartAction(workflow, action);
|
|
588
|
+
case 'scale':
|
|
589
|
+
return await this.executeScaleAction(workflow, action);
|
|
590
|
+
case 'rollback':
|
|
591
|
+
return await this.executeRollbackAction(workflow, action);
|
|
592
|
+
case 'retry':
|
|
593
|
+
return await this.executeRetryAction(workflow, action);
|
|
594
|
+
case 'failover':
|
|
595
|
+
return await this.executeFailoverAction(workflow, action);
|
|
596
|
+
case 'mitigate':
|
|
597
|
+
return await this.executeMitigateAction(workflow, action);
|
|
598
|
+
case 'custom':
|
|
599
|
+
return await this.executeCustomAction(workflow, action);
|
|
600
|
+
default:
|
|
601
|
+
this.logger.warn('Unknown action type', {
|
|
602
|
+
type: action.type
|
|
603
|
+
});
|
|
604
|
+
return false;
|
|
605
|
+
}
|
|
606
|
+
} catch (error) {
|
|
607
|
+
this.logger.error('Action execution failed', {
|
|
608
|
+
workflowId: workflow.id,
|
|
609
|
+
actionId: action.id,
|
|
610
|
+
error: error instanceof Error ? error.message : String(error)
|
|
611
|
+
});
|
|
612
|
+
return false;
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
async executeRestartAction(workflow, action) {
|
|
616
|
+
const { target, parameters } = action;
|
|
617
|
+
if (target === 'agent') {
|
|
618
|
+
return await this.restartAgent(parameters.agentId, parameters.graceful);
|
|
619
|
+
} else if (target === 'service') {
|
|
620
|
+
return await this.restartService(parameters.serviceId, parameters.graceful);
|
|
621
|
+
}
|
|
622
|
+
return false;
|
|
623
|
+
}
|
|
624
|
+
async executeScaleAction(workflow, action) {
|
|
625
|
+
const { target, parameters } = action;
|
|
626
|
+
if (target === 'agents') {
|
|
627
|
+
return await this.scaleAgents(parameters.direction, parameters.count);
|
|
628
|
+
} else if (target === 'resources') {
|
|
629
|
+
return await this.scaleResources(parameters.resource, parameters.amount);
|
|
630
|
+
}
|
|
631
|
+
return false;
|
|
632
|
+
}
|
|
633
|
+
async executeRollbackAction(workflow, action) {
|
|
634
|
+
const { target, parameters } = action;
|
|
635
|
+
// Implementation would rollback changes
|
|
636
|
+
this.logger.info('Rolling back changes', {
|
|
637
|
+
target,
|
|
638
|
+
parameters
|
|
639
|
+
});
|
|
640
|
+
return true;
|
|
641
|
+
}
|
|
642
|
+
async executeRetryAction(workflow, action) {
|
|
643
|
+
const { target, parameters } = action;
|
|
644
|
+
if (target === 'task') {
|
|
645
|
+
return await this.retryTask(parameters.taskId, parameters);
|
|
646
|
+
}
|
|
647
|
+
return false;
|
|
648
|
+
}
|
|
649
|
+
async executeFailoverAction(workflow, action) {
|
|
650
|
+
const { target, parameters } = action;
|
|
651
|
+
if (target === 'system') {
|
|
652
|
+
return await this.activateFailover(parameters.mode);
|
|
653
|
+
}
|
|
654
|
+
return false;
|
|
655
|
+
}
|
|
656
|
+
async executeMitigateAction(workflow, action) {
|
|
657
|
+
const { target, parameters } = action;
|
|
658
|
+
// Implementation would apply mitigation measures
|
|
659
|
+
this.logger.info('Applying mitigation', {
|
|
660
|
+
target,
|
|
661
|
+
parameters
|
|
662
|
+
});
|
|
663
|
+
return true;
|
|
664
|
+
}
|
|
665
|
+
async executeCustomAction(workflow, action) {
|
|
666
|
+
const { target, parameters } = action;
|
|
667
|
+
// Implementation would execute custom action
|
|
668
|
+
this.logger.info('Executing custom action', {
|
|
669
|
+
target,
|
|
670
|
+
parameters
|
|
671
|
+
});
|
|
672
|
+
return true;
|
|
673
|
+
}
|
|
674
|
+
async rollbackAction(workflow, action) {
|
|
675
|
+
if (action.rollbackAction) {
|
|
676
|
+
this.logger.info('Rolling back action', {
|
|
677
|
+
workflowId: workflow.id,
|
|
678
|
+
actionId: action.id
|
|
679
|
+
});
|
|
680
|
+
await this.executeAction(workflow, action.rollbackAction);
|
|
681
|
+
}
|
|
682
|
+
}
|
|
683
|
+
async verifyRecovery(workflow) {
|
|
684
|
+
// Check if the original error is resolved
|
|
685
|
+
const errorResolved = await this.checkErrorResolution(workflow.context.errorId);
|
|
686
|
+
// Check system health
|
|
687
|
+
const systemHealth = await this.checkSystemHealth();
|
|
688
|
+
// Check affected components
|
|
689
|
+
const componentHealth = await this.checkComponentHealth(workflow.context.affectedComponents);
|
|
690
|
+
const success = errorResolved && systemHealth.overall > 0.8 && componentHealth.every((c)=>c.health > 0.7);
|
|
691
|
+
return {
|
|
692
|
+
success,
|
|
693
|
+
details: {
|
|
694
|
+
errorResolved,
|
|
695
|
+
systemHealth,
|
|
696
|
+
componentHealth
|
|
697
|
+
}
|
|
698
|
+
};
|
|
699
|
+
}
|
|
700
|
+
// Action implementations (simplified)
|
|
701
|
+
async restartAgent(agentId, graceful) {
|
|
702
|
+
this.logger.info('Restarting agent', {
|
|
703
|
+
agentId,
|
|
704
|
+
graceful
|
|
705
|
+
});
|
|
706
|
+
// Implementation would restart the agent
|
|
707
|
+
return true;
|
|
708
|
+
}
|
|
709
|
+
async restartService(serviceId, graceful) {
|
|
710
|
+
this.logger.info('Restarting service', {
|
|
711
|
+
serviceId,
|
|
712
|
+
graceful
|
|
713
|
+
});
|
|
714
|
+
// Implementation would restart the service
|
|
715
|
+
return true;
|
|
716
|
+
}
|
|
717
|
+
async scaleAgents(direction, count) {
|
|
718
|
+
this.logger.info('Scaling agents', {
|
|
719
|
+
direction,
|
|
720
|
+
count
|
|
721
|
+
});
|
|
722
|
+
// Implementation would scale agents
|
|
723
|
+
return true;
|
|
724
|
+
}
|
|
725
|
+
async scaleResources(resource, amount) {
|
|
726
|
+
this.logger.info('Scaling resources', {
|
|
727
|
+
resource,
|
|
728
|
+
amount
|
|
729
|
+
});
|
|
730
|
+
// Implementation would scale resources
|
|
731
|
+
return true;
|
|
732
|
+
}
|
|
733
|
+
async retryTask(taskId, parameters) {
|
|
734
|
+
this.logger.info('Retrying task', {
|
|
735
|
+
taskId,
|
|
736
|
+
parameters
|
|
737
|
+
});
|
|
738
|
+
// Implementation would retry the task
|
|
739
|
+
return true;
|
|
740
|
+
}
|
|
741
|
+
async activateFailover(mode) {
|
|
742
|
+
this.logger.info('Activating failover', {
|
|
743
|
+
mode
|
|
744
|
+
});
|
|
745
|
+
// Implementation would activate failover mode
|
|
746
|
+
return true;
|
|
747
|
+
}
|
|
748
|
+
// Helper methods
|
|
749
|
+
async calculateAttemptConfidence(workflow) {
|
|
750
|
+
const strategySuccessRate = await this.learningEngine.getStrategySuccessRate(workflow.strategy.id);
|
|
751
|
+
const contextConfidence = this.getContextConfidence(workflow.context);
|
|
752
|
+
return (strategySuccessRate + contextConfidence) / 2;
|
|
753
|
+
}
|
|
754
|
+
getContextConfidence(context) {
|
|
755
|
+
let confidence = 0.5;
|
|
756
|
+
// Adjust based on system load
|
|
757
|
+
if (context.systemLoad.cpu < 50 && context.systemLoad.memory < 50) {
|
|
758
|
+
confidence += 0.2;
|
|
759
|
+
} else if (context.systemLoad.cpu > 80 || context.systemLoad.memory > 80) {
|
|
760
|
+
confidence -= 0.2;
|
|
761
|
+
}
|
|
762
|
+
// Adjust based on previous attempts
|
|
763
|
+
const recentFailures = context.previousAttempts.filter((attempt)=>Date.now() - attempt.timestamp.getTime() < 300000);
|
|
764
|
+
confidence -= recentFailures.length * 0.1;
|
|
765
|
+
return Math.max(Math.min(confidence, 1.0), 0.1);
|
|
766
|
+
}
|
|
767
|
+
estimateWorkflowDuration(strategy) {
|
|
768
|
+
return strategy.actions.reduce((total, action)=>total + action.timeoutMs, 0);
|
|
769
|
+
}
|
|
770
|
+
/**
 * Determine which components an error affects.
 *
 * @param {object} error - Error record; must be an object because `in` is applied to it.
 * @returns {*} `error.affectedComponents` when that property exists, otherwise
 *   a one-element array holding `error.source`.
 */
extractAffectedComponents(error) {
    const hasExplicitList = 'affectedComponents' in error;
    return hasExplicitList ? error.affectedComponents : [error.source];
}
|
|
778
|
+
/**
 * Previous recovery attempts for an error. Placeholder: it always yields an
 * empty list.
 *
 * @param {object} error - Currently unused.
 * @returns {Promise<Array>} A new empty array.
 */
async getPreviousAttempts(error) {
    // Implementation would load previous attempts from Redis
    const attempts = [];
    return attempts;
}
|
|
782
|
+
async getSystemLoad() {
|
|
783
|
+
return {
|
|
784
|
+
cpu: Math.random() * 100,
|
|
785
|
+
memory: Math.random() * 100,
|
|
786
|
+
activeTasks: Math.floor(Math.random() * 100)
|
|
787
|
+
};
|
|
788
|
+
}
|
|
789
|
+
async getSwarmState() {
|
|
790
|
+
// Implementation would get current swarm state
|
|
791
|
+
return {};
|
|
792
|
+
}
|
|
793
|
+
async getTimeConstraints() {
|
|
794
|
+
const now = new Date();
|
|
795
|
+
const businessHours = now.getHours() >= 9 && now.getHours() <= 17;
|
|
796
|
+
return {
|
|
797
|
+
maxDowntime: 300000,
|
|
798
|
+
businessHours
|
|
799
|
+
};
|
|
800
|
+
}
|
|
801
|
+
async checkErrorResolution(errorId) {
|
|
802
|
+
// Implementation would check if the original error is resolved
|
|
803
|
+
return true;
|
|
804
|
+
}
|
|
805
|
+
async checkSystemHealth() {
|
|
806
|
+
return {
|
|
807
|
+
overall: Math.random(),
|
|
808
|
+
details: {}
|
|
809
|
+
};
|
|
810
|
+
}
|
|
811
|
+
async checkComponentHealth(components) {
|
|
812
|
+
return components.map((component)=>({
|
|
813
|
+
component,
|
|
814
|
+
health: Math.random()
|
|
815
|
+
}));
|
|
816
|
+
}
|
|
817
|
+
startMonitoring() {
|
|
818
|
+
setInterval(async ()=>{
|
|
819
|
+
try {
|
|
820
|
+
await this.cleanupOldWorkflows();
|
|
821
|
+
await this.updateMetrics();
|
|
822
|
+
} catch (error) {
|
|
823
|
+
this.logger.error('Error in recovery monitoring', {
|
|
824
|
+
error
|
|
825
|
+
});
|
|
826
|
+
}
|
|
827
|
+
}, this.config.global.monitoringIntervalMs);
|
|
828
|
+
}
|
|
829
|
+
async cleanupOldWorkflows() {
|
|
830
|
+
const cutoff = Date.now() - this.config.learning.historyRetentionDays * 24 * 60 * 60 * 1000;
|
|
831
|
+
this.completedWorkflows = this.completedWorkflows.filter((workflow)=>workflow.createdAt.getTime() > cutoff);
|
|
832
|
+
// Keep only last 1000 workflows in memory
|
|
833
|
+
if (this.completedWorkflows.length > 1000) {
|
|
834
|
+
this.completedWorkflows = this.completedWorkflows.slice(-1000);
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
async updateMetrics() {
|
|
838
|
+
const metrics = {
|
|
839
|
+
activeWorkflows: this.activeWorkflows.size,
|
|
840
|
+
completedWorkflows: this.completedWorkflows.length,
|
|
841
|
+
strategies: this.strategies.size,
|
|
842
|
+
timestamp: new Date().toISOString()
|
|
843
|
+
};
|
|
844
|
+
await this.redis.setEx('swarm:error-recovery-final:recovery-metrics', 300, JSON.stringify(metrics));
|
|
845
|
+
}
|
|
846
|
+
async loadHistoricalData() {
|
|
847
|
+
// Implementation would load historical workflow data from Redis
|
|
848
|
+
}
|
|
849
|
+
async saveHistoricalData() {
|
|
850
|
+
// Implementation would save workflow history to Redis
|
|
851
|
+
}
|
|
852
|
+
async stopWorkflow(workflowId, reason) {
|
|
853
|
+
const workflow = this.activeWorkflows.get(workflowId);
|
|
854
|
+
if (!workflow) return;
|
|
855
|
+
workflow.status = 'failed';
|
|
856
|
+
workflow.updatedAt = new Date();
|
|
857
|
+
this.logger.info('Stopping workflow', {
|
|
858
|
+
workflowId,
|
|
859
|
+
reason
|
|
860
|
+
});
|
|
861
|
+
this.emit('workflowStopped', {
|
|
862
|
+
workflowId,
|
|
863
|
+
reason
|
|
864
|
+
});
|
|
865
|
+
}
|
|
866
|
+
// Public API methods
|
|
867
|
+
async getWorkflow(workflowId) {
|
|
868
|
+
const workflow = this.activeWorkflows.get(workflowId);
|
|
869
|
+
if (workflow) return workflow;
|
|
870
|
+
const completed = this.completedWorkflows.find((w)=>w.id === workflowId);
|
|
871
|
+
if (completed) return completed;
|
|
872
|
+
// Try loading from Redis
|
|
873
|
+
const data = await this.redis.get(`swarm:error-recovery-final:workflows:${workflowId}`);
|
|
874
|
+
return data ? JSON.parse(data) : null;
|
|
875
|
+
}
|
|
876
|
+
async getActiveWorkflows() {
|
|
877
|
+
return Array.from(this.activeWorkflows.values());
|
|
878
|
+
}
|
|
879
|
+
async getCompletedWorkflows(limit = 50) {
|
|
880
|
+
return this.completedWorkflows.sort((a, b)=>b.createdAt.getTime() - a.createdAt.getTime()).slice(0, limit);
|
|
881
|
+
}
|
|
882
|
+
async addStrategy(strategy) {
|
|
883
|
+
this.strategies.set(strategy.id, strategy);
|
|
884
|
+
this.logger.info('Recovery strategy added', {
|
|
885
|
+
id: strategy.id,
|
|
886
|
+
name: strategy.name
|
|
887
|
+
});
|
|
888
|
+
}
|
|
889
|
+
async removeStrategy(strategyId) {
|
|
890
|
+
this.strategies.delete(strategyId);
|
|
891
|
+
this.logger.info('Recovery strategy removed', {
|
|
892
|
+
id: strategyId
|
|
893
|
+
});
|
|
894
|
+
}
|
|
895
|
+
async getStrategySuccessRate(strategyId) {
|
|
896
|
+
return await this.learningEngine.getStrategySuccessRate(strategyId);
|
|
897
|
+
}
|
|
898
|
+
constructor(logger, config){
|
|
899
|
+
super(), _define_property(this, "redis", void 0), _define_property(this, "logger", void 0), _define_property(this, "config", void 0), _define_property(this, "isRunning", false), _define_property(this, "activeWorkflows", new Map()), _define_property(this, "completedWorkflows", []), _define_property(this, "strategies", new Map()), _define_property(this, "learningEngine", void 0), _define_property(this, "safetyMonitor", void 0);
|
|
900
|
+
this.logger = logger;
|
|
901
|
+
this.config = config;
|
|
902
|
+
this.redis = createClient(config.redis);
|
|
903
|
+
this.learningEngine = new RecoveryLearningEngine(logger, config.learning);
|
|
904
|
+
this.safetyMonitor = new RecoverySafetyMonitor(logger, config.safety);
|
|
905
|
+
this.initializeStrategies();
|
|
906
|
+
}
|
|
907
|
+
}
|
|
908
|
+
/**
 * Tracks per-strategy attempt and success counts in memory and exposes the
 * resulting success rate.
 */
let RecoveryLearningEngine = class RecoveryLearningEngine {
    /**
     * @param {object} logger - Logger providing `debug` and `info`.
     * @param {object} config - Supplies `enabled` and `adaptStrategies`.
     */
    constructor(logger, config){
        _define_property(this, "logger", void 0);
        _define_property(this, "config", void 0);
        _define_property(this, "strategyStats", new Map());
        this.logger = logger;
        this.config = config;
    }
    /**
     * Record a finished workflow against its strategy's statistics.
     * Does nothing when learning is disabled.
     *
     * @param {object} workflow - Supplies `strategy.id` and `success`.
     * @returns {Promise<void>}
     */
    async recordWorkflowResult(workflow) {
        if (!this.config.enabled) {
            return;
        }
        const { id: strategyId } = workflow.strategy;
        let tally = this.strategyStats.get(strategyId);
        if (!tally) {
            tally = {
                attempts: 0,
                successes: 0
            };
        }
        tally.attempts += 1;
        tally.successes += workflow.success ? 1 : 0;
        this.strategyStats.set(strategyId, tally);
        // Adapt strategies if enabled
        if (this.config.adaptStrategies) {
            await this.adaptStrategy(strategyId, workflow);
        }
        const successRate = tally.successes / tally.attempts;
        this.logger.debug('Recorded workflow result', {
            strategyId,
            success: workflow.success,
            successRate
        });
    }
    /**
     * Success rate recorded for a strategy.
     *
     * @param {string} strategyId - Id of the strategy.
     * @returns {Promise<number>} successes / attempts, or 0.5 when nothing is recorded yet.
     */
    async getStrategySuccessRate(strategyId) {
        const tally = this.strategyStats.get(strategyId);
        if (!tally) {
            // Default 50% for new strategies
            return 0.5;
        }
        return tally.successes / tally.attempts;
    }
    /**
     * Adapt a strategy based on a workflow result. Placeholder: it only logs.
     *
     * @param {string} strategyId - Id of the strategy.
     * @param {object} workflow - Currently unused.
     * @returns {Promise<void>}
     */
    async adaptStrategy(strategyId, workflow) {
        // Implementation would adapt strategy parameters based on results
        const details = {
            strategyId
        };
        this.logger.info('Adapting strategy based on results', details);
    }
};
|
|
949
|
+
/**
 * Gatekeeper that decides whether a recovery strategy may run, based on the
 * cascading-failure count, emergency-mode triggers and component quarantine.
 */
let RecoverySafetyMonitor = class RecoverySafetyMonitor {
    /**
     * Decide whether `strategy` may be executed for `context`.
     *
     * Execution is refused when the cascading-failure limit has been reached,
     * when the error type triggers emergency mode and the strategy is not of
     * type 'emergency', or when the source component is quarantined.
     *
     * A cascading-failure count whose last failure is older than the cascade
     * window is discarded first. Without that, reaching the limit once would
     * block recovery until another failure happened to be recorded.
     *
     * @param {object} strategy - Only `strategy.type` is read.
     * @param {object} context - Supplies `errorType` and `source`.
     * @returns {Promise<boolean>} True when execution is allowed.
     */
    async isSafeToExecute(strategy, context) {
        this.expireStaleCascade();
        // Check for cascading failures
        if (this.cascadingFailures >= this.config.maxCascadingFailures) {
            this.logger.warn('Cascading failure limit reached', {
                count: this.cascadingFailures,
                limit: this.config.maxCascadingFailures
            });
            return false;
        }
        // Check emergency mode triggers
        if (this.config.emergencyMode.enabled) {
            const shouldTriggerEmergency = this.config.emergencyMode.triggers.includes(context.errorType);
            if (shouldTriggerEmergency && strategy.type !== 'emergency') {
                return false; // Only emergency strategies allowed
            }
        }
        // Check quarantining
        if (this.config.quarantining.enabled) {
            if (await this.isComponentQuarantined(context.source)) {
                this.logger.warn('Component is quarantined', {
                    component: context.source
                });
                return false;
            }
        }
        return true;
    }
    /**
     * Record a failure. Failures less than one cascade window apart accumulate;
     * a failure after a longer gap restarts the count at 1.
     *
     * @returns {Promise<void>}
     */
    async recordCascadingFailure() {
        const now = Date.now();
        if (now - this.lastFailureTime < this.cascadeWindowMs) {
            this.cascadingFailures++;
        } else {
            this.cascadingFailures = 1;
        }
        this.lastFailureTime = now;
    }
    /**
     * Report whether a component is quarantined. Placeholder: it always
     * reports that it is not.
     *
     * @param {*} component - Currently unused.
     * @returns {Promise<boolean>} Always false.
     */
    async isComponentQuarantined(component) {
        // Implementation would check Redis for quarantined components
        return false;
    }
    /**
     * Reset the cascading-failure count once the last recorded failure is at
     * least one cascade window old.
     *
     * @returns {void}
     */
    expireStaleCascade() {
        if (this.cascadingFailures > 0 && Date.now() - this.lastFailureTime >= this.cascadeWindowMs) {
            this.cascadingFailures = 0;
        }
    }
    /**
     * @param {object} logger - Logger providing `warn`.
     * @param {object} config - Supplies `maxCascadingFailures`, `emergencyMode`
     *   and `quarantining`.
     */
    constructor(logger, config){
        _define_property(this, "logger", void 0);
        _define_property(this, "config", void 0);
        _define_property(this, "cascadingFailures", 0);
        _define_property(this, "lastFailureTime", 0);
        // Window within which consecutive failures count as cascading.
        _define_property(this, "cascadeWindowMs", 60000);
        this.logger = logger;
        this.config = config;
    }
};
|