wyrm-mcp 7.2.0 → 7.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +26 -667
- package/NOTICE +14 -33
- package/dist/activation.d.ts.map +1 -1
- package/dist/activation.js +1 -44
- package/dist/activation.js.map +1 -1
- package/dist/agent-daemon.js +4 -281
- package/dist/agent-loop.js +7 -332
- package/dist/analytics.js +13 -236
- package/dist/attribution.js +1 -49
- package/dist/audit.js +2 -457
- package/dist/auto-capture.js +3 -138
- package/dist/auto-orchestrator.js +1 -325
- package/dist/autoconfig.js +39 -840
- package/dist/buddy-runner.js +1 -109
- package/dist/buddy.js +14 -564
- package/dist/build-flags.js +1 -17
- package/dist/capabilities.js +3 -183
- package/dist/capture.js +1 -56
- package/dist/causality.js +6 -107
- package/dist/cli.js +20 -281
- package/dist/cloud/cli.js +5 -541
- package/dist/cloud/client.js +1 -221
- package/dist/cloud/crypto.js +1 -85
- package/dist/cloud/machine-id.js +2 -113
- package/dist/cloud/recovery.js +1 -60
- package/dist/cloud/sync-engine.js +7 -543
- package/dist/cloud-backup.js +5 -579
- package/dist/cloud-profile.js +1 -138
- package/dist/cloud-sync-entrypoint.js +1 -47
- package/dist/cloud-sync.js +2 -309
- package/dist/constellation.js +12 -168
- package/dist/context-build-budgeted.js +4 -144
- package/dist/context-ranking.js +1 -69
- package/dist/crypto.js +1 -179
- package/dist/daemon-write-endpoint.js +1 -290
- package/dist/daemon-writer.js +2 -406
- package/dist/database.js +43 -1110
- package/dist/deprecations.js +2 -162
- package/dist/design.js +13 -141
- package/dist/event-replication.js +1 -112
- package/dist/events-sse.js +7 -43
- package/dist/events.js +6 -238
- package/dist/failure-patterns.js +42 -659
- package/dist/federation.js +12 -236
- package/dist/goals.js +13 -101
- package/dist/golden.js +3 -355
- package/dist/handlers/agent.js +4 -165
- package/dist/handlers/alias-adapters.js +1 -129
- package/dist/handlers/aliases.js +1 -171
- package/dist/handlers/audit.js +1 -87
- package/dist/handlers/boundary.js +1 -221
- package/dist/handlers/capture.js +73 -1109
- package/dist/handlers/causality.js +7 -114
- package/dist/handlers/cloud.js +85 -382
- package/dist/handlers/companion.js +28 -459
- package/dist/handlers/datalake.js +7 -187
- package/dist/handlers/dispatch-context.js +0 -22
- package/dist/handlers/entity.js +25 -256
- package/dist/handlers/events.js +16 -335
- package/dist/handlers/failure.js +13 -340
- package/dist/handlers/goals.js +4 -296
- package/dist/handlers/intelligence.js +126 -674
- package/dist/handlers/invoicing.js +1 -70
- package/dist/handlers/mcpclient.js +6 -137
- package/dist/handlers/orchestration.js +40 -125
- package/dist/handlers/output-schemas.js +1 -24
- package/dist/handlers/presence.js +3 -99
- package/dist/handlers/project.js +28 -182
- package/dist/handlers/prompts.js +6 -157
- package/dist/handlers/quest.js +4 -224
- package/dist/handlers/recall.js +11 -218
- package/dist/handlers/registry.js +1 -167
- package/dist/handlers/resources.js +1 -288
- package/dist/handlers/review.js +11 -74
- package/dist/handlers/run.js +17 -487
- package/dist/handlers/search.js +15 -326
- package/dist/handlers/session.js +28 -615
- package/dist/handlers/share.js +8 -184
- package/dist/handlers/shims.js +1 -464
- package/dist/handlers/skill.js +67 -449
- package/dist/handlers/survivors.js +1 -120
- package/dist/handlers/symbols.js +8 -109
- package/dist/handlers/syncops.js +4 -302
- package/dist/handlers/types.js +1 -27
- package/dist/harvest.js +5 -191
- package/dist/hours.js +7 -156
- package/dist/http-auth.js +3 -321
- package/dist/http-fast.js +21 -1137
- package/dist/icons.js +1 -47
- package/dist/index.js +2 -924
- package/dist/indexer.js +4 -145
- package/dist/intelligence.js +31 -261
- package/dist/internal-dispatch.js +3 -212
- package/dist/keyset.js +1 -110
- package/dist/knowledge-graph.js +12 -176
- package/dist/license.d.ts +11 -0
- package/dist/license.d.ts.map +1 -1
- package/dist/license.js +2 -414
- package/dist/license.js.map +1 -1
- package/dist/logger.js +2 -199
- package/dist/maintenance.js +2 -148
- package/dist/mcp-client.js +6 -262
- package/dist/memory-artifacts.js +30 -449
- package/dist/migrate-prompt.js +2 -124
- package/dist/migrations.js +40 -655
- package/dist/performance.js +1 -228
- package/dist/presence.js +11 -140
- package/dist/priority-embed.js +5 -164
- package/dist/providers/embedding-provider.js +1 -196
- package/dist/readonly-gate.js +1 -29
- package/dist/rehydration.js +9 -157
- package/dist/reindex.js +1 -88
- package/dist/render-target.js +21 -514
- package/dist/render.js +4 -280
- package/dist/repl-guard.js +1 -173
- package/dist/replication-daemon-entrypoint.js +1 -31
- package/dist/replication-daemon.js +2 -262
- package/dist/resilience.js +1 -591
- package/dist/reverse-bridge.js +5 -360
- package/dist/security.js +1 -244
- package/dist/session-seen.js +3 -51
- package/dist/setup.js +1 -260
- package/dist/skill-author.js +5 -168
- package/dist/spec-kit.js +1 -191
- package/dist/sqlite-busy.js +1 -154
- package/dist/statusline.js +11 -315
- package/dist/sub-agent.js +13 -262
- package/dist/summarizer.js +13 -139
- package/dist/symbols.js +7 -283
- package/dist/sync.js +5 -359
- package/dist/tasks-dispatch.js +1 -84
- package/dist/tasks.js +1 -282
- package/dist/token-budget.js +1 -143
- package/dist/tool-analytics.js +7 -129
- package/dist/tool-annotations.js +1 -365
- package/dist/tool-manifest-v2.json +1 -1
- package/dist/tool-manifest.json +1 -1
- package/dist/tool-profiles.js +1 -75
- package/dist/trace-harvest.js +6 -244
- package/dist/types.js +1 -30
- package/dist/ui-dashboard.js +41 -50
- package/dist/ulid.js +1 -81
- package/dist/validate.js +1 -129
- package/dist/vault.js +1 -534
- package/dist/vectors.js +3 -184
- package/dist/version-check.js +4 -136
- package/dist/visibility.js +19 -155
- package/dist/wyrm-cli.js +98 -2451
- package/dist/wyrm-cli.js.map +1 -1
- package/dist/wyrm-guard.js +14 -424
- package/dist/wyrm-loop.js +3 -150
- package/dist/wyrm-manifest.json +1 -1
- package/dist/wyrm-statusline-daemon.js +1 -11
- package/dist/wyrm-statusline.js +4 -56
- package/dist/wyrm-ui.js +9 -77
- package/package.json +4 -2
package/dist/resilience.js
CHANGED
|
@@ -1,591 +1 @@
|
|
|
1
|
-
|
|
2
|
-
* Wyrm Resilience Module - Professional-grade fault tolerance
|
|
3
|
-
*
|
|
4
|
-
* @copyright 2026 Ghost Protocol (Pvt) Ltd.
|
|
5
|
-
* @license AGPL-3.0-or-later — dual-licensed; commercial terms: ghosts.lk@proton.me. See LICENSE.
|
|
6
|
-
* @module resilience
|
|
7
|
-
* @version 3.0.0
|
|
8
|
-
*
|
|
9
|
-
* Features:
|
|
10
|
-
* - Transaction safety with automatic rollback
|
|
11
|
-
* - Exponential backoff retry logic
|
|
12
|
-
* - Circuit breaker pattern
|
|
13
|
-
* - Operation checkpointing
|
|
14
|
-
* - Crash recovery via WAL
|
|
15
|
-
* - Graceful degradation
|
|
16
|
-
*/
|
|
17
|
-
import { existsSync, readFileSync, writeFileSync, mkdirSync, unlinkSync, renameSync, readdirSync } from 'fs';
|
|
18
|
-
import { join } from 'path';
|
|
19
|
-
import { homedir } from 'os';
|
|
20
|
-
import { WyrmLogger } from './logger.js';
|
|
21
|
-
import { isSqliteBusyError, WYRM_CIRCUIT_OPEN_CODE } from './sqlite-busy.js';
|
|
22
|
-
// ==================== DEFAULT CONFIGS ====================
|
|
23
|
-
// Error codes (matched as substrings of `error.code` or `error.message`) that
// are worth another attempt.
//
// SQLITE_BUSY is deliberately NOT listed (v7 F2 review fix). By the time
// better-sqlite3 surfaces it, SQLite has ALREADY retried internally for the
// full busy_timeout window (5000ms — database.ts). Re-retrying here multiplied
// one contended write into a ~26.5s synchronous stall (5×5000ms busy_timeout +
// backoff sleeps), and the per-attempt failure count tripped the shared circuit
// breaker, hiding the structured WYRM_BUSY classification behind a code-less
// circuit-open error for 30s. One busy_timeout window IS the retry; BUSY now
// propagates immediately with its code intact (see the isSqliteBusyError
// early-outs in withRetry/withRetrySync).
const RETRYABLE_ERROR_CODES = [
    'SQLITE_LOCKED',
    'ECONNRESET',
    'ETIMEDOUT',
    'ENOTFOUND',
    'EAI_AGAIN',
    'EPIPE',
    'ECONNREFUSED',
];

// Retry defaults: up to 5 attempts, waiting 100ms, 200ms, 400ms, ... between
// them, with each wait capped at 10s.
const DEFAULT_RETRY_CONFIG = {
    maxAttempts: 5,
    baseDelayMs: 100,
    maxDelayMs: 10_000,
    backoffMultiplier: 2,
    retryableErrors: RETRYABLE_ERROR_CODES,
};

// Circuit-breaker defaults: open after 5 recorded failures, allow a trial call
// 30s after the last failure, close again after 3 successes while half-open.
const DEFAULT_CIRCUIT_CONFIG = {
    failureThreshold: 5,
    successThreshold: 3,
    timeout: 30_000,
};
|
|
52
|
-
// ==================== RESILIENCE MANAGER ====================
|
|
53
|
-
/**
 * Fault-tolerance helper bundling four facilities:
 *  - retry with exponential backoff (`withRetry`, `withRetrySync`);
 *  - a circuit breaker shared by every retried operation on this instance;
 *  - JSON checkpoints written under `~/.wyrm/checkpoints`, one file per
 *    operation, so unfinished work can be listed after a restart;
 *  - small file/transaction wrappers (`executeTransaction`,
 *    `atomicWriteFile`, `safeReadFile`).
 *
 * Retry and transaction methods report their outcome as a result object
 * `{ success, data?, error?, attempts, recoverable }` instead of throwing.
 */
export class ResilienceManager {
    /** Logger used for all diagnostics; a WyrmLogger unless one is injected. */
    logger;
    /** Directory holding one `<operationId>.json` file per checkpoint. */
    checkpointDir;
    /** DEFAULT_RETRY_CONFIG overlaid with the constructor's overrides. */
    retryConfig;
    /** DEFAULT_CIRCUIT_CONFIG overlaid with the constructor's overrides. */
    circuitConfig;
    // Circuit breaker state: 'closed' | 'open' | 'half-open'.
    circuitState = 'closed';
    failureCount = 0;
    successCount = 0;
    lastFailureTime = 0;
    // Checkpoints of operations that have been created but not completed,
    // keyed by operation ID.
    activeOperations = new Map();

    /**
     * @param {object} [logger] - Logger exposing debug/info/warn/error;
     *   defaults to a new WyrmLogger.
     * @param {object} [retryConfig] - Overrides merged over DEFAULT_RETRY_CONFIG.
     * @param {object} [circuitConfig] - Overrides merged over DEFAULT_CIRCUIT_CONFIG.
     * @throws If the checkpoint directory cannot be created.
     */
    constructor(logger, retryConfig, circuitConfig) {
        this.logger = logger || new WyrmLogger();
        this.retryConfig = { ...DEFAULT_RETRY_CONFIG, ...retryConfig };
        this.circuitConfig = { ...DEFAULT_CIRCUIT_CONFIG, ...circuitConfig };
        // Initialize checkpoint directory
        this.checkpointDir = join(homedir(), '.wyrm', 'checkpoints');
        if (!existsSync(this.checkpointDir)) {
            mkdirSync(this.checkpointDir, { recursive: true });
        }
        // Recover any incomplete operations on startup
        this.recoverIncompleteOperations();
    }

    // ==================== RETRY WITH BACKOFF ====================

    /**
     * Run an async operation, retrying retryable failures with exponential
     * backoff.
     *
     * Outcomes:
     *  - breaker open            -> `recoverable: true`, error coded WYRM_CIRCUIT_OPEN;
     *  - SQLITE_BUSY             -> returned immediately, `recoverable: true`, not
     *                               counted toward the breaker;
     *  - non-retryable error     -> returned immediately, `recoverable: false`;
     *  - retryable until exhausted -> `recoverable: true` with the last error.
     *
     * @param {() => Promise<*>} operation - Work to attempt.
     * @param {string} operationName - Label used in log messages.
     * @param {object} [config] - Per-call overrides of the retry config.
     * @returns {Promise<{success: boolean, data?: *, error?: *, attempts: number, recoverable: boolean}>}
     */
    async withRetry(operation, operationName, config) {
        const cfg = { ...this.retryConfig, ...config };
        let lastError;
        let attempts = 0;
        while (attempts < cfg.maxAttempts) {
            attempts++;
            try {
                // The error carries WYRM_CIRCUIT_OPEN (v7 F2 review fix) so the MCP
                // dispatcher can map it to the structured retryable body instead
                // of an opaque plain Error.
                if (!this.checkCircuit()) {
                    return this.circuitOpenResult('Circuit breaker is open - service unavailable', attempts);
                }
                const data = await operation();
                this.recordSuccess();
                return { success: true, data, attempts, recoverable: false };
            }
            catch (error) {
                lastError = error;
                // v7 F2 review fix: SQLITE_BUSY already burned its full busy_timeout
                // retry window inside SQLite — propagate immediately with the code
                // intact (the dispatcher owns the structured WYRM_BUSY body) and do
                // NOT count it toward the circuit breaker (cross-process contention
                // is not a service failure).
                if (isSqliteBusyError(error)) {
                    return { success: false, error: lastError, attempts, recoverable: true };
                }
                this.recordFailure();
                const { code, message } = this.describeError(error);
                // Stop as soon as an error is not retryable, on any attempt: retrying
                // it cannot succeed and only feeds the circuit breaker.
                if (!this.isRetryableError(cfg, code, message)) {
                    this.logger.error(`${operationName} failed with non-retryable error`, {
                        error: message,
                        code,
                    });
                    return { success: false, error: lastError, attempts, recoverable: false };
                }
                if (attempts < cfg.maxAttempts) {
                    const delay = this.backoffDelay(cfg, attempts);
                    this.logger.warn(`${operationName} failed, retrying in ${delay}ms`, {
                        attempt: attempts,
                        maxAttempts: cfg.maxAttempts,
                        error: message,
                    });
                    await this.sleep(delay);
                }
            }
        }
        this.logger.error(`${operationName} failed after ${attempts} attempts`, {
            error: lastError?.message,
        });
        return { success: false, error: lastError, attempts, recoverable: true };
    }

    /**
     * Synchronous counterpart of `withRetry` for database operations. Same
     * outcomes; the backoff wait blocks the calling thread (see `sleepSync`).
     *
     * @param {() => *} operation - Work to attempt.
     * @param {string} operationName - Label used in log messages.
     * @param {object} [config] - Per-call overrides of the retry config.
     * @returns {{success: boolean, data?: *, error?: *, attempts: number, recoverable: boolean}}
     */
    withRetrySync(operation, operationName, config) {
        const cfg = { ...this.retryConfig, ...config };
        let lastError;
        let attempts = 0;
        while (attempts < cfg.maxAttempts) {
            attempts++;
            try {
                // WYRM_CIRCUIT_OPEN code: see withRetry (v7 F2 review fix).
                if (!this.checkCircuit()) {
                    return this.circuitOpenResult('Circuit breaker is open', attempts);
                }
                const data = operation();
                this.recordSuccess();
                return { success: true, data, attempts, recoverable: false };
            }
            catch (error) {
                lastError = error;
                // SQLITE_BUSY: already retried inside SQLite for the busy_timeout
                // window — propagate with code intact, never feed the breaker
                // (v7 F2 review fix).
                if (isSqliteBusyError(error)) {
                    return { success: false, error: lastError, attempts, recoverable: true };
                }
                this.recordFailure();
                const { code, message } = this.describeError(error);
                // Non-retryable on any attempt ends the loop (see withRetry).
                if (!this.isRetryableError(cfg, code, message)) {
                    return { success: false, error: lastError, attempts, recoverable: false };
                }
                if (attempts < cfg.maxAttempts) {
                    const delay = this.backoffDelay(cfg, attempts);
                    this.logger.warn(`${operationName} sync retry ${attempts}/${cfg.maxAttempts}`);
                    this.sleepSync(delay);
                }
            }
        }
        return { success: false, error: lastError, attempts, recoverable: true };
    }

    /**
     * Internal: build the result returned while the breaker rejects calls.
     * @param {string} message - Error message for the generated Error.
     * @param {number} attempts - Attempt counter at the time of rejection.
     */
    circuitOpenResult(message, attempts) {
        return {
            success: false,
            error: Object.assign(new Error(message), { code: WYRM_CIRCUIT_OPEN_CODE }),
            attempts,
            recoverable: true,
        };
    }

    /**
     * Internal: extract `code` and `message` from anything that was thrown, as
     * strings. Tolerates `null`/`undefined`, thrown strings and non-string
     * codes, so classification never throws from inside a `catch`.
     * @param {*} error - The thrown value.
     * @returns {{code: string, message: string}}
     */
    describeError(error) {
        const rawCode = error?.code;
        const rawMessage = typeof error === 'string' ? error : error?.message;
        return {
            code: rawCode ? String(rawCode) : '',
            message: rawMessage ? String(rawMessage) : '',
        };
    }

    /**
     * Internal: true when `code` or `message` contains one of
     * `cfg.retryableErrors`. A missing list means nothing is retryable.
     */
    isRetryableError(cfg, code, message) {
        return cfg.retryableErrors?.some((marker) => code.includes(marker) || message.includes(marker)) ?? false;
    }

    /**
     * Internal: backoff wait before the next attempt:
     * `baseDelayMs * backoffMultiplier^(attempts - 1)`, capped at `maxDelayMs`.
     */
    backoffDelay(cfg, attempts) {
        return Math.min(cfg.baseDelayMs * Math.pow(cfg.backoffMultiplier, attempts - 1), cfg.maxDelayMs);
    }

    // ==================== CIRCUIT BREAKER ====================

    /**
     * Report whether the breaker currently lets an operation run. An open
     * breaker moves to half-open (and allows the call) once
     * `circuitConfig.timeout` ms have passed since the last recorded failure.
     * @returns {boolean}
     */
    checkCircuit() {
        switch (this.circuitState) {
            case 'closed':
            case 'half-open':
                return true;
            case 'open':
                // Check if timeout has passed
                if (Date.now() - this.lastFailureTime >= this.circuitConfig.timeout) {
                    this.circuitState = 'half-open';
                    this.logger.info('Circuit breaker transitioning to half-open');
                    return true;
                }
                return false;
            default:
                // Unknown state: keep rejecting, but with an explicit boolean.
                return false;
        }
    }

    /**
     * Record a successful operation. While half-open, `successThreshold`
     * successes close the breaker; otherwise the failure streak is cleared.
     */
    recordSuccess() {
        if (this.circuitState !== 'half-open') {
            this.failureCount = 0;
            return;
        }
        this.successCount++;
        if (this.successCount >= this.circuitConfig.successThreshold) {
            this.circuitState = 'closed';
            this.failureCount = 0;
            this.successCount = 0;
            this.logger.info('Circuit breaker closed - service recovered');
        }
    }

    /**
     * Record a failed operation. A failure while half-open reopens the breaker
     * at once; otherwise it opens when the count reaches `failureThreshold`.
     */
    recordFailure() {
        this.failureCount++;
        this.lastFailureTime = Date.now();
        if (this.circuitState === 'half-open') {
            this.circuitState = 'open';
            this.successCount = 0;
            this.logger.warn('Circuit breaker reopened after half-open failure');
        }
        else if (this.failureCount >= this.circuitConfig.failureThreshold) {
            this.circuitState = 'open';
            this.logger.warn('Circuit breaker opened due to failure threshold', {
                failures: this.failureCount,
                threshold: this.circuitConfig.failureThreshold,
            });
        }
    }

    /**
     * Get circuit breaker status.
     * @returns {{state: string, failures: number, lastFailure: number}}
     *   `lastFailure` is a `Date.now()` timestamp, 0 if none was recorded.
     */
    getCircuitStatus() {
        return {
            state: this.circuitState,
            failures: this.failureCount,
            lastFailure: this.lastFailureTime,
        };
    }

    /**
     * Manually reset the circuit breaker to closed with cleared counters.
     */
    resetCircuit() {
        this.circuitState = 'closed';
        this.failureCount = 0;
        this.successCount = 0;
        this.logger.info('Circuit breaker manually reset');
    }

    // ==================== CHECKPOINTING ====================

    /**
     * Internal: path of the checkpoint file for an operation. The ID is used
     * in the file name unchanged, so it must not contain path separators.
     */
    checkpointPath(operationId) {
        return join(this.checkpointDir, `${operationId}.json`);
    }

    /**
     * Internal: run a checkpoint bookkeeping step, logging instead of throwing,
     * so bookkeeping can never change the outcome of the work it describes.
     * @param {string} step - Label for the log entry.
     * @param {() => void} action - The bookkeeping call.
     */
    bestEffortCheckpoint(step, action) {
        try {
            action();
        }
        catch (error) {
            this.logger.warn('Checkpoint bookkeeping failed', {
                step,
                error: this.describeError(error).message,
            });
        }
    }

    /**
     * Create a checkpoint for an operation: write it to disk and track it in
     * `activeOperations`.
     * @param {string} operationId - Unique ID; becomes the file name.
     * @param {string} operation - Operation name.
     * @param {string} stage - Initial stage label.
     * @param {object} data - JSON-serializable payload.
     * @throws If the payload cannot be serialized or the file cannot be written.
     */
    createCheckpoint(operationId, operation, stage, data) {
        const checkpoint = {
            id: operationId,
            operation,
            stage,
            data,
            timestamp: new Date().toISOString(),
            completed: false,
        };
        writeFileSync(this.checkpointPath(operationId), JSON.stringify(checkpoint, null, 2));
        this.activeOperations.set(operationId, checkpoint);
        this.logger.debug(`Checkpoint created: ${operation}/${stage}`, { operationId });
    }

    /**
     * Update the stage (and optionally merge extra data) of a tracked
     * checkpoint. Logs a warning and does nothing for an unknown ID.
     * @param {string} operationId
     * @param {string} stage - New stage label.
     * @param {object} [data] - Shallow-merged over the existing payload.
     * @throws If the file cannot be written.
     */
    updateCheckpoint(operationId, stage, data) {
        const existing = this.activeOperations.get(operationId);
        if (!existing) {
            this.logger.warn('Checkpoint not found for update', { operationId });
            return;
        }
        const updated = {
            ...existing,
            stage,
            data: data ? { ...existing.data, ...data } : existing.data,
            timestamp: new Date().toISOString(),
        };
        writeFileSync(this.checkpointPath(operationId), JSON.stringify(updated, null, 2));
        this.activeOperations.set(operationId, updated);
    }

    /**
     * Complete a checkpoint: mark it done, delete its file and stop tracking it.
     * @param {string} operationId
     */
    completeCheckpoint(operationId) {
        const filePath = this.checkpointPath(operationId);
        if (existsSync(filePath)) {
            // Persist `completed: true` before deleting: if the unlink below
            // fails, recoverIncompleteOperations removes the file on next startup
            // instead of reporting the operation as unfinished.
            const checkpoint = this.activeOperations.get(operationId);
            if (checkpoint) {
                checkpoint.completed = true;
                writeFileSync(filePath, JSON.stringify(checkpoint, null, 2));
            }
            try {
                unlinkSync(filePath);
            }
            catch {
                // Ignore deletion errors
            }
        }
        this.activeOperations.delete(operationId);
        this.logger.debug('Checkpoint completed', { operationId });
    }

    /**
     * Get a checkpoint by ID, from memory first and then from disk.
     * @param {string} operationId
     * @returns {object|null} The checkpoint, or null if absent or unreadable.
     */
    getCheckpoint(operationId) {
        const cached = this.activeOperations.get(operationId);
        if (cached) {
            return cached;
        }
        const filePath = this.checkpointPath(operationId);
        if (!existsSync(filePath)) {
            return null;
        }
        try {
            return JSON.parse(readFileSync(filePath, 'utf-8'));
        }
        catch {
            return null;
        }
    }

    /**
     * Scan the checkpoint directory on startup: register unfinished checkpoints
     * in `activeOperations`, delete completed ones, and delete files that are
     * unreadable or do not hold a checkpoint object with an `id`.
     */
    recoverIncompleteOperations() {
        try {
            const incomplete = [];
            for (const file of readdirSync(this.checkpointDir)) {
                if (!file.endsWith('.json')) {
                    continue;
                }
                const filePath = join(this.checkpointDir, file);
                try {
                    const data = JSON.parse(readFileSync(filePath, 'utf-8'));
                    // Valid JSON that is not a checkpoint (null, string, array,
                    // object without id) is treated like a corrupted file.
                    if (data === null || typeof data !== 'object' || data.id == null) {
                        throw new Error('Malformed checkpoint payload');
                    }
                    if (data.completed) {
                        // Clean up completed checkpoints
                        unlinkSync(filePath);
                        continue;
                    }
                    incomplete.push(data);
                    this.activeOperations.set(data.id, data);
                }
                catch {
                    // Corrupted checkpoint file, remove it
                    try {
                        unlinkSync(filePath);
                    }
                    catch {
                        // Ignore
                    }
                }
            }
            if (incomplete.length > 0) {
                this.logger.warn('Found incomplete operations from previous run', {
                    count: incomplete.length,
                    operations: incomplete.map((c) => c.operation),
                });
            }
        }
        catch {
            // Checkpoint directory doesn't exist or not readable
        }
    }

    /**
     * Get all tracked operations that are not marked completed.
     * @returns {object[]}
     */
    getIncompleteOperations() {
        return Array.from(this.activeOperations.values()).filter((c) => !c.completed);
    }

    // ==================== TRANSACTION WRAPPER ====================

    /**
     * Run `operations` between `BEGIN IMMEDIATE` and `COMMIT`, rolling back if
     * anything throws. Never throws itself.
     *
     * Checkpoint bookkeeping is best-effort: a failure to write a checkpoint is
     * logged and neither aborts the transaction nor changes the reported
     * outcome. `ROLLBACK` is only issued when this call's own `BEGIN`
     * succeeded, so a transaction owned by the caller is never rolled back.
     *
     * NOTE(review): `operations` is invoked synchronously and `COMMIT` follows
     * immediately; if it returns a Promise the commit happens before the async
     * work settles. Callers presumably pass synchronous functions — confirm.
     *
     * @param {object} db - Database handle exposing `prepare(sql).run()`.
     * @param {() => *} operations - Work to run inside the transaction.
     * @returns {{success: boolean, data?: *, error?: *, attempts: number, recoverable: boolean}}
     */
    executeTransaction(db, operations) {
        const operationId = this.generateOperationId('txn');
        this.bestEffortCheckpoint('begin', () => this.createCheckpoint(operationId, 'transaction', 'begin', {}));
        let began = false;
        let data;
        try {
            db.prepare('BEGIN IMMEDIATE').run();
            began = true;
            this.bestEffortCheckpoint('executing', () => this.updateCheckpoint(operationId, 'executing'));
            data = operations();
            db.prepare('COMMIT').run();
        }
        catch (error) {
            const { message } = this.describeError(error);
            this.logger.error('Transaction failed, rolling back', {
                operationId,
                error: message,
            });
            if (began) {
                try {
                    db.prepare('ROLLBACK').run();
                }
                catch (rollbackError) {
                    this.logger.error('Rollback failed', {
                        error: this.describeError(rollbackError).message,
                    });
                }
            }
            this.bestEffortCheckpoint('rolled_back', () => this.updateCheckpoint(operationId, 'rolled_back', { error: message }));
            return { success: false, error, attempts: 1, recoverable: true };
        }
        // Past this point the data is committed; bookkeeping must not turn that
        // into a reported failure.
        this.bestEffortCheckpoint('complete', () => this.completeCheckpoint(operationId));
        return { success: true, data, attempts: 1, recoverable: false };
    }

    // ==================== SAFE FILE OPERATIONS ====================

    /**
     * Write a file atomically: write a sibling temp file, then rename it over
     * the target. The temp name includes the process ID, a timestamp and a
     * random suffix so concurrent writers to one target do not share it.
     * @param {string} filePath - Destination path.
     * @param {string} content - Text written as UTF-8.
     * @returns {boolean} true on success; false (after logging) on failure.
     */
    atomicWriteFile(filePath, content) {
        const uniqueSuffix = `${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}`;
        const tempPath = `${filePath}.tmp.${uniqueSuffix}`;
        try {
            writeFileSync(tempPath, content, { encoding: 'utf-8', flag: 'w' });
            renameSync(tempPath, filePath);
            return true;
        }
        catch (error) {
            this.logger.error('Atomic write failed', {
                filePath,
                error: this.describeError(error).message,
            });
            // Clean up temp file if it exists
            try {
                if (existsSync(tempPath)) {
                    unlinkSync(tempPath);
                }
            }
            catch {
                // Ignore cleanup errors
            }
            return false;
        }
    }

    /**
     * Read a file as UTF-8, falling back to a backup when the primary file is
     * missing or unreadable.
     * @param {string} filePath - Primary path.
     * @param {string} [backupPath] - Optional fallback path.
     * @returns {string|null} File contents, or null if neither could be read.
     */
    safeReadFile(filePath, backupPath) {
        // Try primary file
        try {
            if (existsSync(filePath)) {
                return readFileSync(filePath, 'utf-8');
            }
        }
        catch {
            this.logger.warn('Failed to read primary file', { filePath });
        }
        // Try backup if provided
        if (backupPath) {
            try {
                if (existsSync(backupPath)) {
                    this.logger.info('Using backup file', { backupPath });
                    return readFileSync(backupPath, 'utf-8');
                }
            }
            catch {
                this.logger.error('Failed to read backup file', { backupPath });
            }
        }
        return null;
    }

    // ==================== UTILITIES ====================

    /**
     * Resolve after `ms` milliseconds.
     * @param {number} ms
     * @returns {Promise<void>}
     */
    sleep(ms) {
        return new Promise((resolve) => { setTimeout(resolve, ms); });
    }

    /**
     * Block the calling thread for `ms` WITHOUT a CPU busy-wait. Atomics.wait
     * on an unshared int never gets notified, so it just times out after `ms` —
     * sleeping the thread (0% CPU) instead of spinning the event loop.
     * @param {number} ms - No-op when zero or negative.
     */
    sleepSync(ms) {
        if (ms <= 0) {
            return;
        }
        Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
    }

    /**
     * Generate an operation ID of the form `<prefix>-<epoch ms>-<random>`.
     * @param {string} [prefix='op']
     * @returns {string}
     */
    generateOperationId(prefix = 'op') {
        return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    }
}
|
|
540
|
-
// ==================== SINGLETON INSTANCE ====================
|
|
541
|
-
// Process-wide instance, built on first request.
let _resilienceManager = null;

/**
 * Return the shared ResilienceManager, constructing it with default
 * arguments the first time this is called.
 * @returns {ResilienceManager}
 */
export function getResilienceManager() {
    _resilienceManager ??= new ResilienceManager();
    return _resilienceManager;
}
|
|
548
|
-
// ==================== HELPER DECORATORS ====================
|
|
549
|
-
/**
|
|
550
|
-
* Decorator to add retry logic to a method
|
|
551
|
-
*/
|
|
552
|
-
/**
 * Method decorator that routes every call of the decorated method through the
 * shared manager's `withRetry`.
 *
 * The wrapped method always returns a Promise. It resolves with the method's
 * result when the manager reports success and rejects with the manager's
 * reported error otherwise.
 *
 * @param {object} [config] - Per-method overrides of the retry config.
 * @returns {(target: object, propertyKey: string|symbol, descriptor: PropertyDescriptor) => PropertyDescriptor}
 */
export function withRetry(config) {
    return function (_target, propertyKey, descriptor) {
        const originalMethod = descriptor.value;
        // Symbol-keyed methods are valid decorator targets, but a raw Symbol
        // throws when interpolated into a template literal, which the manager
        // does when logging. Convert to a string label once, up front.
        const operationName = String(propertyKey);
        descriptor.value = async function (...args) {
            const manager = getResilienceManager();
            const result = await manager.withRetry(() => originalMethod.apply(this, args), operationName, config);
            if (!result.success) {
                throw result.error;
            }
            return result.data;
        };
        return descriptor;
    };
}
|
|
566
|
-
/**
|
|
567
|
-
* Decorator to add checkpointing to a method
|
|
568
|
-
*/
|
|
569
|
-
/**
 * Method decorator that brackets every call of the decorated method with a
 * checkpoint on the shared manager: created as 'started' (with the call's
 * arguments) before the method runs, completed on success, and moved to
 * 'failed' with the error message on failure.
 *
 * Creating the checkpoint is strict: if it throws, the method is not run.
 * Bookkeeping after the method has run is best-effort: a failure there is
 * logged and never replaces the method's result or its original error.
 *
 * @param {string} operationName - Checkpoint operation name and ID prefix.
 * @returns {(target: object, propertyKey: string|symbol, descriptor: PropertyDescriptor) => PropertyDescriptor}
 */
export function withCheckpoint(operationName) {
    return function (_target, _propertyKey, descriptor) {
        const originalMethod = descriptor.value;
        descriptor.value = async function (...args) {
            const manager = getResilienceManager();
            const operationId = manager.generateOperationId(operationName);
            // Run one bookkeeping step without letting it affect the outcome.
            const bestEffort = (step, action) => {
                try {
                    action();
                }
                catch (checkpointError) {
                    manager.logger?.warn('Checkpoint bookkeeping failed', {
                        operationId,
                        step,
                        error: checkpointError?.message ?? String(checkpointError),
                    });
                }
            };
            manager.createCheckpoint(operationId, operationName, 'started', { args });
            let result;
            try {
                result = await originalMethod.apply(this, args);
            }
            catch (error) {
                // `error` may be any thrown value, including null.
                const message = error?.message ?? String(error);
                bestEffort('failed', () => manager.updateCheckpoint(operationId, 'failed', { error: message }));
                throw error;
            }
            bestEffort('complete', () => manager.completeCheckpoint(operationId));
            return result;
        };
        return descriptor;
    };
}
|
|
591
|
-
//# sourceMappingURL=resilience.js.map
|
|
1
|
+
import{existsSync as l,readFileSync as p,writeFileSync as g,mkdirSync as S,unlinkSync as m,renameSync as b,readdirSync as O}from"fs";import{join as u}from"path";import{homedir as v}from"os";import{WyrmLogger as D}from"./logger.js";import{isSqliteBusyError as y,WYRM_CIRCUIT_OPEN_CODE as C}from"./sqlite-busy.js";const E={maxAttempts:5,baseDelayMs:100,maxDelayMs:1e4,backoffMultiplier:2,retryableErrors:["SQLITE_LOCKED","ECONNRESET","ETIMEDOUT","ENOTFOUND","EAI_AGAIN","EPIPE","ECONNREFUSED"]},M={failureThreshold:5,successThreshold:3,timeout:3e4};class T{logger;checkpointDir;retryConfig;circuitConfig;circuitState="closed";failureCount=0;successCount=0;lastFailureTime=0;activeOperations=new Map;constructor(e,r,t){this.logger=e||new D,this.retryConfig={...E,...r},this.circuitConfig={...M,...t},this.checkpointDir=u(v(),".wyrm","checkpoints"),l(this.checkpointDir)||S(this.checkpointDir,{recursive:!0}),this.recoverIncompleteOperations()}async withRetry(e,r,t){const i={...this.retryConfig,...t};let c,s=0;for(;s<i.maxAttempts;){s++;try{if(!this.checkCircuit())return{success:!1,error:Object.assign(new Error("Circuit breaker is open - service unavailable"),{code:C}),attempts:s,recoverable:!0};const o=await e();return this.recordSuccess(),{success:!0,data:o,attempts:s,recoverable:!1}}catch(o){if(c=o,y(o))return{success:!1,error:c,attempts:s,recoverable:!0};this.recordFailure();const n=o.code||"",f=o.message||"";if(!i.retryableErrors?.some(a=>n.includes(a)||f.includes(a))&&s===1)return this.logger.error(`${r} failed with non-retryable error`,{error:f,code:n}),{success:!1,error:c,attempts:s,recoverable:!1};if(s<i.maxAttempts){const a=Math.min(i.baseDelayMs*Math.pow(i.backoffMultiplier,s-1),i.maxDelayMs);this.logger.warn(`${r} failed, retrying in ${a}ms`,{attempt:s,maxAttempts:i.maxAttempts,error:f}),await this.sleep(a)}}}return this.logger.error(`${r} failed after ${s} attempts`,{error:c?.message}),{success:!1,error:c,attempts:s,recoverable:!0}}withRetrySync(e,r,t){const 
i={...this.retryConfig,...t};let c,s=0;for(;s<i.maxAttempts;){s++;try{if(!this.checkCircuit())return{success:!1,error:Object.assign(new Error("Circuit breaker is open"),{code:C}),attempts:s,recoverable:!0};const o=e();return this.recordSuccess(),{success:!0,data:o,attempts:s,recoverable:!1}}catch(o){if(c=o,y(o))return{success:!1,error:c,attempts:s,recoverable:!0};this.recordFailure();const n=o.code||"",f=o.message||"";if(!i.retryableErrors?.some(a=>n.includes(a)||f.includes(a))&&s===1)return{success:!1,error:c,attempts:s,recoverable:!1};if(s<i.maxAttempts){const a=Math.min(i.baseDelayMs*Math.pow(i.backoffMultiplier,s-1),i.maxDelayMs);this.logger.warn(`${r} sync retry ${s}/${i.maxAttempts}`),this.sleepSync(a)}}}return{success:!1,error:c,attempts:s,recoverable:!0}}checkCircuit(){switch(this.circuitState){case"closed":return!0;case"open":return Date.now()-this.lastFailureTime>=this.circuitConfig.timeout?(this.circuitState="half-open",this.logger.info("Circuit breaker transitioning to half-open"),!0):!1;case"half-open":return!0}}recordSuccess(){this.circuitState==="half-open"?(this.successCount++,this.successCount>=this.circuitConfig.successThreshold&&(this.circuitState="closed",this.failureCount=0,this.successCount=0,this.logger.info("Circuit breaker closed - service recovered"))):this.failureCount=0}recordFailure(){this.failureCount++,this.lastFailureTime=Date.now(),this.circuitState==="half-open"?(this.circuitState="open",this.successCount=0,this.logger.warn("Circuit breaker reopened after half-open failure")):this.failureCount>=this.circuitConfig.failureThreshold&&(this.circuitState="open",this.logger.warn("Circuit breaker opened due to failure threshold",{failures:this.failureCount,threshold:this.circuitConfig.failureThreshold}))}getCircuitStatus(){return{state:this.circuitState,failures:this.failureCount,lastFailure:this.lastFailureTime}}resetCircuit(){this.circuitState="closed",this.failureCount=0,this.successCount=0,this.logger.info("Circuit breaker manually 
reset")}createCheckpoint(e,r,t,i){const c={id:e,operation:r,stage:t,data:i,timestamp:new Date().toISOString(),completed:!1},s=u(this.checkpointDir,`${e}.json`);g(s,JSON.stringify(c,null,2)),this.activeOperations.set(e,c),this.logger.debug(`Checkpoint created: ${r}/${t}`,{operationId:e})}updateCheckpoint(e,r,t){const i=this.activeOperations.get(e);if(!i){this.logger.warn("Checkpoint not found for update",{operationId:e});return}const c={...i,stage:r,data:t?{...i.data,...t}:i.data,timestamp:new Date().toISOString()},s=u(this.checkpointDir,`${e}.json`);g(s,JSON.stringify(c,null,2)),this.activeOperations.set(e,c)}completeCheckpoint(e){const r=u(this.checkpointDir,`${e}.json`);if(l(r)){const t=this.activeOperations.get(e);t&&(t.completed=!0,g(r,JSON.stringify(t,null,2)));try{m(r)}catch{}}this.activeOperations.delete(e),this.logger.debug("Checkpoint completed",{operationId:e})}getCheckpoint(e){const r=this.activeOperations.get(e);if(r)return r;const t=u(this.checkpointDir,`${e}.json`);if(l(t))try{return JSON.parse(p(t,"utf-8"))}catch{return null}return null}recoverIncompleteOperations(){try{const e=O(this.checkpointDir),r=[];for(const t of e){if(!t.endsWith(".json"))continue;const i=u(this.checkpointDir,t);try{const c=JSON.parse(p(i,"utf-8"));c.completed?m(i):(r.push(c),this.activeOperations.set(c.id,c))}catch{try{m(i)}catch{}}}r.length>0&&this.logger.warn("Found incomplete operations from previous run",{count:r.length,operations:r.map(t=>t.operation)})}catch{}}getIncompleteOperations(){return Array.from(this.activeOperations.values()).filter(e=>!e.completed)}executeTransaction(e,r){const t=`txn-${Date.now()}-${Math.random().toString(36).slice(2,8)}`;this.createCheckpoint(t,"transaction","begin",{});try{e.prepare("BEGIN IMMEDIATE").run(),this.updateCheckpoint(t,"executing");const i=r();return e.prepare("COMMIT").run(),this.completeCheckpoint(t),{success:!0,data:i,attempts:1,recoverable:!1}}catch(i){this.logger.error("Transaction failed, rolling 
back",{operationId:t,error:i.message});try{e.prepare("ROLLBACK").run()}catch(c){this.logger.error("Rollback failed",{error:c.message})}return this.updateCheckpoint(t,"rolled_back",{error:i.message}),{success:!1,error:i,attempts:1,recoverable:!0}}}atomicWriteFile(e,r){const t=`${e}.tmp.${Date.now()}`;try{return g(t,r,{encoding:"utf-8",flag:"w"}),b(t,e),!0}catch(i){this.logger.error("Atomic write failed",{filePath:e,error:i.message});try{l(t)&&m(t)}catch{}return!1}}safeReadFile(e,r){try{if(l(e))return p(e,"utf-8")}catch{this.logger.warn("Failed to read primary file",{filePath:e})}if(r)try{if(l(r))return this.logger.info("Using backup file",{backupPath:r}),p(r,"utf-8")}catch{this.logger.error("Failed to read backup file",{backupPath:r})}return null}sleep(e){return new Promise(r=>{setTimeout(r,e)})}sleepSync(e){e<=0||Atomics.wait(new Int32Array(new SharedArrayBuffer(4)),0,0,e)}generateOperationId(e="op"){return`${e}-${Date.now()}-${Math.random().toString(36).slice(2,8)}`}}let d=null;function k(){return d||(d=new T),d}function I(h){return function(e,r,t){const i=t.value;return t.value=async function(...c){const o=await k().withRetry(()=>i.apply(this,c),r,h);if(!o.success)throw o.error;return o.data},t}}function _(h){return function(e,r,t){const i=t.value;return t.value=async function(...c){const s=k(),o=s.generateOperationId(h);s.createCheckpoint(o,h,"started",{args:c});try{const n=await i.apply(this,c);return s.completeCheckpoint(o),n}catch(n){throw s.updateCheckpoint(o,"failed",{error:n.message}),n}},t}}export{T as ResilienceManager,k as getResilienceManager,_ as withCheckpoint,I as withRetry};
|