twinclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/README.md +66 -0
  2. package/bin/npm-twinclaw.js +17 -0
  3. package/bin/run-twinbot-cli.js +36 -0
  4. package/bin/twinbot.js +4 -0
  5. package/bin/twinclaw.js +4 -0
  6. package/dist/api/handlers/browser.js +160 -0
  7. package/dist/api/handlers/callback.js +80 -0
  8. package/dist/api/handlers/config-validate.js +19 -0
  9. package/dist/api/handlers/health.js +117 -0
  10. package/dist/api/handlers/local-state-backup.js +118 -0
  11. package/dist/api/handlers/persona-state.js +59 -0
  12. package/dist/api/handlers/skill-packages.js +94 -0
  13. package/dist/api/router.js +278 -0
  14. package/dist/api/runtime-event-producer.js +99 -0
  15. package/dist/api/shared.js +82 -0
  16. package/dist/api/websocket-hub.js +305 -0
  17. package/dist/config/config-loader.js +2 -0
  18. package/dist/config/env-schema.js +202 -0
  19. package/dist/config/env-validator.js +223 -0
  20. package/dist/config/identity-bootstrap.js +115 -0
  21. package/dist/config/json-config.js +344 -0
  22. package/dist/config/workspace.js +186 -0
  23. package/dist/core/channels-cli.js +77 -0
  24. package/dist/core/cli.js +119 -0
  25. package/dist/core/context-assembly.js +33 -0
  26. package/dist/core/doctor.js +365 -0
  27. package/dist/core/gateway-cli.js +323 -0
  28. package/dist/core/gateway.js +416 -0
  29. package/dist/core/heartbeat.js +54 -0
  30. package/dist/core/install-cli.js +320 -0
  31. package/dist/core/lane-executor.js +134 -0
  32. package/dist/core/logs-cli.js +70 -0
  33. package/dist/core/onboarding.js +760 -0
  34. package/dist/core/pairing-cli.js +78 -0
  35. package/dist/core/secret-vault-cli.js +204 -0
  36. package/dist/core/types.js +1 -0
  37. package/dist/index.js +404 -0
  38. package/dist/interfaces/dispatcher.js +214 -0
  39. package/dist/interfaces/telegram_handler.js +82 -0
  40. package/dist/interfaces/tui-dashboard.js +53 -0
  41. package/dist/interfaces/whatsapp_handler.js +94 -0
  42. package/dist/release/cli.js +97 -0
  43. package/dist/release/mvp-gate-cli.js +118 -0
  44. package/dist/release/twinbot-config-schema.js +162 -0
  45. package/dist/release/twinclaw-config-schema.js +162 -0
  46. package/dist/services/block-chunker.js +174 -0
  47. package/dist/services/browser-service.js +334 -0
  48. package/dist/services/context-lifecycle.js +314 -0
  49. package/dist/services/db.js +1055 -0
  50. package/dist/services/delivery-tracker.js +110 -0
  51. package/dist/services/dm-pairing.js +245 -0
  52. package/dist/services/embedding-service.js +125 -0
  53. package/dist/services/file-watcher.js +125 -0
  54. package/dist/services/inbound-debounce.js +92 -0
  55. package/dist/services/incident-manager.js +516 -0
  56. package/dist/services/job-scheduler.js +176 -0
  57. package/dist/services/local-state-backup.js +682 -0
  58. package/dist/services/mcp-client-adapter.js +291 -0
  59. package/dist/services/mcp-server-manager.js +143 -0
  60. package/dist/services/model-router.js +927 -0
  61. package/dist/services/mvp-gate.js +845 -0
  62. package/dist/services/orchestration-service.js +422 -0
  63. package/dist/services/persona-state.js +256 -0
  64. package/dist/services/policy-engine.js +92 -0
  65. package/dist/services/proactive-notifier.js +94 -0
  66. package/dist/services/queue-service.js +146 -0
  67. package/dist/services/release-pipeline.js +652 -0
  68. package/dist/services/runtime-budget-governor.js +415 -0
  69. package/dist/services/secret-vault.js +704 -0
  70. package/dist/services/semantic-memory.js +249 -0
  71. package/dist/services/skill-package-manager.js +806 -0
  72. package/dist/services/skill-registry.js +122 -0
  73. package/dist/services/streaming-output.js +75 -0
  74. package/dist/services/stt-service.js +39 -0
  75. package/dist/services/tts-service.js +44 -0
  76. package/dist/skills/builtin.js +250 -0
  77. package/dist/skills/shell.js +87 -0
  78. package/dist/skills/types.js +1 -0
  79. package/dist/types/api.js +1 -0
  80. package/dist/types/context-budget.js +1 -0
  81. package/dist/types/doctor.js +1 -0
  82. package/dist/types/file-watcher.js +1 -0
  83. package/dist/types/incident.js +1 -0
  84. package/dist/types/local-state-backup.js +1 -0
  85. package/dist/types/mcp.js +1 -0
  86. package/dist/types/messaging.js +1 -0
  87. package/dist/types/model-routing.js +1 -0
  88. package/dist/types/mvp-gate.js +2 -0
  89. package/dist/types/orchestration.js +1 -0
  90. package/dist/types/persona-state.js +22 -0
  91. package/dist/types/policy.js +1 -0
  92. package/dist/types/reasoning-graph.js +1 -0
  93. package/dist/types/release.js +1 -0
  94. package/dist/types/reliability.js +1 -0
  95. package/dist/types/runtime-budget.js +1 -0
  96. package/dist/types/scheduler.js +1 -0
  97. package/dist/types/secret-vault.js +1 -0
  98. package/dist/types/skill-packages.js +1 -0
  99. package/dist/types/websocket.js +14 -0
  100. package/dist/utils/logger.js +57 -0
  101. package/dist/utils/retry.js +61 -0
  102. package/dist/utils/secret-scan.js +208 -0
  103. package/mcp-servers.json +179 -0
  104. package/package.json +81 -0
  105. package/skill-packages.json +92 -0
  106. package/skill-packages.lock.json +5 -0
  107. package/src/skills/builtin.ts +275 -0
  108. package/src/skills/shell.ts +118 -0
  109. package/src/skills/types.ts +30 -0
  110. package/src/types/api.ts +252 -0
  111. package/src/types/blessed-contrib.d.ts +4 -0
  112. package/src/types/context-budget.ts +76 -0
  113. package/src/types/doctor.ts +29 -0
  114. package/src/types/file-watcher.ts +26 -0
  115. package/src/types/incident.ts +57 -0
  116. package/src/types/local-state-backup.ts +121 -0
  117. package/src/types/mcp.ts +106 -0
  118. package/src/types/messaging.ts +35 -0
  119. package/src/types/model-routing.ts +61 -0
  120. package/src/types/mvp-gate.ts +99 -0
  121. package/src/types/orchestration.ts +65 -0
  122. package/src/types/persona-state.ts +61 -0
  123. package/src/types/policy.ts +27 -0
  124. package/src/types/reasoning-graph.ts +58 -0
  125. package/src/types/release.ts +115 -0
  126. package/src/types/reliability.ts +43 -0
  127. package/src/types/runtime-budget.ts +85 -0
  128. package/src/types/scheduler.ts +47 -0
  129. package/src/types/secret-vault.ts +62 -0
  130. package/src/types/skill-packages.ts +81 -0
  131. package/src/types/sqlite-vec.d.ts +5 -0
  132. package/src/types/websocket.ts +122 -0
@@ -0,0 +1,516 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { appendIncidentTimelineEntry, getCallbackOutcomeCounts, listIncidentRecords, listIncidentTimeline, upsertIncidentRecord, } from './db.js';
3
+ import { logThought } from '../utils/logger.js';
4
+ import { getConfigValue } from '../config/config-loader.js';
5
/** Scheduler job id under which the incident polling loop is registered. */
const INCIDENT_JOB_ID = 'incident-self-healing';

/**
 * Built-in defaults for every IncidentManager setting.
 *
 * Each value is used only when neither the `deps.config` override nor the
 * matching `INCIDENT_*` configuration value supplies one.
 */
const DEFAULT_CONFIG = {
    // Six-field cron expression; with node-cron the leading field is seconds,
    // so this polls every 20 seconds.
    pollCronExpression: '*/20 * * * * *',
    // queued + dispatching items at or above this count as backpressure.
    queueDepthThreshold: 25,
    // failed + dead-lettered items at or above this count as backpressure.
    queueFailureThreshold: 10,
    // Newly rejected callbacks between two polls that count as a failure storm.
    callbackFailureBurstThreshold: 4,
    // Window, in minutes, passed to the callback outcome counter.
    callbackWindowMinutes: 5,
    // Consecutive degraded context turns that raise an incident.
    contextDegradationThreshold: 3,
    // Consecutive model-router failures that raise an incident.
    modelRoutingFailureThreshold: 3,
    // Pause after each remediation attempt before the next one (one minute).
    remediationCooldownMs: 60 * 1000,
    // Attempts allowed before the incident is escalated.
    maxRemediationAttempts: 3,
};
17
/**
 * Detects operational incidents from runtime signals and applies
 * self-healing playbooks, escalating when remediation fails or runs out of
 * attempts.
 *
 * Signals come from the delivery queue (optional), the callback outcome
 * counters in the database, the gateway's context-degradation snapshot and
 * the model router's health snapshot. At most one incident per incident type
 * is tracked at a time; every state change is persisted through
 * `upsertIncidentRecord` and recorded on the incident timeline.
 */
export class IncidentManager {
    #gateway;
    #router;
    #queue;
    #scheduler;
    #config;
    // Open incidents keyed by incident type.
    #active = new Map();
    // Callback counters observed on the previous poll; used to compute deltas.
    #callbackTotalsBaseline = {
        accepted: 0,
        duplicate: 0,
        rejected: 0,
    };
    #safeMode = false;

    /**
     * @param {object} deps
     * @param {object} deps.gateway - Must provide `getContextDegradationSnapshot()`.
     * @param {object} deps.router - Must provide `getHealthSnapshot()`,
     *   `forceFailover()` and `resetPreferredModel()`.
     * @param {object} [deps.queue] - Optional queue service; queue signals and
     *   queue playbooks are unavailable without it.
     * @param {object} [deps.scheduler] - Optional scheduler used by `start()`/`stop()`.
     * @param {object} [deps.config] - Per-setting overrides. Resolution order for
     *   each setting is override, then configuration value, then built-in default.
     */
    constructor(deps) {
        this.#gateway = deps.gateway;
        this.#router = deps.router;
        this.#queue = deps.queue;
        this.#scheduler = deps.scheduler;

        const overrides = deps.config ?? {};
        // The right-hand side stays lazy: configuration is only consulted when
        // no explicit override was supplied.
        const numberSetting = (key, envName) => overrides[key] ?? readNumberEnv(envName, DEFAULT_CONFIG[key]);

        this.#config = {
            pollCronExpression: overrides.pollCronExpression ??
                getConfigValue('INCIDENT_POLL_CRON') ??
                DEFAULT_CONFIG.pollCronExpression,
            queueDepthThreshold: numberSetting('queueDepthThreshold', 'INCIDENT_QUEUE_DEPTH_THRESHOLD'),
            queueFailureThreshold: numberSetting('queueFailureThreshold', 'INCIDENT_QUEUE_FAILURE_THRESHOLD'),
            callbackFailureBurstThreshold: numberSetting('callbackFailureBurstThreshold', 'INCIDENT_CALLBACK_BURST_THRESHOLD'),
            callbackWindowMinutes: numberSetting('callbackWindowMinutes', 'INCIDENT_CALLBACK_WINDOW_MINUTES'),
            contextDegradationThreshold: numberSetting('contextDegradationThreshold', 'INCIDENT_CONTEXT_DEGRADATION_THRESHOLD'),
            modelRoutingFailureThreshold: numberSetting('modelRoutingFailureThreshold', 'INCIDENT_ROUTING_FAILURE_THRESHOLD'),
            remediationCooldownMs: numberSetting('remediationCooldownMs', 'INCIDENT_REMEDIATION_COOLDOWN_MS'),
            maxRemediationAttempts: numberSetting('maxRemediationAttempts', 'INCIDENT_MAX_REMEDIATION_ATTEMPTS'),
        };
    }

    /**
     * Register the polling job with the scheduler. Does nothing when no
     * scheduler was provided or the job is already registered.
     */
    start() {
        if (!this.#scheduler || this.#scheduler.getJob(INCIDENT_JOB_ID)) {
            return;
        }
        this.#scheduler.register({
            id: INCIDENT_JOB_ID,
            cronExpression: this.#config.pollCronExpression,
            description: 'Detect incidents and apply self-healing playbooks',
            handler: async () => {
                this.evaluateNow();
            },
            autoStart: true,
        });
    }

    /** Remove the polling job from the scheduler, if a scheduler was provided. */
    stop() {
        this.#scheduler?.unregister(INCIDENT_JOB_ID);
    }

    /** @returns {boolean} Whether a playbook has put the runtime into safe mode. */
    isSafeModeEnabled() {
        return this.#safeMode;
    }

    /**
     * Run one detection and remediation pass.
     *
     * Every detected incident is handled (created, remediated, cooled down or
     * escalated); every tracked incident whose type was not detected in this
     * pass is resolved and its remediations rolled back.
     *
     * @returns {object[]} The currently open incidents, as read back from storage.
     */
    evaluateNow() {
        const snapshot = this.#collectSignals();
        const detections = this.#detect(snapshot);
        const detectedTypes = new Set(detections.map((detection) => detection.type));
        for (const detection of detections) {
            this.#handleDetection(detection);
        }
        // Deleting from a Map while iterating it is well-defined in JavaScript.
        for (const [type, state] of this.#active.entries()) {
            if (!detectedTypes.has(type)) {
                this.#resolveIncident(state, 'Signals returned below thresholds.');
            }
        }
        return this.getCurrentIncidents();
    }

    /** @returns {object[]} Up to 100 incidents in status active, remediating or escalated. */
    getCurrentIncidents() {
        const rows = listIncidentRecords(100, ['active', 'remediating', 'escalated']);
        return rows.map((row) => this.#toIncidentRecord(row));
    }

    /**
     * @param {number} [limit=200] - Maximum number of records to read.
     * @returns {object[]} Incident records regardless of status.
     */
    getIncidentHistory(limit = 200) {
        const rows = listIncidentRecords(limit);
        return rows.map((row) => this.#toIncidentRecord(row));
    }

    /**
     * @param {number} [limit=300] - Maximum number of timeline entries to read.
     * @returns {object[]} Timeline entries with their JSON detail parsed.
     */
    getIncidentTimeline(limit = 300) {
        const rows = listIncidentTimeline(limit);
        return rows.map((row) => this.#toTimelineEntry(row));
    }

    /**
     * Gather the raw signals for one pass. Also advances the callback baseline,
     * so each call reports the change since the previous call.
     */
    #collectSignals() {
        const queueStatsRaw = this.#queue?.getStats();
        const queue = queueStatsRaw
            ? {
                totalQueued: Number(queueStatsRaw.totalQueued ?? 0),
                totalFailed: Number(queueStatsRaw.totalFailed ?? 0),
                totalDispatching: Number(queueStatsRaw.totalDispatching ?? 0),
                totalDeadLetters: Number(queueStatsRaw.totalDeadLetters ?? 0),
            }
            : null;
        const callbackTotals = getCallbackOutcomeCounts(this.#config.callbackWindowMinutes);
        const baseline = this.#callbackTotalsBaseline;
        // Clamp at zero: a counter that shrank since the last poll is not a burst.
        const callbackDelta = {
            accepted: Math.max(0, callbackTotals.accepted - baseline.accepted),
            duplicate: Math.max(0, callbackTotals.duplicate - baseline.duplicate),
            rejected: Math.max(0, callbackTotals.rejected - baseline.rejected),
        };
        this.#callbackTotalsBaseline = callbackTotals;
        return {
            queue,
            callbackTotals,
            callbackDelta,
            context: this.#gateway.getContextDegradationSnapshot(),
            modelRouting: this.#router.getHealthSnapshot(),
        };
    }

    /**
     * Compare a signal snapshot against the configured thresholds.
     *
     * @param {object} snapshot - Result of `#collectSignals()`.
     * @returns {object[]} One `{ type, severity, summary, evidence }` per breached signal.
     */
    #detect(snapshot) {
        const config = this.#config;
        const detections = [];

        if (snapshot.queue) {
            const { totalQueued, totalDispatching, totalFailed, totalDeadLetters } = snapshot.queue;
            const backlog = totalQueued + totalDispatching;
            const failures = totalFailed + totalDeadLetters;
            if (backlog >= config.queueDepthThreshold || failures >= config.queueFailureThreshold) {
                detections.push({
                    type: 'queue_backpressure',
                    severity: totalDeadLetters > 0 || totalFailed > config.queueFailureThreshold
                        ? 'critical'
                        : 'warning',
                    summary: `Queue backlog=${backlog}, ` +
                        `failures=${totalFailed}, deadLetters=${totalDeadLetters}.`,
                    evidence: [
                        {
                            signal: 'queue_depth',
                            observedValue: backlog,
                            threshold: config.queueDepthThreshold,
                        },
                        {
                            signal: 'queue_failures',
                            observedValue: failures,
                            threshold: config.queueFailureThreshold,
                        },
                    ],
                });
            }
        }

        if (snapshot.callbackDelta.rejected >= config.callbackFailureBurstThreshold) {
            detections.push({
                type: 'callback_failure_storm',
                severity: 'critical',
                summary: `Callback rejection burst detected (+${snapshot.callbackDelta.rejected} in the latest window).`,
                evidence: [
                    {
                        signal: 'callback_rejected_delta',
                        observedValue: snapshot.callbackDelta.rejected,
                        threshold: config.callbackFailureBurstThreshold,
                        metadata: {
                            accepted: snapshot.callbackTotals.accepted,
                            duplicate: snapshot.callbackTotals.duplicate,
                            rejected: snapshot.callbackTotals.rejected,
                        },
                    },
                ],
            });
        }

        if (snapshot.context.maxConsecutiveDegradation >= config.contextDegradationThreshold) {
            detections.push({
                type: 'context_budget_degradation',
                severity: 'warning',
                summary: `Context degradation sustained for ${snapshot.context.maxConsecutiveDegradation} consecutive turns.`,
                evidence: [
                    {
                        signal: 'context_degradation_consecutive',
                        observedValue: snapshot.context.maxConsecutiveDegradation,
                        threshold: config.contextDegradationThreshold,
                        metadata: {
                            degradedSessions: snapshot.context.degradedSessions,
                            sessions: snapshot.context.sessions,
                        },
                    },
                ],
            });
        }

        if (snapshot.modelRouting.consecutiveFailures >= config.modelRoutingFailureThreshold) {
            detections.push({
                type: 'model_routing_instability',
                severity: 'critical',
                summary: `Model router has ${snapshot.modelRouting.consecutiveFailures} consecutive failures (last error: ` +
                    `${snapshot.modelRouting.lastError ?? 'unknown'}).`,
                evidence: [
                    {
                        signal: 'model_router_consecutive_failures',
                        observedValue: snapshot.modelRouting.consecutiveFailures,
                        threshold: config.modelRoutingFailureThreshold,
                        metadata: {
                            preferredModelId: snapshot.modelRouting.preferredModelId,
                            failoverCount: snapshot.modelRouting.failoverCount,
                        },
                    },
                ],
            });
        }

        return detections;
    }

    /**
     * Open or refresh the incident for a detection, then either wait out the
     * cooldown, escalate, or run the next remediation step.
     *
     * @param {object} detection - One entry produced by `#detect()`.
     */
    #handleDetection(detection) {
        const existing = this.#active.get(detection.type);
        const state = existing ??
            {
                id: randomUUID(),
                type: detection.type,
                severity: detection.severity,
                status: 'active',
                remediationAttempts: 0,
                remediationAction: 'none',
                cooldownUntil: null,
                summary: detection.summary,
                evidence: detection.evidence,
                recommendedActions: [],
                rollbackStack: [],
            };
        // Always refresh to the latest observation, even for a known incident.
        state.severity = detection.severity;
        state.summary = detection.summary;
        state.evidence = detection.evidence;
        this.#active.set(detection.type, state);

        if (!existing) {
            this.#persist(state);
            this.#appendTimeline(state, 'detected', { summary: detection.summary, evidence: detection.evidence });
        }

        // Escalated incidents wait for a human; only keep the stored record fresh.
        if (state.status === 'escalated') {
            this.#persist(state);
            return;
        }

        const now = Date.now();
        if (state.cooldownUntil && state.cooldownUntil > now) {
            state.status = 'remediating';
            this.#persist(state);
            this.#appendTimeline(state, 'cooldown_active', {
                cooldownUntil: new Date(state.cooldownUntil).toISOString(),
                remainingMs: state.cooldownUntil - now,
            });
            return;
        }

        if (state.remediationAttempts >= this.#config.maxRemediationAttempts) {
            this.#escalate(state, 'Maximum remediation attempts exceeded.');
            return;
        }

        const remediation = this.#runPlaybook(state);
        state.remediationAttempts += 1;
        state.remediationAction = remediation.action;
        state.cooldownUntil = Date.now() + this.#config.remediationCooldownMs;
        if (remediation.rollback) {
            state.rollbackStack.push(remediation.rollback);
        }

        if (remediation.succeeded) {
            state.status = 'remediating';
            this.#persist(state);
            this.#appendTimeline(state, 'remediation_applied', {
                action: remediation.action,
                detail: remediation.detail,
                attempt: state.remediationAttempts,
                cooldownUntil: new Date(state.cooldownUntil).toISOString(),
            });
            void logThought(`[IncidentManager] ${state.type}: applied remediation '${remediation.action}'.`);
            return;
        }

        this.#appendTimeline(state, 'remediation_failed', {
            action: remediation.action,
            detail: remediation.detail,
            attempt: state.remediationAttempts,
        });
        this.#escalate(state, remediation.detail);
    }

    /**
     * Select and apply the next remediation step for an incident.
     *
     * A step that throws is reported as a failed remediation instead of
     * propagating. Otherwise the attempt counter and cooldown would never
     * advance and the same throwing step would be retried on every poll
     * without ever escalating.
     *
     * @param {object} state - Tracked incident state.
     * @returns {{action: string, detail: string, succeeded: boolean, rollback?: Function}}
     */
    #runPlaybook(state) {
        const action = this.#selectAction(state.type, state.remediationAttempts + 1);
        try {
            return this.#applyAction(action);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return { action, detail: `Remediation '${action}' threw: ${message}`, succeeded: false };
        }
    }

    /**
     * Apply one remediation action and describe how to undo it.
     *
     * @param {string} action - Action name chosen by `#selectAction()`.
     * @returns {{action: string, detail: string, succeeded: boolean, rollback?: Function}}
     */
    #applyAction(action) {
        switch (action) {
            case 'throttle': {
                if (!this.#queue) {
                    return { action, detail: 'Queue service unavailable; cannot throttle.', succeeded: false };
                }
                const previousMode = this.#queue.getRuntimeControls().mode;
                this.#queue.setProcessingMode('throttled');
                return {
                    action,
                    detail: `Queue mode changed ${previousMode} -> throttled.`,
                    succeeded: true,
                    rollback: () => this.#queue?.setProcessingMode(previousMode),
                };
            }
            case 'drain': {
                if (!this.#queue) {
                    return { action, detail: 'Queue service unavailable; cannot drain.', succeeded: false };
                }
                const previousMode = this.#queue.getRuntimeControls().mode;
                this.#queue.setProcessingMode('drain');
                return {
                    action,
                    detail: `Queue mode changed ${previousMode} -> drain.`,
                    succeeded: true,
                    rollback: () => this.#queue?.setProcessingMode(previousMode),
                };
            }
            case 'retry_window_adjustment': {
                if (!this.#queue) {
                    return { action, detail: 'Queue service unavailable; cannot adjust retry window.', succeeded: false };
                }
                const controls = this.#queue.getRuntimeControls();
                const previousMultiplier = controls.retryWindowMultiplier;
                // Grow the retry window by 50% per step, capped at 6x.
                const nextMultiplier = Math.min(previousMultiplier * 1.5, 6);
                this.#queue.setRetryWindowMultiplier(nextMultiplier);
                return {
                    action,
                    detail: `Retry window multiplier changed ${previousMultiplier} -> ${nextMultiplier}.`,
                    succeeded: true,
                    rollback: () => this.#queue?.setRetryWindowMultiplier(previousMultiplier),
                };
            }
            case 'failover': {
                const shift = this.#router.forceFailover();
                return {
                    action,
                    detail: `Model routing failover applied ${shift.previousModelId ?? 'none'} -> ${shift.nextModelId ?? 'none'}.`,
                    succeeded: true,
                    rollback: () => this.#router.resetPreferredModel(),
                };
            }
            case 'halt_safe_mode': {
                const previousSafeMode = this.#safeMode;
                this.#safeMode = true;
                const previousQueueMode = this.#queue?.getRuntimeControls().mode ?? null;
                this.#queue?.setProcessingMode('throttled');
                return {
                    action,
                    detail: 'Entered safe mode and throttled queue processing.',
                    succeeded: true,
                    rollback: () => {
                        this.#safeMode = previousSafeMode;
                        if (previousQueueMode) {
                            this.#queue?.setProcessingMode(previousQueueMode);
                        }
                    },
                };
            }
            default:
                return { action: 'none', detail: 'No remediation action selected.', succeeded: false };
        }
    }

    /**
     * Playbook table: which action to try for an incident type on a given attempt.
     *
     * @param {string} type - Incident type.
     * @param {number} attempt - 1-based attempt number.
     * @returns {string} Action name, or 'none' for an unknown incident type.
     */
    #selectAction(type, attempt) {
        if (type === 'queue_backpressure') {
            if (attempt === 1) {
                return 'throttle';
            }
            if (attempt === 2) {
                return 'drain';
            }
            return 'halt_safe_mode';
        }
        if (type === 'callback_failure_storm') {
            if (attempt === 1) {
                return 'retry_window_adjustment';
            }
            if (attempt === 2) {
                return 'throttle';
            }
            return 'halt_safe_mode';
        }
        if (type === 'context_budget_degradation') {
            if (attempt === 1) {
                return 'retry_window_adjustment';
            }
            return 'halt_safe_mode';
        }
        if (type === 'model_routing_instability') {
            if (attempt <= 2) {
                return 'failover';
            }
            return 'halt_safe_mode';
        }
        return 'none';
    }

    /**
     * Close an incident: undo its remediations, newest first, persist the
     * resolved state and stop tracking it.
     *
     * @param {object} state - Tracked incident state.
     * @param {string} reason - Human-readable reason recorded on the timeline.
     */
    #resolveIncident(state, reason) {
        const rollbacks = [...state.rollbackStack].reverse();
        const rollbackFailures = [];
        for (const rollback of rollbacks) {
            // One failing rollback must not prevent the remaining ones from running.
            try {
                rollback();
            }
            catch (err) {
                rollbackFailures.push(err instanceof Error ? err.message : String(err));
            }
        }
        state.status = 'resolved';
        state.recommendedActions = [];
        state.remediationAction = 'none';
        state.cooldownUntil = null;
        this.#persist(state, new Date().toISOString());
        this.#appendTimeline(state, 'resolved', {
            reason,
            rollbackFailures,
        });
        this.#active.delete(state.type);
        // With nothing left open, safe mode is lifted unconditionally.
        if (this.#active.size === 0) {
            this.#safeMode = false;
        }
        void logThought(`[IncidentManager] ${state.type}: resolved. ${reason}`);
    }

    /**
     * Mark an incident as escalated and attach manual recommendations.
     *
     * @param {object} state - Tracked incident state.
     * @param {string} reason - Why automatic remediation stopped.
     */
    #escalate(state, reason) {
        state.status = 'escalated';
        state.recommendedActions = this.#recommendedActions(state.type);
        this.#persist(state);
        this.#appendTimeline(state, 'escalated', {
            reason,
            recommendations: state.recommendedActions,
            remediationAttempts: state.remediationAttempts,
        });
        void logThought(`[IncidentManager] ${state.type}: escalated after ${state.remediationAttempts} attempt(s). ${reason}`);
    }

    /**
     * @param {string} type - Incident type.
     * @returns {string[]} Manual follow-up suggestions for an escalated incident.
     */
    #recommendedActions(type) {
        switch (type) {
            case 'queue_backpressure':
                return [
                    'Inspect outbound adapter availability and delivery queue growth sources.',
                    'Replay dead-letter entries selectively after connectivity stabilizes.',
                ];
            case 'callback_failure_storm':
                return [
                    'Validate callback signature configuration and upstream webhook retries.',
                    'Pause new callback-producing workloads until reject rate stabilizes.',
                ];
            case 'context_budget_degradation':
                return [
                    'Inspect conversation scope growth and prune long-running sessions.',
                    'Tune context budget env vars for hot/warm/archive retention tiers.',
                ];
            case 'model_routing_instability':
                return [
                    'Verify upstream model provider credentials and quotas.',
                    'Pin a stable provider temporarily and inspect provider health telemetry.',
                ];
            default:
                return ['Inspect runtime logs and choose a manual remediation path.'];
        }
    }

    /**
     * Write the incident's current state to storage.
     *
     * @param {object} state - Tracked incident state.
     * @param {string|null} [resolvedAt=null] - ISO timestamp when resolving, else null.
     */
    #persist(state, resolvedAt = null) {
        upsertIncidentRecord({
            id: state.id,
            incidentType: state.type,
            severity: state.severity,
            status: state.status,
            summary: state.summary,
            evidenceJson: JSON.stringify(state.evidence),
            remediationAction: state.remediationAction,
            remediationAttempts: state.remediationAttempts,
            cooldownUntil: state.cooldownUntil ? new Date(state.cooldownUntil).toISOString() : null,
            escalated: state.status === 'escalated',
            recommendedActionsJson: JSON.stringify(state.recommendedActions),
            resolvedAt,
        });
    }

    /**
     * Append one event to the incident timeline.
     *
     * @param {object} state - Tracked incident state.
     * @param {string} eventType - Event name, e.g. 'detected' or 'escalated'.
     * @param {object} detail - JSON-serialisable event payload.
     */
    #appendTimeline(state, eventType, detail) {
        appendIncidentTimelineEntry({
            id: randomUUID(),
            incidentId: state.id,
            incidentType: state.type,
            eventType,
            detailJson: JSON.stringify(detail),
        });
    }

    /** Convert a snake_case storage row into the camelCase incident record. */
    #toIncidentRecord(row) {
        return {
            id: row.id,
            incidentType: row.incident_type,
            severity: row.severity,
            status: row.status,
            summary: row.summary,
            evidence: this.#parseJson(row.evidence_json, []),
            remediationAction: row.remediation_action,
            remediationAttempts: row.remediation_attempts,
            cooldownUntil: row.cooldown_until,
            // Only the integer 1 counts as escalated (presumably an integer flag column).
            escalated: row.escalated === 1,
            recommendedActions: this.#parseJson(row.recommended_actions_json, []),
            createdAt: row.created_at,
            updatedAt: row.updated_at,
            resolvedAt: row.resolved_at,
        };
    }

    /** Convert a snake_case timeline row into the camelCase timeline entry. */
    #toTimelineEntry(row) {
        return {
            id: row.id,
            incidentId: row.incident_id,
            incidentType: row.incident_type,
            eventType: row.event_type,
            detail: this.#parseJson(row.detail_json, {}),
            createdAt: row.created_at,
        };
    }

    /** Parse stored JSON, returning `fallback` when the text is not valid JSON. */
    #parseJson(value, fallback) {
        try {
            return JSON.parse(value);
        }
        catch {
            return fallback;
        }
    }
}
506
/**
 * Read a numeric setting through `getConfigValue`, using `fallback` when the
 * value is missing or is not a finite number.
 *
 * Only strings and numbers are accepted. Strings are trimmed first, because
 * `Number('   ')` evaluates to 0 and a blank setting would otherwise turn
 * into a zero threshold. An explicit zero, whether `'0'` or `0`, is honoured.
 *
 * @param {string} name - Configuration key to look up.
 * @param {number} fallback - Value returned when the setting is unusable.
 * @returns {number} The parsed finite number, or `fallback`.
 */
function readNumberEnv(name, fallback) {
    const raw = getConfigValue(name);
    if (typeof raw !== 'string' && typeof raw !== 'number') {
        return fallback;
    }
    const candidate = typeof raw === 'string' ? raw.trim() : raw;
    if (candidate === '') {
        return fallback;
    }
    const parsed = Number(candidate);
    return Number.isFinite(parsed) ? parsed : fallback;
}
@@ -0,0 +1,176 @@
1
+ import cron from 'node-cron';
2
+ import { logThought } from '../utils/logger.js';
3
/**
 * Centralized job scheduler for TwinBot's proactive execution layer.
 *
 * Wraps `node-cron` to manage multiple named, repeating background jobs
 * with event emission, error isolation, and runtime inspection.
 *
 * Events emitted to `on()` subscribers: `job:start`, `job:done` and
 * `job:error` (the latter carries the error message in `error`).
 *
 * Usage:
 * ```ts
 * const scheduler = new JobScheduler();
 * scheduler.register({
 *   id: 'daily-heartbeat',
 *   cronExpression: '0 9 * * *',
 *   description: 'Morning proactive check-in',
 *   handler: async () => { … },
 * });
 * scheduler.startAll();
 * ```
 */
export class JobScheduler {
    // Job entries keyed by job id: { config, task, status, lastRunAt, lastError }.
    #jobs = new Map();
    // Listener sets keyed by event type.
    #listeners = new Map();

    /**
     * Register a new repeating job. The job is scheduled immediately unless
     * `config.autoStart` is `false`.
     *
     * @param {object} config - `{ id, cronExpression, description, handler, autoStart? }`.
     * @throws {Error} If the id is already registered or the cron expression is invalid.
     */
    register(config) {
        if (this.#jobs.has(config.id)) {
            throw new Error(`[JobScheduler] Job '${config.id}' is already registered.`);
        }
        if (!cron.validate(config.cronExpression)) {
            throw new Error(`[JobScheduler] Invalid cron expression for job '${config.id}': ${config.cronExpression}`);
        }
        const entry = {
            config,
            task: null,
            status: 'idle',
            lastRunAt: null,
            lastError: null,
        };
        this.#jobs.set(config.id, entry);
        if (config.autoStart ?? true) {
            this.#startJob(entry);
        }
    }

    /**
     * Unregister and stop a job by ID.
     *
     * @param {string} jobId
     * @returns {boolean} `false` when no such job exists, otherwise `true`.
     */
    unregister(jobId) {
        const entry = this.#jobs.get(jobId);
        if (!entry) {
            return false;
        }
        entry.task?.stop();
        this.#jobs.delete(jobId);
        return true;
    }

    /**
     * Start a specific job by ID. No-op if already running.
     *
     * @param {string} jobId
     * @throws {Error} If the job is not registered.
     */
    start(jobId) {
        this.#startJob(this.#requireEntry(jobId));
    }

    /**
     * Stop a specific job by ID. No-op if already stopped.
     *
     * @param {string} jobId
     * @throws {Error} If the job is not registered.
     */
    stop(jobId) {
        this.#stopEntry(this.#requireEntry(jobId));
    }

    /** Start all registered jobs that are not currently running. */
    startAll() {
        for (const entry of this.#jobs.values()) {
            this.#startJob(entry);
        }
    }

    /** Stop all running jobs gracefully. */
    stopAll() {
        for (const entry of this.#jobs.values()) {
            this.#stopEntry(entry);
        }
    }

    /**
     * Return a snapshot of all registered jobs.
     *
     * @returns {object[]} One `{ id, cronExpression, description, status, lastRunAt, lastError }` per job.
     */
    listJobs() {
        return Array.from(this.#jobs.values(), (entry) => this.#toSnapshot(entry));
    }

    /**
     * Get a single job's snapshot by ID.
     *
     * @param {string} jobId
     * @returns {object|undefined} The snapshot, or `undefined` if not found.
     */
    getJob(jobId) {
        const entry = this.#jobs.get(jobId);
        return entry ? this.#toSnapshot(entry) : undefined;
    }

    /**
     * Subscribe to scheduler events.
     *
     * @param {string} eventType - `job:start`, `job:done` or `job:error`.
     * @param {Function} listener - Called with the event object.
     * @returns {Function} Unsubscribe function.
     */
    on(eventType, listener) {
        let set = this.#listeners.get(eventType);
        if (!set) {
            set = new Set();
            this.#listeners.set(eventType, set);
        }
        set.add(listener);
        return () => {
            set?.delete(listener);
        };
    }

    // ── Private Helpers ────────────────────────────────────────────────────────

    /** Look up a job entry, throwing the "not registered" error when absent. */
    #requireEntry(jobId) {
        const entry = this.#jobs.get(jobId);
        if (!entry) {
            throw new Error(`[JobScheduler] Job '${jobId}' is not registered.`);
        }
        return entry;
    }

    /** Build the public snapshot object for a job entry. */
    #toSnapshot(entry) {
        return {
            id: entry.config.id,
            cronExpression: entry.config.cronExpression,
            description: entry.config.description,
            status: entry.status,
            lastRunAt: entry.lastRunAt,
            lastError: entry.lastError,
        };
    }

    /** Schedule the job's cron task unless one is already scheduled. */
    #startJob(entry) {
        if (entry.task) {
            return; // Already running
        }
        entry.task = cron.schedule(entry.config.cronExpression, async () => {
            await this.#executeJob(entry);
        });
        entry.status = 'idle';
    }

    /** Stop the job's cron task, if any, and mark the job as stopped. */
    #stopEntry(entry) {
        if (!entry.task) {
            return;
        }
        entry.task.stop();
        entry.task = null;
        entry.status = 'stopped';
    }

    /**
     * Run one execution of a job, recording status and emitting events.
     * Never rejects: handler errors are captured on the entry and reported
     * through `job:error`.
     */
    async #executeJob(entry) {
        const { config } = entry;
        entry.status = 'running';
        entry.lastRunAt = new Date();
        this.#emit({ type: 'job:start', jobId: config.id, timestamp: new Date() });
        // Logged outside the try block so a logging failure cannot be mistaken
        // for a job failure or prevent the handler from running.
        await this.#logSafely(`[JobScheduler] Executing job '${config.id}' (${config.cronExpression}).`);
        try {
            await config.handler();
            entry.status = 'idle';
            entry.lastError = null;
            this.#emit({ type: 'job:done', jobId: config.id, timestamp: new Date() });
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            entry.status = 'error';
            entry.lastError = message;
            console.error(`[JobScheduler] Job '${config.id}' failed:`, message);
            await this.#logSafely(`[JobScheduler] Job '${config.id}' failed: ${message}`);
            this.#emit({ type: 'job:error', jobId: config.id, timestamp: new Date(), error: message });
        }
    }

    /** Best-effort logging: a failing log sink is reported on stderr and ignored. */
    async #logSafely(message) {
        try {
            await logThought(message);
        }
        catch (logErr) {
            console.error('[JobScheduler] Failed to record log entry:', logErr);
        }
    }

    /** Deliver an event to its subscribers; a throwing listener does not affect the others. */
    #emit(event) {
        const listeners = this.#listeners.get(event.type);
        if (!listeners) {
            return;
        }
        for (const listener of listeners) {
            try {
                listener(event);
            }
            catch (listenerErr) {
                console.error('[JobScheduler] Event listener threw an error:', listenerErr);
            }
        }
    }
}