@geravant/sinain 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +183 -0
  2. package/index.ts +2096 -0
  3. package/install.js +155 -0
  4. package/openclaw.plugin.json +59 -0
  5. package/package.json +21 -0
  6. package/sinain-memory/common.py +403 -0
  7. package/sinain-memory/demo_knowledge_transfer.sh +85 -0
  8. package/sinain-memory/embedder.py +268 -0
  9. package/sinain-memory/eval/__init__.py +0 -0
  10. package/sinain-memory/eval/assertions.py +288 -0
  11. package/sinain-memory/eval/judges/__init__.py +0 -0
  12. package/sinain-memory/eval/judges/base_judge.py +61 -0
  13. package/sinain-memory/eval/judges/curation_judge.py +46 -0
  14. package/sinain-memory/eval/judges/insight_judge.py +48 -0
  15. package/sinain-memory/eval/judges/mining_judge.py +42 -0
  16. package/sinain-memory/eval/judges/signal_judge.py +45 -0
  17. package/sinain-memory/eval/schemas.py +247 -0
  18. package/sinain-memory/eval_delta.py +109 -0
  19. package/sinain-memory/eval_reporter.py +642 -0
  20. package/sinain-memory/feedback_analyzer.py +221 -0
  21. package/sinain-memory/git_backup.sh +19 -0
  22. package/sinain-memory/insight_synthesizer.py +181 -0
  23. package/sinain-memory/memory/2026-03-01.md +11 -0
  24. package/sinain-memory/memory/playbook-archive/sinain-playbook-2026-03-01-1418.md +15 -0
  25. package/sinain-memory/memory/playbook-logs/2026-03-01.jsonl +1 -0
  26. package/sinain-memory/memory/sinain-playbook.md +21 -0
  27. package/sinain-memory/memory-config.json +39 -0
  28. package/sinain-memory/memory_miner.py +183 -0
  29. package/sinain-memory/module_manager.py +695 -0
  30. package/sinain-memory/playbook_curator.py +225 -0
  31. package/sinain-memory/requirements.txt +3 -0
  32. package/sinain-memory/signal_analyzer.py +141 -0
  33. package/sinain-memory/test_local.py +402 -0
  34. package/sinain-memory/tests/__init__.py +0 -0
  35. package/sinain-memory/tests/conftest.py +189 -0
  36. package/sinain-memory/tests/test_curator_helpers.py +94 -0
  37. package/sinain-memory/tests/test_embedder.py +210 -0
  38. package/sinain-memory/tests/test_extract_json.py +124 -0
  39. package/sinain-memory/tests/test_feedback_computation.py +121 -0
  40. package/sinain-memory/tests/test_miner_helpers.py +71 -0
  41. package/sinain-memory/tests/test_module_management.py +458 -0
  42. package/sinain-memory/tests/test_parsers.py +96 -0
  43. package/sinain-memory/tests/test_tick_evaluator.py +430 -0
  44. package/sinain-memory/tests/test_triple_extractor.py +255 -0
  45. package/sinain-memory/tests/test_triple_ingest.py +191 -0
  46. package/sinain-memory/tests/test_triple_migrate.py +138 -0
  47. package/sinain-memory/tests/test_triplestore.py +248 -0
  48. package/sinain-memory/tick_evaluator.py +392 -0
  49. package/sinain-memory/triple_extractor.py +402 -0
  50. package/sinain-memory/triple_ingest.py +290 -0
  51. package/sinain-memory/triple_migrate.py +275 -0
  52. package/sinain-memory/triple_query.py +184 -0
  53. package/sinain-memory/triplestore.py +498 -0
package/index.ts ADDED
@@ -0,0 +1,2096 @@
1
+ /**
2
+ * sinain-hud OpenClaw Plugin
3
+ *
4
+ * Manages the sinain-hud agent lifecycle:
5
+ * - Auto-deploys HEARTBEAT.md and SKILL.md to workspace on agent start
6
+ * - Tracks tool usage patterns per session (fire-and-forget, sync only)
7
+ * - Generates structured session summaries on agent end
8
+ * - Strips <private> tags from tool results before persistence
9
+ */
10
+
11
+ import { readFileSync, writeFileSync, mkdirSync, existsSync, readdirSync, statSync, chmodSync, copyFileSync, renameSync } from "node:fs";
12
+ import { join, dirname, extname } from "node:path";
13
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
14
+
15
+ // ============================================================================
16
+ // Types
17
+ // ============================================================================
18
+
19
/** Optional plugin settings, read from `api.pluginConfig`. */
type PluginConfig = {
  /** Source file that is synced into the workspace as HEARTBEAT.md. */
  heartbeatPath?: string;
  /** Source file that is synced into the workspace as SKILL.md. */
  skillPath?: string;
  /** Source directory that is synced into the workspace as sinain-memory/. */
  memoryPath?: string;
  /** Source directory that is synced into the workspace as modules/. */
  modulesPath?: string;
  /** Key used to look up the target session inside sessions.json. */
  sessionKey?: string;
  /** NOTE(review): not read in the visible part of this file — presumably an IANA zone name; confirm. */
  userTimezone?: string;
};

/** One module record inside module-registry.json. */
type ModuleRegistryEntry = {
  /** Only modules whose status is "active" are collected for guidance and playbook merging. */
  status: "active" | "suspended" | "disabled";
  /** Active modules are ordered by this value, highest first. */
  priority: number;
  activatedAt: string | null;
  lastTriggered: string | null;
  locked: boolean;
};

/** Shape of module-registry.json: module id → registry entry. */
type ModuleRegistry = {
  version: number;
  modules: Record<string, ModuleRegistryEntry>;
};

/** A single recorded tool invocation. */
type ToolUsageEntry = {
  toolName: string;
  ts: number;
  durationMs?: number;
  error?: string;
};

/** Per-session tracking state kept in memory by the plugin. */
type SessionState = {
  /** Set from Date.now() when the session starts. */
  startedAt: number;
  /** Starts empty when the session starts. */
  toolUsage: ToolUsageEntry[];
  /** Filled in once the agent start hook sees a workspace directory. */
  workspaceDir?: string;
  heartbeatToolCalled?: boolean;
};
54
+
55
+ // ============================================================================
56
+ // Privacy helpers
57
+ // ============================================================================
58
+
59
/** Matches one complete <private>…</private> span (non-greedy, may cross line breaks). */
const PRIVATE_TAG_RE = /<private>[\s\S]*?<\/private>/g;

// ============================================================================
// Retry storm resilience constants
// ============================================================================

/** 5-min sliding window for error rate. */
const ERROR_WINDOW_MS = 5 * 60_000;
/** 80% failure → outage detected. */
const OUTAGE_ERROR_RATE_THRESHOLD = 0.8;
/** Need ≥3 samples before the threshold applies. */
const OUTAGE_MIN_SAMPLES = 3;
/** Skip file sync if done <3 min ago. */
const FILE_SYNC_DEBOUNCE_MS = 3 * 60_000;
/** Skip playbook generation if done <5 min ago. */
const PLAYBOOK_GEN_DEBOUNCE_MS = 5 * 60_000;
/** Fails in <10s = likely API error. */
const SHORT_FAILURE_THRESHOLD_MS = 10_000;
/** >3min failure = likely stuck retry loop. */
const LONG_FAILURE_THRESHOLD_MS = 3 * 60_000;

// Context overflow watchdog constants

/** N consecutive overload errors → trigger reset. */
const OVERFLOW_CONSECUTIVE_THRESHOLD = 5;
/** 1MB guard — skip reset if transcript is small (transient outage). */
const OVERFLOW_TRANSCRIPT_MIN_BYTES = 1_000_000;
/** Error text that counts as an overload / context-overflow failure. */
const OVERFLOW_ERROR_PATTERN = /overloaded|context.*too.*long|token.*limit|extra usage is required/i;

// Proactive session hygiene constants

/** 2MB — proactive archive+truncate threshold. */
const SESSION_HYGIENE_SIZE_BYTES = 2_000_000;
/** 24h — max session age before proactive reset. */
const SESSION_HYGIENE_AGE_MS = 24 * 60 * 60 * 1000;

// Health watchdog constants

/** 5 min — independent of curation timer. */
const WATCHDOG_INTERVAL_MS = 5 * 60_000;
/** 15 min per alert type. */
const ALERT_COOLDOWN_MS = 15 * 60_000;
/** 10 min no success → warning. */
const STALENESS_WARNING_MS = 10 * 60_000;
/** 15 min no success after reset → emergency restart. */
const STALENESS_CRITICAL_MS = 15 * 60_000;
/** 1.5MB → proactive reset. */
const SESSION_SIZE_WARNING_BYTES = 1_500_000;
/** 2MB → forced reset. */
const SESSION_SIZE_RESTART_BYTES = 2_000_000;
/** Max 1 auto-restart per hour. */
const AUTO_RESTART_COOLDOWN_MS = 60 * 60_000;

// ============================================================================
// Parent context injection (subagent support)
// ============================================================================

/** Upper bound, in characters, for cached parent context text. */
const PARENT_CONTEXT_MAX_CHARS = 4000;
/** 10 minutes — stale cache won't be injected. */
const PARENT_CONTEXT_TTL_MS = 10 * 60_000;
97
+
98
/** Cached snapshot of parent-session conversation text, kept for later subagent injection. */
type ParentContextCache = {
  /** Session key the context was captured from. */
  sessionKey: string;
  /** Epoch milliseconds of the most recent capture/append. */
  capturedAt: number;
  /** Newline-separated context lines. */
  contextText: string;
};
103
+
104
/**
 * Report whether a session key identifies a subagent session.
 *
 * A key qualifies when it begins with "subagent:" or contains ":subagent:" anywhere.
 *
 * @param sessionKey Session key to classify.
 * @returns true for subagent keys, false otherwise.
 */
function isSubagentSession(sessionKey: string): boolean {
  if (sessionKey.startsWith("subagent:")) {
    return true;
  }
  return sessionKey.includes(":subagent:");
}
107
+
108
/**
 * Build a compact, chronologically ordered text digest of the latest messages.
 *
 * Messages are visited newest-first until the character budget runs out; each
 * kept message contributes one `[role]: text` line with the text capped at 500
 * characters. Tool messages and messages without text are ignored. If budget
 * is left afterwards, the prompt is placed at the very top as a
 * `[system-prompt]: …` line.
 *
 * @param messages Conversation messages; non-object entries are ignored.
 * @param prompt Current prompt text (may be empty).
 * @param maxChars Total character budget for the digest.
 * @returns The digest lines joined with newlines (possibly an empty string).
 */
function extractRecentContext(
  messages: unknown[],
  prompt: string,
  maxChars: number,
): string {
  // Flatten a message's content into plain text: strings pass through,
  // arrays contribute only their "text" blocks, anything else yields "".
  const contentToText = (content: unknown): string => {
    if (typeof content === "string") return content;
    if (!Array.isArray(content)) return "";
    const parts: string[] = [];
    for (const block of content) {
      if (!block || typeof block !== "object") continue;
      const record = block as Record<string, unknown>;
      if (record.type !== "text") continue;
      parts.push(String(record.text ?? ""));
    }
    return parts.join("\n");
  };

  const newestFirst: string[] = [];
  let remaining = maxChars;

  for (const candidate of [...messages].reverse()) {
    if (remaining <= 0) break;
    if (!candidate || typeof candidate !== "object") continue;

    const { role, content } = candidate as Record<string, unknown>;
    if (typeof role !== "string") continue;
    // Tool traffic is verbose and adds little when handing context over.
    if (role === "tool" || role === "tool_result") continue;

    const body = contentToText(content);
    if (!body) continue;

    const entry = `[${role}]: ${body.slice(0, 500)}`;
    // The first entry that does not fit ends the scan entirely.
    if (entry.length > remaining) break;
    newestFirst.push(entry);
    remaining -= entry.length + 1; // account for the joining newline
  }

  const ordered = newestFirst.reverse();

  if (prompt && remaining > 0) {
    const promptEntry = `[system-prompt]: ${prompt.slice(0, 500)}`;
    if (promptEntry.length <= remaining) {
      ordered.unshift(promptEntry);
    }
  }

  return ordered.join("\n");
}
154
+
155
/**
 * Remove every <private>…</private> span from the text, then trim
 * surrounding whitespace from the result.
 *
 * @param text Input text.
 * @returns The text without private spans.
 */
function stripPrivateTags(text: string): string {
  const withoutPrivate = text.replace(PRIVATE_TAG_RE, "");
  return withoutPrivate.trim();
}
158
+
159
+ // ============================================================================
160
+ // Telegram alert helpers
161
+ // ============================================================================
162
+
163
/** Memoized bot token: undefined = not looked up yet, null = looked up and unavailable. */
let _cachedBotToken: string | null | undefined;
/** Ensures the "alerts disabled" notice is printed only once. */
let _alertMissingConfigLogged = false;

/**
 * Look up the Telegram bot token in `<stateDir>/openclaw.json`.
 *
 * The value is taken from `channels.telegram.botToken`, falling back to
 * `telegram.botToken`, and is accepted only if it is a string longer than
 * 10 characters. The outcome of the first call (including failure) is cached
 * and returned for every later call, regardless of the stateDir passed.
 *
 * @param stateDir Directory expected to contain openclaw.json.
 * @returns The token, or null when it is missing, too short, or unreadable.
 */
function readBotToken(stateDir: string): string | null {
  if (_cachedBotToken !== undefined) {
    return _cachedBotToken;
  }

  let resolved: string | null = null;
  try {
    const configPath = join(stateDir, "openclaw.json");
    const config = JSON.parse(readFileSync(configPath, "utf-8"));
    const candidate = config?.channels?.telegram?.botToken ?? config?.telegram?.botToken ?? null;
    if (typeof candidate === "string" && candidate.length > 10) {
      resolved = candidate;
    }
  } catch {
    // Missing or malformed config: remember that no token is available.
    resolved = null;
  }

  _cachedBotToken = resolved;
  return resolved;
}
178
+
179
/** Alert type → epoch milliseconds of the last alert dispatched for that type. */
const _alertCooldowns = new Map<string, number>();

/**
 * Send a Telegram message without waiting for the outcome.
 *
 * Nothing is sent when SINAIN_ALERT_CHAT_ID or the bot token is unavailable
 * (a notice is printed the first time only), or when an alert of the same
 * type was dispatched less than ALERT_COOLDOWN_MS ago. The HTTP request is
 * not awaited and its failures are discarded.
 *
 * @param alertType Cooldown bucket for this alert.
 * @param title First line of the message.
 * @param body Remaining message text.
 * @param stateDir Directory passed to readBotToken to find the bot token.
 */
async function sendTelegramAlert(
  alertType: string,
  title: string,
  body: string,
  stateDir: string,
): Promise<void> {
  const chatId = process.env.SINAIN_ALERT_CHAT_ID;
  const token = readBotToken(stateDir);

  const configured = Boolean(chatId) && Boolean(token);
  if (!configured) {
    if (_alertMissingConfigLogged) return;
    _alertMissingConfigLogged = true;
    // Will be picked up by whoever has access to logger — logged once
    console.log("sinain-hud: Telegram alerts disabled (missing SINAIN_ALERT_CHAT_ID or bot token)");
    return;
  }

  // Per-type cooldown: the timestamp is recorded before the request is issued.
  const previous = _alertCooldowns.get(alertType) ?? 0;
  const elapsed = Date.now() - previous;
  if (elapsed < ALERT_COOLDOWN_MS) return;
  _alertCooldowns.set(alertType, Date.now());

  const payload = JSON.stringify({
    chat_id: chatId,
    text: `${title}\n${body}`,
    parse_mode: "Markdown",
  });
  const request = fetch(`https://api.telegram.org/bot${token}/sendMessage`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });
  request.catch(() => {
    // Fire-and-forget — alert failure must never break the watchdog
  });
}
213
+
214
+ // ============================================================================
215
+ // File sync helpers
216
+ // ============================================================================
217
+
218
/**
 * Copy a single text file into the workspace when its content differs.
 *
 * The target's parent directory is created if needed. The target is written
 * only when it is missing or its content differs from the source, so an
 * unchanged file is never rewritten. A missing target is tracked separately
 * from an empty one, so an empty source file is still created.
 *
 * @param sourcePath File to copy; when undefined/empty nothing happens.
 * @param workspaceDir Workspace root directory.
 * @param targetName Path of the target relative to the workspace root.
 * @param logger Logger used for the "synced" info line and failure warnings.
 * @returns true if the target was written, false if nothing changed or on any error.
 */
function syncFileToWorkspace(
  sourcePath: string | undefined,
  workspaceDir: string,
  targetName: string,
  logger: OpenClawPluginApi["logger"],
): boolean {
  if (!sourcePath) return false;

  try {
    const content = readFileSync(sourcePath, "utf-8");
    const targetPath = join(workspaceDir, targetName);
    const targetDir = dirname(targetPath);

    if (!existsSync(targetDir)) {
      mkdirSync(targetDir, { recursive: true });
    }

    // null means "target does not exist (or cannot be read)"; this must stay
    // distinct from "" so that an empty source still produces a target file.
    let existing: string | null = null;
    try {
      existing = readFileSync(targetPath, "utf-8");
    } catch {
      // File doesn't exist yet
    }

    // Only write if content changed (avoid unnecessary git diffs)
    if (existing === content) return false;

    writeFileSync(targetPath, content, "utf-8");
    logger.info(`sinain-hud: synced ${targetName} to workspace`);
    return true;
  } catch (err) {
    logger.warn(`sinain-hud: failed to sync ${targetName}: ${String(err)}`);
    return false;
  }
}
254
+
255
/**
 * Recursively sync a source directory to the workspace with selective overwrite policy:
 * - .json, .sh, .txt, .jsonl, .py — always overwritten when the content differs
 * - every other extension — deploy-once only (skipped if the target already exists)
 * Directories whose name starts with "__" (e.g. __pycache__) or "." are skipped.
 *
 * A missing target is tracked separately from an empty one, so empty source
 * files (such as empty __init__.py markers) are created in the workspace.
 *
 * NOTE(review): an earlier revision of this comment described .py as
 * deploy-once; the code always overwrites .py, which is what is documented
 * here — confirm that this is the intended policy.
 *
 * @param sourceDir Directory to copy from; returns 0 if it does not exist.
 * @param workspaceDir Workspace root directory.
 * @param targetDirName Target directory, relative to the workspace root.
 * @param logger Logger used for the summary info line.
 * @returns Number of files written.
 */
function syncDirToWorkspace(
  sourceDir: string,
  workspaceDir: string,
  targetDirName: string,
  logger: OpenClawPluginApi["logger"],
): number {
  if (!existsSync(sourceDir)) return 0;
  const targetDir = join(workspaceDir, targetDirName);

  const ALWAYS_OVERWRITE = new Set([".json", ".sh", ".txt", ".jsonl", ".py"]);
  let synced = 0;

  function syncRecursive(srcDir: string, dstDir: string): void {
    if (!existsSync(dstDir)) mkdirSync(dstDir, { recursive: true });

    for (const entry of readdirSync(srcDir)) {
      const srcPath = join(srcDir, entry);
      const dstPath = join(dstDir, entry);
      const stat = statSync(srcPath);

      if (stat.isDirectory()) {
        if (entry.startsWith("__") || entry.startsWith(".")) continue;
        syncRecursive(srcPath, dstPath);
        continue;
      }
      if (!stat.isFile()) continue;

      const ext = extname(entry).toLowerCase();
      // Deploy-once: leave an existing target alone unless its extension is always overwritten.
      if (!ALWAYS_OVERWRITE.has(ext) && existsSync(dstPath)) continue;

      const content = readFileSync(srcPath, "utf-8");
      // null = target missing/unreadable; must differ from "" so empty files get created.
      let existing: string | null = null;
      try {
        existing = readFileSync(dstPath, "utf-8");
      } catch {
        // Target doesn't exist yet
      }
      if (existing !== content) {
        writeFileSync(dstPath, content, "utf-8");
        synced++;
      }
    }
  }

  syncRecursive(sourceDir, targetDir);
  if (synced > 0) logger.info(`sinain-hud: synced ${synced} files to ${targetDirName}/`);
  return synced;
}
302
+
303
/**
 * Recursively sync a modules/ source directory to workspace with selective deploy policy:
 * - manifest.json → always overwrite when the content differs
 * - *.json directly inside a directory named "context" → always overwrite
 * - everything else (module-registry.json, patterns.md, guidance.md, unknown
 *   files) → deploy-once: skipped when the target already exists
 *
 * The "context" rule is decided from the parent directory name, because the
 * names returned by readdirSync are bare file names and never carry a
 * "context/" prefix. A missing target is tracked separately from an empty
 * one, so empty source files are still created.
 *
 * @param sourceDir Modules source directory; returns 0 if it does not exist.
 * @param workspaceDir Workspace root; files land under `<workspaceDir>/modules`.
 * @param logger Logger used for the summary info line.
 * @returns Number of files written.
 */
function syncModulesToWorkspace(
  sourceDir: string,
  workspaceDir: string,
  logger: OpenClawPluginApi["logger"],
): number {
  if (!existsSync(sourceDir)) return 0;
  const targetDir = join(workspaceDir, "modules");

  const ALWAYS_OVERWRITE = new Set(["manifest.json"]);
  let synced = 0;

  function syncRecursive(srcDir: string, dstDir: string, inContextDir: boolean): void {
    if (!existsSync(dstDir)) mkdirSync(dstDir, { recursive: true });

    for (const entry of readdirSync(srcDir)) {
      const srcPath = join(srcDir, entry);
      const dstPath = join(dstDir, entry);
      const stat = statSync(srcPath);

      if (stat.isDirectory()) {
        syncRecursive(srcPath, dstPath, entry === "context");
        continue;
      }

      if (!stat.isFile()) continue;

      const isContextJson = inContextDir && extname(entry).toLowerCase() === ".json";
      const isAlwaysOverwrite = ALWAYS_OVERWRITE.has(entry) || isContextJson;

      // Deploy-once is the default: an existing target is left untouched.
      if (!isAlwaysOverwrite && existsSync(dstPath)) continue;

      const content = readFileSync(srcPath, "utf-8");
      // null = target missing/unreadable; must differ from "" so empty files get created.
      let existing: string | null = null;
      try {
        existing = readFileSync(dstPath, "utf-8");
      } catch {
        // Target doesn't exist yet
      }
      if (existing !== content) {
        writeFileSync(dstPath, content, "utf-8");
        synced++;
      }
    }
  }

  syncRecursive(sourceDir, targetDir, false);
  if (synced > 0) logger.info(`sinain-hud: synced ${synced} module files to modules/`);
  return synced;
}
362
+
363
/**
 * Collect behavioral guidance from all active modules for prependContext injection.
 *
 * Reads modules/module-registry.json, then the guidance.md of every module
 * whose status is "active" (highest priority first). A module whose
 * manifest.json has a truthy `importedAt` is labelled
 * `<manifest.name or id> [transferred]`; all others are labelled by id.
 * Modules with missing, empty or unreadable guidance are skipped.
 *
 * A registry that cannot be parsed, is not an object, or has no `modules`
 * map is treated as "no active modules" instead of raising.
 *
 * @param workspaceDir Workspace root containing the modules/ directory.
 * @param logger Logger used for the "injecting guidance" info line.
 * @returns A "[MODULE GUIDANCE]" block, or "" when there is nothing to inject.
 */
function collectModuleGuidance(
  workspaceDir: string,
  logger: OpenClawPluginApi["logger"],
): string {
  const registryPath = join(workspaceDir, "modules", "module-registry.json");
  if (!existsSync(registryPath)) return "";

  let registry: ModuleRegistry | null;
  try {
    registry = JSON.parse(readFileSync(registryPath, "utf-8")) as ModuleRegistry | null;
  } catch {
    return "";
  }

  // Active modules sorted by priority desc. `?.`/`??` guard against a
  // registry file that parses but lacks the expected shape.
  const activeModules: Array<{ id: string; priority: number }> = [];
  for (const [id, entry] of Object.entries(registry?.modules ?? {})) {
    if (entry?.status === "active") {
      activeModules.push({ id, priority: entry.priority });
    }
  }
  activeModules.sort((a, b) => b.priority - a.priority);

  const guidanceSections: string[] = [];

  for (const mod of activeModules) {
    const guidancePath = join(workspaceDir, "modules", mod.id, "guidance.md");
    if (!existsSync(guidancePath)) continue;

    try {
      const content = readFileSync(guidancePath, "utf-8").trim();
      if (!content) continue;

      // Check if module was imported (transferred)
      let label = mod.id;
      const manifestPath = join(workspaceDir, "modules", mod.id, "manifest.json");
      if (existsSync(manifestPath)) {
        try {
          const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
          if (manifest.importedAt) {
            label = `${manifest.name || mod.id} [transferred]`;
          }
        } catch {
          // Unreadable manifest: keep the plain module id as the label
        }
      }

      guidanceSections.push(`### ${label}\n${content}`);
    } catch {
      // Skip unreadable guidance
    }
  }

  if (guidanceSections.length === 0) return "";

  logger.info(`sinain-hud: injecting guidance from ${guidanceSections.length} module(s)`);
  return `[MODULE GUIDANCE]\n${guidanceSections.join("\n\n")}`;
}
428
+
429
/**
 * Generate the merged effective playbook from active modules + base playbook.
 *
 * Reads modules/module-registry.json, collects patterns.md from each module
 * whose status is "active" (sorted by priority desc), appends the base
 * memory/sinain-playbook.md, and writes the merged result to
 * memory/sinain-playbook-effective.md (always overwritten).
 *
 * A registry that parses but is not an object or has no `modules` map is
 * treated as having no active modules. A failure to write the output is
 * logged and reported through the return value instead of being thrown.
 *
 * @param workspaceDir Workspace root containing modules/ and memory/.
 * @param logger Logger used for progress and failure messages.
 * @returns true if the effective playbook was written, false otherwise.
 */
function generateEffectivePlaybook(
  workspaceDir: string,
  logger: OpenClawPluginApi["logger"],
): boolean {
  const registryPath = join(workspaceDir, "modules", "module-registry.json");
  if (!existsSync(registryPath)) {
    logger.info("sinain-hud: no module-registry.json found, skipping effective playbook generation");
    return false;
  }

  let registry: ModuleRegistry | null;
  try {
    registry = JSON.parse(readFileSync(registryPath, "utf-8")) as ModuleRegistry | null;
  } catch (err) {
    logger.warn(`sinain-hud: failed to parse module-registry.json: ${String(err)}`);
    return false;
  }

  // Collect active modules sorted by priority desc. `?.`/`??` guard against
  // a registry file that parses but lacks the expected shape.
  const activeModules: Array<{ id: string; priority: number }> = [];
  for (const [id, entry] of Object.entries(registry?.modules ?? {})) {
    if (entry?.status === "active") {
      activeModules.push({ id, priority: entry.priority });
    }
  }
  activeModules.sort((a, b) => b.priority - a.priority);

  // Build module stack header
  const stackLabel = activeModules.map((m) => `${m.id}(${m.priority})`).join(", ");

  // Collect patterns from each active module
  const sections: string[] = [];
  sections.push(`<!-- module-stack: ${stackLabel} -->`);
  sections.push("");

  for (const mod of activeModules) {
    const patternsPath = join(workspaceDir, "modules", mod.id, "patterns.md");
    if (!existsSync(patternsPath)) continue;
    try {
      const patterns = readFileSync(patternsPath, "utf-8").trim();
      if (!patterns) continue;

      sections.push(`<!-- module: ${mod.id} (priority ${mod.priority}) -->`);
      // Attribution for transferred (imported) modules
      const manifestPath = join(workspaceDir, "modules", mod.id, "manifest.json");
      if (existsSync(manifestPath)) {
        try {
          const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
          if (manifest.importedAt) {
            sections.push(`> *[Transferred knowledge: ${manifest.name || mod.id}]*`);
          }
        } catch {
          // Skip attribution if manifest unreadable
        }
      }
      sections.push(patterns);
      sections.push("");
    } catch {
      // Skip unreadable patterns
    }
  }

  // Append base playbook
  const basePlaybookPath = join(workspaceDir, "memory", "sinain-playbook.md");
  if (existsSync(basePlaybookPath)) {
    try {
      const base = readFileSync(basePlaybookPath, "utf-8").trim();
      if (base) {
        sections.push("<!-- base-playbook -->");
        sections.push(base);
        sections.push("");
      }
    } catch {
      // Skip if unreadable
    }
  }

  // Write effective playbook (always overwrite)
  const effectivePath = join(workspaceDir, "memory", "sinain-playbook-effective.md");
  try {
    const effectiveDir = dirname(effectivePath);
    if (!existsSync(effectiveDir)) mkdirSync(effectiveDir, { recursive: true });
    writeFileSync(effectivePath, sections.join("\n"), "utf-8");
  } catch (err) {
    logger.warn(`sinain-hud: failed to write effective playbook: ${String(err)}`);
    return false;
  }

  logger.info(`sinain-hud: generated effective playbook (${activeModules.length} active modules)`);
  return true;
}
521
+
522
+ // ============================================================================
523
+ // Plugin Definition
524
+ // ============================================================================
525
+
526
+ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
527
+ const cfg = (api.pluginConfig ?? {}) as PluginConfig;
528
+ const sessionStates = new Map<string, SessionState>();
529
+ let curationInterval: ReturnType<typeof setInterval> | null = null;
530
+ let lastWorkspaceDir: string | null = null;
531
+ let consecutiveHeartbeatSkips = 0;
532
+ let lastEvalReportDate: string | null = null;
533
+
534
+ // Retry storm resilience state
535
+ const recentOutcomes: Array<{ ts: number; success: boolean; error?: string }> = [];
536
+ let lastSuccessTs = 0;
537
+ let lastPlaybookGenTs = 0;
538
+ let lastFileSyncTs = 0;
539
+ let outageDetected = false;
540
+ let consecutiveFailures = 0;
541
+ let outageStartTs = 0;
542
+ let consecutiveOverflowErrors = 0;
543
+
544
+ // Parent context cache for subagent injection
545
+ let parentContextCache: ParentContextCache | null = null;
546
+
547
+ // Health watchdog state
548
+ let watchdogInterval: ReturnType<typeof setInterval> | null = null;
549
+ let lastResetTs = 0;
550
+ let lastAutoRestartTs = 0;
551
+
552
/**
 * Append one line to the cached parent context and refresh its timestamp.
 *
 * Does nothing when no cache exists. When the text grows beyond
 * PARENT_CONTEXT_MAX_CHARS, the oldest content is dropped from the front,
 * cutting just after the first newline found at or past the excess offset
 * (or exactly at the excess offset if there is no such newline).
 *
 * @param line Text to append as a new line.
 */
function appendToContextCache(line: string): void {
  const cache = parentContextCache;
  if (!cache) return;

  cache.contextText += "\n" + line;
  cache.capturedAt = Date.now();

  const overflow = cache.contextText.length - PARENT_CONTEXT_MAX_CHARS;
  if (overflow <= 0) return;

  // Trim from the front so the most recent context is what survives.
  const lineBreak = cache.contextText.indexOf("\n", overflow);
  const cutAt = lineBreak >= 0 ? lineBreak + 1 : overflow;
  cache.contextText = cache.contextText.slice(cutAt);
}
565
+
566
/**
 * Compute the failure ratio over the outcomes recorded within ERROR_WINDOW_MS.
 *
 * Leading entries of `recentOutcomes` older than the window are removed
 * from the array as a side effect.
 *
 * @returns `rate` (failures / total, 0 when empty), `total` and `failures`.
 */
function computeErrorRate(): { rate: number; total: number; failures: number } {
  const cutoff = Date.now() - ERROR_WINDOW_MS;

  // Count the stale prefix, then drop it in one go.
  let staleCount = 0;
  while (staleCount < recentOutcomes.length && recentOutcomes[staleCount].ts < cutoff) {
    staleCount++;
  }
  if (staleCount > 0) {
    recentOutcomes.splice(0, staleCount);
  }

  const total = recentOutcomes.length;
  if (total === 0) {
    return { rate: 0, total: 0, failures: 0 };
  }

  let failures = 0;
  for (const outcome of recentOutcomes) {
    if (!outcome.success) failures++;
  }
  return { rate: failures / total, total, failures };
}
577
+
578
/**
 * Locate sessions.json relative to the last seen workspace directory.
 *
 * The path is `<parent of workspace>/agents/main/sessions/sessions.json`.
 *
 * @returns The path when the file exists; null when no workspace has been
 *          seen yet or the file is absent.
 */
function getSessionsJsonPath(): string | null {
  if (!lastWorkspaceDir) {
    return null;
  }
  const candidate = join(dirname(lastWorkspaceDir), "agents", "main", "sessions", "sessions.json");
  if (!existsSync(candidate)) {
    return null;
  }
  return candidate;
}
584
+
585
/**
 * Archive and truncate the configured session's transcript.
 *
 * Steps: resolve the transcript path from sessions.json via cfg.sessionKey,
 * skip when the transcript is smaller than OVERFLOW_TRANSCRIPT_MIN_BYTES,
 * copy it to `<name>.archived.<timestamp>.jsonl`, truncate the original to
 * empty, then best-effort reset `contextTokens` to 0 in sessions.json.
 *
 * Every failure before or during truncation is logged and reported as
 * `false`; nothing is thrown to the caller. The metadata update after
 * truncation is non-fatal.
 *
 * @returns true when the transcript was truncated, false otherwise.
 */
function performOverflowReset(): boolean {
  const targetSessionKey = cfg.sessionKey;
  if (!targetSessionKey || !lastWorkspaceDir) {
    api.logger.warn("sinain-hud: overflow reset aborted — no sessionKey or workspace dir");
    return false;
  }

  const sessionsJsonPath = getSessionsJsonPath();

  if (!sessionsJsonPath) {
    api.logger.warn(`sinain-hud: overflow reset aborted — sessions.json not found`);
    return false;
  }

  let sessionsData: Record<string, Record<string, unknown>> | null;
  try {
    sessionsData = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
  } catch (err) {
    api.logger.warn(`sinain-hud: overflow reset aborted — cannot parse sessions.json: ${err}`);
    return false;
  }

  // Optional access: a sessions.json that parses to null must not throw here.
  const session = sessionsData?.[targetSessionKey];
  const transcriptPath = session?.sessionFile as string | undefined;
  if (!session || !transcriptPath || !existsSync(transcriptPath)) {
    api.logger.warn(`sinain-hud: overflow reset aborted — transcript not found: ${transcriptPath}`);
    return false;
  }

  // Guard: only reset if transcript is actually large
  let size: number;
  try {
    size = statSync(transcriptPath).size;
  } catch (err) {
    // The file can disappear between the existence check and the stat.
    api.logger.warn(`sinain-hud: overflow reset aborted — cannot stat transcript: ${err}`);
    return false;
  }
  if (size < OVERFLOW_TRANSCRIPT_MIN_BYTES) {
    api.logger.info(
      `sinain-hud: overflow reset skipped — transcript only ${Math.round(size / 1024)}KB (threshold: ${Math.round(OVERFLOW_TRANSCRIPT_MIN_BYTES / 1024)}KB)`,
    );
    return false;
  }

  // Archive → truncate → reset metadata
  const archivePath = transcriptPath.replace(/\.jsonl$/, `.archived.${Date.now()}.jsonl`);
  try {
    copyFileSync(transcriptPath, archivePath);
  } catch (err) {
    api.logger.warn(`sinain-hud: overflow reset aborted — archive failed: ${err}`);
    return false;
  }

  try {
    writeFileSync(transcriptPath, "", "utf-8");
  } catch (err) {
    // The archive copy already exists at this point and is left in place.
    api.logger.warn(`sinain-hud: overflow reset aborted — truncate failed: ${err}`);
    return false;
  }

  try {
    session.contextTokens = 0;
    writeFileSync(sessionsJsonPath, JSON.stringify(sessionsData, null, 2), "utf-8");
  } catch {
    // Non-fatal — gateway recomputes tokens from transcript content
  }

  api.logger.info(
    `sinain-hud: === OVERFLOW RESET === Transcript truncated (was ${Math.round(size / 1024)}KB). Archive: ${archivePath}`,
  );
  return true;
}
646
+
647
+ api.logger.info("sinain-hud: plugin registered");
648
+
649
+ // ==========================================================================
650
+ // RPC: situation.update — receive fresh SITUATION.md from sinain-core
651
+ // ==========================================================================
652
+
653
+ api.registerGatewayMethod("situation.update", ({ params, respond }: { params: Record<string, unknown>; respond: (ok: boolean, result: unknown, error?: unknown) => void }) => {
654
+ const content = params.content;
655
+ if (typeof content !== "string" || !content) {
656
+ respond(false, null, { code: "invalid_params", message: "content must be a non-empty string" });
657
+ return;
658
+ }
659
+ if (!lastWorkspaceDir) {
660
+ respond(false, null, { code: "not_ready", message: "workspace not initialized" });
661
+ return;
662
+ }
663
+ const situationPath = join(lastWorkspaceDir, "SITUATION.md");
664
+ const tmpPath = situationPath + ".rpc.tmp";
665
+ try {
666
+ writeFileSync(tmpPath, content, "utf-8");
667
+ renameSync(tmpPath, situationPath);
668
+ respond(true, { ok: true, bytes: content.length });
669
+ api.logger.info(`sinain-hud: SITUATION.md updated via RPC (${content.length} chars)`);
670
+ } catch (err: any) {
671
+ respond(false, null, { code: "write_error", message: err.message });
672
+ }
673
+ });
674
+
675
+ // ==========================================================================
676
+ // Hook: session_start — initialize per-session tracking
677
+ // ==========================================================================
678
+
679
+ api.on("session_start", async (_event, ctx) => {
680
+ const key = ctx.sessionId;
681
+ sessionStates.set(key, {
682
+ startedAt: Date.now(),
683
+ toolUsage: [],
684
+ });
685
+ api.logger.info?.(`sinain-hud: session started (${key})`);
686
+ });
687
+
688
+ // ==========================================================================
689
+ // Hook: before_agent_start — auto-deploy HEARTBEAT.md + SKILL.md
690
+ // ==========================================================================
691
+
692
// Hook: before_agent_start.
// Runs before every agent start and, in order:
//   1. records the workspace dir (module-level and on the session state),
//   2. syncs HEARTBEAT.md / SKILL.md / sinain-memory / modules into the workspace (debounced),
//   3. regenerates the effective playbook (debounced),
//   4. fires off triple-store ingestion for every active module (not awaited),
//   5. ensures the memory directory tree exists,
//   6. caches recent conversation context (main sessions) or injects it (subagents),
//   7. resolves to `{ prependContext }` built from the accumulated context parts.
// Resolves to undefined when ctx.workspaceDir is not set.
api.on("before_agent_start", async (event, ctx) => {
  const workspaceDir = ctx.workspaceDir;
  if (!workspaceDir) return;

  // Track workspace dir in session state and for curation timer
  lastWorkspaceDir = workspaceDir;
  const sessionKey = ctx.sessionKey;
  if (sessionKey) {
    const state = sessionStates.get(sessionKey);
    if (state) {
      state.workspaceDir = workspaceDir;
    }
  }

  const now = Date.now();

  // ── Debounced file sync (skipped while within FILE_SYNC_DEBOUNCE_MS of the last sync) ──
  const fileSyncDue = lastFileSyncTs === 0 || (now - lastFileSyncTs) >= FILE_SYNC_DEBOUNCE_MS;
  if (fileSyncDue) {
    const heartbeatSource = cfg.heartbeatPath
      ? api.resolvePath(cfg.heartbeatPath)
      : undefined;
    const skillSource = cfg.skillPath
      ? api.resolvePath(cfg.skillPath)
      : undefined;

    syncFileToWorkspace(heartbeatSource, workspaceDir, "HEARTBEAT.md", api.logger);
    syncFileToWorkspace(skillSource, workspaceDir, "SKILL.md", api.logger);

    const memorySource = cfg.memoryPath ? api.resolvePath(cfg.memoryPath) : undefined;
    if (memorySource) {
      syncDirToWorkspace(memorySource, workspaceDir, "sinain-memory", api.logger);
      // The heartbeat tool runs git_backup.sh, so make sure it is executable (best-effort).
      const gbPath = join(workspaceDir, "sinain-memory", "git_backup.sh");
      if (existsSync(gbPath)) {
        try { chmodSync(gbPath, 0o755); } catch {}
      }
    }

    const modulesSource = cfg.modulesPath ? api.resolvePath(cfg.modulesPath) : undefined;
    if (modulesSource && existsSync(modulesSource)) {
      syncModulesToWorkspace(modulesSource, workspaceDir, api.logger);
    }

    lastFileSyncTs = now;
  }

  // ── Debounced playbook generation (skipped while within PLAYBOOK_GEN_DEBOUNCE_MS) ──
  const playbookGenDue = lastPlaybookGenTs === 0 || (now - lastPlaybookGenTs) >= PLAYBOOK_GEN_DEBOUNCE_MS;
  if (playbookGenDue) {
    const modulesSource = cfg.modulesPath ? api.resolvePath(cfg.modulesPath) : undefined;
    if (modulesSource && existsSync(modulesSource)) {
      generateEffectivePlaybook(workspaceDir, api.logger);
      // Only stamp the debounce when generation actually ran.
      lastPlaybookGenTs = now;
    }
  }

  // ── Fire-and-forget: ingest active module patterns into triple store
  // Deliberately best-effort: a missing/corrupt registry or a failed spawn must
  // never block the agent from starting, so every failure is ignored here.
  try {
    const regPath = join(workspaceDir, "modules", "module-registry.json");
    if (existsSync(regPath)) {
      const reg = JSON.parse(readFileSync(regPath, "utf-8"));
      for (const [id, entry] of Object.entries(reg.modules || {})) {
        if ((entry as Record<string, unknown>).status === "active") {
          api.runtime.system.runCommandWithTimeout(
            ["uv", "run", "--with", "requests", "python3",
              "sinain-memory/triple_ingest.py",
              "--memory-dir", "memory/",
              "--ingest-module", id,
              "--modules-dir", "modules/"],
            { timeoutMs: 15_000, cwd: workspaceDir },
          ).catch(() => {});
        }
      }
    }
  } catch {}

  // ── Memory dirs — always run (cheap, idempotent) ────────────────────
  for (const dir of ["memory", "memory/playbook-archive", "memory/playbook-logs",
    "memory/eval-logs", "memory/eval-reports"]) {
    const fullPath = join(workspaceDir, dir);
    if (!existsSync(fullPath)) {
      try {
        mkdirSync(fullPath, { recursive: true });
      } catch (err) {
        // A directory we cannot create must not abort the hook (and with it all
        // context injection below); log and move on to the next directory.
        api.logger.warn(`sinain-hud: failed to create ${fullPath}: ${String(err)}`);
        continue;
      }
    }
    // Ensure directory is writable even if created by another process (e.g. root)
    try { chmodSync(fullPath, 0o755); } catch {}
  }

  // ── Context capture + subagent injection ────────────────────────────
  const isSubagent = sessionKey ? isSubagentSession(sessionKey) : false;

  if (!isSubagent) {
    // Main session: capture recent conversation context for future subagents
    const messages = (event as Record<string, unknown>).messages as unknown[] | undefined;
    const prompt = (event as Record<string, unknown>).prompt as string | undefined;
    if (messages && Array.isArray(messages) && messages.length > 0) {
      const contextText = extractRecentContext(messages, prompt ?? "", PARENT_CONTEXT_MAX_CHARS);
      if (contextText) {
        parentContextCache = {
          sessionKey: sessionKey ?? "unknown",
          capturedAt: now,
          contextText,
        };
        api.logger.info(
          `sinain-hud: captured parent context (${contextText.length} chars, ${messages.length} messages)`,
        );
      }
    }
  }

  // ── Accumulate context parts (time + outage recovery + subagent injection)
  const contextParts: string[] = [];

  // Time awareness — always inject current local time
  const userTz = cfg.userTimezone ?? "Europe/Berlin";
  try {
    const nowLocal = new Date().toLocaleString("en-GB", {
      timeZone: userTz,
      weekday: "long",
      year: "numeric",
      month: "long",
      day: "numeric",
      hour: "2-digit",
      minute: "2-digit",
      hour12: false,
    });
    contextParts.push(`[CURRENT TIME] ${nowLocal} (${userTz})`);
  } catch (err) {
    // toLocaleString throws RangeError for an unknown time zone identifier. A
    // bad config value must not reject the whole hook, so fall back to UTC.
    api.logger.warn(
      `sinain-hud: invalid userTimezone "${userTz}", falling back to UTC: ${String(err)}`,
    );
    contextParts.push(`[CURRENT TIME] ${new Date().toISOString()} (UTC)`);
  }

  // Recovery context injection after outage
  if (outageStartTs > 0 && !outageDetected && lastSuccessTs > outageStartTs) {
    const outageDurationMin = Math.round((lastSuccessTs - outageStartTs) / 60_000);
    outageStartTs = 0; // one-shot: only inject once
    api.logger.info(`sinain-hud: injecting recovery context (outage lasted ~${outageDurationMin}min)`);
    contextParts.push(
      `[SYSTEM] The upstream API was unavailable for ~${outageDurationMin} minutes. ` +
      `Multiple queued messages may have accumulated. Prioritize the current task, skip catch-up on stale items, and keep responses concise.`,
    );
  }

  // Subagent: inject cached parent context
  if (isSubagent && parentContextCache) {
    const cacheAgeMs = now - parentContextCache.capturedAt;
    if (cacheAgeMs < PARENT_CONTEXT_TTL_MS) {
      const cacheAgeSec = Math.round(cacheAgeMs / 1000);
      api.logger.info(
        `sinain-hud: injected parent context for subagent (${parentContextCache.contextText.length} chars, ${cacheAgeSec}s old)`,
      );
      contextParts.push(
        `[PARENT SESSION CONTEXT] The following is a summary of the recent conversation from the parent session that spawned you. Use it to understand references to code, files, or decisions discussed earlier:\n\n${parentContextCache.contextText}`,
      );
    } else {
      api.logger.info(
        `sinain-hud: skipped stale parent context for subagent (${Math.round(cacheAgeMs / 1000)}s old, TTL=${PARENT_CONTEXT_TTL_MS / 1000}s)`,
      );
    }
  }

  // Heartbeat enforcement (replaces fork's system-prompt.ts logic)
  if (cfg.heartbeatPath) {
    const hbTarget = join(workspaceDir, "HEARTBEAT.md");
    if (existsSync(hbTarget)) {
      contextParts.push(
        "[HEARTBEAT PROTOCOL] HEARTBEAT.md is loaded in your project context. " +
        "On every heartbeat poll, you MUST execute the full protocol defined in " +
        "HEARTBEAT.md — all phases, all steps, in order. " +
        "Only reply HEARTBEAT_OK if HEARTBEAT.md explicitly permits it " +
        "after you have completed all mandatory steps."
      );
    }
  }

  // SITUATION.md bootstrap (replaces fork's workspace.ts logic)
  const situationPath = join(workspaceDir, "SITUATION.md");
  if (existsSync(situationPath)) {
    try {
      const content = readFileSync(situationPath, "utf-8").trim();
      if (content) contextParts.push(`[SITUATION]\n${content}`);
    } catch { /* skip if unreadable */ }
  }

  // Knowledge transfer attribution — if effective playbook contains imported modules
  const effectivePlaybookPath = join(workspaceDir, "memory", "sinain-playbook-effective.md");
  if (existsSync(effectivePlaybookPath)) {
    try {
      const effectiveContent = readFileSync(effectivePlaybookPath, "utf-8");
      if (effectiveContent.includes("[Transferred knowledge:")) {
        contextParts.push(
          "[KNOWLEDGE TRANSFER] Some patterns in your playbook were transferred from " +
          "another sinain instance. When surfacing these, briefly cite their origin."
        );
      }
    } catch { /* skip if unreadable */ }
  }

  // Module guidance injection — behavioral instructions from active modules
  const moduleGuidance = collectModuleGuidance(workspaceDir, api.logger);
  if (moduleGuidance) contextParts.push(moduleGuidance);

  // Blocking (awaited): knowledge graph context (10s timeout, skipped on failure).
  // A non-zero exit, unparsable stdout or a thrown error all leave contextParts untouched.
  try {
    const ragResult = await api.runtime.system.runCommandWithTimeout(
      ["uv", "run", "--with", "requests", "python3",
        "sinain-memory/triple_query.py",
        "--memory-dir", join(workspaceDir, "memory"),
        "--context", "current session",
        "--max-chars", "1500"],
      { timeoutMs: 10_000, cwd: workspaceDir },
    );
    if (ragResult.code === 0) {
      const parsed = JSON.parse(ragResult.stdout.trim());
      // Very short results (<= 50 chars) are ignored.
      if (parsed.context && parsed.context.length > 50) {
        contextParts.push(`[KNOWLEDGE GRAPH CONTEXT]\n${parsed.context}`);
      }
    }
  } catch {}

  if (contextParts.length > 0) {
    return { prependContext: contextParts.join("\n\n") };
  }
});
908
+
909
+ // ==========================================================================
910
+ // Hook: tool_result_persist — track tool usage + strip privacy tags
911
+ // IMPORTANT: This hook MUST be synchronous (no async/await)
912
+ // ==========================================================================
913
+
914
// Hook: tool_result_persist (must stay synchronous — no async/await).
// 1. Records the tool call on the session state, flagging heartbeat tool usage.
// 2. Returns a replacement message with <private> content stripped, or
//    undefined when the message needs no change.
api.on("tool_result_persist", (event, ctx) => {
  // ── Usage tracking ────────────────────────────────────────────────
  const tracked = ctx.sessionKey ? sessionStates.get(ctx.sessionKey) : undefined;
  if (tracked) {
    tracked.toolUsage.push({
      toolName: ctx.toolName ?? "unknown",
      ts: Date.now(),
    });
    // Remembered so agent_end can validate heartbeat compliance.
    if (ctx.toolName === "sinain_heartbeat_tick") {
      tracked.heartbeatToolCalled = true;
    }
  }

  // ── Privacy scrubbing ─────────────────────────────────────────────
  const msg = event.message;
  if (!msg || typeof msg !== "object" || !("content" in msg)) return;

  const content = (msg as Record<string, unknown>).content;

  // Plain-string content: rewrite only when a <private> tag is present.
  if (typeof content === "string") {
    if (!content.includes("<private>")) return;
    return {
      message: { ...msg, content: stripPrivateTags(content) } as typeof msg,
    };
  }

  if (!Array.isArray(content)) return;

  // Block-array content: scrub text blocks, pass everything else through as-is.
  let touched = false;
  const scrubbed: unknown[] = [];
  for (const block of content) {
    const isTextBlock =
      block &&
      typeof block === "object" &&
      "type" in block &&
      (block as Record<string, unknown>).type === "text" &&
      "text" in block;

    if (isTextBlock) {
      const text = (block as Record<string, unknown>).text;
      if (typeof text === "string" && text.includes("<private>")) {
        touched = true;
        scrubbed.push({ ...block, text: stripPrivateTags(text) });
        continue;
      }
    }
    scrubbed.push(block);
  }

  if (!touched) return;
  return {
    message: { ...msg, content: scrubbed } as typeof msg,
  };
});
970
+
971
+ // ==========================================================================
972
+ // Hook: agent_end — generate structured session summary
973
+ // ==========================================================================
974
+
975
// Hook: agent_end.
// For a tracked session this:
//   - records the run outcome and updates outage detection / recovery state,
//   - runs the context-overflow watchdog (only for the session matching cfg.sessionKey),
//   - appends a structured summary line to memory/session-summaries.jsonl and
//     fires off triple-store ingestion of that summary (not awaited),
//   - checks heartbeat compliance for heartbeat-triggered runs,
//   - finally drops the session's in-memory state.
// Does nothing when ctx.sessionKey is missing or no state is tracked for it.
api.on("agent_end", async (event, ctx) => {
  const sessionKey = ctx.sessionKey;
  if (!sessionKey) return;

  const state = sessionStates.get(sessionKey);
  if (!state) return;

  const durationMs = event.durationMs ?? (Date.now() - state.startedAt);
  const toolCount = state.toolUsage.length;
  const isSuccess = event.success === true;
  const isShortFailure = !isSuccess && durationMs < SHORT_FAILURE_THRESHOLD_MS;

  // ── Retry storm: track outcome ──────────────────────────────────────
  recentOutcomes.push({
    ts: Date.now(),
    success: isSuccess,
    error: isSuccess ? undefined : String(event.error ?? "unknown"),
  });

  if (isSuccess) {
    const wasOutage = outageDetected;
    const outageDurationMs = outageStartTs > 0 ? Date.now() - outageStartTs : 0;
    consecutiveFailures = 0;
    outageDetected = false;
    lastSuccessTs = Date.now();
    if (wasOutage) {
      api.logger.info(
        `sinain-hud: OUTAGE RECOVERED — resumed after ${Math.round(outageDurationMs / 1000)}s`,
      );
      // outageStartTs is NOT reset here — before_agent_start uses it to
      // inject recovery context on the next run, then resets it itself.

      // Send recovery alert via Telegram
      const sd = getStateDir();
      if (sd) {
        sendTelegramAlert("recovery", "✅ *sinain-hud* recovered",
          `• Gateway up, first run succeeded\n• Downtime: ~${Math.round(outageDurationMs / 60_000)}min`,
          sd);
      }
    }
  } else if (isShortFailure) {
    // Only short failures count towards outage detection.
    consecutiveFailures++;
    const { rate, total } = computeErrorRate();
    if (!outageDetected && total >= OUTAGE_MIN_SAMPLES && rate >= OUTAGE_ERROR_RATE_THRESHOLD) {
      outageDetected = true;
      outageStartTs = Date.now();
      api.logger.warn(
        `sinain-hud: OUTAGE DETECTED — ${Math.round(rate * 100)}% error rate over ${total} samples, ${consecutiveFailures} consecutive failures`,
      );
      const sd = getStateDir();
      if (sd) {
        sendTelegramAlert("outage", "🔴 *sinain-hud* OUTAGE DETECTED",
          `• ${Math.round(rate * 100)}% error rate over ${total} samples\n• ${consecutiveFailures} consecutive failures`,
          sd);
      }
    }
  }

  // ── Context overflow watchdog ──────────────────────────────────────
  if (sessionKey === cfg.sessionKey) {
    // Evaluate the pattern once and reuse the result for both checks below, so
    // the two checks can never disagree (e.g. if the regex is stateful).
    const isOverflowError = !isSuccess && OVERFLOW_ERROR_PATTERN.test(String(event.error ?? ""));

    // Attempts a transcript reset; on success clears all overflow/outage
    // counters and sends the alert. Returns whether the reset happened.
    const resetAfterOverflow = (alertTitle: string, alertBody: string): boolean => {
      if (!performOverflowReset()) return false;
      lastResetTs = Date.now();
      consecutiveOverflowErrors = 0;
      outageDetected = false;
      consecutiveFailures = 0;
      outageStartTs = 0;
      const sd = getStateDir();
      if (sd) {
        sendTelegramAlert("overflow_reset", alertTitle, alertBody, sd);
      }
      return true;
    };

    let resetDone = false;

    if (isOverflowError) {
      consecutiveOverflowErrors++;
      api.logger.warn(
        `sinain-hud: overflow watchdog — error #${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
      );
      if (consecutiveOverflowErrors >= OVERFLOW_CONSECUTIVE_THRESHOLD) {
        api.logger.warn("sinain-hud: OVERFLOW THRESHOLD REACHED — attempting transcript reset");
        resetDone = resetAfterOverflow(
          "⚠️ *sinain-hud* overflow reset triggered",
          `• ${OVERFLOW_CONSECUTIVE_THRESHOLD} consecutive overflow errors\n• Transcript truncated`,
        );
      }
    } else if (isSuccess) {
      consecutiveOverflowErrors = 0;
    }

    // Duration-gated overflow reset: long failure + overflow error pattern = stuck retry loop.
    // The core misclassifies "extra usage is required" as rate_limit → infinite retry.
    // After the run times out (>3min), we detect it and reset for the next cycle.
    // Skipped when the threshold path above already reset the transcript for this
    // same run — otherwise one failure would trigger two resets and two alerts.
    const isLongFailure = !isSuccess && durationMs > LONG_FAILURE_THRESHOLD_MS;
    if (isLongFailure && isOverflowError && !resetDone) {
      api.logger.warn(
        `sinain-hud: long failure (${Math.round(durationMs / 1000)}s) with overflow error — immediate reset`,
      );
      resetAfterOverflow(
        "⚠️ *sinain-hud* overflow reset (stuck retry)",
        `• ${Math.round(durationMs / 1000)}s failed run with overflow error\n• Transcript truncated, next heartbeat should recover`,
      );
    }
  }

  // ── Count tool usage by name ────────────────────────────────────────
  const toolCounts: Record<string, number> = {};
  for (const usage of state.toolUsage) {
    toolCounts[usage.toolName] = (toolCounts[usage.toolName] ?? 0) + 1;
  }

  // ── Write session summary (skip during outage — noise reduction) ───
  const skipSummary = outageDetected && isShortFailure;
  if (state.workspaceDir && !skipSummary) {
    const summaryPath = join(
      state.workspaceDir,
      "memory",
      "session-summaries.jsonl",
    );

    const summary = {
      ts: new Date().toISOString(),
      sessionKey,
      agentId: ctx.agentId,
      durationMs,
      success: event.success,
      error: event.error,
      toolCallCount: toolCount,
      toolBreakdown: toolCounts,
      messageCount: event.messages?.length ?? 0,
    };

    try {
      const dir = dirname(summaryPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }
      // flag "a" appends one JSON line per run.
      writeFileSync(summaryPath, JSON.stringify(summary) + "\n", {
        flag: "a",
      });
      api.logger.info(
        `sinain-hud: session summary written (${toolCount} tools, ${Math.round(durationMs / 1000)}s)`,
      );

      // Fire-and-forget: ingest session summary into triple store
      api.runtime.system.runCommandWithTimeout(
        ["uv", "run", "--with", "requests", "python3",
          "sinain-memory/triple_ingest.py",
          "--memory-dir", "memory/",
          "--ingest-session", JSON.stringify(summary),
          "--embed"],
        { timeoutMs: 15_000, cwd: state.workspaceDir },
      ).catch(() => {});
    } catch (err) {
      api.logger.warn(
        `sinain-hud: failed to write session summary: ${String(err)}`,
      );
    }
  }

  // ── Heartbeat compliance (exempt during outage) ─────────────────────
  if ((ctx as Record<string, unknown>).messageProvider === "heartbeat") {
    if (outageDetected && isShortFailure) {
      // Agent couldn't even process the prompt — don't count as a skip
      api.logger.info(
        `sinain-hud: heartbeat compliance exempted (outage active, ${Math.round(durationMs / 1000)}s run)`,
      );
    } else if (!state.heartbeatToolCalled) {
      consecutiveHeartbeatSkips++;
      api.logger.warn(
        `sinain-hud: heartbeat compliance violation — tool not called (consecutive: ${consecutiveHeartbeatSkips})`,
      );
      if (consecutiveHeartbeatSkips >= 3) {
        api.logger.warn(
          `sinain-hud: ESCALATION — ${consecutiveHeartbeatSkips} consecutive heartbeat skips`,
        );
      }
    } else {
      consecutiveHeartbeatSkips = 0;
    }
  }

  // Cleanup session state
  sessionStates.delete(sessionKey);
});
1166
+
1167
+ // ==========================================================================
1168
+ // Hook: session_end — cleanup any orphaned state
1169
+ // ==========================================================================
1170
+
1171
// Hook: session_end — drops whatever state is still tracked for the ended session.
// NOTE(review): the other hooks in this file look state up by ctx.sessionKey,
// while this one deletes by ctx.sessionId — confirm both refer to the same map key.
api.on("session_end", async (_event, ctx) => {
  const { sessionId } = ctx;
  sessionStates.delete(sessionId);
});
1174
+
1175
+ // ==========================================================================
1176
+ // Hook: llm_output — continuously refresh parent context cache
1177
+ // ==========================================================================
1178
+
1179
// Hook: llm_output — appends the newest assistant text (first 500 chars) to the
// parent context cache. Ignored for subagent sessions, for events without a
// session key, and while no cache has been captured yet.
api.on("llm_output", async (event, ctx) => {
  const key = ctx.sessionKey;
  if (!key || isSubagentSession(key) || !parentContextCache) return;

  const assistantTexts = (event as Record<string, unknown>).assistantTexts as string[] | undefined;
  const newest = assistantTexts?.at(-1);
  if (newest) {
    appendToContextCache(`[assistant]: ${newest.slice(0, 500)}`);
  }
});
1188
+
1189
+ // ==========================================================================
1190
+ // Hook: llm_input — capture user turns mid-session
1191
+ // ==========================================================================
1192
+
1193
// Hook: llm_input — appends the user prompt (first 500 chars) to the parent
// context cache. Ignored for subagent sessions, for events without a session
// key, and while no cache has been captured yet.
api.on("llm_input", async (event, ctx) => {
  const key = ctx.sessionKey;
  if (!key || isSubagentSession(key) || !parentContextCache) return;

  const userPrompt = (event as Record<string, unknown>).prompt as string | undefined;
  if (userPrompt) {
    appendToContextCache(`[user]: ${userPrompt.slice(0, 500)}`);
  }
});
1202
+
1203
+ // ==========================================================================
1204
+ // Hook: subagent_spawning — diagnostic logging
1205
+ // ==========================================================================
1206
+
1207
// Hook: subagent_spawning — diagnostic only: logs the child/parent session keys
// and how old the parent context cache is ("none" when nothing is cached).
api.on("subagent_spawning", async (event, ctx) => {
  let cacheAge = "none";
  if (parentContextCache) {
    const ageSec = Math.round((Date.now() - parentContextCache.capturedAt) / 1000);
    cacheAge = `${ageSec}s`;
  }

  const eventFields = event as Record<string, unknown>;
  const ctxFields = ctx as Record<string, unknown>;
  const childKey = eventFields.childSessionKey ?? "?";
  const parentKey = ctxFields.requesterSessionKey ?? "?";

  api.logger.info(
    `sinain-hud: subagent spawning (child=${childKey}, parent=${parentKey}, contextCache=${cacheAge})`,
  );
});
1217
+
1218
+ // ==========================================================================
1219
+ // Hook: gateway_start — reset all tracking on gateway restart
1220
+ // ==========================================================================
1221
+
1222
// Hook: gateway_start — a gateway restart is treated as a clean slate: every
// in-memory tracker touched below is returned to its initial value.
api.on("gateway_start", async () => {
  // Per-session tracking and cached parent context
  sessionStates.clear();
  parentContextCache = null;

  // Debounce stamps — force file sync and playbook generation on the next run
  lastFileSyncTs = 0;
  lastPlaybookGenTs = 0;

  // Outage / retry-storm detection
  recentOutcomes.length = 0; // empties the array in place
  consecutiveFailures = 0;
  outageDetected = false;
  outageStartTs = 0;
  lastSuccessTs = 0;

  // Heartbeat compliance and overflow watchdog counters
  consecutiveHeartbeatSkips = 0;
  consecutiveOverflowErrors = 0;
  lastResetTs = 0;

  // Alerting state
  _alertCooldowns.clear();
  _alertMissingConfigLogged = false;
  _cachedBotToken = undefined; // re-read on next alert

  api.logger.info("sinain-hud: gateway started, session + resilience + watchdog tracking reset");
});
1242
+
1243
+ // ==========================================================================
1244
+ // Command: /sinain_status — show plugin status
1245
+ // ==========================================================================
1246
+
1247
// Command: sinain_status.
// Reports the persisted session entries (from the sessions JSON file) for the
// configured session key and "agent:main:main", followed by the in-memory
// resilience counters. Returns `{ text }`.
api.registerCommand({
  name: "sinain_status",
  description: "Show sinain-hud plugin status and active sessions",
  handler: () => {
    const lines: string[] = ["sinain-hud plugin active"];

    // Persistent session info from disk
    const sessionsJsonPath = getSessionsJsonPath();
    if (sessionsJsonPath) {
      try {
        const sessionsData = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
        // De-duplicate: cfg.sessionKey may itself be "agent:main:main", which
        // would otherwise list the same session twice.
        const keysToShow = Array.from(
          new Set([cfg.sessionKey, "agent:main:main"].filter(Boolean)),
        );
        lines.push("\nSessions:");
        for (const key of keysToShow) {
          const s = sessionsData[key as string];
          if (!s) continue;
          const updatedAgo = s.updatedAt ? `${Math.round((Date.now() - s.updatedAt) / 1000)}s ago` : "?";
          const tokens = s.contextTokens ?? "?";
          const compactions = s.compactionCount ?? 0;
          let transcriptSize = "?";
          try {
            if (s.sessionFile && existsSync(s.sessionFile)) {
              transcriptSize = `${Math.round(statSync(s.sessionFile).size / 1024)}KB`;
            }
          } catch {
            // The transcript can disappear between existsSync and statSync; keep
            // "?" instead of discarding the whole session listing.
          }
          lines.push(`- ${key}: updated ${updatedAgo}, ${tokens} tokens, ${compactions} compactions, transcript ${transcriptSize}`);
        }
      } catch {
        // Sessions file missing, unreadable or not valid JSON.
        lines.push("No session data available.");
      }
    } else {
      lines.push("No session data available (workspace not set).");
    }

    // Resilience info
    const { rate, total, failures } = computeErrorRate();
    lines.push("\n**Resilience**");
    lines.push(`- Outage: ${outageDetected ? `ACTIVE (${Math.round((Date.now() - outageStartTs) / 1000)}s, ${consecutiveFailures} consecutive failures)` : "clear"}`);
    lines.push(`- Error rate: ${Math.round(rate * 100)}% (${failures}/${total} in ${ERROR_WINDOW_MS / 60_000}min window)`);
    lines.push(`- Last success: ${lastSuccessTs > 0 ? `${Math.round((Date.now() - lastSuccessTs) / 1000)}s ago` : "never"}`);
    lines.push(`- Heartbeat skips: ${consecutiveHeartbeatSkips}`);
    lines.push(`- Overflow watchdog: ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
    lines.push(`- Parent context cache: ${parentContextCache ? `${parentContextCache.contextText.length} chars, ${Math.round((Date.now() - parentContextCache.capturedAt) / 1000)}s old` : "empty"}`);

    return { text: lines.join("\n") };
  },
});
1292
+
1293
+ // ==========================================================================
1294
+ // Command: /sinain_modules — show active module stack
1295
+ // ==========================================================================
1296
+
1297
// Command: sinain_modules.
// Reads <workspace>/modules/module-registry.json and lists active modules
// (highest priority first, with a [locked] marker) plus suspended and disabled
// module ids. Returns `{ text }`; every failure mode is reported as text.
api.registerCommand({
  name: "sinain_modules",
  description: "Show active knowledge module stack and suspended modules",
  handler: () => {
    // Find workspace dir from active sessions
    let workspaceDir: string | undefined;
    for (const state of sessionStates.values()) {
      if (state.workspaceDir) { workspaceDir = state.workspaceDir; break; }
    }
    // agent_end deletes session state, so between runs there is usually no
    // tracked session; fall back to the workspace recorded by before_agent_start.
    if (!workspaceDir && lastWorkspaceDir) {
      workspaceDir = lastWorkspaceDir;
    }
    if (!workspaceDir) {
      return { text: "No workspace directory available (no active session)." };
    }

    const registryPath = join(workspaceDir, "modules", "module-registry.json");
    if (!existsSync(registryPath)) {
      return { text: "Module system not initialized (no module-registry.json found)." };
    }

    let registry: ModuleRegistry;
    try {
      registry = JSON.parse(readFileSync(registryPath, "utf-8")) as ModuleRegistry;
    } catch {
      return { text: "Failed to parse module-registry.json." };
    }

    const active: Array<{ id: string; priority: number; locked: boolean }> = [];
    const suspended: string[] = [];
    const disabled: string[] = [];

    // A registry without a `modules` object is treated as empty (the same
    // tolerance before_agent_start applies) instead of throwing from the handler.
    for (const [id, entry] of Object.entries(registry?.modules ?? {})) {
      if (!entry) continue; // ignore null/empty entries
      if (entry.status === "active") {
        active.push({ id, priority: entry.priority, locked: entry.locked });
      } else if (entry.status === "suspended") {
        suspended.push(id);
      } else if (entry.status === "disabled") {
        disabled.push(id);
      }
    }

    // Descending priority.
    active.sort((a, b) => b.priority - a.priority);

    const lines: string[] = ["**Knowledge Module Stack**\n"];

    if (active.length > 0) {
      lines.push("Active (highest priority first):");
      for (const m of active) {
        const lock = m.locked ? " [locked]" : "";
        lines.push(` ${m.priority} — ${m.id}${lock}`);
      }
    } else {
      lines.push("No active modules.");
    }

    if (suspended.length > 0) {
      lines.push(`\nSuspended: ${suspended.join(", ")}`);
    }
    if (disabled.length > 0) {
      lines.push(`\nDisabled: ${disabled.join(", ")}`);
    }

    return { text: lines.join("\n") };
  },
});
1360
+
1361
+ // ==========================================================================
1362
+ // Command: /sinain_eval — show latest evaluation report + metrics
1363
+ // ==========================================================================
1364
+
1365
// Command: sinain_eval.
// Shows the newest *.md report from memory/eval-reports (newest = last in
// lexicographic filename order) and the last five entries of the newest *.jsonl
// file in memory/eval-logs. Returns `{ text }`.
api.registerCommand({
  name: "sinain_eval",
  description: "Show latest evaluation report and current eval metrics",
  handler: () => {
    let workspaceDir: string | undefined;
    for (const state of sessionStates.values()) {
      if (state.workspaceDir) { workspaceDir = state.workspaceDir; break; }
    }
    // agent_end deletes session state, so between runs there is usually no
    // tracked session; fall back to the workspace recorded by before_agent_start.
    if (!workspaceDir && lastWorkspaceDir) {
      workspaceDir = lastWorkspaceDir;
    }
    if (!workspaceDir) {
      return { text: "No workspace directory available (no active session)." };
    }

    const reportsDir = join(workspaceDir, "memory", "eval-reports");
    const logsDir = join(workspaceDir, "memory", "eval-logs");
    const lines: string[] = ["**Evaluation Report**\n"];

    // Lists files with the given extension, newest-named first. Returns [] when
    // the directory is missing or unreadable so the handler never throws.
    const listNewestFirst = (dir: string, ext: string): string[] => {
      try {
        if (!existsSync(dir)) return [];
        return readdirSync(dir)
          .filter((f: string) => f.endsWith(ext))
          .sort()
          .reverse();
      } catch {
        return [];
      }
    };

    // Find latest report
    let latestReport = "";
    let reportReadFailed = false;
    const reports = listNewestFirst(reportsDir, ".md");
    if (reports.length > 0) {
      try {
        latestReport = readFileSync(join(reportsDir, reports[0]), "utf-8");
        lines.push(latestReport.trim());
      } catch {
        reportReadFailed = true;
        lines.push("Failed to read latest report.");
      }
    }

    // Do not also claim "no reports" when one exists but could not be read.
    if (!latestReport && !reportReadFailed) {
      lines.push("No eval reports generated yet.\n");
    }

    // Show latest eval-log entries
    const logFiles = listNewestFirst(logsDir, ".jsonl");
    if (logFiles.length > 0) {
      try {
        const content = readFileSync(join(logsDir, logFiles[0]), "utf-8");
        const entries = content.trim().split("\n").slice(-5);
        lines.push("\n**Recent Tick Evaluations** (last 5):");
        for (const line of entries) {
          try {
            const e = JSON.parse(line) as Record<string, unknown>;
            const judges = e.judges ? ` judgeAvg=${e.judgeAvg ?? "?"}` : "";
            lines.push(` ${e.tickTs} — passRate=${e.passRate}${judges}`);
          } catch {
            // skip malformed line
          }
        }
      } catch {
        // skip if unreadable
      }
    }

    return { text: lines.join("\n") };
  },
});
1431
+
1432
+ // ==========================================================================
1433
+ // Command: /sinain_eval_level — change evaluation level at runtime
1434
+ // ==========================================================================
1435
+
1436
// Command: sinain_eval_level.
// Validates the requested level (case-insensitive, surrounding whitespace
// ignored) and writes `{ level, changedAt }` to <workspace>/memory/eval-config.json.
// Returns `{ text }` describing the outcome, including write failures.
api.registerCommand({
  name: "sinain_eval_level",
  description: "Set evaluation level: mechanical, sampled, or full",
  handler: (args) => {
    let workspaceDir: string | undefined;
    for (const state of sessionStates.values()) {
      if (state.workspaceDir) { workspaceDir = state.workspaceDir; break; }
    }
    // agent_end deletes session state, so between runs there is usually no
    // tracked session; fall back to the workspace recorded by before_agent_start.
    if (!workspaceDir && lastWorkspaceDir) {
      workspaceDir = lastWorkspaceDir;
    }
    if (!workspaceDir) {
      return { text: "No workspace directory available (no active session)." };
    }

    const level = (args.text ?? "").trim().toLowerCase();
    const validLevels = ["mechanical", "sampled", "full"];
    if (!validLevels.includes(level)) {
      return { text: `Invalid level '${level}'. Valid options: ${validLevels.join(", ")}` };
    }

    const configPath = join(workspaceDir, "memory", "eval-config.json");
    const configDir = join(workspaceDir, "memory");

    const config = {
      level,
      changedAt: new Date().toISOString(),
    };

    try {
      if (!existsSync(configDir)) {
        mkdirSync(configDir, { recursive: true });
      }
      writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n", "utf-8");
    } catch (err) {
      // Report the failure to the caller instead of throwing out of the handler
      // (e.g. read-only workspace, permission denied).
      return { text: `Failed to write eval config: ${String(err)}` };
    }

    return { text: `Eval level set to '${level}'. Next tick evaluation will use this level.` };
  },
});
1469
+
1470
+ // ==========================================================================
1471
+ // Command: /sinain_health — on-demand health check
1472
+ // ==========================================================================
1473
+
1474
// Command: sinain_health.
// Runs the health checks on demand and renders the result, the watchdog
// timestamps and any detected issues as text. Returns `{ text }`.
api.registerCommand({
  name: "sinain_health",
  description: "Run health watchdog checks on-demand and show results",
  handler: () => {
    const checks = runHealthChecks();
    const report: string[] = ["**Health Watchdog Report**\n"];

    // Transcript size (null means it could not be determined)
    const transcript = checks.transcriptMB !== null ? `${checks.transcriptMB}MB` : "unknown";
    report.push(`Transcript: ${transcript}`);

    // Last success: a positive staleness wins; otherwise distinguish
    // "succeeded just now" from "never succeeded".
    let lastSuccess: string;
    if (checks.staleSec > 0) {
      lastSuccess = `${checks.staleSec}s ago`;
    } else if (lastSuccessTs > 0) {
      lastSuccess = "just now";
    } else {
      lastSuccess = "never";
    }
    report.push(`Last success: ${lastSuccess}`);

    report.push(`Error rate: ${Math.round(checks.errorRate * 100)}% (${checks.errorTotal} samples)`);
    report.push(`Overflow counter: ${checks.overflowCount}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);

    const lastReset = lastResetTs > 0
      ? `${Math.round((Date.now() - lastResetTs) / 1000)}s ago`
      : "never";
    report.push(`Last reset: ${lastReset}`);

    const lastAutoRestart = lastAutoRestartTs > 0
      ? `${Math.round((Date.now() - lastAutoRestartTs) / 1000)}s ago`
      : "never";
    report.push(`Last auto-restart: ${lastAutoRestart}`);

    const alerts = process.env.SINAIN_ALERT_CHAT_ID ? "yes" : "no (SINAIN_ALERT_CHAT_ID not set)";
    report.push(`Alerts configured: ${alerts}`);

    if (checks.issues.length === 0) {
      report.push(`\n✅ All checks passed`);
    } else {
      report.push(`\n**Issues detected:**`);
      for (const issue of checks.issues) {
        report.push(` ⚠️ ${issue}`);
      }
    }

    return { text: report.join("\n") };
  },
});
1501
+
1502
+ // ==========================================================================
1503
+ // Tool: sinain_heartbeat_tick — deterministic heartbeat execution
1504
+ // ==========================================================================
1505
+
1506
api.registerTool(
  (ctx) => {
    // The tool executes scripts relative to the workspace; without a
    // workspace directory the factory declines to provide the tool.
    const { workspaceDir } = ctx;
    if (!workspaceDir) return null;

    /**
     * Runs a Python script via `uv run` with the workspace as cwd and parses
     * its stdout as JSON.
     *
     * Every failure mode (non-zero exit code, thrown error, stdout that is
     * not valid JSON) is logged as a warning and reported as `null`, so this
     * helper never rejects.
     */
    const runPythonJson = async (
      scriptArgs: string[],
      timeoutMs = 60_000,
    ): Promise<Record<string, unknown> | null> => {
      try {
        const proc = await api.runtime.system.runCommandWithTimeout(
          ["uv", "run", "--with", "requests", "python3", ...scriptArgs],
          { timeoutMs, cwd: workspaceDir },
        );
        if (proc.code === 0) {
          return JSON.parse(proc.stdout.trim());
        }
        api.logger.warn(
          `sinain-hud: heartbeat script failed: ${scriptArgs[0]} (code ${proc.code})\n${proc.stderr}`,
        );
      } catch (err) {
        api.logger.warn(
          `sinain-hud: heartbeat script error: ${scriptArgs[0]}: ${String(err)}`,
        );
      }
      return null;
    };

    return {
      name: "sinain_heartbeat_tick",
      label: "Heartbeat Tick",
      description:
        "Execute all heartbeat mechanical work: git backup, signal analysis, insight synthesis, and log writing. " +
        "Returns structured JSON with script results, recommended actions, and output for Telegram.",
      parameters: {
        type: "object",
        properties: {
          sessionSummary: {
            type: "string",
            description: "2-3 sentence summary of current session state",
          },
          idle: {
            type: "boolean",
            description: "True if user has been inactive >30 minutes",
          },
        },
        required: ["sessionSummary", "idle"],
      },
      /**
       * Performs one heartbeat tick: git backup, signal analysis, insight
       * synthesis, then appends a JSONL log entry. Each stage is best-effort;
       * a failing stage leaves its default in the report and the tick goes on.
       *
       * @param _toolCallId Unused tool-call identifier.
       * @param params      Session summary text and idle flag from the caller.
       * @returns The report both as pretty-printed JSON text and as `details`.
       */
      async execute(
        _toolCallId: string,
        params: { sessionSummary: string; idle: boolean },
      ) {
        // Defaults describe a tick in which nothing produced any output.
        const report: Record<string, unknown> = {
          status: "ok",
          gitBackup: null,
          signals: [],
          recommendedAction: { action: "skip", task: null, confidence: 0 },
          output: null,
          skipped: false,
          skipReason: null,
          logWritten: false,
        };

        // Per-stage wall-clock durations plus the start of the whole tick.
        const latencyMs: Record<string, number> = {};
        const startedAt = Date.now();

        // 1. Git backup (30s timeout). Empty stdout is reported as
        //    "nothing to commit"; a thrown error is recorded in the report.
        try {
          const began = Date.now();
          const backup = await api.runtime.system.runCommandWithTimeout(
            ["bash", "sinain-memory/git_backup.sh"],
            { timeoutMs: 30_000, cwd: workspaceDir },
          );
          latencyMs.gitBackup = Date.now() - began;
          report.gitBackup = backup.stdout.trim() || "nothing to commit";
        } catch (err) {
          api.logger.warn(`sinain-hud: git backup error: ${String(err)}`);
          report.gitBackup = `error: ${String(err)}`;
        }

        // Human-readable "now" in the configured timezone, handed to the
        // memory scripts via --current-time.
        const timezone = cfg.userTimezone ?? "Europe/Berlin";
        const localNow = new Date().toLocaleString("en-GB", {
          timeZone: timezone,
          weekday: "long",
          year: "numeric",
          month: "long",
          day: "numeric",
          hour: "2-digit",
          minute: "2-digit",
          hour12: false,
        });
        const currentTimeStr = localNow + ` (${timezone})`;

        // The analyzer and the synthesizer take the same argument list.
        const memoryScriptArgs = (script: string): string[] => {
          const args = [
            script,
            "--memory-dir", "memory/",
            "--session-summary", params.sessionSummary,
            "--current-time", currentTimeStr,
          ];
          if (params.idle) args.push("--idle");
          return args;
        };

        // 2. Signal analysis (60s timeout)
        const signalArgs = memoryScriptArgs("sinain-memory/signal_analyzer.py");
        const signalBegan = Date.now();
        const signalResult = await runPythonJson(signalArgs, 60_000);
        latencyMs.signalAnalysis = Date.now() - signalBegan;

        if (signalResult) {
          report.signals = signalResult.signals ?? [];
          report.recommendedAction = signalResult.recommendedAction ?? {
            action: "skip",
            task: null,
            confidence: 0,
          };

          // Fire-and-forget: ingest the signal into the triple store. The
          // promise is deliberately not awaited so the tick is not delayed.
          runPythonJson(
            [
              "sinain-memory/triple_ingest.py",
              "--memory-dir", "memory/",
              "--tick-ts", new Date().toISOString(),
              "--signal-result", JSON.stringify(signalResult),
              "--embed",
            ],
            15_000,
          ).catch(() => {});
        }

        // 3. Insight synthesis (60s timeout)
        const synthArgs = memoryScriptArgs("sinain-memory/insight_synthesizer.py");
        const synthBegan = Date.now();
        const synthResult = await runPythonJson(synthArgs, 60_000);
        latencyMs.insightSynthesis = Date.now() - synthBegan;

        // Only an explicit `skip: false` counts as real output; any other
        // non-null result is recorded as a skip.
        if (synthResult?.skip === false) {
          report.output = {
            suggestion: synthResult.suggestion ?? null,
            insight: synthResult.insight ?? null,
          };
        } else if (synthResult) {
          report.skipped = true;
          report.skipReason = synthResult.skipReason ?? "synthesizer skipped";
        }

        // 4. Append the tick to memory/playbook-logs/YYYY-MM-DD.jsonl
        //    (the date is taken from the ISO timestamp).
        try {
          const stamp = new Date();
          const logDir = join(workspaceDir, "memory", "playbook-logs");
          if (!existsSync(logDir)) mkdirSync(logDir, { recursive: true });

          const entry = {
            ts: stamp.toISOString(),
            idle: params.idle,
            sessionHistorySummary: params.sessionSummary,
            signals: report.signals,
            recommendedAction: report.recommendedAction,
            output: report.output,
            skipped: report.skipped,
            skipReason: report.skipReason,
            gitBackup: report.gitBackup,
            latencyMs,
            totalLatencyMs: Date.now() - startedAt,
          };

          const logFile = join(logDir, `${stamp.toISOString().slice(0, 10)}.jsonl`);
          writeFileSync(logFile, JSON.stringify(entry) + "\n", { flag: "a" });
          report.logWritten = true;
        } catch (err) {
          api.logger.warn(
            `sinain-hud: failed to write heartbeat log: ${String(err)}`,
          );
        }

        return {
          content: [
            { type: "text" as const, text: JSON.stringify(report, null, 2) },
          ],
          details: report,
        };
      },
    } as any; // AnyAgentTool — plain JSON schema, no TypeBox dependency
  },
  { name: "sinain_heartbeat_tick" },
);
1696
+
1697
+ // ==========================================================================
1698
+ // Effectiveness footer update
1699
+ // ==========================================================================
1700
+
1701
/**
 * Writes the effectiveness summary as an HTML-comment footer in
 * `<workspaceDir>/memory/sinain-playbook.md`.
 *
 * An existing `<!-- effectiveness: ... -->` comment is replaced in place;
 * otherwise the footer is appended after the trimmed content. Nothing is
 * written when the playbook file does not exist.
 *
 * @param workspaceDir  Workspace root containing the `memory/` directory.
 * @param effectiveness Metrics object; `outputs`, `positive`, `negative`,
 *                      `neutral` and `rate` are read, each defaulting to 0
 *                      when null or undefined.
 */
function updateEffectivenessFooter(
  workspaceDir: string,
  effectiveness: Record<string, unknown>,
): void {
  const playbookPath = join(workspaceDir, "memory", "sinain-playbook.md");
  if (!existsSync(playbookPath)) return;

  const original = readFileSync(playbookPath, "utf-8");
  const today = new Date().toISOString().slice(0, 10); // UTC calendar date
  const newFooter = `<!-- effectiveness: outputs=${effectiveness.outputs ?? 0}, positive=${effectiveness.positive ?? 0}, negative=${effectiveness.negative ?? 0}, neutral=${effectiveness.neutral ?? 0}, rate=${effectiveness.rate ?? 0}, updated=${today} -->`;
  const footerRe = /<!--\s*effectiveness:[^>]+-->/;

  // Use a replacer function rather than a replacement string: a string would
  // have `$&`, `$1`, `$$`, ... expanded by String.prototype.replace, so a
  // metric value containing `$` could corrupt the playbook.
  const updated = footerRe.test(original)
    ? original.replace(footerRe, () => newFooter)
    : original.trimEnd() + "\n\n" + newFooter + "\n";

  writeFileSync(playbookPath, updated, "utf-8");
}
1718
+
1719
+ // ==========================================================================
1720
+ // Curation pipeline (runs on 30-min timer)
1721
+ // ==========================================================================
1722
+
1723
/**
 * Runs the periodic curation pipeline inside `workspaceDir`:
 * feedback analysis → memory mining → playbook curation → effectiveness
 * footer → effective-playbook regeneration → tick evaluation → (at most once
 * per UTC day, after 03:00 UTC) the eval report. Finally a `_type: "curation"`
 * entry is appended to `memory/playbook-logs/YYYY-MM-DD.jsonl`.
 *
 * Script failures are logged and treated as "no result" (`null`); they do not
 * abort the pipeline. Errors thrown by `generateEffectivePlaybook` propagate
 * to the caller.
 *
 * @param workspaceDir Workspace root used as cwd for every script.
 */
async function runCurationPipeline(workspaceDir: string): Promise<void> {
  // Runs a Python script via `uv run` and parses its stdout as JSON.
  // Never rejects: any failure is logged and reported as null.
  const runScript = async (
    args: string[],
    timeoutMs = 90_000,
  ): Promise<Record<string, unknown> | null> => {
    try {
      const result = await api.runtime.system.runCommandWithTimeout(
        ["uv", "run", "--with", "requests", "python3", ...args],
        { timeoutMs, cwd: workspaceDir },
      );
      if (result.code !== 0) {
        api.logger.warn(
          `sinain-hud: curation script failed: ${args[0]} (code ${result.code})\n${result.stderr}`,
        );
        return null;
      }
      return JSON.parse(result.stdout.trim());
    } catch (err) {
      api.logger.warn(
        `sinain-hud: curation script error: ${args[0]}: ${String(err)}`,
      );
      return null;
    }
  };

  api.logger.info("sinain-hud: curation pipeline starting");
  const curationLatency: Record<string, number> = {};

  // Step 1: Feedback analysis
  const feedbackT0 = Date.now();
  const feedback = await runScript([
    "sinain-memory/feedback_analyzer.py",
    "--memory-dir", "memory/",
    "--session-summary", "periodic curation (plugin timer)",
  ]);
  curationLatency.feedback = Date.now() - feedbackT0;

  // The directive ends up as a CLI argument, so it must really be a string;
  // a missing or non-string value falls back to "stability".
  const rawDirective = feedback?.curateDirective;
  const directive = typeof rawDirective === "string" ? rawDirective : "stability";

  // Step 2: Memory mining (background task — mines unread daily files)
  const miningT0 = Date.now();
  const mining = await runScript([
    "sinain-memory/memory_miner.py",
    "--memory-dir", "memory/",
  ]);
  curationLatency.mining = Date.now() - miningT0;
  const findings = mining?.findings ? JSON.stringify(mining.findings) : null;

  // Fire-and-forget: ingest mining results into triple store
  if (mining) {
    runScript([
      "sinain-memory/triple_ingest.py",
      "--memory-dir", "memory/",
      "--ingest-mining", JSON.stringify(mining),
      "--embed",
    ], 15_000).catch(() => {});
  }

  // Step 3: Playbook curation
  const curatorArgs = [
    "sinain-memory/playbook_curator.py",
    "--memory-dir", "memory/",
    "--session-summary", "periodic curation (plugin timer)",
    "--curate-directive", directive,
  ];
  if (findings) {
    curatorArgs.push("--mining-findings", findings);
  }
  const curatorT0 = Date.now();
  const curator = await runScript(curatorArgs);
  curationLatency.curation = Date.now() - curatorT0;

  // Fire-and-forget: ingest playbook patterns into triple store
  runScript([
    "sinain-memory/triple_ingest.py",
    "--memory-dir", "memory/",
    "--ingest-playbook",
    "--embed",
  ], 15_000).catch(() => {});

  // Step 4: Update effectiveness footer with fresh metrics
  const effectiveness = feedback?.effectiveness;
  if (effectiveness && typeof effectiveness === "object") {
    try {
      updateEffectivenessFooter(workspaceDir, effectiveness as Record<string, unknown>);
    } catch (err) {
      api.logger.warn(`sinain-hud: effectiveness footer update failed: ${String(err)}`);
    }
  }

  // Step 5: Regenerate effective playbook after curation
  generateEffectivePlaybook(workspaceDir, api.logger);

  // Step 6: Tick evaluation (runs mechanical + sampled judges)
  await runScript([
    "sinain-memory/tick_evaluator.py",
    "--memory-dir", "memory/",
  ], 120_000);

  // Step 7: Daily eval report (run once per day after 03:00 UTC).
  // The date is recorded even when the script fails, so it is not retried
  // until the next UTC day.
  const nowUTC = new Date();
  const todayStr = nowUTC.toISOString().slice(0, 10);
  if (nowUTC.getUTCHours() >= 3 && lastEvalReportDate !== todayStr) {
    await runScript([
      "sinain-memory/eval_reporter.py",
      "--memory-dir", "memory/",
    ], 120_000);
    lastEvalReportDate = todayStr;
  }

  // Log result with curation latency
  const changes = curator?.changes ?? "unknown";
  api.logger.info(
    `sinain-hud: curation pipeline complete (directive=${directive}, changes=${JSON.stringify(changes)}, latency=${JSON.stringify(curationLatency)})`,
  );

  // Write curation result to playbook-logs so eval_reporter can track churn
  if (curator) {
    try {
      const dateStr = new Date().toISOString().slice(0, 10);
      const logDir = join(workspaceDir, "memory", "playbook-logs");
      // Curation may run before anything else has created the log directory;
      // without this the append fails with ENOENT and the entry is lost.
      mkdirSync(logDir, { recursive: true });

      const curatorChanges = curator.changes as Record<string, string[]> | undefined;
      const curationEntry = {
        _type: "curation",
        ts: new Date().toISOString(),
        directive,
        playbookChanges: {
          added: curatorChanges?.added ?? [],
          pruned: curatorChanges?.pruned ?? [],
          promoted: curatorChanges?.promoted ?? [],
          playbookLines: curator.playbookLines ?? 0,
        },
        latencyMs: curationLatency,
      };
      writeFileSync(
        join(logDir, `${dateStr}.jsonl`),
        JSON.stringify(curationEntry) + "\n",
        { flag: "a" },
      );
    } catch (err) {
      api.logger.warn(`sinain-hud: failed to write curation log entry: ${String(err)}`);
    }
  }
}
1866
+
1867
+ // ==========================================================================
1868
+ // Health watchdog helpers
1869
+ // ==========================================================================
1870
+
1871
/**
 * Returns the state directory, defined as the parent of the last known
 * workspace directory (e.g. /home/node/.openclaw), or `null` when no
 * workspace directory has been recorded yet.
 */
function getStateDir(): string | null {
  return lastWorkspaceDir ? dirname(lastWorkspaceDir) : null;
}
1876
+
1877
/**
 * Looks up the transcript file of the configured session (`cfg.sessionKey`)
 * in the sessions JSON and reports its path and size in bytes.
 *
 * @returns `{ path, bytes }`, or `null` when the sessions file or session key
 *          is unavailable, the session has no existing `sessionFile`, or any
 *          read/parse/stat step throws.
 */
function getTranscriptSize(): { path: string; bytes: number } | null {
  const sessionsFile = getSessionsJsonPath();
  if (!sessionsFile || !cfg.sessionKey) {
    return null;
  }

  try {
    const sessions = JSON.parse(readFileSync(sessionsFile, "utf-8"));
    const entry = sessions[cfg.sessionKey];
    const transcriptPath = entry?.sessionFile as string | undefined;

    if (transcriptPath && existsSync(transcriptPath)) {
      const { size } = statSync(transcriptPath);
      return { path: transcriptPath, bytes: size };
    }
    return null;
  } catch {
    // Unreadable or malformed sessions file: treat as "size unknown".
    return null;
  }
}
1890
+
1891
/**
 * Takes a read-only snapshot of the watchdog's health indicators and lists
 * the ones currently out of bounds. Performs no resets and sends no alerts.
 *
 * @returns
 *  - `transcriptMB`:  transcript size in MB (2 decimals), `null` if unknown
 *  - `staleSec`:      seconds since the last success, 0 if there was none
 *  - `errorRate` / `errorTotal`: values from `computeErrorRate()`
 *  - `overflowCount`: current consecutive overflow error counter
 *  - `resetRecently`: a reset happened within 2 × STALENESS_CRITICAL_MS
 *  - `issues`:        human-readable description of each failing check
 */
function runHealthChecks(): {
  transcriptMB: number | null;
  staleSec: number;
  errorRate: number;
  errorTotal: number;
  overflowCount: number;
  resetRecently: boolean;
  issues: string[];
} {
  const transcript = getTranscriptSize();
  const transcriptMB =
    transcript === null ? null : Number((transcript.bytes / 1_000_000).toFixed(2));

  let staleSec = 0;
  if (lastSuccessTs > 0) {
    staleSec = Math.round((Date.now() - lastSuccessTs) / 1000);
  }

  const errorStats = computeErrorRate();
  const errorRate = errorStats.rate;
  const errorTotal = errorStats.total;

  const resetRecently =
    lastResetTs > 0 && Date.now() - lastResetTs < STALENESS_CRITICAL_MS * 2;

  // Evaluate each check, then collect a message for every one that fails.
  const oversized =
    transcript !== null && transcript.bytes >= SESSION_SIZE_WARNING_BYTES;
  // Staleness only counts once at least three outcomes have been recorded.
  const stale =
    lastSuccessTs > 0 &&
    Date.now() - lastSuccessTs >= STALENESS_WARNING_MS &&
    recentOutcomes.length >= 3;
  const errorProne = errorTotal >= 5 && errorRate > 0.5;
  const overflowing = consecutiveOverflowErrors >= 3;
  // A recent reset with the last success still dating from before it.
  const stalledAfterReset =
    resetRecently && lastSuccessTs > 0 && lastSuccessTs < lastResetTs;

  const issues: string[] = [];
  if (oversized) {
    issues.push(`transcript ${transcriptMB}MB (threshold ${(SESSION_SIZE_WARNING_BYTES / 1_000_000).toFixed(1)}MB)`);
  }
  if (stale) {
    issues.push(`stale ${staleSec}s since last success`);
  }
  if (errorProne) {
    issues.push(`error rate ${Math.round(errorRate * 100)}% (${errorTotal} samples)`);
  }
  if (overflowing) {
    issues.push(`overflow errors ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
  }
  if (stalledAfterReset) {
    issues.push("post-reset stall (no success since reset)");
  }

  return {
    transcriptMB,
    staleSec,
    errorRate,
    errorTotal,
    overflowCount: consecutiveOverflowErrors,
    resetRecently,
    issues,
  };
}
1925
+
1926
/**
 * One watchdog pass. Does nothing until a state directory is known.
 *
 *  1. Transcript at or above SESSION_SIZE_WARNING_BYTES → overflow reset.
 *  2. No success for STALENESS_WARNING_MS..STALENESS_CRITICAL_MS → alert.
 *  3. A reset happened, no success since, and STALENESS_CRITICAL_MS elapsed
 *     → alert and `process.exit(1)`, limited by AUTO_RESTART_COOLDOWN_MS.
 *  4. Error rate above 50% over at least 5 samples → alert.
 *  5. Three or more consecutive overflow errors, below the threshold → alert.
 *
 * Alerts are best-effort: a delivery failure is logged and never prevents the
 * remaining checks, or the emergency exit, from running.
 */
async function runHealthWatchdog(): Promise<void> {
  const stateDir = getStateDir();
  if (!stateDir) return;

  // Best-effort wrapper around sendTelegramAlert. It never rejects, so
  // un-awaited calls cannot surface as unhandled rejections and an awaited
  // call cannot abort the caller.
  const alert = async (
    ...args: Parameters<typeof sendTelegramAlert>
  ): Promise<void> => {
    try {
      await sendTelegramAlert(...args);
    } catch (err) {
      api.logger.warn(
        `sinain-hud: watchdog alert failed (${String(args[0])}): ${String(err)}`,
      );
    }
  };

  const transcript = getTranscriptSize();
  const now = Date.now();

  // ── Layer 1: Proactive session size check ────────────────────────────
  if (transcript && transcript.bytes >= SESSION_SIZE_WARNING_BYTES) {
    const sizeMB = (transcript.bytes / 1_000_000).toFixed(1);

    // Both size tiers trigger the same reset; only the log line differs.
    if (transcript.bytes >= SESSION_SIZE_RESTART_BYTES) {
      api.logger.warn(`sinain-hud: watchdog — transcript ${sizeMB}MB, forcing overflow reset`);
    } else {
      api.logger.info(`sinain-hud: watchdog — transcript ${sizeMB}MB, proactive reset`);
    }

    if (performOverflowReset()) {
      lastResetTs = now;
      consecutiveOverflowErrors = 0;
      void alert(
        "proactive_reset",
        "⚠️ *sinain-hud* proactive session reset",
        `• Transcript was ${sizeMB}MB → truncated\n• No downtime expected`,
        stateDir,
      );
    }
  }

  // ── Staleness check ──────────────────────────────────────────────────
  // Only the warning band alerts here; beyond the critical mark Layer 2
  // decides what to do.
  if (lastSuccessTs > 0 && recentOutcomes.length >= 3) {
    const staleMs = now - lastSuccessTs;

    if (staleMs >= STALENESS_WARNING_MS && staleMs < STALENESS_CRITICAL_MS) {
      const staleMin = Math.round(staleMs / 60_000);
      void alert(
        "staleness_warning",
        "⚠️ *sinain-hud* response stale",
        `• No successful run in ${staleMin}min\n• Error rate: ${Math.round(computeErrorRate().rate * 100)}%`,
        stateDir,
      );
    }
  }

  // ── Layer 2: Emergency restart — reset didn't recover ────────────────
  if (lastResetTs > 0 && lastSuccessTs > 0 && lastSuccessTs < lastResetTs) {
    const sinceResetMs = now - lastResetTs;
    if (sinceResetMs >= STALENESS_CRITICAL_MS) {
      // Reset was performed but no success since → queue is jammed
      const canRestart = (now - lastAutoRestartTs) >= AUTO_RESTART_COOLDOWN_MS;
      if (canRestart) {
        const staleMin = Math.round((now - lastSuccessTs) / 60_000);
        api.logger.warn(`sinain-hud: EMERGENCY RESTART — reset ${Math.round(sinceResetMs / 60_000)}min ago, no recovery`);
        // Send alert BEFORE exit so user sees it. The wrapper swallows
        // delivery errors, so a failed send cannot cancel the exit.
        await alert(
          "emergency_restart",
          "🔴 *sinain-hud* EMERGENCY RESTART",
          `• Queue jammed — reset didn't recover in ${Math.round(sinceResetMs / 60_000)}min\n• Last success: ${staleMin}min ago\n• Gateway restarting now (~5s)`,
          stateDir,
        );
        // NOTE(review): this assignment is in-memory only as far as this
        // function shows; confirm the cooldown survives the exit below.
        lastAutoRestartTs = now;
        // Give Telegram a moment to deliver
        await new Promise((r) => setTimeout(r, 1000));
        process.exit(1);
      } else {
        api.logger.warn("sinain-hud: watchdog — would restart but cooldown active (max 1/hour)");
      }
    }
  }

  // ── Error rate alert ─────────────────────────────────────────────────
  const { rate, total } = computeErrorRate();
  if (total >= 5 && rate > 0.5) {
    void alert(
      "high_error_rate",
      "⚠️ *sinain-hud* high error rate",
      `• ${Math.round(rate * 100)}% failures over ${total} samples\n• Consecutive overflow errors: ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
      stateDir,
    );
  }

  // ── Overflow approaching threshold ───────────────────────────────────
  if (consecutiveOverflowErrors >= 3 && consecutiveOverflowErrors < OVERFLOW_CONSECUTIVE_THRESHOLD) {
    void alert(
      "overflow_warning",
      "⚠️ *sinain-hud* overflow errors accumulating",
      `• ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD} consecutive overflow errors\n• Auto-reset will trigger at ${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
      stateDir,
    );
  }
}
2006
+
2007
+ // ==========================================================================
2008
+ // Service registration
2009
+ // ==========================================================================
2010
+
2011
// Background service: owns the health-watchdog timer and the curation timer.
api.registerService({
  id: "sinain-hud",

  /**
   * Starts both timers. Safe to call repeatedly: timers left over from an
   * earlier start are cleared first, so they neither leak nor double-fire.
   */
  start: () => {
    if (watchdogInterval) {
      clearInterval(watchdogInterval);
      watchdogInterval = null;
    }
    if (curationInterval) {
      clearInterval(curationInterval);
      curationInterval = null;
    }

    api.logger.info(
      `sinain-hud: service started (heartbeat: ${cfg.heartbeatPath ?? "not configured"})`,
    );

    // Start health watchdog — fires every WATCHDOG_INTERVAL_MS on its own
    // timer, independent of curation. Errors are logged, never thrown.
    watchdogInterval = setInterval(() => {
      runHealthWatchdog().catch((err) => {
        api.logger.warn(`sinain-hud: watchdog error: ${String(err)}`);
      });
    }, WATCHDOG_INTERVAL_MS);
    api.logger.info("sinain-hud: health watchdog started (5-min interval)");

    // Start curation timer — runs every 30 minutes
    curationInterval = setInterval(async () => {
      // Skip curation during outage — scripts would work (OpenRouter) but
      // results are wasted when no agent runs succeed.
      // Note: this early return also skips the session hygiene step below.
      if (outageDetected) {
        api.logger.info("sinain-hud: curation skipped — outage active");
        return;
      }

      // Find workspace dir from active sessions or last known
      let workspaceDir: string | undefined;
      for (const state of sessionStates.values()) {
        if (state.workspaceDir) {
          workspaceDir = state.workspaceDir;
          break;
        }
      }
      workspaceDir ??= lastWorkspaceDir ?? undefined;
      if (!workspaceDir) {
        api.logger.info("sinain-hud: curation skipped — no workspace dir");
        return;
      }

      try {
        await runCurationPipeline(workspaceDir);
      } catch (err) {
        api.logger.warn(`sinain-hud: curation pipeline error: ${String(err)}`);
      }

      // ── Proactive session hygiene ──────────────────────────────────
      // Check sinain session size/age and archive+truncate if needed.
      // This prevents context bloat from causing cascading RPC timeouts.
      try {
        const sessionsJsonPath = getSessionsJsonPath();
        if (sessionsJsonPath && cfg.sessionKey) {
          const sessionsData = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
          const sinainSession = sessionsData[cfg.sessionKey];
          if (sinainSession?.sessionFile && existsSync(sinainSession.sessionFile)) {
            const size = statSync(sinainSession.sessionFile).size;
            // A missing or non-numeric createdAt yields an age of zero, so
            // only the size criterion can trigger in that case.
            const createdAt = typeof sinainSession.createdAt === "number"
              ? sinainSession.createdAt
              : Date.now();
            const ageMs = Date.now() - createdAt;
            if (size > SESSION_HYGIENE_SIZE_BYTES || ageMs > SESSION_HYGIENE_AGE_MS) {
              api.logger.info(
                `sinain-hud: proactive session hygiene \u2014 size=${Math.round(size / 1024)}KB, age=${Math.round(ageMs / 3600000)}h`,
              );
              // After a successful reset, clear the overflow and outage
              // tracking state.
              if (performOverflowReset()) {
                consecutiveOverflowErrors = 0;
                outageDetected = false;
                consecutiveFailures = 0;
                outageStartTs = 0;
              }
            }
          }
        }
      } catch (err) {
        api.logger.warn(`sinain-hud: session hygiene check error: ${String(err)}`);
      }
    }, 30 * 60 * 1000); // 30 minutes
  },

  /** Stops both timers and drops all tracked session state. */
  stop: () => {
    if (curationInterval) {
      clearInterval(curationInterval);
      curationInterval = null;
    }
    if (watchdogInterval) {
      clearInterval(watchdogInterval);
      watchdogInterval = null;
    }
    api.logger.info("sinain-hud: service stopped");
    sessionStates.clear();
  },
});
2096
+ }