@geravant/sinain 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +163 -1257
- package/install.js +2 -1
- package/package.json +9 -2
- package/sinain-knowledge/adapters/generic/adapter.ts +103 -0
- package/sinain-knowledge/adapters/interface.ts +72 -0
- package/sinain-knowledge/adapters/openclaw/adapter.ts +223 -0
- package/sinain-knowledge/curation/engine.ts +493 -0
- package/sinain-knowledge/curation/resilience.ts +336 -0
- package/sinain-knowledge/data/git-store.ts +310 -0
- package/sinain-knowledge/data/schema.ts +89 -0
- package/sinain-knowledge/data/snapshot.ts +226 -0
- package/sinain-knowledge/data/store.ts +488 -0
- package/sinain-knowledge/deploy/cli.ts +214 -0
- package/sinain-knowledge/deploy/manifest.ts +80 -0
- package/sinain-knowledge/protocol/bindings/generic.md +5 -0
- package/sinain-knowledge/protocol/bindings/openclaw.md +5 -0
- package/sinain-knowledge/protocol/heartbeat.md +62 -0
- package/sinain-knowledge/protocol/renderer.ts +56 -0
- package/sinain-knowledge/protocol/skill.md +335 -0
package/index.ts
CHANGED
|
@@ -8,49 +8,20 @@
|
|
|
8
8
|
* - Strips <private> tags from tool results before persistence
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import { readFileSync, writeFileSync, mkdirSync, existsSync, readdirSync, statSync, chmodSync, copyFileSync } from "node:fs";
|
|
12
|
-
import { join, dirname, extname } from "node:path";
|
|
11
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, statSync, chmodSync, copyFileSync } from "node:fs";
|
|
12
|
+
import { join, dirname } from "node:path";
|
|
13
13
|
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
userTimezone?: string;
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
type ModuleRegistryEntry = {
|
|
29
|
-
status: "active" | "suspended" | "disabled";
|
|
30
|
-
priority: number;
|
|
31
|
-
activatedAt: string | null;
|
|
32
|
-
lastTriggered: string | null;
|
|
33
|
-
locked: boolean;
|
|
34
|
-
};
|
|
35
|
-
|
|
36
|
-
type ModuleRegistry = {
|
|
37
|
-
version: number;
|
|
38
|
-
modules: Record<string, ModuleRegistryEntry>;
|
|
39
|
-
};
|
|
40
|
-
|
|
41
|
-
type ToolUsageEntry = {
|
|
42
|
-
toolName: string;
|
|
43
|
-
ts: number;
|
|
44
|
-
durationMs?: number;
|
|
45
|
-
error?: string;
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
type SessionState = {
|
|
49
|
-
startedAt: number;
|
|
50
|
-
toolUsage: ToolUsageEntry[];
|
|
51
|
-
workspaceDir?: string;
|
|
52
|
-
heartbeatToolCalled?: boolean;
|
|
53
|
-
};
|
|
15
|
+
import type {
|
|
16
|
+
PluginConfig,
|
|
17
|
+
SessionState,
|
|
18
|
+
ParentContextCache,
|
|
19
|
+
} from "./sinain-knowledge/data/schema.js";
|
|
20
|
+
import { KnowledgeStore } from "./sinain-knowledge/data/store.js";
|
|
21
|
+
import { ResilienceManager, HealthWatchdog, OVERFLOW_CONSECUTIVE_THRESHOLD, SHORT_FAILURE_THRESHOLD_MS, ERROR_WINDOW_MS, SESSION_HYGIENE_SIZE_BYTES, SESSION_HYGIENE_AGE_MS, ALERT_COOLDOWN_MS } from "./sinain-knowledge/curation/resilience.js";
|
|
22
|
+
import type { ResilienceBackend } from "./sinain-knowledge/curation/resilience.js";
|
|
23
|
+
import { CurationEngine } from "./sinain-knowledge/curation/engine.js";
|
|
24
|
+
import { GitSnapshotStore } from "./sinain-knowledge/data/git-store.js";
|
|
54
25
|
|
|
55
26
|
// ============================================================================
|
|
56
27
|
// Privacy helpers
|
|
@@ -58,35 +29,8 @@ type SessionState = {
|
|
|
58
29
|
|
|
59
30
|
const PRIVATE_TAG_RE = /<private>[\s\S]*?<\/private>/g;
|
|
60
31
|
|
|
61
|
-
//
|
|
62
|
-
//
|
|
63
|
-
// ============================================================================
|
|
64
|
-
|
|
65
|
-
const ERROR_WINDOW_MS = 5 * 60_000; // 5-min sliding window for error rate
|
|
66
|
-
const OUTAGE_ERROR_RATE_THRESHOLD = 0.8; // 80% failure → outage detected
|
|
67
|
-
const OUTAGE_MIN_SAMPLES = 3; // need ≥3 samples before threshold applies
|
|
68
|
-
const FILE_SYNC_DEBOUNCE_MS = 3 * 60_000; // skip file sync if done <3 min ago
|
|
69
|
-
const PLAYBOOK_GEN_DEBOUNCE_MS = 5 * 60_000; // skip playbook gen if done <5 min ago
|
|
70
|
-
const SHORT_FAILURE_THRESHOLD_MS = 10_000; // fails in <10s = likely API error
|
|
71
|
-
const LONG_FAILURE_THRESHOLD_MS = 3 * 60_000; // >3min failure = likely stuck retry loop
|
|
72
|
-
|
|
73
|
-
// Context overflow watchdog constants
|
|
74
|
-
const OVERFLOW_CONSECUTIVE_THRESHOLD = 5; // N consecutive overload errors → trigger reset
|
|
75
|
-
const OVERFLOW_TRANSCRIPT_MIN_BYTES = 1_000_000; // 1MB guard — skip reset if transcript is small (transient outage)
|
|
76
|
-
const OVERFLOW_ERROR_PATTERN = /overloaded|context.*too.*long|token.*limit|extra usage is required/i;
|
|
77
|
-
|
|
78
|
-
// Proactive session hygiene constants
|
|
79
|
-
const SESSION_HYGIENE_SIZE_BYTES = 2_000_000; // 2MB — proactive archive+truncate threshold
|
|
80
|
-
const SESSION_HYGIENE_AGE_MS = 24 * 60 * 60 * 1000; // 24h — max session age before proactive reset
|
|
81
|
-
|
|
82
|
-
// Health watchdog constants
|
|
83
|
-
const WATCHDOG_INTERVAL_MS = 5 * 60_000; // 5 min — independent of curation timer
|
|
84
|
-
const ALERT_COOLDOWN_MS = 15 * 60_000; // 15 min per alert type
|
|
85
|
-
const STALENESS_WARNING_MS = 10 * 60_000; // 10 min no success → warning
|
|
86
|
-
const STALENESS_CRITICAL_MS = 15 * 60_000; // 15 min no success after reset → emergency restart
|
|
87
|
-
const SESSION_SIZE_WARNING_BYTES = 1_500_000; // 1.5MB → proactive reset
|
|
88
|
-
const SESSION_SIZE_RESTART_BYTES = 2_000_000; // 2MB → forced reset
|
|
89
|
-
const AUTO_RESTART_COOLDOWN_MS = 60 * 60_000; // max 1 auto-restart per hour
|
|
32
|
+
// Resilience constants — only import what index.ts still uses directly
|
|
33
|
+
// (ResilienceManager, HealthWatchdog, CurationEngine own the rest)
|
|
90
34
|
|
|
91
35
|
// ============================================================================
|
|
92
36
|
// Parent context injection (subagent support)
|
|
@@ -95,11 +39,6 @@ const AUTO_RESTART_COOLDOWN_MS = 60 * 60_000; // max 1 auto-restart per hour
|
|
|
95
39
|
const PARENT_CONTEXT_MAX_CHARS = 4000;
|
|
96
40
|
const PARENT_CONTEXT_TTL_MS = 10 * 60_000; // 10 minutes — stale cache won't be injected
|
|
97
41
|
|
|
98
|
-
type ParentContextCache = {
|
|
99
|
-
sessionKey: string;
|
|
100
|
-
capturedAt: number;
|
|
101
|
-
contextText: string;
|
|
102
|
-
};
|
|
103
42
|
|
|
104
43
|
function isSubagentSession(sessionKey: string): boolean {
|
|
105
44
|
return sessionKey.includes(":subagent:") || sessionKey.startsWith("subagent:");
|
|
@@ -211,313 +150,6 @@ async function sendTelegramAlert(
|
|
|
211
150
|
});
|
|
212
151
|
}
|
|
213
152
|
|
|
214
|
-
// ============================================================================
|
|
215
|
-
// File sync helpers
|
|
216
|
-
// ============================================================================
|
|
217
|
-
|
|
218
|
-
function syncFileToWorkspace(
|
|
219
|
-
sourcePath: string | undefined,
|
|
220
|
-
workspaceDir: string,
|
|
221
|
-
targetName: string,
|
|
222
|
-
logger: OpenClawPluginApi["logger"],
|
|
223
|
-
): boolean {
|
|
224
|
-
if (!sourcePath) return false;
|
|
225
|
-
|
|
226
|
-
try {
|
|
227
|
-
const content = readFileSync(sourcePath, "utf-8");
|
|
228
|
-
const targetPath = join(workspaceDir, targetName);
|
|
229
|
-
const targetDir = dirname(targetPath);
|
|
230
|
-
|
|
231
|
-
if (!existsSync(targetDir)) {
|
|
232
|
-
mkdirSync(targetDir, { recursive: true });
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
// Only write if content changed (avoid unnecessary git diffs)
|
|
236
|
-
let existing = "";
|
|
237
|
-
try {
|
|
238
|
-
existing = readFileSync(targetPath, "utf-8");
|
|
239
|
-
} catch {
|
|
240
|
-
// File doesn't exist yet
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
if (existing !== content) {
|
|
244
|
-
writeFileSync(targetPath, content, "utf-8");
|
|
245
|
-
logger.info(`sinain-hud: synced ${targetName} to workspace`);
|
|
246
|
-
return true;
|
|
247
|
-
}
|
|
248
|
-
return false;
|
|
249
|
-
} catch (err) {
|
|
250
|
-
logger.warn(`sinain-hud: failed to sync ${targetName}: ${String(err)}`);
|
|
251
|
-
return false;
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
/**
|
|
256
|
-
* Recursively sync a source directory to the workspace with selective overwrite policy:
|
|
257
|
-
* - .json, .sh, .txt, .jsonl — always overwritten (infra/config files we control)
|
|
258
|
-
* - .py and others — deploy-once only (skip if already exists; bot owns these after first deploy)
|
|
259
|
-
* Skips __pycache__ and hidden directories.
|
|
260
|
-
*/
|
|
261
|
-
function syncDirToWorkspace(
|
|
262
|
-
sourceDir: string,
|
|
263
|
-
workspaceDir: string,
|
|
264
|
-
targetDirName: string,
|
|
265
|
-
logger: OpenClawPluginApi["logger"],
|
|
266
|
-
): number {
|
|
267
|
-
if (!existsSync(sourceDir)) return 0;
|
|
268
|
-
const targetDir = join(workspaceDir, targetDirName);
|
|
269
|
-
if (!existsSync(targetDir)) mkdirSync(targetDir, { recursive: true });
|
|
270
|
-
|
|
271
|
-
const ALWAYS_OVERWRITE = new Set([".json", ".sh", ".txt", ".jsonl", ".py"]);
|
|
272
|
-
let synced = 0;
|
|
273
|
-
|
|
274
|
-
function syncRecursive(srcDir: string, dstDir: string): void {
|
|
275
|
-
if (!existsSync(dstDir)) mkdirSync(dstDir, { recursive: true });
|
|
276
|
-
for (const entry of readdirSync(srcDir)) {
|
|
277
|
-
const srcPath = join(srcDir, entry);
|
|
278
|
-
const dstPath = join(dstDir, entry);
|
|
279
|
-
const stat = statSync(srcPath);
|
|
280
|
-
if (stat.isDirectory()) {
|
|
281
|
-
if (entry.startsWith("__") || entry.startsWith(".")) continue;
|
|
282
|
-
syncRecursive(srcPath, dstPath);
|
|
283
|
-
continue;
|
|
284
|
-
}
|
|
285
|
-
if (!stat.isFile()) continue;
|
|
286
|
-
const ext = extname(entry).toLowerCase();
|
|
287
|
-
if (!ALWAYS_OVERWRITE.has(ext) && existsSync(dstPath)) continue;
|
|
288
|
-
const content = readFileSync(srcPath, "utf-8");
|
|
289
|
-
let existing = "";
|
|
290
|
-
try { existing = readFileSync(dstPath, "utf-8"); } catch {}
|
|
291
|
-
if (existing !== content) {
|
|
292
|
-
writeFileSync(dstPath, content, "utf-8");
|
|
293
|
-
synced++;
|
|
294
|
-
}
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
|
|
298
|
-
syncRecursive(sourceDir, targetDir);
|
|
299
|
-
if (synced > 0) logger.info(`sinain-hud: synced ${synced} files to ${targetDirName}/`);
|
|
300
|
-
return synced;
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
/**
|
|
304
|
-
* Recursively sync a modules/ source directory to workspace with selective deploy policy:
|
|
305
|
-
* - module-registry.json → deploy-once (agent manages via module_manager.py)
|
|
306
|
-
* - manifest.json → always overwrite (plugin controls schema)
|
|
307
|
-
* - patterns.md → deploy-once (agent/extract may have modified)
|
|
308
|
-
* - context/*.json → always overwrite
|
|
309
|
-
*/
|
|
310
|
-
function syncModulesToWorkspace(
|
|
311
|
-
sourceDir: string,
|
|
312
|
-
workspaceDir: string,
|
|
313
|
-
logger: OpenClawPluginApi["logger"],
|
|
314
|
-
): number {
|
|
315
|
-
if (!existsSync(sourceDir)) return 0;
|
|
316
|
-
const targetDir = join(workspaceDir, "modules");
|
|
317
|
-
if (!existsSync(targetDir)) mkdirSync(targetDir, { recursive: true });
|
|
318
|
-
|
|
319
|
-
const ALWAYS_OVERWRITE = new Set(["manifest.json"]);
|
|
320
|
-
const DEPLOY_ONCE = new Set(["module-registry.json", "patterns.md", "guidance.md"]);
|
|
321
|
-
let synced = 0;
|
|
322
|
-
|
|
323
|
-
function syncRecursive(srcDir: string, dstDir: string): void {
|
|
324
|
-
if (!existsSync(dstDir)) mkdirSync(dstDir, { recursive: true });
|
|
325
|
-
|
|
326
|
-
for (const entry of readdirSync(srcDir)) {
|
|
327
|
-
const srcPath = join(srcDir, entry);
|
|
328
|
-
const dstPath = join(dstDir, entry);
|
|
329
|
-
const stat = statSync(srcPath);
|
|
330
|
-
|
|
331
|
-
if (stat.isDirectory()) {
|
|
332
|
-
syncRecursive(srcPath, dstPath);
|
|
333
|
-
continue;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
if (!stat.isFile()) continue;
|
|
337
|
-
|
|
338
|
-
const fileName = entry;
|
|
339
|
-
const isAlwaysOverwrite = ALWAYS_OVERWRITE.has(fileName) || fileName.startsWith("context/");
|
|
340
|
-
const isDeployOnce = DEPLOY_ONCE.has(fileName);
|
|
341
|
-
|
|
342
|
-
// Deploy-once: skip if already in workspace
|
|
343
|
-
if (isDeployOnce && existsSync(dstPath)) continue;
|
|
344
|
-
|
|
345
|
-
// Default for unknown files: deploy-once
|
|
346
|
-
if (!isAlwaysOverwrite && !isDeployOnce && existsSync(dstPath)) continue;
|
|
347
|
-
|
|
348
|
-
const content = readFileSync(srcPath, "utf-8");
|
|
349
|
-
let existing = "";
|
|
350
|
-
try { existing = readFileSync(dstPath, "utf-8"); } catch {}
|
|
351
|
-
if (existing !== content) {
|
|
352
|
-
writeFileSync(dstPath, content, "utf-8");
|
|
353
|
-
synced++;
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
syncRecursive(sourceDir, targetDir);
|
|
359
|
-
if (synced > 0) logger.info(`sinain-hud: synced ${synced} module files to modules/`);
|
|
360
|
-
return synced;
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
/**
|
|
364
|
-
* Collect behavioral guidance from all active modules for prependContext injection.
|
|
365
|
-
*
|
|
366
|
-
* Reads module-registry.json, collects guidance.md from each active module
|
|
367
|
-
* (sorted by priority desc). Imported modules get a [transferred] label.
|
|
368
|
-
* Returns a formatted [MODULE GUIDANCE] block or empty string.
|
|
369
|
-
*/
|
|
370
|
-
function collectModuleGuidance(
|
|
371
|
-
workspaceDir: string,
|
|
372
|
-
logger: OpenClawPluginApi["logger"],
|
|
373
|
-
): string {
|
|
374
|
-
const registryPath = join(workspaceDir, "modules", "module-registry.json");
|
|
375
|
-
if (!existsSync(registryPath)) return "";
|
|
376
|
-
|
|
377
|
-
let registry: ModuleRegistry;
|
|
378
|
-
try {
|
|
379
|
-
registry = JSON.parse(readFileSync(registryPath, "utf-8")) as ModuleRegistry;
|
|
380
|
-
} catch {
|
|
381
|
-
return "";
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
// Active modules sorted by priority desc
|
|
385
|
-
const activeModules: Array<{ id: string; priority: number }> = [];
|
|
386
|
-
for (const [id, entry] of Object.entries(registry.modules)) {
|
|
387
|
-
if (entry.status === "active") {
|
|
388
|
-
activeModules.push({ id, priority: entry.priority });
|
|
389
|
-
}
|
|
390
|
-
}
|
|
391
|
-
activeModules.sort((a, b) => b.priority - a.priority);
|
|
392
|
-
|
|
393
|
-
const guidanceSections: string[] = [];
|
|
394
|
-
let moduleCount = 0;
|
|
395
|
-
|
|
396
|
-
for (const mod of activeModules) {
|
|
397
|
-
const guidancePath = join(workspaceDir, "modules", mod.id, "guidance.md");
|
|
398
|
-
if (!existsSync(guidancePath)) continue;
|
|
399
|
-
|
|
400
|
-
try {
|
|
401
|
-
const content = readFileSync(guidancePath, "utf-8").trim();
|
|
402
|
-
if (!content) continue;
|
|
403
|
-
|
|
404
|
-
// Check if module was imported (transferred)
|
|
405
|
-
let label = mod.id;
|
|
406
|
-
const manifestPath = join(workspaceDir, "modules", mod.id, "manifest.json");
|
|
407
|
-
if (existsSync(manifestPath)) {
|
|
408
|
-
try {
|
|
409
|
-
const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
|
|
410
|
-
if (manifest.importedAt) {
|
|
411
|
-
label = `${manifest.name || mod.id} [transferred]`;
|
|
412
|
-
}
|
|
413
|
-
} catch { /* skip */ }
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
guidanceSections.push(`### ${label}\n${content}`);
|
|
417
|
-
moduleCount++;
|
|
418
|
-
} catch {
|
|
419
|
-
// Skip unreadable guidance
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
if (guidanceSections.length === 0) return "";
|
|
424
|
-
|
|
425
|
-
logger.info(`sinain-hud: injecting guidance from ${moduleCount} module(s)`);
|
|
426
|
-
return `[MODULE GUIDANCE]\n${guidanceSections.join("\n\n")}`;
|
|
427
|
-
}
|
|
428
|
-
|
|
429
|
-
/**
|
|
430
|
-
* Generate the merged effective playbook from active modules + base playbook.
|
|
431
|
-
*
|
|
432
|
-
* Reads module-registry.json, collects patterns.md from each active module
|
|
433
|
-
* (sorted by priority desc), reads the base sinain-playbook.md, and writes
|
|
434
|
-
* the merged result to memory/sinain-playbook-effective.md.
|
|
435
|
-
*/
|
|
436
|
-
function generateEffectivePlaybook(
|
|
437
|
-
workspaceDir: string,
|
|
438
|
-
logger: OpenClawPluginApi["logger"],
|
|
439
|
-
): boolean {
|
|
440
|
-
const registryPath = join(workspaceDir, "modules", "module-registry.json");
|
|
441
|
-
if (!existsSync(registryPath)) {
|
|
442
|
-
logger.info("sinain-hud: no module-registry.json found, skipping effective playbook generation");
|
|
443
|
-
return false;
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
let registry: ModuleRegistry;
|
|
447
|
-
try {
|
|
448
|
-
registry = JSON.parse(readFileSync(registryPath, "utf-8")) as ModuleRegistry;
|
|
449
|
-
} catch (err) {
|
|
450
|
-
logger.warn(`sinain-hud: failed to parse module-registry.json: ${String(err)}`);
|
|
451
|
-
return false;
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
// Collect active modules sorted by priority desc
|
|
455
|
-
const activeModules: Array<{ id: string; priority: number }> = [];
|
|
456
|
-
for (const [id, entry] of Object.entries(registry.modules)) {
|
|
457
|
-
if (entry.status === "active") {
|
|
458
|
-
activeModules.push({ id, priority: entry.priority });
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
activeModules.sort((a, b) => b.priority - a.priority);
|
|
462
|
-
|
|
463
|
-
// Build module stack header
|
|
464
|
-
const stackLabel = activeModules.map((m) => `${m.id}(${m.priority})`).join(", ");
|
|
465
|
-
|
|
466
|
-
// Collect patterns from each active module
|
|
467
|
-
const sections: string[] = [];
|
|
468
|
-
sections.push(`<!-- module-stack: ${stackLabel} -->`);
|
|
469
|
-
sections.push("");
|
|
470
|
-
|
|
471
|
-
for (const mod of activeModules) {
|
|
472
|
-
const patternsPath = join(workspaceDir, "modules", mod.id, "patterns.md");
|
|
473
|
-
if (!existsSync(patternsPath)) continue;
|
|
474
|
-
try {
|
|
475
|
-
const patterns = readFileSync(patternsPath, "utf-8").trim();
|
|
476
|
-
if (patterns) {
|
|
477
|
-
sections.push(`<!-- module: ${mod.id} (priority ${mod.priority}) -->`);
|
|
478
|
-
// Attribution for transferred (imported) modules
|
|
479
|
-
const manifestPath = join(workspaceDir, "modules", mod.id, "manifest.json");
|
|
480
|
-
if (existsSync(manifestPath)) {
|
|
481
|
-
try {
|
|
482
|
-
const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));
|
|
483
|
-
if (manifest.importedAt) {
|
|
484
|
-
sections.push(`> *[Transferred knowledge: ${manifest.name || mod.id}]*`);
|
|
485
|
-
}
|
|
486
|
-
} catch { /* skip if manifest unreadable */ }
|
|
487
|
-
}
|
|
488
|
-
sections.push(patterns);
|
|
489
|
-
sections.push("");
|
|
490
|
-
}
|
|
491
|
-
} catch {
|
|
492
|
-
// Skip unreadable patterns
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
// Append base playbook
|
|
497
|
-
const basePlaybookPath = join(workspaceDir, "memory", "sinain-playbook.md");
|
|
498
|
-
if (existsSync(basePlaybookPath)) {
|
|
499
|
-
try {
|
|
500
|
-
const base = readFileSync(basePlaybookPath, "utf-8").trim();
|
|
501
|
-
if (base) {
|
|
502
|
-
sections.push("<!-- base-playbook -->");
|
|
503
|
-
sections.push(base);
|
|
504
|
-
sections.push("");
|
|
505
|
-
}
|
|
506
|
-
} catch {
|
|
507
|
-
// Skip if unreadable
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
// Write effective playbook (always overwrite)
|
|
512
|
-
const effectivePath = join(workspaceDir, "memory", "sinain-playbook-effective.md");
|
|
513
|
-
const effectiveDir = dirname(effectivePath);
|
|
514
|
-
if (!existsSync(effectiveDir)) mkdirSync(effectiveDir, { recursive: true });
|
|
515
|
-
|
|
516
|
-
const content = sections.join("\n");
|
|
517
|
-
writeFileSync(effectivePath, content, "utf-8");
|
|
518
|
-
logger.info(`sinain-hud: generated effective playbook (${activeModules.length} active modules)`);
|
|
519
|
-
return true;
|
|
520
|
-
}
|
|
521
153
|
|
|
522
154
|
// ============================================================================
|
|
523
155
|
// Plugin Definition
|
|
@@ -526,7 +158,6 @@ function generateEffectivePlaybook(
|
|
|
526
158
|
export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
527
159
|
const cfg = (api.pluginConfig ?? {}) as PluginConfig;
|
|
528
160
|
const sessionStates = new Map<string, SessionState>();
|
|
529
|
-
let curationInterval: ReturnType<typeof setInterval> | null = null;
|
|
530
161
|
let lastWorkspaceDir: string | null = null;
|
|
531
162
|
|
|
532
163
|
// Pre-initialize from config so situation.update works immediately after gateway restart,
|
|
@@ -537,53 +168,16 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
537
168
|
api.logger.info(`sinain-hud: workspace pre-initialized from config: ${lastWorkspaceDir}`);
|
|
538
169
|
}
|
|
539
170
|
|
|
540
|
-
|
|
541
|
-
|
|
171
|
+
// KnowledgeStore — wraps all file I/O for workspace, playbooks, modules, eval
|
|
172
|
+
const store = new KnowledgeStore(lastWorkspaceDir ?? "/tmp/sinain-placeholder", api.logger);
|
|
542
173
|
|
|
543
|
-
//
|
|
544
|
-
const
|
|
545
|
-
let lastSuccessTs = 0;
|
|
546
|
-
let lastPlaybookGenTs = 0;
|
|
547
|
-
let lastFileSyncTs = 0;
|
|
548
|
-
let outageDetected = false;
|
|
549
|
-
let consecutiveFailures = 0;
|
|
550
|
-
let outageStartTs = 0;
|
|
551
|
-
let consecutiveOverflowErrors = 0;
|
|
174
|
+
// Resilience layer
|
|
175
|
+
const resilience = new ResilienceManager();
|
|
552
176
|
|
|
553
177
|
// Parent context cache for subagent injection
|
|
554
178
|
let parentContextCache: ParentContextCache | null = null;
|
|
555
179
|
|
|
556
|
-
//
|
|
557
|
-
let watchdogInterval: ReturnType<typeof setInterval> | null = null;
|
|
558
|
-
let lastResetTs = 0;
|
|
559
|
-
let lastAutoRestartTs = 0;
|
|
560
|
-
|
|
561
|
-
function appendToContextCache(line: string): void {
|
|
562
|
-
if (!parentContextCache) return;
|
|
563
|
-
parentContextCache.contextText += "\n" + line;
|
|
564
|
-
parentContextCache.capturedAt = Date.now();
|
|
565
|
-
// Trim from front if over budget (keep most recent context)
|
|
566
|
-
if (parentContextCache.contextText.length > PARENT_CONTEXT_MAX_CHARS) {
|
|
567
|
-
const excess = parentContextCache.contextText.length - PARENT_CONTEXT_MAX_CHARS;
|
|
568
|
-
const newStart = parentContextCache.contextText.indexOf("\n", excess);
|
|
569
|
-
parentContextCache.contextText = newStart >= 0
|
|
570
|
-
? parentContextCache.contextText.slice(newStart + 1)
|
|
571
|
-
: parentContextCache.contextText.slice(excess);
|
|
572
|
-
}
|
|
573
|
-
}
|
|
574
|
-
|
|
575
|
-
function computeErrorRate(): { rate: number; total: number; failures: number } {
|
|
576
|
-
const cutoff = Date.now() - ERROR_WINDOW_MS;
|
|
577
|
-
// Prune entries older than the window
|
|
578
|
-
while (recentOutcomes.length > 0 && recentOutcomes[0].ts < cutoff) {
|
|
579
|
-
recentOutcomes.shift();
|
|
580
|
-
}
|
|
581
|
-
const total = recentOutcomes.length;
|
|
582
|
-
if (total === 0) return { rate: 0, total: 0, failures: 0 };
|
|
583
|
-
const failures = recentOutcomes.filter((o) => !o.success).length;
|
|
584
|
-
return { rate: failures / total, total, failures };
|
|
585
|
-
}
|
|
586
|
-
|
|
180
|
+
// ── Backend adapter for resilience (OpenClaw-specific) ──────────────────
|
|
587
181
|
function getSessionsJsonPath(): string | null {
|
|
588
182
|
if (!lastWorkspaceDir) return null;
|
|
589
183
|
const sessionsDir = join(dirname(lastWorkspaceDir), "agents", "main", "sessions");
|
|
@@ -591,20 +185,31 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
591
185
|
return existsSync(p) ? p : null;
|
|
592
186
|
}
|
|
593
187
|
|
|
188
|
+
function getTranscriptSize(): { path: string; bytes: number } | null {
|
|
189
|
+
const sessionsJsonPath = getSessionsJsonPath();
|
|
190
|
+
if (!sessionsJsonPath || !cfg.sessionKey) return null;
|
|
191
|
+
try {
|
|
192
|
+
const sessionsData = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
|
|
193
|
+
const session = sessionsData[cfg.sessionKey];
|
|
194
|
+
const transcriptPath = session?.sessionFile as string | undefined;
|
|
195
|
+
if (!transcriptPath || !existsSync(transcriptPath)) return null;
|
|
196
|
+
return { path: transcriptPath, bytes: statSync(transcriptPath).size };
|
|
197
|
+
} catch {
|
|
198
|
+
return null;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
|
|
594
202
|
function performOverflowReset(): boolean {
|
|
595
203
|
const targetSessionKey = cfg.sessionKey;
|
|
596
204
|
if (!targetSessionKey || !lastWorkspaceDir) {
|
|
597
205
|
api.logger.warn("sinain-hud: overflow reset aborted — no sessionKey or workspace dir");
|
|
598
206
|
return false;
|
|
599
207
|
}
|
|
600
|
-
|
|
601
208
|
const sessionsJsonPath = getSessionsJsonPath();
|
|
602
|
-
|
|
603
209
|
if (!sessionsJsonPath) {
|
|
604
210
|
api.logger.warn(`sinain-hud: overflow reset aborted — sessions.json not found`);
|
|
605
211
|
return false;
|
|
606
212
|
}
|
|
607
|
-
|
|
608
213
|
let sessionsData: Record<string, Record<string, unknown>>;
|
|
609
214
|
try {
|
|
610
215
|
sessionsData = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
|
|
@@ -612,15 +217,13 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
612
217
|
api.logger.warn(`sinain-hud: overflow reset aborted — cannot parse sessions.json: ${err}`);
|
|
613
218
|
return false;
|
|
614
219
|
}
|
|
615
|
-
|
|
616
220
|
const session = sessionsData[targetSessionKey];
|
|
617
221
|
const transcriptPath = session?.sessionFile as string | undefined;
|
|
618
222
|
if (!transcriptPath || !existsSync(transcriptPath)) {
|
|
619
223
|
api.logger.warn(`sinain-hud: overflow reset aborted — transcript not found: ${transcriptPath}`);
|
|
620
224
|
return false;
|
|
621
225
|
}
|
|
622
|
-
|
|
623
|
-
// Guard: only reset if transcript is actually large
|
|
226
|
+
const OVERFLOW_TRANSCRIPT_MIN_BYTES = 1_000_000;
|
|
624
227
|
const size = statSync(transcriptPath).size;
|
|
625
228
|
if (size < OVERFLOW_TRANSCRIPT_MIN_BYTES) {
|
|
626
229
|
api.logger.info(
|
|
@@ -628,31 +231,59 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
628
231
|
);
|
|
629
232
|
return false;
|
|
630
233
|
}
|
|
631
|
-
|
|
632
|
-
// Archive → truncate → reset metadata
|
|
633
234
|
const archivePath = transcriptPath.replace(/\.jsonl$/, `.archived.${Date.now()}.jsonl`);
|
|
634
|
-
try {
|
|
635
|
-
copyFileSync(transcriptPath, archivePath);
|
|
636
|
-
} catch (err) {
|
|
235
|
+
try { copyFileSync(transcriptPath, archivePath); } catch (err) {
|
|
637
236
|
api.logger.warn(`sinain-hud: overflow reset aborted — archive failed: ${err}`);
|
|
638
237
|
return false;
|
|
639
238
|
}
|
|
640
|
-
|
|
641
239
|
writeFileSync(transcriptPath, "", "utf-8");
|
|
642
|
-
|
|
643
240
|
try {
|
|
644
241
|
session.contextTokens = 0;
|
|
645
242
|
writeFileSync(sessionsJsonPath, JSON.stringify(sessionsData, null, 2), "utf-8");
|
|
646
|
-
} catch {
|
|
647
|
-
// Non-fatal — gateway recomputes tokens from transcript content
|
|
648
|
-
}
|
|
649
|
-
|
|
243
|
+
} catch {}
|
|
650
244
|
api.logger.info(
|
|
651
245
|
`sinain-hud: === OVERFLOW RESET === Transcript truncated (was ${Math.round(size / 1024)}KB). Archive: ${archivePath}`,
|
|
652
246
|
);
|
|
653
247
|
return true;
|
|
654
248
|
}
|
|
655
249
|
|
|
250
|
+
function getStateDir(): string | null {
|
|
251
|
+
if (!lastWorkspaceDir) return null;
|
|
252
|
+
return dirname(lastWorkspaceDir);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
const resilienceBackend: ResilienceBackend = {
|
|
256
|
+
getTranscriptSize,
|
|
257
|
+
performOverflowReset,
|
|
258
|
+
async sendAlert(alertType: string, title: string, body: string): Promise<void> {
|
|
259
|
+
const sd = getStateDir();
|
|
260
|
+
if (sd) sendTelegramAlert(alertType, title, body, sd);
|
|
261
|
+
},
|
|
262
|
+
};
|
|
263
|
+
|
|
264
|
+
// CurationEngine + HealthWatchdog
|
|
265
|
+
const scriptRunner = (args: string[], opts: { timeoutMs: number; cwd: string }) =>
|
|
266
|
+
api.runtime.system.runCommandWithTimeout(args, opts);
|
|
267
|
+
const engine = new CurationEngine(store, scriptRunner, resilience, { userTimezone: cfg.userTimezone ?? "Europe/Berlin" }, api.logger);
|
|
268
|
+
if (cfg.snapshotRepoPath) {
|
|
269
|
+
engine.setGitSnapshotStore(new GitSnapshotStore(cfg.snapshotRepoPath, api.logger));
|
|
270
|
+
api.logger.info(`sinain-hud: git snapshot store configured at ${cfg.snapshotRepoPath}`);
|
|
271
|
+
}
|
|
272
|
+
const watchdog = new HealthWatchdog(resilience, resilienceBackend, api.logger);
|
|
273
|
+
|
|
274
|
+
function appendToContextCache(line: string): void {
|
|
275
|
+
if (!parentContextCache) return;
|
|
276
|
+
parentContextCache.contextText += "\n" + line;
|
|
277
|
+
parentContextCache.capturedAt = Date.now();
|
|
278
|
+
if (parentContextCache.contextText.length > PARENT_CONTEXT_MAX_CHARS) {
|
|
279
|
+
const excess = parentContextCache.contextText.length - PARENT_CONTEXT_MAX_CHARS;
|
|
280
|
+
const newStart = parentContextCache.contextText.indexOf("\n", excess);
|
|
281
|
+
parentContextCache.contextText = newStart >= 0
|
|
282
|
+
? parentContextCache.contextText.slice(newStart + 1)
|
|
283
|
+
: parentContextCache.contextText.slice(excess);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
656
287
|
api.logger.info("sinain-hud: plugin registered");
|
|
657
288
|
|
|
658
289
|
// ==========================================================================
|
|
@@ -669,13 +300,10 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
669
300
|
respond(false, null, { code: "not_ready", message: "workspace not initialized" });
|
|
670
301
|
return;
|
|
671
302
|
}
|
|
672
|
-
const situationPath = join(lastWorkspaceDir, "SITUATION.md");
|
|
673
|
-
const tmpPath = situationPath + ".rpc.tmp";
|
|
674
303
|
try {
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
api.logger.info(`sinain-hud: SITUATION.md updated via RPC (${content.length} chars)`);
|
|
304
|
+
store.writeSituation(content as string);
|
|
305
|
+
respond(true, { ok: true, bytes: (content as string).length });
|
|
306
|
+
api.logger.info(`sinain-hud: SITUATION.md updated via RPC (${(content as string).length} chars)`);
|
|
679
307
|
} catch (err: any) {
|
|
680
308
|
respond(false, null, { code: "write_error", message: err.message });
|
|
681
309
|
}
|
|
@@ -702,8 +330,9 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
702
330
|
const workspaceDir = ctx.workspaceDir;
|
|
703
331
|
if (!workspaceDir) return;
|
|
704
332
|
|
|
705
|
-
// Track workspace dir in session state and for curation timer
|
|
333
|
+
// Track workspace dir in session state, store, and for curation timer
|
|
706
334
|
lastWorkspaceDir = workspaceDir;
|
|
335
|
+
store.setWorkspaceDir(workspaceDir);
|
|
707
336
|
const sessionKey = ctx.sessionKey;
|
|
708
337
|
if (sessionKey) {
|
|
709
338
|
const state = sessionStates.get(sessionKey);
|
|
@@ -714,9 +343,8 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
714
343
|
|
|
715
344
|
const now = Date.now();
|
|
716
345
|
|
|
717
|
-
// ── Debounced file sync
|
|
718
|
-
|
|
719
|
-
if (fileSyncDue) {
|
|
346
|
+
// ── Debounced file sync ──────────────────────────────────────────────
|
|
347
|
+
if (resilience.isFileSyncDue()) {
|
|
720
348
|
const heartbeatSource = cfg.heartbeatPath
|
|
721
349
|
? api.resolvePath(cfg.heartbeatPath)
|
|
722
350
|
: undefined;
|
|
@@ -724,41 +352,39 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
724
352
|
? api.resolvePath(cfg.skillPath)
|
|
725
353
|
: undefined;
|
|
726
354
|
|
|
727
|
-
|
|
728
|
-
|
|
355
|
+
store.deployFile(heartbeatSource, "HEARTBEAT.md");
|
|
356
|
+
store.deployFile(skillSource, "SKILL.md");
|
|
729
357
|
|
|
730
358
|
const memorySource = cfg.memoryPath ? api.resolvePath(cfg.memoryPath) : undefined;
|
|
731
359
|
if (memorySource) {
|
|
732
|
-
|
|
360
|
+
store.deployDir(memorySource, "sinain-memory");
|
|
733
361
|
const gbPath = join(workspaceDir, "sinain-memory", "git_backup.sh");
|
|
734
362
|
if (existsSync(gbPath)) try { chmodSync(gbPath, 0o755); } catch {}
|
|
735
363
|
}
|
|
736
364
|
|
|
737
365
|
const modulesSource = cfg.modulesPath ? api.resolvePath(cfg.modulesPath) : undefined;
|
|
738
366
|
if (modulesSource && existsSync(modulesSource)) {
|
|
739
|
-
|
|
367
|
+
store.deployModules(modulesSource);
|
|
740
368
|
}
|
|
741
369
|
|
|
742
|
-
|
|
370
|
+
resilience.markFileSynced();
|
|
743
371
|
}
|
|
744
372
|
|
|
745
|
-
// ── Debounced playbook generation
|
|
746
|
-
|
|
747
|
-
if (playbookGenDue) {
|
|
373
|
+
// ── Debounced playbook generation ────────────────────────────────────
|
|
374
|
+
if (resilience.isPlaybookGenDue()) {
|
|
748
375
|
const modulesSource = cfg.modulesPath ? api.resolvePath(cfg.modulesPath) : undefined;
|
|
749
376
|
if (modulesSource && existsSync(modulesSource)) {
|
|
750
|
-
generateEffectivePlaybook(
|
|
751
|
-
|
|
377
|
+
store.generateEffectivePlaybook();
|
|
378
|
+
resilience.markPlaybookGenerated();
|
|
752
379
|
}
|
|
753
380
|
}
|
|
754
381
|
|
|
755
382
|
// ── Fire-and-forget: ingest active module patterns into triple store
|
|
756
383
|
try {
|
|
757
|
-
const
|
|
758
|
-
if (
|
|
759
|
-
const
|
|
760
|
-
|
|
761
|
-
if ((entry as Record<string, unknown>).status === "active") {
|
|
384
|
+
const registry = store.readModuleRegistry();
|
|
385
|
+
if (registry) {
|
|
386
|
+
for (const [id, entry] of Object.entries(registry.modules)) {
|
|
387
|
+
if (entry.status === "active") {
|
|
762
388
|
api.runtime.system.runCommandWithTimeout(
|
|
763
389
|
["uv", "run", "--with", "requests", "python3",
|
|
764
390
|
"sinain-memory/triple_ingest.py",
|
|
@@ -773,21 +399,12 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
773
399
|
} catch {}
|
|
774
400
|
|
|
775
401
|
// ── Memory dirs — always run (cheap, idempotent) ────────────────────
|
|
776
|
-
|
|
777
|
-
"memory/eval-logs", "memory/eval-reports"]) {
|
|
778
|
-
const fullPath = join(workspaceDir, dir);
|
|
779
|
-
if (!existsSync(fullPath)) {
|
|
780
|
-
mkdirSync(fullPath, { recursive: true });
|
|
781
|
-
}
|
|
782
|
-
// Ensure directory is writable even if created by another process (e.g. root)
|
|
783
|
-
try { chmodSync(fullPath, 0o755); } catch {}
|
|
784
|
-
}
|
|
402
|
+
store.ensureMemoryDirs();
|
|
785
403
|
|
|
786
404
|
// ── Context capture + subagent injection ────────────────────────────
|
|
787
405
|
const isSubagent = sessionKey ? isSubagentSession(sessionKey) : false;
|
|
788
406
|
|
|
789
407
|
if (!isSubagent) {
|
|
790
|
-
// Main session: capture recent conversation context for future subagents
|
|
791
408
|
const messages = (event as Record<string, unknown>).messages as unknown[] | undefined;
|
|
792
409
|
const prompt = (event as Record<string, unknown>).prompt as string | undefined;
|
|
793
410
|
if (messages && Array.isArray(messages) && messages.length > 0) {
|
|
@@ -805,110 +422,15 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
805
422
|
}
|
|
806
423
|
}
|
|
807
424
|
|
|
808
|
-
// ──
|
|
809
|
-
const contextParts
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
year: "numeric",
|
|
817
|
-
month: "long",
|
|
818
|
-
day: "numeric",
|
|
819
|
-
hour: "2-digit",
|
|
820
|
-
minute: "2-digit",
|
|
821
|
-
hour12: false,
|
|
425
|
+
// ── Context assembly (delegated to CurationEngine) ────────────────
|
|
426
|
+
const contextParts = await engine.assembleContext({
|
|
427
|
+
isSubagent,
|
|
428
|
+
parentContextText: parentContextCache?.contextText ?? null,
|
|
429
|
+
parentContextAgeMs: parentContextCache ? now - parentContextCache.capturedAt : undefined,
|
|
430
|
+
parentContextTtlMs: PARENT_CONTEXT_TTL_MS,
|
|
431
|
+
heartbeatConfigured: !!cfg.heartbeatPath,
|
|
432
|
+
heartbeatTargetExists: existsSync(join(workspaceDir, "HEARTBEAT.md")),
|
|
822
433
|
});
|
|
823
|
-
contextParts.push(`[CURRENT TIME] ${nowLocal} (${userTz})`);
|
|
824
|
-
|
|
825
|
-
// Recovery context injection after outage
|
|
826
|
-
if (outageStartTs > 0 && !outageDetected && lastSuccessTs > outageStartTs) {
|
|
827
|
-
const outageDurationMin = Math.round((lastSuccessTs - outageStartTs) / 60_000);
|
|
828
|
-
outageStartTs = 0; // one-shot: only inject once
|
|
829
|
-
api.logger.info(`sinain-hud: injecting recovery context (outage lasted ~${outageDurationMin}min)`);
|
|
830
|
-
contextParts.push(
|
|
831
|
-
`[SYSTEM] The upstream API was unavailable for ~${outageDurationMin} minutes. ` +
|
|
832
|
-
`Multiple queued messages may have accumulated. Prioritize the current task, skip catch-up on stale items, and keep responses concise.`,
|
|
833
|
-
);
|
|
834
|
-
}
|
|
835
|
-
|
|
836
|
-
// Subagent: inject cached parent context
|
|
837
|
-
if (isSubagent && parentContextCache) {
|
|
838
|
-
const cacheAgeMs = now - parentContextCache.capturedAt;
|
|
839
|
-
if (cacheAgeMs < PARENT_CONTEXT_TTL_MS) {
|
|
840
|
-
const cacheAgeSec = Math.round(cacheAgeMs / 1000);
|
|
841
|
-
api.logger.info(
|
|
842
|
-
`sinain-hud: injected parent context for subagent (${parentContextCache.contextText.length} chars, ${cacheAgeSec}s old)`,
|
|
843
|
-
);
|
|
844
|
-
contextParts.push(
|
|
845
|
-
`[PARENT SESSION CONTEXT] The following is a summary of the recent conversation from the parent session that spawned you. Use it to understand references to code, files, or decisions discussed earlier:\n\n${parentContextCache.contextText}`,
|
|
846
|
-
);
|
|
847
|
-
} else {
|
|
848
|
-
api.logger.info(
|
|
849
|
-
`sinain-hud: skipped stale parent context for subagent (${Math.round(cacheAgeMs / 1000)}s old, TTL=${PARENT_CONTEXT_TTL_MS / 1000}s)`,
|
|
850
|
-
);
|
|
851
|
-
}
|
|
852
|
-
}
|
|
853
|
-
|
|
854
|
-
// Heartbeat enforcement (replaces fork's system-prompt.ts logic)
|
|
855
|
-
if (cfg.heartbeatPath) {
|
|
856
|
-
const hbTarget = join(workspaceDir, "HEARTBEAT.md");
|
|
857
|
-
if (existsSync(hbTarget)) {
|
|
858
|
-
contextParts.push(
|
|
859
|
-
"[HEARTBEAT PROTOCOL] HEARTBEAT.md is loaded in your project context. " +
|
|
860
|
-
"On every heartbeat poll, you MUST execute the full protocol defined in " +
|
|
861
|
-
"HEARTBEAT.md — all phases, all steps, in order. " +
|
|
862
|
-
"Only reply HEARTBEAT_OK if HEARTBEAT.md explicitly permits it " +
|
|
863
|
-
"after you have completed all mandatory steps."
|
|
864
|
-
);
|
|
865
|
-
}
|
|
866
|
-
}
|
|
867
|
-
|
|
868
|
-
// SITUATION.md bootstrap (replaces fork's workspace.ts logic)
|
|
869
|
-
const situationPath = join(workspaceDir, "SITUATION.md");
|
|
870
|
-
if (existsSync(situationPath)) {
|
|
871
|
-
try {
|
|
872
|
-
const content = readFileSync(situationPath, "utf-8").trim();
|
|
873
|
-
if (content) contextParts.push(`[SITUATION]\n${content}`);
|
|
874
|
-
} catch {}
|
|
875
|
-
}
|
|
876
|
-
|
|
877
|
-
// Knowledge transfer attribution — if effective playbook contains imported modules
|
|
878
|
-
const effectivePlaybookPath = join(workspaceDir, "memory", "sinain-playbook-effective.md");
|
|
879
|
-
if (existsSync(effectivePlaybookPath)) {
|
|
880
|
-
try {
|
|
881
|
-
const effectiveContent = readFileSync(effectivePlaybookPath, "utf-8");
|
|
882
|
-
if (effectiveContent.includes("[Transferred knowledge:")) {
|
|
883
|
-
contextParts.push(
|
|
884
|
-
"[KNOWLEDGE TRANSFER] Some patterns in your playbook were transferred from " +
|
|
885
|
-
"another sinain instance. When surfacing these, briefly cite their origin."
|
|
886
|
-
);
|
|
887
|
-
}
|
|
888
|
-
} catch { /* skip if unreadable */ }
|
|
889
|
-
}
|
|
890
|
-
|
|
891
|
-
// Module guidance injection — behavioral instructions from active modules
|
|
892
|
-
const moduleGuidance = collectModuleGuidance(workspaceDir, api.logger);
|
|
893
|
-
if (moduleGuidance) contextParts.push(moduleGuidance);
|
|
894
|
-
|
|
895
|
-
// Synchronous: knowledge graph context (10s timeout, skipped on failure)
|
|
896
|
-
try {
|
|
897
|
-
const ragResult = await api.runtime.system.runCommandWithTimeout(
|
|
898
|
-
["uv", "run", "--with", "requests", "python3",
|
|
899
|
-
"sinain-memory/triple_query.py",
|
|
900
|
-
"--memory-dir", join(workspaceDir, "memory"),
|
|
901
|
-
"--context", "current session",
|
|
902
|
-
"--max-chars", "1500"],
|
|
903
|
-
{ timeoutMs: 10_000, cwd: workspaceDir },
|
|
904
|
-
);
|
|
905
|
-
if (ragResult.code === 0) {
|
|
906
|
-
const parsed = JSON.parse(ragResult.stdout.trim());
|
|
907
|
-
if (parsed.context && parsed.context.length > 50) {
|
|
908
|
-
contextParts.push(`[KNOWLEDGE GRAPH CONTEXT]\n${parsed.context}`);
|
|
909
|
-
}
|
|
910
|
-
}
|
|
911
|
-
} catch {}
|
|
912
434
|
|
|
913
435
|
if (contextParts.length > 0) {
|
|
914
436
|
return { prependContext: contextParts.join("\n\n") };
|
|
@@ -993,101 +515,22 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
993
515
|
const isSuccess = event.success === true;
|
|
994
516
|
const isShortFailure = !isSuccess && durationMs < SHORT_FAILURE_THRESHOLD_MS;
|
|
995
517
|
|
|
996
|
-
// ── Retry storm: track outcome
|
|
997
|
-
recentOutcomes.push({
|
|
518
|
+
// ── Retry storm: track outcome via ResilienceManager ────────────────
|
|
519
|
+
resilience.recentOutcomes.push({
|
|
998
520
|
ts: Date.now(),
|
|
999
521
|
success: isSuccess,
|
|
1000
522
|
error: isSuccess ? undefined : String(event.error ?? "unknown"),
|
|
1001
523
|
});
|
|
1002
524
|
|
|
1003
525
|
if (isSuccess) {
|
|
1004
|
-
|
|
1005
|
-
const outageDurationMs = outageStartTs > 0 ? Date.now() - outageStartTs : 0;
|
|
1006
|
-
consecutiveFailures = 0;
|
|
1007
|
-
outageDetected = false;
|
|
1008
|
-
lastSuccessTs = Date.now();
|
|
1009
|
-
if (wasOutage) {
|
|
1010
|
-
api.logger.info(
|
|
1011
|
-
`sinain-hud: OUTAGE RECOVERED — resumed after ${Math.round(outageDurationMs / 1000)}s`,
|
|
1012
|
-
);
|
|
1013
|
-
// outageStartTs is NOT reset here — before_agent_start uses it to
|
|
1014
|
-
// inject recovery context on the next run, then resets it itself.
|
|
1015
|
-
|
|
1016
|
-
// Send recovery alert via Telegram
|
|
1017
|
-
const sd = getStateDir();
|
|
1018
|
-
if (sd) {
|
|
1019
|
-
sendTelegramAlert("recovery", "✅ *sinain-hud* recovered",
|
|
1020
|
-
`• Gateway up, first run succeeded\n• Downtime: ~${Math.round(outageDurationMs / 60_000)}min`,
|
|
1021
|
-
sd);
|
|
1022
|
-
}
|
|
1023
|
-
}
|
|
526
|
+
resilience.recordSuccess(resilienceBackend, api.logger);
|
|
1024
527
|
} else if (isShortFailure) {
|
|
1025
|
-
|
|
1026
|
-
const { rate, total } = computeErrorRate();
|
|
1027
|
-
if (!outageDetected && total >= OUTAGE_MIN_SAMPLES && rate >= OUTAGE_ERROR_RATE_THRESHOLD) {
|
|
1028
|
-
outageDetected = true;
|
|
1029
|
-
outageStartTs = Date.now();
|
|
1030
|
-
api.logger.warn(
|
|
1031
|
-
`sinain-hud: OUTAGE DETECTED — ${Math.round(rate * 100)}% error rate over ${total} samples, ${consecutiveFailures} consecutive failures`,
|
|
1032
|
-
);
|
|
1033
|
-
const sd = getStateDir();
|
|
1034
|
-
if (sd) {
|
|
1035
|
-
sendTelegramAlert("outage", "🔴 *sinain-hud* OUTAGE DETECTED",
|
|
1036
|
-
`• ${Math.round(rate * 100)}% error rate over ${total} samples\n• ${consecutiveFailures} consecutive failures`,
|
|
1037
|
-
sd);
|
|
1038
|
-
}
|
|
1039
|
-
}
|
|
528
|
+
resilience.recordShortFailure(resilienceBackend, api.logger);
|
|
1040
529
|
}
|
|
1041
530
|
|
|
1042
531
|
// ── Context overflow watchdog ──────────────────────────────────────
|
|
1043
532
|
if (sessionKey === cfg.sessionKey) {
|
|
1044
|
-
|
|
1045
|
-
consecutiveOverflowErrors++;
|
|
1046
|
-
api.logger.warn(
|
|
1047
|
-
`sinain-hud: overflow watchdog — error #${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
|
|
1048
|
-
);
|
|
1049
|
-
if (consecutiveOverflowErrors >= OVERFLOW_CONSECUTIVE_THRESHOLD) {
|
|
1050
|
-
api.logger.warn("sinain-hud: OVERFLOW THRESHOLD REACHED — attempting transcript reset");
|
|
1051
|
-
if (performOverflowReset()) {
|
|
1052
|
-
lastResetTs = Date.now();
|
|
1053
|
-
consecutiveOverflowErrors = 0;
|
|
1054
|
-
outageDetected = false;
|
|
1055
|
-
consecutiveFailures = 0;
|
|
1056
|
-
outageStartTs = 0;
|
|
1057
|
-
const sd = getStateDir();
|
|
1058
|
-
if (sd) {
|
|
1059
|
-
sendTelegramAlert("overflow_reset", "⚠️ *sinain-hud* overflow reset triggered",
|
|
1060
|
-
`• ${OVERFLOW_CONSECUTIVE_THRESHOLD} consecutive overflow errors\n• Transcript truncated`,
|
|
1061
|
-
sd);
|
|
1062
|
-
}
|
|
1063
|
-
}
|
|
1064
|
-
}
|
|
1065
|
-
} else if (isSuccess) {
|
|
1066
|
-
consecutiveOverflowErrors = 0;
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
// Duration-gated overflow reset: long failure + overflow error pattern = stuck retry loop.
|
|
1070
|
-
// The core misclassifies "extra usage is required" as rate_limit → infinite retry.
|
|
1071
|
-
// After the run times out (>3min), we detect it and reset for the next cycle.
|
|
1072
|
-
const isLongFailure = !isSuccess && durationMs > LONG_FAILURE_THRESHOLD_MS;
|
|
1073
|
-
if (isLongFailure && OVERFLOW_ERROR_PATTERN.test(String(event.error ?? ""))) {
|
|
1074
|
-
api.logger.warn(
|
|
1075
|
-
`sinain-hud: long failure (${Math.round(durationMs / 1000)}s) with overflow error — immediate reset`,
|
|
1076
|
-
);
|
|
1077
|
-
if (performOverflowReset()) {
|
|
1078
|
-
lastResetTs = Date.now();
|
|
1079
|
-
consecutiveOverflowErrors = 0;
|
|
1080
|
-
outageDetected = false;
|
|
1081
|
-
consecutiveFailures = 0;
|
|
1082
|
-
outageStartTs = 0;
|
|
1083
|
-
const sd = getStateDir();
|
|
1084
|
-
if (sd) {
|
|
1085
|
-
sendTelegramAlert("overflow_reset", "⚠️ *sinain-hud* overflow reset (stuck retry)",
|
|
1086
|
-
`• ${Math.round(durationMs / 1000)}s failed run with overflow error\n• Transcript truncated, next heartbeat should recover`,
|
|
1087
|
-
sd);
|
|
1088
|
-
}
|
|
1089
|
-
}
|
|
1090
|
-
}
|
|
533
|
+
resilience.checkOverflow(isSuccess, event.error ? String(event.error) : undefined, durationMs, resilienceBackend, api.logger);
|
|
1091
534
|
}
|
|
1092
535
|
|
|
1093
536
|
// ── Count tool usage by name ────────────────────────────────────────
|
|
@@ -1097,14 +540,8 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1097
540
|
}
|
|
1098
541
|
|
|
1099
542
|
// ── Write session summary (skip during outage — noise reduction) ───
|
|
1100
|
-
const skipSummary = outageDetected && isShortFailure;
|
|
543
|
+
const skipSummary = resilience.outageDetected && isShortFailure;
|
|
1101
544
|
if (state.workspaceDir && !skipSummary) {
|
|
1102
|
-
const summaryPath = join(
|
|
1103
|
-
state.workspaceDir,
|
|
1104
|
-
"memory",
|
|
1105
|
-
"session-summaries.jsonl",
|
|
1106
|
-
);
|
|
1107
|
-
|
|
1108
545
|
const summary = {
|
|
1109
546
|
ts: new Date().toISOString(),
|
|
1110
547
|
sessionKey,
|
|
@@ -1118,13 +555,7 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1118
555
|
};
|
|
1119
556
|
|
|
1120
557
|
try {
|
|
1121
|
-
|
|
1122
|
-
if (!existsSync(dir)) {
|
|
1123
|
-
mkdirSync(dir, { recursive: true });
|
|
1124
|
-
}
|
|
1125
|
-
writeFileSync(summaryPath, JSON.stringify(summary) + "\n", {
|
|
1126
|
-
flag: "a",
|
|
1127
|
-
});
|
|
558
|
+
store.appendSessionSummary(summary);
|
|
1128
559
|
api.logger.info(
|
|
1129
560
|
`sinain-hud: session summary written (${toolCount} tools, ${Math.round(durationMs / 1000)}s)`,
|
|
1130
561
|
);
|
|
@@ -1149,23 +580,22 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1149
580
|
|
|
1150
581
|
// ── Heartbeat compliance (exempt during outage) ─────────────────────
|
|
1151
582
|
if ((ctx as Record<string, unknown>).messageProvider === "heartbeat") {
|
|
1152
|
-
if (outageDetected && isShortFailure) {
|
|
1153
|
-
// Agent couldn't even process the prompt — don't count as a skip
|
|
583
|
+
if (resilience.outageDetected && isShortFailure) {
|
|
1154
584
|
api.logger.info(
|
|
1155
585
|
`sinain-hud: heartbeat compliance exempted (outage active, ${Math.round(durationMs / 1000)}s run)`,
|
|
1156
586
|
);
|
|
1157
587
|
} else if (!state.heartbeatToolCalled) {
|
|
1158
|
-
consecutiveHeartbeatSkips++;
|
|
588
|
+
resilience.consecutiveHeartbeatSkips++;
|
|
1159
589
|
api.logger.warn(
|
|
1160
|
-
`sinain-hud: heartbeat compliance violation — tool not called (consecutive: ${consecutiveHeartbeatSkips})`,
|
|
590
|
+
`sinain-hud: heartbeat compliance violation — tool not called (consecutive: ${resilience.consecutiveHeartbeatSkips})`,
|
|
1161
591
|
);
|
|
1162
|
-
if (consecutiveHeartbeatSkips >= 3) {
|
|
592
|
+
if (resilience.consecutiveHeartbeatSkips >= 3) {
|
|
1163
593
|
api.logger.warn(
|
|
1164
|
-
`sinain-hud: ESCALATION — ${consecutiveHeartbeatSkips} consecutive heartbeat skips`,
|
|
594
|
+
`sinain-hud: ESCALATION — ${resilience.consecutiveHeartbeatSkips} consecutive heartbeat skips`,
|
|
1165
595
|
);
|
|
1166
596
|
}
|
|
1167
597
|
} else {
|
|
1168
|
-
consecutiveHeartbeatSkips = 0;
|
|
598
|
+
resilience.consecutiveHeartbeatSkips = 0;
|
|
1169
599
|
}
|
|
1170
600
|
}
|
|
1171
601
|
|
|
@@ -1230,21 +660,10 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1230
660
|
|
|
1231
661
|
api.on("gateway_start", async () => {
|
|
1232
662
|
sessionStates.clear();
|
|
1233
|
-
|
|
1234
|
-
recentOutcomes.length = 0;
|
|
1235
|
-
lastSuccessTs = 0;
|
|
1236
|
-
lastPlaybookGenTs = 0;
|
|
1237
|
-
lastFileSyncTs = 0;
|
|
1238
|
-
outageDetected = false;
|
|
1239
|
-
consecutiveFailures = 0;
|
|
1240
|
-
outageStartTs = 0;
|
|
1241
|
-
consecutiveHeartbeatSkips = 0;
|
|
1242
|
-
consecutiveOverflowErrors = 0;
|
|
663
|
+
resilience.resetAll();
|
|
1243
664
|
parentContextCache = null;
|
|
1244
|
-
// Reset watchdog alert state
|
|
1245
|
-
lastResetTs = 0;
|
|
1246
665
|
_alertCooldowns.clear();
|
|
1247
|
-
_cachedBotToken = undefined;
|
|
666
|
+
_cachedBotToken = undefined;
|
|
1248
667
|
_alertMissingConfigLogged = false;
|
|
1249
668
|
api.logger.info("sinain-hud: gateway started, session + resilience + watchdog tracking reset");
|
|
1250
669
|
});
|
|
@@ -1286,13 +705,13 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1286
705
|
}
|
|
1287
706
|
|
|
1288
707
|
// Resilience info
|
|
1289
|
-
const { rate, total, failures } = computeErrorRate();
|
|
708
|
+
const { rate, total, failures } = resilience.computeErrorRate();
|
|
1290
709
|
lines.push("\n**Resilience**");
|
|
1291
|
-
lines.push(`- Outage: ${outageDetected ? `ACTIVE (${Math.round((Date.now() - outageStartTs) / 1000)}s, ${consecutiveFailures} consecutive failures)` : "clear"}`);
|
|
710
|
+
lines.push(`- Outage: ${resilience.outageDetected ? `ACTIVE (${Math.round((Date.now() - resilience.outageStartTs) / 1000)}s, ${resilience.consecutiveFailures} consecutive failures)` : "clear"}`);
|
|
1292
711
|
lines.push(`- Error rate: ${Math.round(rate * 100)}% (${failures}/${total} in ${ERROR_WINDOW_MS / 60_000}min window)`);
|
|
1293
|
-
lines.push(`- Last success: ${lastSuccessTs > 0 ? `${Math.round((Date.now() - lastSuccessTs) / 1000)}s ago` : "never"}`);
|
|
1294
|
-
lines.push(`- Heartbeat skips: ${consecutiveHeartbeatSkips}`);
|
|
1295
|
-
lines.push(`- Overflow watchdog: ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
|
|
712
|
+
lines.push(`- Last success: ${resilience.lastSuccessTs > 0 ? `${Math.round((Date.now() - resilience.lastSuccessTs) / 1000)}s ago` : "never"}`);
|
|
713
|
+
lines.push(`- Heartbeat skips: ${resilience.consecutiveHeartbeatSkips}`);
|
|
714
|
+
lines.push(`- Overflow watchdog: ${resilience.consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
|
|
1296
715
|
lines.push(`- Parent context cache: ${parentContextCache ? `${parentContextCache.contextText.length} chars, ${Math.round((Date.now() - parentContextCache.capturedAt) / 1000)}s old` : "empty"}`);
|
|
1297
716
|
|
|
1298
717
|
return { text: lines.join("\n") };
|
|
@@ -1316,18 +735,11 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1316
735
|
return { text: "No workspace directory available (no active session)." };
|
|
1317
736
|
}
|
|
1318
737
|
|
|
1319
|
-
const
|
|
1320
|
-
if (!
|
|
738
|
+
const registry = store.readModuleRegistry();
|
|
739
|
+
if (!registry) {
|
|
1321
740
|
return { text: "Module system not initialized (no module-registry.json found)." };
|
|
1322
741
|
}
|
|
1323
742
|
|
|
1324
|
-
let registry: ModuleRegistry;
|
|
1325
|
-
try {
|
|
1326
|
-
registry = JSON.parse(readFileSync(registryPath, "utf-8")) as ModuleRegistry;
|
|
1327
|
-
} catch {
|
|
1328
|
-
return { text: "Failed to parse module-registry.json." };
|
|
1329
|
-
}
|
|
1330
|
-
|
|
1331
743
|
const active: Array<{ id: string; priority: number; locked: boolean }> = [];
|
|
1332
744
|
const suspended: string[] = [];
|
|
1333
745
|
const disabled: string[] = [];
|
|
@@ -1383,53 +795,27 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1383
795
|
return { text: "No workspace directory available (no active session)." };
|
|
1384
796
|
}
|
|
1385
797
|
|
|
1386
|
-
const reportsDir = join(workspaceDir, "memory", "eval-reports");
|
|
1387
|
-
const logsDir = join(workspaceDir, "memory", "eval-logs");
|
|
1388
798
|
const lines: string[] = ["**Evaluation Report**\n"];
|
|
1389
799
|
|
|
1390
800
|
// Find latest report
|
|
1391
|
-
|
|
1392
|
-
if (
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
.sort()
|
|
1396
|
-
.reverse();
|
|
1397
|
-
if (reports.length > 0) {
|
|
1398
|
-
try {
|
|
1399
|
-
latestReport = readFileSync(join(reportsDir, reports[0]), "utf-8");
|
|
1400
|
-
lines.push(latestReport.trim());
|
|
1401
|
-
} catch {
|
|
1402
|
-
lines.push("Failed to read latest report.");
|
|
1403
|
-
}
|
|
1404
|
-
}
|
|
1405
|
-
}
|
|
1406
|
-
|
|
1407
|
-
if (!latestReport) {
|
|
801
|
+
const latestReport = store.readLatestEvalReport();
|
|
802
|
+
if (latestReport) {
|
|
803
|
+
lines.push(latestReport.trim());
|
|
804
|
+
} else {
|
|
1408
805
|
lines.push("No eval reports generated yet.\n");
|
|
1409
806
|
}
|
|
1410
807
|
|
|
1411
808
|
// Show latest eval-log entries
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
.reverse();
|
|
1417
|
-
if (logFiles.length > 0) {
|
|
809
|
+
const recentLogs = store.readRecentEvalLogs(5);
|
|
810
|
+
if (recentLogs.length > 0) {
|
|
811
|
+
lines.push("\n**Recent Tick Evaluations** (last 5):");
|
|
812
|
+
for (const line of recentLogs) {
|
|
1418
813
|
try {
|
|
1419
|
-
const
|
|
1420
|
-
const
|
|
1421
|
-
lines.push(
|
|
1422
|
-
for (const line of entries) {
|
|
1423
|
-
try {
|
|
1424
|
-
const e = JSON.parse(line) as Record<string, unknown>;
|
|
1425
|
-
const judges = e.judges ? ` judgeAvg=${e.judgeAvg ?? "?"}` : "";
|
|
1426
|
-
lines.push(` ${e.tickTs} — passRate=${e.passRate}${judges}`);
|
|
1427
|
-
} catch {
|
|
1428
|
-
// skip malformed line
|
|
1429
|
-
}
|
|
1430
|
-
}
|
|
814
|
+
const e = JSON.parse(line) as Record<string, unknown>;
|
|
815
|
+
const judges = e.judges ? ` judgeAvg=${e.judgeAvg ?? "?"}` : "";
|
|
816
|
+
lines.push(` ${e.tickTs} — passRate=${e.passRate}${judges}`);
|
|
1431
817
|
} catch {
|
|
1432
|
-
// skip
|
|
818
|
+
// skip malformed line
|
|
1433
819
|
}
|
|
1434
820
|
}
|
|
1435
821
|
}
|
|
@@ -1484,15 +870,15 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1484
870
|
name: "sinain_health",
|
|
1485
871
|
description: "Run health watchdog checks on-demand and show results",
|
|
1486
872
|
handler: () => {
|
|
1487
|
-
const checks =
|
|
873
|
+
const checks = watchdog.runChecks();
|
|
1488
874
|
const lines: string[] = ["**Health Watchdog Report**\n"];
|
|
1489
875
|
|
|
1490
876
|
lines.push(`Transcript: ${checks.transcriptMB !== null ? `${checks.transcriptMB}MB` : "unknown"}`);
|
|
1491
|
-
lines.push(`Last success: ${checks.staleSec > 0 ? `${checks.staleSec}s ago` : lastSuccessTs > 0 ? "just now" : "never"}`);
|
|
877
|
+
lines.push(`Last success: ${checks.staleSec > 0 ? `${checks.staleSec}s ago` : resilience.lastSuccessTs > 0 ? "just now" : "never"}`);
|
|
1492
878
|
lines.push(`Error rate: ${Math.round(checks.errorRate * 100)}% (${checks.errorTotal} samples)`);
|
|
1493
879
|
lines.push(`Overflow counter: ${checks.overflowCount}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
|
|
1494
|
-
lines.push(`Last reset: ${lastResetTs > 0 ? `${Math.round((Date.now() - lastResetTs) / 1000)}s ago` : "never"}`);
|
|
1495
|
-
lines.push(`Last auto-restart: ${lastAutoRestartTs > 0 ? `${Math.round((Date.now() - lastAutoRestartTs) / 1000)}s ago` : "never"}`);
|
|
880
|
+
lines.push(`Last reset: ${resilience.lastResetTs > 0 ? `${Math.round((Date.now() - resilience.lastResetTs) / 1000)}s ago` : "never"}`);
|
|
881
|
+
lines.push(`Last auto-restart: ${resilience.lastAutoRestartTs > 0 ? `${Math.round((Date.now() - resilience.lastAutoRestartTs) / 1000)}s ago` : "never"}`);
|
|
1496
882
|
lines.push(`Alerts configured: ${process.env.SINAIN_ALERT_CHAT_ID ? "yes" : "no (SINAIN_ALERT_CHAT_ID not set)"}`);
|
|
1497
883
|
|
|
1498
884
|
if (checks.issues.length > 0) {
|
|
@@ -1541,155 +927,8 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1541
927
|
_toolCallId: string,
|
|
1542
928
|
params: { sessionSummary: string; idle: boolean },
|
|
1543
929
|
) {
|
|
1544
|
-
|
|
1545
|
-
|
|
1546
|
-
gitBackup: null,
|
|
1547
|
-
signals: [],
|
|
1548
|
-
recommendedAction: { action: "skip", task: null, confidence: 0 },
|
|
1549
|
-
output: null,
|
|
1550
|
-
skipped: false,
|
|
1551
|
-
skipReason: null,
|
|
1552
|
-
logWritten: false,
|
|
1553
|
-
};
|
|
1554
|
-
|
|
1555
|
-
// Helper: run a python script and parse JSON stdout
|
|
1556
|
-
const runScript = async (
|
|
1557
|
-
args: string[],
|
|
1558
|
-
timeoutMs = 60_000,
|
|
1559
|
-
): Promise<Record<string, unknown> | null> => {
|
|
1560
|
-
try {
|
|
1561
|
-
const out = await api.runtime.system.runCommandWithTimeout(
|
|
1562
|
-
["uv", "run", "--with", "requests", "python3", ...args],
|
|
1563
|
-
{ timeoutMs, cwd: workspaceDir },
|
|
1564
|
-
);
|
|
1565
|
-
if (out.code !== 0) {
|
|
1566
|
-
api.logger.warn(
|
|
1567
|
-
`sinain-hud: heartbeat script failed: ${args[0]} (code ${out.code})\n${out.stderr}`,
|
|
1568
|
-
);
|
|
1569
|
-
return null;
|
|
1570
|
-
}
|
|
1571
|
-
return JSON.parse(out.stdout.trim());
|
|
1572
|
-
} catch (err) {
|
|
1573
|
-
api.logger.warn(
|
|
1574
|
-
`sinain-hud: heartbeat script error: ${args[0]}: ${String(err)}`,
|
|
1575
|
-
);
|
|
1576
|
-
return null;
|
|
1577
|
-
}
|
|
1578
|
-
};
|
|
1579
|
-
|
|
1580
|
-
// Latency tracking helper
|
|
1581
|
-
const latencyMs: Record<string, number> = {};
|
|
1582
|
-
const heartbeatStart = Date.now();
|
|
1583
|
-
|
|
1584
|
-
// 1. Git backup (30s timeout)
|
|
1585
|
-
try {
|
|
1586
|
-
const t0 = Date.now();
|
|
1587
|
-
const gitOut = await api.runtime.system.runCommandWithTimeout(
|
|
1588
|
-
["bash", "sinain-memory/git_backup.sh"],
|
|
1589
|
-
{ timeoutMs: 30_000, cwd: workspaceDir },
|
|
1590
|
-
);
|
|
1591
|
-
latencyMs.gitBackup = Date.now() - t0;
|
|
1592
|
-
result.gitBackup = gitOut.stdout.trim() || "nothing to commit";
|
|
1593
|
-
} catch (err) {
|
|
1594
|
-
api.logger.warn(`sinain-hud: git backup error: ${String(err)}`);
|
|
1595
|
-
result.gitBackup = `error: ${String(err)}`;
|
|
1596
|
-
}
|
|
1597
|
-
|
|
1598
|
-
// Current time string for memory scripts
|
|
1599
|
-
const hbTz = cfg.userTimezone ?? "Europe/Berlin";
|
|
1600
|
-
const currentTimeStr = new Date().toLocaleString("en-GB", {
|
|
1601
|
-
timeZone: hbTz, weekday: "long", year: "numeric", month: "long",
|
|
1602
|
-
day: "numeric", hour: "2-digit", minute: "2-digit", hour12: false,
|
|
1603
|
-
}) + ` (${hbTz})`;
|
|
1604
|
-
|
|
1605
|
-
// 2. Signal analysis (60s timeout)
|
|
1606
|
-
const signalArgs = [
|
|
1607
|
-
"sinain-memory/signal_analyzer.py",
|
|
1608
|
-
"--memory-dir", "memory/",
|
|
1609
|
-
"--session-summary", params.sessionSummary,
|
|
1610
|
-
"--current-time", currentTimeStr,
|
|
1611
|
-
];
|
|
1612
|
-
if (params.idle) signalArgs.push("--idle");
|
|
1613
|
-
|
|
1614
|
-
const signalT0 = Date.now();
|
|
1615
|
-
const signalResult = await runScript(signalArgs, 60_000);
|
|
1616
|
-
latencyMs.signalAnalysis = Date.now() - signalT0;
|
|
1617
|
-
if (signalResult) {
|
|
1618
|
-
result.signals = signalResult.signals ?? [];
|
|
1619
|
-
result.recommendedAction = signalResult.recommendedAction ?? {
|
|
1620
|
-
action: "skip",
|
|
1621
|
-
task: null,
|
|
1622
|
-
confidence: 0,
|
|
1623
|
-
};
|
|
1624
|
-
|
|
1625
|
-
// Fire-and-forget: ingest signal into triple store
|
|
1626
|
-
const tickTs = new Date().toISOString();
|
|
1627
|
-
runScript([
|
|
1628
|
-
"sinain-memory/triple_ingest.py",
|
|
1629
|
-
"--memory-dir", "memory/",
|
|
1630
|
-
"--tick-ts", tickTs,
|
|
1631
|
-
"--signal-result", JSON.stringify(signalResult),
|
|
1632
|
-
"--embed",
|
|
1633
|
-
], 15_000).catch(() => {});
|
|
1634
|
-
}
|
|
1635
|
-
|
|
1636
|
-
// 3. Insight synthesis (60s timeout)
|
|
1637
|
-
const synthArgs = [
|
|
1638
|
-
"sinain-memory/insight_synthesizer.py",
|
|
1639
|
-
"--memory-dir", "memory/",
|
|
1640
|
-
"--session-summary", params.sessionSummary,
|
|
1641
|
-
"--current-time", currentTimeStr,
|
|
1642
|
-
];
|
|
1643
|
-
if (params.idle) synthArgs.push("--idle");
|
|
1644
|
-
|
|
1645
|
-
const synthT0 = Date.now();
|
|
1646
|
-
const synthResult = await runScript(synthArgs, 60_000);
|
|
1647
|
-
latencyMs.insightSynthesis = Date.now() - synthT0;
|
|
1648
|
-
if (synthResult) {
|
|
1649
|
-
if (synthResult.skip === false) {
|
|
1650
|
-
result.output = {
|
|
1651
|
-
suggestion: synthResult.suggestion ?? null,
|
|
1652
|
-
insight: synthResult.insight ?? null,
|
|
1653
|
-
};
|
|
1654
|
-
} else {
|
|
1655
|
-
result.skipped = true;
|
|
1656
|
-
result.skipReason = synthResult.skipReason ?? "synthesizer skipped";
|
|
1657
|
-
}
|
|
1658
|
-
}
|
|
1659
|
-
|
|
1660
|
-
// 4. Write log entry to memory/playbook-logs/YYYY-MM-DD.jsonl
|
|
1661
|
-
try {
|
|
1662
|
-
const now = new Date();
|
|
1663
|
-
const dateStr = now.toISOString().slice(0, 10);
|
|
1664
|
-
const logDir = join(workspaceDir, "memory", "playbook-logs");
|
|
1665
|
-
if (!existsSync(logDir)) mkdirSync(logDir, { recursive: true });
|
|
1666
|
-
|
|
1667
|
-
const totalLatencyMs = Date.now() - heartbeatStart;
|
|
1668
|
-
const logEntry = {
|
|
1669
|
-
ts: now.toISOString(),
|
|
1670
|
-
idle: params.idle,
|
|
1671
|
-
sessionHistorySummary: params.sessionSummary,
|
|
1672
|
-
signals: result.signals,
|
|
1673
|
-
recommendedAction: result.recommendedAction,
|
|
1674
|
-
output: result.output,
|
|
1675
|
-
skipped: result.skipped,
|
|
1676
|
-
skipReason: result.skipReason,
|
|
1677
|
-
gitBackup: result.gitBackup,
|
|
1678
|
-
latencyMs,
|
|
1679
|
-
totalLatencyMs,
|
|
1680
|
-
};
|
|
1681
|
-
|
|
1682
|
-
writeFileSync(
|
|
1683
|
-
join(logDir, `${dateStr}.jsonl`),
|
|
1684
|
-
JSON.stringify(logEntry) + "\n",
|
|
1685
|
-
{ flag: "a" },
|
|
1686
|
-
);
|
|
1687
|
-
result.logWritten = true;
|
|
1688
|
-
} catch (err) {
|
|
1689
|
-
api.logger.warn(
|
|
1690
|
-
`sinain-hud: failed to write heartbeat log: ${String(err)}`,
|
|
1691
|
-
);
|
|
1692
|
-
}
|
|
930
|
+
store.setWorkspaceDir(workspaceDir);
|
|
931
|
+
const result = await engine.executeHeartbeatTick(params);
|
|
1693
932
|
|
|
1694
933
|
return {
|
|
1695
934
|
content: [
|
|
@@ -1703,315 +942,6 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
1703
942
|
{ name: "sinain_heartbeat_tick" },
|
|
1704
943
|
);
|
|
1705
944
|
|
|
1706
|
-
// ==========================================================================
|
|
1707
|
-
// Effectiveness footer update
|
|
1708
|
-
// ==========================================================================
|
|
1709
|
-
|
|
1710
|
-
function updateEffectivenessFooter(
|
|
1711
|
-
workspaceDir: string,
|
|
1712
|
-
effectiveness: Record<string, unknown>,
|
|
1713
|
-
): void {
|
|
1714
|
-
const playbookPath = join(workspaceDir, "memory", "sinain-playbook.md");
|
|
1715
|
-
if (!existsSync(playbookPath)) return;
|
|
1716
|
-
let content = readFileSync(playbookPath, "utf-8");
|
|
1717
|
-
const today = new Date().toISOString().slice(0, 10);
|
|
1718
|
-
const newFooter = `<!-- effectiveness: outputs=${effectiveness.outputs ?? 0}, positive=${effectiveness.positive ?? 0}, negative=${effectiveness.negative ?? 0}, neutral=${effectiveness.neutral ?? 0}, rate=${effectiveness.rate ?? 0}, updated=${today} -->`;
|
|
1719
|
-
const footerRe = /<!--\s*effectiveness:[^>]+-->/;
|
|
1720
|
-
if (footerRe.test(content)) {
|
|
1721
|
-
content = content.replace(footerRe, newFooter);
|
|
1722
|
-
} else {
|
|
1723
|
-
content = content.trimEnd() + "\n\n" + newFooter + "\n";
|
|
1724
|
-
}
|
|
1725
|
-
writeFileSync(playbookPath, content, "utf-8");
|
|
1726
|
-
}
|
|
1727
|
-
|
|
1728
|
-
// ==========================================================================
|
|
1729
|
-
// Curation pipeline (runs on 30-min timer)
|
|
1730
|
-
// ==========================================================================
|
|
1731
|
-
|
|
1732
|
-
async function runCurationPipeline(workspaceDir: string): Promise<void> {
|
|
1733
|
-
const runScript = async (
|
|
1734
|
-
args: string[],
|
|
1735
|
-
timeoutMs = 90_000,
|
|
1736
|
-
): Promise<Record<string, unknown> | null> => {
|
|
1737
|
-
try {
|
|
1738
|
-
const result = await api.runtime.system.runCommandWithTimeout(
|
|
1739
|
-
["uv", "run", "--with", "requests", "python3", ...args],
|
|
1740
|
-
{ timeoutMs, cwd: workspaceDir },
|
|
1741
|
-
);
|
|
1742
|
-
if (result.code !== 0) {
|
|
1743
|
-
api.logger.warn(
|
|
1744
|
-
`sinain-hud: curation script failed: ${args[0]} (code ${result.code})\n${result.stderr}`,
|
|
1745
|
-
);
|
|
1746
|
-
return null;
|
|
1747
|
-
}
|
|
1748
|
-
return JSON.parse(result.stdout.trim());
|
|
1749
|
-
} catch (err) {
|
|
1750
|
-
api.logger.warn(
|
|
1751
|
-
`sinain-hud: curation script error: ${args[0]}: ${String(err)}`,
|
|
1752
|
-
);
|
|
1753
|
-
return null;
|
|
1754
|
-
}
|
|
1755
|
-
};
|
|
1756
|
-
|
|
1757
|
-
api.logger.info("sinain-hud: curation pipeline starting");
|
|
1758
|
-
const curationLatency: Record<string, number> = {};
|
|
1759
|
-
|
|
1760
|
-
// Step 1: Feedback analysis
|
|
1761
|
-
const feedbackT0 = Date.now();
|
|
1762
|
-
const feedback = await runScript([
|
|
1763
|
-
"sinain-memory/feedback_analyzer.py",
|
|
1764
|
-
"--memory-dir", "memory/",
|
|
1765
|
-
"--session-summary", "periodic curation (plugin timer)",
|
|
1766
|
-
]);
|
|
1767
|
-
curationLatency.feedback = Date.now() - feedbackT0;
|
|
1768
|
-
const directive = (feedback as Record<string, unknown> | null)?.curateDirective as string ?? "stability";
|
|
1769
|
-
|
|
1770
|
-
// Step 2: Memory mining (background task — mines unread daily files)
|
|
1771
|
-
const miningT0 = Date.now();
|
|
1772
|
-
const mining = await runScript([
|
|
1773
|
-
"sinain-memory/memory_miner.py",
|
|
1774
|
-
"--memory-dir", "memory/",
|
|
1775
|
-
]);
|
|
1776
|
-
curationLatency.mining = Date.now() - miningT0;
|
|
1777
|
-
const findings = mining?.findings ? JSON.stringify(mining.findings) : null;
|
|
1778
|
-
|
|
1779
|
-
// Fire-and-forget: ingest mining results into triple store
|
|
1780
|
-
if (mining) {
|
|
1781
|
-
runScript([
|
|
1782
|
-
"sinain-memory/triple_ingest.py",
|
|
1783
|
-
"--memory-dir", "memory/",
|
|
1784
|
-
"--ingest-mining", JSON.stringify(mining),
|
|
1785
|
-
"--embed",
|
|
1786
|
-
], 15_000).catch(() => {});
|
|
1787
|
-
}
|
|
1788
|
-
|
|
1789
|
-
// Step 3: Playbook curation
|
|
1790
|
-
const curatorArgs = [
|
|
1791
|
-
"sinain-memory/playbook_curator.py",
|
|
1792
|
-
"--memory-dir", "memory/",
|
|
1793
|
-
"--session-summary", "periodic curation (plugin timer)",
|
|
1794
|
-
"--curate-directive", directive,
|
|
1795
|
-
];
|
|
1796
|
-
if (findings) {
|
|
1797
|
-
curatorArgs.push("--mining-findings", findings);
|
|
1798
|
-
}
|
|
1799
|
-
const curatorT0 = Date.now();
|
|
1800
|
-
const curator = await runScript(curatorArgs);
|
|
1801
|
-
curationLatency.curation = Date.now() - curatorT0;
|
|
1802
|
-
|
|
1803
|
-
// Fire-and-forget: ingest playbook patterns into triple store
|
|
1804
|
-
runScript([
|
|
1805
|
-
"sinain-memory/triple_ingest.py",
|
|
1806
|
-
"--memory-dir", "memory/",
|
|
1807
|
-
"--ingest-playbook",
|
|
1808
|
-
"--embed",
|
|
1809
|
-
], 15_000).catch(() => {});
|
|
1810
|
-
|
|
1811
|
-
// Step 4: Update effectiveness footer with fresh metrics
|
|
1812
|
-
const effectiveness = (feedback as Record<string, unknown> | null)?.effectiveness;
|
|
1813
|
-
if (effectiveness && typeof effectiveness === "object") {
|
|
1814
|
-
try {
|
|
1815
|
-
updateEffectivenessFooter(workspaceDir, effectiveness as Record<string, unknown>);
|
|
1816
|
-
} catch (err) {
|
|
1817
|
-
api.logger.warn(`sinain-hud: effectiveness footer update failed: ${String(err)}`);
|
|
1818
|
-
}
|
|
1819
|
-
}
|
|
1820
|
-
|
|
1821
|
-
// Step 5: Regenerate effective playbook after curation
|
|
1822
|
-
generateEffectivePlaybook(workspaceDir, api.logger);
|
|
1823
|
-
|
|
1824
|
-
// Step 6: Tick evaluation (runs mechanical + sampled judges)
|
|
1825
|
-
await runScript([
|
|
1826
|
-
"sinain-memory/tick_evaluator.py",
|
|
1827
|
-
"--memory-dir", "memory/",
|
|
1828
|
-
], 120_000);
|
|
1829
|
-
|
|
1830
|
-
// Step 7: Daily eval report (run once per day after 03:00 UTC)
|
|
1831
|
-
const nowUTC = new Date();
|
|
1832
|
-
const todayStr = nowUTC.toISOString().slice(0, 10);
|
|
1833
|
-
if (nowUTC.getUTCHours() >= 3 && lastEvalReportDate !== todayStr) {
|
|
1834
|
-
await runScript([
|
|
1835
|
-
"sinain-memory/eval_reporter.py",
|
|
1836
|
-
"--memory-dir", "memory/",
|
|
1837
|
-
], 120_000);
|
|
1838
|
-
lastEvalReportDate = todayStr;
|
|
1839
|
-
}
|
|
1840
|
-
|
|
1841
|
-
// Log result with curation latency
|
|
1842
|
-
const changes = (curator as Record<string, unknown> | null)?.changes ?? "unknown";
|
|
1843
|
-
api.logger.info(
|
|
1844
|
-
`sinain-hud: curation pipeline complete (directive=${directive}, changes=${JSON.stringify(changes)}, latency=${JSON.stringify(curationLatency)})`,
|
|
1845
|
-
);
|
|
1846
|
-
|
|
1847
|
-
// Write curation result to playbook-logs so eval_reporter can track churn
|
|
1848
|
-
if (curator) {
|
|
1849
|
-
try {
|
|
1850
|
-
const dateStr = new Date().toISOString().slice(0, 10);
|
|
1851
|
-
const logDir = join(workspaceDir, "memory", "playbook-logs");
|
|
1852
|
-
const curatorChanges = (curator as Record<string, unknown>).changes as Record<string, string[]> | undefined;
|
|
1853
|
-
const curationEntry = {
|
|
1854
|
-
_type: "curation",
|
|
1855
|
-
ts: new Date().toISOString(),
|
|
1856
|
-
directive,
|
|
1857
|
-
playbookChanges: {
|
|
1858
|
-
added: curatorChanges?.added ?? [],
|
|
1859
|
-
pruned: curatorChanges?.pruned ?? [],
|
|
1860
|
-
promoted: curatorChanges?.promoted ?? [],
|
|
1861
|
-
playbookLines: (curator as Record<string, unknown>).playbookLines ?? 0,
|
|
1862
|
-
},
|
|
1863
|
-
latencyMs: curationLatency,
|
|
1864
|
-
};
|
|
1865
|
-
writeFileSync(
|
|
1866
|
-
join(logDir, `${dateStr}.jsonl`),
|
|
1867
|
-
JSON.stringify(curationEntry) + "\n",
|
|
1868
|
-
{ flag: "a" },
|
|
1869
|
-
);
|
|
1870
|
-
} catch (err) {
|
|
1871
|
-
api.logger.warn(`sinain-hud: failed to write curation log entry: ${String(err)}`);
|
|
1872
|
-
}
|
|
1873
|
-
}
|
|
1874
|
-
}
|
|
1875
|
-
|
|
1876
|
-
// ==========================================================================
|
|
1877
|
-
// Health watchdog helpers
|
|
1878
|
-
// ==========================================================================
|
|
1879
|
-
|
|
1880
|
-
function getStateDir(): string | null {
|
|
1881
|
-
// State dir is the parent of the workspace dir (e.g. /home/node/.openclaw)
|
|
1882
|
-
if (!lastWorkspaceDir) return null;
|
|
1883
|
-
return dirname(lastWorkspaceDir);
|
|
1884
|
-
}
|
|
1885
|
-
|
|
1886
|
-
function getTranscriptSize(): { path: string; bytes: number } | null {
|
|
1887
|
-
const sessionsJsonPath = getSessionsJsonPath();
|
|
1888
|
-
if (!sessionsJsonPath || !cfg.sessionKey) return null;
|
|
1889
|
-
try {
|
|
1890
|
-
const sessionsData = JSON.parse(readFileSync(sessionsJsonPath, "utf-8"));
|
|
1891
|
-
const session = sessionsData[cfg.sessionKey];
|
|
1892
|
-
const transcriptPath = session?.sessionFile as string | undefined;
|
|
1893
|
-
if (!transcriptPath || !existsSync(transcriptPath)) return null;
|
|
1894
|
-
return { path: transcriptPath, bytes: statSync(transcriptPath).size };
|
|
1895
|
-
} catch {
|
|
1896
|
-
return null;
|
|
1897
|
-
}
|
|
1898
|
-
}
|
|
1899
|
-
|
|
1900
|
-
function runHealthChecks(): {
|
|
1901
|
-
transcriptMB: number | null;
|
|
1902
|
-
staleSec: number;
|
|
1903
|
-
errorRate: number;
|
|
1904
|
-
errorTotal: number;
|
|
1905
|
-
overflowCount: number;
|
|
1906
|
-
resetRecently: boolean;
|
|
1907
|
-
issues: string[];
|
|
1908
|
-
} {
|
|
1909
|
-
const transcript = getTranscriptSize();
|
|
1910
|
-
const transcriptMB = transcript ? +(transcript.bytes / 1_000_000).toFixed(2) : null;
|
|
1911
|
-
const staleSec = lastSuccessTs > 0 ? Math.round((Date.now() - lastSuccessTs) / 1000) : 0;
|
|
1912
|
-
const { rate, total } = computeErrorRate();
|
|
1913
|
-
const resetRecently = lastResetTs > 0 && (Date.now() - lastResetTs) < STALENESS_CRITICAL_MS * 2;
|
|
1914
|
-
|
|
1915
|
-
const issues: string[] = [];
|
|
1916
|
-
if (transcriptMB !== null && transcript!.bytes >= SESSION_SIZE_WARNING_BYTES) {
|
|
1917
|
-
issues.push(`transcript ${transcriptMB}MB (threshold ${(SESSION_SIZE_WARNING_BYTES / 1_000_000).toFixed(1)}MB)`);
|
|
1918
|
-
}
|
|
1919
|
-
if (lastSuccessTs > 0 && (Date.now() - lastSuccessTs) >= STALENESS_WARNING_MS && recentOutcomes.length >= 3) {
|
|
1920
|
-
issues.push(`stale ${staleSec}s since last success`);
|
|
1921
|
-
}
|
|
1922
|
-
if (total >= 5 && rate > 0.5) {
|
|
1923
|
-
issues.push(`error rate ${Math.round(rate * 100)}% (${total} samples)`);
|
|
1924
|
-
}
|
|
1925
|
-
if (consecutiveOverflowErrors >= 3) {
|
|
1926
|
-
issues.push(`overflow errors ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
|
|
1927
|
-
}
|
|
1928
|
-
if (resetRecently && lastSuccessTs > 0 && lastSuccessTs < lastResetTs) {
|
|
1929
|
-
issues.push("post-reset stall (no success since reset)");
|
|
1930
|
-
}
|
|
1931
|
-
|
|
1932
|
-
return { transcriptMB, staleSec, errorRate: rate, errorTotal: total, overflowCount: consecutiveOverflowErrors, resetRecently, issues };
|
|
1933
|
-
}
|
|
1934
|
-
|
|
1935
|
-
async function runHealthWatchdog(): Promise<void> {
|
|
1936
|
-
const stateDir = getStateDir();
|
|
1937
|
-
if (!stateDir) return;
|
|
1938
|
-
|
|
1939
|
-
const transcript = getTranscriptSize();
|
|
1940
|
-
const now = Date.now();
|
|
1941
|
-
|
|
1942
|
-
// ── Layer 1: Proactive session size check ────────────────────────────
|
|
1943
|
-
if (transcript && transcript.bytes >= SESSION_SIZE_WARNING_BYTES) {
|
|
1944
|
-
const sizeMB = (transcript.bytes / 1_000_000).toFixed(1);
|
|
1945
|
-
|
|
1946
|
-
if (transcript.bytes >= SESSION_SIZE_RESTART_BYTES) {
|
|
1947
|
-
// Critical — force reset
|
|
1948
|
-
api.logger.warn(`sinain-hud: watchdog — transcript ${sizeMB}MB, forcing overflow reset`);
|
|
1949
|
-
if (performOverflowReset()) {
|
|
1950
|
-
lastResetTs = now;
|
|
1951
|
-
consecutiveOverflowErrors = 0;
|
|
1952
|
-
sendTelegramAlert("proactive_reset", "⚠️ *sinain-hud* proactive session reset", `• Transcript was ${sizeMB}MB → truncated\n• No downtime expected`, stateDir);
|
|
1953
|
-
}
|
|
1954
|
-
} else {
|
|
1955
|
-
// Warning — proactive reset at 1.5MB
|
|
1956
|
-
api.logger.info(`sinain-hud: watchdog — transcript ${sizeMB}MB, proactive reset`);
|
|
1957
|
-
if (performOverflowReset()) {
|
|
1958
|
-
lastResetTs = now;
|
|
1959
|
-
consecutiveOverflowErrors = 0;
|
|
1960
|
-
sendTelegramAlert("proactive_reset", "⚠️ *sinain-hud* proactive session reset", `• Transcript was ${sizeMB}MB → truncated\n• No downtime expected`, stateDir);
|
|
1961
|
-
}
|
|
1962
|
-
}
|
|
1963
|
-
}
|
|
1964
|
-
|
|
1965
|
-
// ── Staleness check ──────────────────────────────────────────────────
|
|
1966
|
-
if (lastSuccessTs > 0 && recentOutcomes.length >= 3) {
|
|
1967
|
-
const staleMs = now - lastSuccessTs;
|
|
1968
|
-
|
|
1969
|
-
if (staleMs >= STALENESS_WARNING_MS && staleMs < STALENESS_CRITICAL_MS) {
|
|
1970
|
-
const staleMin = Math.round(staleMs / 60_000);
|
|
1971
|
-
sendTelegramAlert("staleness_warning", "⚠️ *sinain-hud* response stale",
|
|
1972
|
-
`• No successful run in ${staleMin}min\n• Error rate: ${Math.round(computeErrorRate().rate * 100)}%`,
|
|
1973
|
-
stateDir);
|
|
1974
|
-
}
|
|
1975
|
-
}
|
|
1976
|
-
|
|
1977
|
-
// ── Layer 2: Emergency restart — reset didn't recover ────────────────
|
|
1978
|
-
if (lastResetTs > 0 && lastSuccessTs > 0 && lastSuccessTs < lastResetTs) {
|
|
1979
|
-
const sinceResetMs = now - lastResetTs;
|
|
1980
|
-
if (sinceResetMs >= STALENESS_CRITICAL_MS) {
|
|
1981
|
-
// Reset was performed but no success since → queue is jammed
|
|
1982
|
-
const canRestart = (now - lastAutoRestartTs) >= AUTO_RESTART_COOLDOWN_MS;
|
|
1983
|
-
if (canRestart) {
|
|
1984
|
-
const staleMin = Math.round((now - lastSuccessTs) / 60_000);
|
|
1985
|
-
api.logger.warn(`sinain-hud: EMERGENCY RESTART — reset ${Math.round(sinceResetMs / 60_000)}min ago, no recovery`);
|
|
1986
|
-
// Send alert BEFORE exit so user sees it
|
|
1987
|
-
await sendTelegramAlert("emergency_restart", "🔴 *sinain-hud* EMERGENCY RESTART",
|
|
1988
|
-
`• Queue jammed — reset didn't recover in ${Math.round(sinceResetMs / 60_000)}min\n• Last success: ${staleMin}min ago\n• Gateway restarting now (~5s)`,
|
|
1989
|
-
stateDir);
|
|
1990
|
-
lastAutoRestartTs = now;
|
|
1991
|
-
// Give Telegram a moment to deliver
|
|
1992
|
-
await new Promise((r) => setTimeout(r, 1000));
|
|
1993
|
-
process.exit(1);
|
|
1994
|
-
} else {
|
|
1995
|
-
api.logger.warn("sinain-hud: watchdog — would restart but cooldown active (max 1/hour)");
|
|
1996
|
-
}
|
|
1997
|
-
}
|
|
1998
|
-
}
|
|
1999
|
-
|
|
2000
|
-
// ── Error rate alert ─────────────────────────────────────────────────
|
|
2001
|
-
const { rate, total } = computeErrorRate();
|
|
2002
|
-
if (total >= 5 && rate > 0.5) {
|
|
2003
|
-
sendTelegramAlert("high_error_rate", "⚠️ *sinain-hud* high error rate",
|
|
2004
|
-
`• ${Math.round(rate * 100)}% failures over ${total} samples\n• Consecutive overflow errors: ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
|
|
2005
|
-
stateDir);
|
|
2006
|
-
}
|
|
2007
|
-
|
|
2008
|
-
// ── Overflow approaching threshold ───────────────────────────────────
|
|
2009
|
-
if (consecutiveOverflowErrors >= 3 && consecutiveOverflowErrors < OVERFLOW_CONSECUTIVE_THRESHOLD) {
|
|
2010
|
-
sendTelegramAlert("overflow_warning", "⚠️ *sinain-hud* overflow errors accumulating",
|
|
2011
|
-
`• ${consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD} consecutive overflow errors\n• Auto-reset will trigger at ${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
|
|
2012
|
-
stateDir);
|
|
2013
|
-
}
|
|
2014
|
-
}
|
|
2015
945
|
|
|
2016
946
|
// ==========================================================================
|
|
2017
947
|
// Service registration
|
|
@@ -2024,42 +954,24 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
2024
954
|
`sinain-hud: service started (heartbeat: ${cfg.heartbeatPath ?? "not configured"})`,
|
|
2025
955
|
);
|
|
2026
956
|
|
|
2027
|
-
// Start health watchdog — runs every 5 minutes
|
|
2028
|
-
|
|
2029
|
-
runHealthWatchdog().catch((err) => {
|
|
2030
|
-
api.logger.warn(`sinain-hud: watchdog error: ${String(err)}`);
|
|
2031
|
-
});
|
|
2032
|
-
}, WATCHDOG_INTERVAL_MS);
|
|
2033
|
-
api.logger.info("sinain-hud: health watchdog started (5-min interval)");
|
|
957
|
+
// Start health watchdog — runs every 5 minutes
|
|
958
|
+
watchdog.start();
|
|
2034
959
|
|
|
2035
960
|
// Start curation timer — runs every 30 minutes
|
|
2036
|
-
|
|
2037
|
-
// Skip curation during outage — scripts would work (OpenRouter) but
|
|
2038
|
-
// results are wasted when no agent runs succeed
|
|
2039
|
-
if (outageDetected) {
|
|
2040
|
-
api.logger.info("sinain-hud: curation skipped — outage active");
|
|
2041
|
-
return;
|
|
2042
|
-
}
|
|
2043
|
-
|
|
2044
|
-
// Find workspace dir from active sessions or last known
|
|
2045
|
-
let workspaceDir: string | undefined;
|
|
961
|
+
const resolveWorkspaceDir = (): string | null => {
|
|
2046
962
|
for (const state of sessionStates.values()) {
|
|
2047
|
-
if (state.workspaceDir)
|
|
2048
|
-
}
|
|
2049
|
-
workspaceDir ??= lastWorkspaceDir ?? undefined;
|
|
2050
|
-
if (!workspaceDir) {
|
|
2051
|
-
api.logger.info("sinain-hud: curation skipped — no workspace dir");
|
|
2052
|
-
return;
|
|
2053
|
-
}
|
|
2054
|
-
try {
|
|
2055
|
-
await runCurationPipeline(workspaceDir);
|
|
2056
|
-
} catch (err) {
|
|
2057
|
-
api.logger.warn(`sinain-hud: curation pipeline error: ${String(err)}`);
|
|
963
|
+
if (state.workspaceDir) return state.workspaceDir;
|
|
2058
964
|
}
|
|
965
|
+
return lastWorkspaceDir;
|
|
966
|
+
};
|
|
967
|
+
engine.startCurationTimer(
|
|
968
|
+
() => resilience.outageDetected,
|
|
969
|
+
resolveWorkspaceDir,
|
|
970
|
+
);
|
|
2059
971
|
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
972
|
+
// Proactive session hygiene on its own 30-min interval
|
|
973
|
+
// (separate from the curation timer; NOTE: the interval handle is not kept, so stop() does not clear it)
|
|
974
|
+
setInterval(() => {
|
|
2063
975
|
try {
|
|
2064
976
|
const sessionsJsonPath = getSessionsJsonPath();
|
|
2065
977
|
if (sessionsJsonPath && cfg.sessionKey) {
|
|
@@ -2073,13 +985,13 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
2073
985
|
const ageMs = Date.now() - createdAt;
|
|
2074
986
|
if (size > SESSION_HYGIENE_SIZE_BYTES || ageMs > SESSION_HYGIENE_AGE_MS) {
|
|
2075
987
|
api.logger.info(
|
|
2076
|
-
`sinain-hud: proactive session hygiene
|
|
988
|
+
`sinain-hud: proactive session hygiene — size=${Math.round(size / 1024)}KB, age=${Math.round(ageMs / 3600000)}h`,
|
|
2077
989
|
);
|
|
2078
990
|
if (performOverflowReset()) {
|
|
2079
|
-
consecutiveOverflowErrors = 0;
|
|
2080
|
-
outageDetected = false;
|
|
2081
|
-
consecutiveFailures = 0;
|
|
2082
|
-
outageStartTs = 0;
|
|
991
|
+
resilience.consecutiveOverflowErrors = 0;
|
|
992
|
+
resilience.outageDetected = false;
|
|
993
|
+
resilience.consecutiveFailures = 0;
|
|
994
|
+
resilience.outageStartTs = 0;
|
|
2083
995
|
}
|
|
2084
996
|
}
|
|
2085
997
|
}
|
|
@@ -2087,17 +999,11 @@ export default function sinainHudPlugin(api: OpenClawPluginApi): void {
|
|
|
2087
999
|
} catch (err) {
|
|
2088
1000
|
api.logger.warn(`sinain-hud: session hygiene check error: ${String(err)}`);
|
|
2089
1001
|
}
|
|
2090
|
-
}, 30 * 60 * 1000);
|
|
1002
|
+
}, 30 * 60 * 1000);
|
|
2091
1003
|
},
|
|
2092
1004
|
stop: () => {
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
curationInterval = null;
|
|
2096
|
-
}
|
|
2097
|
-
if (watchdogInterval) {
|
|
2098
|
-
clearInterval(watchdogInterval);
|
|
2099
|
-
watchdogInterval = null;
|
|
2100
|
-
}
|
|
1005
|
+
engine.stopCurationTimer();
|
|
1006
|
+
watchdog.stop();
|
|
2101
1007
|
api.logger.info("sinain-hud: service stopped");
|
|
2102
1008
|
sessionStates.clear();
|
|
2103
1009
|
},
|