@geravant/sinain 1.0.18 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,336 @@
1
+ /**
2
+ * sinain-knowledge — ResilienceManager + HealthWatchdog
3
+ *
4
+ * Manages retry storm detection, outage tracking, overflow watchdog,
5
+ * and proactive health monitoring. Decoupled from OpenClaw — communicates
6
+ * through the ResilienceBackend interface for transcript access and alerts.
7
+ */
8
+
9
+ import type { Logger } from "../data/schema.js";
10
+
11
+ // ============================================================================
12
+ // Constants
13
+ // ============================================================================
14
+
15
// ── Outcome tracking / outage detection ─────────────────────────────────────

/** Outcomes older than this (5 min) are dropped by ResilienceManager.computeErrorRate(). */
export const ERROR_WINDOW_MS = 300_000;
/** Failure ratio at or above which recordShortFailure() declares an outage. */
export const OUTAGE_ERROR_RATE_THRESHOLD = 0.8;
/** Minimum number of outcomes in the window before an outage may be declared. */
export const OUTAGE_MIN_SAMPLES = 3;
/** Minimum spacing (3 min) between file syncs, see isFileSyncDue(). */
export const FILE_SYNC_DEBOUNCE_MS = 180_000;
/** Minimum spacing (5 min) between playbook generations, see isPlaybookGenDue(). */
export const PLAYBOOK_GEN_DEBOUNCE_MS = 300_000;
/** 10 s. Not referenced in this file — presumably the caller's "short failure" cut-off; verify at call sites. */
export const SHORT_FAILURE_THRESHOLD_MS = 10 * 1000;
/** Failed runs longer than this (3 min) count as "long failures" in checkOverflow(). */
export const LONG_FAILURE_THRESHOLD_MS = 180_000;

// ── Overflow watchdog ───────────────────────────────────────────────────────

/** Number of consecutive overflow errors that triggers a transcript reset. */
export const OVERFLOW_CONSECUTIVE_THRESHOLD = 5;
/** 1 MB. Not referenced in this file — consumers live elsewhere. */
export const OVERFLOW_TRANSCRIPT_MIN_BYTES = 1e6;
/** Error messages matching this pattern are classified as overflow errors. */
export const OVERFLOW_ERROR_PATTERN = /overloaded|context.*too.*long|token.*limit|extra usage is required/i;

// ── Session hygiene (not referenced in this file) ───────────────────────────

/** 2 MB. */
export const SESSION_HYGIENE_SIZE_BYTES = 2e6;
/** 24 hours. */
export const SESSION_HYGIENE_AGE_MS = 24 * 3_600_000;

// ── Health watchdog ─────────────────────────────────────────────────────────

/** Period (5 min) of the HealthWatchdog timer. */
export const WATCHDOG_INTERVAL_MS = 300_000;
/** 15 min. Not referenced in this file — presumably enforced by the alert backend; verify. */
export const ALERT_COOLDOWN_MS = 900_000;
/** Time without a success (10 min) after which a staleness warning is raised. */
export const STALENESS_WARNING_MS = 600_000;
/** Time after a reset (15 min) without recovery before an emergency restart is considered. */
export const STALENESS_CRITICAL_MS = 900_000;
/** Transcript size (1.5 MB) at which the watchdog starts a proactive reset. */
export const SESSION_SIZE_WARNING_BYTES = 1.5e6;
/** Transcript size (2 MB) at which the watchdog logs the reset as forced. */
export const SESSION_SIZE_RESTART_BYTES = 2e6;
/** Minimum spacing (1 hour) between automatic process restarts. */
export const AUTO_RESTART_COOLDOWN_MS = 3_600_000;
37
+
38
+ // ============================================================================
39
+ // Types
40
+ // ============================================================================
41
+
42
/** Failure statistics over the outcomes currently inside the error window. */
export type ErrorRateResult = {
  /** failures / total, or 0 when there are no samples. */
  rate: number;
  /** Number of outcomes inside the window. */
  total: number;
  /** Number of those outcomes that were not successful. */
  failures: number;
};

/** Point-in-time health report produced by HealthWatchdog.runChecks(). */
export type HealthCheckResult = {
  /** Transcript size in MB (two decimals), or null when the backend reports no transcript. */
  transcriptMB: null | number;
  /** Seconds since the last recorded success; 0 when no success has been recorded. */
  staleSec: number;
  /** Failure ratio inside the error window. */
  errorRate: number;
  /** Sample count behind `errorRate`. */
  errorTotal: number;
  /** Current run of consecutive overflow errors. */
  overflowCount: number;
  /** True when a reset happened within twice the critical staleness period. */
  resetRecently: boolean;
  /** Human-readable descriptions of every problem found; empty when healthy. */
  issues: Array<string>;
};
57
+
58
/** Location and size of the backend's session transcript. */
export interface TranscriptInfo {
  /** Path of the transcript. */
  path: string;
  /** Transcript size in bytes. */
  bytes: number;
}

/**
 * Backend operations the resilience layer depends on.
 *
 * Keeping this as a narrow interface means neither ResilienceManager nor
 * HealthWatchdog is coupled to OpenClaw or any other concrete backend.
 */
export interface ResilienceBackend {
  /** Current transcript info, or null when there is none to report. */
  getTranscriptSize(): null | TranscriptInfo;
  /** Attempt an overflow reset of the transcript; callers treat `true` as success. */
  performOverflowReset(): boolean;
  /** Deliver an alert identified by `alertType` with the given title and body. */
  sendAlert(alertType: string, title: string, body: string): Promise<void>;
}
72
+
73
+ // ============================================================================
74
+ // ResilienceManager
75
+ // ============================================================================
76
+
77
/**
 * Tracks run outcomes and derives outage / overflow state from them.
 *
 * All state is held in public fields so that callers (and HealthWatchdog) can
 * read and update it directly. `recentOutcomes` is not appended to by this
 * class — callers are expected to push outcomes before invoking the record
 * methods (TODO confirm against call sites).
 */
export class ResilienceManager {
  /** Outcomes ordered oldest-first; pruned in place by computeErrorRate(). */
  recentOutcomes: Array<{ ts: number; success: boolean; error?: string }> = [];
  /** Epoch ms of the last recorded success; 0 = never. */
  lastSuccessTs = 0;
  consecutiveFailures = 0;
  outageDetected = false;
  /** Epoch ms at which the current outage was detected; 0 = none. */
  outageStartTs = 0;
  consecutiveOverflowErrors = 0;
  consecutiveHeartbeatSkips = 0;
  /** Epoch ms of the last transcript reset; 0 = never. */
  lastResetTs = 0;
  /** Epoch ms of the last automatic restart; 0 = never. */
  lastAutoRestartTs = 0;

  // Debounce timestamps
  lastPlaybookGenTs = 0;
  lastFileSyncTs = 0;
  lastEvalReportDate: string | null = null;

  /**
   * Drop outcomes that fell out of the error window, then compute the failure
   * ratio over what remains.
   *
   * Only the leading run of expired entries is removed, so the array is
   * assumed to be ordered oldest-first.
   *
   * @returns rate/total/failures; all zero when the window is empty.
   */
  computeErrorRate(): ErrorRateResult {
    const cutoff = Date.now() - ERROR_WINDOW_MS;
    const firstFresh = this.recentOutcomes.findIndex((o) => !(o.ts < cutoff));
    const expired = firstFresh === -1 ? this.recentOutcomes.length : firstFresh;
    if (expired > 0) {
      // Mutate in place: other holders of the array reference must see the pruning.
      this.recentOutcomes.splice(0, expired);
    }

    const total = this.recentOutcomes.length;
    if (total === 0) return { rate: 0, total: 0, failures: 0 };
    const failures = this.recentOutcomes.filter((o) => !o.success).length;
    return { rate: failures / total, total, failures };
  }

  /**
   * Record a successful run: clears the failure streak and, if an outage was
   * in progress, logs and alerts the recovery.
   */
  recordSuccess(backend: ResilienceBackend, logger: Logger): void {
    const now = Date.now();
    const wasOutage = this.outageDetected;
    const outageDurationMs = this.outageStartTs > 0 ? now - this.outageStartTs : 0;
    this.consecutiveFailures = 0;
    this.outageDetected = false;
    this.lastSuccessTs = now;
    if (wasOutage) {
      logger.info(
        `sinain-hud: OUTAGE RECOVERED — resumed after ${Math.round(outageDurationMs / 1000)}s`,
      );
      this._sendAlert(backend, logger, "recovery", "✅ *sinain-hud* recovered",
        `• Gateway up, first run succeeded\n• Downtime: ~${Math.round(outageDurationMs / 60_000)}min`);
    }
  }

  /**
   * Record a failed run and declare an outage once the error rate over the
   * window crosses the threshold with enough samples. An outage is announced
   * only once until it is cleared.
   */
  recordShortFailure(backend: ResilienceBackend, logger: Logger): void {
    this.consecutiveFailures++;
    const { rate, total } = this.computeErrorRate();
    if (!this.outageDetected && total >= OUTAGE_MIN_SAMPLES && rate >= OUTAGE_ERROR_RATE_THRESHOLD) {
      this.outageDetected = true;
      this.outageStartTs = Date.now();
      logger.warn(
        `sinain-hud: OUTAGE DETECTED — ${Math.round(rate * 100)}% error rate over ${total} samples, ${this.consecutiveFailures} consecutive failures`,
      );
      this._sendAlert(backend, logger, "outage", "🔴 *sinain-hud* OUTAGE DETECTED",
        `• ${Math.round(rate * 100)}% error rate over ${total} samples\n• ${this.consecutiveFailures} consecutive failures`);
    }
  }

  /**
   * Overflow watchdog for a single finished run.
   *
   * - A success clears the overflow streak.
   * - A failure whose error matches OVERFLOW_ERROR_PATTERN extends the streak;
   *   reaching OVERFLOW_CONSECUTIVE_THRESHOLD triggers a transcript reset.
   * - A matching failure that also ran longer than LONG_FAILURE_THRESHOLD_MS
   *   triggers an immediate reset (stuck retry loop).
   * - Failures with any other error leave the streak untouched.
   *
   * At most one successful reset (and one alert) is performed per call; the
   * long-failure path only runs if the threshold path did not already reset.
   *
   * @param isSuccess  Whether the run succeeded.
   * @param error      Error text of the run, if any.
   * @param durationMs Wall-clock duration of the run.
   */
  checkOverflow(
    isSuccess: boolean,
    error: string | undefined,
    durationMs: number,
    backend: ResilienceBackend,
    logger: Logger,
  ): void {
    if (isSuccess) {
      this.consecutiveOverflowErrors = 0;
      return;
    }
    if (!OVERFLOW_ERROR_PATTERN.test(error ?? "")) return;

    this.consecutiveOverflowErrors++;
    logger.warn(
      `sinain-hud: overflow watchdog — error #${this.consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`,
    );

    let resetDone = false;
    if (this.consecutiveOverflowErrors >= OVERFLOW_CONSECUTIVE_THRESHOLD) {
      logger.warn("sinain-hud: OVERFLOW THRESHOLD REACHED — attempting transcript reset");
      if (backend.performOverflowReset()) {
        this._resetAfterOverflow(backend, logger);
        resetDone = true;
      }
    }

    // Duration-gated: long failure + overflow pattern = stuck retry loop.
    // Skipped when the threshold path already reset, so one run never
    // truncates the transcript (or alerts) twice.
    if (!resetDone && durationMs > LONG_FAILURE_THRESHOLD_MS) {
      const seconds = Math.round(durationMs / 1000);
      logger.warn(
        `sinain-hud: long failure (${seconds}s) with overflow error — immediate reset`,
      );
      if (backend.performOverflowReset()) {
        this._resetAfterOverflow(backend, logger, `• ${seconds}s failed run with overflow error\n• Transcript truncated, next heartbeat should recover`);
      }
    }
  }

  /**
   * Bookkeeping after a successful overflow reset: stamps the reset time,
   * clears overflow/outage state and sends the "overflow_reset" alert.
   *
   * @param body Alert body for the stuck-retry case; omitted for the
   *             consecutive-threshold case, which uses a default body/title.
   */
  private _resetAfterOverflow(backend: ResilienceBackend, logger: Logger, body?: string): void {
    this.lastResetTs = Date.now();
    this.consecutiveOverflowErrors = 0;
    this.outageDetected = false;
    this.consecutiveFailures = 0;
    this.outageStartTs = 0;
    this._sendAlert(
      backend,
      logger,
      "overflow_reset",
      body ? "⚠️ *sinain-hud* overflow reset (stuck retry)" : "⚠️ *sinain-hud* overflow reset triggered",
      body ?? `• ${OVERFLOW_CONSECUTIVE_THRESHOLD} consecutive overflow errors\n• Transcript truncated`,
    );
  }

  /**
   * Fire-and-forget alert delivery. The call itself is synchronous (as
   * before); a rejected delivery is logged instead of surfacing as an
   * unhandled promise rejection.
   */
  private _sendAlert(
    backend: ResilienceBackend,
    logger: Logger,
    alertType: string,
    title: string,
    body: string,
  ): void {
    // Promise.resolve() tolerates backends that return a non-promise value.
    void Promise.resolve(backend.sendAlert(alertType, title, body)).catch((err: unknown) => {
      logger.warn(`sinain-hud: failed to send '${alertType}' alert: ${String(err)}`);
    });
  }

  /** True when no file sync has happened yet or the debounce period has elapsed. */
  isFileSyncDue(): boolean {
    return this.lastFileSyncTs === 0 || (Date.now() - this.lastFileSyncTs) >= FILE_SYNC_DEBOUNCE_MS;
  }

  /** Stamp the file-sync debounce clock. */
  markFileSynced(): void {
    this.lastFileSyncTs = Date.now();
  }

  /** True when no playbook has been generated yet or the debounce period has elapsed. */
  isPlaybookGenDue(): boolean {
    return this.lastPlaybookGenTs === 0 || (Date.now() - this.lastPlaybookGenTs) >= PLAYBOOK_GEN_DEBOUNCE_MS;
  }

  /** Stamp the playbook-generation debounce clock. */
  markPlaybookGenerated(): void {
    this.lastPlaybookGenTs = Date.now();
  }

  /** Return every field to its initial value; `recentOutcomes` is emptied in place. */
  resetAll(): void {
    this.recentOutcomes.length = 0;
    this.lastSuccessTs = 0;
    this.lastPlaybookGenTs = 0;
    this.lastFileSyncTs = 0;
    this.outageDetected = false;
    this.consecutiveFailures = 0;
    this.outageStartTs = 0;
    this.consecutiveHeartbeatSkips = 0;
    this.consecutiveOverflowErrors = 0;
    this.lastResetTs = 0;
    this.lastAutoRestartTs = 0;
    this.lastEvalReportDate = null;
  }
}
211
+
212
+ // ============================================================================
213
+ // HealthWatchdog
214
+ // ============================================================================
215
+
216
/**
 * Periodic health monitor built on top of a ResilienceManager.
 *
 * `runChecks()` is a read-mostly report; `runWatchdog()` acts on the same
 * signals (proactive transcript reset, alerts, emergency process restart).
 */
export class HealthWatchdog {
  private interval: ReturnType<typeof setInterval> | null = null;

  constructor(
    private resilience: ResilienceManager,
    private backend: ResilienceBackend,
    private logger: Logger,
  ) {}

  /**
   * Collect current health indicators without resetting or alerting.
   *
   * Note: this calls `computeErrorRate()`, which prunes expired outcomes from
   * the manager, so the staleness rule below sees the pruned sample count.
   *
   * @returns Metrics plus a list of human-readable issues (empty when healthy).
   */
  runChecks(): HealthCheckResult {
    const now = Date.now();
    const transcript = this.backend.getTranscriptSize();
    const transcriptMB = transcript !== null ? +(transcript.bytes / 1_000_000).toFixed(2) : null;

    const lastSuccessTs = this.resilience.lastSuccessTs;
    const sinceSuccessMs = now - lastSuccessTs;
    const staleSec = lastSuccessTs > 0 ? Math.round(sinceSuccessMs / 1000) : 0;

    const { rate, total } = this.resilience.computeErrorRate();

    const lastResetTs = this.resilience.lastResetTs;
    const resetRecently = lastResetTs > 0 && (now - lastResetTs) < STALENESS_CRITICAL_MS * 2;
    const overflowCount = this.resilience.consecutiveOverflowErrors;

    const issues: string[] = [];
    if (transcript !== null && transcript.bytes >= SESSION_SIZE_WARNING_BYTES) {
      issues.push(`transcript ${transcriptMB}MB (threshold ${(SESSION_SIZE_WARNING_BYTES / 1_000_000).toFixed(1)}MB)`);
    }
    // Staleness only counts once there are at least 3 recent samples.
    if (lastSuccessTs > 0 && sinceSuccessMs >= STALENESS_WARNING_MS && this.resilience.recentOutcomes.length >= 3) {
      issues.push(`stale ${staleSec}s since last success`);
    }
    if (total >= 5 && rate > 0.5) {
      issues.push(`error rate ${Math.round(rate * 100)}% (${total} samples)`);
    }
    if (overflowCount >= 3) {
      issues.push(`overflow errors ${overflowCount}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
    }
    if (resetRecently && lastSuccessTs > 0 && lastSuccessTs < lastResetTs) {
      issues.push("post-reset stall (no success since reset)");
    }

    return { transcriptMB, staleSec, errorRate: rate, errorTotal: total, overflowCount, resetRecently, issues };
  }

  /**
   * One watchdog pass:
   *  1. proactively reset an oversized transcript,
   *  2. warn when no run has succeeded for a while,
   *  3. exit the process (code 1) if an earlier reset did not lead to
   *     recovery, at most once per AUTO_RESTART_COOLDOWN_MS,
   *  4. alert on a high error rate and on accumulating overflow errors.
   *
   * Alerts other than the emergency one are fire-and-forget; delivery
   * failures are logged and never abort the pass.
   */
  async runWatchdog(): Promise<void> {
    const transcript = this.backend.getTranscriptSize();
    const now = Date.now();

    // Layer 1: Proactive session size check
    if (transcript && transcript.bytes >= SESSION_SIZE_WARNING_BYTES) {
      const sizeMB = (transcript.bytes / 1_000_000).toFixed(1);

      if (transcript.bytes >= SESSION_SIZE_RESTART_BYTES) {
        this.logger.warn(`sinain-hud: watchdog — transcript ${sizeMB}MB, forcing overflow reset`);
      } else {
        this.logger.info(`sinain-hud: watchdog — transcript ${sizeMB}MB, proactive reset`);
      }

      if (this.backend.performOverflowReset()) {
        this.resilience.lastResetTs = now;
        this.resilience.consecutiveOverflowErrors = 0;
        this.alert("proactive_reset", "⚠️ *sinain-hud* proactive session reset",
          `• Transcript was ${sizeMB}MB → truncated\n• No downtime expected`);
      }
    }

    // Staleness check — only inside the [warning, critical) band.
    if (this.resilience.lastSuccessTs > 0 && this.resilience.recentOutcomes.length >= 3) {
      const staleMs = now - this.resilience.lastSuccessTs;

      if (staleMs >= STALENESS_WARNING_MS && staleMs < STALENESS_CRITICAL_MS) {
        const staleMin = Math.round(staleMs / 60_000);
        this.alert("staleness_warning", "⚠️ *sinain-hud* response stale",
          `• No successful run in ${staleMin}min\n• Error rate: ${Math.round(this.resilience.computeErrorRate().rate * 100)}%`);
      }
    }

    // Layer 2: Emergency restart — reset didn't recover
    if (this.resilience.lastResetTs > 0 && this.resilience.lastSuccessTs > 0 && this.resilience.lastSuccessTs < this.resilience.lastResetTs) {
      const sinceResetMs = now - this.resilience.lastResetTs;
      if (sinceResetMs >= STALENESS_CRITICAL_MS) {
        const canRestart = (now - this.resilience.lastAutoRestartTs) >= AUTO_RESTART_COOLDOWN_MS;
        if (canRestart) {
          const staleMin = Math.round((now - this.resilience.lastSuccessTs) / 60_000);
          this.logger.warn(`sinain-hud: EMERGENCY RESTART — reset ${Math.round(sinceResetMs / 60_000)}min ago, no recovery`);
          try {
            // Awaited so the alert has a chance to go out before the process dies.
            await this.backend.sendAlert("emergency_restart", "🔴 *sinain-hud* EMERGENCY RESTART",
              `• Queue jammed — reset didn't recover in ${Math.round(sinceResetMs / 60_000)}min\n• Last success: ${staleMin}min ago\n• Gateway restarting now (~5s)`);
          } catch (err: unknown) {
            // A broken alert channel must not prevent the restart itself.
            this.logger.warn(`sinain-hud: failed to send 'emergency_restart' alert: ${String(err)}`);
          }
          this.resilience.lastAutoRestartTs = now;
          await new Promise<void>((resolve) => setTimeout(resolve, 1000));
          process.exit(1);
        } else {
          this.logger.warn("sinain-hud: watchdog — would restart but cooldown active (max 1/hour)");
        }
      }
    }

    // Error rate alert
    const { rate, total } = this.resilience.computeErrorRate();
    if (total >= 5 && rate > 0.5) {
      this.alert("high_error_rate", "⚠️ *sinain-hud* high error rate",
        `• ${Math.round(rate * 100)}% failures over ${total} samples\n• Consecutive overflow errors: ${this.resilience.consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
    }

    // Overflow approaching threshold
    if (this.resilience.consecutiveOverflowErrors >= 3 && this.resilience.consecutiveOverflowErrors < OVERFLOW_CONSECUTIVE_THRESHOLD) {
      this.alert("overflow_warning", "⚠️ *sinain-hud* overflow errors accumulating",
        `• ${this.resilience.consecutiveOverflowErrors}/${OVERFLOW_CONSECUTIVE_THRESHOLD} consecutive overflow errors\n• Auto-reset will trigger at ${OVERFLOW_CONSECUTIVE_THRESHOLD}`);
    }
  }

  /**
   * Start the periodic watchdog. Safe to call repeatedly: a timer left over
   * from an earlier call is cleared first so only one is ever active.
   */
  start(): void {
    this.stop();
    this.interval = setInterval(() => {
      this.runWatchdog().catch((err: unknown) => {
        this.logger.warn(`sinain-hud: watchdog error: ${String(err)}`);
      });
    }, WATCHDOG_INTERVAL_MS);
    this.logger.info("sinain-hud: health watchdog started (5-min interval)");
  }

  /** Stop the periodic watchdog; a no-op when it is not running. */
  stop(): void {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }

  /**
   * Fire-and-forget alert delivery. The backend is invoked synchronously;
   * a rejected delivery is logged rather than left as an unhandled rejection.
   */
  private alert(alertType: string, title: string, body: string): void {
    // Promise.resolve() tolerates backends that return a non-promise value.
    void Promise.resolve(this.backend.sendAlert(alertType, title, body)).catch((err: unknown) => {
      this.logger.warn(`sinain-hud: failed to send '${alertType}' alert: ${String(err)}`);
    });
  }
}
@@ -0,0 +1,310 @@
1
+ /**
2
+ * sinain-knowledge — Git-backed snapshot store
3
+ *
4
+ * Manages a local git repository for periodic knowledge snapshots.
5
+ * Each save overwrites snapshot.json and commits — git history IS the version history.
6
+ *
7
+ * Default location: ~/.sinain/knowledge-snapshots/
8
+ */
9
+
10
+ import { execFileSync } from "node:child_process";
11
+ import { copyFileSync, existsSync, mkdirSync, statSync, writeFileSync } from "node:fs";
12
+ import { join, resolve } from "node:path";
13
+ import { homedir } from "node:os";
14
+
15
+ import type { KnowledgeStore } from "./store.js";
16
+ import { exportSnapshot, importSnapshot, resolveTriplestorePath } from "./snapshot.js";
17
+ import type { KnowledgeSnapshot } from "./snapshot.js";
18
+ import type { Logger } from "./schema.js";
19
+
20
+ // ============================================================================
21
+ // Constants
22
+ // ============================================================================
23
+
24
/** Repository location used when no path is given: ~/.sinain/knowledge-snapshots */
const DEFAULT_REPO_PATH = join(homedir(), ".sinain", "knowledge-snapshots");

/** JSON snapshot document, overwritten and committed on every save. */
const SNAPSHOT_FILE = "snapshot.json";

/** Triplestore database, stored beside the JSON snapshot as a raw binary copy. */
const TRIPLES_FILE = "triples.db";

/** Attributes file written at repo init so git treats the triplestore as binary. */
const GITATTRIBUTES_FILE = ".gitattributes";

/** Default commit count above which prune() expires the reflog and runs gc. */
const MAX_SNAPSHOTS = 100;

// ============================================================================
// GitSnapshotStore
// ============================================================================
34
+
35
/**
 * Git-backed store for knowledge snapshots.
 *
 * Every save overwrites `snapshot.json` (and `triples.db` when a triplestore
 * exists) inside a local git repository and commits the result, so the git
 * log is the snapshot history. Before any operation the repository's remotes
 * are checked once per instance; a remote pointing at a public GitHub
 * repository makes the operation throw.
 */
export class GitSnapshotStore {
  /**
   * Upper bound for captured output of blob reads. Node's default `maxBuffer`
   * for execFileSync is 1 MiB, which a snapshot or triplestore easily exceeds.
   */
  private static readonly BLOB_MAX_BUFFER_BYTES = 1024 * 1024 * 1024;
  /** Timeout for blob reads; larger than the default git timeout because blobs can be big. */
  private static readonly BLOB_TIMEOUT_MS = 60_000;
  /** Timeout for the GitHub visibility request so a hung connection cannot block the store. */
  private static readonly VISIBILITY_TIMEOUT_MS = 10_000;

  private repoPath: string;
  private logger: Logger;
  /** Set once the remote visibility check has completed without refusing. */
  private remoteChecked = false;

  /**
   * @param repoPath Repository directory; defaults to DEFAULT_REPO_PATH.
   * @param logger   Logger; defaults to console.log / console.warn.
   */
  constructor(repoPath?: string, logger?: Logger) {
    this.repoPath = resolve(repoPath ?? DEFAULT_REPO_PATH);
    this.logger = logger ?? { info: console.log, warn: console.warn };
  }

  // ── Git helpers ──────────────────────────────────────────────────────────

  /** Run git in the repo and return trimmed UTF-8 stdout. Throws on non-zero exit or timeout. */
  private git(...args: string[]): string {
    return execFileSync("git", args, {
      cwd: this.repoPath,
      encoding: "utf-8",
      timeout: 15_000,
    }).trim();
  }

  /** Read the raw bytes of `<ref>:<path>` via `git show`, with a buffer large enough for big blobs. */
  private gitBlob(spec: string): Buffer {
    return execFileSync("git", ["show", spec], {
      cwd: this.repoPath,
      timeout: GitSnapshotStore.BLOB_TIMEOUT_MS,
      maxBuffer: GitSnapshotStore.BLOB_MAX_BUFFER_BYTES,
    });
  }

  /** True when the object named by `<ref>:<path>` exists (`git cat-file -e`). */
  private objectExists(spec: string): boolean {
    try {
      this.git("cat-file", "-e", spec);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reject revision arguments that git would parse as command-line options.
   * Valid ref names never start with "-".
   */
  private assertSafeRef(ref: string): void {
    if (ref.length === 0 || ref.startsWith("-")) {
      throw new Error(`Invalid git ref: '${ref}'`);
    }
  }

  /** Number of commits reachable from HEAD; 0 when there is no HEAD or the count is unreadable. */
  private countCommits(): number {
    try {
      const parsed = parseInt(this.git("rev-list", "--count", "HEAD"), 10);
      return Number.isNaN(parsed) ? 0 : parsed;
    } catch {
      return 0; // empty repo
    }
  }

  /**
   * True when the index differs from HEAD. `git diff --cached --quiet` exits
   * with status 1 in that case; any other failure is re-thrown instead of
   * being mistaken for "there are changes".
   */
  private hasStagedChanges(): boolean {
    try {
      this.git("diff", "--cached", "--quiet");
      return false;
    } catch (err: unknown) {
      if ((err as { status?: unknown } | null)?.status === 1) return true;
      throw err;
    }
  }

  /** Create the directory and git repository on first use, then run the remote visibility guard. */
  private async ensureRepo(): Promise<void> {
    if (!existsSync(this.repoPath)) {
      mkdirSync(this.repoPath, { recursive: true });
    }

    const gitDir = join(this.repoPath, ".git");
    if (!existsSync(gitDir)) {
      this.git("init");
      this.git("config", "user.name", "sinain-knowledge");
      this.git("config", "user.email", "sinain@local");

      // Ensure binary handling for triplestore
      const gitattrsPath = join(this.repoPath, GITATTRIBUTES_FILE);
      if (!existsSync(gitattrsPath)) {
        writeFileSync(gitattrsPath, `${TRIPLES_FILE} binary\n`, "utf-8");
      }

      this.logger.info(`sinain-knowledge: initialized snapshot repo at ${this.repoPath}`);
    }

    await this.validateRemoteVisibility();
  }

  // ── Public repo guard ─────────────────────────────────────────────────

  /**
   * Refuse to operate when a remote points at a public GitHub repository.
   *
   * Fail-open by design: non-GitHub remotes, network errors, timeouts and
   * non-2xx API responses (404 for private or missing repos, also rate-limit
   * responses) are treated as "not known to be public". The check runs once
   * per instance unless it refused.
   *
   * @throws Error when the GitHub API reports a remote repository as public.
   */
  private async validateRemoteVisibility(): Promise<void> {
    if (this.remoteChecked) return;

    let remotes: string;
    try {
      remotes = this.git("remote", "-v");
    } catch {
      this.remoteChecked = true;
      return; // no remotes
    }
    if (!remotes) { this.remoteChecked = true; return; }

    // Captures owner and repo from https and ssh remotes. The repo name may
    // contain dots ("my.repo"); only a trailing ".git" suffix is stripped.
    const githubPattern = /github\.com[:/]([^/\s]+)\/([^/\s]+?)(?:\.git)?\/?(?=\s|$)/;
    const checked = new Set<string>();
    const warned = new Set<string>();

    for (const line of remotes.split("\n")) {
      const remoteName = line.split(/\s/)[0];
      const match = line.match(githubPattern);
      if (!match) {
        // `git remote -v` prints fetch and push lines; warn once per remote.
        if (remoteName && !warned.has(remoteName)) {
          warned.add(remoteName);
          if (line.includes("github.com")) {
            this.logger.warn(
              `sinain-knowledge: could not parse GitHub remote '${remoteName}' — skipping visibility check`,
            );
          } else {
            // Non-GitHub remote — warn and skip
            this.logger.warn(
              `sinain-knowledge: remote '${remoteName}' is not GitHub — skipping visibility check`,
            );
          }
        }
        continue;
      }
      const [, owner = "", repo = ""] = match;
      const key = `${owner}/${repo}`;
      if (checked.has(key)) continue; // deduplicate fetch/push lines
      checked.add(key);

      let isPublic = false;
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), GitSnapshotStore.VISIBILITY_TIMEOUT_MS);
      try {
        const resp = await fetch(
          `https://api.github.com/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}`,
          { signal: controller.signal },
        );
        if (resp.ok) {
          const data = (await resp.json()) as { private?: unknown };
          isPublic = data.private === false;
        }
        // 404 = private (or doesn't exist) → safe
      } catch (err: unknown) {
        // Network failures and timeouts are swallowed (fail-open).
        this.logger.warn(
          `sinain-knowledge: could not check visibility of ${key}: ${String(err)}`,
        );
      } finally {
        clearTimeout(timer);
      }

      if (isPublic) {
        throw new Error(
          `Refusing to save: remote '${remoteName}' points to public repo ${owner}/${repo}. ` +
          `Knowledge snapshots contain sensitive data and must only be stored in private repositories.`,
        );
      }
    }

    this.remoteChecked = true;
  }

  // ── Save ─────────────────────────────────────────────────────────────────

  /**
   * Export a snapshot from the store and commit it to the local git repo.
   *
   * @returns The short commit hash — of the new commit, or of HEAD when
   *          nothing changed.
   */
  async save(store: KnowledgeStore): Promise<string> {
    await this.ensureRepo();

    // Export snapshot WITHOUT triplestore (avoid loading GB of data into memory)
    const snapshot = exportSnapshot(store, { skipTriplestore: true });
    const snapshotPath = join(this.repoPath, SNAPSHOT_FILE);
    const filesToStage = [SNAPSHOT_FILE];

    // Copy triplestore directly as binary — no base64 round-trip
    const srcDbPath = resolveTriplestorePath(store.getWorkspaceDir());
    const hasTriples = srcDbPath !== null;
    if (srcDbPath !== null) {
      copyFileSync(srcDbPath, join(this.repoPath, TRIPLES_FILE));
      // The JSON only carries a pointer to the binary file plus its size.
      Object.assign(snapshot, {
        triplestore: { dbFile: TRIPLES_FILE, sizeBytes: statSync(srcDbPath).size },
      });
      filesToStage.push(TRIPLES_FILE);
    }

    // Pretty-printed so commits produce line-based diffs. Key order is
    // whatever exportSnapshot() produced; JSON.stringify does not sort keys.
    writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2) + "\n", "utf-8");

    this.git("add", ...filesToStage);

    if (!this.hasStagedChanges()) {
      this.logger.info("sinain-knowledge: snapshot unchanged, skipping commit");
      return this.git("rev-parse", "--short", "HEAD");
    }

    // Build commit message
    const ts = snapshot.exportedAt;
    const playbookLines = snapshot.playbook.effective.split("\n").length;
    const moduleCount = snapshot.modules.items.length;

    const message = [
      `snapshot ${ts.slice(0, 19).replace("T", " ")}`,
      "",
      `Playbook: ${playbookLines} lines`,
      `Modules: ${moduleCount}`,
      `Triplestore: ${hasTriples ? "yes" : "empty"}`,
      `Source: ${snapshot.exportedFrom}`,
      `Integrity: ${snapshot.integrity.slice(0, 12)}…`,
    ].join("\n");

    this.git("commit", "-m", message);
    const hash = this.git("rev-parse", "--short", "HEAD");
    this.logger.info(`sinain-knowledge: snapshot saved → ${hash}`);
    return hash;
  }

  // ── List ─────────────────────────────────────────────────────────────────

  /**
   * List recent snapshots from git log, newest first.
   *
   * @param count Maximum number of entries.
   * @returns `{ hash, date, subject }` per commit; empty for a repo without commits.
   */
  async list(count = 20): Promise<Array<{ hash: string; date: string; subject: string }>> {
    await this.ensureRepo();

    try {
      const log = this.git(
        "log",
        `--max-count=${count}`,
        "--format=%h\t%ai\t%s",
      );
      if (!log) return [];

      return log.split("\n").map((line) => {
        // Re-join the tail so a subject containing tabs is not truncated.
        const [hash = "", date = "", ...subjectParts] = line.split("\t");
        return { hash, date, subject: subjectParts.join("\t") };
      });
    } catch {
      return []; // empty repo
    }
  }

  // ── Restore ──────────────────────────────────────────────────────────────

  /**
   * Read a snapshot from a specific git commit (or HEAD).
   *
   * For the current format (`triplestore.dbFile`), the triplestore is
   * reconstituted as base64 from the separate binary file of the same commit;
   * if that file is absent the triplestore is treated as empty. Failures while
   * reading an existing file propagate, so a restore never silently wipes the
   * triplestore. The legacy inline `dbBase64` format is passed through.
   *
   * The parsed JSON is not validated against the KnowledgeSnapshot shape.
   *
   * @throws Error for an invalid ref, a missing snapshot, or unreadable blobs.
   */
  async read(ref = "HEAD"): Promise<KnowledgeSnapshot> {
    this.assertSafeRef(ref);
    await this.ensureRepo();
    const content = this.gitBlob(`${ref}:${SNAPSHOT_FILE}`).toString("utf-8");
    const snapshot = JSON.parse(content);

    // Reconstitute base64 from separate binary file (new format)
    if (snapshot.triplestore?.dbFile) {
      const triplesSpec = `${ref}:${TRIPLES_FILE}`;
      snapshot.triplestore = this.objectExists(triplesSpec)
        ? { dbBase64: this.gitBlob(triplesSpec).toString("base64") }
        : { dbBase64: "" }; // triples.db missing for this commit — treat as empty
    }
    // Old format with inline dbBase64 — pass through unchanged

    return snapshot as KnowledgeSnapshot;
  }

  /**
   * Restore a snapshot from a git commit into the knowledge store.
   */
  async restore(store: KnowledgeStore, ref = "HEAD"): Promise<void> {
    const snapshot = await this.read(ref);
    importSnapshot(store, snapshot);
    this.logger.info(`sinain-knowledge: restored snapshot from ${ref}`);
  }

  // ── Diff ─────────────────────────────────────────────────────────────────

  /**
   * Show what changed between two snapshots (defaults to last two commits).
   *
   * @returns `git diff --stat` output, or "(no diff available)" when the diff
   *          cannot be produced (unknown or invalid refs, fewer than two commits).
   */
  async diff(fromRef = "HEAD~1", toRef = "HEAD"): Promise<string> {
    await this.ensureRepo();
    try {
      this.assertSafeRef(fromRef);
      this.assertSafeRef(toRef);
      return this.git("diff", "--stat", fromRef, toRef);
    } catch {
      return "(no diff available)";
    }
  }

  // ── Prune ────────────────────────────────────────────────────────────────

  /**
   * Reclaim space once the repository holds more than `maxSnapshots` commits
   * by expiring the reflog and running `git gc --prune=now`.
   *
   * NOTE: this does not rewrite or squash history — commits reachable from
   * HEAD are kept; only unreachable objects are collected.
   */
  async prune(maxSnapshots = MAX_SNAPSHOTS): Promise<void> {
    await this.ensureRepo();
    const count = this.countCommits();
    if (count <= maxSnapshots) return;

    try {
      this.git("reflog", "expire", "--expire=now", "--all");
      this.git("gc", "--prune=now", "--quiet");
      this.logger.info(`sinain-knowledge: pruned snapshot repo (${count} commits, gc'd)`);
    } catch (err: unknown) {
      // gc failure is non-critical, but worth surfacing
      this.logger.warn(`sinain-knowledge: snapshot repo gc failed: ${String(err)}`);
    }
  }

  // ── Info ──────────────────────────────────────────────────────────────────

  /** Absolute path of the snapshot repository. */
  getRepoPath(): string {
    return this.repoPath;
  }

  /** Number of commits reachable from HEAD; 0 for a repository without commits. */
  async getSnapshotCount(): Promise<number> {
    await this.ensureRepo();
    return this.countCommits();
  }
}