polygram 0.12.5 → 0.12.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,200 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * events-table retention (#3, spec docs/0.13-events-retention-spec.md).
5
+ *
6
+ * `events` is append-only and grew unbounded fleet-wide (the table that
7
+ * ballooned shumabit.db to 4.4GB in the May-3 EIO storm). This caps it with:
8
+ * - time tiers: diagnostic kinds (high-frequency, short forensic value) pruned
9
+ * at `diagnosticDays`; everything else at `defaultDays`; a keep-forever set
10
+ * (lifecycle + errors) never pruned by time.
11
+ * - a UNIVERSAL per-kind row cap (`maxPerKind`) applied to EVERY kind incl.
12
+ * keep-forever — the real safety net so hardcoded tier lists aren't
13
+ * load-bearing and an incident-storm of one kind (12k/sec handler-error,
14
+ * the May-3 shape) can't balloon the table.
15
+ * - safety guards: `enabled` kill switch, `dryRun`, clock-backward skip, and a
16
+ * mass-delete fraction guard (refuse if a run would remove > a fraction of
17
+ * the table — the check that would have made May-3 a deliberate act).
18
+ *
19
+ * `pruneEvents` is PURE w.r.t. the events log: it deletes + returns counts but
20
+ * writes NO event rows. The caller (polygram boot) emits the `events-pruned` /
21
+ * `-preview` / `-skipped` audit event from the returned result (those kinds are
22
+ * in keepForever, so the audit trail survives its own prune).
23
+ */
24
+
25
/** Milliseconds in one day; turns the `*Days` policy fields into a timestamp span. */
const DAY_MS = 24 * 60 * 60 * 1000;

/**
 * Built-in retention policy. `resolveRetentionPolicy` layers a
 * `config.defaults.events_retention` override on top of these values.
 */
const DEFAULT_POLICY = {
  // Kill switch: when falsy, pruneEvents returns a `disabled` skip result.
  enabled: true,
  // When true, pruneEvents only reports what it would delete.
  dryRun: false,
  // Age limit (in days) for the kinds listed in `diagnosticKinds`.
  diagnosticDays: 14,
  // Age limit (in days) for every kind that is in neither tier list.
  defaultDays: 90,
  diagnosticKinds: [
    'reactor-state',
    'hook-lag-sample',
    'tool-result',
    'cli-ups-seen',
    // Dormant since 2026-05-25; still listed in case they are re-enabled.
    'hook-event',
    'turn-phase-change',
  ],
  keepForeverKinds: [
    'polygram-start',
    'polygram-stop',
    'shutdown-drain',
    'handler-error',
    'auth-expired',
    'resume-fail',
    // The prune's own audit trail: exempt from time pruning so it survives a
    // prune, but still subject to the per-kind cap.
    'events-pruned',
    'events-prune-preview',
    'events-prune-skipped',
  ],
  // Per-kind row cap, applied to every kind (keep-forever included).
  maxPerKind: 50000,
  // A run estimated to delete more than this share of the table is refused.
  maxDeleteFraction: 0.5,
  // Rows removed per DELETE … LIMIT statement.
  batchSize: 5000,
  // compact-* rows drive findOrphanedCompactCommands. Their retention has to
  // stay above the replay-window cap (2h) plus margin, otherwise the rc.66
  // handled-/compact dedup can re-surface an old /compact to a partner.
  // validatePolicy enforces this rather than assuming it.
  compactKinds: [
    'compact-command',
    'compact-boundary',
    'compact-replay',
    'compact-failed-restart',
  ],
  minCompactRetentionMs: 3 * 60 * 60 * 1000, // 3h > 2h replay cap
};
52
+
53
/**
 * Build the effective retention policy: `DEFAULT_POLICY` with any
 * `config.defaults.events_retention` override layered on top.
 *
 * Object spread is shallow, so the three kind lists are resolved explicitly:
 * an overridden list replaces the default wholesale (no element-wise merge),
 * and a missing/falsy one falls back to the default. Each list is copied, so
 * the returned policy never shares an array with `DEFAULT_POLICY` or with the
 * caller's config — mutating a resolved policy cannot corrupt the defaults
 * used by the next resolve.
 *
 * The result is not validated here; pass it to `validatePolicy`.
 *
 * @param {object} [config] Daemon config. May be null/undefined or lack
 *   `defaults` / `defaults.events_retention`.
 * @returns {object} A new policy object.
 */
function resolveRetentionPolicy(config) {
  const overrides = config?.defaults?.events_retention || {};
  const resolved = { ...DEFAULT_POLICY, ...overrides };
  for (const key of ['diagnosticKinds', 'keepForeverKinds', 'compactKinds']) {
    const list = overrides[key] || DEFAULT_POLICY[key];
    // Non-array values pass through untouched so a later validation step sees
    // exactly what the config contained.
    resolved[key] = Array.isArray(list) ? [...list] : list;
  }
  return resolved;
}
65
+
66
/**
 * Fail loud on a misconfigured policy. Called at load and defensively per-run.
 *
 * Checks, in order:
 *  1. `diagnosticKinds`, `keepForeverKinds` and `compactKinds` are lists. A
 *     bare string is rejected: it is iterable, so it would otherwise be read
 *     one character at a time and silently accepted.
 *  2. Every diagnostic / keep-forever kind is a non-empty string.
 *  3. The diagnostic and keep-forever tiers are disjoint.
 *  4. Numeric fields, when present, are usable: `diagnosticDays`,
 *     `defaultDays`, `maxPerKind`, `maxDeleteFraction` and
 *     `minCompactRetentionMs` must be non-NaN numbers >= 0, and `batchSize` a
 *     positive integer. Without this a `null` or negative day count moves the
 *     cutoff to "now" or into the future, and a NaN retention passes the
 *     floor comparison in step 5 (every comparison with NaN is false).
 *  5. Every compact kind keeps at least `minCompactRetentionMs` of history
 *     under the tier it falls into.
 *
 * Fields that are absent (`undefined`) are not checked, so a partial policy
 * object still validates.
 *
 * @param {object} policy Policy to check.
 * @returns {true} Always `true` when the policy is acceptable.
 * @throws {Error} Describing the first problem found.
 */
function validatePolicy(policy) {
  const readList = (name) => {
    const list = policy[name] || [];
    if (typeof list === 'string' || typeof list[Symbol.iterator] !== 'function') {
      throw new Error(`events_retention: ${name} must be a list of kind names`);
    }
    return [...list];
  };
  const diag = readList('diagnosticKinds');
  const keep = readList('keepForeverKinds');
  const compact = readList('compactKinds');

  for (const k of [...diag, ...keep]) {
    if (!k || typeof k !== 'string') {
      throw new Error('events_retention: null/empty kind in a tier list');
    }
  }

  const keepSet = new Set(keep);
  for (const k of diag) {
    if (keepSet.has(k)) {
      throw new Error(`events_retention: kind "${k}" is in both diagnostic and keep-forever (tiers must be disjoint)`);
    }
  }

  const checkNumber = (name, min, integer = false) => {
    const value = policy[name];
    if (value === undefined) return;
    const usable = typeof value === 'number'
      && !Number.isNaN(value)
      && value >= min
      && (!integer || Number.isInteger(value));
    if (!usable) {
      throw new Error(`events_retention: ${name} must be ${integer ? 'an integer' : 'a number'} >= ${min} (got ${typeof value} ${String(value)})`);
    }
  };
  checkNumber('diagnosticDays', 0);
  checkNumber('defaultDays', 0);
  checkNumber('maxPerKind', 0);
  checkNumber('maxDeleteFraction', 0);
  checkNumber('minCompactRetentionMs', 0);
  checkNumber('batchSize', 1, true);

  const diagSet = new Set(diag);
  for (const k of compact) {
    let ms;
    if (keepSet.has(k)) ms = Infinity; // never pruned by time
    else if (diagSet.has(k)) ms = policy.diagnosticDays * DAY_MS;
    else ms = policy.defaultDays * DAY_MS;
    if (ms < policy.minCompactRetentionMs) {
      throw new Error(`events_retention: compact kind "${k}" retention (${ms}ms) is below the replay-window floor (${policy.minCompactRetentionMs}ms) — would re-arm the rc.66 re-surface bug`);
    }
  }
  return true;
}
93
+
94
/**
 * Run a `DELETE … LIMIT ?` statement repeatedly until a batch deletes fewer
 * rows than `batchSize`. In steady state that is a single batch.
 *
 * @param {object} rawDb Database handle exposing `prepare(sql)`; the prepared
 *   statement's `run(...)` must return an object with a `changes` count.
 * @param {string} sql DELETE statement whose last placeholder is the LIMIT.
 * @param {Array} params Values bound to the placeholders before the LIMIT.
 * @param {number} batchSize Maximum rows removed per statement execution.
 * @returns {number} Total number of rows deleted across all batches.
 * @throws {RangeError} If `batchSize` is not a positive integer. With a zero,
 *   negative or NaN size the "short batch" exit test (`changes < batchSize`)
 *   can never be true for a non-negative change count, so the loop would
 *   never terminate.
 */
function batchedDelete(rawDb, sql, params, batchSize) {
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`events_retention: batchSize must be a positive integer (got ${String(batchSize)})`);
  }
  const stmt = rawDb.prepare(sql);
  let deleted = 0;
  let changes;
  do {
    ({ changes } = stmt.run(...params, batchSize));
    deleted += changes;
  } while (changes >= batchSize); // a full batch means there may be more rows
  return deleted;
}
105
+
106
/**
 * Prune the events table per `policy`. Returns one of:
 *   { skipped: true, reason }                    — disabled / clock / mass-delete
 *   { dryRun: true, preview: {default,diagnostic,cap,total}, before }
 *   { deleted: {default,diagnostic,cap,total}, before, after }
 * Never writes an event row (caller logs the audit event).
 *
 * Phases:
 *   1. guards   — `enabled` kill switch, policy validation, empty table,
 *                 clock-backward (newest row is later than `now`);
 *   2. estimate — count what each bucket would delete; this feeds the dry-run
 *                 preview and the mass-delete guard;
 *   3. execute  — batched deletes: default tier, diagnostic tier, then the
 *                 per-kind cap.
 *
 * The cap estimate is taken on the rows that REMAIN after a kind's time-tier
 * prune, mirroring the execute phase (which runs the cap after the time
 * deletes). Counting over-cap rows against the pre-prune total would count
 * rows that are both aged-out and over the cap twice, inflating the preview
 * and letting the mass-delete guard refuse a run that would actually delete
 * less than the allowed fraction.
 *
 * @param {object} rawDb Database handle exposing `prepare(sql)` and
 *   `pragma(text)`.
 * @param {number} now Current time, on the same scale as `events.ts`.
 * @param {object} policy Resolved retention policy (see `DEFAULT_POLICY`).
 * @returns {object} One of the three result shapes above.
 * @throws {Error} Whatever `validatePolicy` or the database calls throw.
 */
function pruneEvents(rawDb, now, policy) {
  if (!policy.enabled) return { skipped: true, reason: 'disabled' };
  validatePolicy(policy);

  const diagSet = new Set(policy.diagnosticKinds);
  const keepSet = new Set(policy.keepForeverKinds);

  const before = rawDb.prepare('SELECT count(*) c, max(ts) mx FROM events').get();
  const totalBefore = before.c;
  if (totalBefore === 0) {
    return { deleted: { default: 0, diagnostic: 0, cap: 0, total: 0 }, before: 0, after: 0 };
  }

  // Clock-backward guard: newest row is in the future relative to `now` ⇒ the
  // system clock can't be trusted, don't delete on it.
  if (before.mx != null && now < before.mx) {
    return { skipped: true, reason: `clock-backward (now ${now} < max ts ${before.mx})` };
  }

  const diagCut = now - policy.diagnosticDays * DAY_MS;
  const defCut = now - policy.defaultDays * DAY_MS;

  // Default-bucket predicate: old AND not diagnostic AND not keep-forever.
  // Explicit ?-placeholders — better-sqlite3 does NOT expand a JS array from one
  // param, and `NOT IN (…, NULL)` is a 3-valued-logic trap. validatePolicy
  // already guarantees no NULL members.
  const excluded = [...diagSet, ...keepSet];
  const ph = excluded.map(() => '?').join(',');
  const defWhere = `ts < ?${excluded.length ? ` AND kind NOT IN (${ph})` : ''}`;

  // ---- estimate (drives dryRun + the mass-delete guard) ----
  const estDefault = rawDb.prepare(`SELECT count(*) c FROM events WHERE ${defWhere}`).get(defCut, ...excluded).c;
  const agedCountStmt = rawDb.prepare('SELECT count(*) c FROM events WHERE kind = ? AND ts < ?');
  const diagAged = new Map(); // diagnostic kind -> rows past diagCut
  let estDiag = 0;
  for (const k of diagSet) {
    const aged = agedCountStmt.get(k, diagCut).c;
    diagAged.set(k, aged);
    estDiag += aged;
  }
  const kinds = rawDb.prepare('SELECT kind, count(*) c FROM events GROUP BY kind').all();
  let estCap = 0;
  for (const { kind, c } of kinds) {
    if (c <= policy.maxPerKind) continue;
    // Rows of this kind the time tiers will already have removed; keep-forever
    // kinds are never pruned by time.
    let agedOut = 0;
    if (diagSet.has(kind)) agedOut = diagAged.get(kind);
    else if (!keepSet.has(kind)) agedOut = agedCountStmt.get(kind, defCut).c;
    estCap += Math.max(0, c - agedOut - policy.maxPerKind);
  }
  const estTotal = estDefault + estDiag + estCap;

  // dryRun returns the preview regardless of the mass-delete guard (you want to
  // SEE a would-be mass delete). Clock-backward already short-circuited above.
  if (policy.dryRun) {
    return {
      dryRun: true,
      preview: { default: estDefault, diagnostic: estDiag, cap: estCap, total: estTotal },
      before: totalBefore,
    };
  }

  // Mass-delete guard: refuse an anomalous run rather than execute it.
  if (estTotal > 0 && estTotal / totalBefore > policy.maxDeleteFraction) {
    return {
      skipped: true,
      reason: `mass-delete-guard (${estTotal}/${totalBefore} = ${(estTotal / totalBefore).toFixed(2)} > ${policy.maxDeleteFraction})`,
    };
  }

  // ---- execute (batched; steady state is a single batch) ----
  const delDefault = batchedDelete(rawDb, `DELETE FROM events WHERE ${defWhere} LIMIT ?`, [defCut, ...excluded], policy.batchSize);
  let delDiag = 0;
  for (const k of diagSet) {
    delDiag += batchedDelete(rawDb, 'DELETE FROM events WHERE kind = ? AND ts < ? LIMIT ?', [k, diagCut], policy.batchSize);
  }
  // Universal cap: for each kind, delete everything older (by id) than the
  // maxPerKind-th most-recent row. Applies to keep-forever too. `kinds` holds
  // the pre-prune counts, so it is only used to pick candidates; the threshold
  // row is looked up against the table as it is now.
  let delCap = 0;
  for (const { kind, c } of kinds) {
    if (c <= policy.maxPerKind) continue;
    const thr = rawDb.prepare('SELECT id FROM events WHERE kind = ? ORDER BY id DESC LIMIT 1 OFFSET ?').get(kind, policy.maxPerKind);
    if (!thr) continue; // a time-delete already brought it under the cap
    delCap += batchedDelete(rawDb, 'DELETE FROM events WHERE kind = ? AND id <= ? LIMIT ?', [kind, thr.id], policy.batchSize);
  }

  const totalDeleted = delDefault + delDiag + delCap;
  // Reclaim WAL slack after a large prune (steady-state prunes are tiny — skip).
  if (totalDeleted > policy.batchSize) {
    try { rawDb.pragma('wal_checkpoint(TRUNCATE)'); } catch { /* best-effort */ }
  }

  return {
    deleted: { default: delDefault, diagnostic: delDiag, cap: delCap, total: totalDeleted },
    before: totalBefore,
    after: totalBefore - totalDeleted,
  };
}
199
+
200
+ module.exports = { pruneEvents, resolveRetentionPolicy, validatePolicy, DEFAULT_POLICY };
@@ -79,6 +79,17 @@ function createGateInbound({
79
79
  && pairings.hasLivePairing({ bot_name: botName, user_id: msg.from.id, chat_id: chatId })) {
80
80
  return true;
81
81
  }
82
+ // The operator owns the bot — their abort is never a bystander abort, so it
83
+ // outranks the @mention/reply requirement even in a group. Without this, the
84
+ // operator's bare "stop" in a group is silently abort-identity-blocked (prod:
85
+ // chat -1003369922517, 2026-06-15). Same operator predicate /rewind uses
86
+ // below: operatorUserId, else adminChatId ONLY when it's a user id — a
87
+ // negative/group adminChatId never equals a positive sender id, so it grants
88
+ // no bypass (fail-safe). Narrow: only the operator, not every group member.
89
+ const opId = config.bot?.operatorUserId;
90
+ const adminChatId = config.bot?.adminChatId;
91
+ const operatorUid = opId != null ? Number(opId) : (adminChatId != null ? Number(adminChatId) : null);
92
+ if (operatorUid != null && msg.from?.id != null && Number(msg.from.id) === operatorUid) return true;
82
93
  return false;
83
94
  }
84
95
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "polygram",
3
- "version": "0.12.5",
3
+ "version": "0.12.7",
4
4
  "description": "Telegram daemon for Claude Code that preserves the OpenClaw per-chat session model. Migration path for OpenClaw users moving to Claude Code.",
5
5
  "main": "lib/ipc/client.js",
6
6
  "bin": {
package/polygram.js CHANGED
@@ -110,6 +110,7 @@ const { applyReactionToMessages } = require('./lib/telegram/album-reactions');
110
110
  const { classify: classifyError, detectWedgedSessionError, isTransientHttpError } = require('./lib/error/classify');
111
111
  const { createAutoResumeTracker, isAutoResumable } = require('./lib/db/auto-resume');
112
112
  const { resolveReplayWindowMs } = require('./lib/db/replay-window');
113
+ const { pruneEvents, resolveRetentionPolicy, validatePolicy } = require('./lib/db/events-retention');
113
114
  // validateIpcFileParam moved with handleSendOverIpc to
114
115
  // lib/handlers/ipc-send.js (commit 36).
115
116
  const {
@@ -2195,6 +2196,43 @@ async function main() {
2195
2196
  process.exit(1);
2196
2197
  }
2197
2198
 
2199
+ // #3 events-table retention. Prune on boot (the primary path — daemons rarely
2200
+ // live to the 24h tick given deploy cadence) + a 24h .unref()'d interval as
2201
+ // insurance for long-uptime daemons. Validation failures DISABLE pruning and
2202
+ // log loud — a retention config typo must never take down the bot, so this
2203
+ // lives outside the DB-fatal try/catch above. pruneEvents writes no event
2204
+ // rows; we emit the audit event here from its result.
2205
+ let eventsRetentionPolicy = null;
2206
+ try {
2207
+ eventsRetentionPolicy = resolveRetentionPolicy(config);
2208
+ validatePolicy(eventsRetentionPolicy);
2209
+ } catch (err) {
2210
+ console.error(`[events-retention] invalid policy — pruning DISABLED: ${err.message}`);
2211
+ eventsRetentionPolicy = null;
2212
+ }
2213
+ const runEventsPrune = (trigger) => {
2214
+ if (!eventsRetentionPolicy) return;
2215
+ try {
2216
+ const res = pruneEvents(db.raw, Date.now(), eventsRetentionPolicy);
2217
+ if (res.skipped) {
2218
+ console.log(`[events-retention] skipped (${trigger}): ${res.reason}`);
2219
+ db.logEvent('events-prune-skipped', { reason: res.reason, trigger });
2220
+ } else if (res.dryRun) {
2221
+ console.log(`[events-retention] DRY-RUN (${trigger}) would delete ${res.preview.total} (default ${res.preview.default}, diag ${res.preview.diagnostic}, cap ${res.preview.cap})`);
2222
+ db.logEvent('events-prune-preview', { ...res.preview, trigger });
2223
+ } else if (res.deleted.total > 0) {
2224
+ console.log(`[events-retention] pruned ${res.deleted.total} (default ${res.deleted.default}, diag ${res.deleted.diagnostic}, cap ${res.deleted.cap}) ${res.before}→${res.after}`);
2225
+ db.logEvent('events-pruned', { ...res.deleted, before: res.before, after: res.after, trigger });
2226
+ }
2227
+ } catch (err) {
2228
+ console.error(`[events-retention] prune failed (${trigger}): ${err.message}`);
2229
+ }
2230
+ };
2231
+ if (eventsRetentionPolicy && eventsRetentionPolicy.enabled) {
2232
+ setImmediate(() => runEventsPrune('boot'));
2233
+ setInterval(() => runEventsPrune('interval'), 24 * 3_600_000).unref?.();
2234
+ }
2235
+
2198
2236
  // 0.8.0 Phase 1 step 11 + rc.50: defensive uncaughtException +
2199
2237
  // unhandledRejection handlers. The new pm wraps every Query
2200
2238
  // iteration in try/catch so SDK throws never leak — but if a