@pi-unipi/compactor 0.1.7 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,198 @@
1
+ /**
2
+ * AnalyticsEngine — Runtime savings + session continuity reporting.
3
+ *
4
+ * Ported from context-mode's AnalyticsEngine, trimmed to budget-focused stats.
5
+ * Omits: formatReport(), categoryLabels, categoryHints, ThinkInCodeComparison,
6
+ * SandboxIO, dataBar(), visual formatting helpers.
7
+ *
8
+ * Usage:
9
+ * const engine = new AnalyticsEngine(sessionDb);
10
+ * const report = engine.queryAll(runtimeStats);
11
+ */
12
+
13
+ // ─────────────────────────────────────────────────────────
14
+ // Types
15
+ // ─────────────────────────────────────────────────────────
16
+
/**
 * Smallest synchronous database surface AnalyticsEngine depends on.
 *
 * Any driver whose `prepare()` hands back an object exposing `run`, `get`
 * and `all` fits (e.g. better-sqlite3 or bun:sqlite).
 */
export interface DatabaseAdapter {
  /** Turn a SQL string into a statement handle that can be executed. */
  prepare(sql: string): {
    /** Execute the statement with the given bound parameters. */
    run(...params: unknown[]): unknown;
    /** Execute the statement and return one row (shape depends on the query). */
    get(...params: unknown[]): unknown;
    /** Execute the statement and return all rows. */
    all(...params: unknown[]): unknown[];
  };
}
25
+
/**
 * Byte-level summary of context savings.
 *
 * NOTE(review): nothing in the visible code populates this type; the field
 * meanings below are read from the names only — confirm against callers.
 */
export interface ContextSavings {
  /** Presumably the bytes handled before any reduction. */
  rawBytes: number;
  /** Presumably the bytes that actually entered the context. */
  contextBytes: number;
  /** Presumably the difference between the two values above. */
  savedBytes: number;
  /** Presumably `savedBytes` expressed as a percentage. */
  savedPercent: number;
}
33
+
/**
 * Counters collected in memory while a session is live.
 * AnalyticsEngine.queryAll() turns them into the `savings` part of a FullReport.
 */
export interface RuntimeStats {
  /** Bytes returned into context, keyed by tool name. */
  bytesReturned: Record<string, number>;
  /** Bytes indexed; counted as "kept out" of context by queryAll(). */
  bytesIndexed: number;
  /** Bytes sandboxed; counted as "kept out" of context by queryAll(). */
  bytesSandboxed: number;
  /** Number of calls, keyed by tool name. */
  calls: Record<string, number>;
  /** Session start time; queryAll() subtracts it from Date.now() (epoch milliseconds). */
  sessionStart: number;
  /** Cache hit counter (not read by queryAll()). */
  cacheHits: number;
  /** Bytes saved through cache hits (not read by queryAll()). */
  cacheBytesSaved: number;
}
44
+
/**
 * Result of AnalyticsEngine.queryAll(): runtime savings merged with the
 * session, continuity and project-wide figures read from the database.
 */
export interface FullReport {
  /** Savings derived from the in-memory RuntimeStats. */
  savings: {
    /** `total_processed` in KiB, rounded to one decimal. */
    processed_kb: number;
    /** `total_bytes_returned` in KiB, rounded to one decimal. */
    entered_kb: number;
    /** `kept_out` in KiB, rounded to one decimal. */
    saved_kb: number;
    /** Whole-number percentage of processed bytes that did not enter context. */
    pct: number;
    /** Processed bytes divided by returned bytes (at least 1), one decimal. */
    savings_ratio: number;
    /** One row per tool name, sorted by name; `tokens` is returned bytes / 4, rounded. */
    by_tool: Array<{ tool: string; calls: number; context_kb: number; tokens: number }>;
    /** Sum of all per-tool call counts. */
    total_calls: number;
    /** Sum of all per-tool returned bytes. */
    total_bytes_returned: number;
    /** Indexed bytes plus sandboxed bytes. */
    kept_out: number;
    /** `kept_out` plus `total_bytes_returned`. */
    total_processed: number;
  };
  /** Identity and age of the most recently started session. */
  session: {
    /** Session id from session_meta, or "" when no session row exists. */
    id: string;
    /** Minutes since `sessionStart`, formatted with one decimal. */
    uptime_min: string;
  };
  /** Continuity figures scoped to that session. */
  continuity: {
    /** Rows in session_events for the session. */
    total_events: number;
    /** compact_count from session_meta (0 when the row is missing). */
    compact_count: number;
    /** True when the latest session_resume row exists and is not consumed. */
    resume_ready: boolean;
  };
  /** Figures across every session stored in session_events. */
  projectMemory: {
    /** Total rows in session_events. */
    total_events: number;
    /** Number of distinct session ids in session_events. */
    session_count: number;
  };
}
77
+
78
+ // ─────────────────────────────────────────────────────────
79
+ // AnalyticsEngine
80
+ // ─────────────────────────────────────────────────────────
81
+
/**
 * Produces a FullReport from live runtime counters plus rows read through a
 * DatabaseAdapter. The engine only issues SELECT statements.
 */
export class AnalyticsEngine {
  private readonly db: DatabaseAdapter;

  /** @param db Adapter every query in queryAll() is prepared against. */
  constructor(db: DatabaseAdapter) {
    this.db = db;
  }

  /**
   * Merge the supplied runtime counters with continuity data for the most
   * recently started session and with project-wide totals.
   *
   * @param runtimeStats In-memory counters for the current session.
   * @returns The combined report; sizes are reported both raw and in KiB.
   */
  queryAll(runtimeStats: RuntimeStats): FullReport {
    // Small local helpers so each figure below reads as a single expression.
    const fetchRow = <T>(sql: string, ...params: unknown[]): T =>
      this.db.prepare(sql).get(...params) as T;
    const sumValues = (record: Record<string, number>): number => {
      let total = 0;
      for (const value of Object.values(record)) total += value;
      return total;
    };
    const toKb = (bytes: number): number => Math.round(bytes / 1024 * 10) / 10;

    // ── Which session are we reporting on? The newest one in session_meta. ──
    const newest = fetchRow<{ session_id: string } | undefined>(
      "SELECT session_id FROM session_meta ORDER BY started_at DESC LIMIT 1",
    );
    const sid = newest?.session_id ?? "";

    // ── Savings from runtime counters ──
    const totalBytesReturned = sumValues(runtimeStats.bytesReturned);
    const totalCalls = sumValues(runtimeStats.calls);
    const keptOut = runtimeStats.bytesIndexed + runtimeStats.bytesSandboxed;
    const totalProcessed = keptOut + totalBytesReturned;
    // Divisor is clamped to 1 so an empty session does not divide by zero.
    const savingsRatio = totalProcessed / Math.max(totalBytesReturned, 1);
    let reductionPct = 0;
    if (totalProcessed > 0) {
      reductionPct = Math.round((1 - totalBytesReturned / totalProcessed) * 100);
    }

    // A tool may appear in only one of the two maps, so take the union of keys.
    const toolNames = Array.from(
      new Set([
        ...Object.keys(runtimeStats.calls),
        ...Object.keys(runtimeStats.bytesReturned),
      ]),
    ).sort();
    const byTool: FullReport["savings"]["by_tool"] = [];
    for (const tool of toolNames) {
      const returned = runtimeStats.bytesReturned[tool] || 0;
      byTool.push({
        tool,
        calls: runtimeStats.calls[tool] || 0,
        context_kb: toKb(returned),
        tokens: Math.round(returned / 4),
      });
    }

    const elapsedMs = Date.now() - runtimeStats.sessionStart;
    const uptimeMin = (elapsedMs / 60_000).toFixed(1);

    // ── Continuity figures for the selected session ──
    const eventTotal = fetchRow<{ cnt: number }>(
      "SELECT COUNT(*) as cnt FROM session_events WHERE session_id = ?",
      sid,
    ).cnt;

    const metaRow = fetchRow<{ compact_count: number } | undefined>(
      "SELECT compact_count FROM session_meta WHERE session_id = ?",
      sid,
    );
    const compactCount = metaRow?.compact_count ?? 0;

    const resumeRow = fetchRow<{ event_count: number; consumed: number } | undefined>(
      "SELECT event_count, consumed FROM session_resume WHERE session_id = ? ORDER BY created_at DESC LIMIT 1",
      sid,
    );
    // Ready only when a snapshot row exists and has not been consumed yet.
    const resumeReady = Boolean(resumeRow && !resumeRow.consumed);

    // ── Totals across every session (no session_id filter) ──
    const projectTotals = fetchRow<{ cnt: number; sessions: number }>(
      "SELECT COUNT(*) as cnt, COUNT(DISTINCT session_id) as sessions FROM session_events",
    );

    return {
      savings: {
        processed_kb: toKb(totalProcessed),
        entered_kb: toKb(totalBytesReturned),
        saved_kb: toKb(keptOut),
        pct: reductionPct,
        savings_ratio: Math.round(savingsRatio * 10) / 10,
        by_tool: byTool,
        total_calls: totalCalls,
        total_bytes_returned: totalBytesReturned,
        kept_out: keptOut,
        total_processed: totalProcessed,
      },
      session: {
        id: sid,
        uptime_min: uptimeMin,
      },
      continuity: {
        total_events: eventTotal,
        compact_count: compactCount,
        resume_ready: resumeReady,
      },
      projectMemory: {
        total_events: projectTotals.cnt,
        session_count: projectTotals.sessions,
      },
    };
  }
}
177
+
178
+ // ─────────────────────────────────────────────────────────
179
+ // createMinimalDb — in-memory SQLite fallback
180
+ // ─────────────────────────────────────────────────────────
181
+
/**
 * Build a stand-in DatabaseAdapter for when SessionDB is unavailable.
 *
 * No database is opened: every `prepare()` call ignores its SQL and returns
 * the same stub statement. The stub's `get()` yields a fresh zeroed row that
 * carries every column AnalyticsEngine reads (empty `session_id`,
 * `consumed: 1` so a resume is never reported as ready), `all()` yields an
 * empty array and `run()` does nothing.
 */
export function createMinimalDb(): DatabaseAdapter {
  const zeroRow = () => ({
    cnt: 0,
    sessions: 0,
    compact_count: 0,
    session_id: "",
    event_count: 0,
    consumed: 1,
  });

  const stubStatement = {
    run(..._params: unknown[]): void {},
    get(..._params: unknown[]) {
      return zeroRow();
    },
    all(..._params: unknown[]): unknown[] {
      return [];
    },
  };

  return {
    prepare(_sql: string) {
      return stubStatement;
    },
  };
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Auto-injection builder — builds minimal behavioral state injection
3
+ * after compaction. Budget: 150 tokens max.
4
+ *
5
+ * Only includes:
6
+ * - behavioral_directive (role event) — never dropped
7
+ * - session_mode (intent event) — only if budget remains
8
+ *
9
+ * Rules and active_skills are dropped from auto-injection (findable via session_recall).
10
+ */
11
+
12
+ import type { StoredEvent } from "../types.js";
13
+
/** Upper bound, in estimated tokens, for the auto-injected text. */
const MAX_TOKENS = 150;

/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up. An empty string costs zero tokens.
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  return Math.ceil(text.length / charsPerToken);
}
20
+
/** Result of buildAutoInjection(): the text to inject and its estimated cost. */
export interface AutoInjection {
  /** Injection text; sections are separated by a blank line. */
  text: string;
  /** Estimated token count of `text` (about four characters per token). */
  tokens: number;
}
25
+
/**
 * Shorten `text` until its estimated token count fits within `maxTokens`,
 * marking the cut with a trailing "…". Text that already fits is returned
 * unchanged; a budget that cannot hold anything yields "".
 */
function truncateToTokenBudget(text: string, maxTokens: number): string {
  let result = text;
  while (result.length > 0 && estimateTokens(result) > maxTokens) {
    // Scale the length by budget/estimate, leaving one character for the ellipsis.
    // The length strictly decreases on every pass, so the loop terminates.
    const keep = Math.floor((result.length * maxTokens) / estimateTokens(result)) - 1;
    result = keep > 0 ? `${result.slice(0, keep).trimEnd()}…` : "";
  }
  return result;
}

/**
 * Build the minimal behavioral-state text injected after compaction.
 *
 * Sections, in order:
 *  1. `[Role Directive]` — data of the most recent `role` event. It is never
 *     dropped: when it would exceed the budget it is truncated to fit.
 *  2. `[Session Mode]` — data of the most recent `intent` event, added only
 *     when more than 80 tokens remain and the combined text (separator
 *     included) still fits within MAX_TOKENS.
 *
 * Events of any other category (including `rule`) are ignored here.
 *
 * @param events Stored session events, oldest first (the last match wins).
 * @returns The injection text with its estimated token count, or `null` when
 *          neither a role nor a usable intent event exists.
 */
export function buildAutoInjection(events: StoredEvent[]): AutoInjection | null {
  // Walk backwards so the newest event of a category is found without
  // building an intermediate filtered array.
  const latestOf = (category: string): StoredEvent | undefined => {
    for (let i = events.length - 1; i >= 0; i--) {
      const candidate = events[i];
      if (candidate?.category === category) return candidate;
    }
    return undefined;
  };

  const parts: string[] = [];
  let tokenBudget = MAX_TOKENS;

  // 1. behavioral_directive (role) — critical, always included
  const role = latestOf("role");
  if (role) {
    const directiveText = truncateToTokenBudget(`[Role Directive]\n${role.data}`, tokenBudget);
    if (directiveText.length > 0) {
      parts.push(directiveText);
      tokenBudget -= estimateTokens(directiveText);
    }
  }

  // 2. session_mode (intent) — included only if enough budget remains
  if (tokenBudget > 80) {
    const intent = latestOf("intent");
    if (intent) {
      const modeText = `[Session Mode]\n${intent.data}`;
      // Measure the joined text so the "\n\n" separator counts against the budget.
      const combined = [...parts, modeText].join("\n\n");
      if (estimateTokens(combined) <= MAX_TOKENS) {
        parts.push(modeText);
      }
    }
  }

  if (parts.length === 0) return null;

  const text = parts.join("\n\n");
  return { text, tokens: estimateTokens(text) };
}
package/src/session/db.ts CHANGED
@@ -69,7 +69,7 @@ const DEDUP_WINDOW = 5;
69
69
 
70
70
  export class SessionDB {
71
71
  private db: any;
72
- private stmts: Map<string, PreparedStatement> = new Map();
72
+ private stmts: Map<string, PreparedStatement> | null = null;
73
73
  private dbPath: string;
74
74
 
75
75
  constructor(opts?: { dbPath?: string }) {
@@ -101,11 +101,8 @@ export class SessionDB {
101
101
  priority INTEGER NOT NULL DEFAULT 2,
102
102
  data TEXT NOT NULL,
103
103
  project_dir TEXT NOT NULL DEFAULT '',
104
- attribution_source TEXT NOT NULL DEFAULT 'unknown',
105
- attribution_confidence REAL NOT NULL DEFAULT 0,
106
104
  source_hook TEXT NOT NULL,
107
- created_at TEXT NOT NULL DEFAULT (datetime('now')),
108
- data_hash TEXT NOT NULL DEFAULT ''
105
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
109
106
  );
110
107
  CREATE INDEX IF NOT EXISTS idx_session_events_session ON session_events(session_id);
111
108
  CREATE INDEX IF NOT EXISTS idx_session_events_type ON session_events(session_id, type);
@@ -129,11 +126,42 @@ export class SessionDB {
129
126
  consumed INTEGER NOT NULL DEFAULT 0
130
127
  );
131
128
  `);
129
+
130
+ // Run version-gated schema migrations
131
+ this.runMigrations();
132
+ }
133
+
134
+ /** Run version-gated schema migrations using PRAGMA user_version. */
135
+ private runMigrations(): void {
136
+ const currentVersion = this.db.pragma("user_version", { simple: true }) as number;
137
+
138
+ if (currentVersion < 1) {
139
+ // V1: Add columns introduced by compactor gap analysis (2026-04-30)
140
+ // Each ALTER TABLE is wrapped individually — SQLite auto-commits DDL,
141
+ // so a partial failure from a prior run would leave some columns added
142
+ // and others not. We catch "duplicate column" to handle this safely.
143
+ const safeAddColumn = (table: string, col: string, def: string) => {
144
+ try {
145
+ this.db.exec(`ALTER TABLE ${table} ADD COLUMN ${col} ${def}`);
146
+ } catch (e: any) {
147
+ if (e?.message?.includes("duplicate column")) return;
148
+ throw e;
149
+ }
150
+ };
151
+ safeAddColumn("session_meta", "total_chars_before", "INTEGER NOT NULL DEFAULT 0");
152
+ safeAddColumn("session_meta", "total_chars_kept", "INTEGER NOT NULL DEFAULT 0");
153
+ safeAddColumn("session_meta", "total_messages_summarized", "INTEGER NOT NULL DEFAULT 0");
154
+ safeAddColumn("session_events", "attribution_source", "TEXT NOT NULL DEFAULT 'unknown'");
155
+ safeAddColumn("session_events", "attribution_confidence", "REAL NOT NULL DEFAULT 0");
156
+ safeAddColumn("session_events", "data_hash", "TEXT NOT NULL DEFAULT ''");
157
+ this.db.pragma("user_version = 1");
158
+ }
132
159
  }
133
160
 
134
161
  private prepareStatements(): void {
162
+ this.stmts = new Map();
135
163
  const p = (key: string, sql: string) => {
136
- this.stmts.set(key, this.db.prepare(sql) as PreparedStatement);
164
+ this.stmts!.set(key, this.db.prepare(sql) as PreparedStatement);
137
165
  };
138
166
 
139
167
  p("insertEvent", `INSERT INTO session_events (session_id, type, category, priority, data, project_dir, attribution_source, attribution_confidence, source_hook, data_hash) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
@@ -143,8 +171,10 @@ export class SessionDB {
143
171
  p("evictLowestPriority", `DELETE FROM session_events WHERE id = (SELECT id FROM session_events WHERE session_id = ? ORDER BY priority ASC, id ASC LIMIT 1)`);
144
172
  p("updateMetaLastEvent", `UPDATE session_meta SET last_event_at = datetime('now'), event_count = event_count + 1 WHERE session_id = ?`);
145
173
  p("ensureSession", `INSERT OR IGNORE INTO session_meta (session_id, project_dir) VALUES (?, ?)`);
146
- p("getSessionStats", `SELECT session_id, project_dir, started_at, last_event_at, event_count, compact_count FROM session_meta WHERE session_id = ?`);
174
+ p("getSessionStats", `SELECT session_id, project_dir, started_at, last_event_at, event_count, compact_count, total_chars_before, total_chars_kept, total_messages_summarized FROM session_meta WHERE session_id = ?`);
147
175
  p("incrementCompactCount", `UPDATE session_meta SET compact_count = compact_count + 1 WHERE session_id = ?`);
176
+ p("addCompactionStats", `UPDATE session_meta SET total_chars_before = total_chars_before + ?, total_chars_kept = total_chars_kept + ?, total_messages_summarized = total_messages_summarized + ? WHERE session_id = ?`);
177
+ p("getAllTimeStats", `SELECT COALESCE(SUM(total_chars_before), 0) AS all_chars_before, COALESCE(SUM(total_chars_kept), 0) AS all_chars_kept, COALESCE(SUM(total_messages_summarized), 0) AS all_messages_summarized, COALESCE(SUM(compact_count), 0) AS all_compactions FROM session_meta`);
148
178
  p("upsertResume", `INSERT INTO session_resume (session_id, snapshot, event_count) VALUES (?, ?, ?) ON CONFLICT(session_id) DO UPDATE SET snapshot = excluded.snapshot, event_count = excluded.event_count, created_at = datetime('now'), consumed = 0`);
149
179
  p("getResume", `SELECT snapshot, event_count, consumed FROM session_resume WHERE session_id = ?`);
150
180
  p("markResumeConsumed", `UPDATE session_resume SET consumed = 1 WHERE session_id = ?`);
@@ -155,10 +185,11 @@ export class SessionDB {
155
185
  }
156
186
 
157
187
  private stmt(key: string): PreparedStatement {
158
- return this.stmts.get(key)!;
188
+ return this.stmts!.get(key)!;
159
189
  }
160
190
 
161
191
  insertEvent(sessionId: string, event: SessionEvent, sourceHook: string = "PostToolUse"): void {
192
+ if (!this.stmts) return;
162
193
  const dataHash = createHash("sha256").update(event.data).digest("hex").slice(0, 16).toUpperCase();
163
194
  const projectDir = String(event.project_dir ?? "").trim();
164
195
  const attributionSource = String(event.attribution_source ?? "unknown");
@@ -185,42 +216,67 @@ export class SessionDB {
185
216
  }
186
217
 
187
218
  getEvents(sessionId: string, opts?: { type?: string; minPriority?: number; limit?: number }): StoredEvent[] {
219
+ if (!this.stmts) return [];
188
220
  const limit = opts?.limit ?? 1000;
189
221
  return this.stmt("getEvents").all(sessionId, limit) as StoredEvent[];
190
222
  }
191
223
 
192
224
  getEventCount(sessionId: string): number {
225
+ if (!this.stmts) return 0;
193
226
  const row = this.stmt("getEventCount").get(sessionId) as { cnt: number };
194
227
  return row.cnt;
195
228
  }
196
229
 
197
230
  ensureSession(sessionId: string, projectDir: string): void {
231
+ if (!this.stmts) return;
198
232
  this.stmt("ensureSession").run(sessionId, projectDir);
199
233
  }
200
234
 
201
235
  getSessionStats(sessionId: string): SessionMeta | null {
236
+ if (!this.stmts) return null;
202
237
  const row = this.stmt("getSessionStats").get(sessionId) as SessionMeta | undefined;
203
238
  return row ?? null;
204
239
  }
205
240
 
206
241
  incrementCompactCount(sessionId: string): void {
242
+ if (!this.stmts) return;
207
243
  this.stmt("incrementCompactCount").run(sessionId);
208
244
  }
209
245
 
246
+ addCompactionStats(sessionId: string, charsBefore: number, charsKept: number, messagesSummarized: number): void {
247
+ if (!this.stmts) return;
248
+ this.stmt("addCompactionStats").run(charsBefore, charsKept, messagesSummarized, sessionId);
249
+ }
250
+
251
+ getAllTimeStats(): { allCharsBefore: number; allCharsKept: number; allMessagesSummarized: number; allCompactions: number } {
252
+ if (!this.stmts) return { allCharsBefore: 0, allCharsKept: 0, allMessagesSummarized: 0, allCompactions: 0 };
253
+ const row = this.stmt("getAllTimeStats").get() as { all_chars_before: number; all_chars_kept: number; all_messages_summarized: number; all_compactions: number };
254
+ return {
255
+ allCharsBefore: row?.all_chars_before ?? 0,
256
+ allCharsKept: row?.all_chars_kept ?? 0,
257
+ allMessagesSummarized: row?.all_messages_summarized ?? 0,
258
+ allCompactions: row?.all_compactions ?? 0,
259
+ };
260
+ }
261
+
210
262
  upsertResume(sessionId: string, snapshot: string, eventCount?: number): void {
263
+ if (!this.stmts) return;
211
264
  this.stmt("upsertResume").run(sessionId, snapshot, eventCount ?? 0);
212
265
  }
213
266
 
214
267
  getResume(sessionId: string): ResumeRow | null {
268
+ if (!this.stmts) return null;
215
269
  const row = this.stmt("getResume").get(sessionId) as ResumeRow | undefined;
216
270
  return row ?? null;
217
271
  }
218
272
 
219
273
  markResumeConsumed(sessionId: string): void {
274
+ if (!this.stmts) return;
220
275
  this.stmt("markResumeConsumed").run(sessionId);
221
276
  }
222
277
 
223
278
  deleteSession(sessionId: string): void {
279
+ if (!this.stmts) return;
224
280
  this.db.transaction(() => {
225
281
  this.stmt("deleteEvents").run(sessionId);
226
282
  this.stmt("deleteResume").run(sessionId);
@@ -229,6 +285,7 @@ export class SessionDB {
229
285
  }
230
286
 
231
287
  cleanupOldSessions(maxAgeDays: number = 7): number {
288
+ if (!this.stmts) return 0;
232
289
  const oldSessions = this.stmt("getOldSessions").all(`-${maxAgeDays}`) as Array<{ session_id: string }>;
233
290
  for (const { session_id } of oldSessions) {
234
291
  this.deleteSession(session_id);
@@ -236,6 +293,9 @@ export class SessionDB {
236
293
  return oldSessions.length;
237
294
  }
238
295
 
296
+ /** Expose the underlying db for AnalyticsEngine (read-only queries). Returns null if init failed. */
297
+ getDb(): any { return this.db ?? null; }
298
+
239
299
  close(): void {
240
300
  try { this.db.close(); } catch { /* ignore */ }
241
301
  }
@@ -4,6 +4,8 @@
4
4
 
5
5
  import type { SessionDB } from "./db.js";
6
6
  import { buildResumeSnapshot } from "./snapshot.js";
7
+ import { buildAutoInjection } from "./auto-inject.js";
8
+ import { loadConfig } from "../config/manager.js";
7
9
 
8
10
  export async function injectResumeSnapshot(
9
11
  db: SessionDB,
@@ -20,6 +22,16 @@ export async function injectResumeSnapshot(
20
22
  searchTool: opts?.searchTool ?? "ctx_search",
21
23
  });
22
24
 
25
+ // Auto-injection: add behavioral state after compaction (if enabled)
26
+ const config = loadConfig();
27
+ let fullSnapshot = snapshot;
28
+ if (config.pipeline.autoInjection) {
29
+ const autoInjection = buildAutoInjection(events);
30
+ if (autoInjection) {
31
+ fullSnapshot = `${snapshot}\n\n${autoInjection}`;
32
+ }
33
+ }
34
+
23
35
  db.markResumeConsumed(sessionId);
24
- return snapshot;
36
+ return fullSnapshot;
25
37
  }
@@ -42,6 +42,17 @@ export async function loadSQLite() {
/**
 * Apply the standard pragmas to a freshly opened database: WAL journaling,
 * `synchronous = NORMAL`, and — unless the config disables it — a 256MB
 * memory map for read-heavy FTS5 search workloads.
 *
 * The memory map is best-effort: it is applied when the config cannot be
 * loaded, and a runtime that rejects the pragma is silently tolerated. It is
 * skipped only when `pipeline.mmapPragma` is explicitly `false`.
 *
 * @param db Database handle exposing `exec(sql)`.
 */
export function applyWALPragmas(db: any): void {
  db.exec("PRAGMA journal_mode = WAL;");
  db.exec("PRAGMA synchronous = NORMAL;");

  const mmapPragma = "PRAGMA mmap_size = 268435456;"; // 256MB

  let mmapEnabled = true;
  try {
    // NOTE(review): `require` is not defined inside an ES module; if this file
    // is loaded as ESM the lookup always throws and the config switch can
    // never turn mmap off — confirm the module format or switch to an import.
    const { loadConfig } = require("../config/manager.js");
    mmapEnabled = loadConfig().pipeline?.mmapPragma !== false;
  } catch {
    // Config unavailable: fall back to enabling mmap.
    mmapEnabled = true;
  }
  if (!mmapEnabled) return;

  try {
    // Issued exactly once; a failure here is not retried.
    db.exec(mmapPragma);
  } catch {
    /* unsupported runtime */
  }
}
46
57
 
47
58
  export function withRetry<T>(fn: () => T, maxRetries = 3): T {