@pi-unipi/compactor 0.1.7 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -24
- package/index.ts +7 -0
- package/package.json +2 -1
- package/skills/compactor/SKILL.md +21 -65
- package/skills/compactor-detail/SKILL.md +133 -0
- package/src/commands/index.ts +186 -109
- package/src/compaction/filter-noise.ts +4 -3
- package/src/compaction/hooks.ts +22 -1
- package/src/compaction/search-entries.ts +51 -4
- package/src/config/manager.ts +55 -6
- package/src/config/presets.ts +69 -5
- package/src/config/schema.ts +9 -0
- package/src/index.ts +183 -10
- package/src/info-screen.ts +10 -4
- package/src/security/policy.ts +23 -0
- package/src/session/auto-inject.ts +60 -0
- package/src/session/db.ts +65 -8
- package/src/session/resume-inject.ts +13 -1
- package/src/store/db-base.ts +11 -0
- package/src/store/index.ts +150 -4
- package/src/store/unified.ts +109 -0
- package/src/tools/context-budget.ts +50 -0
- package/src/tools/ctx-batch-execute.ts +2 -5
- package/src/tools/ctx-fetch-and-index.ts +3 -8
- package/src/tools/ctx-index.ts +3 -9
- package/src/tools/ctx-search.ts +3 -7
- package/src/tools/ctx-stats.ts +6 -4
- package/src/tools/register.ts +251 -216
- package/src/tui/settings-overlay.ts +359 -149
- package/src/types.ts +25 -1
- package/skills/compactor-ops/SKILL.md +0 -65
- package/skills/compactor-tools/SKILL.md +0 -120
package/src/session/db.ts
CHANGED
|
@@ -69,7 +69,7 @@ const DEDUP_WINDOW = 5;
|
|
|
69
69
|
|
|
70
70
|
export class SessionDB {
|
|
71
71
|
private db: any;
|
|
72
|
-
private stmts: Map<string, PreparedStatement> =
|
|
72
|
+
private stmts: Map<string, PreparedStatement> | null = null;
|
|
73
73
|
private dbPath: string;
|
|
74
74
|
|
|
75
75
|
constructor(opts?: { dbPath?: string }) {
|
|
@@ -101,11 +101,8 @@ export class SessionDB {
|
|
|
101
101
|
priority INTEGER NOT NULL DEFAULT 2,
|
|
102
102
|
data TEXT NOT NULL,
|
|
103
103
|
project_dir TEXT NOT NULL DEFAULT '',
|
|
104
|
-
attribution_source TEXT NOT NULL DEFAULT 'unknown',
|
|
105
|
-
attribution_confidence REAL NOT NULL DEFAULT 0,
|
|
106
104
|
source_hook TEXT NOT NULL,
|
|
107
|
-
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
108
|
-
data_hash TEXT NOT NULL DEFAULT ''
|
|
105
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
109
106
|
);
|
|
110
107
|
CREATE INDEX IF NOT EXISTS idx_session_events_session ON session_events(session_id);
|
|
111
108
|
CREATE INDEX IF NOT EXISTS idx_session_events_type ON session_events(session_id, type);
|
|
@@ -129,11 +126,42 @@ export class SessionDB {
|
|
|
129
126
|
consumed INTEGER NOT NULL DEFAULT 0
|
|
130
127
|
);
|
|
131
128
|
`);
|
|
129
|
+
|
|
130
|
+
// Run version-gated schema migrations
|
|
131
|
+
this.runMigrations();
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/**
 * Bring an existing database up to the current schema version.
 *
 * The schema version is tracked in SQLite's `PRAGMA user_version`. When the
 * stored version is below 1, the V1 columns are added to `session_meta` and
 * `session_events` and the version is then set to 1.
 */
private runMigrations(): void {
  const schemaVersion = this.db.pragma("user_version", { simple: true }) as number;
  const needsV1 = schemaVersion < 1;
  if (!needsV1) {
    return;
  }

  // V1: Add columns introduced by compactor gap analysis (2026-04-30).
  // Every column is added with its own ALTER TABLE statement. A previous run
  // may have added only some of them before failing, so a "duplicate column"
  // error is treated as "already migrated" and ignored; anything else is
  // re-thrown.
  const addColumnIfMissing = (table: string, col: string, def: string): void => {
    try {
      this.db.exec(`ALTER TABLE ${table} ADD COLUMN ${col} ${def}`);
    } catch (e: any) {
      const alreadyThere = e?.message?.includes("duplicate column");
      if (!alreadyThere) {
        throw e;
      }
    }
  };

  const v1Columns: Array<[table: string, col: string, def: string]> = [
    ["session_meta", "total_chars_before", "INTEGER NOT NULL DEFAULT 0"],
    ["session_meta", "total_chars_kept", "INTEGER NOT NULL DEFAULT 0"],
    ["session_meta", "total_messages_summarized", "INTEGER NOT NULL DEFAULT 0"],
    ["session_events", "attribution_source", "TEXT NOT NULL DEFAULT 'unknown'"],
    ["session_events", "attribution_confidence", "REAL NOT NULL DEFAULT 0"],
    ["session_events", "data_hash", "TEXT NOT NULL DEFAULT ''"],
  ];
  for (const [table, col, def] of v1Columns) {
    addColumnIfMissing(table, col, def);
  }

  // Record the new version only after every column has been handled.
  this.db.pragma("user_version = 1");
}
|
|
133
160
|
|
|
134
161
|
private prepareStatements(): void {
|
|
162
|
+
this.stmts = new Map();
|
|
135
163
|
const p = (key: string, sql: string) => {
|
|
136
|
-
this.stmts
|
|
164
|
+
this.stmts!.set(key, this.db.prepare(sql) as PreparedStatement);
|
|
137
165
|
};
|
|
138
166
|
|
|
139
167
|
p("insertEvent", `INSERT INTO session_events (session_id, type, category, priority, data, project_dir, attribution_source, attribution_confidence, source_hook, data_hash) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
@@ -143,8 +171,10 @@ export class SessionDB {
|
|
|
143
171
|
p("evictLowestPriority", `DELETE FROM session_events WHERE id = (SELECT id FROM session_events WHERE session_id = ? ORDER BY priority ASC, id ASC LIMIT 1)`);
|
|
144
172
|
p("updateMetaLastEvent", `UPDATE session_meta SET last_event_at = datetime('now'), event_count = event_count + 1 WHERE session_id = ?`);
|
|
145
173
|
p("ensureSession", `INSERT OR IGNORE INTO session_meta (session_id, project_dir) VALUES (?, ?)`);
|
|
146
|
-
p("getSessionStats", `SELECT session_id, project_dir, started_at, last_event_at, event_count, compact_count FROM session_meta WHERE session_id = ?`);
|
|
174
|
+
p("getSessionStats", `SELECT session_id, project_dir, started_at, last_event_at, event_count, compact_count, total_chars_before, total_chars_kept, total_messages_summarized FROM session_meta WHERE session_id = ?`);
|
|
147
175
|
p("incrementCompactCount", `UPDATE session_meta SET compact_count = compact_count + 1 WHERE session_id = ?`);
|
|
176
|
+
p("addCompactionStats", `UPDATE session_meta SET total_chars_before = total_chars_before + ?, total_chars_kept = total_chars_kept + ?, total_messages_summarized = total_messages_summarized + ? WHERE session_id = ?`);
|
|
177
|
+
p("getAllTimeStats", `SELECT COALESCE(SUM(total_chars_before), 0) AS all_chars_before, COALESCE(SUM(total_chars_kept), 0) AS all_chars_kept, COALESCE(SUM(total_messages_summarized), 0) AS all_messages_summarized, COALESCE(SUM(compact_count), 0) AS all_compactions FROM session_meta`);
|
|
148
178
|
p("upsertResume", `INSERT INTO session_resume (session_id, snapshot, event_count) VALUES (?, ?, ?) ON CONFLICT(session_id) DO UPDATE SET snapshot = excluded.snapshot, event_count = excluded.event_count, created_at = datetime('now'), consumed = 0`);
|
|
149
179
|
p("getResume", `SELECT snapshot, event_count, consumed FROM session_resume WHERE session_id = ?`);
|
|
150
180
|
p("markResumeConsumed", `UPDATE session_resume SET consumed = 1 WHERE session_id = ?`);
|
|
@@ -155,10 +185,11 @@ export class SessionDB {
|
|
|
155
185
|
}
|
|
156
186
|
|
|
157
187
|
/**
 * Look up a prepared statement by the key it was registered under.
 *
 * @param key - Name given to the statement when it was prepared.
 * @returns The prepared statement.
 * @throws Error if statements have not been prepared yet or the key is
 *   unknown. This replaces the `undefined` the non-null assertions used to
 *   let through, which only failed later as an opaque TypeError.
 */
private stmt(key: string): PreparedStatement {
  const statement = this.stmts?.get(key);
  if (!statement) {
    throw new Error(`SessionDB: prepared statement "${key}" is not available`);
  }
  return statement;
}
|
|
160
190
|
|
|
161
191
|
insertEvent(sessionId: string, event: SessionEvent, sourceHook: string = "PostToolUse"): void {
|
|
192
|
+
if (!this.stmts) return;
|
|
162
193
|
const dataHash = createHash("sha256").update(event.data).digest("hex").slice(0, 16).toUpperCase();
|
|
163
194
|
const projectDir = String(event.project_dir ?? "").trim();
|
|
164
195
|
const attributionSource = String(event.attribution_source ?? "unknown");
|
|
@@ -185,42 +216,67 @@ export class SessionDB {
|
|
|
185
216
|
}
|
|
186
217
|
|
|
187
218
|
/**
 * Fetch stored events for a session through the prepared "getEvents" statement.
 *
 * Only `opts.limit` is forwarded to the query (default 1000). `opts.type` and
 * `opts.minPriority` are accepted by the signature but are not applied here.
 *
 * @returns The matching rows, or an empty array when no statements have been
 *   prepared.
 */
getEvents(sessionId: string, opts?: { type?: string; minPriority?: number; limit?: number }): StoredEvent[] {
  if (this.stmts == null) {
    return [];
  }
  const maxRows = opts?.limit ?? 1000;
  const rows = this.stmt("getEvents").all(sessionId, maxRows);
  return rows as StoredEvent[];
}
|
|
191
223
|
|
|
192
224
|
/**
 * Number of events reported by the prepared "getEventCount" statement for a
 * session (its `cnt` column).
 *
 * @returns The count, or 0 when no statements have been prepared.
 */
getEventCount(sessionId: string): number {
  if (this.stmts == null) {
    return 0;
  }
  const counted = this.stmt("getEventCount").get(sessionId) as { cnt: number };
  return counted.cnt;
}
|
|
196
229
|
|
|
197
230
|
/**
 * Create the `session_meta` row for a session if it does not exist yet
 * (INSERT OR IGNORE). Does nothing when no statements have been prepared.
 */
ensureSession(sessionId: string, projectDir: string): void {
  if (this.stmts == null) {
    return;
  }
  const insertIfAbsent = this.stmt("ensureSession");
  insertIfAbsent.run(sessionId, projectDir);
}
|
|
200
234
|
|
|
201
235
|
/**
 * Read the `session_meta` row for a session, including the cumulative
 * compaction totals.
 *
 * @returns The row, or `null` when the session is unknown or no statements
 *   have been prepared.
 */
getSessionStats(sessionId: string): SessionMeta | null {
  if (this.stmts == null) {
    return null;
  }
  const meta = this.stmt("getSessionStats").get(sessionId) as SessionMeta | undefined;
  if (meta === undefined || meta === null) {
    return null;
  }
  return meta;
}
|
|
205
240
|
|
|
206
241
|
/**
 * Add one to the session's `compact_count`. Does nothing when no statements
 * have been prepared.
 */
incrementCompactCount(sessionId: string): void {
  if (this.stmts == null) {
    return;
  }
  const bump = this.stmt("incrementCompactCount");
  bump.run(sessionId);
}
|
|
209
245
|
|
|
246
|
+
/**
 * Add the figures of one compaction to the session's running totals
 * (`total_chars_before`, `total_chars_kept`, `total_messages_summarized`).
 * Does nothing when no statements have been prepared.
 *
 * @param sessionId - Session whose totals are updated.
 * @param charsBefore - Amount added to `total_chars_before`.
 * @param charsKept - Amount added to `total_chars_kept`.
 * @param messagesSummarized - Amount added to `total_messages_summarized`.
 */
addCompactionStats(sessionId: string, charsBefore: number, charsKept: number, messagesSummarized: number): void {
  if (this.stmts == null) {
    return;
  }
  // The statement binds the three increments first and the session id last.
  const accumulate = this.stmt("addCompactionStats");
  accumulate.run(charsBefore, charsKept, messagesSummarized, sessionId);
}
|
|
250
|
+
|
|
251
|
+
/**
 * Sum the compaction totals over every row of `session_meta`.
 *
 * @returns The aggregated totals; every field is 0 when no statements have
 *   been prepared or the query yields no row.
 */
getAllTimeStats(): { allCharsBefore: number; allCharsKept: number; allMessagesSummarized: number; allCompactions: number } {
  if (this.stmts == null) {
    return { allCharsBefore: 0, allCharsKept: 0, allMessagesSummarized: 0, allCompactions: 0 };
  }

  type TotalsRow = {
    all_chars_before: number;
    all_chars_kept: number;
    all_messages_summarized: number;
    all_compactions: number;
  };
  const totals = this.stmt("getAllTimeStats").get() as TotalsRow | undefined;

  const allCharsBefore = totals?.all_chars_before ?? 0;
  const allCharsKept = totals?.all_chars_kept ?? 0;
  const allMessagesSummarized = totals?.all_messages_summarized ?? 0;
  const allCompactions = totals?.all_compactions ?? 0;
  return { allCharsBefore, allCharsKept, allMessagesSummarized, allCompactions };
}
|
|
261
|
+
|
|
210
262
|
/**
 * Store (insert or overwrite) the resume snapshot for a session. On conflict
 * the statement replaces the snapshot and event count, refreshes
 * `created_at` and resets `consumed` to 0.
 *
 * @param eventCount - Stored alongside the snapshot; defaults to 0.
 */
upsertResume(sessionId: string, snapshot: string, eventCount?: number): void {
  if (this.stmts == null) {
    return;
  }
  const storedCount = eventCount ?? 0;
  this.stmt("upsertResume").run(sessionId, snapshot, storedCount);
}
|
|
213
266
|
|
|
214
267
|
/**
 * Read the stored resume snapshot row (`snapshot`, `event_count`, `consumed`)
 * for a session.
 *
 * @returns The row, or `null` when none exists or no statements have been
 *   prepared.
 */
getResume(sessionId: string): ResumeRow | null {
  if (this.stmts == null) {
    return null;
  }
  const resume = this.stmt("getResume").get(sessionId) as ResumeRow | undefined;
  if (resume === undefined || resume === null) {
    return null;
  }
  return resume;
}
|
|
218
272
|
|
|
219
273
|
/**
 * Flag the session's resume snapshot as consumed (`consumed = 1`). Does
 * nothing when no statements have been prepared.
 */
markResumeConsumed(sessionId: string): void {
  if (this.stmts == null) {
    return;
  }
  const markConsumed = this.stmt("markResumeConsumed");
  markConsumed.run(sessionId);
}
|
|
222
277
|
|
|
223
278
|
deleteSession(sessionId: string): void {
|
|
279
|
+
if (!this.stmts) return;
|
|
224
280
|
this.db.transaction(() => {
|
|
225
281
|
this.stmt("deleteEvents").run(sessionId);
|
|
226
282
|
this.stmt("deleteResume").run(sessionId);
|
|
@@ -229,6 +285,7 @@ export class SessionDB {
|
|
|
229
285
|
}
|
|
230
286
|
|
|
231
287
|
cleanupOldSessions(maxAgeDays: number = 7): number {
|
|
288
|
+
if (!this.stmts) return 0;
|
|
232
289
|
const oldSessions = this.stmt("getOldSessions").all(`-${maxAgeDays}`) as Array<{ session_id: string }>;
|
|
233
290
|
for (const { session_id } of oldSessions) {
|
|
234
291
|
this.deleteSession(session_id);
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
|
|
5
5
|
import type { SessionDB } from "./db.js";
|
|
6
6
|
import { buildResumeSnapshot } from "./snapshot.js";
|
|
7
|
+
import { buildAutoInjection } from "./auto-inject.js";
|
|
8
|
+
import { loadConfig } from "../config/manager.js";
|
|
7
9
|
|
|
8
10
|
export async function injectResumeSnapshot(
|
|
9
11
|
db: SessionDB,
|
|
@@ -20,6 +22,16 @@ export async function injectResumeSnapshot(
|
|
|
20
22
|
searchTool: opts?.searchTool ?? "ctx_search",
|
|
21
23
|
});
|
|
22
24
|
|
|
25
|
+
// Auto-injection: add behavioral state after compaction (if enabled)
|
|
26
|
+
const config = loadConfig();
|
|
27
|
+
let fullSnapshot = snapshot;
|
|
28
|
+
if (config.pipeline.autoInjection) {
|
|
29
|
+
const autoInjection = buildAutoInjection(events);
|
|
30
|
+
if (autoInjection) {
|
|
31
|
+
fullSnapshot = `${snapshot}\n\n${autoInjection}`;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
23
35
|
db.markResumeConsumed(sessionId);
|
|
24
|
-
return
|
|
36
|
+
return fullSnapshot;
|
|
25
37
|
}
|
package/src/store/db-base.ts
CHANGED
|
@@ -42,6 +42,17 @@ export async function loadSQLite() {
|
|
|
42
42
|
/**
 * Apply the standard pragmas to an open SQLite handle: WAL journaling,
 * `synchronous = NORMAL`, and (unless disabled) a 256 MB memory map.
 *
 * The memory map is skipped only when the loaded config has
 * `pipeline.mmapPragma === false`. If the config cannot be loaded, the memory
 * map is applied by default. A runtime that rejects the mmap pragma is
 * tolerated silently.
 *
 * @param db - Database handle exposing `exec(sql)`.
 */
export function applyWALPragmas(db: any): void {
  db.exec("PRAGMA journal_mode = WAL;");
  db.exec("PRAGMA synchronous = NORMAL;");

  // Step 1: decide whether memory-mapping is wanted. Only the config lookup is
  // guarded here, so a failure to load config is not confused with a failure
  // of the pragma itself.
  // NOTE(review): `require` is only defined when this module is loaded as
  // CommonJS (or by a loader that provides it); under plain ESM this lookup
  // always fails and the default applies — confirm against the runtime.
  let mmapEnabled = true;
  try {
    const { loadConfig } = require("../config/manager.js");
    mmapEnabled = loadConfig().pipeline?.mmapPragma !== false;
  } catch {
    // Config unavailable — keep the default (enabled).
  }
  if (!mmapEnabled) {
    return;
  }

  // Step 2: memory-map the DB file for read-heavy FTS5 search workloads.
  // Attempted exactly once; unsupported runtimes are ignored.
  try {
    db.exec("PRAGMA mmap_size = 268435456;"); // 256MB
  } catch {
    /* unsupported runtime */
  }
}
|
|
46
57
|
|
|
47
58
|
export function withRetry<T>(fn: () => T, maxRetries = 3): T {
|
package/src/store/index.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { loadSQLite, applyWALPragmas, withRetry, isSQLiteCorruptionError, defaul
|
|
|
8
8
|
import type { PreparedStatement } from "./db-base.js";
|
|
9
9
|
import { autoChunk } from "./chunking.js";
|
|
10
10
|
import type { IndexResult, SearchResult, StoreStats } from "../types.js";
|
|
11
|
+
import { loadConfig } from "../config/manager.js";
|
|
11
12
|
|
|
12
13
|
// --- Fuzzy correction ---
|
|
13
14
|
|
|
@@ -131,6 +132,118 @@ function rrfMerge(
|
|
|
131
132
|
.map((s) => ({ ...s.result, rank: s.score }));
|
|
132
133
|
}
|
|
133
134
|
|
|
135
|
+
// ── Proximity Reranking (from context-mode) ──────────────────
|
|
136
|
+
|
|
137
|
+
/**
 * Find the start index of every occurrence of `term` in `text`, in ascending
 * order. Overlapping occurrences are included (the search resumes one
 * character after each hit).
 *
 * @param text - Text to scan.
 * @param term - Substring to look for. An empty term yields no positions.
 * @returns Ascending list of start indices; empty when there is no match.
 */
function findAllPositions(text: string, term: string): number[] {
  const positions: number[] = [];
  // `indexOf("")` matches at every index and clamps to `text.length`, so it
  // never returns -1: without this guard an empty term would loop forever.
  if (term.length === 0) {
    return positions;
  }
  for (let idx = text.indexOf(term); idx !== -1; idx = text.indexOf(term, idx + 1)) {
    positions.push(idx);
  }
  return positions;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Smallest distance between the lowest and highest position of a selection
 * that takes exactly one position from every list.
 *
 * Works by keeping one pointer per (sorted) list, measuring the span of the
 * currently selected positions, and advancing the pointer that holds the
 * smallest position until one list is exhausted.
 *
 * @param positionLists - One list of positions per term; input order within a
 *   list does not matter (each list is copied and sorted).
 * @returns The minimum span; 0 for a single non-empty list; `Infinity` when
 *   there are no lists or any list is empty (not every term can be covered).
 */
function findMinSpan(positionLists: number[][]): number {
  if (positionLists.length === 0) return Infinity;
  // A term with no occurrence cannot be covered. Without this guard the empty
  // list would be skipped by the comparisons below and a finite span returned.
  if (positionLists.some((list) => list.length === 0)) return Infinity;
  if (positionLists.length === 1) return 0;

  const sorted = positionLists.map((list) => [...list].sort((a, b) => a - b));
  const ptrs: number[] = new Array(sorted.length).fill(0);
  let minSpan = Infinity;

  while (true) {
    let curMin = Infinity;
    let curMax = -Infinity;
    let minIdx = 0;

    // Inspect the position each list currently points at.
    for (let i = 0; i < sorted.length; i++) {
      const val = sorted[i][ptrs[i]];
      if (val < curMin) {
        curMin = val;
        minIdx = i;
      }
      if (val > curMax) {
        curMax = val;
      }
    }

    minSpan = Math.min(minSpan, curMax - curMin);

    // Only moving the smallest position forward can shrink the span.
    ptrs[minIdx]++;
    if (ptrs[minIdx] >= sorted[minIdx].length) break;
  }

  return minSpan;
}
|
|
177
|
+
|
|
178
|
+
/**
 * Count how often consecutive terms appear next to each other.
 *
 * For each pair (term i, term i+1), every occurrence of term i is matched
 * with the next unused occurrence of term i+1 that starts at or after the end
 * of term i and at most `gap` characters beyond it. The scan pointer into the
 * following list only moves forward, so the position lists are expected to be
 * in ascending order.
 *
 * @param positionLists - Start positions per term, parallel to `terms`.
 * @param terms - The terms; their lengths define where a term ends.
 * @param gap - Maximum number of characters between the end of one term and
 *   the start of the next (default 30).
 * @returns Total number of matched adjacent occurrences over all pairs.
 */
function countAdjacentPairs(
  positionLists: number[][],
  terms: string[],
  gap: number = 30,
): number {
  const pairCount = Math.min(positionLists.length, terms.length) - 1;
  if (pairCount < 1) {
    return 0;
  }

  let matches = 0;
  for (let k = 0; k < pairCount; k += 1) {
    const starts = positionLists[k];
    const followers = positionLists[k + 1];
    const termLength = terms[k].length;
    let cursor = 0;

    starts.forEach((start) => {
      const earliest = start + termLength;
      const latest = earliest + gap;
      // Skip followers that begin before the current term has ended.
      while (cursor < followers.length && followers[cursor] < earliest) {
        cursor += 1;
      }
      const withinGap = cursor < followers.length && followers[cursor] <= latest;
      if (withinGap) {
        matches += 1;
        cursor += 1; // each follower is used at most once
      }
    });
  }
  return matches;
}
|
|
204
|
+
|
|
205
|
+
/**
 * Re-order fused search results by how well the query terms cluster in each
 * result.
 *
 * Query terms are the lower-cased, whitespace-separated words of at least two
 * characters, with stopwords removed (unless that would remove everything).
 * Queries with fewer than two terms are returned unchanged.
 *
 * Each result receives a boost made of:
 * - title boost: share of terms found in the title, weighted 0.6 for code and
 *   0.3 otherwise;
 * - proximity boost: `1 / (1 + minSpan / contentLength)`, only when every
 *   term occurs in the content;
 * - phrase boost: up to 0.5, growing with the number of adjacent term pairs
 *   (saturating at 4 pairs), under the same condition.
 *
 * Results are sorted by boost, highest first. Ties are broken by `rank`,
 * highest first: the results handed to this function come from RRF fusion,
 * where `rank` carries the fused score and a larger value is a better match.
 *
 * @param results - Fused results to re-order (not mutated).
 * @param query - Raw user query.
 * @returns A new array with the same elements in boosted order.
 */
function applyProximityReranking(
  results: SearchResult[],
  query: string,
): SearchResult[] {
  const allTerms = query
    .toLowerCase()
    .split(/\s+/)
    .filter((w) => w.length >= 2);
  const filtered = allTerms.filter((w) => !STOPWORDS.has(w));
  const terms = filtered.length > 0 ? filtered : allTerms;

  if (terms.length < 2) return results; // Single-term queries skip proximity

  const scored = results.map((r) => {
    const titleLower = r.title.toLowerCase();
    const titleHits = terms.filter((t) => titleLower.includes(t)).length;
    const titleWeight = r.contentType === "code" ? 0.6 : 0.3;
    const titleBoost = titleHits > 0 ? titleWeight * (titleHits / terms.length) : 0;

    let proximityBoost = 0;
    let phraseBoost = 0;

    const content = r.content.toLowerCase();
    const positions = terms.map((t) => findAllPositions(content, t));

    // Proximity and phrase boosts only make sense when every term is present.
    if (positions.every((p) => p.length > 0)) {
      const minSpan = findMinSpan(positions);
      proximityBoost = 1 / (1 + minSpan / Math.max(content.length, 1));

      const adjacentPairs = countAdjacentPairs(positions, terms);
      phraseBoost = 0.5 * Math.min(1, adjacentPairs / 4);
    }

    return { result: r, boost: titleBoost + proximityBoost + phraseBoost };
  });

  return scored
    // Tie-break on the fused score, descending. Ascending order would put the
    // weakest match first among equally boosted results.
    .sort((a, b) => b.boost - a.boost || b.result.rank - a.result.rank)
    .map((s) => s.result);
}
|
|
246
|
+
|
|
134
247
|
const STOPWORDS = new Set([
|
|
135
248
|
"the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
|
|
136
249
|
"her", "was", "one", "our", "out", "has", "his", "how", "its", "may",
|
|
@@ -176,6 +289,7 @@ export class ContentStore {
|
|
|
176
289
|
private stmts: Map<string, PreparedStatement> = new Map();
|
|
177
290
|
private dbPath: string;
|
|
178
291
|
private ready = false;
|
|
292
|
+
private writeCount = 0;
|
|
179
293
|
|
|
180
294
|
constructor(opts?: { dbPath?: string }) {
|
|
181
295
|
this.dbPath = opts?.dbPath ?? defaultDBPath("content");
|
|
@@ -227,6 +341,7 @@ export class ContentStore {
|
|
|
227
341
|
p("deleteByLabel", `DELETE FROM content_fts WHERE label = ?`);
|
|
228
342
|
p("insertSource", `INSERT INTO content_sources (label, source, content_type, mtime, sha256, chunk_count) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(label) DO UPDATE SET source=excluded.source, content_type=excluded.content_type, mtime=excluded.mtime, sha256=excluded.sha256, chunk_count=excluded.chunk_count, indexed_at=datetime('now')`);
|
|
229
343
|
p("getSource", `SELECT label, source, content_type, mtime, sha256, chunk_count, indexed_at FROM content_sources WHERE label = ?`);
|
|
344
|
+
p("getSourceMeta", `SELECT label, chunk_count, indexed_at FROM content_sources WHERE label = ?`);
|
|
230
345
|
p("deleteSource", `DELETE FROM content_sources WHERE label = ?`);
|
|
231
346
|
p("countSources", `SELECT COUNT(*) AS cnt FROM content_sources`);
|
|
232
347
|
p("countFTS", `SELECT COUNT(*) AS cnt FROM content_fts`);
|
|
@@ -271,6 +386,7 @@ export class ContentStore {
|
|
|
271
386
|
});
|
|
272
387
|
|
|
273
388
|
withRetry(() => transaction());
|
|
389
|
+
this.afterWrite();
|
|
274
390
|
|
|
275
391
|
return { sourceId: 1, label, totalChunks: chunks.length, codeChunks };
|
|
276
392
|
}
|
|
@@ -312,7 +428,13 @@ export class ContentStore {
|
|
|
312
428
|
// RRF fusion
|
|
313
429
|
const rrfResults = rrfMerge(porterResults, trigramResults);
|
|
314
430
|
|
|
315
|
-
|
|
431
|
+
// Apply proximity reranking to all RRF results (if enabled)
|
|
432
|
+
const config = loadConfig();
|
|
433
|
+
const rerankedResults = config.pipeline.proximityReranking
|
|
434
|
+
? applyProximityReranking(rrfResults, query)
|
|
435
|
+
: rrfResults;
|
|
436
|
+
|
|
437
|
+
if (mode === "rrf") return rerankedResults.slice(0, limit);
|
|
316
438
|
|
|
317
439
|
// Fuzzy mode: apply fuzzy correction to query terms
|
|
318
440
|
const vocab = buildVocabulary(allRows);
|
|
@@ -336,11 +458,11 @@ export class ContentStore {
|
|
|
336
458
|
matchLayer: "fuzzy" as const,
|
|
337
459
|
rank: r.rank * 0.9, // slightly lower confidence
|
|
338
460
|
}));
|
|
339
|
-
const merged = rrfMerge(
|
|
340
|
-
return merged.slice(0, limit);
|
|
461
|
+
const merged = rrfMerge(rerankedResults, correctedResults);
|
|
462
|
+
return applyProximityReranking(merged, query).slice(0, limit);
|
|
341
463
|
}
|
|
342
464
|
|
|
343
|
-
return
|
|
465
|
+
return rerankedResults.slice(0, limit);
|
|
344
466
|
}
|
|
345
467
|
|
|
346
468
|
async getStats(): Promise<StoreStats> {
|
|
@@ -354,13 +476,37 @@ export class ContentStore {
|
|
|
354
476
|
};
|
|
355
477
|
}
|
|
356
478
|
|
|
479
|
+
/**
 * Look up the indexing metadata of a source by label, for TTL cache checks.
 *
 * NOTE(review): unlike `purge`, this does not initialise the store first —
 * presumably the caller has already done so; confirm.
 *
 * @param label - Label the source was indexed under.
 * @returns The label, chunk count and index timestamp in camelCase form, or
 *   `null` when no source has that label.
 */
getSourceMeta(label: string): { label: string; chunkCount: number; indexedAt: string } | null {
  type MetaRow = { label: string; chunk_count: number; indexed_at: string };
  const found = this.stmt("getSourceMeta").get(label) as MetaRow | undefined;
  if (found) {
    const { label: storedLabel, chunk_count: chunkCount, indexed_at: indexedAt } = found;
    return { label: storedLabel, chunkCount, indexedAt };
  }
  return null;
}
|
|
485
|
+
|
|
357
486
|
/**
 * Delete every indexed chunk and source from the store.
 *
 * @returns The number of sources that were removed. The count is taken
 *   before the delete; counting afterwards would always report 0.
 */
async purge(): Promise<number> {
  if (!this.ready) await this.init();
  const { cnt: purgedSources } = this.stmt("countSources").get() as { cnt: number };
  this.db.exec(`DELETE FROM content_fts; DELETE FROM content_sources;`);
  // Counts as a write for the periodic WAL checkpoint.
  this.afterWrite();
  return purgedSources;
}
|
|
363
493
|
|
|
494
|
+
/**
 * Issue `PRAGMA wal_checkpoint` so the WAL file does not grow without bound.
 * Does nothing when no database handle is set; checkpoint errors are ignored.
 *
 * @param mode - Checkpoint mode passed to the pragma (default "PASSIVE").
 */
checkpointWAL(mode: "PASSIVE" | "TRUNCATE" = "PASSIVE"): void {
  if (this.db) {
    try {
      this.db.exec(`PRAGMA wal_checkpoint(${mode});`);
    } catch {
      /* ignore */
    }
  }
}
|
|
501
|
+
|
|
502
|
+
/**
 * Bookkeeping to run after each write: bump the write counter and request a
 * PASSIVE WAL checkpoint on every tenth write.
 */
private afterWrite(): void {
  this.writeCount += 1;
  const dueForCheckpoint = this.writeCount % 10 === 0;
  if (!dueForCheckpoint) {
    return;
  }
  this.checkpointWAL("PASSIVE");
}
|
|
509
|
+
|
|
364
510
|
/** Close the database handle; any error raised while closing is ignored. */
close(): void {
  try {
    this.db.close();
  } catch {
    /* ignore */
  }
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified search across ContentStore + SessionDB events
|
|
3
|
+
* Supports timeline (chronological) and relevance sorting
|
|
4
|
+
* (from context-mode src/search/unified.ts)
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { ContentStore } from "./index.js";
|
|
8
|
+
import type { SessionDB } from "../session/db.js";
|
|
9
|
+
import type { SearchResult } from "../types.js";
|
|
10
|
+
|
|
11
|
+
/** One hit returned by the unified search, whichever source produced it. */
export interface UnifiedSearchResult {
  /** Display title of the hit. */
  title: string;
  /** Matched text. */
  content: string;
  /** Where the content came from (a source identifier or a fixed marker). */
  source: string;
  /** Whether the hit belongs to the current session or an earlier one. */
  origin: "current-session" | "prior-session";
  /** Timestamp string used for timeline ordering. */
  timestamp: string;
  /** Relevance value carried over from the producing search. */
  rank: number;
  /** Name of the matching layer that produced the hit. */
  matchLayer: string;
  /** Kind of content the hit contains. */
  contentType: "prose" | "code";
}
|
|
21
|
+
|
|
22
|
+
/** Options accepted by the unified search. */
export interface UnifiedSearchOptions {
  /** Text to search for. */
  query: string;
  /** Maximum number of results to return. */
  limit?: number;
  /** Result ordering: by relevance or chronologically. */
  sort?: "relevance" | "timeline";
  /** Optional source filter. */
  source?: string;
  /** Optional content-type filter. */
  contentType?: string;
  /** Optional project directory to scope the search to. */
  projectDir?: string;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Search the content store and, in timeline mode, the session event log.
 *
 * - "relevance" (default): content-store hits only, in the order the store
 *   returned them.
 * - "timeline": content-store hits plus session events whose data, type or
 *   category contains the query (case-insensitive), sorted by timestamp,
 *   oldest first.
 *
 * Content-store hits carry no timestamp of their own, so they are stamped
 * with the time of this call. A failure in either source is swallowed so the
 * other source can still contribute.
 *
 * NOTE(review): `opts.projectDir` is passed to `getEvents` as the session id,
 * and `opts.source` / `opts.contentType` are not read here — confirm that is
 * intended.
 *
 * @param store - Content store to query.
 * @param sessionDB - Session database, or `null` to skip session events.
 * @param opts - Query, limit (default 10) and sort mode.
 * @returns At most `limit` results.
 */
export async function searchAllSources(
  store: ContentStore,
  sessionDB: SessionDB | null,
  opts: UnifiedSearchOptions,
): Promise<UnifiedSearchResult[]> {
  const maxResults = opts.limit ?? 10;
  const ordering = opts.sort ?? "relevance";
  const timelineMode = ordering === "timeline";
  const nowIso = new Date().toISOString();

  const collected: UnifiedSearchResult[] = [];

  // Source 1: ContentStore (always, both modes)
  try {
    const hits = await store.search(opts.query, { limit: maxResults });
    const fromStore = hits.map((hit) => {
      const unified: UnifiedSearchResult = {
        title: hit.title,
        content: hit.content,
        source: hit.source,
        origin: "current-session",
        timestamp: nowIso, // ContentStore doesn't track per-result timestamps yet
        rank: hit.rank,
        matchLayer: hit.matchLayer ?? "porter",
        contentType: hit.contentType,
      };
      return unified;
    });
    collected.push(...fromStore);
  } catch {
    // ContentStore search failed — continue with other sources
  }

  // Source 2: SessionDB events (timeline mode only)
  if (timelineMode && sessionDB) {
    try {
      const sessionId = opts.projectDir ?? "";
      const events = sessionDB.getEvents(sessionId, { limit: 100 });
      const needle = opts.query.toLowerCase();
      const containsNeedle = (value: unknown): boolean =>
        String(value ?? "").toLowerCase().includes(needle);

      const matching = events.filter(
        (e) => containsNeedle(e.data) || containsNeedle(e.type) || containsNeedle(e.category),
      );

      const fromEvents = matching.slice(0, maxResults).map((e) => {
        const unified: UnifiedSearchResult = {
          title: `[${e.category}] ${e.type}`,
          content: String(e.data ?? "").slice(0, 500),
          source: "prior-session",
          origin: "prior-session",
          timestamp: e.created_at ?? nowIso,
          rank: 0,
          matchLayer: "event",
          contentType: "prose",
        };
        return unified;
      });
      collected.push(...fromEvents);
    } catch {
      // SessionDB search failed — continue
    }
  }

  // Normalize SQLite datetime format ("YYYY-MM-DD HH:MM:SS") to ISO 8601
  collected.forEach((entry) => {
    const stamp = entry.timestamp;
    if (stamp && !stamp.includes("T")) {
      entry.timestamp = stamp.replace(" ", "T") + "Z";
    }
  });

  // Sort: timeline = chronological; relevance keeps the store's own order
  if (timelineMode) {
    collected.sort((a, b) => (a.timestamp || "").localeCompare(b.timestamp || ""));
  }

  return collected.slice(0, maxResults);
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* context_budget tool — estimate remaining context window
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Estimate of how much of the context window is in use. */
export interface ContextBudgetResult {
  /** Used share of the window, as a rounded percentage. */
  percentFull: number;
  /** Tokens still available (never negative). */
  remainingTokens: number;
  /** Size of the context window the estimate is based on. */
  totalTokens: number;
  /** One-line human-readable summary of the estimate. */
  message: string;
  /** Recommendation on whether to compact. */
  advice: string;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Estimate how full the context window is and advise on compaction.
 *
 * @param tokensBefore - Tokens currently in use. When missing or not a finite
 *   number the budget is unknown and `null` is returned. Negative values are
 *   treated as 0.
 * @param contextWindowSize - Size of the context window. Defaults to 200000;
 *   the default is also used when the value is not a positive finite number.
 * @returns The estimate, or `null` when no usable token count is available.
 *   `percentFull` may exceed 100 when usage is larger than the window.
 */
export function estimateContextBudget(
  tokensBefore?: number,
  contextWindowSize?: number,
): ContextBudgetResult | null {
  const defaultWindowSize = 200000; // Default 200K context

  // No usable token data: report "unknown" instead of a misleading "NaN%".
  if (tokensBefore == null || !Number.isFinite(tokensBefore)) return null;

  // A zero, negative or non-finite window would yield 0% with 0 tokens
  // remaining and the "plenty of room" advice, so fall back to the default.
  const windowSize =
    contextWindowSize != null && Number.isFinite(contextWindowSize) && contextWindowSize > 0
      ? contextWindowSize
      : defaultWindowSize;
  const used = Math.max(0, tokensBefore);

  const remaining = Math.max(0, windowSize - used);
  const percentFull = Math.round((used / windowSize) * 100);

  let advice: string;
  if (percentFull >= 90) {
    advice = "CRITICAL: Compact immediately. Very little room for complex tasks.";
  } else if (percentFull >= 75) {
    advice = "Context is filling up. Compact before starting complex work.";
  } else if (percentFull >= 50) {
    advice = "Moderate context usage. Compact before large multi-step tasks.";
  } else {
    advice = "Context has plenty of room. No compaction needed yet.";
  }

  const message = `Context: ~${percentFull}% full (estimated ${remaining.toLocaleString()} tokens remaining)`;

  return { percentFull, remainingTokens: remaining, totalTokens: windowSize, message, advice };
}
|
|
40
|
+
|
|
41
|
+
/**
 * Handler for the context_budget tool: renders the budget estimate as text.
 *
 * @param tokensBefore - Token count supplied by the tool registration
 *   (from the Pi context); may be absent.
 * @returns The estimate's message followed by its advice on a second line,
 *   or a fixed "Unknown" line when no estimate can be made.
 */
export function contextBudgetTool(tokensBefore?: number): string {
  const estimate = estimateContextBudget(tokensBefore);
  if (estimate) {
    const { message, advice } = estimate;
    return `${message}\nAdvice: ${advice}`;
  }
  return "Context budget: Unknown (no token data available from session).";
}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { PolyglotExecutor } from "../executor/executor.js";
|
|
6
|
-
import { ContentStore } from "../store/index.js";
|
|
6
|
+
import type { ContentStore } from "../store/index.js";
|
|
7
7
|
import type { Language, ExecResult, SearchResult } from "../types.js";
|
|
8
8
|
|
|
9
9
|
export interface BatchCommand {
|
|
@@ -28,11 +28,9 @@ export interface BatchResult {
|
|
|
28
28
|
>;
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
export async function ctxBatchExecute(items: BatchItem[]): Promise<BatchResult> {
|
|
31
|
+
export async function ctxBatchExecute(store: ContentStore, items: BatchItem[]): Promise<BatchResult> {
|
|
32
32
|
const results: BatchResult["results"] = [];
|
|
33
33
|
const executor = new PolyglotExecutor();
|
|
34
|
-
const store = new ContentStore();
|
|
35
|
-
await store.init();
|
|
36
34
|
|
|
37
35
|
for (const item of items) {
|
|
38
36
|
if (item.type === "execute") {
|
|
@@ -48,6 +46,5 @@ export async function ctxBatchExecute(items: BatchItem[]): Promise<BatchResult>
|
|
|
48
46
|
}
|
|
49
47
|
}
|
|
50
48
|
|
|
51
|
-
store.close();
|
|
52
49
|
return { results };
|
|
53
50
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* ctx_fetch_and_index tool — fetch URL → markdown → index
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { ContentStore } from "../store/index.js";
|
|
5
|
+
import type { ContentStore } from "../store/index.js";
|
|
6
6
|
import type { IndexResult } from "../types.js";
|
|
7
7
|
|
|
8
8
|
export interface CtxFetchAndIndexInput {
|
|
@@ -11,7 +11,7 @@ export interface CtxFetchAndIndexInput {
|
|
|
11
11
|
chunkSize?: number;
|
|
12
12
|
}
|
|
13
13
|
|
|
14
|
-
export async function ctxFetchAndIndex(input: CtxFetchAndIndexInput): Promise<IndexResult> {
|
|
14
|
+
export async function ctxFetchAndIndex(store: ContentStore, input: CtxFetchAndIndexInput): Promise<IndexResult> {
|
|
15
15
|
const label = input.label ?? input.url;
|
|
16
16
|
|
|
17
17
|
const response = await fetch(input.url, {
|
|
@@ -23,15 +23,10 @@ export async function ctxFetchAndIndex(input: CtxFetchAndIndexInput): Promise<In
|
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
const text = await response.text();
|
|
26
|
-
const store = new ContentStore();
|
|
27
|
-
await store.init();
|
|
28
26
|
|
|
29
|
-
|
|
27
|
+
return store.index(label, text, {
|
|
30
28
|
contentType: "plain",
|
|
31
29
|
source: input.url,
|
|
32
30
|
chunkSize: input.chunkSize,
|
|
33
31
|
});
|
|
34
|
-
|
|
35
|
-
store.close();
|
|
36
|
-
return result;
|
|
37
32
|
}
|
package/src/tools/ctx-index.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* ctx_index tool — chunk content → index into FTS5
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { ContentStore } from "../store/index.js";
|
|
5
|
+
import type { ContentStore } from "../store/index.js";
|
|
6
6
|
import type { IndexResult } from "../types.js";
|
|
7
7
|
import { readFileSync } from "node:fs";
|
|
8
8
|
|
|
@@ -14,10 +14,7 @@ export interface CtxIndexInput {
|
|
|
14
14
|
chunkSize?: number;
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
export async function ctxIndex(input: CtxIndexInput): Promise<IndexResult> {
|
|
18
|
-
const store = new ContentStore();
|
|
19
|
-
await store.init();
|
|
20
|
-
|
|
17
|
+
export async function ctxIndex(store: ContentStore, input: CtxIndexInput): Promise<IndexResult> {
|
|
21
18
|
let text: string;
|
|
22
19
|
let source: string;
|
|
23
20
|
|
|
@@ -31,12 +28,9 @@ export async function ctxIndex(input: CtxIndexInput): Promise<IndexResult> {
|
|
|
31
28
|
throw new Error("Either content or filePath must be provided");
|
|
32
29
|
}
|
|
33
30
|
|
|
34
|
-
|
|
31
|
+
return store.index(input.label, text, {
|
|
35
32
|
contentType: input.contentType ?? "plain",
|
|
36
33
|
source,
|
|
37
34
|
chunkSize: input.chunkSize,
|
|
38
35
|
});
|
|
39
|
-
|
|
40
|
-
store.close();
|
|
41
|
-
return result;
|
|
42
36
|
}
|