@pi-unipi/compactor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/package.json +54 -0
- package/skills/compactor/SKILL.md +74 -0
- package/skills/compactor-doctor/SKILL.md +74 -0
- package/skills/compactor-ops/SKILL.md +65 -0
- package/skills/compactor-stats/SKILL.md +49 -0
- package/skills/compactor-tools/SKILL.md +120 -0
- package/src/commands/index.ts +248 -0
- package/src/compaction/brief.ts +334 -0
- package/src/compaction/build-sections.ts +77 -0
- package/src/compaction/content.ts +47 -0
- package/src/compaction/cut.ts +80 -0
- package/src/compaction/extract/commits.ts +52 -0
- package/src/compaction/extract/files.ts +58 -0
- package/src/compaction/extract/goals.ts +36 -0
- package/src/compaction/extract/preferences.ts +40 -0
- package/src/compaction/filter-noise.ts +46 -0
- package/src/compaction/format.ts +48 -0
- package/src/compaction/hooks.ts +145 -0
- package/src/compaction/merge.ts +113 -0
- package/src/compaction/normalize.ts +68 -0
- package/src/compaction/recall-scope.ts +32 -0
- package/src/compaction/sanitize.ts +12 -0
- package/src/compaction/search-entries.ts +101 -0
- package/src/compaction/sections.ts +15 -0
- package/src/compaction/summarize.ts +29 -0
- package/src/config/manager.ts +89 -0
- package/src/config/presets.ts +83 -0
- package/src/config/schema.ts +55 -0
- package/src/display/bash-display.ts +28 -0
- package/src/display/diff-presentation.ts +20 -0
- package/src/display/diff-renderer.ts +255 -0
- package/src/display/line-width-safety.ts +16 -0
- package/src/display/pending-diff-preview.ts +51 -0
- package/src/display/render-utils.ts +52 -0
- package/src/display/thinking-label.ts +18 -0
- package/src/display/tool-overrides.ts +136 -0
- package/src/display/user-message-box.ts +16 -0
- package/src/executor/executor.ts +242 -0
- package/src/executor/runtime.ts +125 -0
- package/src/index.ts +211 -0
- package/src/info-screen.ts +60 -0
- package/src/security/evaluator.ts +142 -0
- package/src/security/policy.ts +74 -0
- package/src/security/scanner.ts +65 -0
- package/src/session/db.ts +237 -0
- package/src/session/extract.ts +107 -0
- package/src/session/resume-inject.ts +25 -0
- package/src/session/snapshot.ts +326 -0
- package/src/store/chunking.ts +126 -0
- package/src/store/db-base.ts +79 -0
- package/src/store/index.ts +364 -0
- package/src/tools/compact.ts +20 -0
- package/src/tools/ctx-batch-execute.ts +53 -0
- package/src/tools/ctx-doctor.ts +78 -0
- package/src/tools/ctx-execute-file.ts +26 -0
- package/src/tools/ctx-execute.ts +21 -0
- package/src/tools/ctx-fetch-and-index.ts +37 -0
- package/src/tools/ctx-index.ts +42 -0
- package/src/tools/ctx-search.ts +23 -0
- package/src/tools/ctx-stats.ts +37 -0
- package/src/tools/register.ts +360 -0
- package/src/tools/vcc-recall.ts +64 -0
- package/src/tui/settings-overlay.ts +290 -0
- package/src/types.ts +269 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ContentStore — FTS5 BM25-based knowledge base with trigram/fuzzy/RRF
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { readFileSync, statSync, existsSync } from "node:fs";
|
|
6
|
+
import { createHash } from "node:crypto";
|
|
7
|
+
import { loadSQLite, applyWALPragmas, withRetry, isSQLiteCorruptionError, defaultDBPath } from "./db-base.js";
|
|
8
|
+
import type { PreparedStatement } from "./db-base.js";
|
|
9
|
+
import { autoChunk } from "./chunking.js";
|
|
10
|
+
import type { IndexResult, SearchResult, StoreStats } from "../types.js";
|
|
11
|
+
|
|
12
|
+
// --- Fuzzy correction ---
|
|
13
|
+
|
|
14
|
+
/**
 * Collect the distinct word tokens that appear in the given rows.
 *
 * Each row's `content` is lower-cased and scanned for runs of three or more
 * characters drawn from `a`–`z` and `_`; every such run becomes a vocabulary
 * entry.
 *
 * @param rows - Rows whose `content` field is scanned.
 * @returns The set of unique tokens found across all rows.
 */
function buildVocabulary(rows: Array<{ content: string }>): Set<string> {
  const vocabulary = new Set<string>();
  rows.forEach(({ content }) => {
    for (const hit of content.toLowerCase().matchAll(/[a-z_]{3,}/g)) {
      vocabulary.add(hit[0]);
    }
  });
  return vocabulary;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Levenshtein edit distance between two strings.
 *
 * Counts the minimum number of single-code-unit insertions, deletions and
 * substitutions needed to turn `a` into `b`. Only the previous and the
 * current row of the dynamic-programming table are kept.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance (0 when the strings are equal).
 */
function levenshtein(a: string, b: string): number {
  const rowCount = a.length;
  const colCount = b.length;
  // Row 0: turning the empty prefix of `a` into b[0..col) costs `col` insertions.
  let previous: number[] = Array.from({ length: colCount + 1 }, (_, col) => col);
  for (let row = 1; row <= rowCount; row++) {
    // Column 0: turning a[0..row) into the empty string costs `row` deletions.
    const current: number[] = [row];
    for (let col = 1; col <= colCount; col++) {
      const deletion = previous[col] + 1;
      const insertion = current[col - 1] + 1;
      const substitution = previous[col - 1] + (a[row - 1] === b[col - 1] ? 0 : 1);
      current.push(Math.min(deletion, insertion, substitution));
    }
    previous = current;
  }
  return previous[colCount];
}
|
|
42
|
+
|
|
43
|
+
/**
 * Pick the vocabulary entry closest to `word` by Levenshtein distance.
 *
 * The word is lower-cased before comparison. Entries whose length differs
 * from the word's by more than `maxDistance` are skipped without computing a
 * distance. On ties the entry encountered first in the set's iteration order
 * wins.
 *
 * @param word - The (possibly misspelled) term to correct.
 * @param vocab - Candidate words.
 * @param maxDistance - Largest edit distance still accepted (default 2).
 * @returns The closest entry within `maxDistance`, or `undefined` if none.
 */
function fuzzySuggest(word: string, vocab: Set<string>, maxDistance = 2): string | undefined {
  const needle = word.toLowerCase();
  let winner: string | undefined;
  let winnerDistance = maxDistance + 1;
  for (const candidate of vocab) {
    const lengthGap = Math.abs(candidate.length - needle.length);
    if (lengthGap <= maxDistance) {
      const distance = levenshtein(needle, candidate);
      if (distance <= maxDistance && distance < winnerDistance) {
        winner = candidate;
        winnerDistance = distance;
      }
    }
  }
  return winner;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Jaccard similarity of the character-trigram sets of two strings.
 *
 * Both inputs are lower-cased and padded with one space on each side before
 * trigrams are extracted, so word boundaries contribute to the score.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A value in `[0, 1]`: the number of shared trigrams divided by the
 *   number of distinct trigrams overall. Returns `0` when neither string
 *   yields any trigram (e.g. both are empty) instead of dividing 0 by 0.
 */
function trigramSimilarity(a: string, b: string): number {
  const toTrigrams = (text: string): Set<string> => {
    const grams = new Set<string>();
    const padded = ` ${text} `;
    for (let start = 0; start + 3 <= padded.length; start++) {
      grams.add(padded.slice(start, start + 3));
    }
    return grams;
  };

  const left = toTrigrams(a.toLowerCase());
  const right = toTrigrams(b.toLowerCase());

  let shared = 0;
  for (const gram of left) {
    if (right.has(gram)) shared++;
  }

  const union = left.size + right.size - shared;
  // Two empty inputs produce no trigrams at all; avoid returning NaN.
  return union === 0 ? 0 : shared / union;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Rank rows by trigram similarity to the query.
 *
 * Each row is scored with the larger of two similarities: query vs. title,
 * and query vs. the first 200 characters of the lower-cased content. Rows
 * scoring 0.1 or less are dropped; the rest are returned best-first.
 *
 * @param rows - Candidate rows as read from the FTS table.
 * @param query - Raw user query.
 * @param limit - Maximum number of results to return.
 * @returns Up to `limit` results tagged with `matchLayer: "trigram"`, whose
 *   `rank` is the trigram score.
 */
function trigramSearch(rows: Array<{ title: string; content: string; content_type: string; label: string; source: string; rank: number }>, query: string, limit: number): SearchResult[] {
  const needle = query.toLowerCase();

  const candidates: Array<{ row: (typeof rows)[number]; score: number }> = [];
  for (const row of rows) {
    const titleScore = trigramSimilarity(needle, row.title.toLowerCase());
    const contentScore = trigramSimilarity(needle, row.content.toLowerCase().slice(0, 200));
    const score = Math.max(titleScore, contentScore);
    if (score > 0.1) candidates.push({ row, score });
  }

  candidates.sort((x, y) => y.score - x.score);

  return candidates.slice(0, limit).map(({ row, score }) => {
    const isProse = row.content_type === "markdown" || row.content_type === "json";
    return {
      title: row.title,
      content: row.content,
      source: row.source,
      rank: score,
      contentType: isProse ? ("prose" as const) : ("code" as const),
      matchLayer: "trigram" as const,
    };
  });
}
|
|
98
|
+
|
|
99
|
+
/**
 * Reciprocal Rank Fusion: merge two ranked result lists into one.
 *
 * Every result contributes `1 / (k + position + 1)` (position is 0-based
 * within its own list) to the entry identified by its title and source.
 * Contributions for the same entry are summed, whether they come from the
 * same list or from both lists. The first occurrence of an entry supplies
 * the returned result object.
 *
 * Title and source are joined with a NUL separator to form the identity key,
 * so that pairs such as ("ab", "c") and ("a", "bc") are not confused.
 *
 * @param porterResults - First ranked list (best first).
 * @param trigramResults - Second ranked list (best first).
 * @param k - RRF damping constant (default 60).
 * @returns Fused results sorted by descending fused score; each has
 *   `matchLayer: "rrf"` and `rank` set to the fused score.
 */
function rrfMerge(
  porterResults: SearchResult[],
  trigramResults: SearchResult[],
  k = 60,
): SearchResult[] {
  const fused = new Map<string, { result: SearchResult; score: number }>();

  // Fold one ranked list into the shared score table.
  const accumulate = (ranked: SearchResult[]): void => {
    ranked.forEach((hit, position) => {
      const key = `${hit.title}\u0000${hit.source}`;
      const contribution = 1 / (k + position + 1);
      const entry = fused.get(key);
      if (entry) {
        entry.score += contribution;
      } else {
        fused.set(key, { result: { ...hit, matchLayer: "rrf" }, score: contribution });
      }
    });
  };

  accumulate(porterResults);
  accumulate(trigramResults);

  return Array.from(fused.values())
    .sort((x, y) => y.score - x.score)
    .map((entry) => ({ ...entry.result, rank: entry.score }));
}
|
|
133
|
+
|
|
134
|
+
/**
 * Lower-case words treated as noise when building a search query.
 * Membership is tested against the lower-cased query token.
 */
const STOPWORDS = new Set<string>([
  // Three-letter English function words.
  "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
  "her", "was", "one", "our", "out", "has", "his", "how", "its", "may",
  "new", "now", "old", "see", "way", "who", "did", "get", "got", "let",
  "say", "she", "too", "use",
  // Longer English function words.
  "will", "with", "this", "that", "from",
  "they", "been", "have", "many", "some", "them", "than", "each", "make",
  "like", "just", "over", "such", "take", "into", "year", "your", "good",
  "could", "would", "about", "which", "their", "there", "other", "after",
  "should", "through", "also", "more", "most", "only", "very", "when",
  "what", "then", "these", "those", "being", "does", "done", "both",
  "same", "still", "while", "where", "here", "were", "much",
  // Generic development / changelog vocabulary.
  "update", "updates", "updated", "deps", "dev", "tests", "test",
  "add", "added", "fix", "fixed", "run", "running", "using",
]);
|
|
148
|
+
|
|
149
|
+
/**
 * Remove case-insensitive duplicates from a token list.
 *
 * The first spelling of each token is kept, and the original order is
 * preserved.
 *
 * @param tokens - Tokens to de-duplicate.
 * @returns A new array without later case-insensitive repeats.
 */
function dedupeTokens(tokens: string[]): string[] {
  const seenKeys = new Set<string>();
  return tokens.filter((token) => {
    const folded = token.toLowerCase();
    if (seenKeys.has(folded)) return false;
    seenKeys.add(folded);
    return true;
  });
}
|
|
161
|
+
|
|
162
|
+
/**
 * Turn free-form user input into a quoted FTS query string.
 *
 * Steps:
 *  1. Replace the characters `' " ( ) { } [ ] * : ^ ~` with spaces.
 *  2. Split on whitespace; drop empty tokens and the operator keywords
 *     AND / OR / NOT / NEAR (case-insensitive).
 *  3. Drop case-insensitive duplicates.
 *  4. Drop stopwords, unless that would leave nothing.
 *  5. Wrap each remaining token in double quotes and join them.
 *
 * @param query - Raw user query.
 * @param mode - `"AND"` joins terms with a space, `"OR"` joins with ` OR `.
 * @returns The sanitized query, or `""` (an empty quoted phrase) when no
 *   usable token remains.
 */
export function sanitizeQuery(query: string, mode: "AND" | "OR" = "AND"): string {
  const isOperator = (token: string): boolean =>
    ["AND", "OR", "NOT", "NEAR"].includes(token.toUpperCase());

  const stripped = query.replace(/['"(){}[\]*:^~]/g, " ");
  const tokens = dedupeTokens(
    stripped.split(/\s+/).filter((token) => token.length > 0 && !isOperator(token)),
  );
  if (tokens.length === 0) return '""';

  const withoutStopwords = tokens.filter((token) => !STOPWORDS.has(token.toLowerCase()));
  // A query made only of stopwords is searched as-is rather than emptied.
  const chosen = withoutStopwords.length > 0 ? withoutStopwords : tokens;

  const separator = mode === "OR" ? " OR " : " ";
  return chosen.map((token) => `"${token}"`).join(separator);
}
|
|
173
|
+
|
|
174
|
+
/**
 * SQLite-backed content index with three retrieval layers: an FTS5 table
 * using the `porter unicode61` tokenizer, an in-process trigram similarity
 * pass, and Reciprocal Rank Fusion of the two. An optional "fuzzy" mode
 * retries the FTS query with spelling-corrected terms.
 *
 * The store opens its database lazily: every public async method calls
 * `init()` when the store is not ready, including after `close()`.
 */
export class ContentStore {
  // Handle returned by the dynamically loaded SQLite driver; its type is not
  // known statically.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private db: any;
  /** Prepared statements keyed by a short name (see `prepareStatements`). */
  private stmts: Map<string, PreparedStatement> = new Map();
  private dbPath: string;
  /** True between a successful `init()` and the next `close()`. */
  private ready = false;

  /**
   * @param opts - Optional settings; `dbPath` overrides the default
   *   database location (`defaultDBPath("content")`).
   */
  constructor(opts?: { dbPath?: string }) {
    this.dbPath = opts?.dbPath ?? defaultDBPath("content");
  }

  /** Open the database, apply pragmas, create the schema and prepare statements. */
  async init(): Promise<void> {
    const { lib } = await loadSQLite();
    // The driver may expose its constructor as a named export, under
    // `default`, or as the module itself.
    const Database = lib.Database ?? lib.default?.Database ?? lib;
    this.db = new Database(this.dbPath);
    applyWALPragmas(this.db);
    this.initSchema();
    this.prepareStatements();
    this.ready = true;
  }

  /** Create the FTS5 table and the per-source bookkeeping table if missing. */
  private initSchema(): void {
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS content_fts USING fts5(
        title, content, content_type, label, source,
        tokenize='porter unicode61'
      );

      CREATE TABLE IF NOT EXISTS content_sources (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        label TEXT NOT NULL UNIQUE,
        source TEXT NOT NULL,
        content_type TEXT NOT NULL DEFAULT 'plain',
        mtime INTEGER,
        sha256 TEXT,
        chunk_count INTEGER NOT NULL DEFAULT 0,
        indexed_at TEXT NOT NULL DEFAULT (datetime('now'))
      );

      CREATE INDEX IF NOT EXISTS idx_sources_label ON content_sources(label);
    `);
  }

  /** Compile every SQL statement used by the store and cache it by name. */
  private prepareStatements(): void {
    const p = (key: string, sql: string) => {
      this.stmts.set(key, this.db.prepare(sql) as PreparedStatement);
    };

    p("insertFTS", `INSERT INTO content_fts (title, content, content_type, label, source) VALUES (?, ?, ?, ?, ?)`);
    p("searchFTS", `SELECT title, content, content_type, label, source, rank FROM content_fts WHERE content_fts MATCH ? ORDER BY rank LIMIT ?`);
    p("searchFTSAll", `SELECT title, content, content_type, label, source, rank FROM content_fts ORDER BY rank LIMIT ?`);
    p("deleteByLabel", `DELETE FROM content_fts WHERE label = ?`);
    p("insertSource", `INSERT INTO content_sources (label, source, content_type, mtime, sha256, chunk_count) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(label) DO UPDATE SET source=excluded.source, content_type=excluded.content_type, mtime=excluded.mtime, sha256=excluded.sha256, chunk_count=excluded.chunk_count, indexed_at=datetime('now')`);
    p("getSource", `SELECT label, source, content_type, mtime, sha256, chunk_count, indexed_at FROM content_sources WHERE label = ?`);
    p("deleteSource", `DELETE FROM content_sources WHERE label = ?`);
    p("countSources", `SELECT COUNT(*) AS cnt FROM content_sources`);
    p("countFTS", `SELECT COUNT(*) AS cnt FROM content_fts`);
  }

  /**
   * Look up a prepared statement by name.
   *
   * @throws Error if no statement was prepared under `key` (for example when
   *   the store has not been initialised).
   */
  private stmt(key: string): PreparedStatement {
    const statement = this.stmts.get(key);
    if (!statement) {
      throw new Error(`ContentStore: statement "${key}" is not prepared`);
    }
    return statement;
  }

  /**
   * Chunk `text` and (re)index it under `label`.
   *
   * When `source` names an existing file whose modification time equals the
   * one recorded for `label`, nothing is written and the previously stored
   * chunk count is reported with `sourceId: -1`. Otherwise the old chunks
   * for `label` are replaced, and the source row upserted, inside a single
   * transaction.
   *
   * @param label - Unique name of the indexed source.
   * @param text - Content to chunk and index.
   * @param opts - `contentType` (default `"plain"`), `source` (default
   *   `label`), `chunkSize` (default 4096).
   * @returns Label, number of chunks written and how many of them contain code.
   */
  async index(label: string, text: string, opts?: { contentType?: "markdown" | "json" | "plain"; source?: string; chunkSize?: number }): Promise<IndexResult> {
    if (!this.ready) await this.init();

    const contentType = opts?.contentType ?? "plain";
    const source = opts?.source ?? label;
    const chunkSize = opts?.chunkSize ?? 4096;

    // Check staleness for file-backed sources
    let mtime: number | undefined;
    let sha256: string | undefined;
    if (existsSync(source)) {
      mtime = statSync(source).mtimeMs;
      const existing = this.stmt("getSource").get(label) as
        | { mtime?: number | null; chunk_count?: number }
        | undefined;
      if (existing?.mtime === mtime) {
        // Unchanged on disk: report the chunk count recorded at index time.
        return { sourceId: -1, label, totalChunks: existing.chunk_count ?? 0, codeChunks: 0 };
      }
      sha256 = createHash("sha256").update(text).digest("hex");
    }

    // Chunk before touching the database so a chunking failure leaves the
    // previously indexed content intact.
    const chunks = autoChunk(text, contentType, chunkSize);
    let codeChunks = 0;

    const transaction = this.db.transaction(() => {
      // Reset in case the retry helper runs this callback more than once.
      codeChunks = 0;
      this.stmt("deleteByLabel").run(label);
      for (const chunk of chunks) {
        this.stmt("insertFTS").run(chunk.title, chunk.content, contentType, label, source);
        if (chunk.hasCode) codeChunks++;
      }
      this.stmt("insertSource").run(label, source, contentType, mtime ?? null, sha256 ?? null, chunks.length);
    });

    withRetry(() => transaction());

    // NOTE(review): sourceId is a constant placeholder, not the row id of
    // content_sources — confirm whether callers rely on it.
    return { sourceId: 1, label, totalChunks: chunks.length, codeChunks };
  }

  /**
   * Search the index.
   *
   * Modes:
   *  - `"porter"`  — FTS5 MATCH on the sanitized query only.
   *  - `"trigram"` — trigram similarity over a sample of rows only.
   *  - `"rrf"` (default) — Reciprocal Rank Fusion of the two above.
   *  - `"fuzzy"`   — like `"rrf"`; additionally, when fewer than three fused
   *    results exist and at least one query word has a spelling correction,
   *    the FTS query is re-run with corrected words and fused in.
   *
   * @param query - Raw user query.
   * @param opts - `limit` (default 10), `offset` (default 0, number of
   *   leading results to skip), `mode` (default `"rrf"`).
   * @returns At most `limit` results, starting at `offset`.
   */
  async search(query: string, opts?: { limit?: number; offset?: number; mode?: "porter" | "trigram" | "rrf" | "fuzzy" }): Promise<SearchResult[]> {
    if (!this.ready) await this.init();

    type FtsRow = {
      title: string;
      content: string;
      content_type: string;
      label: string;
      source: string;
      rank: number;
    };

    const limit = opts?.limit ?? 10;
    const offset = Math.max(0, opts?.offset ?? 0);
    const mode = opts?.mode ?? "rrf";
    // Candidate fetch sizes scale with offset + limit so that a later page
    // still has rows to draw from. With offset 0 this equals `limit`.
    const fetchWindow = offset + limit;
    const page = (hits: SearchResult[]): SearchResult[] => hits.slice(offset, offset + limit);

    // Convert a raw FTS row into the public result shape without leaking
    // table-only columns (content_type, label).
    const toResult = (row: FtsRow, matchLayer: "porter" | "fuzzy", rank: number): SearchResult => ({
      title: row.title,
      content: row.content,
      source: row.source,
      rank,
      contentType: row.content_type === "markdown" || row.content_type === "json" ? ("prose" as const) : ("code" as const),
      matchLayer,
    });

    const sanitized = sanitizeQuery(query);

    // Porter stemmer search (FTS5 default)
    const porterRows = this.stmt("searchFTS").all(sanitized, fetchWindow * 2) as FtsRow[];
    const porterResults = porterRows.map((row) => toResult(row, "porter", row.rank));

    if (mode === "porter") return page(porterResults);

    // Trigram search. Only the first `fetchWindow * 3` rows returned by the
    // un-filtered statement are scored, not the whole table.
    const allRows = this.stmt("searchFTSAll").all(fetchWindow * 3) as FtsRow[];
    const trigramResults = trigramSearch(allRows, query, fetchWindow * 2);

    if (mode === "trigram") return page(trigramResults);

    // RRF fusion
    const rrfResults = rrfMerge(porterResults, trigramResults);

    if (mode === "rrf") return page(rrfResults);

    // Fuzzy mode: apply fuzzy correction to query terms
    const vocab = buildVocabulary(allRows);
    const queryWords = query.toLowerCase().split(/\s+/).filter((w) => w.length > 2);
    const correctedWords = queryWords.map((w) => fuzzySuggest(w, vocab) ?? w);
    const hasCorrections = correctedWords.some((w, i) => w !== queryWords[i]);

    if (hasCorrections && rrfResults.length < 3) {
      // Re-search with corrected terms
      const correctedSanitized = sanitizeQuery(correctedWords.join(" "));
      const correctedRows = this.stmt("searchFTS").all(correctedSanitized, fetchWindow * 2) as FtsRow[];
      const correctedResults = correctedRows.map((row) =>
        toResult(row, "fuzzy", row.rank * 0.9), // slightly lower confidence
      );
      return page(rrfMerge(rrfResults, correctedResults));
    }

    return page(rrfResults);
  }

  /**
   * Report how many sources and chunks are indexed.
   * `codeChunks` is always reported as 0 because it is not stored.
   */
  async getStats(): Promise<StoreStats> {
    if (!this.ready) await this.init();
    const sourcesRow = this.stmt("countSources").get() as { cnt: number };
    const chunksRow = this.stmt("countFTS").get() as { cnt: number };
    return {
      sources: sourcesRow.cnt,
      chunks: chunksRow.cnt,
      codeChunks: 0,
    };
  }

  /**
   * Delete every indexed chunk and source.
   *
   * @returns The number of sources that were present before the purge.
   */
  async purge(): Promise<number> {
    if (!this.ready) await this.init();
    // Count first: after the DELETE the table is empty and the count is 0.
    const row = this.stmt("countSources").get() as { cnt: number };
    this.db.exec(`DELETE FROM content_fts; DELETE FROM content_sources;`);
    return row.cnt;
  }

  /**
   * Close the database handle, ignoring any error raised while closing.
   * The store is marked not-ready so the next operation re-opens it.
   */
  close(): void {
    try { this.db.close(); } catch { /* ignore */ }
    this.db = undefined;
    this.stmts.clear();
    this.ready = false;
  }
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compact tool — trigger manual compaction with stats
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { CompactionStats } from "../types.js";
|
|
6
|
+
|
|
7
|
+
/** Outcome reported by the `compact` tool. */
export interface CompactResult {
  /** Whether the request was accepted. */
  success: boolean;
  /** Compaction statistics, when available. */
  stats?: CompactionStats;
  /** Human-readable status line. */
  message: string;
}

/**
 * Acknowledge a manual compaction request.
 *
 * This function performs no compaction itself and returns no `stats`; it
 * only reports that the request was accepted. (Per the original author's
 * note, the real work happens in the `session_before_compact` hook.)
 *
 * @returns A successful result carrying a fixed status message.
 */
export function compactTool(): CompactResult {
  const acknowledgement: CompactResult = {
    success: true,
    message: "Compaction triggered. Stats will be available after next compact event.",
  };
  return acknowledgement;
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_batch_execute tool — sequential batch of commands + searches (no rollback on failure)
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { PolyglotExecutor } from "../executor/executor.js";
|
|
6
|
+
import { ContentStore } from "../store/index.js";
|
|
7
|
+
import type { Language, ExecResult, SearchResult } from "../types.js";
|
|
8
|
+
|
|
9
|
+
/** A batch item that runs a code snippet. */
export interface BatchCommand {
  type: "execute";
  language: Language;
  code: string;
  /** Timeout passed to the executor; defaults to 30000 when omitted. */
  timeout?: number;
}

/** A batch item that queries the content index. */
export interface BatchSearch {
  type: "search";
  query: string;
  /** Maximum number of results; defaults to 10 when omitted. */
  limit?: number;
}

export type BatchItem = BatchCommand | BatchSearch;

/** Per-item outcomes, in the same order as the submitted items. */
export interface BatchResult {
  results: Array<
    | { type: "execute"; result: ExecResult }
    | { type: "search"; results: SearchResult[] }
  >;
}

/**
 * Run a list of execute/search items one after another.
 *
 * Items are processed strictly in order and each one is awaited before the
 * next starts. If an item throws, the error propagates and later items are
 * not run; the content store is closed in every case.
 *
 * @param items - Items to process.
 * @returns One result entry per item, in input order.
 */
export async function ctxBatchExecute(items: BatchItem[]): Promise<BatchResult> {
  const results: BatchResult["results"] = [];
  const executor = new PolyglotExecutor();
  const store = new ContentStore();
  await store.init();

  try {
    for (const item of items) {
      if (item.type === "execute") {
        const result = await executor.execute({
          language: item.language,
          code: item.code,
          timeout: item.timeout ?? 30000,
        });
        results.push({ type: "execute", result });
      } else {
        const searchResults = await store.search(item.query, { limit: item.limit ?? 10 });
        results.push({ type: "search", results: searchResults });
      }
    }
  } finally {
    // Release the database handle even when an item fails.
    store.close();
  }

  return { results };
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_doctor tool — diagnostics checklist
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { existsSync } from "node:fs";
|
|
6
|
+
import { COMPACTOR_CONFIG_PATH } from "../config/manager.js";
|
|
7
|
+
import type { SessionDB } from "../session/db.js";
|
|
8
|
+
import type { ContentStore } from "../store/index.js";
|
|
9
|
+
|
|
10
|
+
/** Result of the diagnostics run. */
export interface DoctorResult {
  /** True when no check has status `"fail"` (warnings do not count). */
  healthy: boolean;
  checks: Array<{
    name: string;
    status: "pass" | "fail" | "warn";
    message: string;
  }>;
}

/**
 * Run the diagnostics checklist.
 *
 * Checks, in order: presence of the config file (missing is only a
 * warning), a probe call on the session database, a stats query on the
 * content store, and availability of the `node`, `python3` and `bash`
 * executables via `command -v` (missing is only a warning).
 *
 * @param sessionDB - Session database to probe.
 * @param contentStore - Content store to probe.
 * @returns Every check's outcome plus the overall `healthy` flag.
 */
export async function ctxDoctor(
  sessionDB: SessionDB,
  contentStore: ContentStore,
): Promise<DoctorResult> {
  const checks: DoctorResult["checks"] = [];

  // Config check — evaluate once so status and message cannot disagree.
  const hasConfig = existsSync(COMPACTOR_CONFIG_PATH);
  checks.push({
    name: "Config file",
    status: hasConfig ? "pass" : "warn",
    message: hasConfig ? "Config found" : "Using defaults (no config file)",
  });

  // Session DB check — only whether the call throws matters; the value is unused.
  try {
    sessionDB.getEventCount("test");
    checks.push({
      name: "Session DB",
      status: "pass",
      message: "SQLite connection OK",
    });
  } catch (err) {
    checks.push({
      name: "Session DB",
      status: "fail",
      message: `Connection failed: ${err}`,
    });
  }

  // Content store check
  try {
    const stats = await contentStore.getStats();
    checks.push({
      name: "Content Store",
      status: "pass",
      message: `FTS5 index: ${stats.sources} sources, ${stats.chunks} chunks`,
    });
  } catch (err) {
    checks.push({
      name: "Content Store",
      status: "fail",
      message: `FTS5 error: ${err}`,
    });
  }

  // Runtime checks — load child_process once rather than per runtime.
  const { execSync } = await import("node:child_process");
  const runtimes = ["node", "python3", "bash"];
  for (const rt of runtimes) {
    try {
      // `rt` comes from the fixed list above, never from user input.
      execSync(`command -v ${rt}`, { stdio: "ignore" });
      checks.push({ name: `Runtime: ${rt}`, status: "pass", message: "Available" });
    } catch {
      checks.push({ name: `Runtime: ${rt}`, status: "warn", message: "Not found" });
    }
  }

  const healthy = checks.every((c) => c.status !== "fail");
  return { healthy, checks };
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_execute_file tool — process file via FILE_CONTENT variable
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { PolyglotExecutor } from "../executor/executor.js";
|
|
6
|
+
import type { Language, ExecResult } from "../types.js";
|
|
7
|
+
import { readFileSync } from "node:fs";
|
|
8
|
+
|
|
9
|
+
/** Input for the `ctx_execute_file` tool. */
export interface CtxExecuteFileInput {
  language: Language;
  /** Path of the file whose content is exposed to the script. */
  path: string;
  /** Timeout passed to the executor; defaults to 30000 when omitted. */
  timeout?: number;
}

/**
 * Read a file and hand it to the executor together with a prelude that
 * defines `FILE_CONTENT` as the file's text.
 *
 * The file is read synchronously as UTF-8; a read failure throws before the
 * executor is created.
 *
 * @param input - Language, file path and optional timeout.
 * @returns Whatever `PolyglotExecutor.executeFile` resolves to.
 */
export async function ctxExecuteFile(input: CtxExecuteFileInput): Promise<ExecResult> {
  const fileText = readFileSync(input.path, "utf-8");

  // NOTE(review): the prelude is JavaScript syntax regardless of
  // `input.language` — confirm how executeFile treats it for other languages.
  const code = `const FILE_CONTENT = ${JSON.stringify(fileText)};\n// User script follows:\n`;

  const timeout = input.timeout ?? 30000;
  const runner = new PolyglotExecutor();
  return runner.executeFile({
    language: input.language,
    path: input.path,
    code,
    timeout,
  });
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_execute tool — run code in sandbox, only stdout enters context
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { PolyglotExecutor } from "../executor/executor.js";
|
|
6
|
+
import type { Language, ExecResult } from "../types.js";
|
|
7
|
+
|
|
8
|
+
/** Input for the `ctx_execute` tool. */
export interface CtxExecuteInput {
  language: Language;
  code: string;
  /** Timeout passed to the executor; defaults to 30000 when omitted. */
  timeout?: number;
}

/**
 * Run a code snippet through a fresh `PolyglotExecutor`.
 *
 * @param input - Language, source code and optional timeout.
 * @returns Whatever `PolyglotExecutor.execute` resolves to.
 */
export async function ctxExecute(input: CtxExecuteInput): Promise<ExecResult> {
  const timeout = input.timeout ?? 30000;
  const runner = new PolyglotExecutor();
  return runner.execute({ language: input.language, code: input.code, timeout });
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_fetch_and_index tool — fetch URL → index the response body as plain text
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { ContentStore } from "../store/index.js";
|
|
6
|
+
import type { IndexResult } from "../types.js";
|
|
7
|
+
|
|
8
|
+
/** Input for the `ctx_fetch_and_index` tool. */
export interface CtxFetchAndIndexInput {
  url: string;
  /** Label to index under; defaults to the URL. */
  label?: string;
  /** Chunk size forwarded to the store. */
  chunkSize?: number;
}

/**
 * Download a URL and index the response body.
 *
 * The body is indexed as `"plain"` content with the URL recorded as its
 * source; no format conversion is applied. The content store is closed
 * whether or not indexing succeeds.
 *
 * @param input - URL plus optional label and chunk size.
 * @returns The store's index result.
 * @throws Error when the HTTP response status is not OK.
 */
export async function ctxFetchAndIndex(input: CtxFetchAndIndexInput): Promise<IndexResult> {
  const label = input.label ?? input.url;

  const response = await fetch(input.url, {
    headers: { "User-Agent": "pi-unipi-compactor/0.1.0" },
  });

  if (!response.ok) {
    throw new Error(`Fetch failed: ${response.status} ${response.statusText}`);
  }

  const text = await response.text();
  const store = new ContentStore();
  await store.init();

  try {
    return await store.index(label, text, {
      contentType: "plain",
      source: input.url,
      chunkSize: input.chunkSize,
    });
  } finally {
    // Release the database handle even when indexing fails.
    store.close();
  }
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_index tool — chunk content → index into FTS5
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { ContentStore } from "../store/index.js";
|
|
6
|
+
import type { IndexResult } from "../types.js";
|
|
7
|
+
import { readFileSync } from "node:fs";
|
|
8
|
+
|
|
9
|
+
/** Input for the `ctx_index` tool. Provide `filePath` or `content`. */
export interface CtxIndexInput {
  label: string;
  /** Inline text to index; used only when `filePath` is not given. */
  content?: string;
  /** File to read and index; takes precedence over `content`. */
  filePath?: string;
  /** Defaults to `"plain"`. */
  contentType?: "markdown" | "json" | "plain";
  /** Chunk size forwarded to the store. */
  chunkSize?: number;
}

/**
 * Index a file or an inline string under `input.label`.
 *
 * The input is validated and the file (if any) is read before the content
 * store is opened, so invalid input never leaves a database handle open.
 * The store is closed whether or not indexing succeeds.
 *
 * @param input - Label plus either `filePath` or `content`.
 * @returns The store's index result.
 * @throws Error when neither `filePath` nor `content` is provided (an empty
 *   string counts as not provided).
 */
export async function ctxIndex(input: CtxIndexInput): Promise<IndexResult> {
  let text: string;
  let source: string;

  if (input.filePath) {
    text = readFileSync(input.filePath, "utf-8");
    source = input.filePath;
  } else if (input.content) {
    text = input.content;
    source = input.label;
  } else {
    throw new Error("Either content or filePath must be provided");
  }

  const store = new ContentStore();
  await store.init();

  try {
    return await store.index(input.label, text, {
      contentType: input.contentType ?? "plain",
      source,
      chunkSize: input.chunkSize,
    });
  } finally {
    // Release the database handle even when indexing fails.
    store.close();
  }
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ctx_search tool — query indexed content
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { ContentStore } from "../store/index.js";
|
|
6
|
+
import type { SearchResult } from "../types.js";
|
|
7
|
+
|
|
8
|
+
/** Input for the `ctx_search` tool. */
export interface CtxSearchInput {
  query: string;
  /** Maximum number of results; defaults to 10. */
  limit?: number;
  /** Forwarded to the store as `offset`; defaults to 0. */
  offset?: number;
}

/**
 * Query the content index with a fresh store instance.
 *
 * The store is closed whether or not the search succeeds.
 *
 * @param input - Query plus optional limit and offset.
 * @returns The store's search results.
 */
export async function ctxSearch(input: CtxSearchInput): Promise<SearchResult[]> {
  const store = new ContentStore();
  await store.init();
  try {
    return await store.search(input.query, {
      limit: input.limit ?? 10,
      offset: input.offset ?? 0,
    });
  } finally {
    // Release the database handle even when the search fails.
    store.close();
  }
}
|