@pi-unipi/compactor 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/package.json +54 -0
- package/skills/compactor/SKILL.md +74 -0
- package/skills/compactor-doctor/SKILL.md +74 -0
- package/skills/compactor-ops/SKILL.md +65 -0
- package/skills/compactor-stats/SKILL.md +49 -0
- package/skills/compactor-tools/SKILL.md +120 -0
- package/src/commands/index.ts +248 -0
- package/src/compaction/brief.ts +334 -0
- package/src/compaction/build-sections.ts +77 -0
- package/src/compaction/content.ts +47 -0
- package/src/compaction/cut.ts +80 -0
- package/src/compaction/extract/commits.ts +52 -0
- package/src/compaction/extract/files.ts +58 -0
- package/src/compaction/extract/goals.ts +36 -0
- package/src/compaction/extract/preferences.ts +40 -0
- package/src/compaction/filter-noise.ts +46 -0
- package/src/compaction/format.ts +48 -0
- package/src/compaction/hooks.ts +145 -0
- package/src/compaction/merge.ts +113 -0
- package/src/compaction/normalize.ts +68 -0
- package/src/compaction/recall-scope.ts +32 -0
- package/src/compaction/sanitize.ts +12 -0
- package/src/compaction/search-entries.ts +101 -0
- package/src/compaction/sections.ts +15 -0
- package/src/compaction/summarize.ts +29 -0
- package/src/config/manager.ts +89 -0
- package/src/config/presets.ts +83 -0
- package/src/config/schema.ts +55 -0
- package/src/display/bash-display.ts +28 -0
- package/src/display/diff-presentation.ts +20 -0
- package/src/display/diff-renderer.ts +255 -0
- package/src/display/line-width-safety.ts +16 -0
- package/src/display/pending-diff-preview.ts +51 -0
- package/src/display/render-utils.ts +52 -0
- package/src/display/thinking-label.ts +18 -0
- package/src/display/tool-overrides.ts +136 -0
- package/src/display/user-message-box.ts +16 -0
- package/src/executor/executor.ts +242 -0
- package/src/executor/runtime.ts +125 -0
- package/src/index.ts +211 -0
- package/src/info-screen.ts +60 -0
- package/src/security/evaluator.ts +142 -0
- package/src/security/policy.ts +74 -0
- package/src/security/scanner.ts +65 -0
- package/src/session/db.ts +237 -0
- package/src/session/extract.ts +107 -0
- package/src/session/resume-inject.ts +25 -0
- package/src/session/snapshot.ts +326 -0
- package/src/store/chunking.ts +126 -0
- package/src/store/db-base.ts +79 -0
- package/src/store/index.ts +364 -0
- package/src/tools/compact.ts +20 -0
- package/src/tools/ctx-batch-execute.ts +53 -0
- package/src/tools/ctx-doctor.ts +78 -0
- package/src/tools/ctx-execute-file.ts +26 -0
- package/src/tools/ctx-execute.ts +21 -0
- package/src/tools/ctx-fetch-and-index.ts +37 -0
- package/src/tools/ctx-index.ts +42 -0
- package/src/tools/ctx-search.ts +23 -0
- package/src/tools/ctx-stats.ts +37 -0
- package/src/tools/register.ts +360 -0
- package/src/tools/vcc-recall.ts +64 -0
- package/src/tui/settings-overlay.ts +290 -0
- package/src/types.ts +269 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Snapshot builder — converts stored SessionEvents into XML resume snapshot
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { StoredEvent } from "../types.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Optional settings accepted by `buildResumeSnapshot`.
 */
export interface BuildSnapshotOpts {
  /**
   * Tool name printed in each section's "For full details" call.
   * `buildResumeSnapshot` falls back to `"ctx_search"` when omitted.
   */
  searchTool?: string;

  /**
   * Value written to the `compact_count` attribute of the root element.
   * `buildResumeSnapshot` falls back to `1` when omitted.
   */
  compactCount?: number;
}
|
|
11
|
+
|
|
12
|
+
/** Maximum number of distinct file paths listed in the `<files>` section; only the most recently first-seen paths are kept. */
const MAX_ACTIVE_FILES = 10;
|
|
13
|
+
|
|
14
|
+
/**
 * Escapes the five XML-reserved characters so `text` can be embedded safely
 * in element content or inside a quoted attribute value.
 *
 * @param text - Raw text to embed in the snapshot.
 * @returns `text` with `&`, `<`, `>`, `"` and `'` replaced by their predefined entities.
 */
function escapeXML(text: string): string {
  return text
    // `&` must go first, otherwise the ampersands introduced by the
    // replacements below would themselves be escaped a second time.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
|
|
22
|
+
|
|
23
|
+
/**
 * Turns candidate strings into at most `maxQueries` distinct search queries.
 *
 * Blank (empty or whitespace-only) items are skipped. Each remaining item is
 * cut to 80 UTF-16 code units, and de-duplication happens AFTER the cut so two
 * long items sharing the same first 80 characters yield a single query.
 *
 * @param items - Candidate query strings, in priority order.
 * @param maxQueries - Upper bound on the number of queries returned; values <= 0 yield `[]`.
 * @returns Distinct, truncated queries in first-seen order.
 */
function buildQueries(items: string[], maxQueries = 4): string[] {
  const MAX_QUERY_LENGTH = 80;
  const queries = new Set<string>();

  for (const item of items) {
    if (queries.size >= maxQueries) break;
    if (item.trim().length === 0) continue;

    let query = item;
    if (query.length > MAX_QUERY_LENGTH) {
      query = query.slice(0, MAX_QUERY_LENGTH);
      // Do not leave a dangling high surrogate when the cut lands inside an
      // astral character: a lone surrogate is not valid in XML output.
      const last = query.charCodeAt(query.length - 1);
      if (last >= 0xd800 && last <= 0xdbff) query = query.slice(0, -1);
    }
    queries.add(query);
  }

  return [...queries];
}
|
|
28
|
+
|
|
29
|
+
/**
 * Formats the "For full details" hint appended to a snapshot section: a
 * ready-to-run search-tool invocation carrying the given queries.
 *
 * @param toolName - Name of the search tool to print (XML-escaped).
 * @param queries - Query strings; each is XML-escaped and double-quoted.
 * @returns The multi-line hint (starting with a newline), or `""` when there are no queries.
 */
function toolCall(toolName: string, queries: string[]): string {
  if (queries.length === 0) {
    return "";
  }

  const quoted: string[] = [];
  for (const query of queries) {
    quoted.push('"' + escapeXML(query) + '"');
  }

  const hintLines = [
    "",
    " For full details:",
    ` ${escapeXML(toolName)}(`,
    ` queries: [${quoted.join(", ")}],`,
    ' source: "session-events"',
    " )",
  ];
  return hintLines.join("\n");
}
|
|
34
|
+
|
|
35
|
+
/**
 * Renders the `<files>` section: one line per file giving its base name and
 * how many times each operation (write / edit / read) touched it.
 *
 * Only the last `MAX_ACTIVE_FILES` distinct paths (by first appearance) are
 * listed; the `count` attribute still reports every distinct path seen.
 *
 * @param fileEvents - Events of category "file"; `data` holds the file path.
 * @param searchTool - Tool name used in the trailing "For full details" hint.
 * @returns The section text, or `""` when there are no file events.
 */
function buildFilesSection(fileEvents: StoredEvent[], searchTool: string): string {
  if (fileEvents.length === 0) return "";

  // Event type -> short label; any other type is shown verbatim.
  const opLabels = new Map<string, string>([
    ["file_write", "write"],
    ["file_edit", "edit"],
    ["file_read", "read"],
  ]);

  // path -> (operation label -> occurrences). Map keeps first-seen order.
  const opsByPath = new Map<string, Map<string, number>>();
  for (const ev of fileEvents) {
    let ops = opsByPath.get(ev.data);
    if (!ops) {
      ops = new Map<string, number>();
      opsByPath.set(ev.data, ops);
    }
    const op = opLabels.get(ev.type) ?? ev.type;
    ops.set(op, (ops.get(op) ?? 0) + 1);
  }

  const summaryLines: string[] = [];
  const queryTerms: string[] = [];
  for (const [path, ops] of Array.from(opsByPath.entries()).slice(-MAX_ACTIVE_FILES)) {
    const opsStr = Array.from(ops, ([op, n]) => `${op}×${n}`).join(", ");
    // Base name: accept both "/" and "\" separators and ignore trailing
    // separators; fall back to the whole path if nothing is left (e.g. "/").
    const fileName = path.split(/[\\/]/).filter((segment) => segment.length > 0).pop() ?? path;
    summaryLines.push(` ${escapeXML(fileName)} (${escapeXML(opsStr)})`);
    queryTerms.push(`${fileName} ${Array.from(ops.keys()).join(" ")}`);
  }

  return [
    ` <files count="${opsByPath.size}">`,
    ...summaryLines,
    toolCall(searchTool, buildQueries(queryTerms)),
    ` </files>`,
  ].join("\n");
}
|
|
73
|
+
|
|
74
|
+
/**
 * Renders the `<errors>` section: every error event's text on its own line,
 * followed by a search hint built from those texts.
 *
 * @param errorEvents - Events of category "error".
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when there are no error events.
 */
function buildErrorsSection(errorEvents: StoredEvent[], searchTool: string): string {
  const total = errorEvents.length;
  if (total === 0) return "";

  const messages: string[] = [];
  const lines: string[] = [` <errors count="${total}">`];
  for (const { data } of errorEvents) {
    messages.push(data);
    lines.push(` ${escapeXML(data)}`);
  }
  lines.push(toolCall(searchTool, buildQueries(messages)));
  lines.push(` </errors>`);

  return lines.join("\n");
}
|
|
85
|
+
|
|
86
|
+
/**
 * Renders the `<decisions>` section, listing each distinct decision text once
 * in first-seen order.
 *
 * @param decisionEvents - Events of category "decision".
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when there is nothing to list.
 */
function buildDecisionsSection(decisionEvents: StoredEvent[], searchTool: string): string {
  // A Set preserves insertion order, so this is an order-stable de-duplication.
  const distinct = [...new Set(decisionEvents.map((ev) => ev.data))];
  if (distinct.length === 0) return "";

  const body = distinct.map((text) => ` ${escapeXML(text)}`);
  const hint = toolCall(searchTool, buildQueries(distinct));

  return [` <decisions count="${body.length}">`, ...body, hint, ` </decisions>`].join("\n");
}
|
|
106
|
+
|
|
107
|
+
/**
 * Renders the `<rules>` section, listing each distinct rule text once in
 * first-seen order.
 *
 * @param ruleEvents - Events of category "rule".
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when there is nothing to list.
 */
function buildRulesSection(ruleEvents: StoredEvent[], searchTool: string): string {
  // A Set preserves insertion order, so this is an order-stable de-duplication.
  const distinct = [...new Set(ruleEvents.map((ev) => ev.data))];
  if (distinct.length === 0) return "";

  const body = distinct.map((text) => ` ${escapeXML(text)}`);
  const hint = toolCall(searchTool, buildQueries(distinct));

  return [` <rules count="${body.length}">`, ...body, hint, ` </rules>`].join("\n");
}
|
|
127
|
+
|
|
128
|
+
/**
 * Renders the `<git>` section: every git event's text on its own line,
 * followed by a search hint built from those texts.
 *
 * @param gitEvents - Events of category "git".
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when there are no git events.
 */
function buildGitSection(gitEvents: StoredEvent[], searchTool: string): string {
  const total = gitEvents.length;
  if (total === 0) return "";

  const entries: string[] = [];
  const lines: string[] = [` <git count="${total}">`];
  for (const { data } of gitEvents) {
    entries.push(data);
    lines.push(` ${escapeXML(data)}`);
  }
  lines.push(toolCall(searchTool, buildQueries(entries)));
  lines.push(` </git>`);

  return lines.join("\n");
}
|
|
139
|
+
|
|
140
|
+
/**
 * Lists the tasks that are still open, one `[pending]` line per task.
 *
 * Each event's `data` is parsed as JSON. A payload with a string `subject`
 * counts as a task creation; otherwise a payload with string `taskId` and
 * `status` records the latest status for that id. Creations carry no id, so
 * the i-th creation is paired with the i-th id after sorting ids numerically.
 * Tasks whose paired status is "completed", "deleted" or "failed" are dropped.
 *
 * @param taskEvents - Events of category "task".
 * @returns Newline-joined `[pending]` lines, or `""` when nothing is pending.
 */
export function renderTaskState(taskEvents: StoredEvent[]): string {
  if (taskEvents.length === 0) return "";

  const subjects: string[] = [];
  const statusById: Record<string, string> = {};

  for (const { data } of taskEvents) {
    try {
      // Destructuring stays inside the try: a JSON `null` payload throws here
      // and is skipped just like malformed JSON.
      const { subject, taskId, status } = JSON.parse(data) as Record<string, unknown>;
      if (typeof subject === "string") {
        subjects.push(subject);
      } else if (typeof taskId === "string" && typeof status === "string") {
        statusById[taskId] = status;
      }
    } catch {
      /* not JSON */
    }
  }

  if (subjects.length === 0) return "";

  const finished = new Set(["completed", "deleted", "failed"]);
  const orderedIds = Object.keys(statusById).sort((x, y) => Number(x) - Number(y));

  return subjects
    .filter((_subject, index) => {
      const id = orderedIds[index];
      const status = id ? (statusById[id] ?? "pending") : "pending";
      return !finished.has(status);
    })
    .map((subject) => ` [pending] ${escapeXML(subject)}`)
    .join("\n");
}
|
|
165
|
+
|
|
166
|
+
/**
 * Renders the `<task_state>` section around the output of `renderTaskState`.
 *
 * The `count` attribute is the number of lines in the rendered task list; the
 * search hint is built from every task subject found in the events.
 *
 * @param taskEvents - Events of category "task" (JSON payloads in `data`).
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when no task is pending.
 */
function buildTaskSection(taskEvents: StoredEvent[], searchTool: string): string {
  const rendered = renderTaskState(taskEvents);
  if (rendered === "") return "";

  const subjects: string[] = [];
  for (const { data } of taskEvents) {
    try {
      const { subject } = JSON.parse(data) as Record<string, unknown>;
      if (typeof subject === "string") {
        subjects.push(subject);
      }
    } catch {
      /* not JSON */
    }
  }

  const lineCount = rendered.split("\n").length;
  const hint = toolCall(searchTool, buildQueries(subjects));

  return [` <task_state count="${lineCount}">`, rendered, hint, ` </task_state>`].join("\n");
}
|
|
185
|
+
|
|
186
|
+
/**
 * Renders the `<environment>` section: the most recent working directory (if
 * any) followed by every environment event.
 *
 * @param cwdEvents - Events of category "cwd"; only the last one is shown.
 * @param envEvents - Events of category "env"; all are shown in order.
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when both inputs are empty.
 */
function buildEnvironmentSection(cwdEvents: StoredEvent[], envEvents: StoredEvent[], searchTool: string): string {
  const hasCwd = cwdEvents.length > 0;
  if (!hasCwd && envEvents.length === 0) return "";

  const lines: string[] = [` <environment>`];
  const terms: string[] = [];

  if (hasCwd) {
    const latest = cwdEvents[cwdEvents.length - 1];
    lines.push(` cwd: ${escapeXML(latest.data)}`);
    terms.push("working directory");
  }

  for (const { data } of envEvents) {
    lines.push(` ${escapeXML(data)}`);
    terms.push(data);
  }

  lines.push(toolCall(searchTool, buildQueries(terms)));
  lines.push(` </environment>`);
  return lines.join("\n");
}
|
|
206
|
+
|
|
207
|
+
/**
 * Renders the `<subagents>` section: one line per event, prefixed with
 * `[completed]`, `[launched]` or `[unknown]` according to the event type.
 *
 * @param subagentEvents - Events of category "subagent".
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when there are no subagent events.
 */
function buildSubagentsSection(subagentEvents: StoredEvent[], searchTool: string): string {
  if (subagentEvents.length === 0) return "";

  const lines: string[] = [` <subagents count="${subagentEvents.length}">`];
  const terms: string[] = [];

  for (const ev of subagentEvents) {
    let status: string;
    switch (ev.type) {
      case "subagent_completed":
        status = "completed";
        break;
      case "subagent_launched":
        status = "launched";
        break;
      default:
        status = "unknown";
    }
    lines.push(` [${status}] ${escapeXML(ev.data)}`);
    terms.push(`subagent ${ev.data}`);
  }

  lines.push(toolCall(searchTool, buildQueries(terms)));
  lines.push(` </subagents>`);
  return lines.join("\n");
}
|
|
226
|
+
|
|
227
|
+
/**
 * Renders the `<skills>` section: one line per distinct skill name with the
 * number of events that mention it.
 *
 * The skill name is the trimmed text before the first ":" in the event data.
 * The `count` attribute is the total number of skill events, not the number
 * of distinct names.
 *
 * @param skillEvents - Events of category "skill".
 * @param searchTool - Tool name used in the "For full details" hint.
 * @returns The section text, or `""` when there are no skill events.
 */
function buildSkillsSection(skillEvents: StoredEvent[], searchTool: string): string {
  if (skillEvents.length === 0) return "";

  // name -> number of events, in first-seen order.
  const tally = new Map<string, number>();
  for (const { data } of skillEvents) {
    const colon = data.indexOf(":");
    const name = (colon === -1 ? data : data.slice(0, colon)).trim();
    tally.set(name, (tally.get(name) ?? 0) + 1);
  }

  const names = Array.from(tally.keys());
  const body = names.map((name) => ` ${escapeXML(name)} (${tally.get(name)}×)`);
  const terms = names.map((name) => `skill ${name} invocation`);

  return [
    ` <skills count="${skillEvents.length}">`,
    ...body,
    toolCall(searchTool, buildQueries(terms)),
    ` </skills>`,
  ].join("\n");
}
|
|
248
|
+
|
|
249
|
+
/**
 * Renders the self-closing `<intent>` element from the most recent intent event.
 *
 * @param intentEvents - Events of category "intent"; only the last one is used.
 * @returns The element text, or `""` when there are no intent events.
 */
function buildIntentSection(intentEvents: StoredEvent[]): string {
  const total = intentEvents.length;
  if (total === 0) {
    return "";
  }
  const latest = intentEvents[total - 1];
  const mode = escapeXML(latest.data);
  return ` <intent mode="${mode}"/>`;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Builds the `<session_resume>` snapshot from stored session events.
 *
 * Events are grouped by `category`; events with any other category are
 * ignored. The snapshot always starts with a `<how_to_search>` preamble,
 * followed by each non-empty section in this order: files, errors, decisions,
 * rules, git, task_state, environment, subagents, skills, intent.
 *
 * @param events - All stored events of the session.
 * @param opts - Optional `compactCount` (default 1) and `searchTool` (default "ctx_search").
 * @returns The snapshot text. `events` in the header counts ALL input events,
 *          including ignored ones; `generated_at` is the current time in ISO-8601.
 */
export function buildResumeSnapshot(
  events: StoredEvent[],
  opts?: BuildSnapshotOpts,
): string {
  const compactCount = opts?.compactCount ?? 1;
  const searchTool = opts?.searchTool ?? "ctx_search";
  const now = new Date().toISOString();

  // One bucket per known category. A Map (not a plain object) so that an
  // unexpected category such as "constructor" cannot hit an inherited property.
  const categories = [
    "file", "task", "rule", "decision", "cwd", "error",
    "env", "git", "subagent", "intent", "skill",
  ];
  const buckets = new Map<string, StoredEvent[]>(
    categories.map((category): [string, StoredEvent[]] => [category, []]),
  );
  for (const ev of events) {
    buckets.get(ev.category)?.push(ev);
  }
  const eventsOf = (category: string): StoredEvent[] => buckets.get(category) ?? [];

  const howToSearch = [
    " <how_to_search>",
    "Each section below contains a summary of prior work.",
    "For FULL DETAILS, run the exact tool call shown under each section.",
    "Do NOT ask the user to re-explain prior work. Search first.",
    "Do NOT invent your own queries — use the ones provided.",
    "</how_to_search>",
  ].join("\n");

  // Builders return "" for an empty section; those are dropped.
  const sections = [
    howToSearch,
    buildFilesSection(eventsOf("file"), searchTool),
    buildErrorsSection(eventsOf("error"), searchTool),
    buildDecisionsSection(eventsOf("decision"), searchTool),
    buildRulesSection(eventsOf("rule"), searchTool),
    buildGitSection(eventsOf("git"), searchTool),
    buildTaskSection(eventsOf("task"), searchTool),
    buildEnvironmentSection(eventsOf("cwd"), eventsOf("env"), searchTool),
    buildSubagentsSection(eventsOf("subagent"), searchTool),
    buildSkillsSection(eventsOf("skill"), searchTool),
    buildIntentSection(eventsOf("intent")),
  ].filter((section) => section !== "");

  const header = `<session_resume events="${events.length}" compact_count="${compactCount}" generated_at="${now}">`;
  const footer = `</session_resume>`;

  // The preamble is always present, so the body is never empty and no
  // "header + footer only" fallback is needed.
  return `${header}\n\n${sections.join("\n\n")}\n\n${footer}`;
}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content chunking — markdown by headings, JSON recursive, plain text
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * One indexable piece of content produced by the chunkers in this module.
 */
export interface Chunk {
  /**
   * Label for the chunk: a markdown heading (or "Untitled" / a continuation
   * label), a JSON path such as `root.items[0]`, or "Text" for plain text.
   */
  title: string;

  /** The chunk's text. */
  content: string;

  /**
   * `true` when the markdown chunker found a ``` fence in `content`;
   * the JSON and plain-text chunkers always set `false`.
   */
  hasCode: boolean;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Splits markdown into chunks at headings (`#` to `######`).
 *
 * - A heading line starts a new chunk and becomes its title; the heading line
 *   itself is not part of the chunk content.
 * - Lines before the first heading go into a chunk titled "Untitled".
 * - Lines inside a ``` fenced block are never treated as headings, and a chunk
 *   is never split while a fence is open.
 * - Once the buffered content grows past `maxChunkSize` characters (outside a
 *   fence) it is flushed; the chunks that follow are titled
 *   "<title> (continued)" ("Continued" when there is no title). The suffix is
 *   applied once per heading, not once per overflow.
 *
 * @param text - Markdown source.
 * @param maxChunkSize - Soft limit on chunk content length; a chunk is flushed
 *   after the line that pushes it over the limit, so it may exceed the limit by that line.
 * @returns Chunks in document order; `hasCode` is true when the content contains a ``` fence.
 */
export function chunkMarkdown(text: string, maxChunkSize: number = 4096): Chunk[] {
  const chunks: Chunk[] = [];
  let currentTitle = "";
  let isContinuation = false;
  let currentLines: string[] = [];
  // Length of currentLines.join("\n"), maintained incrementally so the buffer
  // is not re-joined on every line.
  let currentLength = 0;
  let inCodeBlock = false;

  const flush = (): void => {
    if (currentLines.length === 0) return;
    const content = currentLines.join("\n");
    chunks.push({
      title: currentTitle || "Untitled",
      content,
      hasCode: content.includes("```"),
    });
    currentLines = [];
    currentLength = 0;
  };

  for (const line of text.split("\n")) {
    if (line.startsWith("```")) inCodeBlock = !inCodeBlock;

    if (!inCodeBlock && /^#{1,6}\s/.test(line)) {
      flush();
      currentTitle = line.replace(/^#{1,6}\s*/, "").trim();
      isContinuation = false;
      continue;
    }

    currentLength += line.length + (currentLines.length > 0 ? 1 : 0);
    currentLines.push(line);

    if (currentLength > maxChunkSize && !inCodeBlock) {
      flush();
      if (!isContinuation) {
        currentTitle = currentTitle ? `${currentTitle} (continued)` : "Continued";
        isContinuation = true;
      }
    }
  }

  flush();
  return chunks;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Chunks JSON text by walking the parsed value, one chunk per leaf, with
 * paths rooted at "root".
 *
 * If parsing or walking throws, the text is chunked as plain text instead.
 *
 * @param text - JSON source (or arbitrary text, which takes the fallback path).
 * @param maxChunkSize - Maximum chunk size forwarded to the underlying chunker.
 * @returns The resulting chunks.
 */
export function chunkJSON(text: string, maxChunkSize: number = 4096): Chunk[] {
  let result: Chunk[];
  try {
    // Parsing AND walking stay inside the try so any failure takes the fallback.
    result = chunkObject(JSON.parse(text), "root", maxChunkSize);
  } catch {
    result = chunkPlainText(text, maxChunkSize);
  }
  return result;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Flattens a parsed JSON value into chunks, one per leaf (primitive or null).
 *
 * Array elements are addressed as `path[i]`, object members as `path.key`.
 * A leaf whose string form is longer than `maxChunkSize` is cut into
 * consecutive slices titled `path [start-end]`, where `end` is the exclusive
 * end offset of that slice. Empty arrays and objects produce no chunks.
 *
 * @param obj - Value to flatten.
 * @param path - Path label of `obj` (callers start with "root").
 * @param maxChunkSize - Maximum slice length; values below 1 (or NaN) are
 *   treated as 1 so slicing always makes progress.
 * @returns Chunks in depth-first order; `hasCode` is always false.
 */
function chunkObject(obj: unknown, path: string, maxChunkSize: number): Chunk[] {
  const limit = maxChunkSize >= 1 ? Math.floor(maxChunkSize) : 1;
  // One shared output array: avoids `push(...sub)`, which can exceed the
  // engine's argument limit on very large arrays, and avoids re-copying
  // chunks at every nesting level.
  const chunks: Chunk[] = [];

  const collect = (value: unknown, at: string): void => {
    if (typeof value !== "object" || value === null) {
      const content = String(value);
      if (content.length <= limit) {
        chunks.push({ title: at, content, hasCode: false });
        return;
      }
      for (let start = 0; start < content.length; start += limit) {
        const end = Math.min(start + limit, content.length);
        chunks.push({
          title: `${at} [${start}-${end}]`,
          content: content.slice(start, end),
          hasCode: false,
        });
      }
      return;
    }

    if (Array.isArray(value)) {
      for (let i = 0; i < value.length; i++) {
        collect(value[i], `${at}[${i}]`);
      }
      return;
    }

    for (const [key, member] of Object.entries(value)) {
      collect(member, `${at}.${key}`);
    }
  };

  collect(obj, path);
  return chunks;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Splits plain text into chunks of at most `maxChunkSize` characters.
 *
 * The text is cut at blank lines into paragraphs, which are packed greedily
 * into chunks joined by a blank line ("\n\n"). The separator counts toward
 * the size limit. A paragraph longer than the limit is hard-split into
 * consecutive slices so no chunk exceeds the limit.
 *
 * @param text - Text to split.
 * @param maxChunkSize - Maximum chunk content length; values below 1 (or NaN) are treated as 1.
 * @returns Chunks titled "Text" with `hasCode` false, in document order.
 *   Empty input still yields a single chunk with empty content.
 */
export function chunkPlainText(text: string, maxChunkSize: number = 4096): Chunk[] {
  const SEPARATOR = "\n\n";
  const limit = maxChunkSize >= 1 ? Math.floor(maxChunkSize) : 1;
  const chunks: Chunk[] = [];
  let current: string[] = [];
  // Length of current.join(SEPARATOR), maintained incrementally.
  let currentLength = 0;

  const flush = (): void => {
    if (current.length === 0) return;
    chunks.push({
      title: "Text",
      content: current.join(SEPARATOR),
      hasCode: false,
    });
    current = [];
    currentLength = 0;
  };

  for (const para of text.split(/\n\s*\n/)) {
    // `start === 0 ||` keeps empty paragraphs as a single (empty) piece.
    for (let start = 0; start === 0 || start < para.length; start += limit) {
      const piece = para.slice(start, start + limit);
      const added = (current.length > 0 ? SEPARATOR.length : 0) + piece.length;
      if (current.length > 0 && currentLength + added > limit) {
        flush();
        currentLength = piece.length;
      } else {
        currentLength += added;
      }
      current.push(piece);
    }
  }

  flush();
  return chunks;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Dispatches to the chunker matching `contentType`.
 *
 * @param text - Content to chunk.
 * @param contentType - "markdown" or "json" select their chunkers; anything else uses the plain-text chunker.
 * @param maxChunkSize - Optional size limit; when omitted each chunker applies its own default.
 * @returns The chunks produced by the selected chunker.
 */
export function autoChunk(text: string, contentType: "markdown" | "json" | "plain", maxChunkSize?: number): Chunk[] {
  if (contentType === "markdown") {
    return chunkMarkdown(text, maxChunkSize);
  }
  if (contentType === "json") {
    return chunkJSON(text, maxChunkSize);
  }
  return chunkPlainText(text, maxChunkSize);
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite backend abstraction with auto-detection
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { existsSync, mkdirSync } from "node:fs";
|
|
6
|
+
import { homedir } from "node:os";
|
|
7
|
+
import { dirname, join } from "node:path";
|
|
8
|
+
|
|
9
|
+
/**
 * Returns the default database file path for `name`:
 * `<home>/.unipi/db/compactor/<name>.db`.
 *
 * Side effect: creates the containing directory (recursively) if it does not
 * exist yet. The database file itself is not created.
 *
 * @param name - Database base name, without the `.db` extension.
 * @returns Absolute path of the database file.
 */
export function defaultDBPath(name: string): string {
  const dbDir = join(homedir(), ".unipi", "db", "compactor");
  const dbFile = join(dbDir, `${name}.db`);
  const parent = dirname(dbFile);
  if (!existsSync(parent)) {
    mkdirSync(parent, { recursive: true });
  }
  return dbFile;
}
|
|
15
|
+
|
|
16
|
+
/** Cached module namespace of the SQLite backend chosen by `loadSQLite`. */
let sqliteLib: any = null;
/** Which backend `sqliteLib` came from; `null` until the first `loadSQLite` call. */
let sqliteFlavor: "bun" | "node" | "better-sqlite3" | null = null;

/**
 * Loads a SQLite backend, trying `bun:sqlite`, then `node:sqlite`, then
 * `better-sqlite3`, and caches the first one that imports successfully.
 *
 * NOTE(review): when none of the three can be imported, this caches and
 * returns an EMPTY object labelled "better-sqlite3" instead of throwing.
 * Callers presumably detect that case themselves — confirm.
 *
 * @returns The backend's module namespace (`lib`) and its `flavor`.
 */
export async function loadSQLite() {
  if (sqliteLib) return { lib: sqliteLib, flavor: sqliteFlavor! };

  // Import specifiers stay string literals so bundlers can still see them.
  const candidates: Array<{
    flavor: "bun" | "node" | "better-sqlite3";
    load: () => Promise<any>;
  }> = [
    { flavor: "bun", load: () => import("bun:sqlite" as any) },
    { flavor: "node", load: () => import("node:sqlite" as any) },
    { flavor: "better-sqlite3", load: () => import("better-sqlite3") },
  ];

  for (const candidate of candidates) {
    try {
      sqliteLib = await candidate.load();
      sqliteFlavor = candidate.flavor;
      return { lib: sqliteLib, flavor: sqliteFlavor };
    } catch {
      // Backend not available in this runtime; try the next one.
    }
  }

  sqliteLib = {};
  sqliteFlavor = "better-sqlite3";
  return { lib: sqliteLib, flavor: sqliteFlavor };
}
|
|
44
|
+
|
|
45
|
+
/**
 * Switches a database connection to WAL journaling with NORMAL synchronous
 * mode by executing the two PRAGMA statements in order.
 *
 * @param db - Database handle exposing `exec(sql)`.
 */
export function applyWALPragmas(db: any): void {
  const pragmas = [
    "PRAGMA journal_mode = WAL;",
    "PRAGMA synchronous = NORMAL;",
  ];
  for (const sql of pragmas) {
    db.exec(sql);
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Reports whether `err` signals SQLITE_BUSY in any of the error shapes used by
 * the supported backends.
 */
function isSQLiteBusyError(err: unknown): boolean {
  if (typeof err !== "object" || err === null) return false;
  const { code, errcode } = err as { code?: unknown; errcode?: unknown };
  // better-sqlite3 / bun:sqlite: string code such as "SQLITE_BUSY" or an
  // extended variant like "SQLITE_BUSY_SNAPSHOT".
  if (typeof code === "string" && code.startsWith("SQLITE_BUSY")) return true;
  // node:sqlite: code is "ERR_SQLITE_ERROR" and the SQLite result code is the
  // numeric `errcode`; the primary code lives in the low byte, SQLITE_BUSY = 5.
  return typeof errcode === "number" && (errcode & 0xff) === 5;
}

/**
 * Runs `fn`, retrying synchronously with exponential backoff while it fails
 * with a SQLITE_BUSY error.
 *
 * The wait before retry k (1-based) is `2^(k-1) * 10` ms plus up to 10 ms of
 * random jitter, and it blocks the calling thread. Any non-busy error, or a
 * busy error on the final attempt, is rethrown unchanged.
 *
 * @param fn - Operation to run.
 * @param maxRetries - Total number of attempts (not additional retries).
 *   Values below 1 (or NaN) are treated as 1 so `fn` always runs at least once.
 * @returns Whatever `fn` returns.
 * @throws The last error thrown by `fn`.
 */
export function withRetry<T>(fn: () => T, maxRetries = 3): T {
  const attempts = Math.max(1, Math.floor(maxRetries) || 1);

  for (let attempt = 1; ; attempt++) {
    try {
      return fn();
    } catch (err: unknown) {
      if (attempt >= attempts || !isSQLiteBusyError(err)) throw err;
      const delay = Math.pow(2, attempt - 1) * 10 + Math.random() * 10;
      // Synchronous sleep: waiting on a private buffer nobody notifies simply
      // times out after `delay` ms.
      Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, Math.floor(delay));
    }
  }
}
|
|
67
|
+
|
|
68
|
+
/**
 * Reports whether `err` indicates that the database FILE is damaged or is not
 * a SQLite database at all.
 *
 * Recognised by result-code name (`SQLITE_CORRUPT*`, `SQLITE_NOTADB`) or by
 * SQLite's standard messages for those codes. "database is locked" is
 * deliberately NOT matched: it is a transient busy condition on a healthy
 * file and must not be handled as corruption.
 *
 * @param err - Any thrown value; non-objects and values without `message` / `code` yield false.
 * @returns `true` only for corruption / not-a-database errors.
 */
export function isSQLiteCorruptionError(err: any): boolean {
  const message = String(err?.message ?? "").toLowerCase();
  const code = String(err?.code ?? "").toUpperCase();

  return (
    code.startsWith("SQLITE_CORRUPT") ||
    code === "SQLITE_NOTADB" ||
    message.includes("database disk image is malformed") ||
    message.includes("file is not a database")
  );
}
|
|
74
|
+
|
|
75
|
+
/**
 * Minimal prepared-statement surface this package relies on.
 * NOTE(review): presumably the subset shared by bun:sqlite, node:sqlite and
 * better-sqlite3 statements — confirm against each backend.
 */
export interface PreparedStatement {
  /** Executes the statement with `args`; the result exposes the number of changed rows. */
  run(...args: any[]): { changes: number };

  /** Executes the statement with `args` and returns a single result. */
  get(...args: any[]): any;

  /** Executes the statement with `args` and returns an array of results. */
  all(...args: any[]): any[];
}
|