ultimate-pi 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/PACKAGING.md +3 -2
- package/.pi/extensions/lib/harness-vcc-settings.ts +50 -0
- package/.pi/extensions/ultimate-pi-vcc.ts +17 -0
- package/.pi/harness/docs/adrs/0030-inhouse-vcc-compaction.md +40 -0
- package/.pi/harness/docs/adrs/README.md +1 -0
- package/.pi/prompts/harness-setup.md +2 -2
- package/.pi/scripts/vendor-pi-vcc-settings.stub.ts +8 -0
- package/.pi/scripts/vendor-sync-pi-vcc.sh +40 -0
- package/.pi/settings.example.json +1 -6
- package/CHANGELOG.md +9 -7
- package/THIRD_PARTY_NOTICES.md +8 -22
- package/package.json +7 -6
- package/vendor/pi-vcc/README.md +215 -0
- package/vendor/pi-vcc/UPSTREAM_PIN.md +12 -0
- package/vendor/pi-vcc/demo.gif +0 -0
- package/vendor/pi-vcc/index.ts +12 -0
- package/vendor/pi-vcc/package.json +26 -0
- package/vendor/pi-vcc/scripts/audit-sessions.ts +88 -0
- package/vendor/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
- package/vendor/pi-vcc/scripts/compare-before-after.ts +36 -0
- package/vendor/pi-vcc/scripts/dump-branch-output.ts +20 -0
- package/vendor/pi-vcc/src/commands/pi-vcc.ts +36 -0
- package/vendor/pi-vcc/src/commands/vcc-recall.ts +65 -0
- package/vendor/pi-vcc/src/core/brief.ts +381 -0
- package/vendor/pi-vcc/src/core/build-sections.ts +79 -0
- package/vendor/pi-vcc/src/core/content.ts +60 -0
- package/vendor/pi-vcc/src/core/filter-noise.ts +42 -0
- package/vendor/pi-vcc/src/core/format-recall.ts +27 -0
- package/vendor/pi-vcc/src/core/format.ts +49 -0
- package/vendor/pi-vcc/src/core/lineage.ts +26 -0
- package/vendor/pi-vcc/src/core/load-messages.ts +41 -0
- package/vendor/pi-vcc/src/core/normalize.ts +66 -0
- package/vendor/pi-vcc/src/core/recall-scope.ts +14 -0
- package/vendor/pi-vcc/src/core/render-entries.ts +55 -0
- package/vendor/pi-vcc/src/core/report.ts +237 -0
- package/vendor/pi-vcc/src/core/sanitize.ts +5 -0
- package/vendor/pi-vcc/src/core/search-entries.ts +221 -0
- package/vendor/pi-vcc/src/core/settings.ts +8 -0
- package/vendor/pi-vcc/src/core/skill-collapse.ts +35 -0
- package/vendor/pi-vcc/src/core/summarize.ts +157 -0
- package/vendor/pi-vcc/src/core/tool-args.ts +14 -0
- package/vendor/pi-vcc/src/details.ts +7 -0
- package/vendor/pi-vcc/src/extract/commits.ts +69 -0
- package/vendor/pi-vcc/src/extract/files.ts +80 -0
- package/vendor/pi-vcc/src/extract/goals.ts +79 -0
- package/vendor/pi-vcc/src/extract/preferences.ts +55 -0
- package/vendor/pi-vcc/src/hooks/before-compact.ts +314 -0
- package/vendor/pi-vcc/src/sections.ts +12 -0
- package/vendor/pi-vcc/src/tools/recall.ts +109 -0
- package/vendor/pi-vcc/src/types.ts +14 -0
- package/vendor/pi-vcc/tests/before-compact-hook.test.ts +204 -0
- package/vendor/pi-vcc/tests/before-compact.test.ts +145 -0
- package/vendor/pi-vcc/tests/brief.test.ts +206 -0
- package/vendor/pi-vcc/tests/build-sections.test.ts +59 -0
- package/vendor/pi-vcc/tests/compile.test.ts +80 -0
- package/vendor/pi-vcc/tests/content.test.ts +31 -0
- package/vendor/pi-vcc/tests/extract-goals.test.ts +86 -0
- package/vendor/pi-vcc/tests/extract-preferences.test.ts +30 -0
- package/vendor/pi-vcc/tests/filter-noise.test.ts +61 -0
- package/vendor/pi-vcc/tests/fixtures.ts +61 -0
- package/vendor/pi-vcc/tests/format-recall.test.ts +30 -0
- package/vendor/pi-vcc/tests/format.test.ts +62 -0
- package/vendor/pi-vcc/tests/lineage.test.ts +33 -0
- package/vendor/pi-vcc/tests/load-messages.test.ts +51 -0
- package/vendor/pi-vcc/tests/normalize.test.ts +97 -0
- package/vendor/pi-vcc/tests/real-sessions.test.ts +38 -0
- package/vendor/pi-vcc/tests/recall-expand.test.ts +15 -0
- package/vendor/pi-vcc/tests/recall-scope.test.ts +32 -0
- package/vendor/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
- package/vendor/pi-vcc/tests/render-entries.test.ts +62 -0
- package/vendor/pi-vcc/tests/report.test.ts +44 -0
- package/vendor/pi-vcc/tests/sanitize.test.ts +24 -0
- package/vendor/pi-vcc/tests/search-entries.test.ts +144 -0
- package/vendor/pi-vcc/tests/support/load-session.ts +23 -0
- package/vendor/pi-vcc/tests/support/real-sessions.ts +51 -0
- package/.pi/pi-vcc-config.json +0 -4
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import type { Message } from "@mariozechner/pi-ai";
|
|
2
|
+
import { buildSections } from "./build-sections";
|
|
3
|
+
import { clip } from "./content";
|
|
4
|
+
import { normalize } from "./normalize";
|
|
5
|
+
import { renderMessage } from "./render-entries";
|
|
6
|
+
import { searchEntries } from "./search-entries";
|
|
7
|
+
import { type CompileInput, compile } from "./summarize";
|
|
8
|
+
|
|
9
|
+
/** Header names whose `[Header]` tags are looked for when counting the sections present in a summary. */
const SECTION_HEADERS = [
  "Session Goal",
  "Files And Changes",
  "Commits",
  "Outstanding Context",
];
|
|
10
|
+
|
|
11
|
+
/** Number of messages seen per message role. */
interface RoleCounts {
  /** Messages whose role is `"user"`. */
  user: number;
  /** Messages whose role is `"assistant"`. */
  assistant: number;
  /** Messages whose role is `"toolResult"`. */
  toolResult: number;
}
|
|
16
|
+
|
|
17
|
+
/** Number of normalized blocks seen per block kind. */
interface BlockCounts {
  /** Blocks of kind `"user"`. */
  user: number;
  /** Blocks of kind `"assistant"`. */
  assistant: number;
  /** Blocks of kind `"tool_call"`. */
  toolCalls: number;
  /** Blocks of kind `"tool_result"`. */
  toolResults: number;
  /** Blocks of kind `"thinking"`. */
  thinking: number;
}
|
|
24
|
+
|
|
25
|
+
/** Outcome of one recall probe run against a compacted session. */
export interface RecallProbe {
  /** Short probe name (the report uses `"goal"`, `"file"` and `"problem"`). */
  label: string;
  /** Trimmed text the probe query was derived from. */
  sourceText: string;
  /** Space-separated search terms extracted from `sourceText`. */
  query: string;
  /** Whether every query term occurs (case-insensitively) in the summary. */
  summaryMentioned: boolean;
  /** Number of entries the recall search returned for `query`. */
  recallHits: number;
}
|
|
32
|
+
|
|
33
|
+
/** Before/after statistics describing one compaction run. */
export interface CompactReport {
  /** The compiled summary text. */
  summary: string;
  /** Measurements taken on the input messages. */
  before: {
    /** Number of input messages. */
    messageCount: number;
    /** Message tallies per role. */
    roleCounts: RoleCounts;
    /** Normalized block tallies per kind. */
    blockCounts: BlockCounts;
    /** Total length of the rendered message summaries. */
    inputChars: number;
    /** Token estimate derived from `inputChars`. */
    estimatedTokens: number;
    /** Distinct file paths found in tool-call arguments (at most ten). */
    topFiles: string[];
    /** Clipped rendering of the first and last messages. */
    preview: string;
  };
  /** Measurements taken on the compiled summary. */
  after: {
    /** Length of the summary in characters. */
    summaryLength: number;
    /** Token estimate derived from `summaryLength`. */
    estimatedTokens: number;
    /** How many known section headers appear in the summary. */
    sectionCount: number;
    /** The summary text as shown in the report. */
    summaryPreview: string;
    /** Number of extracted session-goal entries. */
    goalsCount: number;
    /** Number of extracted outstanding-context entries. */
    blockersCount: number;
    /** Line count of the text following the first `---` separator. */
    briefTranscriptLines: number;
  };
  /** Size comparison between input and summary. */
  compression: {
    /** Same value as `before.inputChars`. */
    charsBefore: number;
    /** Same value as `after.summaryLength`. */
    charsAfter: number;
    /** `charsBefore / charsAfter` rounded to two decimals; 0 when the summary is empty. */
    ratio: number;
    /** Number of input messages. */
    messagesBefore: number;
  };
  /** Recall probe results. */
  recall: {
    probes: RecallProbe[];
  };
}
|
|
63
|
+
|
|
64
|
+
/** Rough token estimate: one token per four characters, rounded up. */
const estimateTokensFromChars = (chars: number): number => {
  const charsPerToken = 4;
  return Math.ceil(chars / charsPerToken);
};
|
|
66
|
+
|
|
67
|
+
/**
 * Tally messages by role.
 *
 * Only the `user`, `assistant` and `toolResult` roles are counted; any other
 * role is ignored.
 */
const countRoles = (messages: Message[]): RoleCounts => {
  const tally: RoleCounts = { user: 0, assistant: 0, toolResult: 0 };
  for (const msg of messages) {
    switch (msg.role) {
      case "user":
        tally.user += 1;
        break;
      case "assistant":
        tally.assistant += 1;
        break;
      case "toolResult":
        tally.toolResult += 1;
        break;
      default:
        break;
    }
  }
  return tally;
};
|
|
76
|
+
|
|
77
|
+
/**
 * Tally the normalized blocks of `messages` by kind.
 *
 * Kinds other than `user`, `assistant`, `tool_call`, `tool_result` and
 * `thinking` are ignored.
 */
const countBlocks = (messages: Message[]): BlockCounts => {
  const tally: BlockCounts = { user: 0, assistant: 0, toolCalls: 0, toolResults: 0, thinking: 0 };

  for (const block of normalize(messages)) {
    switch (block.kind) {
      case "user":
        tally.user += 1;
        break;
      case "assistant":
        tally.assistant += 1;
        break;
      case "tool_call":
        tally.toolCalls += 1;
        break;
      case "tool_result":
        tally.toolResults += 1;
        break;
      case "thinking":
        tally.thinking += 1;
        break;
      default:
        break;
    }
  }

  return tally;
};
|
|
96
|
+
|
|
97
|
+
/** Sum of the rendered summary lengths of all messages. */
const inputCharsOf = (messages: Message[]): number => {
  let total = 0;
  messages.forEach((msg, index) => {
    total += renderMessage(msg, index, true).summary.length;
  });
  return total;
};
|
|
101
|
+
|
|
102
|
+
/**
 * Collect up to ten distinct file paths referenced by tool calls, in
 * first-seen order.
 *
 * For each tool call the first string-valued argument among `path`,
 * `file_path`, `filePath` and `file` is taken as its path.
 */
const topFilesOf = (messages: Message[]): string[] => {
  const pathKeys = ["path", "file_path", "filePath", "file"];
  const seen = new Set<string>();

  for (const block of normalize(messages)) {
    if (block.kind !== "tool_call") continue;
    for (const key of pathKeys) {
      const candidate = block.args[key];
      if (typeof candidate !== "string") continue;
      seen.add(candidate);
      break;
    }
  }

  return Array.from(seen).slice(0, 10);
};
|
|
114
|
+
|
|
115
|
+
/**
 * Render a short preview of a conversation.
 *
 * Each line is `#index [role] summary` with the summary clipped to 220
 * characters. When there are more than `edgeCount * 2` messages only the first
 * and last `edgeCount` are shown, separated by a `...` line.
 *
 * @returns `"(empty)"` when there are no messages.
 */
const previewOf = (messages: Message[], edgeCount = 3): string => {
  const entries = messages.map((msg, index) => renderMessage(msg, index));
  if (entries.length === 0) return "(empty)";

  const describe = (entry: (typeof entries)[number]): string =>
    `#${entry.index} [${entry.role}] ${clip(entry.summary, 220)}`;

  if (entries.length <= edgeCount * 2) {
    return entries.map((entry) => describe(entry)).join("\n");
  }

  const head = entries.slice(0, edgeCount).map((entry) => describe(entry));
  const tail = entries.slice(-edgeCount).map((entry) => describe(entry));
  return [...head, "...", ...tail].join("\n");
};
|
|
132
|
+
|
|
133
|
+
/** Count how many of the known section headers appear as `[Header]` in `summary`. */
const sectionCountOf = (summary: string): number => {
  let present = 0;
  for (const header of SECTION_HEADERS) {
    if (summary.includes(`[${header}]`)) present += 1;
  }
  return present;
};
|
|
135
|
+
|
|
136
|
+
/**
 * Count the lines of text that follow the first `---` separator in `summary`.
 *
 * @returns 0 when the summary contains no separator.
 */
const briefLineCountOf = (summary: string): number => {
  const divider = "\n\n---\n\n";
  const at = summary.indexOf(divider);
  if (at === -1) return 0;
  const tail = summary.slice(at + divider.length);
  return tail.split("\n").length;
};
|
|
142
|
+
|
|
143
|
+
/**
 * Extract candidate search terms from free text: runs of three or more
 * letters, digits, `_`, `.`, `/` or `-`.
 */
const queryTermsOf = (text: string): string[] => {
  const found = text.match(/[\p{L}\p{N}_./-]{3,}/gu) ?? [];
  const terms: string[] = [];
  for (const raw of found) {
    const term = raw.trim();
    if (term) terms.push(term);
  }
  return terms;
};
|
|
147
|
+
|
|
148
|
+
/** Build a search query from the first six terms found in `text`, joined by spaces. */
const queryOf = (text: string): string =>
  queryTermsOf(text).slice(0, 6).join(" ");
|
|
152
|
+
|
|
153
|
+
/**
 * Case-insensitive check that every whitespace-separated term of `query`
 * occurs somewhere in `text`. An empty query matches.
 */
const matchesQuery = (text: string, query: string): boolean => {
  const haystack = text.toLowerCase();
  for (const needle of query.toLowerCase().split(/\s+/)) {
    if (needle && !haystack.includes(needle)) return false;
  }
  return true;
};
|
|
161
|
+
|
|
162
|
+
/**
 * Build recall probes for a compaction run.
 *
 * Three probes are attempted — the first session goal, the first file path
 * seen in a tool call, and the first outstanding-context entry. A probe is
 * dropped when no query terms can be extracted from its source text.
 *
 * @param messages Messages that were compacted.
 * @param summary  Compiled summary the probes are checked against.
 * @returns One result per probe that produced a non-empty query.
 */
const probesOf = (messages: Message[], summary: string): RecallProbe[] => {
  const blocks = normalize(messages);
  const data = buildSections({ blocks });

  // First string-valued path argument of the first tool call that has one.
  let firstFile = "";
  for (const b of blocks) {
    if (b.kind === "tool_call") {
      for (const key of ["path", "file_path", "filePath", "file"]) {
        const val = b.args[key];
        if (typeof val === "string") { firstFile = val; break; }
      }
      if (firstFile) break;
    }
  }

  const rawProbes = [
    { label: "goal", text: data.sessionGoal[0] ?? "" },
    { label: "file", text: firstFile },
    { label: "problem", text: data.outstandingContext[0] ?? "" },
  ];

  const rendered = messages.map((msg, index) => renderMessage(msg, index));

  return rawProbes
    .map(({ label, text }) => {
      const sourceText = text.trim();
      const query = queryOf(sourceText);
      if (!query) return null;
      return {
        label,
        sourceText,
        query,
        summaryMentioned: matchesQuery(summary, query),
        // searchEntries takes (entries, messages, query). Passing the query in
        // the messages slot left `query` undefined, so every entry came back
        // and recallHits always equalled the message count.
        recallHits: searchEntries(rendered, messages, query).length,
      };
    })
    .filter((probe): probe is RecallProbe => probe !== null);
};
|
|
200
|
+
|
|
201
|
+
/**
 * Compile a summary for `input` and gather before/after statistics about it.
 *
 * @param input Messages (plus optional previous summary) to compile.
 * @returns The summary together with input metrics, summary metrics, the
 *          compression ratio and recall probe results.
 */
export const buildCompactReport = (input: CompileInput): CompactReport => {
  const { messages } = input;
  const summary = compile(input);
  const sections = buildSections({ blocks: normalize(messages) });
  const charsBefore = inputCharsOf(messages);
  const topFiles = topFilesOf(messages);
  const charsAfter = summary.length;

  const before: CompactReport["before"] = {
    messageCount: messages.length,
    roleCounts: countRoles(messages),
    blockCounts: countBlocks(messages),
    inputChars: charsBefore,
    estimatedTokens: estimateTokensFromChars(charsBefore),
    topFiles,
    preview: previewOf(messages),
  };

  const after: CompactReport["after"] = {
    summaryLength: charsAfter,
    estimatedTokens: estimateTokensFromChars(charsAfter),
    sectionCount: sectionCountOf(summary),
    summaryPreview: summary,
    goalsCount: sections.sessionGoal.length,
    blockersCount: sections.outstandingContext.length,
    briefTranscriptLines: briefLineCountOf(summary),
  };

  // An empty summary would divide by zero, so report 0 in that case.
  const ratio = charsAfter === 0 ? 0 : Number((charsBefore / charsAfter).toFixed(2));

  const compression: CompactReport["compression"] = {
    charsBefore,
    charsAfter,
    ratio,
    messagesBefore: messages.length,
  };

  const recall: CompactReport["recall"] = { probes: probesOf(messages, summary) };

  return { summary, before, after, compression, recall };
};
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import type { Message } from "@mariozechner/pi-ai";
|
|
2
|
+
import type { RenderedEntry } from "./render-entries";
|
|
3
|
+
import { textOf } from "./content";
|
|
4
|
+
|
|
5
|
+
/** A rendered entry returned from a search, optionally annotated with match details. */
export interface SearchHit extends RenderedEntry {
  /** Lines of context around the first match; set only when a query was given. */
  snippet?: string;
  /** How many query terms matched this entry. */
  matchCount?: number;
}
|
|
11
|
+
|
|
12
|
+
/** Backslash-escape regex metacharacters so `s` can be used as a literal pattern. */
const escapeRegex = (s: string): string => {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, "\\$&");
};
|
|
14
|
+
|
|
15
|
+
/**
 * Compile `pattern` as a case-insensitive regex. If it is not valid regex
 * syntax, its metacharacters are escaped and it is matched literally.
 */
const safeRegex = (pattern: string): RegExp => {
  let compiled: RegExp;
  try {
    compiled = new RegExp(pattern, "i");
  } catch {
    compiled = new RegExp(escapeRegex(pattern), "i");
  }
  return compiled;
};
|
|
23
|
+
|
|
24
|
+
/** True when `query` contains at least one regex metacharacter (including `.`). */
const looksLikeRegex = (query: string): boolean => {
  const metaChar = /[|*+?{}()[\]\\^$.]/;
  return query.search(metaChar) !== -1;
};
|
|
27
|
+
|
|
28
|
+
/**
 * Build a case-insensitive alternation of all `terms`, used to locate the
 * first line worth showing in a snippet. Terms that are not valid regex syntax
 * on their own are escaped and matched literally.
 */
const snippetRegex = (terms: string[]): RegExp => {
  const toAlternative = (term: string): string => {
    try {
      // Compiled only to check validity; the result is discarded.
      new RegExp(term, "i");
    } catch {
      return escapeRegex(term);
    }
    return term;
  };

  const union = terms.map((term) => toAlternative(term)).join("|");
  return new RegExp(union, "i");
};
|
|
41
|
+
|
|
42
|
+
// ── Stopwords for natural language queries ──
/** Common English function words ignored when ranking natural-language queries. */
const STOPWORDS = new Set([
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could",
  "should", "may", "might", "can", "shall", "of", "in", "to", "for",
  "with", "on", "at", "from", "by", "as", "into", "through", "during",
  "before", "after", "above", "below", "between", "out", "off", "over",
  "under", "again", "further", "then", "once", "here", "there", "when",
  "where", "why", "how", "all", "both", "each", "few", "more", "most",
  "other", "some", "such", "no", "nor", "not", "only", "own", "same",
  "so", "than", "too", "very", "just", "about", "it", "its", "that",
  "this", "what", "which", "who", "whom", "these", "those",
]);

/**
 * Drop stopwords (case-insensitive) and single-character terms.
 *
 * @returns The remaining terms, or the original `terms` array unchanged when
 *          nothing would remain.
 */
const filterStopwords = (terms: string[]): string[] => {
  const kept: string[] = [];
  for (const term of terms) {
    const isStopword = STOPWORDS.has(term.toLowerCase());
    if (isStopword || term.length <= 1) continue;
    kept.push(term);
  }
  // Never return an empty list for a non-empty query made only of stopwords.
  return kept.length === 0 ? terms : kept;
};
|
|
63
|
+
|
|
64
|
+
/** Number of entries in `terms` whose pattern matches `hay` at least once. */
const countMatches = (hay: string, terms: string[]): number =>
  terms.reduce((matched, term) => (safeRegex(term).test(hay) ? matched + 1 : matched), 0);
|
|
72
|
+
|
|
73
|
+
// ── BM25-lite scoring ──
/** Term-frequency saturation factor used in the BM25 score. */
const BM25_K = 1.2;
/** Weight of document-length normalization used in the BM25 score. */
const BM25_B = 0.75;
|
|
76
|
+
|
|
77
|
+
/**
 * Count case-insensitive occurrences of `pattern` in `text`.
 * Only the pattern's source is used; it is re-compiled with the `gi` flags.
 */
const termFreq = (text: string, pattern: RegExp): number => {
  const everyMatch = new RegExp(pattern.source, "gi");
  return (text.match(everyMatch) ?? []).length;
};
|
|
82
|
+
|
|
83
|
+
/** Corpus-level statistics shared by all BM25 score computations for one query. */
interface BM25Context {
  /** Total number of documents. */
  n: number;
  /** Average document length, in whitespace-separated words. */
  avgDl: number;
  /** Maps each query term to the number of documents containing it. */
  df: Map<string, number>;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Gather the corpus statistics BM25 needs: document count, average document
 * length in words, and per-term document frequency.
 *
 * @param docs  Searchable text of every document.
 * @param terms Query terms; each is matched as a case-insensitive pattern.
 */
const buildBM25Context = (docs: string[], terms: string[]): BM25Context => {
  const docFreq = new Map<string, number>();
  let wordTotal = 0;

  docs.forEach((doc) => {
    wordTotal += doc.split(/\s+/).length;
    terms.forEach((term) => {
      if (!safeRegex(term).test(doc)) return;
      docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
    });
  });

  const n = docs.length;
  // Math.max guards the division when there are no documents.
  return { n, avgDl: wordTotal / Math.max(n, 1), df: docFreq };
};
|
|
106
|
+
|
|
107
|
+
/**
 * BM25 relevance of one document for the given query terms.
 *
 * @param doc   Searchable text of the document.
 * @param terms Query terms; each is matched as a case-insensitive pattern.
 * @param ctx   Corpus statistics from `buildBM25Context`.
 * @returns Sum over matching terms of IDF times length-normalized term frequency.
 */
const bm25Score = (doc: string, terms: string[], ctx: BM25Context): number => {
  const dl = doc.split(/\s+/).length;
  let total = 0;

  for (const term of terms) {
    const tf = termFreq(doc, safeRegex(term));
    if (tf === 0) continue;

    const df = ctx.df.get(term) ?? 0;
    // IDF: log((N - df + 0.5) / (df + 0.5) + 1)
    const idf = Math.log((ctx.n - df + 0.5) / (df + 0.5) + 1);
    // Longer-than-average documents get their term frequency damped.
    const lengthNorm = 1 - BM25_B + BM25_B * dl / ctx.avgDl;
    const tfNorm = (tf * (BM25_K + 1)) / (tf + BM25_K * lengthNorm);
    total += idf * tfNorm;
  }

  return total;
};
|
|
126
|
+
|
|
127
|
+
/**
 * Extract the first line of `text` matching `regex` together with up to
 * `contextLines` lines on either side. Marker lines note how many lines were
 * cut above and below.
 *
 * @returns The snippet, or `undefined` when no line matches.
 */
const lineSnippet = (text: string, regex: RegExp, contextLines = 2): string | undefined => {
  const lines = text.split("\n");
  const hit = lines.findIndex((line) => regex.test(line));
  if (hit < 0) return undefined;

  const from = Math.max(0, hit - contextLines);
  const to = Math.min(lines.length, hit + contextLines + 1);

  const out = lines.slice(from, to);
  if (from > 0) out.unshift(`...(${from} lines above)`);
  if (to < lines.length) out.push(`...(${lines.length - to} lines below)`);
  return out.join("\n");
};
|
|
149
|
+
|
|
150
|
+
/**
 * Searchable text for a message: the command and output for `bashExecution`
 * messages, otherwise the text of the message content.
 */
const fullText = (msg: Message): string => {
  // `bashExecution` fields are read through `any`, as the original did via `as any`.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const loose = msg as any;
  if (loose.role !== "bashExecution") return textOf(msg.content);
  return `${loose.command ?? ""} ${loose.output ?? ""}`;
};
|
|
157
|
+
|
|
158
|
+
/**
 * Search rendered entries for `query`.
 *
 * - No query (or only whitespace): all entries are returned unchanged.
 * - Query containing regex metacharacters: treated as a single pattern;
 *   matching entries are returned in their original order.
 * - Otherwise: the query is split into terms, stopwords are dropped, and
 *   entries matching at least one term are returned by descending BM25 score.
 *
 * The searchable text of entry `i` is taken from `messages[i]` when present,
 * falling back to the entry's own summary.
 *
 * @param entries  Rendered entries to search.
 * @param messages Source messages, looked up by the same position as `entries`.
 * @param query    Optional search query.
 */
export const searchEntries = (
  entries: RenderedEntry[],
  messages: Message[],
  query?: string,
): SearchHit[] => {
  const rawQuery = query?.trim();
  if (!rawQuery) return entries;

  const textAt = (i: number): string => {
    const msg = messages[i];
    return msg ? fullText(msg) : entries[i].summary;
  };
  const docOf = (i: number, text: string): string => {
    const entry = entries[i];
    const filePart = entry.files?.join(" ") ?? "";
    return `${entry.role} ${text} ${filePart}`;
  };

  // A query with metacharacters is one pattern — it is not split into terms.
  if (looksLikeRegex(rawQuery)) {
    const pattern = safeRegex(rawQuery);
    const hits: SearchHit[] = [];
    entries.forEach((entry, i) => {
      const text = textAt(i);
      if (!pattern.test(docOf(i, text))) return;
      hits.push({ ...entry, snippet: lineSnippet(text, pattern), matchCount: 1 });
    });
    return hits;
  }

  // Natural language / multi-word query: BM25 scoring
  const terms = filterStopwords(rawQuery.split(/\s+/));
  const snipRe = snippetRegex(terms);

  const texts = entries.map((_, i) => textAt(i));
  const docs = texts.map((text, i) => docOf(i, text));
  const ctx = buildBM25Context(docs, terms);

  const ranked: Array<{ hit: SearchHit; score: number }> = [];
  docs.forEach((doc, i) => {
    const matchCount = countMatches(doc, terms);
    if (matchCount === 0) return;
    const score = bm25Score(doc, terms, ctx);
    const snippet = lineSnippet(texts[i], snipRe);
    ranked.push({ hit: { ...entries[i], snippet, matchCount }, score });
  });

  // Highest score first
  ranked.sort((a, b) => b.score - a.score);
  return ranked.map((r) => r.hit);
};
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ultimate-pi harness settings (env-only). Re-exported for vendored pi-vcc layout.
|
|
3
|
+
*/
|
|
4
|
+
export type { PiVccSettings } from "../../../../.pi/extensions/lib/harness-vcc-settings.js";
|
|
5
|
+
export {
|
|
6
|
+
loadSettings,
|
|
7
|
+
scaffoldSettings,
|
|
8
|
+
} from "../../../../.pi/extensions/lib/harness-vcc-settings.js";
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/** Shared skill-tag collapse utilities */
|
|
2
|
+
|
|
3
|
+
/** Matches a line that starts (after an optional list dash) with an opening `<skill name="...">` tag. */
const SKILL_TAG_RE = /^-?\s*<skill\s+name="([^"]+)"/;
/** Matches a line that starts (after an optional list dash) with a closing `</skill>` tag. */
const SKILL_CLOSE_RE = /^-?\s*<\/skill>/;

/**
 * Collapse skill blocks in an array of lines.
 *
 * Each opening `<skill name="X">` line is replaced by a single `[skill: X]`
 * marker the first time `X` is seen and dropped on later occurrences. All
 * lines inside the block, up to and including the closing `</skill>` line, are
 * dropped. Lines outside skill blocks are passed through unchanged.
 *
 * A block that opens and closes on the same line is treated as already closed,
 * so the lines that follow it are kept.
 *
 * @param lines Input lines.
 * @returns A new array with skill blocks collapsed.
 */
export const collapseSkillLines = (lines: string[]): string[] => {
  const result: string[] = [];
  const seenSkills = new Set<string>();
  let insideSkill = false;

  for (const line of lines) {
    const skillMatch = line.match(SKILL_TAG_RE);
    if (skillMatch) {
      const name = skillMatch[1];
      if (!seenSkills.has(name)) {
        seenSkills.add(name);
        result.push(`[skill: ${name}]`);
      }
      // SKILL_CLOSE_RE only sees a close tag at the start of a line, so a
      // close tag later on this same line must be detected here. Otherwise
      // every following line would be swallowed as block content.
      insideSkill = !line.includes("</skill>", skillMatch[0].length);
      continue;
    }
    if (insideSkill) {
      if (SKILL_CLOSE_RE.test(line)) insideSkill = false;
      continue;
    }
    result.push(line);
  }
  return result;
};
|
|
31
|
+
|
|
32
|
+
/**
 * Matches a whole `<skill name="X" ...>` block in raw text, up to its closing
 * tag or — when unclosed — the end of the text.
 */
const SKILL_BLOCK_RE = /<skill\s+name="([^"]+)"[^>]*>[\s\S]*?(?:<\/skill>|$)/g;

/** Replace every skill block in `text` with a `[skill: X]` marker. */
export const collapseSkillText = (text: string): string => {
  const toMarker = (_block: string, name: string): string => `[skill: ${name}]`;
  return text.replace(SKILL_BLOCK_RE, toMarker);
};
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import type { Message } from "@mariozechner/pi-ai";
|
|
2
|
+
import type { FileOps } from "../types";
|
|
3
|
+
import { normalize } from "./normalize";
|
|
4
|
+
import { filterNoise } from "./filter-noise";
|
|
5
|
+
import { buildSections } from "./build-sections";
|
|
6
|
+
import { formatSummary, capBrief, RECALL_NOTE } from "./format";
|
|
7
|
+
|
|
8
|
+
/** Input to `compile`. */
export interface CompileInput {
  /** Messages to summarize. */
  messages: Message[];
  /** Summary from an earlier compaction; when given, the fresh summary is merged into it. */
  previousSummary?: string;
  /** File operations. NOTE(review): `compile` in this file does not read this field — confirm who consumes it. */
  fileOps?: FileOps;
}
|
|
13
|
+
|
|
14
|
+
/** Names of the `[Header]` sections handled when merging summaries, in output order. */
const HEADER_NAMES = [
  "Session Goal",
  "Files And Changes",
  "Commits",
  "Outstanding Context",
  "User Preferences",
];
|
|
15
|
+
|
|
16
|
+
/** Horizontal-rule divider placed between the header sections, the brief transcript and the recall note. */
const SEPARATOR = "\n\n---\n\n";
|
|
17
|
+
|
|
18
|
+
/**
 * Extract the `[header]` section from summary text.
 *
 * The section runs from its `[header]` tag up to the next known section tag or
 * `---` separator, whichever comes first, or to the end of the text.
 *
 * @returns The trimmed section including its tag, or `""` when the tag is absent.
 */
const sectionOf = (text: string, header: string): string => {
  const begin = text.indexOf(`[${header}]`);
  if (begin < 0) return "";
  const tail = text.slice(begin);

  // Offsets (within `tail`) of everything that could end this section.
  const stops: number[] = [];
  for (const other of HEADER_NAMES) {
    if (other === header) continue;
    const at = tail.indexOf(`[${other}]`);
    if (at > 0) stops.push(at);
  }
  const sepAt = tail.indexOf("\n\n---\n\n");
  if (sepAt > 0) stops.push(sepAt);

  if (stops.length === 0) return tail.trim();
  return tail.slice(0, Math.min(...stops)).trim();
};
|
|
34
|
+
|
|
35
|
+
/** Return the trimmed text after the first separator, or `""` when there is no separator. */
const briefOf = (text: string): string => {
  const at = text.indexOf(SEPARATOR);
  return at < 0 ? "" : text.slice(at + SEPARATOR.length).trim();
};
|
|
41
|
+
|
|
42
|
+
/**
 * Merge the previous and fresh versions of one header section.
 *
 * - `Outstanding Context` always takes the fresh version.
 * - If either side is empty the other is returned as-is.
 * - `Files And Changes` is merged per category.
 * - All other sections keep their de-duplicated `- ` bullet lines (lines that
 *   mention skill tags are discarded), capped to the most recent 8 lines for
 *   `Session Goal` and `Commits` and 15 otherwise.
 *
 * @returns The merged section including its `[header]` tag, or `""`.
 */
const mergeHeaderSection = (header: string, prev: string, fresh: string): string => {
  // Outstanding Context is volatile -- the previous version is never kept.
  if (header === "Outstanding Context") return fresh;
  if (!prev) return fresh;
  if (!fresh) return prev;
  if (header === "Files And Changes") return mergeFileLines(prev, fresh);

  const keepBullet = (line: string): boolean =>
    line.startsWith("- ") && !(line.includes("<skill") || line.includes("</skill"));

  const bullets = new Set<string>();
  for (const source of [prev, fresh]) {
    for (const line of source.split("\n")) {
      if (keepBullet(line)) bullets.add(line);
    }
  }

  const limit = header === "Session Goal" || header === "Commits" ? 8 : 15;
  // Over the limit, the oldest bullets are the ones dropped.
  const kept = Array.from(bullets).slice(-limit);
  return kept.length === 0 ? "" : `[${header}]\n${kept.join("\n")}`;
};
|
|
64
|
+
|
|
65
|
+
/**
 * Merge two `Files And Changes` sections.
 *
 * Paths are read from `- Modified: ...`, `- Created: ...` and `- Read: ...`
 * lines (a trailing `(+N more)` note is ignored), de-duplicated per category,
 * and any path listed as Modified is removed from Created. Each category shows
 * at most ten paths followed by a `(+N more)` note.
 *
 * @returns The merged section including its tag, or `""` when no paths were found.
 */
const mergeFileLines = (prev: string, fresh: string): string => {
  const order = ["Modified", "Created", "Read"] as const;
  const perCategoryLimit = 10;
  const moreSuffix = /\s*\(\+\d+ more\)\s*$/;
  const buckets = {
    Modified: new Set<string>(),
    Created: new Set<string>(),
    Read: new Set<string>(),
  };

  for (const line of [...prev.split("\n"), ...fresh.split("\n")]) {
    const category = order.find((name) => line.startsWith(`- ${name}: `));
    if (category === undefined) continue;
    const listed = line.slice(`- ${category}: `.length).replace(moreSuffix, "");
    for (const piece of listed.split(",")) {
      const path = piece.trim();
      if (path) buckets[category].add(path);
    }
  }

  // A file that was modified existed before, so it is not reported as created.
  buckets.Modified.forEach((path) => buckets.Created.delete(path));

  const rendered: string[] = [];
  for (const category of order) {
    const paths = [...buckets[category]];
    if (paths.length === 0) continue;
    const shown = paths.slice(0, perCategoryLimit).join(", ");
    const hidden = paths.length - perCategoryLimit;
    rendered.push(`- ${category}: ${hidden > 0 ? `${shown} (+${hidden} more)` : shown}`);
  }

  return rendered.length === 0 ? "" : `[Files And Changes]\n${rendered.join("\n")}`;
};
|
|
104
|
+
|
|
105
|
+
/**
 * Append the fresh brief transcript to the previous one, separated by a blank
 * line. When either side is empty the other is returned unchanged.
 */
const mergeBriefTranscript = (prev: string, fresh: string): string => {
  if (prev && fresh) return `${prev}\n\n${fresh}`;
  return prev ? prev : fresh;
};
|
|
110
|
+
|
|
111
|
+
/**
 * Merge a fresh summary into a previous one.
 *
 * Every known header section is merged individually, the brief transcripts are
 * concatenated and capped, and the two parts are joined with the separator.
 */
const mergePrevious = (prev: string, fresh: string): string => {
  const mergedHeaders: string[] = [];
  for (const header of HEADER_NAMES) {
    const freshSection = sectionOf(fresh, header);
    const prevSection = sectionOf(prev, header);
    const section = mergeHeaderSection(header, prevSection, freshSection);
    if (section) mergedHeaders.push(section);
  }

  const brief = mergeBriefTranscript(briefOf(prev), briefOf(fresh));

  const pieces: string[] = [];
  if (mergedHeaders.length > 0) pieces.push(mergedHeaders.join("\n\n"));
  if (brief) pieces.push(capBrief(brief));
  return pieces.join(SEPARATOR);
};
|
|
136
|
+
|
|
137
|
+
/**
 * Compile a summary for the given messages.
 *
 * The messages are normalized, noise-filtered, split into sections and
 * formatted. When a previous summary is supplied the fresh one is merged into
 * it. A non-empty result always ends with the separator and the recall note.
 *
 * @returns The summary text, or `""` when there is nothing to summarize.
 */
export const compile = (input: CompileInput): string => {
  const sections = buildSections({ blocks: filterNoise(normalize(input.messages)) });
  const fresh = formatSummary(sections);

  // A previous summary may still carry a recall note from an earlier run;
  // remove it so the merge does not stack it inside the brief.
  const { previousSummary } = input;
  const prev = previousSummary ? stripRecallNote(previousSummary) : undefined;

  const merged = prev ? mergePrevious(prev, fresh) : fresh;
  return merged ? merged + SEPARATOR + RECALL_NOTE : "";
};
|
|
150
|
+
|
|
151
|
+
/**
 * Cut `text` at the last occurrence of the recall note, also removing a
 * separator and whitespace directly before it. Text without the note is
 * returned unchanged.
 */
const stripRecallNote = (text: string): string => {
  const noteAt = text.lastIndexOf(RECALL_NOTE);
  if (noteAt === -1) return text;
  const head = text.slice(0, noteAt);
  // Handles both "...---\n\nNOTE" and a bare trailing NOTE.
  return head.replace(/\s*(?:\n\n---\n\n)?\s*$/, "").trimEnd();
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Find the file path in a tool call's arguments.
 *
 * The keys `path`, `file_path`, `filePath` and `file` are checked in that
 * order; the first one holding a string wins.
 *
 * @returns The path, or `null` when none of the keys holds a string.
 */
export const extractPath = (args: Record<string, unknown>): string | null => {
  const pathKeys = ["path", "file_path", "filePath", "file"];
  const key = pathKeys.find((candidate) => typeof args[candidate] === "string");
  return key === undefined ? null : (args[key] as string);
};
|
|
7
|
+
|
|
8
|
+
/**
 * One-line description of a tool call's arguments.
 *
 * Preference order: a non-empty file path (`path=...`), a string `command`
 * (`command=...`), a string `query` (`query=...`), and finally the
 * comma-separated argument names.
 */
export const summarizeToolArgs = (args: Record<string, unknown>): string => {
  const path = extractPath(args);
  if (path) return `path=${path}`;

  for (const field of ["command", "query"] as const) {
    const value = args[field];
    if (typeof value === "string") return `${field}=${value}`;
  }

  return Object.keys(args).join(", ");
};
|