ultimate-pi 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.agents/skills/harness-decisions/SKILL.md +15 -0
  2. package/.agents/skills/scrapling-web/SKILL.md +45 -40
  3. package/.agents/skills/wiki-autoresearch/SKILL.md +3 -3
  4. package/.pi/PACKAGING.md +3 -2
  5. package/.pi/SYSTEM.md +12 -13
  6. package/.pi/agents/pi-pi/agent-expert.md +3 -3
  7. package/.pi/extensions/harness-web-guard.ts +95 -0
  8. package/.pi/extensions/harness-web-tools.ts +209 -0
  9. package/.pi/extensions/lib/harness-vcc-settings.ts +50 -0
  10. package/.pi/extensions/lib/harness-web/run-cli.ts +92 -0
  11. package/.pi/extensions/ultimate-pi-vcc.ts +17 -0
  12. package/.pi/harness/docs/adrs/0030-inhouse-vcc-compaction.md +40 -0
  13. package/.pi/harness/docs/adrs/README.md +1 -0
  14. package/.pi/harness/env.harness.template +3 -1
  15. package/.pi/prompts/harness-setup.md +48 -2
  16. package/.pi/scripts/harness-cli-verify.sh +12 -3
  17. package/.pi/scripts/harness-searxng-bootstrap.mjs +270 -0
  18. package/.pi/scripts/harness-web-search.md +24 -5
  19. package/.pi/scripts/harness-web.py +24 -7
  20. package/.pi/scripts/harness_web/config.py +37 -3
  21. package/.pi/scripts/harness_web/output.py +8 -2
  22. package/.pi/scripts/harness_web/search.py +22 -0
  23. package/.pi/scripts/harness_web/search_ddg.py +1 -5
  24. package/.pi/scripts/harness_web/search_searxng.py +100 -0
  25. package/.pi/scripts/vendor-pi-vcc-settings.stub.ts +8 -0
  26. package/.pi/scripts/vendor-sync-pi-vcc.sh +40 -0
  27. package/.pi/settings.example.json +1 -6
  28. package/CHANGELOG.md +20 -6
  29. package/THIRD_PARTY_NOTICES.md +8 -22
  30. package/package.json +7 -6
  31. package/vendor/pi-vcc/README.md +215 -0
  32. package/vendor/pi-vcc/UPSTREAM_PIN.md +12 -0
  33. package/vendor/pi-vcc/demo.gif +0 -0
  34. package/vendor/pi-vcc/index.ts +12 -0
  35. package/vendor/pi-vcc/package.json +26 -0
  36. package/vendor/pi-vcc/scripts/audit-sessions.ts +88 -0
  37. package/vendor/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
  38. package/vendor/pi-vcc/scripts/compare-before-after.ts +36 -0
  39. package/vendor/pi-vcc/scripts/dump-branch-output.ts +20 -0
  40. package/vendor/pi-vcc/src/commands/pi-vcc.ts +36 -0
  41. package/vendor/pi-vcc/src/commands/vcc-recall.ts +65 -0
  42. package/vendor/pi-vcc/src/core/brief.ts +381 -0
  43. package/vendor/pi-vcc/src/core/build-sections.ts +79 -0
  44. package/vendor/pi-vcc/src/core/content.ts +60 -0
  45. package/vendor/pi-vcc/src/core/filter-noise.ts +42 -0
  46. package/vendor/pi-vcc/src/core/format-recall.ts +27 -0
  47. package/vendor/pi-vcc/src/core/format.ts +49 -0
  48. package/vendor/pi-vcc/src/core/lineage.ts +26 -0
  49. package/vendor/pi-vcc/src/core/load-messages.ts +41 -0
  50. package/vendor/pi-vcc/src/core/normalize.ts +66 -0
  51. package/vendor/pi-vcc/src/core/recall-scope.ts +14 -0
  52. package/vendor/pi-vcc/src/core/render-entries.ts +55 -0
  53. package/vendor/pi-vcc/src/core/report.ts +237 -0
  54. package/vendor/pi-vcc/src/core/sanitize.ts +5 -0
  55. package/vendor/pi-vcc/src/core/search-entries.ts +221 -0
  56. package/vendor/pi-vcc/src/core/settings.ts +8 -0
  57. package/vendor/pi-vcc/src/core/skill-collapse.ts +35 -0
  58. package/vendor/pi-vcc/src/core/summarize.ts +157 -0
  59. package/vendor/pi-vcc/src/core/tool-args.ts +14 -0
  60. package/vendor/pi-vcc/src/details.ts +7 -0
  61. package/vendor/pi-vcc/src/extract/commits.ts +69 -0
  62. package/vendor/pi-vcc/src/extract/files.ts +80 -0
  63. package/vendor/pi-vcc/src/extract/goals.ts +79 -0
  64. package/vendor/pi-vcc/src/extract/preferences.ts +55 -0
  65. package/vendor/pi-vcc/src/hooks/before-compact.ts +314 -0
  66. package/vendor/pi-vcc/src/sections.ts +12 -0
  67. package/vendor/pi-vcc/src/tools/recall.ts +109 -0
  68. package/vendor/pi-vcc/src/types.ts +14 -0
  69. package/vendor/pi-vcc/tests/before-compact-hook.test.ts +204 -0
  70. package/vendor/pi-vcc/tests/before-compact.test.ts +145 -0
  71. package/vendor/pi-vcc/tests/brief.test.ts +206 -0
  72. package/vendor/pi-vcc/tests/build-sections.test.ts +59 -0
  73. package/vendor/pi-vcc/tests/compile.test.ts +80 -0
  74. package/vendor/pi-vcc/tests/content.test.ts +31 -0
  75. package/vendor/pi-vcc/tests/extract-goals.test.ts +86 -0
  76. package/vendor/pi-vcc/tests/extract-preferences.test.ts +30 -0
  77. package/vendor/pi-vcc/tests/filter-noise.test.ts +61 -0
  78. package/vendor/pi-vcc/tests/fixtures.ts +61 -0
  79. package/vendor/pi-vcc/tests/format-recall.test.ts +30 -0
  80. package/vendor/pi-vcc/tests/format.test.ts +62 -0
  81. package/vendor/pi-vcc/tests/lineage.test.ts +33 -0
  82. package/vendor/pi-vcc/tests/load-messages.test.ts +51 -0
  83. package/vendor/pi-vcc/tests/normalize.test.ts +97 -0
  84. package/vendor/pi-vcc/tests/real-sessions.test.ts +38 -0
  85. package/vendor/pi-vcc/tests/recall-expand.test.ts +15 -0
  86. package/vendor/pi-vcc/tests/recall-scope.test.ts +32 -0
  87. package/vendor/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
  88. package/vendor/pi-vcc/tests/render-entries.test.ts +62 -0
  89. package/vendor/pi-vcc/tests/report.test.ts +44 -0
  90. package/vendor/pi-vcc/tests/sanitize.test.ts +24 -0
  91. package/vendor/pi-vcc/tests/search-entries.test.ts +144 -0
  92. package/vendor/pi-vcc/tests/support/load-session.ts +23 -0
  93. package/vendor/pi-vcc/tests/support/real-sessions.ts +51 -0
  94. package/.pi/pi-vcc-config.json +0 -4
@@ -0,0 +1,41 @@
1
+ import { readFileSync } from "fs";
2
+ import type { Message } from "@mariozechner/pi-ai";
3
+ import { renderMessage, type RenderedEntry } from "./render-entries";
4
+
5
/**
 * Result of loading a session file. The three arrays are filled together,
 * so slot `i` of each refers to the same retained message entry.
 */
export interface LoadedMessages {
  /** One rendered summary per retained message. */
  rendered: RenderedEntry[];
  /** The `message` payloads exactly as parsed from the session file. */
  rawMessages: Message[];
  /** Stringified `id` of each retained entry. */
  entryIds: string[];
}
10
+
11
/**
 * Read a JSONL session file and render every message entry in it.
 *
 * @param sessionFile Path of the session file; read synchronously as UTF-8.
 * @param full Forwarded to `renderMessage` (render without clipping).
 * @param allowedEntryIds When given, only entries whose `id` is in the set are
 *   returned. Filtered-out messages still consume a message index, so the
 *   `index` of a returned entry is its position among ALL messages in the file.
 * @returns Parallel arrays of rendered entries, raw messages and entry ids.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
export const loadAllMessages = (
  sessionFile: string,
  full: boolean,
  allowedEntryIds?: Set<string>,
): LoadedMessages => {
  const rendered: RenderedEntry[] = [];
  const rawMessages: Message[] = [];
  const entryIds: string[] = [];

  let messageIndex = 0;
  for (const line of readFileSync(sessionFile, "utf-8").split("\n")) {
    if (!line.trim()) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Best effort: a corrupt or truncated line must not abort the whole load.
      continue;
    }

    // JSON.parse happily returns null / numbers / strings for lines such as
    // `null` or `42`; reading `.type` off null would throw, so skip them.
    if (parsed === null || typeof parsed !== "object") continue;
    const entry = parsed as { type?: unknown; id?: string; message?: Message };

    if (entry.type !== "message" || !entry.message) continue;

    if (!allowedEntryIds || allowedEntryIds.has(entry.id as string)) {
      rendered.push(renderMessage(entry.message, messageIndex, full));
      rawMessages.push(entry.message);
      entryIds.push(String(entry.id));
    }
    // Counted even when filtered out, so indices stay stable across filters.
    messageIndex++;
  }

  return { rendered, rawMessages, entryIds };
};
@@ -0,0 +1,66 @@
1
+ import type { Message } from "@mariozechner/pi-ai";
2
+ import type { NormalizedBlock } from "../types";
3
+ import { textOf } from "./content";
4
+ import { sanitize } from "./sanitize";
5
+
6
/**
 * Turn a single message into normalized blocks, each tagged with `msgIndex`
 * as its `sourceIndex`.
 *
 * - user: one text block (if any text) plus one `[image: <mime>]` block per
 *   image part; a user message that yields nothing still produces one block
 *   with empty text.
 * - toolResult: exactly one `tool_result` block.
 * - assistant: one block per text / thinking / toolCall part; other part
 *   types are ignored.
 * - any other role: no blocks.
 */
const normalizeOne = (msg: Message, msgIndex: number): NormalizedBlock[] => {
  switch (msg.role) {
    case "user": {
      const out: NormalizedBlock[] = [];
      const body = sanitize(textOf(msg.content));
      if (body) out.push({ kind: "user", text: body, sourceIndex: msgIndex });

      if (msg.content && typeof msg.content !== "string") {
        for (const piece of msg.content) {
          if (piece.type !== "image") continue;
          out.push({ kind: "user", text: `[image: ${piece.mimeType}]`, sourceIndex: msgIndex });
        }
      }

      // Keep a placeholder so the user turn is never dropped entirely.
      if (out.length === 0) out.push({ kind: "user", text: "", sourceIndex: msgIndex });
      return out;
    }

    case "toolResult":
      return [
        {
          kind: "tool_result",
          name: msg.toolName,
          text: sanitize(textOf(msg.content)),
          isError: msg.isError,
          sourceIndex: msgIndex,
        },
      ];

    case "assistant": {
      if (!msg.content) return [];
      if (typeof msg.content === "string") {
        return [{ kind: "assistant", text: sanitize(msg.content), sourceIndex: msgIndex }];
      }

      const out: NormalizedBlock[] = [];
      for (const piece of msg.content) {
        switch (piece.type) {
          case "text":
            out.push({ kind: "assistant", text: sanitize(piece.text), sourceIndex: msgIndex });
            break;
          case "thinking":
            out.push({
              kind: "thinking",
              text: sanitize(piece.thinking),
              redacted: piece.redacted ?? false,
              sourceIndex: msgIndex,
            });
            break;
          case "toolCall":
            out.push({
              kind: "tool_call",
              name: piece.name,
              args: piece.arguments,
              sourceIndex: msgIndex,
            });
            break;
        }
      }
      return out;
    }

    default:
      return [];
  }
};
62
+
63
/**
 * Flatten a message list into normalized blocks, in message order. Each
 * block's `sourceIndex` is the position of its message in `messages`.
 */
export const normalize = (messages: Message[]): NormalizedBlock[] => {
  const blocks: NormalizedBlock[] = [];
  for (const [position, message] of messages.entries()) {
    for (const block of normalizeOne(message, position)) blocks.push(block);
  }
  return blocks;
};
65
+
66
+
@@ -0,0 +1,14 @@
1
/** Which part of the session a recall query searches. */
export type RecallScope = "lineage" | "all";

/** Inline directive of the form `scope:lineage` or `scope:all` (case-insensitive). */
const SCOPE_RE = /\bscope:(lineage|all)\b/i;

/**
 * Coerce an arbitrary value to a scope: only the string "all" (in any letter
 * case) selects "all"; everything else, including non-strings, is "lineage".
 */
export const normalizeRecallScope = (scope?: unknown): RecallScope => {
  if (typeof scope !== "string") return "lineage";
  return scope.toLowerCase() === "all" ? "all" : "lineage";
};

/**
 * Pull the first `scope:` directive out of a query string.
 *
 * @returns The selected scope (default "lineage") and the query text with that
 *   first directive removed, whitespace runs collapsed to single spaces, and
 *   the ends trimmed.
 */
export const parseRecallScope = (text: string): { scope: RecallScope; text: string } => {
  const directive = SCOPE_RE.exec(text);
  const remainder = text.replace(SCOPE_RE, "").replace(/\s+/g, " ").trim();
  return { scope: normalizeRecallScope(directive?.[1]), text: remainder };
};
@@ -0,0 +1,55 @@
1
+ import type { Message } from "@mariozechner/pi-ai";
2
+ import { clip, textOf } from "./content";
3
+ import { summarizeToolArgs } from "./tool-args";
4
+ import { extractPath } from "./tool-args";
5
+
6
/** Compact, displayable view of one session message. */
export interface RenderedEntry {
  /** Index passed to `renderMessage` for this message. */
  index: number;
  /** Display role: "user", "tool_result", "bash" or "assistant". */
  role: string;
  /** Message text, possibly clipped, with tool calls / error prefix folded in. */
  summary: string;
  /** Paths extracted from tool-call arguments; only set when at least one was found. */
  files?: string[];
}
12
+
13
/**
 * Render the tool calls found in structured content as
 * `name(args), name(args), ...`. String or missing content yields "".
 */
const toolCalls = (content: Message["content"]): string => {
  if (!content || typeof content === "string") return "";

  const calls: string[] = [];
  for (const part of content) {
    if (part.type !== "toolCall") continue;
    calls.push(`${part.name}(${summarizeToolArgs(part.arguments)})`);
  }
  return calls.join(", ");
};
20
+
21
/**
 * Collect the path reported by `extractPath` for every tool call in
 * structured content, skipping calls for which it returns null.
 * String or missing content yields an empty list.
 */
const extractFilesFromContent = (content: Message["content"]): string[] => {
  if (!content || typeof content === "string") return [];

  const paths: string[] = [];
  for (const part of content) {
    if (part.type !== "toolCall") continue;
    const found = extractPath(part.arguments);
    if (found !== null) paths.push(found);
  }
  return paths;
};
28
+
29
/**
 * Build the display entry for one message.
 *
 * @param msg Message to render.
 * @param index Stored verbatim as the entry's `index`.
 * @param full When true nothing is clipped; otherwise user, bash and
 *   assistant text is clipped to 300 characters and tool results to 200.
 */
export const renderMessage = (msg: Message, index: number, full = false): RenderedEntry => {
  const fit = (text: string, limit: number): string => (full ? text : clip(text, limit));

  if (msg.role === "user") {
    return { index, role: "user", summary: fit(textOf(msg.content), 300) };
  }

  if (msg.role === "toolResult") {
    const marker = msg.isError ? "ERROR " : "";
    const body = fit(textOf(msg.content), 200);
    return { index, role: "tool_result", summary: `${marker}[${msg.toolName}] ${body}` };
  }

  // bashExecution is not part of the Message type and carries
  // command + output instead of content, hence the untyped access.
  const loose = msg as any;
  if (loose.role === "bashExecution") {
    const transcript = `$ ${loose.command ?? ""}\n${loose.output ?? ""}`;
    return { index, role: "bash", summary: fit(transcript, 300) };
  }

  // Everything else is rendered as an assistant turn.
  const body = fit(textOf(msg.content), 300);
  const calls = toolCalls(msg.content);
  const touched = extractFilesFromContent(msg.content);

  const entry: RenderedEntry = {
    index,
    role: "assistant",
    summary: calls ? `${calls}\n${body}` : body,
  };
  if (touched.length > 0) entry.files = touched;
  return entry;
};
54
+
55
+
@@ -0,0 +1,237 @@
1
+ import type { Message } from "@mariozechner/pi-ai";
2
+ import { buildSections } from "./build-sections";
3
+ import { clip } from "./content";
4
+ import { normalize } from "./normalize";
5
+ import { renderMessage } from "./render-entries";
6
+ import { searchEntries } from "./search-entries";
7
+ import { type CompileInput, compile } from "./summarize";
8
+
9
/** Section titles looked for (as `[Title]`) when counting sections in a summary. */
const SECTION_HEADERS = ["Session Goal", "Files And Changes", "Commits", "Outstanding Context"];

/** Number of messages per role. */
interface RoleCounts {
  user: number;
  assistant: number;
  toolResult: number;
}

/** Number of normalized blocks per kind. */
interface BlockCounts {
  user: number;
  assistant: number;
  toolCalls: number;
  toolResults: number;
  thinking: number;
}

/** One check of whether a piece of session content survives compaction. */
export interface RecallProbe {
  /** What the probe targets: "goal", "file" or "problem". */
  label: string;
  /** Trimmed text the query was derived from. */
  sourceText: string;
  /** Search query built from `sourceText`. */
  query: string;
  /** True when every query term occurs in the summary. */
  summaryMentioned: boolean;
  /** Number of rendered entries returned by the search for `query`. */
  recallHits: number;
}

/** Before/after statistics for one compaction run. */
export interface CompactReport {
  /** The compiled summary text. */
  summary: string;
  /** Measurements of the input messages. */
  before: {
    messageCount: number;
    roleCounts: RoleCounts;
    blockCounts: BlockCounts;
    /** Total length of the unclipped rendered summaries. */
    inputChars: number;
    /** `inputChars` divided by four, rounded up. */
    estimatedTokens: number;
    /** Up to ten distinct file paths seen in tool-call arguments. */
    topFiles: string[];
    /** First and last few rendered entries. */
    preview: string;
  };
  /** Measurements of the compiled summary. */
  after: {
    summaryLength: number;
    estimatedTokens: number;
    sectionCount: number;
    summaryPreview: string;
    goalsCount: number;
    blockersCount: number;
    briefTranscriptLines: number;
  };
  /** Size comparison between input and summary. */
  compression: {
    charsBefore: number;
    charsAfter: number;
    /** `charsBefore / charsAfter` to two decimals; 0 when the summary is empty. */
    ratio: number;
    messagesBefore: number;
  };
  recall: {
    probes: RecallProbe[];
  };
}
63
+
64
/** Rough token estimate: four characters per token, rounded up. */
const estimateTokensFromChars = (chars: number): number => {
  const charsPerToken = 4;
  return Math.ceil(chars / charsPerToken);
};
66
+
67
/** Tally messages by role; roles other than user/assistant/toolResult are ignored. */
const countRoles = (messages: Message[]): RoleCounts => {
  const tally: RoleCounts = { user: 0, assistant: 0, toolResult: 0 };
  for (const { role } of messages) {
    switch (role) {
      case "user":
        tally.user += 1;
        break;
      case "assistant":
        tally.assistant += 1;
        break;
      case "toolResult":
        tally.toolResult += 1;
        break;
    }
  }
  return tally;
};
76
+
77
/** Normalize the messages and tally the resulting blocks by kind. */
const countBlocks = (messages: Message[]): BlockCounts => {
  const tally: BlockCounts = { user: 0, assistant: 0, toolCalls: 0, toolResults: 0, thinking: 0 };

  for (const { kind } of normalize(messages)) {
    switch (kind) {
      case "user":
        tally.user += 1;
        break;
      case "assistant":
        tally.assistant += 1;
        break;
      case "tool_call":
        tally.toolCalls += 1;
        break;
      case "tool_result":
        tally.toolResults += 1;
        break;
      case "thinking":
        tally.thinking += 1;
        break;
    }
  }

  return tally;
};
96
+
97
/** Total character count of all messages when rendered unclipped. */
const inputCharsOf = (messages: Message[]): number =>
  messages.reduce(
    (total, msg, position) => total + renderMessage(msg, position, true).summary.length,
    0,
  );
101
+
102
/**
 * First ten distinct file paths referenced by tool calls, in order of first
 * appearance. For each call the first string-valued argument among
 * `path`, `file_path`, `filePath`, `file` is taken.
 */
const topFilesOf = (messages: Message[]): string[] => {
  const pathKeys = ["path", "file_path", "filePath", "file"];
  const seen = new Set<string>();

  for (const block of normalize(messages)) {
    if (block.kind !== "tool_call") continue;
    const { args } = block;
    const hit = pathKeys.map((key) => args[key]).find((value) => typeof value === "string");
    if (typeof hit === "string") seen.add(hit);
  }

  return Array.from(seen).slice(0, 10);
};
114
+
115
/**
 * One line per message (`#index [role] summary`, summary clipped to 220
 * characters). When there are more than `edgeCount * 2` messages only the
 * first and last `edgeCount` are shown, separated by a "..." line.
 * An empty message list yields "(empty)".
 */
const previewOf = (messages: Message[], edgeCount = 3): string => {
  const rendered = messages.map((msg, index) => renderMessage(msg, index));
  if (rendered.length === 0) return "(empty)";

  const lineOf = (entry: ReturnType<typeof renderMessage>): string =>
    `#${entry.index} [${entry.role}] ${clip(entry.summary, 220)}`;

  if (rendered.length <= edgeCount * 2) {
    return rendered.map((entry) => lineOf(entry)).join("\n");
  }

  const head = rendered.slice(0, edgeCount).map((entry) => lineOf(entry));
  const tail = rendered.slice(-edgeCount).map((entry) => lineOf(entry));
  return [...head, "...", ...tail].join("\n");
};
132
+
133
/** How many of the known section headers appear in the summary as `[Header]`. */
const sectionCountOf = (summary: string): number => {
  let present = 0;
  for (const header of SECTION_HEADERS) {
    if (summary.includes(`[${header}]`)) present += 1;
  }
  return present;
};
135
+
136
/**
 * Number of lines after the first `---` divider (blank line, `---`, blank
 * line) in the summary; 0 when the summary has no such divider.
 */
const briefLineCountOf = (summary: string): number => {
  const divider = "\n\n---\n\n";
  const cut = summary.indexOf(divider);
  if (cut < 0) return 0;

  // Line count = number of newlines in the tail + 1.
  const tail = summary.substring(cut + divider.length);
  let lines = 1;
  for (const ch of tail) {
    if (ch === "\n") lines += 1;
  }
  return lines;
};
142
+
143
/**
 * Split text into candidate search terms: runs of three or more letters,
 * digits, `_`, `.`, `/` or `-`. Such runs can never contain whitespace or be
 * empty, so no further trimming or filtering is needed.
 */
const queryTermsOf = (text: string): string[] => {
  const found = text.match(/[\p{L}\p{N}_./-]{3,}/gu);
  return found === null ? [] : [...found];
};

/** Build a search query from the first six terms of the text, space-separated. */
const queryOf = (text: string): string => {
  const leading = queryTermsOf(text).slice(0, 6);
  return leading.join(" ");
};
152
+
153
/**
 * Case-insensitive check that every whitespace-separated term of `query`
 * occurs somewhere in `text`. A query with no terms matches everything.
 */
const matchesQuery = (text: string, query: string): boolean => {
  const hay = text.toLowerCase();
  for (const term of query.toLowerCase().split(/\s+/)) {
    if (term && !hay.includes(term)) return false;
  }
  return true;
};
161
+
162
/**
 * Build recall probes that check whether key session content is still
 * findable after compaction.
 *
 * Up to three probes are produced: the first session goal ("goal"), the first
 * file path seen in a tool call ("file"), and the first outstanding-context
 * item ("problem"). A probe is dropped when no query terms can be derived
 * from its source text.
 *
 * @param messages The messages that were compacted.
 * @param summary The compiled summary to test against.
 */
const probesOf = (messages: Message[], summary: string): RecallProbe[] => {
  const blocks = normalize(messages);
  const data = buildSections({ blocks });

  // First string-valued path argument of the first tool call that has one.
  let firstFile = "";
  for (const b of blocks) {
    if (b.kind !== "tool_call") continue;
    for (const key of ["path", "file_path", "filePath", "file"]) {
      const candidate = b.args[key];
      if (typeof candidate === "string") {
        firstFile = candidate;
        break;
      }
    }
    if (firstFile) break;
  }

  const rawProbes = [
    { label: "goal", text: data.sessionGoal[0] ?? "" },
    { label: "file", text: firstFile },
    { label: "problem", text: data.outstandingContext[0] ?? "" },
  ];

  const rendered = messages.map((msg, index) => renderMessage(msg, index));

  const probes: RecallProbe[] = [];
  for (const { label, text } of rawProbes) {
    const sourceText = text.trim();
    const query = queryOf(sourceText);
    if (!query) continue;
    probes.push({
      label,
      sourceText,
      query,
      summaryMentioned: matchesQuery(summary, query),
      // searchEntries takes (entries, messages, query). Passing the query as
      // the second argument leaves `query` undefined, which returns every
      // entry and makes recallHits equal to the message count.
      recallHits: searchEntries(rendered, messages, query).length,
    });
  }
  return probes;
};
200
+
201
/**
 * Compile the input into a summary and gather before/after statistics,
 * the compression ratio and recall probes for it.
 */
export const buildCompactReport = (input: CompileInput): CompactReport => {
  const { messages } = input;

  const summary = compile(input);
  const sections = buildSections({ blocks: normalize(messages) });
  const charsBefore = inputCharsOf(messages);
  const charsAfter = summary.length;
  const topFiles = topFilesOf(messages);

  // An empty summary would divide by zero; report a ratio of 0 instead.
  const ratio = charsAfter === 0 ? 0 : Number((charsBefore / charsAfter).toFixed(2));

  const before: CompactReport["before"] = {
    messageCount: messages.length,
    roleCounts: countRoles(messages),
    blockCounts: countBlocks(messages),
    inputChars: charsBefore,
    estimatedTokens: estimateTokensFromChars(charsBefore),
    topFiles,
    preview: previewOf(messages),
  };

  const after: CompactReport["after"] = {
    summaryLength: charsAfter,
    estimatedTokens: estimateTokensFromChars(charsAfter),
    sectionCount: sectionCountOf(summary),
    summaryPreview: summary,
    goalsCount: sections.sessionGoal.length,
    blockersCount: sections.outstandingContext.length,
    briefTranscriptLines: briefLineCountOf(summary),
  };

  return {
    summary,
    before,
    after,
    compression: {
      charsBefore,
      charsAfter,
      ratio,
      messagesBefore: messages.length,
    },
    recall: {
      probes: probesOf(messages, summary),
    },
  };
};
@@ -0,0 +1,5 @@
1
/**
 * CSI escape sequences: ESC [ followed by parameter bytes (0x30-0x3F),
 * intermediate bytes (0x20-0x2F) and one final byte (0x40-0x7E). This also
 * covers private-mode sequences such as ESC[?25l, not just SGR colours.
 */
const ANSI_RE = /\x1b\[[0-?]*[ -/]*[@-~]/g;
/**
 * OSC escape sequences: ESC ] ... terminated by BEL or ESC \. The payload may
 * not span lines, so an unterminated sequence cannot swallow later text.
 */
const OSC_RE = /\x1b\][^\x07\x1b\n]*(?:\x07|\x1b\\)/g;
/** C0 control characters except tab (0x09), newline (0x0A) and carriage return (0x0D). */
const CTRL_RE = /[\x00-\x08\x0b\x0c\x0e-\x1f]/g;

/**
 * Clean terminal output for storage/display: normalise CRLF and lone CR to
 * LF, strip OSC and CSI escape sequences, then drop remaining control
 * characters (which also removes any stray ESC left by unsupported sequences).
 */
export const sanitize = (text: string): string =>
  text
    .replace(/\r\n?/g, "\n")
    .replace(OSC_RE, "")
    .replace(ANSI_RE, "")
    .replace(CTRL_RE, "");
@@ -0,0 +1,221 @@
1
+ import type { Message } from "@mariozechner/pi-ai";
2
+ import type { RenderedEntry } from "./render-entries";
3
+ import { textOf } from "./content";
4
+
5
/** A rendered entry returned by a search, optionally annotated with match details. */
export interface SearchHit extends RenderedEntry {
  /** Lines surrounding the first matching line of the entry's text (set only when a query was given). */
  snippet?: string;
  /** How many query terms matched this entry; 1 for whole-pattern regex queries. */
  matchCount?: number;
}
11
+
12
/** Backslash-escape every regex metacharacter so the string matches literally. */
const escapeRegex = (s: string): string => {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, "\\$&");
};

/**
 * Compile `pattern` as a case-insensitive regex. If it is not valid regex
 * syntax, compile it as an escaped literal instead, so this never throws.
 */
const safeRegex = (pattern: string): RegExp => {
  let source = pattern;
  try {
    new RegExp(source, "i");
  } catch {
    source = escapeRegex(pattern);
  }
  return new RegExp(source, "i");
};
23
+
24
/**
 * Heuristic: a query is treated as a single regex pattern as soon as it
 * contains any regex metacharacter (including a plain `.`).
 */
const looksLikeRegex = (query: string): boolean => {
  const metaChar = /[|*+?{}()[\]\\^$.]/;
  return metaChar.test(query);
};
27
+
28
/**
 * Build one case-insensitive regex that matches any of the terms, used to
 * locate the snippet line. A term that is not valid regex syntax on its own
 * is escaped; valid ones are used as-is.
 */
const snippetRegex = (terms: string[]): RegExp => {
  const alternatives: string[] = [];
  for (const term of terms) {
    let source = term;
    try {
      // Compile only to validate; the result is discarded.
      new RegExp(term, "i");
    } catch {
      source = escapeRegex(term);
    }
    alternatives.push(source);
  }
  return new RegExp(alternatives.join("|"), "i");
};
41
+
42
// ── Stopwords for natural language queries ──
const STOPWORDS = new Set([
  // English
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could",
  "should", "may", "might", "can", "shall", "of", "in", "to", "for",
  "with", "on", "at", "from", "by", "as", "into", "through", "during",
  "before", "after", "above", "below", "between", "out", "off", "over",
  "under", "again", "further", "then", "once", "here", "there", "when",
  "where", "why", "how", "all", "both", "each", "few", "more", "most",
  "other", "some", "such", "no", "nor", "not", "only", "own", "same",
  "so", "than", "too", "very", "just", "about", "it", "its", "that",
  "this", "what", "which", "who", "whom", "these", "those",
]);

/**
 * Drop stopwords (case-insensitive) and single-character terms. If that
 * would remove every term, the original list is returned unchanged so the
 * query is never emptied.
 */
const filterStopwords = (terms: string[]): string[] => {
  const kept: string[] = [];
  for (const term of terms) {
    if (term.length <= 1) continue;
    if (STOPWORDS.has(term.toLowerCase())) continue;
    kept.push(term);
  }
  return kept.length === 0 ? terms : kept;
};
63
+
64
/** Number of terms (each compiled via `safeRegex`) that match the haystack at least once. */
const countMatches = (hay: string, terms: string[]): number =>
  terms.filter((term) => safeRegex(term).test(hay)).length;
72
+
73
// ── BM25-lite scoring ──
const BM25_K = 1.2;
const BM25_B = 0.75;

/**
 * Number of matches of `pattern` in `text`. Only the pattern's source is
 * reused; it is always recompiled with the global and case-insensitive flags.
 */
const termFreq = (text: string, pattern: RegExp): number => {
  const everyMatch = new RegExp(pattern.source, "gi");
  return (text.match(everyMatch) ?? []).length;
};
82
+
83
/** Corpus statistics shared by all BM25 score computations for one query. */
interface BM25Context {
  n: number; // total docs
  avgDl: number; // average doc length (words)
  df: Map<string, number>; // term -> number of docs containing it
}

/**
 * Precompute document frequencies and the average document length.
 *
 * @param docs Searchable text of every document.
 * @param terms Query terms; each is compiled with `safeRegex`.
 * @returns `n` = number of docs, `avgDl` = mean whitespace-separated word
 *   count (0 for an empty corpus), `df` = per-term count of matching docs.
 */
const buildBM25Context = (docs: string[], terms: string[]): BM25Context => {
  const n = docs.length;
  const df = new Map<string, number>();

  // Dedupe first: a repeated query term would otherwise bump its df several
  // times per document, pushing df above n and the IDF below zero.
  // Compiling here also avoids rebuilding each regex for every document.
  const matchers = [...new Set(terms)].map((term) => ({ term, re: safeRegex(term) }));

  let totalLen = 0;
  for (const doc of docs) {
    totalLen += doc.split(/\s+/).length;
    for (const { term, re } of matchers) {
      if (re.test(doc)) df.set(term, (df.get(term) ?? 0) + 1);
    }
  }

  return { n, avgDl: totalLen / Math.max(n, 1), df };
};
106
+
107
/**
 * BM25 score of one document for the query terms: the sum over matching
 * terms of `idf * tfNorm`, where
 *   idf    = ln((N - df + 0.5) / (df + 0.5) + 1)
 *   tfNorm = tf * (k + 1) / (tf + k * (1 - b + b * dl / avgDl))
 * Terms that do not occur in the document contribute nothing.
 */
const bm25Score = (doc: string, terms: string[], ctx: BM25Context): number => {
  const dl = doc.split(/\s+/).length;
  // Length normalisation depends only on the document, not on the term.
  const lengthFactor = BM25_K * (1 - BM25_B + BM25_B * dl / ctx.avgDl);

  return terms.reduce((total, term) => {
    const tf = termFreq(doc, safeRegex(term));
    if (tf === 0) return total;

    const docFreq = ctx.df.get(term) ?? 0;
    const idf = Math.log((ctx.n - docFreq + 0.5) / (docFreq + 0.5) + 1);
    const tfNorm = (tf * (BM25_K + 1)) / (tf + lengthFactor);
    return total + idf * tfNorm;
  }, 0);
};
126
+
127
/**
 * Excerpt of `text` around the first line that matches `regex`:
 * up to `contextLines` lines before and after it, with
 * `...(N lines above)` / `...(N lines below)` markers where lines were cut.
 *
 * @returns The excerpt, or undefined when no line matches.
 */
const lineSnippet = (text: string, regex: RegExp, contextLines = 2): string | undefined => {
  const lines = text.split("\n");
  const hitAt = lines.findIndex((candidate) => regex.test(candidate));
  if (hitAt < 0) return undefined;

  const from = Math.max(0, hitAt - contextLines);
  const to = Math.min(lines.length, hitAt + contextLines + 1);

  const excerpt = lines.slice(from, to);
  if (from > 0) excerpt.unshift(`...(${from} lines above)`);
  if (to < lines.length) excerpt.push(`...(${lines.length - to} lines below)`);
  return excerpt.join("\n");
};
149
+
150
/**
 * Full searchable text of a message. A bashExecution entry (not part of the
 * Message type, hence the untyped access) contributes `command output`;
 * every other message contributes the text of its content.
 */
const fullText = (msg: Message): string => {
  const loose = msg as any;
  if (loose.role !== "bashExecution") return textOf(msg.content);

  const command = loose.command ?? "";
  const output = loose.output ?? "";
  return `${command} ${output}`;
};
157
+
158
/**
 * Search rendered entries.
 *
 * @param entries Rendered entries to search.
 * @param messages Messages index-aligned with `entries`; where present, a
 *   message's full text is searched instead of the entry's summary.
 * @param query Search query. Blank or missing returns `entries` unchanged.
 *   A query containing any regex metacharacter is applied as one
 *   case-insensitive pattern and hits keep their original order. Any other
 *   query is split on whitespace, stopwords are removed, and entries
 *   matching at least one term are returned ordered by BM25 score, highest
 *   first.
 */
export const searchEntries = (
  entries: RenderedEntry[],
  messages: Message[],
  query?: string,
): SearchHit[] => {
  const rawQuery = query?.trim();
  if (!rawQuery) return entries;

  // Text the snippet is taken from: the full message when available.
  const bodyOf = (entry: RenderedEntry, i: number): string => {
    const msg = messages[i];
    return msg ? fullText(msg) : entry.summary;
  };
  // Text that is matched: role + body + touched files.
  const haystackOf = (entry: RenderedEntry, i: number): string => {
    const filePart = entry.files?.join(" ") ?? "";
    return `${entry.role} ${bodyOf(entry, i)} ${filePart}`;
  };

  // Single-pattern mode: the whole query is one regex, never split into terms.
  if (looksLikeRegex(rawQuery)) {
    const regex = safeRegex(rawQuery);
    const hits: SearchHit[] = [];
    for (const [i, entry] of entries.entries()) {
      if (!regex.test(haystackOf(entry, i))) continue;
      hits.push({ ...entry, snippet: lineSnippet(bodyOf(entry, i), regex), matchCount: 1 });
    }
    return hits;
  }

  // Natural language / multi-word query: BM25 scoring
  const terms = filterStopwords(rawQuery.split(/\s+/));
  const snipRe = snippetRegex(terms);

  const docs: string[] = [];
  for (const [i, entry] of entries.entries()) docs.push(haystackOf(entry, i));

  const ctx = buildBM25Context(docs, terms);

  const ranked: Array<{ hit: SearchHit; score: number }> = [];
  for (const [i, entry] of entries.entries()) {
    const hay = docs[i];
    const matched = countMatches(hay, terms);
    if (matched === 0) continue;
    ranked.push({
      hit: { ...entry, snippet: lineSnippet(bodyOf(entry, i), snipRe), matchCount: matched },
      score: bm25Score(hay, terms, ctx),
    });
  }

  // Sort by BM25 score desc
  ranked.sort((left, right) => right.score - left.score);
  return ranked.map(({ hit }) => hit);
};
@@ -0,0 +1,8 @@
1
+ /**
2
+ * ultimate-pi harness settings (env-only). Re-exported for vendored pi-vcc layout.
3
+ */
4
+ export type { PiVccSettings } from "../../../../.pi/extensions/lib/harness-vcc-settings.js";
5
+ export {
6
+ loadSettings,
7
+ scaffoldSettings,
8
+ } from "../../../../.pi/extensions/lib/harness-vcc-settings.js";
@@ -0,0 +1,35 @@
1
/** Shared skill-tag collapse utilities */

/** Line that opens a skill block, optionally preceded by a list dash; captures the skill name. */
const SKILL_TAG_RE = /^-?\s*<skill\s+name="([^"]+)"/;
/** Line that closes a skill block, optionally preceded by a list dash. */
const SKILL_CLOSE_RE = /^-?\s*<\/skill>/;

/**
 * Collapse skill blocks in an array of lines.
 *
 * Each block is replaced by a single `[skill: NAME]` line the first time that
 * name is seen; repeated blocks of the same name produce nothing. Everything
 * inside a block, including its closing line, is dropped. A block that is
 * never closed swallows all remaining lines.
 *
 * @param lines Input lines; not modified.
 * @returns A new array with skill blocks collapsed.
 */
export const collapseSkillLines = (lines: string[]): string[] => {
  const result: string[] = [];
  const seenSkills = new Set<string>();
  let insideSkill = false;

  for (const line of lines) {
    const skillMatch = line.match(SKILL_TAG_RE);
    if (skillMatch) {
      const name = skillMatch[1];
      if (!seenSkills.has(name)) {
        seenSkills.add(name);
        result.push(`[skill: ${name}]`);
      }
      // A block opened and closed on the same line must not leave us
      // "inside" it, or every following line would be silently dropped.
      insideSkill = !line.slice(skillMatch[0].length).includes("</skill>");
      continue;
    }
    if (insideSkill) {
      if (SKILL_CLOSE_RE.test(line)) insideSkill = false;
      continue;
    }
    result.push(line);
  }
  return result;
};
31
+
32
/**
 * Matches a whole `<skill name="X" ...>...</skill>` block in raw text and
 * captures X. A block with no closing tag extends to the end of the text.
 */
const SKILL_BLOCK_RE = /<skill\s+name="([^"]+)"[^>]*>[\s\S]*?(?:<\/skill>|$)/g;

/** Replace every skill block in raw text with `[skill: NAME]` (no deduplication). */
export const collapseSkillText = (text: string): string => {
  const toMarker = (_block: string, name: string): string => `[skill: ${name}]`;
  return text.replace(SKILL_BLOCK_RE, toMarker);
};