pi-crew 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,334 @@
1
+ /**
2
+ * Staleness-aware tool output pruning.
3
+ *
4
+ * Identifies tool results that have been superseded by a later result for the
5
+ * same target (same file read again, same search re-run) or invalidated by a
6
+ * later successful edit/write to a covered file, and replaces the stale
7
+ * content with a compact digest notice. Protect-window and minimum-savings
8
+ * hysteresis ensure recent results are preserved and pruning only fires when
9
+ * the savings justify it.
10
+ *
11
+ * Ported and adapted from gajae-code's compaction/pruning.ts to pi-crew's
12
+ * data shapes. Pi-crew delegates conversation management to child Pi
13
+ * processes, so this module operates on a generic {@link ToolResultEntry}
14
+ * sequence rather than SessionEntry[]. The primary integration point is
15
+ * task-output-context.ts (dependency output context injected into worker
16
+ * prompts), but the module is designed to be reusable for any in-process
17
+ * tool-result sequence.
18
+ */
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Types
22
+ // ---------------------------------------------------------------------------
23
+
24
/**
 * One tool result inside an ordered sequence (oldest first, newest last).
 * This is pi-crew's own plain data shape; it carries no dependency on
 * gajae-code's SessionEntry.
 */
export interface ToolResultEntry {
  /** Stable identifier, used for deduplication and correlation. */
  id: string;
  /** Name of the tool, e.g. "read", "bash", "grep", "search", "edit", "write". */
  toolName: string;
  /**
   * What the tool was pointed at: a file path for read/edit/write, a search
   * pattern for grep/search. Left undefined for tools that have no natural
   * target key.
   */
  target?: string;
  /** Text content of the tool result. */
  content: string;
  /** Set when the tool result represents an error. */
  isError?: boolean;
}
43
+
44
/** A file mutation (edit or write) that can invalidate reads made before it. */
export interface FileEditEvent {
  /** Path of the file that was mutated. */
  target: string;
  /**
   * Position of this edit in the same index space as the tool results.
   * A read at index `i` counts as stale when an edit at index `j > i`
   * touches the same file.
   */
  index: number;
}
54
+
55
/** Tuning knobs for tool-output pruning. */
export interface PruneConfig {
  /** Size, in estimated tokens, of the most recent tool output kept intact (protect window). */
  protectTokens: number;
  /** Hysteresis threshold: pruning happens only when total savings reach this many tokens. */
  minimumSavings: number;
  /** Names of tools whose results are never pruned. */
  protectedTools: string[];
  /**
   * Subset of `protectedTools` that loses its protection once a result has
   * been superseded, i.e. a later result exists for the same target or a
   * later successful edit/write touched the covered file. The newest result
   * for a target is never treated as superseded. Optional; when omitted no
   * tool is overridable.
   */
  staleOverridableTools?: string[];
}
70
+
71
/**
 * Default pruning configuration: a 40k-token protect window, a 20k-token
 * minimum-savings threshold, and `read` results protected unless they have
 * been superseded.
 */
export const DEFAULT_PRUNE_CONFIG: PruneConfig = {
  protectTokens: 40000,
  minimumSavings: 20000,
  protectedTools: ["read"],
  staleOverridableTools: ["read"],
};
77
+
78
/** Outcome of a pruning pass. */
export interface PruneResult {
  /** How many entries had their content replaced. */
  prunedCount: number;
  /** Estimated number of tokens saved by the replacements. */
  tokensSaved: number;
  /** Result entries after pruning: same length as the input, with content replaced on pruned entries. */
  results: ToolResultEntry[];
  /** `id` of every entry that was pruned. */
  prunedIds: string[];
}
88
+
89
+ // ---------------------------------------------------------------------------
90
+ // Token estimation (rough char/4 heuristic, matching gajae-code)
91
+ // ---------------------------------------------------------------------------
92
+
93
/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up.
 *
 * @param text Text to measure.
 * @returns Estimated token count (0 for the empty string).
 */
function estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
96
+
97
+ // ---------------------------------------------------------------------------
98
+ // Digest notice generation
99
+ // ---------------------------------------------------------------------------
100
+
101
/**
 * Factor applied to the token size of the plain "pruned" notice to obtain the
 * token budget allowed for a notice that also carries a digest.
 */
const DIGEST_NOTICE_TOKEN_CAP_MULTIPLIER = 5 / 4;
102
+
103
/**
 * Find the first line that mentions an error-like keyword
 * (error / failed / exception / panic, case-insensitive).
 *
 * @param text Multi-line text; LF and CRLF line endings are both accepted.
 * @returns The matching line with surrounding whitespace trimmed, or
 *          undefined when no line matches.
 */
function firstErrorLine(text: string): string | undefined {
  const errorMarker = /error|failed|exception|panic/i;
  for (const candidate of text.split(/\r?\n/)) {
    if (errorMarker.test(candidate)) return candidate.trim();
  }
  return undefined;
}
109
+
110
/**
 * Shorten `value` to at most `maxLength` characters, marking the cut with a
 * trailing ellipsis character.
 *
 * @param value     Text to shorten.
 * @param maxLength Maximum length of the returned text.
 * @returns `value` unchanged when it already fits; otherwise the first
 *          `maxLength - 1` characters followed by "…". When the value does
 *          not fit and `maxLength` is 1 or less, only "…" is returned.
 */
function truncateField(value: string, maxLength: number): string {
  const fits = value.length <= maxLength;
  if (fits) return value;
  return maxLength > 1 ? value.slice(0, maxLength - 1) + "…" : "…";
}
115
+
116
/**
 * Build a short, single-line summary of a tool result for use in a pruned
 * notice.
 *
 * - `bash`: `exit=<0|1>` (1 when `isError` is set), then `tail=<last
 *   non-empty line>` and `error=<first error-like line>` when present.
 * - `search` / `grep`: `matches=<n>`, `files=<n>` and `error=<line>` for
 *   whichever of those can be found in the text; the literal
 *   "search digest unavailable" when none can.
 * - any other tool: undefined.
 *
 * @param toolName Tool name, compared case-insensitively.
 * @param content  Result text; a nullish value is treated as empty.
 * @param isError  Whether the result represents an error.
 * @returns The digest parts joined with "; ", or undefined for tools without
 *          a digest format.
 */
export function resultDigest(toolName: string, content: string, isError?: boolean): string | undefined {
  const body = content ?? "";
  // Drop absent parts and join what remains.
  const joinParts = (parts: Array<string | undefined>): string =>
    parts.filter((part): part is string => part !== undefined).join("; ");

  switch (toolName.toLowerCase()) {
    case "bash": {
      const nonEmptyLines = body.trim().split(/\r?\n/).filter(Boolean);
      const lastLine = nonEmptyLines[nonEmptyLines.length - 1] ?? "";
      const errorLine = firstErrorLine(body);
      return joinParts([
        `exit=${isError ? 1 : 0}`,
        lastLine ? `tail=${lastLine}` : undefined,
        errorLine ? `error=${errorLine}` : undefined,
      ]);
    }
    case "search":
    case "grep": {
      // Accept both prose ("12 matches") and JSON-ish ("totalMatches: 12") forms.
      const matchCount = body.match(/(\d+)\s+matches?/i) ?? body.match(/totalMatches["']?:\s*(\d+)/i);
      const fileCount = body.match(/(\d+)\s+files?/i) ?? body.match(/filesWithMatches["']?:\s*(\d+)/i);
      const errorLine = firstErrorLine(body);
      const summary = joinParts([
        matchCount ? `matches=${matchCount[1]}` : undefined,
        fileCount ? `files=${fileCount[1]}` : undefined,
        errorLine ? `error=${errorLine}` : undefined,
      ]);
      return summary || "search digest unavailable";
    }
    default:
      return undefined;
  }
}
148
+
149
/**
 * Build the text that replaces a pruned tool result.
 *
 * Without a digest the notice is `[Output pruned — N tokens]`. With a digest
 * it becomes `[Output pruned — N tokens; <digest>]`, where the digest is
 * truncated so the whole notice stays within the plain notice's token size
 * scaled by DIGEST_NOTICE_TOKEN_CAP_MULTIPLIER.
 *
 * @param tokens Estimated token size of the content being replaced.
 * @param entry  The tool result being replaced.
 * @returns The replacement notice.
 */
function createPrunedNotice(tokens: number, entry: ToolResultEntry): string {
  const plainNotice = `[Output pruned — ${tokens} tokens]`;
  const summary = resultDigest(entry.toolName, entry.content, entry.isError);
  if (!summary) return plainNotice;

  const head = `[Output pruned — ${tokens} tokens; `;
  const tail = "]";
  const plainTokens = Math.ceil(plainNotice.length / 4);
  const tokenCap = Math.max(plainTokens, Math.floor(plainTokens * DIGEST_NOTICE_TOKEN_CAP_MULTIPLIER));
  // Characters left for the digest once the fixed head and tail are paid for.
  const charBudget = Math.max(0, tokenCap * 4 - head.length - tail.length);
  return head + truncateField(summary, charBudget) + tail;
}
160
+
161
+ // ---------------------------------------------------------------------------
162
+ // Target key resolution
163
+ // ---------------------------------------------------------------------------
164
+
165
/**
 * Matches ONE trailing read selector: `:50`, `:50-200`, `:50+150`,
 * `:5-16,960-973`, `:raw` or `:conflicts`. Selectors may be stacked, so
 * callers strip repeatedly until nothing matches.
 */
const READ_SELECTOR_SUFFIX = /:(?:raw|conflicts|\d+(?:[-+]\d+)?(?:,\d+(?:[-+]\d+)?)*)$/;

/**
 * Base file path of a read target with every trailing line/mode selector
 * removed, so it can be compared against edit targets.
 */
function readBasePath(filePath: string): string {
  let base = filePath;
  while (READ_SELECTOR_SUFFIX.test(base)) {
    base = base.replace(READ_SELECTOR_SUFFIX, "");
  }
  return base;
}

/**
 * Stable identity for "the same logical lookup": the same tool aimed at the
 * same target. A later result with an equal key supersedes earlier ones.
 *
 * @returns The key, or undefined when the entry has no (or an empty) target.
 */
function toolTargetKey(entry: ToolResultEntry): string | undefined {
  if (!entry.target) return undefined;
  return JSON.stringify([entry.toolName, entry.target]);
}

// ---------------------------------------------------------------------------
// Staleness index
// ---------------------------------------------------------------------------

export interface StalenessIndex {
  /** Indices of tool results superseded by a later same-target result or edit. */
  staleIndices: Set<number>;
}

/**
 * Build a staleness index over a sequence of tool results (oldest → newest):
 * - a tool result is stale when a later non-error result shares its target key;
 * - a `read` result is stale when a later edit event touches its file
 *   (read selectors such as `:50-200` are stripped before comparing).
 * The most recent result per target is never stale.
 *
 * `fileEdits` may be supplied in any order: the highest `index` recorded for
 * a path is the one compared against each read.
 *
 * @param toolResults Ordered tool result entries (oldest first).
 * @param fileEdits   Optional file mutation events with sequence indices.
 * @returns The set of indices into `toolResults` that are stale.
 */
export function buildStalenessIndex(toolResults: ToolResultEntry[], fileEdits: FileEditEvent[] = []): StalenessIndex {
  // Target key → index of the newest non-error result that carries it.
  const lastResultIndexByKey = new Map<string, number>();
  toolResults.forEach((entry, i) => {
    if (entry.isError) return;
    const key = toolTargetKey(entry);
    if (key !== undefined) lastResultIndexByKey.set(key, i);
  });

  // File path → highest edit index. Taking the maximum (rather than whichever
  // event happens to come last in the array) keeps the result independent of
  // the order in which callers list the edits.
  const lastEditIndexByPath = new Map<string, number>();
  for (const edit of fileEdits) {
    const known = lastEditIndexByPath.get(edit.target);
    if (known === undefined || edit.index > known) {
      lastEditIndexByPath.set(edit.target, edit.index);
    }
  }

  const staleIndices = new Set<number>();
  toolResults.forEach((entry, i) => {
    // Superseded by a later non-error result for the same tool + target.
    const key = toolTargetKey(entry);
    const newestSameKey = key === undefined ? undefined : lastResultIndexByKey.get(key);
    if (newestSameKey !== undefined && newestSameKey > i) {
      staleIndices.add(i);
      return;
    }
    // Invalidated by a later edit of the file that was read.
    if (entry.toolName.toLowerCase() !== "read" || !entry.target) return;
    const editIndex = lastEditIndexByPath.get(readBasePath(entry.target));
    if (editIndex !== undefined && editIndex > i) {
      staleIndices.add(i);
    }
  });

  return { staleIndices };
}
248
+
249
+ // ---------------------------------------------------------------------------
250
+ // Pruning
251
+ // ---------------------------------------------------------------------------
252
+
253
/**
 * Prune stale tool outputs from a sequence, replacing their content with
 * compact digest notices. The input array and its entries are not mutated.
 *
 * An entry becomes a pruning candidate when it is not protected and is either
 * stale or outside the protect window:
 * - Protect window: walking newest → oldest, entries are kept while fewer
 *   than `config.protectTokens` estimated tokens have been accumulated.
 *   Stale entries are prunable even inside the window, but still count
 *   toward it.
 * - Protected tools: entries whose tool is in `config.protectedTools` are
 *   kept, unless the entry is stale and its tool is also listed in
 *   `config.staleOverridableTools`.
 * - An entry whose notice would not be smaller than its content is left
 *   untouched, so pruning never enlarges an entry.
 * - Hysteresis: nothing is pruned unless the candidates' combined savings
 *   reach `config.minimumSavings`; in that case the original `results` array
 *   is returned as-is.
 *
 * @param results   Ordered tool result entries (oldest first).
 * @param config    Prune configuration. Defaults to {@link DEFAULT_PRUNE_CONFIG}.
 * @param fileEdits Optional file mutation events, indexed in the same space
 *                  as `results`; a `read` followed by a later edit of the
 *                  same file is treated as stale. Defaults to none.
 * @returns Pruned entries plus the count, ids (newest first) and estimated
 *          token savings of what was pruned.
 */
export function pruneToolOutputs(
  results: ToolResultEntry[],
  config: PruneConfig = DEFAULT_PRUNE_CONFIG,
  fileEdits: FileEditEvent[] = [],
): PruneResult {
  const { staleIndices } = buildStalenessIndex(results, fileEdits);
  const staleOverridable = new Set(config.staleOverridableTools ?? []);

  interface Candidate {
    index: number;
    entry: ToolResultEntry;
    notice: string;
    savings: number;
  }
  const candidates: Candidate[] = [];
  let accumulatedTokens = 0;

  // Iterate newest → oldest to accumulate the protect window from the tail.
  for (let i = results.length - 1; i >= 0; i--) {
    const entry = results[i]!;
    const tokens = estimateTokens(entry.content);
    const isStale = staleIndices.has(i);

    // Staleness waives protected-tool immunity for overridable tools
    // (e.g. a superseded `read`); the most recent result per target is
    // never stale, so the latest read of each file stays protected.
    const isProtected =
      config.protectedTools.includes(entry.toolName) && !(isStale && staleOverridable.has(entry.toolName));

    // The window test uses the total accumulated BEFORE this entry; every
    // entry, pruned or not, then counts toward the window.
    const insideProtectWindow = accumulatedTokens < config.protectTokens;
    accumulatedTokens += tokens;

    if ((insideProtectWindow && !isStale) || isProtected) continue;

    const notice = createPrunedNotice(tokens, entry);
    const savings = tokens - estimateTokens(notice);
    // Replacing content with a notice of equal or greater size would make
    // the output larger, not smaller.
    if (savings <= 0) continue;

    candidates.push({ index: i, entry, notice, savings });
  }

  const tokensSaved = candidates.reduce((total, candidate) => total + candidate.savings, 0);

  // Hysteresis: only prune if savings meet the threshold.
  if (candidates.length === 0 || tokensSaved < config.minimumSavings) {
    return { prunedCount: 0, tokensSaved: 0, results, prunedIds: [] };
  }

  const prunedResults = [...results];
  const prunedIds: string[] = [];
  for (const candidate of candidates) {
    prunedResults[candidate.index] = { ...candidate.entry, content: candidate.notice };
    prunedIds.push(candidate.entry.id);
  }

  return { prunedCount: candidates.length, tokensSaved, results: prunedResults, prunedIds };
}
@@ -7,6 +7,7 @@ import { fileURLToPath } from "node:url";
7
7
  import { getAgentDir } from "../runtime/peer-dep.ts";
8
8
  import { logInternalError } from "../utils/internal-error.ts";
9
9
  import { isSafePathId, resolveContainedPath, resolveRealContainedPath } from "../utils/safe-paths.ts";
10
+ import { parseSkillFrontmatter, validateSkillFrontmatter, type SkillValidationError } from "./validate.ts";
10
11
 
11
12
  const PACKAGE_SKILLS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "..", "skills");
12
13
 
@@ -54,16 +55,44 @@ function listSkillDirs(cwd: string): Array<{ root: string; source: SkillDescript
54
55
  ];
55
56
  }
56
57
 
57
- function frontmatterDescription(content: string): string | undefined {
58
- const match = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content);
59
- if (!match) return undefined;
60
- const line = match[1].split(/\r?\n/).find((entry) => entry.startsWith("description:"));
61
- return line?.slice("description:".length).trim();
58
// ── Diagnostics (L3) ──────────────────────────────────────────────────────────
// Module-level buffer holding the diagnostics of the most recent
// `discoverSkills()` run that actually scanned. `discoverSkills()` builds a
// fresh array per run and assigns it here when the run completes; a call
// answered from the cache returns before that assignment, so the previous
// run's diagnostics stay visible. Surfaced via `getLastDiscoveryDiagnostics()`
// so capability-inventory and other consumers can convert silent exclusions
// into visible feedback.
let lastDiagnostics: SkillValidationError[] = [];

/**
 * Diagnostics recorded by the most recent skill-discovery scan.
 *
 * @returns A new array on every call (the entries themselves are shared), so
 *          callers may sort, filter or push without corrupting the
 *          module-level buffer.
 */
export function getLastDiscoveryDiagnostics(): SkillValidationError[] {
  return [...lastDiagnostics];
}
68
+
69
/**
 * Extract a skill's description from SKILL.md content without throwing away
 * files the validator's parser rejects.
 *
 * When `parseSkillFrontmatter` succeeds, the parsed `description` is used
 * (empty string if it is not a string). When it fails, the legacy approach is
 * tried: locate the `---` fenced header and take the first line starting with
 * `description:`. In the failure case the parser's error is returned next to
 * whatever description could be salvaged.
 *
 * @param content Full text of the SKILL.md file.
 * @returns The description ("" when none was found) and the parse error, or
 *          null when parsing succeeded.
 */
function readDescription(content: string): { description: string; parseError: string | null } {
  const parsed = parseSkillFrontmatter(content);
  if (parsed.ok) {
    const candidate = parsed.data.description;
    return { description: typeof candidate === "string" ? candidate : "", parseError: null };
  }

  // Parsing failed: salvage the description the way the old reader did, so
  // skills it could read do not regress.
  const parseError = parsed.error;
  const fence = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content);
  if (!fence) return { description: "", parseError };

  const prefix = "description:";
  const descriptionLine = fence[1].split(/\r?\n/).find((entry) => entry.startsWith(prefix));
  const salvaged = descriptionLine === undefined ? "" : descriptionLine.slice(prefix.length).trim();
  return { description: salvaged, parseError };
}
63
91
 
64
92
  export function discoverSkills(cwd: string): SkillDescriptor[] {
65
93
  if (cache && cache.cwd === cwd && Date.now() - cache.cachedAt < CACHE_TTL_MS) return cache.skills;
66
94
  const results: SkillDescriptor[] = [];
95
+ const diagnostics: SkillValidationError[] = [];
67
96
  for (const dir of listSkillDirs(cwd)) {
68
97
  if (!fs.existsSync(dir.root)) continue;
69
98
  try {
@@ -94,7 +123,16 @@ export function discoverSkills(cwd: string): SkillDescriptor[] {
94
123
  // (e.g. macOS /var → /private/var). Fall through with un-resolved path.
95
124
  }
96
125
  const content = fs.readFileSync(readPath, "utf-8");
97
- description = frontmatterDescription(content) ?? "";
126
+ const { description: desc, parseError } = readDescription(content);
127
+ description = desc;
128
+ if (parseError) {
129
+ diagnostics.push({
130
+ path: path.dirname(skillMdPath),
131
+ field: "frontmatter",
132
+ reason: parseError,
133
+ severity: "error",
134
+ });
135
+ }
98
136
  } catch (error) {
99
137
  logInternalError("discoverSkills.readSkill", error, `skill=${entry.name}`);
100
138
  }
@@ -104,6 +142,21 @@ export function discoverSkills(cwd: string): SkillDescriptor[] {
104
142
  logInternalError("discoverSkills.readdir", error, `root=${dir.root}`);
105
143
  }
106
144
  }
107
- cache = { skills: results, cachedAt: Date.now(), cwd };
108
- return results;
145
+ // L3: strict validation pass after we've collected every (skill, source)
146
+ // candidate. Excludes malformed skills (HYBRID policy: missing/malformed
147
+ // `name`/`description` hard-fail; unknown props warn). Diagnostics are
148
+ // always recorded, including for skills that PASSED validation but had
149
+ // unknown-prop warnings.
150
+ const filtered: SkillDescriptor[] = [];
151
+ for (const skill of results) {
152
+ const validation = validateSkillFrontmatter(path.dirname(skill.path));
153
+ if (validation.ok) {
154
+ filtered.push(skill);
155
+ } else {
156
+ diagnostics.push(...validation.errors);
157
+ }
158
+ }
159
+ lastDiagnostics = diagnostics;
160
+ cache = { skills: filtered, cachedAt: Date.now(), cwd };
161
+ return filtered;
109
162
  }