pi-crew 0.7.7 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +325 -0
- package/package.json +1 -1
- package/src/agents/agent-config.ts +101 -1
- package/src/agents/discover-agents.ts +34 -3
- package/src/config/types.ts +8 -0
- package/src/errors.ts +9 -0
- package/src/extension/context-status-injection.ts +14 -5
- package/src/extension/register.ts +4 -18
- package/src/extension/registration/compaction-guard.ts +44 -13
- package/src/extension/team-tool/handle-settings.ts +2 -0
- package/src/runtime/live-session-runtime.ts +69 -7
- package/src/runtime/model-fallback.ts +39 -1
- package/src/runtime/model-scope.ts +141 -0
- package/src/runtime/pi-args.ts +21 -6
- package/src/runtime/skill-effectiveness.ts +84 -10
- package/src/runtime/skill-instructions.ts +14 -4
- package/src/runtime/task-runner.ts +21 -0
- package/src/skills/discover-skills.ts +31 -2
- package/src/ui/agent-management-overlay.ts +1 -1
- package/src/utils/session-utils.ts +30 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
2
2
|
import { listRecentRuns } from "../run-index.ts";
|
|
3
|
+
import { extractSessionId } from "../../utils/session-utils.ts";
|
|
3
4
|
import type { ArtifactDescriptor, TeamRunManifest } from "../../state/types.ts";
|
|
4
5
|
|
|
5
6
|
export interface RegisterCompactionGuardOptions {
|
|
@@ -71,11 +72,25 @@ function formatCrewArtifactIndex(entries: CrewArtifactIndexEntry[]): string {
|
|
|
71
72
|
/**
 * Gather the in-flight (non-terminal) crew runs that must stay resumable
 * after compaction — the runs the agent was actively working on or awaiting.
 *
 * @param cwd - project working directory (shared, per-project state root).
 * @param currentSessionId - when provided, only runs whose `ownerSessionId`
 *   equals it are returned. The state store is per-PROJECT, not per-SESSION,
 *   so without this filter one session's compaction would pick up another
 *   session's in-flight runs. Runs with no `ownerSessionId` never match and
 *   are therefore excluded under filtering (strict). When omitted, ALL
 *   in-flight runs are returned (back-compat / cross-session view).
 * @returns the matching runs among the `MAX_ARTIFACT_INDEX_RUNS` most recent.
 */
export function collectInFlightRuns(cwd: string, currentSessionId?: string): TeamRunManifest[] {
	const restrictToOwner = currentSessionId !== undefined;
	const inFlight: TeamRunManifest[] = [];
	for (const run of listRecentRuns(cwd, MAX_ARTIFACT_INDEX_RUNS)) {
		// Terminal runs are never resumable.
		if (!IN_FLIGHT_RUN_STATUSES.has(run.status)) continue;
		// Strict ownership check: ownerless (legacy) runs fail this comparison.
		if (restrictToOwner && run.ownerSessionId !== currentSessionId) continue;
		inFlight.push(run);
	}
	return inFlight;
}
|
|
80
95
|
|
|
81
96
|
/**
|
|
@@ -130,29 +145,43 @@ export function buildContinuationPrompt(runs: TeamRunManifest[]): string {
|
|
|
130
145
|
* Trigger automatic agent continuation after compaction. Fire-and-forget the
|
|
131
146
|
* promise — never block the compaction flow. The sendUserMessage type is
|
|
132
147
|
* declared `void` but the runtime returns a Promise (it triggers an agent turn).
|
|
148
|
+
*
|
|
149
|
+
* During compaction the agent may still be mid-processing, so Pi can reject
|
|
150
|
+
* the queued message with "Agent is already processing a prompt...". This is
|
|
151
|
+
* BENIGN — the in-flight worker continues independently regardless — so we
|
|
152
|
+
* detect that specific race and downgrade it to a silent debug log instead of
|
|
153
|
+
* surfacing a scary warning to the user. Other errors still notify.
|
|
133
154
|
*/
|
|
134
155
|
export function triggerContinuation(pi: ExtensionAPI, ctx: ExtensionContext, runs: TeamRunManifest[]): void {
	if (!runs.length) return;
	const prompt = buildContinuationPrompt(runs);

	// Recognize the "Agent is already processing a prompt..." rejection (which
	// points at steer()/followUp()). It is benign: the in-flight worker keeps
	// running independently, so there is nothing to tell the user.
	const isBenignProcessingRace = (err: unknown): boolean => {
		const msg = err instanceof Error ? err.message : String(err ?? "");
		return /already processing a prompt/i.test(msg) || /use steer\(\) or followUp\(\)/i.test(msg);
	};

	// Single failure handler for BOTH the async rejection and the synchronous
	// throw, so the two paths cannot drift apart: benign races are dropped,
	// anything else surfaces a manual-resume hint. Never throws.
	const reportFailure = (err: unknown): void => {
		if (isBenignProcessingRace(err)) return;
		try {
			ctx.ui.notify("pi-crew: auto-continuation after compaction failed — use team status to resume manually.", "warning");
		} catch {
			// swallow — the notification itself is best-effort
		}
	};

	try {
		// sendUserMessage is typed `void` but may hand back a Promise at runtime;
		// Promise.resolve() normalizes both shapes. Fire-and-forget on purpose.
		const result = pi.sendUserMessage(prompt) as unknown;
		void Promise.resolve(result).catch(reportFailure);
	} catch (err: unknown) {
		// Synchronous throw — same handling as an async rejection.
		reportFailure(err);
	}
}
|
|
151
180
|
|
|
152
181
|
/** Combined customInstructions injected into proactive compaction summaries. */
|
|
153
|
-
function buildCompactionInstructions(cwd: string): string {
|
|
182
|
+
function buildCompactionInstructions(cwd: string, currentSessionId?: string): string {
|
|
154
183
|
const artifactIndex = collectCrewArtifactIndex(cwd);
|
|
155
|
-
const inFlight = collectInFlightRuns(cwd);
|
|
184
|
+
const inFlight = collectInFlightRuns(cwd, currentSessionId);
|
|
156
185
|
const parts = [
|
|
157
186
|
"Prioritize keeping pi-crew run state, task results, artifact references, run IDs, and next actions. Keep completed-task detail concise.",
|
|
158
187
|
];
|
|
@@ -168,10 +197,11 @@ export function registerCompactionGuard(pi: ExtensionAPI, options: RegisterCompa
|
|
|
168
197
|
const startCompact = (ctx: ExtensionContext, reason: string): void => {
|
|
169
198
|
if (compactionInProgress) return;
|
|
170
199
|
compactionInProgress = true;
|
|
171
|
-
const
|
|
200
|
+
const sessionId = extractSessionId(ctx);
|
|
201
|
+
const customInstructions = buildCompactionInstructions(ctx.cwd, sessionId);
|
|
172
202
|
// Append a durable resume entry so it appears in the post-compaction
|
|
173
203
|
// context regardless of how summarization treats customInstructions.
|
|
174
|
-
const inFlight = collectInFlightRuns(ctx.cwd);
|
|
204
|
+
const inFlight = collectInFlightRuns(ctx.cwd, sessionId);
|
|
175
205
|
if (inFlight.length > 0) {
|
|
176
206
|
pi.appendEntry("crew:resume-directive", {
|
|
177
207
|
reason,
|
|
@@ -192,7 +222,7 @@ export function registerCompactionGuard(pi: ExtensionAPI, options: RegisterCompa
|
|
|
192
222
|
// O10 FIX: Pi's threshold compaction does NOT auto-retry — it
|
|
193
223
|
// stops and waits for user input. Trigger automatic
|
|
194
224
|
// continuation so the agent resumes the in-flight crew task.
|
|
195
|
-
const runs = collectInFlightRuns(ctx.cwd);
|
|
225
|
+
const runs = collectInFlightRuns(ctx.cwd, extractSessionId(ctx));
|
|
196
226
|
triggerContinuation(pi, ctx, runs);
|
|
197
227
|
ctx.ui.notify(reason === "deferred" ? "Deferred compaction completed" : "Auto-compacted context during team run", "info");
|
|
198
228
|
},
|
|
@@ -219,7 +249,8 @@ export function registerCompactionGuard(pi: ExtensionAPI, options: RegisterCompa
|
|
|
219
249
|
// our proactive startCompact path.
|
|
220
250
|
pi.on("session_compact", (_event, ctx) => {
|
|
221
251
|
try {
|
|
222
|
-
const
|
|
252
|
+
const sessionId = extractSessionId(ctx);
|
|
253
|
+
const inFlight = collectInFlightRuns(ctx.cwd, sessionId);
|
|
223
254
|
if (inFlight.length === 0) return;
|
|
224
255
|
// Re-append the resume directive entry for durable record.
|
|
225
256
|
pi.appendEntry("crew:resume-directive", {
|
|
@@ -205,6 +205,8 @@ const KNOWN_KEYS = new Set([
|
|
|
205
205
|
"reliability.retryPolicy.jitterRatio",
|
|
206
206
|
"reliability.retryPolicy.exponentialFactor",
|
|
207
207
|
"reliability.retryPolicy.retryableErrors",
|
|
208
|
+
// F7: opt-in model scope enforcement (hard-error caller out-of-scope, warn frontmatter).
|
|
209
|
+
"reliability.scopeModels",
|
|
208
210
|
// otlp
|
|
209
211
|
"otlp.enabled",
|
|
210
212
|
"otlp.endpoint",
|
|
@@ -15,6 +15,9 @@ import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
|
|
15
15
|
import { isLiveSessionRuntimeAvailable } from "./runtime-resolver.ts";
|
|
16
16
|
import { redactSecrets } from "../utils/redaction.ts";
|
|
17
17
|
import { buildConfiguredModelRouting } from "./model-fallback.ts";
|
|
18
|
+
import { readEnabledModelsPatterns } from "./model-scope.ts";
|
|
19
|
+
import { resolveToolPolicy } from "../agents/agent-config.ts";
|
|
20
|
+
import { loadConfig } from "../config/config.ts";
|
|
18
21
|
import { DEFAULT_LIVE_SESSION } from "../config/defaults.ts";
|
|
19
22
|
import { buildYieldReminder, hasYieldInOutput, isYieldEvent, extractYieldResult, validateYieldData, DEFAULT_YIELD_CONFIG, type YieldResult } from "./yield-handler.ts";
|
|
20
23
|
import { buildMcpProxyFromSession } from "./mcp-proxy.ts";
|
|
@@ -28,6 +31,30 @@ import { buildSensitivePathConstraint } from "./sensitive-paths.ts";
|
|
|
28
31
|
import { collectLiveSessionHealth, formatLiveSessionDiagnostics, type LiveSessionHealth } from "./live-session-health.ts";
|
|
29
32
|
import { listLiveAgents } from "./live-agent-manager.ts";
|
|
30
33
|
|
|
34
|
+
/**
 * Module-scoped latch for the optional peer dependency import.
 *
 * When several in-process live-session subagents spawn CONCURRENTLY (e.g.
 * multiple `Agent({run_in_background:true})` started at once), each used to
 * call `await import("@earendil-works/pi-coding-agent")` independently. Under
 * the tsx loader those concurrent first-imports were observed to race
 * module-record instantiation, producing errors such as
 * `Cannot read properties of undefined (reading 'existsSync')`, while
 * sequential retries always succeeded (observed 2026-06-16: 4 explorer
 * subagents launched together, 3 of 4 crashed).
 *
 * The latch makes the first caller start the import and every concurrent
 * caller await that same in-flight promise.
 *
 * A REJECTED import is not kept: the latch is cleared on failure so a later
 * call starts a fresh import instead of replaying the cached rejection
 * forever. Callers that were already awaiting the failed attempt still see
 * its rejection.
 *
 * @returns a promise for the `@earendil-works/pi-coding-agent` module namespace.
 */
let liveSessionModulePromise: Promise<LiveSessionModule> | undefined;
function loadLiveSessionModule(): Promise<LiveSessionModule> {
	if (!liveSessionModulePromise) {
		const pending: Promise<LiveSessionModule> = (import("@earendil-works/pi-coding-agent") as unknown as Promise<LiveSessionModule>).catch((err: unknown) => {
			// Only clear the latch if it still points at THIS attempt; a newer
			// attempt started after the failure must not be discarded.
			if (liveSessionModulePromise === pending) liveSessionModulePromise = undefined;
			throw err;
		});
		liveSessionModulePromise = pending;
	}
	return liveSessionModulePromise;
}
|
|
57
|
+
|
|
31
58
|
export interface LiveSessionSpawnInput {
|
|
32
59
|
manifest: TeamRunManifest;
|
|
33
60
|
task: TeamTaskState;
|
|
@@ -179,6 +206,25 @@ function numberField(obj: Record<string, unknown> | undefined, keys: string[]):
|
|
|
179
206
|
return undefined;
|
|
180
207
|
}
|
|
181
208
|
|
|
209
|
+
/**
 * F7: resolve the enabledModels allowlist for the current project, but only
 * when the `reliability.scopeModels` config toggle is exactly `true`.
 *
 * Returns an empty array when the toggle is off or when reading the config
 * throws; the routing gate treats empty patterns as "no enforcement".
 * Otherwise the result of `readEnabledModelsPatterns` is returned as-is.
 *
 * @param cwd - project working directory used to load the config.
 * @param agentDir - optional agent directory forwarded to the allowlist reader.
 */
async function resolveScopeModelsPatterns(cwd: string, agentDir?: string): Promise<string[]> {
	let enforcementEnabled: boolean;
	try {
		enforcementEnabled = loadConfig(cwd).config.reliability?.scopeModels === true;
	} catch {
		// Best-effort: an unreadable config disables the gate rather than blocking spawn.
		enforcementEnabled = false;
	}
	return enforcementEnabled ? readEnabledModelsPatterns(cwd, agentDir) : [];
}
|
|
227
|
+
|
|
182
228
|
function modelFromRegistry(modelRegistry: unknown, modelId: string | undefined): unknown {
|
|
183
229
|
if (!modelId || !modelId.includes("/")) return undefined;
|
|
184
230
|
const registry = asRecord(modelRegistry);
|
|
@@ -298,11 +344,17 @@ function liveSystemPrompt(input: LiveSessionSpawnInput): string {
|
|
|
298
344
|
].filter(Boolean).join("\n");
|
|
299
345
|
}
|
|
300
346
|
|
|
301
|
-
/**
 * Narrow the session's active tool set to what the agent/role policy allows.
 *
 * No-op when the session does not expose both `getActiveToolNames` and
 * `setActiveToolsByName`. A tool stays active only if it is not one of the
 * recursive orchestration tools, is not in the policy's `excludeTools`, and —
 * when the policy has a non-empty `tools` allowlist — is in that allowlist.
 *
 * F1 unify (v0.8.0): the policy comes from the shared `resolveToolPolicy`, the
 * same helper the child-pi path (pi-args.ts) uses, so both spawn paths agree.
 */
function filterActiveTools(session: LiveSessionLike, agent: AgentConfig, role?: string): void {
	if (typeof session.getActiveToolNames !== "function" || typeof session.setActiveToolsByName !== "function") return;

	const policy = resolveToolPolicy(agent, role);
	// Orchestration tools are always removed; policy exclusions are added on top.
	const blocked = new Set(["team", "Team", "Agent", "get_subagent_result", "steer_subagent"]);
	for (const excludedName of policy.excludeTools ?? []) blocked.add(excludedName);
	// An empty or missing allowlist means "no allowlist restriction".
	const allowlist = policy.tools?.length ? new Set(policy.tools) : undefined;

	const kept = session.getActiveToolNames().filter((toolName) => {
		if (blocked.has(toolName)) return false;
		return allowlist === undefined || allowlist.has(toolName);
	});
	session.setActiveToolsByName(kept);
}
|
|
@@ -369,8 +421,11 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
|
|
|
369
421
|
}
|
|
370
422
|
const availability = await isLiveSessionRuntimeAvailable();
|
|
371
423
|
if (!availability.available) return { available: true, exitCode: 1, stdout: "", stderr: availability.reason ?? "Live-session runtime unavailable.", jsonEvents: 0, error: availability.reason };
|
|
372
|
-
// LAZY: optional peer dependency — only loaded when live-session runtime is
|
|
373
|
-
|
|
424
|
+
// LAZY: optional peer dependency — only loaded when live-session runtime is
|
|
425
|
+
// chosen. Goes through the module-scoped latch (loadLiveSessionModule) so
|
|
426
|
+
// concurrent first-imports share ONE in-flight promise instead of racing
|
|
427
|
+
// module-record instantiation under the tsx loader.
|
|
428
|
+
const mod = await loadLiveSessionModule();
|
|
374
429
|
if (typeof mod.createAgentSession !== "function") return { available: true, exitCode: 1, stdout: "", stderr: "createAgentSession export is unavailable.", jsonEvents: 0, error: "createAgentSession export is unavailable." };
|
|
375
430
|
let session: LiveSessionLike | undefined;
|
|
376
431
|
let unsubscribe: (() => void) | undefined;
|
|
@@ -393,6 +448,13 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
|
|
|
393
448
|
try {
|
|
394
449
|
const agentDir = typeof mod.getAgentDir === "function" ? mod.getAgentDir() : undefined;
|
|
395
450
|
let resourceLoader: unknown;
|
|
451
|
+
// F1 (v0.7.9) NOTE: `agent.excludeExtensions` is applied on the
|
|
452
|
+
// child-pi path (see `pi-args.ts`). The live-session path loads
|
|
453
|
+
// extensions via pi's `DefaultResourceLoader`, which has no explicit
|
|
454
|
+
// per-extension allow/deny API at the point we hand off. For
|
|
455
|
+
// v0.7.9, the denylist is honored on the default async path only;
|
|
456
|
+
// the live-session path (opt-in via `runtime.preferLiveSession`)
|
|
457
|
+
// ignores it. This is a documented limitation, not a silent bug.
|
|
396
458
|
if (mod.DefaultResourceLoader && agentDir) {
|
|
397
459
|
resourceLoader = new mod.DefaultResourceLoader({
|
|
398
460
|
cwd: input.task.cwd,
|
|
@@ -405,7 +467,7 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
|
|
|
405
467
|
});
|
|
406
468
|
await (resourceLoader as { reload?: () => Promise<void> }).reload?.();
|
|
407
469
|
}
|
|
408
|
-
const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd });
|
|
470
|
+
const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd, scopeModelsPatterns: await resolveScopeModelsPatterns(input.manifest.cwd) });
|
|
409
471
|
const resolvedModel = modelFromRegistry(input.modelRegistry, modelRouting.candidates[0] ?? modelRouting.requested) ?? input.parentModel;
|
|
410
472
|
// Phase 4: MCP proxy — will be determined after session creation
|
|
411
473
|
// (we check parent's MCP tools and share connections when available)
|
|
@@ -434,7 +496,7 @@ export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<
|
|
|
434
496
|
});
|
|
435
497
|
session = created.session;
|
|
436
498
|
appendEvent(input.manifest.eventsPath, { type: "live-session.session_created", runId: input.manifest.runId, taskId: input.task.id, data: { elapsedMs: Date.now() - sessionCreateStart, modelFallbackMessage: created.modelFallbackMessage } });
|
|
437
|
-
filterActiveTools(session, input.agent);
|
|
499
|
+
filterActiveTools(session, input.agent, input.task.role);
|
|
438
500
|
|
|
439
501
|
// Diagnostic: log before bindExtensions so we can identify extension-loading hangs
|
|
440
502
|
const bindExtensionsStart = Date.now();
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as fs from "node:fs";
|
|
2
2
|
import * as os from "node:os";
|
|
3
3
|
import * as path from "node:path";
|
|
4
|
+
import { errors } from "../errors.ts";
|
|
5
|
+
import { checkModelScope } from "./model-scope.ts";
|
|
4
6
|
import { fuzzyResolveModelId } from "./model-resolver.ts";
|
|
5
7
|
|
|
6
8
|
export interface AvailableModelInfo {
|
|
@@ -241,6 +243,15 @@ export interface ConfiguredModelRouting {
|
|
|
241
243
|
requested?: string;
|
|
242
244
|
candidates: string[];
|
|
243
245
|
reason?: string;
|
|
246
|
+
/**
|
|
247
|
+
* F7 scope gate verdict. Populated when the caller passed `scopeModelsPatterns`.
|
|
248
|
+
* - `inScope: true` → the resolved model is inside the allowlist (or no allowlist).
|
|
249
|
+
* - `inScope: false, source: "caller"` → caller override is out-of-scope; the
|
|
250
|
+
* function throws `errors.modelOutOfScope` (hard error before spawn) UNLESS
|
|
251
|
+
* the caller marked it as a frontmatter override (`isFrontmatterOverride: true`),
|
|
252
|
+
* in which case the verdict is returned for the caller to log as a warning.
|
|
253
|
+
*/
|
|
254
|
+
scopeVerdict?: import("./model-scope.ts").ModelScopeCheck;
|
|
244
255
|
}
|
|
245
256
|
|
|
246
257
|
export function buildConfiguredModelRouting(input: {
|
|
@@ -252,6 +263,19 @@ export function buildConfiguredModelRouting(input: {
|
|
|
252
263
|
parentModel?: unknown;
|
|
253
264
|
modelRegistry?: unknown;
|
|
254
265
|
cwd?: string;
|
|
266
|
+
/**
|
|
267
|
+
* F7: when set, enforce the enabledModels allowlist. Caller-supplied out-of-
|
|
268
|
+
* scope models throw `errors.modelOutOfScope`; frontmatter-pinned out-of-scope
|
|
269
|
+
* models are returned as a `scopeVerdict` for the caller to log.
|
|
270
|
+
*/
|
|
271
|
+
scopeModelsPatterns?: string[];
|
|
272
|
+
/**
|
|
273
|
+
* F7: when true, the `overrideModel` (if any) is treated as a frontmatter
|
|
274
|
+
* (agent) override rather than a per-spawn caller override — out-of-scope
|
|
275
|
+
* is a warning, not a hard error. Used when the agent config is the
|
|
276
|
+
* authoritative source.
|
|
277
|
+
*/
|
|
278
|
+
isFrontmatterOverride?: boolean;
|
|
255
279
|
}): ConfiguredModelRouting {
|
|
256
280
|
const registryModels = availableModelInfosFromRegistry(input.modelRegistry);
|
|
257
281
|
const configModels = configuredModelInfosFromPiConfig(input.cwd);
|
|
@@ -275,7 +299,21 @@ export function buildConfiguredModelRouting(input: {
|
|
|
275
299
|
: candidates.length > 1
|
|
276
300
|
? "configured Pi fallback chain"
|
|
277
301
|
: undefined;
|
|
278
|
-
|
|
302
|
+
// F7 scope gate: when `scopeModelsPatterns` is configured, check the
|
|
303
|
+
// resolved model. Out-of-scope with source "caller" (`overrideModel` set, not
|
|
304
|
+
// flagged `isFrontmatterOverride`) is a HARD ERROR: we throw, so spawn aborts
|
|
305
|
+
// before any cost is incurred. Any other out-of-scope result is a WARNING
|
|
306
|
+
// returned on the verdict for the caller to log. NOTE(review): step/team-role models are not classified as "caller" here, unlike the module doc — confirm.
|
|
307
|
+
let scopeVerdict: ConfiguredModelRouting["scopeVerdict"];
|
|
308
|
+
if (input.scopeModelsPatterns && input.scopeModelsPatterns.length > 0) {
|
|
309
|
+
const resolved = candidates[0] ?? requested;
|
|
310
|
+
const source = input.overrideModel ? "caller" : input.agentModel ? "frontmatter" : "resolved";
|
|
311
|
+
scopeVerdict = checkModelScope(resolved, input.scopeModelsPatterns, source);
|
|
312
|
+
if (!scopeVerdict.inScope && source === "caller" && !input.isFrontmatterOverride) {
|
|
313
|
+
throw errors.modelOutOfScope(resolved ?? "", input.scopeModelsPatterns);
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
return { requested, candidates, reason, scopeVerdict };
|
|
279
317
|
}
|
|
280
318
|
|
|
281
319
|
export function buildConfiguredModelCandidates(input: Parameters<typeof buildConfiguredModelRouting>[0]): string[] {
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* model-scope.ts — Opt-in model-scope enforcement (F7).
|
|
3
|
+
*
|
|
4
|
+
* When `reliability.scopeModels` is enabled, subagent model choices
|
|
5
|
+
* that fall outside the user's pi `enabledModels` allowlist are flagged:
|
|
6
|
+
* - Caller-supplied (per-spawn override / step / team role) out-of-scope
|
|
7
|
+
* → HARD ERROR to orchestrator (fail fast before spawn).
|
|
8
|
+
* - Frontmatter-pinned (AgentConfig.model) out-of-scope
|
|
9
|
+
* → WARNING + runs anyway (frontmatter is authoritative; the agent
|
|
10
|
+
* author made a deliberate choice).
|
|
11
|
+
*
|
|
12
|
+
* Pattern semantics match pi's `--models` CLI / `enabledModels` allowlist:
|
|
13
|
+
* - `"anthropic/claude-opus-4-5"` — exact match (case-insensitive).
|
|
14
|
+
* - `"claude-*"`, `"*sonnet*"`, `"github-copilot/*"` — glob (single `*`).
|
|
15
|
+
* - Any other string — case-insensitive substring fallback (pi's
|
|
16
|
+
* `tryMatchModel` behavior, model-resolver.ts).
|
|
17
|
+
*
|
|
18
|
+
* The matching helpers in this module are pure (no I/O, no globals). The one
|
|
19
|
+
* exception is `readEnabledModelsPatterns`, which loads pi's SDK and calls
|
|
20
|
+
* `SettingsManager.create(cwd, agentDir).getEnabledModels()` (best-effort).
|
|
21
|
+
*
|
|
22
|
+
* The toggle itself lives in `config/defaults.ts` (`reliability.scopeModels`,
|
|
23
|
+
* default `false` = opt-in, fully back-compat).
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
export type ModelScopeSource = "caller" | "frontmatter" | "resolved" | "fallback";
|
|
27
|
+
|
|
28
|
+
export interface ModelScopeCheck {
|
|
29
|
+
/** True when the model is in scope, or no allowlist is configured. */
|
|
30
|
+
inScope: boolean;
|
|
31
|
+
/** What the model came from. Informational; the gate decision lives in `enforce`. */
|
|
32
|
+
source: ModelScopeSource;
|
|
33
|
+
/** The model id that was checked. */
|
|
34
|
+
model: string;
|
|
35
|
+
/** The first pattern that matched; undefined when nothing matched or no allowlist was configured. */
|
|
36
|
+
matchedPattern?: string;
|
|
37
|
+
/** Human-readable reason for out-of-scope (caller-facing when rejected). */
|
|
38
|
+
reason?: string;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
 * Convert a glob pattern with `*` wildcards into a RegExp.
 *
 * Every regex meta-character except `*` is escaped; each `*` becomes `.*`.
 * The result is anchored (`^...$`) and case-insensitive, so the pattern must
 * cover the WHOLE tested string: `claude-*` matches `claude-opus` but not
 * `xclaude-opus`.
 *
 * @param pattern - glob text; only `*` is treated as a wildcard.
 * @returns an anchored, case-insensitive RegExp equivalent to the glob.
 */
export function patternToRegExp(pattern: string): RegExp {
	const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
	return new RegExp(`^${escaped.replace(/\*/g, ".*")}$`, "i");
}

/**
 * Does a model id match a single allowlist pattern?
 *
 * Semantics (in order, first hit wins):
 *   1. Exact case-insensitive match.
 *   2. Glob match (pattern contains `*`): the anchored glob is tested against
 *      the full id and, when the id has a `provider/` prefix, against the part
 *      after the first `/`. This lets `claude-*` accept
 *      `anthropic/claude-opus-4-5` without degrading globs into substring
 *      matches (`pt-*` does NOT match `openai/gpt-4o`).
 *   3. Otherwise, case-insensitive substring match.
 *
 * Both arguments are trimmed first; an empty id or pattern never matches.
 *
 * @param modelId - model id, typically `provider/model`.
 * @param pattern - one allowlist entry.
 * @returns true when the id is accepted by the pattern.
 */
export function matchesModelPattern(modelId: string, pattern: string): boolean {
	if (!modelId || !pattern) return false;
	const id = modelId.trim();
	const pat = pattern.trim();
	if (!id || !pat) return false;

	const idLower = id.toLowerCase();
	const patLower = pat.toLowerCase();
	if (idLower === patLower) return true;

	if (pat.includes("*")) {
		let glob: RegExp;
		try {
			glob = patternToRegExp(pat);
		} catch {
			// A pattern that cannot be compiled simply does not match.
			return false;
		}
		if (glob.test(id)) return true;
		// Retry against the provider-less id ("provider/model" → "model").
		const slash = id.indexOf("/");
		return slash >= 0 && glob.test(id.slice(slash + 1));
	}

	return idLower.includes(patLower);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Is the model id accepted by at least one allowlist pattern?
 *
 * Returns false when the model id is missing or when `patterns` is
 * empty/undefined — the caller decides whether "no allowlist" means "no scope".
 *
 * @param modelId - model id to check.
 * @param patterns - allowlist entries, tried in order.
 */
export function isModelInScope(modelId: string | undefined, patterns: readonly string[] | undefined): boolean {
	if (!modelId) return false;
	for (const candidate of patterns ?? []) {
		if (matchesModelPattern(modelId, candidate)) return true;
	}
	return false;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Check a model against the allowlist and produce a verdict.
 *
 * - No model id → in scope, `model: ""`, reason "no model specified".
 * - Empty/undefined allowlist → in scope with no `reason` (nothing to enforce).
 * - Otherwise in scope iff some pattern matches; the first matching pattern
 *   is reported as `matchedPattern`. A miss carries a human-readable `reason`.
 *
 * @param modelId - model id to check.
 * @param patterns - allowlist entries, tried in order.
 * @param source - where the model came from; copied onto the verdict.
 */
export function checkModelScope(
	modelId: string | undefined,
	patterns: readonly string[] | undefined,
	source: ModelScopeSource,
): ModelScopeCheck {
	if (!modelId) {
		return { inScope: true, source, model: "", reason: "no model specified" };
	}
	const model = modelId;
	const allowlist = patterns ?? [];
	if (allowlist.length === 0) {
		// No allowlist configured → the gate is a no-op.
		return { inScope: true, source, model };
	}

	const matchedPattern = allowlist.find((candidate) => matchesModelPattern(model, candidate));
	if (matchedPattern !== undefined) {
		return { inScope: true, source, model, matchedPattern };
	}

	const reason = `model "${model}" is not in enabledModels allowlist (${allowlist.join(", ")})`;
	return { inScope: false, source, model, reason };
}
|
|
114
|
+
|
|
115
|
+
/**
 * Read the user's `enabledModels` allowlist through pi's `SettingsManager`.
 *
 * Unlike the matching helpers, this function is NOT pure: it dynamically
 * imports `@earendil-works/pi-coding-agent` and calls
 * `SettingsManager.create(cwd, agentDir).getEnabledModels()`.
 *
 * Best-effort, never throws. Returns an empty array when the package or the
 * `SettingsManager.create` export is unavailable, when the allowlist is unset
 * or not an array, or when any error occurs. Entries that are not non-blank
 * strings are dropped so that downstream pattern matching cannot fail on
 * malformed settings data.
 *
 * The caller should still gate on the `reliability.scopeModels` toggle — an
 * empty result means there is nothing to enforce against.
 *
 * @internal Intended for the runtime spawn layers.
 * @param cwd - project working directory passed to `SettingsManager.create`.
 * @param agentDir - optional agent directory passed to `SettingsManager.create`.
 * @returns the configured allowlist patterns, or `[]`.
 */
export async function readEnabledModelsPatterns(cwd: string, agentDir?: string): Promise<string[]> {
	try {
		// Dynamic import: the SDK is loaded lazily and its absence is tolerated.
		// The `as string` cast keeps the specifier non-literal for the type checker.
		const mod: unknown = await import("@earendil-works/pi-coding-agent" as string).catch(() => null);
		if (!mod) return [];
		const SettingsManagerCtor = (mod as { SettingsManager?: { create?: (cwd: string, agentDir?: string) => { getEnabledModels?: () => string[] | undefined } } }).SettingsManager;
		if (typeof SettingsManagerCtor?.create !== "function") return [];
		const sm = SettingsManagerCtor.create(cwd, agentDir);
		// Treat the settings value as untrusted: validate shape before returning.
		const patterns: unknown = sm?.getEnabledModels?.();
		if (!Array.isArray(patterns)) return [];
		return patterns.filter((entry): entry is string => typeof entry === "string" && entry.trim().length > 0);
	} catch {
		return [];
	}
}
|
package/src/runtime/pi-args.ts
CHANGED
|
@@ -3,7 +3,7 @@ import * as os from "node:os";
|
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import { fileURLToPath } from "node:url";
|
|
5
5
|
import type { AgentConfig } from "../agents/agent-config.ts";
|
|
6
|
-
import {
|
|
6
|
+
import { resolveToolPolicy } from "../agents/agent-config.ts";
|
|
7
7
|
import { userPiRoot } from "../utils/paths.ts";
|
|
8
8
|
|
|
9
9
|
const THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"];
|
|
@@ -257,10 +257,17 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
|
|
|
257
257
|
}
|
|
258
258
|
|
|
259
259
|
// Apply role-based tool restrictions (from role-tools.ts)
|
|
260
|
-
//
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
260
|
+
// F1 unify (v0.8.0): the tool policy is resolved by the shared
|
|
261
|
+
// `resolveToolPolicy` helper (same code as the live-session path), so the
|
|
262
|
+
// two spawn paths agree. Before this, child-pi used role-config
|
|
263
|
+
// authoritative and ignored `agent.disallowedTools`; live-session used
|
|
264
|
+
// frontmatter authoritative and ignored role-config. Now:
|
|
265
|
+
// - allowlist precedence is source-aware (builtin → role authoritative;
|
|
266
|
+
// user/project → frontmatter authoritative)
|
|
267
|
+
// - denylist is additive (role excludeTools + agent disallowedTools merged)
|
|
268
|
+
const policy = resolveToolPolicy(input.agent, input.role);
|
|
269
|
+
const explicitTools = policy.tools;
|
|
270
|
+
const excludeTools = policy.excludeTools;
|
|
264
271
|
|
|
265
272
|
if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
|
|
266
273
|
if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
|
|
@@ -268,7 +275,15 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
|
|
|
268
275
|
// User extensions in ~/.pi/agent/extensions/ may fail due to missing dependencies.
|
|
269
276
|
args.push("--no-extensions");
|
|
270
277
|
if (input.agent.extensions !== undefined) {
|
|
271
|
-
|
|
278
|
+
// F1 (v0.7.9): apply `excludeExtensions` denylist (case-insensitive
|
|
279
|
+
// basename match) BEFORE the trusted PROMPT_RUNTIME_EXTENSION_PATH is
|
|
280
|
+
// prepended. The prompt-runtime is a pi-crew internal and is never
|
|
281
|
+
// excludable. Unknown names in the denylist are tolerated (logging them
|
|
282
|
+
// would be nice but this path is sync and minimal — keeping parity
|
|
283
|
+
// with the rest of the agent loader's best-effort semantics).
|
|
284
|
+
const excluded = new Set((input.agent.excludeExtensions ?? []).map((name) => path.basename(name).toLowerCase()));
|
|
285
|
+
const allowed = input.agent.extensions.filter((ext) => !excluded.has(path.basename(ext).toLowerCase()));
|
|
286
|
+
for (const extension of [PROMPT_RUNTIME_EXTENSION_PATH, ...allowed]) args.push("--extension", extension);
|
|
272
287
|
} else {
|
|
273
288
|
args.push("--extension", PROMPT_RUNTIME_EXTENSION_PATH);
|
|
274
289
|
}
|
|
@@ -162,6 +162,40 @@ export function adjustConfidence(current: number, passed: boolean): number {
|
|
|
162
162
|
return Math.max(0.1, Math.min(0.95, current + delta)); // Clamp to [0.1, 0.95]
|
|
163
163
|
}
|
|
164
164
|
|
|
165
|
+
/**
 * Derive the confidence value to store on the next activation of `skillId`.
 *
 * The stored confidence is a rolling value: it starts from the confidence of
 * the most recent activation already recorded for the same skill and is then
 * moved by `adjustConfidence` according to this activation's outcome (the
 * result stays inside the clamp range enforced by `adjustConfidence`).
 *
 * Background (T7, v0.8.2): the `task_completed` / `task_failed` hooks used to
 * write `computeInitialConfidence(1)` on every activation, so
 * `adjustConfidence` was never exercised and the stored confidence never
 * moved. Routing every write through this helper makes the value evolve.
 *
 * @param skillId - skill whose activation is about to be recorded.
 * @param activations - activations already recorded; entries belonging to
 *   other skills are ignored. The last matching entry is treated as the most
 *   recent one.
 * @param passed - outcome of the activation being recorded.
 * @returns `computeInitialConfidence(1)` when the skill has no earlier
 *   activation in `activations` (the outcome is not applied to the seed);
 *   otherwise the previous confidence adjusted by `passed`.
 */
export function computeNextActivationConfidence(
	skillId: string,
	activations: SkillActivation[],
	passed: boolean,
): number {
	// Single pass: remember the latest matching activation and how many matched.
	let latest: SkillActivation | undefined;
	let seen = 0;
	for (const activation of activations) {
		if (activation.skillId !== skillId) continue;
		latest = activation;
		seen += 1;
	}

	// Nothing recorded yet for this skill: seed as a single observation.
	if (seen === 0) return computeInitialConfidence(1);

	// Fall back to a count-based seed when the latest record carries no
	// confidence value (null/undefined).
	const baseline = latest?.confidence ?? computeInitialConfidence(seen);
	return adjustConfidence(baseline, passed);
}
|
|
198
|
+
|
|
165
199
|
/**
|
|
166
200
|
* Apply decay to confidence for skills not observed recently.
|
|
167
201
|
*/
|
|
@@ -424,7 +458,13 @@ export function registerSkillEffectivenessHooks(): void {
|
|
|
424
458
|
if (hooksRegistered) return;
|
|
425
459
|
hooksRegistered = true;
|
|
426
460
|
|
|
427
|
-
// Track task completion for skill effectiveness
|
|
461
|
+
// Track task completion for skill effectiveness.
|
|
462
|
+
// T7 (v0.8.2): record the ROLLING adjusted confidence, not a hardcoded
|
|
463
|
+
// 0.3. computeNextActivationConfidence seeds the first activation and
|
|
464
|
+
// applies adjustConfidence (+0.05 success / -0.1 failure) on subsequent
|
|
465
|
+
// ones, so the stored confidence evolves across the run. Before this fix
|
|
466
|
+
// every activation was written with confidence 0.3, which made
|
|
467
|
+
// adjustConfidence dead code and left every skill stuck at ~0.3.
|
|
428
468
|
crewHooks.register("task_completed", (event) => {
|
|
429
469
|
const { taskId, runId, data } = event;
|
|
430
470
|
if (!taskId || !runId) return;
|
|
@@ -433,8 +473,19 @@ export function registerSkillEffectivenessHooks(): void {
|
|
|
433
473
|
const skillNames = (data?.skills as string[]) ?? [];
|
|
434
474
|
const success = (data?.status as string) === "completed";
|
|
435
475
|
|
|
436
|
-
//
|
|
476
|
+
// cwd comes from the event payload (set by callers) so that the
|
|
477
|
+
// activation lands in the correct .pi/teams/ or .crew/state/runs/
|
|
478
|
+
// (see issue #29).
|
|
479
|
+
const eventCwd = (data?.cwd as string) ?? process.cwd();
|
|
480
|
+
const existingActivations = getSkillActivations(eventCwd, runId);
|
|
481
|
+
|
|
482
|
+
// Record each skill activation with its rolling confidence
|
|
437
483
|
for (const skillId of skillNames) {
|
|
484
|
+
const confidence = computeNextActivationConfidence(
|
|
485
|
+
skillId,
|
|
486
|
+
existingActivations,
|
|
487
|
+
success,
|
|
488
|
+
);
|
|
438
489
|
const activation: SkillActivation = {
|
|
439
490
|
id: `act-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
|
440
491
|
skillId,
|
|
@@ -443,23 +494,46 @@ export function registerSkillEffectivenessHooks(): void {
|
|
|
443
494
|
taskId,
|
|
444
495
|
timestamp: new Date().toISOString(),
|
|
445
496
|
passed: success,
|
|
446
|
-
confidence
|
|
497
|
+
confidence,
|
|
447
498
|
};
|
|
448
|
-
// cwd comes from the event payload (set by callers) so that the
|
|
449
|
-
// activation lands in the correct .pi/teams/ or .crew/state/runs/
|
|
450
|
-
// (see issue #29).
|
|
451
|
-
const eventCwd = (data?.cwd as string) ?? process.cwd();
|
|
452
499
|
recordSkillActivation(eventCwd, activation);
|
|
453
500
|
}
|
|
454
501
|
});
|
|
455
502
|
|
|
456
|
-
// Track task failures
|
|
503
|
+
// Track task failures.
|
|
504
|
+
// T7 (v0.8.2): this used to be a no-op ("handled by computeSkillMetrics"),
|
|
505
|
+
// but computeSkillMetrics derives passRate from recorded activations —
|
|
506
|
+
// and since failed tasks recorded NOTHING, the failure never fed back
|
|
507
|
+
// into the confidence/decay loop. Now we record a `passed:false`
|
|
508
|
+
// activation for each skill tied to the failed task, which both lowers
|
|
509
|
+
// passRate AND triggers the -0.1 contradicting delta via
|
|
510
|
+
// computeNextActivationConfidence when the skill has a prior activation.
|
|
457
511
|
crewHooks.register("task_failed", (event) => {
|
|
458
512
|
const { taskId, runId, data } = event;
|
|
459
513
|
if (!taskId || !runId) return;
|
|
460
514
|
|
|
461
|
-
|
|
462
|
-
|
|
515
|
+
const skillNames = (data?.skills as string[]) ?? [];
|
|
516
|
+
const eventCwd = (data?.cwd as string) ?? process.cwd();
|
|
517
|
+
const existingActivations = getSkillActivations(eventCwd, runId);
|
|
518
|
+
|
|
519
|
+
for (const skillId of skillNames) {
|
|
520
|
+
const confidence = computeNextActivationConfidence(
|
|
521
|
+
skillId,
|
|
522
|
+
existingActivations,
|
|
523
|
+
false,
|
|
524
|
+
);
|
|
525
|
+
const activation: SkillActivation = {
|
|
526
|
+
id: `act-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
|
527
|
+
skillId,
|
|
528
|
+
role: (data?.role as string) ?? "unknown",
|
|
529
|
+
runId,
|
|
530
|
+
taskId,
|
|
531
|
+
timestamp: new Date().toISOString(),
|
|
532
|
+
passed: false,
|
|
533
|
+
confidence,
|
|
534
|
+
};
|
|
535
|
+
recordSkillActivation(eventCwd, activation);
|
|
536
|
+
}
|
|
463
537
|
});
|
|
464
538
|
}
|
|
465
539
|
|