npm - @fiale-plus/pi-rogue - Versions diffs - 0.2.2 → 0.2.4 - Mend

@fiale-plus/pi-rogue 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/node_modules/@fiale-plus/pi-rogue-router/src/reports.test.ts ADDED Viewed

@@ -0,0 +1,92 @@
+import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { describe, expect, it } from "vitest";
+import { writeRouterReport } from "./reports.js";
+/**
+ * Builds a path for `name` inside a freshly created temporary directory.
+ *
+ * Each call creates its own directory with `mkdtempSync`; the file itself is
+ * not created, only its path is returned.
+ */
+function tempFile(name: string): string {
+  const prefix = join(tmpdir(), "pi-router-report-");
+  const directory = mkdtempSync(prefix);
+  return join(directory, name);
+}
+// Raw-session reference shared by the event, outcome and training-row fixtures below.
+const rawRef = {
+  schema: "pi-router.raw-session-ref.v1",
+  path: "/tmp/session.jsonl",
+  fromEvent: 0,
+  toEvent: 1,
+  fromByte: 0,
+  toByte: 1,
+  contentHash: "hash",
+};
+/**
+ * Returns a route-event fixture whose decision carries `action`.
+ *
+ * The `observed` section marks the decision as not followed and overridden by
+ * "continue_current".
+ *
+ * NOTE(review): the decision schema string here is "pi-router.route-decision.v1",
+ * while the typed fixture in sharpening.test.ts uses "pi-router.decision.v1" —
+ * confirm which one the ledger reader expects.
+ */
+function event(action = "run_verifier") {
+  const checkpointId = "checkpoint-1";
+  const decision = {
+    schema: "pi-router.route-decision.v1",
+    decisionId: "decision-1",
+    checkpointId,
+    action,
+    reason: "test",
+    confidence: 0.5,
+    policyVersion: "test",
+    alternatives: [],
+  };
+  const metrics = {
+    loopScore: 0.2,
+    progressScore: 0.8,
+    sameCommandRepeatedCount: 1,
+    sameErrorRepeatedCount: 0,
+    verifierUsed: true,
+    diffLines: 10,
+    diffFilesChanged: 1,
+  };
+  return {
+    schema: "pi-router.route-event.v1",
+    eventId: "event-1",
+    recordedAt: "2026-06-14T00:00:00.000Z",
+    checkpointId,
+    sessionId: "session-1",
+    rawSessionRef: rawRef,
+    sourceEvent: { index: 0, timestamp: null },
+    decision,
+    runtime: { activeModel: "qwen", provider: "local", contextTokensApprox: 1000, gitDirty: true },
+    observed: { followed: false, overriddenBy: "continue_current" },
+    metrics,
+  };
+}
+/**
+ * Returns a successful outcome fixture linked to route event "event-1" and
+ * checkpoint "checkpoint-1". Its manual evidence carries both a raw session
+ * reference and a notes hash.
+ */
+function outcome() {
+  const routeEventId = "event-1";
+  const evidence = { source: "manual", rawSessionRef: rawRef, routeEventId, notesHash: "notes" };
+  return {
+    schema: "pi-router.outcome.v1",
+    outcomeId: "outcome-1",
+    recordedAt: "2026-06-14T00:00:00.000Z",
+    sessionId: "session-1",
+    checkpointId: "checkpoint-1",
+    routeEventId,
+    taskType: "implementation",
+    taskStatus: "success",
+    testsPassedAfter: true,
+    verifierImproved: true,
+    acceptedDiff: true,
+    userInterrupted: false,
+    userOverrodeDecision: true,
+    finalFilesTouched: 1,
+    finalDiffLines: 10,
+    wallTimeMs: null,
+    cloudCostUsd: null,
+    frontierCalls: 0,
+    localTurns: 2,
+    reworkTurns: 0,
+    evidence,
+  };
+}
+/**
+ * Returns a training-row fixture whose binary gate is `label`.
+ *
+ * For "unknown" the row has no route action or confidence, its label source is
+ * "unknown", and it is flagged as excluding the local rule as truth. For the
+ * other labels the row is teacher-labelled with confidence 0.8.
+ */
+function trainingRow(label: "continue" | "intervene" | "unknown") {
+  const known = label !== "unknown";
+  const features = {
+    phase: "implementation",
+    activeModel: "qwen",
+    provider: "local",
+    contextTokensApprox: 1000,
+    sameCommandRepeatedCount: 1,
+    sameErrorRepeatedCount: 0,
+    loopScore: 0.1,
+    progressScore: 0.9,
+    verifierUsed: true,
+    noVerifierUsed: false,
+    diffLines: 10,
+    diffFilesChanged: 1,
+    diffChurnScore: 0.01,
+    filesTouched: 1,
+  };
+  const labels = {
+    routeAction: known ? "continue_current" : null,
+    binaryGate: label,
+    source: known ? "teacher" : "unknown",
+    confidence: known ? 0.8 : null,
+  };
+  return {
+    schema: "pi-router.training-row.v1",
+    checkpointId: `checkpoint-${label}`,
+    sessionId: "session-1",
+    rawSessionRef: rawRef,
+    features,
+    labels,
+    outcome: { taskStatus: "unknown", testsPassedAfter: null, acceptedDiff: null, userOverrodeDecision: null, reworkTurns: null },
+    provenance: { localRuleAction: "continue_current", excludedLocalRuleAsTruth: !known },
+  };
+}
+// End-to-end checks for writeRouterReport using fixture files written to temp directories.
+describe("router report", () => {
+  // Happy path: one event, one outcome, two training rows and a gate report go in;
+  // a JSON report and a Markdown report come out.
+  it("writes JSON and Markdown summaries", () => {
+    const eventsPath = tempFile("events.jsonl");
+    const outcomesPath = tempFile("outcomes.jsonl");
+    const rowsPath = tempFile("training.jsonl");
+    const gatePath = tempFile("gate-report.json");
+    const outputPath = tempFile("report.json");
+    const markdownPath = tempFile("report.md");
+    // Inputs are JSON Lines: one serialized record per line, newline-terminated.
+    writeFileSync(eventsPath, `${JSON.stringify(event())}\n`);
+    writeFileSync(outcomesPath, `${JSON.stringify(outcome())}\n`);
+    // One labelled row ("continue") and one unlabelled row ("unknown").
+    writeFileSync(rowsPath, [JSON.stringify(trainingRow("continue")), JSON.stringify(trainingRow("unknown"))].join("\n") + "\n");
+    writeFileSync(gatePath, JSON.stringify({ schema: "pi-router.binary-gate-eval.v1", candidate: { accuracy: 0.8, f1: 0.7 }, ruleBaseline: { accuracy: 0.6, f1: 0.5 } }));
+    const report = writeRouterReport({ eventsPath, outcomesPath, trainingRowsPath: rowsPath, gateReportPath: gatePath, outputPath, markdownPath });
+    // mismatches is 1 because the event fixture is marked not followed / overridden;
+    // localRuleExcluded is 1 because only the "unknown" row sets excludedLocalRuleAsTruth.
+    expect(report).toMatchObject({ schema: "pi-router.report.v1", routeEvents: { total: 1, mismatches: 1 }, outcomes: { total: 1, linked: 1 }, trainingRows: { total: 2, labeled: 1, localRuleExcluded: 1 } });
+    expect(JSON.parse(readFileSync(outputPath, "utf8")).schema).toBe("pi-router.report.v1");
+    expect(readFileSync(markdownPath, "utf8")).toContain("# Pi router report");
+  });
+  // Error paths: no inputs at all, and an events path that points at a file that was never written.
+  it("requires at least one report input and rejects missing provided inputs", () => {
+    expect(() => writeRouterReport({ outputPath: tempFile("report.json") })).toThrow(/requires at least one input/);
+    expect(() => writeRouterReport({ eventsPath: tempFile("missing-events.jsonl"), outputPath: tempFile("report.json") })).toThrow(/report input file not found/);
+  });
+});

package/node_modules/@fiale-plus/pi-rogue-router/src/reports.ts ADDED Viewed

@@ -0,0 +1,116 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { readTrainingRows } from "./binary-gate.js";
+import { readRouteEvents, type RouteEvent } from "./ledger.js";
+import { readOutcomes, type RouterOutcome } from "./outcomes.js";
+/** Schema identifier stamped on every report produced by this module. */
+export const ROUTER_REPORT_SCHEMA = "pi-router.report.v1" as const;
+/** Aggregated summary of router artifacts, as assembled by `buildRouterReport`. */
+export interface RouterReport {
+  schema: typeof ROUTER_REPORT_SCHEMA;
+  /** Caller-supplied timestamp, or the ISO string of the build time when none is given. */
+  generatedAt: string;
+  /** Input paths exactly as supplied by the caller; a field is undefined when that input was not provided. */
+  inputs: { events?: string; outcomes?: string; gateReport?: string; trainingRows?: string };
+  /** Event counts per decision action and per active model; `mismatches` counts events that were not followed or were overridden. */
+  routeEvents: { total: number; byAction: Record<string, number>; byModel: Record<string, number>; mismatches: number };
+  /** `linked` counts outcomes with a route event id or checkpoint id; `missingEvidence` counts outcomes with neither a raw session ref nor a notes hash. */
+  outcomes: { total: number; byStatus: Record<string, number>; linked: number; missingEvidence: number };
+  /** `labeled` counts rows whose binary gate is not "unknown"; `localRuleExcluded` counts "unknown" rows flagged `excludedLocalRuleAsTruth`. */
+  trainingRows: { total: number; labeled: number; unlabeled: number; localRuleExcluded: number; byGate: Record<string, number> };
+  /** Parsed gate report JSON, passed through without validation. */
+  gate?: unknown;
+}
+/** Adds one to the counter stored under `key`, starting from zero when the key has no value yet. */
+function increment(map: Record<string, number>, key: string): void {
+  const current = map[key] ?? 0;
+  map[key] = current + 1;
+}
+/**
+ * Summarizes route events: totals per decision action, totals per active model
+ * (events without a model are counted under "unknown"), and how many events
+ * were explicitly not followed or carry an override.
+ */
+function routeSummary(events: RouteEvent[]): RouterReport["routeEvents"] {
+  const summary: RouterReport["routeEvents"] = { total: events.length, byAction: {}, byModel: {}, mismatches: 0 };
+  for (const { decision, runtime, observed } of events) {
+    increment(summary.byAction, decision.action);
+    increment(summary.byModel, runtime.activeModel ?? "unknown");
+    // Only an explicit `false` counts as not followed; a missing flag does not.
+    if (observed.followed === false || observed.overriddenBy) {
+      summary.mismatches += 1;
+    }
+  }
+  return summary;
+}
+/**
+ * Summarizes outcomes: totals per task status, how many are linked to a route
+ * event or checkpoint, and how many carry neither a raw session reference nor
+ * a notes hash as evidence.
+ */
+function outcomeSummary(outcomes: RouterOutcome[]): RouterReport["outcomes"] {
+  const summary: RouterReport["outcomes"] = { total: outcomes.length, byStatus: {}, linked: 0, missingEvidence: 0 };
+  for (const item of outcomes) {
+    increment(summary.byStatus, item.taskStatus);
+    if (item.routeEventId || item.checkpointId) {
+      summary.linked += 1;
+    }
+    const hasEvidence = item.evidence.rawSessionRef || item.evidence.notesHash;
+    if (!hasEvidence) {
+      summary.missingEvidence += 1;
+    }
+  }
+  return summary;
+}
+/**
+ * Summarizes the training rows stored at `rowsPath`.
+ *
+ * Without a path the summary is all zeros. Rows whose binary gate is anything
+ * other than "unknown" count as labeled; "unknown" rows count towards
+ * `localRuleExcluded` only when flagged `excludedLocalRuleAsTruth`.
+ */
+function trainingSummary(rowsPath?: string): RouterReport["trainingRows"] {
+  const byGate: Record<string, number> = {};
+  let labeled = 0;
+  let localRuleExcluded = 0;
+  const rows = rowsPath ? readTrainingRows(rowsPath) : [];
+  for (const { labels, provenance } of rows) {
+    increment(byGate, labels.binaryGate);
+    if (labels.binaryGate !== "unknown") {
+      labeled += 1;
+    } else if (provenance.excludedLocalRuleAsTruth) {
+      localRuleExcluded += 1;
+    }
+  }
+  return { total: rows.length, labeled, unlabeled: rows.length - labeled, localRuleExcluded, byGate };
+}
+/**
+ * Reads and parses the JSON file at `path`.
+ *
+ * @returns `undefined` when no path is given, otherwise the parsed value.
+ * @throws Error when the file does not exist; parse errors from `JSON.parse` propagate unchanged.
+ */
+function readJson(path?: string): unknown {
+  if (!path) return undefined;
+  const absolutePath = resolve(path);
+  if (!existsSync(absolutePath)) {
+    // The message reports the path as the caller supplied it, not the resolved one.
+    throw new Error(`report input file not found: ${path}`);
+  }
+  const text = readFileSync(absolutePath, "utf8");
+  return JSON.parse(text);
+}
+/**
+ * Renders a report as a Markdown document.
+ *
+ * The output has a bullet list of headline totals, a "Route actions" section,
+ * an "Outcome status" section, and a "Gate eval" section when the report
+ * carries a gate object. Missing gate metrics are printed as "n/a".
+ *
+ * @returns The Markdown text, terminated by a single newline.
+ */
+function markdown(report: RouterReport): string {
+  // Sort on the key alone. A bare `.sort()` compares the stringified
+  // "key,value" pairs, so a key containing a character that orders below ","
+  // (for example "+" or a space) would land ahead of its own prefix.
+  const countLines = (counts: Record<string, number>): string[] =>
+    Object.entries(counts)
+      .sort(([left], [right]) => (left < right ? -1 : left > right ? 1 : 0))
+      .map(([key, value]) => `- ${key}: ${value}`);
+  // The gate report is untyped JSON; only the fields printed below are assumed, and all are optional.
+  const gate = report.gate && typeof report.gate === "object" ? report.gate as { candidate?: { accuracy?: number; f1?: number }; ruleBaseline?: { accuracy?: number; f1?: number } } : undefined;
+  const lines = [
+    "# Pi router report",
+    "",
+    `- generatedAt: ${report.generatedAt}`,
+    `- route events: ${report.routeEvents.total}`,
+    `- route mismatches/overrides: ${report.routeEvents.mismatches}`,
+    `- outcomes: ${report.outcomes.total}`,
+    `- training rows: ${report.trainingRows.total} (${report.trainingRows.labeled} labeled, ${report.trainingRows.unlabeled} unlabeled)`,
+    `- local-rule labels excluded: ${report.trainingRows.localRuleExcluded}`,
+    "",
+    "## Route actions",
+    ...countLines(report.routeEvents.byAction),
+    "",
+    "## Outcome status",
+    ...countLines(report.outcomes.byStatus),
+  ];
+  if (gate) {
+    lines.push("", "## Gate eval", `- candidate accuracy/f1: ${gate.candidate?.accuracy ?? "n/a"}/${gate.candidate?.f1 ?? "n/a"}`, `- rule baseline accuracy/f1: ${gate.ruleBaseline?.accuracy ?? "n/a"}/${gate.ruleBaseline?.f1 ?? "n/a"}`);
+  }
+  return `${lines.join("\n")}\n`;
+}
+/**
+ * Builds an in-memory router report from whichever input files are provided.
+ *
+ * Every provided path is checked for existence before anything is read, so a
+ * mistyped outcomes or training-rows path fails the same way a mistyped events
+ * path does. (The readers for those files are defined in other modules; how
+ * they treat a missing file is not visible here, so the check is done up front
+ * rather than relied upon.)
+ *
+ * @param options Optional input paths, plus an optional `generatedAt` override
+ *   that makes the output deterministic.
+ * @returns The assembled report; sections whose input was not provided are empty.
+ * @throws Error with "report input file not found: <path>" when a provided path does not exist.
+ */
+export function buildRouterReport(options: { eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string; generatedAt?: string }): RouterReport {
+  const providedPaths = [options.eventsPath, options.outcomesPath, options.trainingRowsPath, options.gateReportPath];
+  for (const inputPath of providedPaths) {
+    if (inputPath && !existsSync(resolve(inputPath))) throw new Error(`report input file not found: ${inputPath}`);
+  }
+  const events = options.eventsPath ? readRouteEvents(options.eventsPath) : [];
+  const outcomes = options.outcomesPath ? readOutcomes(options.outcomesPath) : [];
+  return {
+    schema: ROUTER_REPORT_SCHEMA,
+    generatedAt: options.generatedAt ?? new Date().toISOString(),
+    inputs: { events: options.eventsPath, outcomes: options.outcomesPath, trainingRows: options.trainingRowsPath, gateReport: options.gateReportPath },
+    routeEvents: routeSummary(events),
+    outcomes: outcomeSummary(outcomes),
+    trainingRows: trainingSummary(options.trainingRowsPath),
+    gate: readJson(options.gateReportPath),
+  };
+}
+/**
+ * Builds a router report and writes it to disk.
+ *
+ * The JSON report (2-space indented, newline-terminated) always goes to
+ * `outputPath`; a Markdown rendering is also written when `markdownPath` is
+ * given. Parent directories are created as needed.
+ *
+ * @returns The report that was written.
+ * @throws Error when none of the four input paths is provided.
+ */
+export function writeRouterReport(options: { outputPath: string; markdownPath?: string; eventsPath?: string; outcomesPath?: string; trainingRowsPath?: string; gateReportPath?: string }): RouterReport {
+  const { eventsPath, outcomesPath, trainingRowsPath, gateReportPath } = options;
+  const hasInput = Boolean(eventsPath || outcomesPath || trainingRowsPath || gateReportPath);
+  if (!hasInput) throw new Error("router report requires at least one input file");
+  const report = buildRouterReport(options);
+  const jsonTarget = resolve(options.outputPath);
+  mkdirSync(dirname(jsonTarget), { recursive: true });
+  writeFileSync(jsonTarget, `${JSON.stringify(report, null, 2)}\n`);
+  if (options.markdownPath) {
+    const markdownTarget = resolve(options.markdownPath);
+    mkdirSync(dirname(markdownTarget), { recursive: true });
+    writeFileSync(markdownTarget, markdown(report));
+  }
+  return report;
+}

package/node_modules/@fiale-plus/pi-rogue-router/src/sharpening.test.ts ADDED Viewed

@@ -0,0 +1,223 @@
+import { mkdtempSync, readFileSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { describe, expect, it } from "vitest";
+import { buildRouteEvent, type RouteEvent } from "./ledger.js";
+import { generateSharpeningHints, writeSharpeningHints } from "./sharpening.js";
+import type { RouterOutcome } from "./outcomes.js";
+import type { RouteAction, RouteDecision, RouterCheckpoint, TaskStatus } from "./types.js";
+/**
+ * Builds a path for `name` inside a freshly created temporary directory.
+ *
+ * Each call creates its own directory with `mkdtempSync`; the file itself is
+ * not created, only its path is returned.
+ */
+function tempFile(name: string): string {
+  const prefix = join(tmpdir(), "pi-router-sharpening-");
+  const directory = mkdtempSync(prefix);
+  return join(directory, name);
+}
+/**
+ * Returns a checkpoint fixture for the given model, provider and scores.
+ *
+ * The session id is derived from the first character of `id`, so ids that
+ * share a leading character land in the same session. The raw session path
+ * deliberately contains "SECRET_TOKEN" so tests can assert that it never leaks
+ * into generated artifacts.
+ */
+function checkpoint(id: string, model: string, provider: string, progressScore: number, loopScore: number): RouterCheckpoint {
+  const sessionId = `session-${id.slice(0, 1)}`;
+  const eventId = `event-${id}`;
+  return {
+    schema: "pi-router.checkpoint.v1",
+    sessionId,
+    checkpointId: `${sessionId}:${eventId}`,
+    createdAt: "2026-06-14T00:00:00.000Z",
+    rawSessionRef: {
+      schema: "pi-router.raw-session-ref.v1",
+      path: "/tmp/raw-session-with-SECRET_TOKEN.jsonl",
+      fromEvent: 1,
+      toEvent: 2,
+      fromByte: 10,
+      toByte: 20,
+      contentHash: `hash-${id}`,
+    },
+    harness: "pi",
+    phase: "implementation",
+    activeModel: model,
+    provider,
+    features: {
+      turnIndex: 2,
+      sameCommandRepeatedCount: 1,
+      sameErrorRepeatedCount: 0,
+      errorChanged: true,
+      testsImproved: null,
+      filesTouched: 1,
+      diffLines: 12,
+      diffFilesChanged: 1,
+      diffLinesAdded: 8,
+      diffLinesDeleted: 4,
+      diffChurnScore: 0,
+      toolThrashScore: 0,
+      goalDriftScore: 0,
+      loopScore,
+      progressScore,
+      verifierUsed: true,
+      noVerifierUsed: false,
+      toolCallsLast10Turns: 3,
+      contextTokensApprox: 1000,
+      gitDirty: null,
+    },
+    recent: { touchedFileHashes: ["file-hash"] },
+    sourceEvent: { index: 2, byteStart: 10, byteEnd: 20, id: eventId, timestamp: "2026-06-14T00:00:01.000Z", type: "message", role: "toolResult" },
+  };
+}
+/** Returns a decision fixture for `checkpointId` with the given action and fixed test metadata. */
+function decision(checkpointId: string, action: RouteAction): RouteDecision {
+  const fixture: RouteDecision = {
+    schema: "pi-router.decision.v1",
+    checkpointId,
+    action,
+    adviceShape: "none",
+    contextPolicy: "none",
+    confidence: 0.75,
+    reason: "test decision",
+    policyVersion: "test-policy",
+  };
+  return fixture;
+}
+/**
+ * Builds a route event from a checkpoint fixture and a matching decision
+ * fixture, with `id` (left-padded to two characters) as the seconds field of
+ * the timestamp passed to `buildRouteEvent`.
+ *
+ * NOTE(review): ids used in this file such as "61", "80" or "a0" produce a
+ * seconds field that is not a valid ISO-8601 value — confirm that
+ * `buildRouteEvent` and downstream consumers treat the timestamp as an opaque
+ * string.
+ */
+function event(id: string, action: RouteAction, model: string, provider: string, progressScore: number, loopScore: number): RouteEvent {
+  const source = checkpoint(id, model, provider, progressScore, loopScore);
+  const routed = decision(source.checkpointId, action);
+  const recordedAt = `2026-06-14T00:00:${id.padStart(2, "0")}.000Z`;
+  return buildRouteEvent(source, routed, recordedAt);
+}
+/**
+ * Returns an outcome fixture linked to `routeEvent` with the given task status.
+ *
+ * The event counts as one frontier call when its active model name looks like
+ * a frontier model, otherwise as one local turn. Evidence carries only the
+ * route event id.
+ */
+function outcomeFor(routeEvent: RouteEvent, status: TaskStatus): RouterOutcome {
+  const { eventId, sessionId, checkpointId } = routeEvent;
+  const frontier = providerIsFrontier(routeEvent.runtime.activeModel);
+  return {
+    schema: "pi-router.outcome.v1",
+    outcomeId: `outcome-${eventId}`,
+    recordedAt: "2026-06-14T00:01:00.000Z",
+    sessionId,
+    checkpointId,
+    routeEventId: eventId,
+    taskType: "implementation",
+    taskStatus: status,
+    testsPassedAfter: null,
+    verifierImproved: null,
+    acceptedDiff: null,
+    userInterrupted: false,
+    userOverrodeDecision: false,
+    finalFilesTouched: 1,
+    finalDiffLines: 12,
+    wallTimeMs: null,
+    cloudCostUsd: null,
+    frontierCalls: frontier ? 1 : 0,
+    localTurns: frontier ? 0 : 1,
+    reworkTurns: 0,
+    evidence: { source: "manual", routeEventId: eventId },
+  };
+}
+/**
+ * Reports whether a model name contains "gpt", "claude" or "gemini"
+ * (case-insensitive). A missing or empty name is never a frontier model.
+ */
+function providerIsFrontier(model?: string): boolean {
+  if (!model) return false;
+  return /gpt|claude|gemini/i.test(model);
+}
+// Behavioural checks for generateSharpeningHints / writeSharpeningHints using in-memory fixtures.
+describe("router sharpening hints", () => {
+  // Five local events with "success" outcomes are compared against five cloud events with
+  // "partial" outcomes. Events are passed in reversed order to exercise the determinism the
+  // test title claims.
+  it("generates deterministic provenance-backed model preference hints without transcript content", () => {
+    const qwen = [
+      event("11", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.05),
+      event("12", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.85, 0.1),
+      event("13", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.88, 0.08),
+      event("14", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.86, 0.12),
+      event("15", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.82, 0.1),
+    ];
+    const gpt = [
+      event("21", "run_verifier", "gpt-5.5", "openai-codex", 0.62, 0.35),
+      event("22", "run_verifier", "gpt-5.5", "openai-codex", 0.58, 0.4),
+      event("23", "run_verifier", "gpt-5.5", "openai-codex", 0.65, 0.32),
+      event("24", "run_verifier", "gpt-5.5", "openai-codex", 0.6, 0.38),
+      event("25", "run_verifier", "gpt-5.5", "openai-codex", 0.61, 0.37),
+    ];
+    const artifact = generateSharpeningHints({
+      events: [...gpt, ...qwen].reverse(),
+      outcomes: [...qwen.map((item) => outcomeFor(item, "success")), ...gpt.map((item) => outcomeFor(item, "partial"))],
+      generatedAt: "2026-06-14T00:02:00.000Z",
+      inputs: { events: "events.jsonl", outcomes: "outcomes.jsonl" },
+    });
+    // Two sessions: the checkpoint fixture derives the session from the first character of the id ("1" and "2").
+    expect(artifact).toMatchObject({
+      schema: "pi-router.sharpening-hints.v1",
+      generatedAt: "2026-06-14T00:02:00.000Z",
+      totals: { events: 10, outcomes: 10, sessions: 2, models: 2 },
+      learningPolicy: { scope: "repo-local", ignoresRawTranscript: true, fallback: "baseline-router" },
+      manualPromotionRequired: true,
+    });
+    const prefer = artifact.hints.find((hint) => hint.kind === "prefer_model_for_action");
+    expect(prefer).toMatchObject({
+      action: "run_verifier",
+      modelId: "qwen3.6-35b-a3b-128k",
+      provider: "local",
+      confidence: "medium",
+      guardrails: { manualPromotionOnly: true, sparse: true, autoUse: { eligible: false } },
+    });
+    expect(prefer?.provenance.comparedWith?.[0]).toMatchObject({ modelId: "gpt-5.5", provider: "openai-codex", events: 5 });
+    // The checkpoint fixture's raw session path contains both strings; neither may appear in the artifact.
+    expect(JSON.stringify(artifact)).not.toContain("SECRET_TOKEN");
+    expect(JSON.stringify(artifact)).not.toContain("raw-session");
+  });
+  // One event per model and no outcomes: the hint must be flagged low-confidence, sparse and capped.
+  it("marks sparse hints low-confidence and sample-size capped", () => {
+    const local = [event("31", "continue_current", "qwen-local", "local", 0.9, 0.05)];
+    const cloud = [event("41", "continue_current", "gpt-5.5", "openai-codex", 0.4, 0.6)];
+    const artifact = generateSharpeningHints({ events: [...local, ...cloud], generatedAt: "2026-06-14T00:03:00.000Z" });
+    const hint = artifact.hints.find((item) => item.kind === "prefer_model_for_action");
+    expect(hint?.confidence).toBe("low");
+    expect(hint?.guardrails).toMatchObject({ sparse: true, sampleSizeCapped: true, manualPromotionOnly: true, autoUse: { eligible: false } });
+  });
+  // Twenty events per model, split over two id prefixes each ("8"/"9" and "a"/"b"), so each model
+  // spans two sessions; every event has a linked outcome.
+  it("requires cross-session outcome-backed evidence before future auto bias eligibility", () => {
+    const strongLocal = [
+      ...Array.from({ length: 10 }, (_, index) => event(`8${index}`, "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.04)),
+      ...Array.from({ length: 10 }, (_, index) => event(`9${index}`, "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.88, 0.06)),
+    ];
+    const weakerCloud = [
+      ...Array.from({ length: 10 }, (_, index) => event(`a${index}`, "run_verifier", "gpt-5.5", "openai-codex", 0.58, 0.4)),
+      ...Array.from({ length: 10 }, (_, index) => event(`b${index}`, "run_verifier", "gpt-5.5", "openai-codex", 0.6, 0.38)),
+    ];
+    const artifact = generateSharpeningHints({
+      events: [...strongLocal, ...weakerCloud],
+      outcomes: [...strongLocal.map((item) => outcomeFor(item, "success")), ...weakerCloud.map((item) => outcomeFor(item, "partial"))],
+      generatedAt: "2026-06-14T00:03:30.000Z",
+    });
+    const hint = artifact.hints.find((item) => item.kind === "prefer_model_for_action" && item.modelId === "qwen3.6-35b-a3b-128k");
+    expect(hint?.confidence).toBe("high");
+    expect(hint?.guardrails).toMatchObject({ sparse: false, sampleSizeCapped: false, autoUse: { eligible: true } });
+  });
+  // Three local events without outcomes: a savings candidate is expected, worded as a manual hint.
+  it("emits local savings candidates only as manual hints", () => {
+    const events = [
+      event("51", "continue_current", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.05),
+      event("52", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.88, 0.08),
+      event("53", "summarize_context", "qwen3.6-35b-a3b-128k", "local", 0.85, 0.1),
+    ];
+    const artifact = generateSharpeningHints({ events, generatedAt: "2026-06-14T00:04:00.000Z" });
+    const savings = artifact.hints.find((hint) => hint.kind === "savings_candidate");
+    expect(savings).toMatchObject({ modelId: "qwen3.6-35b-a3b-128k", provider: "local", confidence: "low" });
+    expect(savings?.rationale).toContain("manual hint, not an automatic promotion");
+  });
+  // High progress scores but every linked outcome is "failed": no savings candidate may be emitted.
+  it("suppresses savings candidates when linked outcomes are all poor", () => {
+    const events = [
+      event("71", "continue_current", "qwen3.6-35b-a3b-128k", "local", 0.95, 0.02),
+      event("72", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.93, 0.03),
+      event("73", "summarize_context", "qwen3.6-35b-a3b-128k", "local", 0.91, 0.04),
+      event("74", "continue_current", "qwen3.6-35b-a3b-128k", "local", 0.92, 0.03),
+      event("75", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.94, 0.02),
+    ];
+    const artifact = generateSharpeningHints({ events, outcomes: events.map((item) => outcomeFor(item, "failed")), generatedAt: "2026-06-14T00:04:30.000Z" });
+    expect(artifact.hints.find((hint) => hint.kind === "savings_candidate")).toBeUndefined();
+  });
+  // File-based entry point: round-trips one event through disk, then checks the missing-file error.
+  it("writes sharpening hints and fails clearly for missing event inputs", () => {
+    const eventPath = tempFile("events.jsonl");
+    const outputPath = tempFile("hints.json");
+    const item = event("61", "run_verifier", "qwen3.6-35b-a3b-128k", "local", 0.9, 0.05);
+    writeFileSync(eventPath, `${JSON.stringify(item)}\n`);
+    const artifact = writeSharpeningHints({ eventsPath: eventPath, outputPath, generatedAt: "2026-06-14T00:05:00.000Z" });
+    expect(artifact.schema).toBe("pi-router.sharpening-hints.v1");
+    expect(JSON.parse(readFileSync(outputPath, "utf8")).schema).toBe("pi-router.sharpening-hints.v1");
+    expect(() => writeSharpeningHints({ eventsPath: "/tmp/pi-router-missing-events.jsonl", outputPath })).toThrow(/required route events file not found/);
+  });
+});