npm - @tracemarketplace/shared - Versions diffs - 0.0.10 → 0.0.11 - Mend

@tracemarketplace/shared 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

package/dist/extractor-claude-code.test.js +53 -0
package/dist/extractor-claude-code.test.js.map +1 -1
package/dist/extractor-codex.test.js +5 -0
package/dist/extractor-codex.test.js.map +1 -1
package/dist/extractors/claude-code.d.ts.map +1 -1
package/dist/extractors/claude-code.js +4 -4
package/dist/extractors/claude-code.js.map +1 -1
package/dist/extractors/codex.d.ts.map +1 -1
package/dist/extractors/codex.js +2 -0
package/dist/extractors/codex.js.map +1 -1
package/dist/extractors/common.d.ts +1 -2
package/dist/extractors/common.d.ts.map +1 -1
package/dist/extractors/common.js +2 -37
package/dist/extractors/common.js.map +1 -1
package/dist/extractors/common.test.d.ts +2 -0
package/dist/extractors/common.test.d.ts.map +1 -0
package/dist/extractors/common.test.js +17 -0
package/dist/extractors/common.test.js.map +1 -0
package/dist/extractors/cursor.d.ts.map +1 -1
package/dist/extractors/cursor.js +8 -0
package/dist/extractors/cursor.js.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/redact.d.ts.map +1 -1
package/dist/redact.js +3 -1
package/dist/redact.js.map +1 -1
package/dist/redact.test.js +9 -0
package/dist/redact.test.js.map +1 -1
package/dist/scoring.d.ts +5 -3
package/dist/scoring.d.ts.map +1 -1
package/dist/scoring.fixtures.test.d.ts +2 -0
package/dist/scoring.fixtures.test.d.ts.map +1 -0
package/dist/scoring.fixtures.test.js +47 -0
package/dist/scoring.fixtures.test.js.map +1 -0
package/dist/scoring.js +381 -62
package/dist/scoring.js.map +1 -1
package/dist/scoring.test.js +125 -26
package/dist/scoring.test.js.map +1 -1
package/dist/tool-normalization.d.ts +66 -0
package/dist/tool-normalization.d.ts.map +1 -0
package/dist/tool-normalization.generated.d.ts +181 -0
package/dist/tool-normalization.generated.d.ts.map +1 -0
package/dist/tool-normalization.generated.js +261 -0
package/dist/tool-normalization.generated.js.map +1 -0
package/dist/tool-normalization.js +463 -0
package/dist/tool-normalization.js.map +1 -0
package/dist/tool-normalization.test.d.ts +2 -0
package/dist/tool-normalization.test.d.ts.map +1 -0
package/dist/tool-normalization.test.js +188 -0
package/dist/tool-normalization.test.js.map +1 -0
package/dist/types.d.ts +38 -1
package/dist/types.d.ts.map +1 -1
package/dist/validators.d.ts +23 -6
package/dist/validators.d.ts.map +1 -1
package/dist/validators.js +4 -0
package/dist/validators.js.map +1 -1
package/dist/validators.test.js +7 -0
package/dist/validators.test.js.map +1 -1
package/package.json +5 -5
package/scripts/generate-tool-normalization.mjs +16 -0
package/src/extractor-claude-code.test.ts +59 -0
package/src/extractor-codex.test.ts +5 -0
package/src/extractors/claude-code.ts +8 -4
package/src/extractors/codex.ts +2 -0
package/src/extractors/common.test.ts +21 -0
package/src/extractors/common.ts +15 -49
package/src/extractors/cursor.ts +9 -0
package/src/index.ts +1 -0
package/src/redact.test.ts +9 -0
package/src/redact.ts +3 -1
package/src/scoring.fixtures.test.ts +71 -0
package/src/scoring.test.ts +151 -26
package/src/scoring.ts +582 -84
package/src/tool-normalization.generated.ts +262 -0
package/src/tool-normalization.spec.json +205 -0
package/src/tool-normalization.test.ts +221 -0
package/src/tool-normalization.ts +670 -0
package/src/types.ts +50 -0
package/src/validators.test.ts +8 -0
package/src/validators.ts +8 -0

package/src/extractors/claude-code.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import type {
   TokenUsage,
 } from "../types.js";
 import { deriveTurnActors } from "../turn-actors.js";
+import { pushUniqueTextBlock } from "./common.js";
 export async function extractClaudeCode(
   sessionFilePath: string,
@@ -146,10 +147,11 @@ export async function extractClaudeCode(
         if (block.type === "text") {
           contentBlocks.push({ type: "text", text: block.text ?? "" });
         } else if (block.type === "thinking") {
-          contentBlocks.push({
-            type: "thinking",
-            text: block.thinking ?? block.text ?? "",
-          });
+          pushUniqueTextBlock(
+            contentBlocks,
+            "thinking",
+            block.thinking ?? block.text ?? "",
+          );
         } else if (block.type === "tool_use") {
           contentBlocks.push({
             type: "tool_use",
@@ -278,6 +280,8 @@ export async function extractClaudeCode(
       extraction_method: "passive",
     },
     score: null,
+    raw_json: { lines } as NonNullable<NormalizedTrace["raw_json"]>,
+    raw_json_format: "claude_code.jsonl",
     raw_r2_key: "",
     normalized_r2_key: "",
   };

package/src/extractors/codex.ts CHANGED Viewed

@@ -229,6 +229,8 @@ export async function extractCodex(
     content_fidelity: "full",
     env_state: createPassiveEnvState(),
     score: null,
+    raw_json: { events } as NonNullable<NormalizedTrace["raw_json"]>,
+    raw_json_format: "codex_cli.jsonl",
     raw_r2_key: "",
     normalized_r2_key: "",
   };

package/src/extractors/common.test.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { describe, expect, it } from "vitest";
+import { isFileMutationTool, isShellToolName, isWriteShapedShellCommand } from "./common.js";
+describe("common extractor helpers", () => {
+  it("treats codex shell aliases as shell tools", () => {
+    expect(isShellToolName("bash_command")).toBe(true);
+    expect(isShellToolName("run_shell_command")).toBe(true);
+    expect(isShellToolName("functions.exec_command")).toBe(true);
+  });
+  it("treats replace as a file mutation tool", () => {
+    expect(isFileMutationTool("replace", { input: "/app/ars.R" })).toBe(true);
+    expect(isFileMutationTool("write_file", { input: "/app/ars.R" })).toBe(true);
+  });
+  it("unwraps wrapped shell commands for write-shape detection", () => {
+    expect(
+      isWriteShapedShellCommand('bash -lc "cd /app && sort -o tmp/out.txt src/input.txt"'),
+    ).toBe(true);
+  });
+});

package/src/extractors/common.ts CHANGED Viewed

@@ -1,54 +1,20 @@
+import {
+  isFileMutationTool,
+  isShellToolName,
+} from "../tool-normalization.js";
 import type { ContentBlock, EnvState, Turn } from "../types.js";
-const SHELL_TOOL_NAMES = ["exec_command", "bash", "shell", "write_stdin"];
-const FILE_MUTATION_TOOL_NAMES = [
-  "apply_patch",
-  "write_file",
-  "create_file",
-  "delete_file",
-  "rename_file",
-  "move_file",
-  "file_change",
-  "edit",
-  "edit_file",
-  "multiedit",
-  "write",
-];
-function normalizeToolName(toolName: string): string {
-  return toolName.trim().toLowerCase();
-}
-function toolNameMatches(toolName: string, candidate: string): boolean {
-  const normalized = normalizeToolName(toolName);
-  return normalized === candidate || normalized.endsWith(`.${candidate}`);
-}
-export function isShellToolName(toolName: string): boolean {
-  return SHELL_TOOL_NAMES.some((candidate) => toolNameMatches(toolName, candidate));
-}
-export function isFileMutationTool(
-  toolName: string,
-  toolInput: Record<string, unknown>,
-): boolean {
-  if (
-    FILE_MUTATION_TOOL_NAMES.some((candidate) => toolNameMatches(toolName, candidate))
-  ) {
-    return true;
-  }
-  if (!isShellToolName(toolName)) return false;
-  const command =
-    typeof toolInput.cmd === "string"
-      ? toolInput.cmd
-      : typeof toolInput.command === "string"
-        ? toolInput.command
-        : null;
-  return command !== null && /\bapply_patch\b|\bsed\s+-i\b|\bperl\s+-pi\b/.test(command);
-}
+export {
+  extractShellCommand,
+  getNormalizedToolHierarchy,
+  hasShellWriteRedirect,
+  isFileMutationTool,
+  isShellToolName,
+  isWriteShapedShellCommand,
+  listNormalizedToolsByFamily,
+  normalizeToolName,
+  toolNameMatches,
+} from "../tool-normalization.js";
 export function collectTraceMetrics(turns: Turn[]) {
   const allBlocks = turns.flatMap((turn) => turn.content);

package/src/extractors/cursor.ts CHANGED Viewed

@@ -265,6 +265,7 @@ export async function extractCursor(
     const turns: Turn[] = [];
     const openFiles: string[] = [];
+    const rawBubbles: unknown[] = [];
     let totalInputTokens = 0;
     let totalOutputTokens = 0;
@@ -275,6 +276,8 @@ export async function extractCursor(
       const blob = readCursorBlob(db, sessionId, bubbleId);
       if (!blob) continue;
+      rawBubbles.push({ header, blob });
       const role = cursorRole(header, blob);
       const tokenUsage = extractCursorTokenUsage(blob);
       if (tokenUsage) {
@@ -344,6 +347,12 @@ export async function extractCursor(
         open_files_in_editor: openFiles.length > 0 ? Array.from(new Set(openFiles)) : null,
       }),
       score: null,
+      raw_json: {
+        composerData,
+        headers,
+        bubbles: rawBubbles,
+      } as NonNullable<NormalizedTrace["raw_json"]>,
+      raw_json_format: "cursor.composer_disk_kv",
       raw_r2_key: "",
       normalized_r2_key: "",
     };

package/src/index.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 export * from "./types.js";
 export * from "./hash.js";
 export * from "./scoring.js";
+export * from "./tool-normalization.js";
 export * from "./utils.js";
 export * from "./validators.js";
 export * from "./redact.js";

package/src/redact.test.ts CHANGED Viewed

@@ -68,6 +68,11 @@ function makeTrace(): NormalizedTrace {
       extraction_method: "passive",
     },
     score: null,
+    raw_json: {
+      secret: "sk-proj-secret-secret-secret-secret",
+      path: "/Users/tester/project/.env",
+    },
+    raw_json_format: "claude_code.jsonl",
     raw_r2_key: "",
     normalized_r2_key: "",
   };
@@ -93,6 +98,10 @@ describe("redactTraceWithStats", () => {
     }
     expect(toolResult.result_content).toBe("Bearer [BEARER_TOKEN]");
     expect(result.trace.env_state?.inferred_file_tree?.[0]).toBe("~/project/.env");
+    expect(result.trace.raw_json).toEqual({
+      secret: "[OPENAI_KEY]",
+      path: "~/project/.env",
+    });
     expect(result.stats.changed).toBe(true);
     expect(result.stats.piiMatches).toBe(0);
     expect(result.stats.totalMatches).toBeGreaterThanOrEqual(3);

package/src/redact.ts CHANGED Viewed

@@ -230,6 +230,7 @@ export function redactTraceWithStats(
   const envFileTree = trace.env_state?.inferred_file_tree?.map((path) => redactStringWithStats(path, home)) ?? [];
   const envChangedFiles = trace.env_state?.inferred_changed_files?.map((path) => redactStringWithStats(path, home)) ?? [];
   const envErrorFiles = trace.env_state?.inferred_error_files?.map((path) => redactStringWithStats(path, home)) ?? [];
+  const rawJsonResult = redactUnknown(trace.raw_json ?? null, home);
   const envStats = mergeStats(
     ...envFileTree.map((entry) => entry.stats),
     ...envChangedFiles.map((entry) => entry.stats),
@@ -241,6 +242,7 @@ export function redactTraceWithStats(
       ...trace,
       submitted_by: "[redacted]",
       turns: turnResults.map((result) => result.turn),
+      raw_json: rawJsonResult.value as NormalizedTrace["raw_json"],
       env_state: trace.env_state
         ? {
             ...trace.env_state,
@@ -250,6 +252,6 @@ export function redactTraceWithStats(
           }
         : null,
     },
-    stats: mergeStats(...turnResults.map((result) => result.stats), envStats),
+    stats: mergeStats(...turnResults.map((result) => result.stats), envStats, rawJsonResult.stats),
   };
 }

package/src/scoring.fixtures.test.ts ADDED Viewed

@@ -0,0 +1,71 @@
+import { readFileSync } from "node:fs";
+import { describe, expect, it } from "vitest";
+import { scoreTrace } from "./scoring.js";
+import type { FailureMode, NormalizedTrace, SourceTool, TraceScore } from "./types.js";
+interface FixtureManifestEntry {
+  fixture_id: string;
+  trace_path: string;
+  description: string;
+  source_tool: SourceTool;
+  ts_completeness?: TraceScore["completeness"];
+  ts_failure_modes: FailureMode[];
+}
+interface FixtureManifest {
+  schema_version: number;
+  fixtures: FixtureManifestEntry[];
+}
+function loadManifest(): FixtureManifest {
+  return JSON.parse(
+    readFileSync(new URL("../../../fixtures/pipeline/manifest.json", import.meta.url), "utf8"),
+  ) as FixtureManifest;
+}
+function loadTrace(tracePath: string): NormalizedTrace {
+  return JSON.parse(
+    readFileSync(new URL(`../../../fixtures/pipeline/${tracePath}`, import.meta.url), "utf8"),
+  ) as NormalizedTrace;
+}
+describe("shared pipeline fixtures", () => {
+  const manifest = loadManifest();
+  it("covers the expected first-pass parity pack", () => {
+    expect(manifest.schema_version).toBe(1);
+    expect(manifest.fixtures.map((fixture) => fixture.fixture_id)).toEqual([
+      "eligibility_gap",
+      "bronze_floor_eligibility",
+      "matching_trigger_success",
+      "matching_trigger_failure",
+      "incomplete_trace_ineligible",
+      "bash_category_drift",
+      "sed_read_semantics",
+      "bash_text_filter_taxonomy",
+      "bash_text_filter_write_edges",
+      "recovery_without_recovery_text",
+      "long_context_without_mention",
+      "catastrophic_failure_boundary",
+      "codex_exec_command_collapse",
+      "codex_write_stdin_followup",
+      "codex_write_stdin_control_sequences",
+      "codex_write_stdin_interrupt_with_error_output",
+      "cursor_bronze_cap",
+      "text_only_no_exchange",
+    ]);
+  });
+  for (const fixture of manifest.fixtures) {
+    it(`${fixture.fixture_id} matches TypeScript failure expectations`, () => {
+      const trace = loadTrace(fixture.trace_path);
+      const score = scoreTrace(trace);
+      expect(trace.source_tool).toBe(fixture.source_tool);
+      expect([...score.failure_modes].sort()).toEqual([...fixture.ts_failure_modes].sort());
+      if (fixture.ts_completeness) {
+        expect(score.completeness).toBe(fixture.ts_completeness);
+      }
+    });
+  }
+});

package/src/scoring.test.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { describe, it, expect } from "vitest";
-import { detectFailureModes, checkCompleteness, scoreTrace } from "./scoring.js";
+import { detectFailureModes, checkCompleteness, deriveQualityTier, scoreTrace } from "./scoring.js";
 import type { NormalizedTrace, Turn, ContentBlock } from "./types.js";
 function makeTrace(overrides: Partial<NormalizedTrace> = {}): NormalizedTrace {
@@ -49,32 +49,62 @@ function makeTurn(role: "user" | "assistant", content: ContentBlock[]): Turn {
   };
 }
+function makeToolUse(
+  tool_call_id: string,
+  tool_name: string,
+  tool_input: Record<string, unknown>,
+): ContentBlock {
+  return { type: "tool_use", tool_call_id, tool_name, tool_input };
+}
+function makeToolResult(
+  tool_call_id: string,
+  is_error: boolean,
+  result_content: string,
+  exit_code: number | null,
+): ContentBlock {
+  return { type: "tool_result", tool_call_id, is_error, result_content, exit_code };
+}
 describe("detectFailureModes", () => {
   it("empty turns → no_failure", () => {
     const result = detectFailureModes(makeTrace({ turns: [] }));
     expect(result).toEqual(["no_failure"]);
   });
-  it("tool_result with is_error → tool_call_failure", () => {
+  it("failed exchange → tool_call_failure", () => {
     const turns = [
-      makeTurn("user", [{ type: "tool_result", tool_call_id: "t1", is_error: true, result_content: "err", exit_code: 1 }]),
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "test failure", 1),
+      ]),
     ];
     const result = detectFailureModes(makeTrace({ turns }));
     expect(result).toContain("tool_call_failure");
   });
-  it("same tool 3× consecutive → repeated_tool_calls", () => {
-    const toolUse = (n: number): ContentBlock => ({
-      type: "tool_use",
-      tool_call_id: `t${n}`,
-      tool_name: "bash",
-      tool_input: {},
-    });
+  it("repeated failing root cause across three exchanges is detected", () => {
     const turns = [
-      makeTurn("assistant", [toolUse(1), toolUse(2), toolUse(3)]),
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "FAIL src/app.test.ts\nTypeError: config is undefined", 1),
+      ]),
+      makeTurn("user", [{ type: "text", text: "Try again with a focused rerun." }]),
+      makeTurn("assistant", [
+        makeToolUse("t2", "functions.exec_command", { cmd: "pnpm test --runInBand" }),
+        makeToolResult("t2", true, "FAIL src/app.test.ts\nTypeError: config is undefined", 1),
+      ]),
+      makeTurn("user", [{ type: "text", text: "One more focused attempt." }]),
+      makeTurn("assistant", [
+        makeToolUse("t3", "functions.exec_command", { cmd: "vitest src/app.test.ts" }),
+        makeToolResult("t3", true, "FAIL src/app.test.ts\nTypeError: config is undefined", 1),
+      ]),
     ];
     const result = detectFailureModes(makeTrace({ turns }));
-    expect(result).toContain("repeated_tool_calls");
+    expect(result).toContain("repeated_failing_root_cause");
+    expect(result).toContain("catastrophic_failure");
   });
   it("context window text → context_limit_approached", () => {
@@ -85,24 +115,34 @@ describe("detectFailureModes", () => {
     expect(result).toContain("context_limit_approached");
   });
-  it("final turns all errors → catastrophic_failure", () => {
-    const errResult: ContentBlock = { type: "tool_result", tool_call_id: "t1", is_error: true, result_content: "fail", exit_code: 1 };
+  it("final failed exchange → catastrophic_failure", () => {
     const turns = [
-      makeTurn("user", [errResult]),
-      makeTurn("user", [errResult]),
+      makeTurn("user", [{ type: "text", text: "Run the failing command." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "fail", 1),
+      ]),
     ];
     const result = detectFailureModes(makeTrace({ turns }));
     expect(result).toContain("catastrophic_failure");
   });
-  it("tool errors + later recovery text → graceful_recovery", () => {
-    const errResult: ContentBlock = { type: "tool_result", tool_call_id: "t1", is_error: true, result_content: "fail", exit_code: 1 };
+  it("failed exchange followed by successful exchange → graceful_recovery", () => {
     const turns = [
-      makeTurn("user", [errResult]),
-      makeTurn("assistant", [{ type: "text", text: "Let me try a different approach instead." }]),
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "fail", 1),
+      ]),
+      makeTurn("user", [{ type: "text", text: "Try a safer command." }]),
+      makeTurn("assistant", [
+        makeToolUse("t2", "functions.exec_command", { cmd: "pnpm test --runInBand" }),
+        makeToolResult("t2", false, "ok", 0),
+      ]),
     ];
     const result = detectFailureModes(makeTrace({ turns }));
     expect(result).toContain("graceful_recovery");
+    expect(result).toContain("tool_call_failure");
   });
 });
@@ -135,14 +175,22 @@ describe("scoreTrace", () => {
     const score = scoreTrace(makeTrace({ turns: [], content_fidelity: "chat_only" }));
     expect(score.completeness).toBe("malformed");
     expect(score.payout_cents).toBeLessThan(100);
+    expect(score.breakdown.component_count).toBeGreaterThan(0);
   });
   it("graceful_recovery + tool_call_failure → bonuses stack", () => {
     const baseScore = scoreTrace(makeTrace({ turns: [], content_fidelity: "full" }));
-    const errResult: ContentBlock = { type: "tool_result", tool_call_id: "t1", is_error: true, result_content: "fail", exit_code: 1 };
     const turns = [
-      makeTurn("user", [errResult]),
-      makeTurn("assistant", [{ type: "text", text: "Let me try a different approach instead." }]),
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "fail", 1),
+      ]),
+      makeTurn("user", [{ type: "text", text: "Try again." }]),
+      makeTurn("assistant", [
+        makeToolUse("t2", "functions.exec_command", { cmd: "pnpm test --runInBand" }),
+        makeToolResult("t2", false, "ok", 0),
+      ]),
     ];
     const bonusScore = scoreTrace(makeTrace({ turns, content_fidelity: "full" }));
     expect(bonusScore.payout_cents).toBeGreaterThan(baseScore.payout_cents);
@@ -151,10 +199,17 @@ describe("scoreTrace", () => {
   });
   it("total clamps to [0, 1]", () => {
-    const errResult: ContentBlock = { type: "tool_result", tool_call_id: "t1", is_error: true, result_content: "fail", exit_code: 1 };
     const turns = [
-      makeTurn("user", [errResult]),
-      makeTurn("assistant", [{ type: "text", text: "Let me try a different approach instead." }]),
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "fail", 1),
+      ]),
+      makeTurn("user", [{ type: "text", text: "Try again." }]),
+      makeTurn("assistant", [
+        makeToolUse("t2", "functions.exec_command", { cmd: "pnpm test --runInBand" }),
+        makeToolResult("t2", false, "ok", 0),
+      ]),
     ];
     const score = scoreTrace(makeTrace({ turns, content_fidelity: "full", total_input_tokens: 1000000, total_output_tokens: 1000000 }));
     expect(score.total).toBeGreaterThanOrEqual(0);
@@ -170,4 +225,74 @@ describe("scoreTrace", () => {
     const expected = Math.min(500, Math.round(score.total * 500));
     expect(score.payout_cents).toBe(expected);
   });
+  it("uses async label and novelty context when provided", () => {
+    const turns = [
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "fail", 1),
+      ]),
+      makeTurn("user", [{ type: "text", text: "Try again with a fix." }]),
+      makeTurn("assistant", [
+        makeToolUse("t2", "functions.exec_command", { cmd: "pnpm test --runInBand" }),
+        makeToolResult("t2", false, "ok", 0),
+      ]),
+    ];
+    const baseScore = scoreTrace(makeTrace({ turns, content_fidelity: "full" }));
+    const enrichedScore = scoreTrace(
+      makeTrace({ turns, content_fidelity: "full" }),
+      undefined,
+      {
+        anomaly_score: 2.4,
+        workflow_shape: "shell_and_editor",
+        length_bucket: "medium",
+        tool_density: "medium",
+        failure_judge_verdict: "confirmed_failure",
+        failure_judge_agreement: "agree",
+        failure_judge_confidence: 0.94,
+      },
+    );
+    expect(enrichedScore.total).toBeGreaterThan(baseScore.total);
+    expect(enrichedScore.breakdown.components.some((component) => component.key === "novelty")).toBe(true);
+    expect(enrichedScore.breakdown.components.some((component) => component.key === "failure_judge")).toBe(true);
+  });
+  it("respects failure mode overrides from downstream labels", () => {
+    const turns = [
+      makeTurn("user", [{ type: "text", text: "Run the tests." }]),
+      makeTurn("assistant", [
+        makeToolUse("t1", "functions.exec_command", { cmd: "pnpm test" }),
+        makeToolResult("t1", true, "fail", 1),
+      ]),
+    ];
+    const baseScore = scoreTrace(makeTrace({ turns, content_fidelity: "full" }));
+    const overriddenScore = scoreTrace(
+      makeTrace({ turns, content_fidelity: "full" }),
+      undefined,
+      {
+        failure_modes_override: ["no_failure"],
+        failure_judge_verdict: "false_positive",
+        failure_judge_agreement: "disagree",
+        failure_judge_confidence: 0.9,
+      },
+    );
+    expect(baseScore.failure_modes).toContain("tool_call_failure");
+    expect(overriddenScore.failure_modes).toEqual(["no_failure"]);
+    expect(overriddenScore.total).toBeLessThan(baseScore.total);
+  });
+});
+describe("deriveQualityTier", () => {
+  it("applies the new score band thresholds", () => {
+    expect(deriveQualityTier(0.2)).toBe("bronze");
+    expect(deriveQualityTier(0.63)).toBe("bronze");
+    expect(deriveQualityTier(0.64)).toBe("silver");
+    expect(deriveQualityTier(0.81)).toBe("silver");
+    expect(deriveQualityTier(0.82)).toBe("gold");
+  });
 });