npm - @tracemarketplace/shared - Versions diffs - 0.0.10 → 0.0.13 - Mend

@tracemarketplace/shared 0.0.10 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

package/dist/extractor-claude-code.test.js +53 -0
package/dist/extractor-claude-code.test.js.map +1 -1
package/dist/extractor-codex.test.js +5 -0
package/dist/extractor-codex.test.js.map +1 -1
package/dist/extractors/claude-code.d.ts.map +1 -1
package/dist/extractors/claude-code.js +4 -4
package/dist/extractors/claude-code.js.map +1 -1
package/dist/extractors/codex.d.ts.map +1 -1
package/dist/extractors/codex.js +3 -1
package/dist/extractors/codex.js.map +1 -1
package/dist/extractors/common.d.ts +1 -2
package/dist/extractors/common.d.ts.map +1 -1
package/dist/extractors/common.js +2 -37
package/dist/extractors/common.js.map +1 -1
package/dist/extractors/common.test.d.ts +2 -0
package/dist/extractors/common.test.d.ts.map +1 -0
package/dist/extractors/common.test.js +17 -0
package/dist/extractors/common.test.js.map +1 -0
package/dist/extractors/cursor.d.ts.map +1 -1
package/dist/extractors/cursor.js +8 -0
package/dist/extractors/cursor.js.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/redact.d.ts.map +1 -1
package/dist/redact.js +3 -1
package/dist/redact.js.map +1 -1
package/dist/redact.test.js +9 -0
package/dist/redact.test.js.map +1 -1
package/dist/scoring.d.ts +5 -3
package/dist/scoring.d.ts.map +1 -1
package/dist/scoring.fixtures.test.d.ts +2 -0
package/dist/scoring.fixtures.test.d.ts.map +1 -0
package/dist/scoring.fixtures.test.js +47 -0
package/dist/scoring.fixtures.test.js.map +1 -0
package/dist/scoring.js +381 -62
package/dist/scoring.js.map +1 -1
package/dist/scoring.test.js +125 -26
package/dist/scoring.test.js.map +1 -1
package/dist/tool-normalization.d.ts +66 -0
package/dist/tool-normalization.d.ts.map +1 -0
package/dist/tool-normalization.generated.d.ts +181 -0
package/dist/tool-normalization.generated.d.ts.map +1 -0
package/dist/tool-normalization.generated.js +261 -0
package/dist/tool-normalization.generated.js.map +1 -0
package/dist/tool-normalization.js +463 -0
package/dist/tool-normalization.js.map +1 -0
package/dist/tool-normalization.test.d.ts +2 -0
package/dist/tool-normalization.test.d.ts.map +1 -0
package/dist/tool-normalization.test.js +188 -0
package/dist/tool-normalization.test.js.map +1 -0
package/dist/types.d.ts +38 -1
package/dist/types.d.ts.map +1 -1
package/dist/validators.d.ts +23 -6
package/dist/validators.d.ts.map +1 -1
package/dist/validators.js +4 -0
package/dist/validators.js.map +1 -1
package/dist/validators.test.js +7 -0
package/dist/validators.test.js.map +1 -1
package/package.json +5 -5
package/scripts/generate-tool-normalization.mjs +16 -0
package/src/extractor-claude-code.test.ts +59 -0
package/src/extractor-codex.test.ts +5 -0
package/src/extractors/claude-code.ts +8 -4
package/src/extractors/codex.ts +4 -2
package/src/extractors/common.test.ts +21 -0
package/src/extractors/common.ts +15 -49
package/src/extractors/cursor.ts +9 -0
package/src/index.ts +1 -0
package/src/redact.test.ts +9 -0
package/src/redact.ts +3 -1
package/src/scoring.fixtures.test.ts +71 -0
package/src/scoring.test.ts +151 -26
package/src/scoring.ts +582 -84
package/src/tool-normalization.generated.ts +262 -0
package/src/tool-normalization.spec.json +205 -0
package/src/tool-normalization.test.ts +221 -0
package/src/tool-normalization.ts +670 -0
package/src/types.ts +50 -0
package/src/validators.test.ts +8 -0
package/src/validators.ts +8 -0

package/src/scoring.ts CHANGED

@@ -1,76 +1,446 @@
-import type { NormalizedTrace, TraceScore, FailureMode } from "./types.js";
+import type {
+  FailureMode,
+  JsonValue,
+  NormalizedTrace,
+  QualityTier,
+  TraceScore,
+  TraceScoreBreakdown,
+  TraceScoreComponent,
+  TraceScoreContext,
+} from "./types.js";
+import {
+  extractFailureExchanges,
+  type FailureExchange,
+  normalizeTraceForEvaluation,
+  type TraceNormalization,
+} from "./tool-normalization.js";
-export function detectFailureModes(trace: NormalizedTrace): FailureMode[] {
-  const modes = new Set<FailureMode>();
-  const allBlocks = trace.turns.flatMap((t) => t.content);
+const CONTEXT_LIMIT_REGEX = /context.*(limit|window|maximum)|context limit/i;
+const CONTEXT_LIMIT_TOKEN_THRESHOLD = 150_000;
+const GENERIC_ERROR_LINE_REGEX =
+  /^(process exited with code|exit status \d+|\^c|error: process exited|process running with session id)/i;
+const LOW_SIGNAL_ERROR_LINE_REGEX = /^(fail|error)(\s+(<file>|<path>))?$/i;
+const REPEATED_ROOT_CAUSE_EXCHANGE_THRESHOLD = 3;
+const SILVER_SCORE_THRESHOLD = 0.64;
+const GOLD_SCORE_THRESHOLD = 0.82;
+const SCORE_VERSION = "v1-signal-aggregation";
+const COMPONENT_WEIGHTS = {
+  completeness: 1.25,
+  fidelity: 1.2,
+  executionDepth: 1.0,
+  failureSalience: 1.2,
+  complexity: 0.9,
+  workflowShape: 0.25,
+  lengthBucket: 0.2,
+  toolDensity: 0.2,
+  failureJudge: 0.45,
+  novelty: 0.55,
+} as const;
+const TOTAL_COMPONENT_WEIGHT = Object.values(COMPONENT_WEIGHTS).reduce((sum, weight) => sum + weight, 0);
+const FAILURE_MODE_ALIASES: Partial<Record<FailureMode, FailureMode>> = {
+  repeated_tool_calls: "repeated_failing_root_cause",
+};
+const WORKFLOW_SHAPE_SCORES = {
+  chat_only: 0.2,
+  tool_other: 0.5,
+  shell_only: 0.62,
+  editor_only: 0.68,
+  shell_and_editor: 0.9,
+} as const;
+const LENGTH_BUCKET_SCORES = {
+  short: 0.35,
+  medium: 0.68,
+  long: 0.92,
+} as const;
+const TOOL_DENSITY_SCORES = {
+  none: 0.2,
+  light: 0.45,
+  medium: 0.72,
+  heavy: 0.9,
+} as const;
+const FAILURE_JUDGE_VERDICT_SCORES: Record<string, number> = {
+  confirmed_failure: 0.92,
+  unclear: 0.55,
+  false_positive: 0.18,
+};
+const FAILURE_JUDGE_AGREEMENT_ADJUSTMENT: Record<string, number> = {
+  agree: 0.08,
+  partial: 0.03,
+  disagree: -0.08,
+};
+interface NormalizationStats {
+  exchangeCount: number;
+  actionCount: number;
+  uniqueToolKinds: number;
+}
+function hasContextLimitEvidence(trace: NormalizedTrace): boolean {
+  const totalTokens = (trace.total_input_tokens ?? 0) + (trace.total_output_tokens ?? 0);
+  if (totalTokens > CONTEXT_LIMIT_TOKEN_THRESHOLD) {
+    return true;
+  }
-  // tool_call_failure: any tool_result with is_error=true
-  const hasToolError = allBlocks.some(
-    (b) => b.type === "tool_result" && b.is_error
+  return trace.turns.some((turn) =>
+    turn.content.some(
+      (block) => block.type === "text" && CONTEXT_LIMIT_REGEX.test(block.text),
+    ),
   );
-  if (hasToolError) modes.add("tool_call_failure");
+}
+function normalizeErrorText(text: string | null | undefined, limit = 140): string | null {
+  if (!text) return null;
+  const candidateLines: string[] = [];
+  for (const rawLine of text.split(/\r?\n/)) {
+    let cleaned = rawLine.replace(/\s+/g, " ").trim().toLowerCase();
+    if (!cleaned || GENERIC_ERROR_LINE_REGEX.test(cleaned)) {
+      continue;
+    }
+    cleaned = cleaned
+      .replace(/\/[^\s]+/g, "<path>")
+      .replace(/\b[\w./-]+\.[a-z]{1,6}\b/g, "<file>")
+      .replace(/\b0x[a-f0-9]+\b/g, "<hex>")
+      .replace(/\b\d+\b/g, "<num>");
+    if (cleaned === "fail" || cleaned === "error" || LOW_SIGNAL_ERROR_LINE_REGEX.test(cleaned)) {
+      continue;
+    }
+    candidateLines.push(cleaned);
+  }
+  const normalized = candidateLines[0] ?? null;
+  if (!normalized || normalized.length < 12) {
+    return null;
+  }
-  // repeated_tool_calls: same tool_name 3+ times in a row
-  const toolUses = allBlocks.filter((b) => b.type === "tool_use") as Array<{
-    type: "tool_use";
-    tool_call_id: string;
-    tool_name: string;
-    tool_input: Record<string, unknown>;
-  }>;
-  let streak = 1;
-  for (let i = 1; i < toolUses.length; i++) {
-    if (toolUses[i].tool_name === toolUses[i - 1].tool_name) {
-      streak++;
-      if (streak >= 3) {
-        modes.add("repeated_tool_calls");
-        break;
+  return normalized.slice(0, limit);
+}
+function buildErrorSignature(
+  toolName: string | null | undefined,
+  resultContent: string | null | undefined,
+  exitCode: number | null | undefined,
+): string | null {
+  const normalizedText = normalizeErrorText(resultContent);
+  if (!normalizedText) return null;
+  const toolSegment = toolName?.trim().toLowerCase() || "unknown";
+  const exitSegment = exitCode == null ? "" : `:${exitCode}`;
+  return `${toolSegment}${exitSegment}|${normalizedText}`;
+}
+function hasRepeatedFailingRootCause(trace: NormalizedTrace): boolean {
+  let exchangeIndex = -1;
+  const toolNames = new Map<string, string>();
+  const signatureToExchanges = new Map<string, Set<number>>();
+  for (const turn of trace.turns) {
+    if (turn.role === "user") {
+      exchangeIndex += 1;
+      continue;
+    }
+    if (exchangeIndex < 0) {
+      exchangeIndex = 0;
+    }
+    for (const block of turn.content) {
+      if (block.type === "tool_use") {
+        toolNames.set(block.tool_call_id, block.tool_name);
+        continue;
+      }
+      if (block.type !== "tool_result") {
+        continue;
+      }
+      const isError = block.is_error || (block.exit_code != null && block.exit_code !== 0);
+      if (!isError) {
+        continue;
+      }
+      const signature = buildErrorSignature(
+        toolNames.get(block.tool_call_id),
+        block.result_content,
+        block.exit_code,
+      );
+      if (!signature) {
+        continue;
       }
-    } else {
-      streak = 1;
+      const exchanges = signatureToExchanges.get(signature) ?? new Set<number>();
+      exchanges.add(exchangeIndex);
+      signatureToExchanges.set(signature, exchanges);
     }
   }
-  // context_limit_approached: text mentioning context/limit
-  const contextLimitRegex = /context.*(limit|window|maximum)|context limit/i;
-  const hasContextLimit = trace.turns.some((t) =>
-    t.content.some(
-      (b) =>
-        b.type === "text" && contextLimitRegex.test(b.text)
-    )
+  return Array.from(signatureToExchanges.values()).some(
+    (exchangeIndexes) => exchangeIndexes.size >= REPEATED_ROOT_CAUSE_EXCHANGE_THRESHOLD,
   );
-  if (hasContextLimit) modes.add("context_limit_approached");
-  // graceful_recovery: tool errors followed by recovery text
-  if (hasToolError) {
-    const recoveryRegex = /let me try|instead|alternative|another approach|different way/i;
-    const laterTurns = trace.turns.slice(Math.floor(trace.turns.length / 2));
-    const hasRecovery = laterTurns.some((t) =>
-      t.content.some(
-        (b) => b.type === "text" && recoveryRegex.test(b.text)
-      )
-    );
-    if (hasRecovery) modes.add("graceful_recovery");
+}
+function hasGracefulRecovery(exchanges: FailureExchange[]): boolean {
+  for (let index = 0; index < exchanges.length - 1; index += 1) {
+    if (
+      exchanges[index]?.outcome === "failure" &&
+      ["success", "success_after_retry"].includes(exchanges[index + 1]?.outcome ?? "")
+    ) {
+      return true;
+    }
   }
-  // repeated_tool_calls → graceful_recovery if later success
-  if (modes.has("repeated_tool_calls")) {
-    const lastTurn = trace.turns[trace.turns.length - 1];
-    if (lastTurn?.role === "assistant") {
-      const hasSuccessText = lastTurn.content.some(
-        (b) => b.type === "text" && b.text.length > 50
-      );
-      if (hasSuccessText) modes.add("graceful_recovery");
+  return false;
+}
+function clamp01(value: number): number {
+  return Math.max(0, Math.min(1, value));
+}
+function round4(value: number): number {
+  return Number(value.toFixed(4));
+}
+function normalizeLinear(value: number, minValue: number, maxValue: number): number {
+  if (maxValue <= minValue) {
+    return value > minValue ? 1 : 0;
+  }
+  return clamp01((value - minValue) / (maxValue - minValue));
+}
+function normalizeLogScale(value: number, minValue: number, maxValue: number): number {
+  if (value <= minValue) return 0;
+  if (value >= maxValue) return 1;
+  const numerator = Math.log1p(value) - Math.log1p(minValue);
+  const denominator = Math.log1p(maxValue) - Math.log1p(minValue);
+  if (denominator <= 0) return 0;
+  return clamp01(numerator / denominator);
+}
+function countToolUses(trace: NormalizedTrace): number {
+  let count = 0;
+  for (const turn of trace.turns) {
+    for (const block of turn.content) {
+      if (block.type === "tool_use") {
+        count += 1;
+      }
     }
   }
+  return count;
+}
-  // catastrophic_failure: last 3+ turns are all errors with no recovery
-  const lastTurns = trace.turns.slice(-3);
-  const allLastAreErrors =
-    lastTurns.length >= 2 &&
-    lastTurns.every((t) =>
-      t.content.some((b) => b.type === "tool_result" && b.is_error)
-    );
-  if (allLastAreErrors && !modes.has("graceful_recovery")) {
+function collectNormalizationStats(normalization: TraceNormalization): NormalizationStats {
+  const uniqueToolKinds = new Set<string>();
+  let actionCount = 0;
+  for (const exchange of normalization.exchanges) {
+    actionCount += exchange.actions.length;
+    for (const action of exchange.actions) {
+      uniqueToolKinds.add(action.normalizedToolId);
+    }
+  }
+  return {
+    exchangeCount: normalization.exchanges.length,
+    actionCount,
+    uniqueToolKinds: uniqueToolKinds.size,
+  };
+}
+function normalizeFailureModes(
+  failureModes: FailureMode[] | null | undefined,
+): FailureMode[] {
+  const normalized: FailureMode[] = [];
+  for (const mode of failureModes ?? []) {
+    const canonical = FAILURE_MODE_ALIASES[mode] ?? mode;
+    if (!normalized.includes(canonical)) {
+      normalized.push(canonical);
+    }
+  }
+  return normalized.length > 0 ? normalized : ["no_failure"];
+}
+function resolveFailureModes(
+  trace: NormalizedTrace,
+  normalization: TraceNormalization,
+  overrideModes?: FailureMode[],
+): FailureMode[] {
+  if (overrideModes && overrideModes.length > 0) {
+    return normalizeFailureModes(overrideModes);
+  }
+  return normalizeFailureModes(detectFailureModes(trace, normalization));
+}
+function scoreCompletenessValue(
+  completeness: TraceScore["completeness"],
+): number {
+  if (completeness === "complete") return 1;
+  if (completeness === "incomplete") return 0.45;
+  return 0.02;
+}
+function scoreFidelityValue(contentFidelity: NormalizedTrace["content_fidelity"]): number {
+  return contentFidelity === "full" ? 1 : 0.4;
+}
+function scoreExecutionDepthValue(
+  trace: NormalizedTrace,
+  normalizationStats: NormalizationStats,
+): number {
+  const toolCallCount = trace.tool_call_count ?? countToolUses(trace);
+  const toolCallScore = normalizeLogScale(toolCallCount, 0, 12);
+  const exchangeScore = normalizeLinear(normalizationStats.exchangeCount, 0, 6);
+  const actionScore = normalizeLogScale(normalizationStats.actionCount, 0, 18);
+  let modalityBonus = 0;
+  if (trace.has_shell_commands) modalityBonus += 0.18;
+  if (trace.has_file_changes) modalityBonus += 0.18;
+  if (trace.has_thinking_blocks) modalityBonus += 0.08;
+  const base = toolCallCount > 0 ? 0.12 : 0.04;
+  return clamp01(
+    base
+      + toolCallScore * 0.34
+      + exchangeScore * 0.22
+      + actionScore * 0.18
+      + modalityBonus,
+  );
+}
+function scoreFailureSalienceValue(failureModes: FailureMode[]): number {
+  let score = failureModes.includes("no_failure") ? 0.12 : 0.18;
+  if (failureModes.includes("tool_call_failure")) score += 0.18;
+  if (failureModes.includes("repeated_failing_root_cause")) score += 0.24;
+  if (failureModes.includes("context_limit_approached")) score += 0.12;
+  if (failureModes.includes("graceful_recovery")) score += 0.22;
+  if (failureModes.includes("catastrophic_failure")) score += 0.18;
+  return clamp01(score);
+}
+function scoreComplexityValue(
+  trace: NormalizedTrace,
+  normalizationStats: NormalizationStats,
+): number {
+  const totalTokens = (trace.total_input_tokens ?? 0) + (trace.total_output_tokens ?? 0);
+  const turnCount = trace.turn_count ?? trace.turns.length;
+  const tokenScore = normalizeLogScale(totalTokens, 800, 80_000);
+  const turnScore = normalizeLinear(turnCount, 2, 24);
+  const exchangeScore = normalizeLinear(normalizationStats.exchangeCount, 1, 8);
+  const toolVarietyScore = normalizeLinear(normalizationStats.uniqueToolKinds, 1, 6);
+  return clamp01(
+    tokenScore * 0.38
+      + turnScore * 0.23
+      + exchangeScore * 0.17
+      + toolVarietyScore * 0.22,
+  );
+}
+function scoreMappedLabelValue(
+  value: string | null | undefined,
+  scoreMap: Record<string, number>,
+): number | null {
+  if (!value) return null;
+  return value in scoreMap ? scoreMap[value] : null;
+}
+function scoreFailureJudgeValue(context: TraceScoreContext): number | null {
+  if (!context.failure_judge_verdict) {
+    return null;
+  }
+  const verdictScore = FAILURE_JUDGE_VERDICT_SCORES[context.failure_judge_verdict];
+  if (verdictScore == null) {
+    return null;
+  }
+  const agreementAdjustment = context.failure_judge_agreement
+    ? (FAILURE_JUDGE_AGREEMENT_ADJUSTMENT[context.failure_judge_agreement] ?? 0)
+    : 0;
+  const confidence = context.failure_judge_confidence == null
+    ? null
+    : clamp01(context.failure_judge_confidence);
+  const confidenceMultiplier = confidence == null ? 1 : 0.7 + (confidence * 0.3);
+  return clamp01((verdictScore + agreementAdjustment) * confidenceMultiplier);
+}
+function scoreNoveltyValue(anomalyScore: number | null | undefined): number | null {
+  if (anomalyScore == null || !Number.isFinite(anomalyScore) || anomalyScore <= 0) {
+    return null;
+  }
+  return clamp01(Math.log1p(anomalyScore) / Math.log1p(4));
+}
+function buildComponent(
+  key: string,
+  label: string,
+  score: number,
+  weight: number,
+  source: TraceScoreComponent["source"],
+  reason: string,
+  evidence: JsonValue | null,
+): TraceScoreComponent {
+  return {
+    key,
+    label,
+    score: round4(clamp01(score)),
+    weight: round4(weight),
+    source,
+    available: true,
+    reason,
+    evidence,
+  };
+}
+function buildScoreBreakdown(components: TraceScoreComponent[]): TraceScoreBreakdown {
+  const availableWeight = components.reduce((sum, component) => sum + component.weight, 0);
+  return {
+    aggregation: "weighted_average",
+    component_count: components.length,
+    available_weight: round4(availableWeight),
+    total_weight: round4(TOTAL_COMPONENT_WEIGHT),
+    components,
+  };
+}
+export function deriveQualityTier(total: number): QualityTier {
+  if (total >= GOLD_SCORE_THRESHOLD) return "gold";
+  if (total >= SILVER_SCORE_THRESHOLD) return "silver";
+  return "bronze";
+}
+export function detectFailureModes(
+  trace: NormalizedTrace,
+  normalization?: TraceNormalization,
+): FailureMode[] {
+  const modes = new Set<FailureMode>();
+  const exchanges = normalization
+    ? normalization.exchanges.map((exchange) => ({
+        toolTokens: exchange.toolTokens,
+        hasError: exchange.hasError,
+        outcome: exchange.outcome,
+      }))
+    : extractFailureExchanges(trace);
+  const hasToolError = exchanges.some((exchange) => exchange.hasError);
+  if (hasToolError) modes.add("tool_call_failure");
+  if (hasRepeatedFailingRootCause(trace)) modes.add("repeated_failing_root_cause");
+  if (hasContextLimitEvidence(trace)) modes.add("context_limit_approached");
+  if (hasGracefulRecovery(exchanges)) modes.add("graceful_recovery");
+  const finalOutcome = exchanges[exchanges.length - 1]?.outcome;
+  if (finalOutcome === "failure" && !modes.has("graceful_recovery")) {
     modes.add("catastrophic_failure");
   }
@@ -103,47 +473,175 @@ export function checkCompleteness(
   return "incomplete";
 }
-export function scoreTrace(trace: NormalizedTrace): TraceScore {
+export function scoreTrace(
+  trace: NormalizedTrace,
+  normalization: TraceNormalization = normalizeTraceForEvaluation(trace),
+  context: TraceScoreContext = {},
+): TraceScore {
   const completeness = checkCompleteness(trace);
-  const failureModes = detectFailureModes(trace);
-  const fidelityBase = trace.content_fidelity === "full" ? 0.4 : 0.15;
-  let interestBonus = 0;
-  if (failureModes.includes("graceful_recovery")) interestBonus += 0.3;
-  if (failureModes.includes("repeated_tool_calls")) interestBonus += 0.2;
-  if (failureModes.includes("catastrophic_failure")) interestBonus += 0.15;
-  if (
-    failureModes.includes("tool_call_failure") &&
-    !failureModes.includes("graceful_recovery")
-  )
-    interestBonus += 0.1;
-  const totalTokens =
-    (trace.total_input_tokens ?? 0) + (trace.total_output_tokens ?? 0);
-  const lengthBonus = Math.min(
-    0.15,
-    Math.log10(Math.max(1, totalTokens / 1000)) * 0.05
-  );
+  const failureModes = resolveFailureModes(trace, normalization, context.failure_modes_override);
+  const normalizationStats = collectNormalizationStats(normalization);
+  const components: TraceScoreComponent[] = [
+    buildComponent(
+      "completeness",
+      "Completeness",
+      scoreCompletenessValue(completeness),
+      COMPONENT_WEIGHTS.completeness,
+      "trace",
+      completeness,
+      { completeness },
+    ),
+    buildComponent(
+      "fidelity",
+      "Content Fidelity",
+      scoreFidelityValue(trace.content_fidelity),
+      COMPONENT_WEIGHTS.fidelity,
+      "trace",
+      trace.content_fidelity === "full" ? "full transcript" : "chat-only transcript",
+      { content_fidelity: trace.content_fidelity },
+    ),
+    buildComponent(
+      "execution_depth",
+      "Execution Depth",
+      scoreExecutionDepthValue(trace, normalizationStats),
+      COMPONENT_WEIGHTS.executionDepth,
+      "normalization",
+      "tool/exchange richness",
+      {
+        tool_call_count: trace.tool_call_count ?? countToolUses(trace),
+        exchange_count: normalizationStats.exchangeCount,
+        action_count: normalizationStats.actionCount,
+        has_shell_commands: trace.has_shell_commands ?? false,
+        has_file_changes: trace.has_file_changes ?? false,
+        has_thinking_blocks: trace.has_thinking_blocks ?? false,
+      },
+    ),
+    buildComponent(
+      "failure_salience",
+      "Failure Salience",
+      scoreFailureSalienceValue(failureModes),
+      COMPONENT_WEIGHTS.failureSalience,
+      "trace",
+      failureModes.join(", "),
+      { failure_modes: failureModes },
+    ),
+    buildComponent(
+      "complexity",
+      "Complexity",
+      scoreComplexityValue(trace, normalizationStats),
+      COMPONENT_WEIGHTS.complexity,
+      "normalization",
+      "token/turn/tool complexity",
+      {
+        total_tokens: (trace.total_input_tokens ?? 0) + (trace.total_output_tokens ?? 0),
+        turn_count: trace.turn_count ?? trace.turns.length,
+        exchange_count: normalizationStats.exchangeCount,
+        unique_tool_kinds: normalizationStats.uniqueToolKinds,
+      },
+    ),
+  ];
+  const workflowShapeScore = scoreMappedLabelValue(context.workflow_shape, WORKFLOW_SHAPE_SCORES);
+  if (workflowShapeScore != null) {
+    components.push(
+      buildComponent(
+        "workflow_shape",
+        "Workflow Shape",
+        workflowShapeScore,
+        COMPONENT_WEIGHTS.workflowShape,
+        "label",
+        context.workflow_shape ?? "unknown",
+        { workflow_shape: context.workflow_shape ?? null },
+      ),
+    );
+  }
+  const lengthBucketScore = scoreMappedLabelValue(context.length_bucket, LENGTH_BUCKET_SCORES);
+  if (lengthBucketScore != null) {
+    components.push(
+      buildComponent(
+        "length_bucket",
+        "Length Bucket",
+        lengthBucketScore,
+        COMPONENT_WEIGHTS.lengthBucket,
+        "label",
+        context.length_bucket ?? "unknown",
+        { length_bucket: context.length_bucket ?? null },
+      ),
+    );
+  }
+  const toolDensityScore = scoreMappedLabelValue(context.tool_density, TOOL_DENSITY_SCORES);
+  if (toolDensityScore != null) {
+    components.push(
+      buildComponent(
+        "tool_density",
+        "Tool Density",
+        toolDensityScore,
+        COMPONENT_WEIGHTS.toolDensity,
+        "label",
+        context.tool_density ?? "unknown",
+        { tool_density: context.tool_density ?? null },
+      ),
+    );
+  }
+  const failureJudgeScore = scoreFailureJudgeValue(context);
+  if (failureJudgeScore != null) {
+    components.push(
+      buildComponent(
+        "failure_judge",
+        "Failure Judge",
+        failureJudgeScore,
+        COMPONENT_WEIGHTS.failureJudge,
+        "judge",
+        context.failure_judge_verdict ?? "unknown",
+        {
+          verdict: context.failure_judge_verdict ?? null,
+          agreement: context.failure_judge_agreement ?? null,
+          confidence: context.failure_judge_confidence ?? null,
+        },
+      ),
+    );
+  }
+  const noveltyScore = scoreNoveltyValue(context.anomaly_score);
+  if (noveltyScore != null) {
+    components.push(
+      buildComponent(
+        "novelty",
+        "Novelty",
+        noveltyScore,
+        COMPONENT_WEIGHTS.novelty,
+        "corpus",
+        "corpus anomaly signal",
+        { anomaly_score: round4(context.anomaly_score ?? 0) },
+      ),
+    );
+  }
-  const total = Math.min(1.0, fidelityBase + interestBonus + lengthBonus);
+  const breakdown = buildScoreBreakdown(components);
+  const weightedTotal = components.reduce((sum, component) => sum + (component.score * component.weight), 0);
+  const total = round4(components.length > 0 ? weightedTotal / breakdown.available_weight : 0);
   const payoutCents = Math.min(500, Math.round(total * 500));
   return {
     completeness,
     failure_modes: failureModes,
     has_error_recovery: failureModes.includes("graceful_recovery"),
-    has_repeated_calls: failureModes.includes("repeated_tool_calls"),
+    has_repeated_calls: failureModes.includes("repeated_failing_root_cause"),
     content_fidelity: trace.content_fidelity,
     total,
     payout_cents: payoutCents,
+    breakdown,
     failure_taxonomy_label: null,
     failure_taxonomy_explanation: null,
-    rarity_score: null,
+    rarity_score: noveltyScore == null ? null : round4(noveltyScore),
     cluster_id: null,
     is_duplicate: false,
     duplicate_of: null,
     scored_at: new Date().toISOString(),
-    scorer_version: "v0-heuristic",
+    scorer_version: SCORE_VERSION,
   };
 }