@bilalimamoglu/sift 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1262,7 +1262,8 @@ function renderInstructionBody() {
1262
1262
  "- After making or planning a fix, refresh the truth with `sift rerun` so the same full suite runs again at `standard` and shows what is resolved or still remaining.",
1263
1263
  "- The normal stop budget is `standard` first, then at most one zoom step before raw.",
1264
1264
  "- Only if more detail is still needed after `sift rerun`, use `sift rerun --remaining --detail focused`, then `sift rerun --remaining --detail verbose`, then `sift rerun --remaining --detail verbose --show-raw`.",
1265
- "- `sift rerun --remaining` currently supports only argv-mode `pytest ...` or `python -m pytest ...` runs; otherwise rerun a narrowed command manually with `sift exec --preset test-status -- <narrowed pytest command>`.",
1265
+ "- `sift rerun --remaining` narrows automatically for `pytest` and reruns the full original command for `vitest` and `jest` while keeping the diagnosis focused on what still fails.",
1266
+ "- For other runners, rerun a narrowed command manually with `sift exec --preset test-status -- <narrowed test command>` if you need a smaller surface.",
1266
1267
  "- Start with `standard` text. Use diagnose JSON only when automation or machine branching truly needs it.",
1267
1268
  "- If `standard` already shows bucket-level root cause, anchor, and fix lines, trust it and report from it directly.",
1268
1269
  "- In that case, do not re-verify the same bucket with raw pytest; at most do one targeted source read before you edit.",
@@ -2124,7 +2125,125 @@ function createProvider(config) {
2124
2125
 
2125
2126
  // src/core/testStatusDecision.ts
2126
2127
  import { z as z2 } from "zod";
2127
- var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","primary_suspect_kind":"test|app_code|config|environment|tooling|unknown","confidence_reason":string,"dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"suspect_kind":"test|app_code|config|environment|tooling|unknown","fix_hint":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
2128
+
2129
+ // src/core/testStatusTargets.ts
2130
/**
 * Removes duplicate entries from a list.
 *
 * @param {Iterable<*>} values Items to de-duplicate.
 * @returns {Array<*>} The distinct items, in first-seen order.
 */
function unique(values) {
  const distinct = new Set(values);
  return Array.from(distinct);
}
2133
/**
 * Canonicalises a test identifier so equal targets compare equal.
 *
 * Backslashes become forward slashes, every run of whitespace collapses to a
 * single space, and surrounding whitespace is removed.
 *
 * @param {string} value Raw test id or label.
 * @returns {string} The normalised id.
 */
function normalizeTestId(value) {
  const forwardSlashed = value.replace(/\\/g, "/");
  const singleSpaced = forwardSlashed.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
2136
/**
 * Drops trailing " - <message>" prose from a test label.
 *
 * The cut happens at the first whitespace-delimited dash whose remainder
 * runs to the end of the string; the kept part is trimmed.
 *
 * @param {string} value Label that may carry a trailing message.
 * @returns {string} The label without the trailing prose.
 */
function stripMatcherProse(value) {
  // The pattern is anchored to the end of input, so a match always spans from
  // its start index to the end of the string.
  const cutAt = value.search(/\s+-\s+.*$/);
  const kept = cutAt === -1 ? value : value.slice(0, cutAt);
  return kept.trim();
}
2139
/**
 * Finds the first `*.test.*` / `*.spec.*` JavaScript or TypeScript file path
 * mentioned in a string.
 *
 * @param {string} value Text to scan (for example a runner failure label).
 * @returns {string|null} The normalised file path, or null when none is found.
 */
function extractJsFile(value) {
  const jsTestFilePattern = /([A-Za-z0-9_./-]+\.(?:test|spec)\.[cm]?[jt]sx?)/i;
  const found = value.match(jsTestFilePattern);
  if (!found) {
    return null;
  }
  return normalizeTestId(found[1]);
}
2143
/**
 * Reduces a runner-specific failure label to a stable target id.
 *
 * - `pytest`: the normalised label with trailing " - message" prose removed.
 * - `vitest` / `jest`: runner decorations (a leading `FAIL`, a leading `❯` or
 *   `×`, a trailing `[...]` tag) are removed; the result is either
 *   `<file>` or `<file> > <test name>` when a test file is recognised, and the
 *   prose-stripped label otherwise.
 * - any other runner: the normalised label with one surrounding quote
 *   character removed from each end.
 *
 * @param {string} label Raw failure label from runner output.
 * @param {string} runner Runner name.
 * @returns {string} The normalised failing target.
 */
function normalizeFailingTarget(label, runner) {
  const unquoted = normalizeTestId(label).replace(/^['"]|['"]$/g, "");
  switch (runner) {
    case "pytest":
      return stripMatcherProse(unquoted);
    case "vitest":
    case "jest":
      break;
    default:
      return unquoted;
  }

  const compact = unquoted
    .replace(/^FAIL\s+/i, "")
    .replace(/^[❯×]\s*/, "")
    .replace(/\s+\[[^\]]+\]\s*$/, "")
    .trim();
  const file = extractJsFile(compact);
  if (!file) {
    return stripMatcherProse(compact);
  }

  const afterFile = compact.slice(compact.indexOf(file) + file.length).trim();
  // Anything after the file that is not a " > test name" chain (including an
  // empty remainder) collapses to the file itself.
  if (!afterFile.startsWith(">")) {
    return file;
  }
  const testName = stripMatcherProse(afterFile.replace(/^>\s*/, ""));
  return testName.length > 0 ? `${file} > ${testName}` : file;
}
2167
/**
 * Derives the "family" prefix used to group a failing test target.
 *
 * Only the file portion of the id is considered: everything from the first
 * `::` or ` > ` separator onward is dropped. Rules are tried in order:
 *   1. `.github/workflows/...`               -> `.github/workflows/`
 *   2. `test/<dir>/...` or `tests/<dir>/...` -> that two-level directory
 *   3. `src/<dir>/...`                       -> `src/<dir>/`
 *   4. a recognised config file              -> the whole file path
 *   5. otherwise the leading directories (at most two), the bare file name
 *      when the path has no directory, or `"other"` when nothing is left.
 *
 * @param {string} value Test id or path.
 * @returns {string} The family prefix.
 */
function extractFamilyPrefix(value) {
  const normalized = normalizeTestId(value);
  const filePart = normalized.split("::")[0]?.split(" > ")[0]?.trim() ?? normalized;
  const workflowMatch = filePart.match(/^(\.github\/workflows\/)/);
  if (workflowMatch) {
    return workflowMatch[1];
  }
  const testsMatch = filePart.match(/^((?:test|tests)\/[^/]+\/)/);
  if (testsMatch) {
    return testsMatch[1];
  }
  const srcMatch = filePart.match(/^(src\/[^/]+\/)/);
  if (srcMatch) {
    return srcMatch[1];
  }
  const configMatch = filePart.match(
    /^((?:[^/]+\/)*(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|conftest\.py|(?:vitest|jest)\.config\.[^/]+|tsconfig(?:\.[^/]+)?\.json|[^/]*config[^/]*\.(?:json|ya?ml)))$/i
  );
  if (configMatch) {
    return configMatch[1];
  }
  const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
  if (segments.length === 0) {
    return "other";
  }
  if (segments.length === 1) {
    return segments[0];
  }
  // The last segment is the file itself, so it must not be counted as a
  // directory: `tests/test_a.py` belongs to `tests/`, not `tests/test_a.py/`.
  const directories = segments.slice(0, -1).slice(0, 2);
  return `${directories.join("/")}/`;
}
2197
/**
 * Summarises a list of test targets by family prefix.
 *
 * Duplicate targets are counted once. Families are ordered by descending
 * count, ties broken by `localeCompare` on the prefix, and only the first
 * five are kept.
 *
 * @param {Iterable<string>} values Test ids or labels.
 * @returns {{count: number, families: Array<{prefix: string, count: number}>}}
 *   `count` is the number of distinct targets; `families` the top groups.
 */
function buildTestTargetSummary(values) {
  const distinctTargets = unique(values);
  const tally = new Map();
  distinctTargets.forEach((target) => {
    const family = extractFamilyPrefix(target);
    const seenSoFar = tally.get(family) ?? 0;
    tally.set(family, seenSoFar + 1);
  });

  const ranked = Array.from(tally, ([prefix, count]) => ({ prefix, count }));
  ranked.sort((left, right) => right.count - left.count || left.prefix.localeCompare(right.prefix));

  return {
    count: distinctTargets.length,
    families: ranked.slice(0, 5)
  };
}
2218
/**
 * Renders a target summary as a compact single-line string.
 *
 * Each family is written as `prefix (count)`. The prefix and count used to be
 * concatenated with nothing between them (`tests/unit/3`, `other3`), which
 * made the count indistinguishable from the prefix.
 *
 * @param {{count: number, families: Array<{prefix: string, count: number}>}} summary
 *   Summary in the shape `{ count, families }`.
 * @returns {string} `"count=0"` when there are no targets, otherwise
 *   `"count=<n>; families=<list>"`, where the list is `"none"` if empty.
 */
function formatTargetSummary(summary) {
  if (summary.count === 0) {
    return "count=0";
  }
  const families = summary.families.length > 0
    ? summary.families.map((family) => `${family.prefix} (${family.count})`).join(", ")
    : "none";
  return `count=${summary.count}; families=${families}`;
}
2225
/**
 * Joins labels into an English list with an Oxford comma.
 *
 * @param {string[]} families Labels to join.
 * @returns {string} `""` for no labels, the label itself for one,
 *   `"a and b"` for two, and `"a, b, and c"` for three or more.
 */
function joinFamilies(families) {
  switch (families.length) {
    case 0:
      return "";
    case 1:
      return families[0];
    case 2:
      return `${families[0]} and ${families[1]}`;
    default: {
      const leading = families.slice(0, -1);
      const last = families[families.length - 1];
      return `${leading.join(", ")}, and ${last}`;
    }
  }
}
2237
/**
 * Builds the human-readable "across ..." clause for a target summary.
 *
 * @param {{count: number, families: Array<{prefix: string, count: number}>}} summary
 *   Summary in the shape `{ count, families }`.
 * @returns {string|null} `"across <prefix> (<count>), ..."`, or null when the
 *   summary has no targets or no families.
 */
function describeTargetSummary(summary) {
  const nothingToDescribe = summary.count === 0 || summary.families.length === 0;
  if (nothingToDescribe) {
    return null;
  }
  const labelled = summary.families.map(({ prefix, count }) => `${prefix} (${count})`);
  return `across ${joinFamilies(labelled)}`;
}
2244
+
2245
+ // src/core/testStatusDecision.ts
2246
+ var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","remaining_mode":"none|subset_rerun|full_rerun_diff","primary_suspect_kind":"test|app_code|config|environment|tooling|unknown","confidence_reason":string,"dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"suspect_kind":"test|app_code|config|environment|tooling|unknown","fix_hint":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
2128
2247
  var TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT = '{"diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","provider_confidence":number|null,"bucket_supplements":[{"label":string,"count":number,"root_cause":string,"anchor":{"file":string|null,"line":number|null,"search_hint":string|null},"fix_hint":string|null,"confidence":number}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string}}';
2129
2248
  var nextBestActionSchema = z2.object({
2130
2249
  code: z2.enum([
@@ -2166,6 +2285,7 @@ var testStatusDiagnoseContractSchema = z2.object({
2166
2285
  additional_source_read_likely_low_value: z2.boolean(),
2167
2286
  read_raw_only_if: z2.string().nullable(),
2168
2287
  decision: z2.enum(["stop", "zoom", "read_source", "read_raw"]),
2288
+ remaining_mode: z2.enum(["none", "subset_rerun", "full_rerun_diff"]),
2169
2289
  primary_suspect_kind: z2.enum([
2170
2290
  "test",
2171
2291
  "app_code",
@@ -2499,54 +2619,127 @@ function extractReasonDetail(reason, prefix) {
2499
2619
  function formatCount(count, singular, plural = `${singular}s`) {
2500
2620
  return `${count} ${count === 1 ? singular : plural}`;
2501
2621
  }
2502
- function unique(values) {
2622
+ function unique2(values) {
2503
2623
  return [...new Set(values)];
2504
2624
  }
2505
- function normalizeTestId(value) {
2625
+ function normalizeTestId2(value) {
2506
2626
  return value.replace(/\\/g, "/").trim();
2507
2627
  }
2508
- function extractTestFamilyPrefix(value) {
2509
- const normalized = normalizeTestId(value);
2510
- const testsMatch = normalized.match(/^(tests\/[^/]+\/)/);
2511
- if (testsMatch) {
2512
- return testsMatch[1];
2628
+ function normalizePathCandidate(value) {
2629
+ if (!value) {
2630
+ return null;
2513
2631
  }
2514
- const filePart = normalized.split("::")[0]?.trim() ?? "";
2515
- if (!filePart.includes("/")) {
2516
- return "other";
2632
+ let normalized = value.replace(/\\/g, "/").trim();
2633
+ normalized = normalized.replace(/^[("'`<\[]+/, "").replace(/[>"'`\]),:;]+$/, "");
2634
+ normalized = normalized.replace(/^<repo>\//, "").replace(/^\.\//, "");
2635
+ if (normalized.includes("::")) {
2636
+ normalized = normalized.split("::")[0]?.trim() ?? normalized;
2517
2637
  }
2518
- const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
2519
- if (segments.length === 0) {
2520
- return "other";
2638
+ if (normalized.startsWith("/") && !normalized.startsWith("/tmp/") && !normalized.startsWith("/var/tmp/")) {
2639
+ return null;
2640
+ }
2641
+ if (/^\.github\/workflows\/.+\.(?:yml|yaml)$/i.test(normalized)) {
2642
+ return normalized;
2521
2643
  }
2522
- return `${segments[0]}/`;
2644
+ if (/^(?:src|test|tests)\/.+\.[A-Za-z0-9._-]+$/i.test(normalized)) {
2645
+ return normalized;
2646
+ }
2647
+ if (/^(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py)$/i.test(
2648
+ normalized
2649
+ )) {
2650
+ return normalized;
2651
+ }
2652
+ if (/^(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+$/i.test(normalized)) {
2653
+ return normalized;
2654
+ }
2655
+ if (/^(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json$/i.test(normalized)) {
2656
+ return normalized;
2657
+ }
2658
+ if (/^[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)$/i.test(normalized)) {
2659
+ return normalized;
2660
+ }
2661
+ return null;
2523
2662
  }
2524
- function buildTestTargetSummary(values) {
2525
- const counts = /* @__PURE__ */ new Map();
2526
- for (const value of values) {
2527
- const prefix = extractTestFamilyPrefix(value);
2528
- counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
2663
+ function addPathCandidatesFromText(target, text) {
2664
+ if (!text) {
2665
+ return;
2529
2666
  }
2530
- const families = [...counts.entries()].map(([prefix, count]) => ({
2531
- prefix,
2532
- count
2533
- })).sort((left, right) => {
2534
- if (right.count !== left.count) {
2535
- return right.count - left.count;
2667
+ const pattern = /(?:^|[\s("'`])((?:\.github\/workflows\/[A-Za-z0-9._/-]+\.(?:yml|yaml)|(?:src|test|tests)\/[A-Za-z0-9._/-]+\.[A-Za-z0-9._-]+|package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py|(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+|(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json|[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)))/g;
2668
+ for (const match of text.matchAll(pattern)) {
2669
+ const normalized = normalizePathCandidate(match[1] ?? null);
2670
+ if (normalized) {
2671
+ target.add(normalized);
2672
+ }
2673
+ }
2674
+ }
2675
+ function extractBucketPathCandidates(args) {
2676
+ const candidates = /* @__PURE__ */ new Set();
2677
+ const push = (value) => {
2678
+ const normalized = normalizePathCandidate(value);
2679
+ if (normalized) {
2680
+ candidates.add(normalized);
2536
2681
  }
2537
- return left.prefix.localeCompare(right.prefix);
2538
- }).slice(0, 5);
2539
- return {
2540
- count: values.length,
2541
- families
2542
2682
  };
2683
+ push(args.readTarget?.file);
2684
+ for (const item of args.bucket.representativeItems) {
2685
+ push(item.file);
2686
+ addPathCandidatesFromText(candidates, item.label);
2687
+ addPathCandidatesFromText(candidates, item.reason);
2688
+ }
2689
+ addPathCandidatesFromText(candidates, args.bucket.reason);
2690
+ addPathCandidatesFromText(candidates, args.bucket.headline);
2691
+ for (const line of args.bucket.summaryLines) {
2692
+ addPathCandidatesFromText(candidates, line);
2693
+ }
2694
+ return [...candidates];
2543
2695
  }
2544
- function formatTargetSummary(summary) {
2545
- if (summary.count === 0) {
2546
- return "count=0";
2696
/**
 * Tells whether a normalised path matches one of the config-file patterns:
 * a GitHub workflow YAML file, `package.json`, `pytest.ini`,
 * `pyproject.toml`, `tox.ini`, a `conftest.py`, a vitest/jest config file, a
 * `tsconfig*.json`, or any JSON/YAML path containing "config".
 *
 * @param {string} path8 Candidate path.
 * @returns {boolean} True when any pattern matches.
 */
function isConfigPathCandidate(path8) {
  const configPatterns = [
    /^\.github\/workflows\/.+\.(?:yml|yaml)$/i,
    /^(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py)$/i,
    /^(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+$/i,
    /^(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json$/i,
    /^[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)$/i
  ];
  return configPatterns.some((pattern) => pattern.test(path8));
}
2701
/**
 * Tells whether a normalised path sits under the `src/` tree.
 *
 * @param {string} path8 Candidate path.
 * @returns {boolean} True when the path starts with `src/`.
 */
function isAppPathCandidate(path8) {
  const appRoot = "src/";
  return path8.startsWith(appRoot);
}
2704
/**
 * Tells whether a normalised path sits under a `test/` or `tests/` tree.
 *
 * @param {string} path8 Candidate path.
 * @returns {boolean} True when the path starts with `test/` or `tests/`.
 */
function isTestPathCandidate(path8) {
  const testRoots = ["test/", "tests/"];
  return testRoots.some((root) => path8.startsWith(root));
}
2707
/**
 * Detects matcher-style assertion text of the form
 * "expected ... to be" / "expected ... to contain" (case-insensitive, and
 * allowed to span multiple lines).
 *
 * @param {string} detail Assertion detail text.
 * @returns {boolean} True when the phrase is present.
 */
function looksLikeMatcherLiteralComparison(detail) {
  const matcherPhrase = /\bexpected\b[\s\S]*\bto (?:be|contain)\b/i;
  return matcherPhrase.exec(detail) !== null;
}
2710
/**
 * Heuristic for assertion details that compare against a golden or literal
 * output. It fires when the text contains any of:
 *   - a literal backslash followed by `n` (an escaped newline),
 *   - a "- Tests" / "- Decision" / "- Likely owner" / "- Next" /
 *     "- Stop signal" bullet,
 *   - one of the keywords `node-version`, `workflow_dispatch`, `run-name`,
 *     `matrix`, `registry-url` (case-insensitive).
 *
 * @param {string} detail Assertion detail text.
 * @returns {boolean} True when any marker is present.
 */
function looksLikeGoldenLiteralDrift(detail) {
  const driftMarkers = [
    /\\n/,
    /-\s+(?:Tests|Decision|Likely owner|Next|Stop signal)\b/,
    /\b(?:node-version|workflow_dispatch|run-name|matrix|registry-url)\b/i
  ];
  return driftMarkers.some((marker) => marker.test(detail));
}
2713
+ function isGoldenOutputDriftBucket(bucket) {
2714
+ if (bucket.type !== "assertion_failure") {
2715
+ return false;
2547
2716
  }
2548
- const families = summary.families.length > 0 ? summary.families.map((family) => `${family.prefix}${family.count}`).join(", ") : "none";
2549
- return `count=${summary.count}; families=${families}`;
2717
+ const detail = extractReasonDetail(bucket.reason, "assertion failed:") ?? bucket.reason;
2718
+ if (!looksLikeMatcherLiteralComparison(detail)) {
2719
+ return false;
2720
+ }
2721
+ if (bucket.reason.startsWith("snapshot mismatch:")) {
2722
+ return false;
2723
+ }
2724
+ if (!looksLikeGoldenLiteralDrift(detail)) {
2725
+ return false;
2726
+ }
2727
+ const candidates = extractBucketPathCandidates({
2728
+ bucket
2729
+ });
2730
+ return candidates.some((candidate) => isConfigPathCandidate(candidate) || isTestPathCandidate(candidate));
2731
+ }
2732
+ function specializeBucket(bucket) {
2733
+ if (!isGoldenOutputDriftBucket(bucket)) {
2734
+ return bucket;
2735
+ }
2736
+ return {
2737
+ ...bucket,
2738
+ type: "golden_output_drift",
2739
+ reason: "golden output drift: expected literal or golden output no longer matches current output",
2740
+ labelOverride: "golden output drift",
2741
+ hint: "Update the expected literal or golden output if the new output is intentional; otherwise fix the generated output and rerun."
2742
+ };
2550
2743
  }
2551
2744
  function classifyGenericBucketType(reason) {
2552
2745
  const extended = findExtendedBucketSpec(reason);
@@ -2571,6 +2764,9 @@ function classifyGenericBucketType(reason) {
2571
2764
  if (reason.startsWith("missing module:")) {
2572
2765
  return "import_dependency_failure";
2573
2766
  }
2767
+ if (reason.startsWith("golden output drift:")) {
2768
+ return "golden_output_drift";
2769
+ }
2574
2770
  if (reason.startsWith("assertion failed:")) {
2575
2771
  return "assertion_failure";
2576
2772
  }
@@ -2723,7 +2919,7 @@ function mergeBucketDetails(existing, incoming) {
2723
2919
  count,
2724
2920
  confidence: Math.max(existing.confidence, incoming.confidence),
2725
2921
  representativeItems,
2726
- entities: unique([...existing.entities, ...incoming.entities]),
2922
+ entities: unique2([...existing.entities, ...incoming.entities]),
2727
2923
  hint: existing.hint ?? incoming.hint,
2728
2924
  overflowCount: Math.max(
2729
2925
  existing.overflowCount,
@@ -2915,6 +3111,9 @@ function labelForBucket(bucket) {
2915
3111
  if (bucket.type === "import_dependency_failure") {
2916
3112
  return "import dependency failure";
2917
3113
  }
3114
+ if (bucket.type === "golden_output_drift") {
3115
+ return "golden output drift";
3116
+ }
2918
3117
  if (bucket.type === "assertion_failure") {
2919
3118
  return "assertion failure";
2920
3119
  }
@@ -2949,6 +3148,9 @@ function rootCauseConfidenceFor(bucket) {
2949
3148
  if (bucket.type === "contract_snapshot_drift") {
2950
3149
  return bucket.entities.length > 0 ? 0.92 : 0.76;
2951
3150
  }
3151
+ if (bucket.type === "golden_output_drift") {
3152
+ return 0.78;
3153
+ }
2952
3154
  if (bucket.source === "provider") {
2953
3155
  return Math.max(0.6, Math.min(bucket.confidence, 0.82));
2954
3156
  }
@@ -3023,6 +3225,9 @@ function buildReadTargetWhy(args) {
3023
3225
  if (args.bucket.type === "import_dependency_failure") {
3024
3226
  return "it is the first visible failing module in this missing dependency bucket";
3025
3227
  }
3228
+ if (args.bucket.type === "golden_output_drift") {
3229
+ return "it is the first visible golden or literal drift anchor for this bucket";
3230
+ }
3026
3231
  if (args.bucket.type === "assertion_failure") {
3027
3232
  return "it is the first visible failing test in this bucket";
3028
3233
  }
@@ -3100,6 +3305,9 @@ function buildReadTargetSearchHint(bucket, anchor) {
3100
3305
  if (assertionText) {
3101
3306
  return assertionText;
3102
3307
  }
3308
+ if (bucket.type === "golden_output_drift") {
3309
+ return bucket.representativeItems.map((item) => item.reason.match(/^assertion failed:\s+(.+)$/)?.[1] ?? item.reason).find(Boolean) ?? anchor.label.split("::")[1]?.trim() ?? null;
3310
+ }
3103
3311
  if (bucket.reason.startsWith("unknown ")) {
3104
3312
  return anchor.reason;
3105
3313
  }
@@ -3154,18 +3362,36 @@ function buildConcreteNextNote(args) {
3154
3362
  }
3155
3363
  const lead = primaryTarget.context_hint.start_line !== null && primaryTarget.context_hint.end_line !== null ? `Read ${primaryTarget.file} lines ${primaryTarget.context_hint.start_line}-${primaryTarget.context_hint.end_line} first; ${primaryTarget.why}.` : primaryTarget.context_hint.search_hint ? `Search for ${primaryTarget.context_hint.search_hint} in ${primaryTarget.file} first; ${primaryTarget.why}.` : `Read ${formatReadTargetLocation(primaryTarget)} first; ${primaryTarget.why}.`;
3156
3364
  if (args.nextBestAction.code === "fix_dominant_blocker") {
3365
+ if (args.remainingMode === "subset_rerun") {
3366
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
3367
+ }
3368
+ if (args.remainingMode === "full_rerun_diff") {
3369
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
3370
+ }
3157
3371
  if (args.nextBestAction.bucket_index === 1 && args.hasSecondaryVisibleBucket) {
3158
3372
  return "Fix bucket 1 first, then rerun the full suite at standard. Secondary buckets are already visible behind it.";
3159
3373
  }
3160
3374
  return `Fix bucket ${args.nextBestAction.bucket_index ?? 1} first, then rerun the full suite at standard.`;
3161
3375
  }
3162
3376
  if (args.nextBestAction.code === "read_source_for_bucket") {
3377
+ if (args.remainingMode === "subset_rerun") {
3378
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
3379
+ }
3380
+ if (args.remainingMode === "full_rerun_diff") {
3381
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
3382
+ }
3163
3383
  return lead;
3164
3384
  }
3165
3385
  if (args.nextBestAction.code === "insufficient_signal") {
3166
- if (args.nextBestAction.note.startsWith("Provider follow-up failed")) {
3386
+ if (args.nextBestAction.note.startsWith("Provider follow-up")) {
3167
3387
  return args.nextBestAction.note;
3168
3388
  }
3389
+ if (args.remainingMode === "subset_rerun") {
3390
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
3391
+ }
3392
+ if (args.remainingMode === "full_rerun_diff") {
3393
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
3394
+ }
3169
3395
  return `${lead} Then take one deeper sift pass before raw traceback.`;
3170
3396
  }
3171
3397
  return args.nextBestAction.note;
@@ -3174,13 +3400,13 @@ function extractMiniDiff(input, bucket) {
3174
3400
  if (bucket.type !== "contract_snapshot_drift") {
3175
3401
  return null;
3176
3402
  }
3177
- const addedPaths = unique(
3403
+ const addedPaths = unique2(
3178
3404
  [...input.matchAll(/[+-]\s+'(\/api\/[^']+)'/g)].map((match) => match[1])
3179
3405
  ).length;
3180
- const removedModels = unique(
3406
+ const removedModels = unique2(
3181
3407
  [...input.matchAll(/[+-]\s+'([A-Za-z0-9._/-]+-[A-Za-z0-9._-]+)'/g)].map((match) => match[1])
3182
3408
  ).length;
3183
- const changedTaskMappings = unique(
3409
+ const changedTaskMappings = unique2(
3184
3410
  [...input.matchAll(/[+-]\s+'([a-z]+(?:_[a-z0-9]+)+)'/g)].map((match) => match[1])
3185
3411
  ).length;
3186
3412
  if (addedPaths === 0 && removedModels === 0 && changedTaskMappings === 0) {
@@ -3281,7 +3507,7 @@ function pickUnknownAnchor(args) {
3281
3507
  }
3282
3508
  const label = args.kind === "error" ? args.analysis.visibleErrorLabels[0] : args.analysis.visibleFailedLabels[0];
3283
3509
  if (label) {
3284
- const normalizedLabel = normalizeTestId(label);
3510
+ const normalizedLabel = normalizeTestId2(label);
3285
3511
  const fileMatch = normalizedLabel.match(/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+)\b/);
3286
3512
  const file = fileMatch?.[1] ?? normalizedLabel.split("::")[0] ?? null;
3287
3513
  return {
@@ -3409,16 +3635,29 @@ function buildDecisionLine(contract) {
3409
3635
  }
3410
3636
  return "- Decision: raw only if exact traceback is required.";
3411
3637
  }
3638
/**
 * Produces the "Remaining pass" bullet for a diagnose contract.
 *
 * @param {{remaining_mode: string}} contract Contract whose `remaining_mode`
 *   is inspected.
 * @returns {string|null} The bullet for `subset_rerun` or `full_rerun_diff`,
 *   or null for any other mode.
 */
function buildRemainingPassLine(contract) {
  switch (contract.remaining_mode) {
    case "subset_rerun":
      return "- Remaining pass: showing only what is still failing from the cached baseline.";
    case "full_rerun_diff":
      return "- Remaining pass: full rerun analyzed against the cached baseline because narrowed rerun is not available for this runner.";
    default:
      return null;
  }
}
3412
3647
  function buildComparisonLines(contract) {
3413
3648
  const lines = [];
3649
+ const resolvedSummary = buildTestTargetSummary(contract.resolved_tests);
3650
+ const remainingSummary = buildTestTargetSummary(contract.remaining_tests);
3414
3651
  if (contract.resolved_tests.length > 0) {
3652
+ const summaryText = describeTargetSummary(resolvedSummary);
3415
3653
  lines.push(
3416
- `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set.`
3654
+ `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set${summaryText ? ` ${summaryText}` : ""}.`
3417
3655
  );
3418
3656
  }
3419
- if (contract.resolved_tests.length > 0 && contract.remaining_tests.length > 0) {
3657
+ if (contract.remaining_tests.length > 0 && (contract.resolved_tests.length > 0 || contract.remaining_mode !== "none")) {
3658
+ const summaryText = describeTargetSummary(remainingSummary);
3420
3659
  lines.push(
3421
- `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}.`
3660
+ `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}${summaryText ? ` ${summaryText}` : ""}.`
3422
3661
  );
3423
3662
  }
3424
3663
  return lines;
@@ -3490,6 +3729,13 @@ function resolveBucketFixHint(args) {
3490
3729
  return "Inspect the first visible anchor for this bucket, apply the smallest fix that explains it, then rerun the full suite at standard.";
3491
3730
  }
3492
3731
  function deriveBucketSuspectKind(args) {
3732
+ const pathCandidates = extractBucketPathCandidates({
3733
+ bucket: args.bucket,
3734
+ readTarget: args.readTarget
3735
+ });
3736
+ const hasConfigCandidate = pathCandidates.some((candidate) => isConfigPathCandidate(candidate));
3737
+ const hasAppCandidate = pathCandidates.some((candidate) => isAppPathCandidate(candidate));
3738
+ const hasTestCandidate = pathCandidates.some((candidate) => isTestPathCandidate(candidate));
3493
3739
  if (args.bucket.type === "shared_environment_blocker" || args.bucket.type === "fixture_guard_failure" || args.bucket.type === "permission_denied_failure" || args.bucket.type === "django_db_access_denied" || args.bucket.type === "network_failure" || args.bucket.type === "service_unavailable" || args.bucket.type === "db_connection_failure" || args.bucket.type === "auth_bypass_absent" || args.bucket.type === "fixture_teardown_failure") {
3494
3740
  return "environment";
3495
3741
  }
@@ -3499,6 +3745,18 @@ function deriveBucketSuspectKind(args) {
3499
3745
  if (args.bucket.type === "contract_snapshot_drift" || args.bucket.type === "snapshot_mismatch" || args.bucket.type === "flaky_test_detected" || args.bucket.type === "xfail_strict_unexpected_pass") {
3500
3746
  return "test";
3501
3747
  }
3748
+ if (args.bucket.type === "golden_output_drift") {
3749
+ if (hasConfigCandidate) {
3750
+ return "config";
3751
+ }
3752
+ if (hasAppCandidate) {
3753
+ return "app_code";
3754
+ }
3755
+ if (hasTestCandidate) {
3756
+ return "test";
3757
+ }
3758
+ return "unknown";
3759
+ }
3502
3760
  if (args.bucket.type === "xdist_worker_crash" || args.bucket.type === "timeout_failure" || args.bucket.type === "async_event_loop_failure" || args.bucket.type === "subprocess_crash_segfault" || args.bucket.type === "memory_error" || args.bucket.type === "resource_leak_warning" || args.bucket.type === "interrupted_run") {
3503
3761
  return "tooling";
3504
3762
  }
@@ -3506,11 +3764,13 @@ function deriveBucketSuspectKind(args) {
3506
3764
  return "unknown";
3507
3765
  }
3508
3766
  if (args.bucket.type === "assertion_failure" || args.bucket.type === "runtime_failure" || args.bucket.type === "type_error_failure" || args.bucket.type === "serialization_encoding_failure") {
3509
- const file = args.readTarget?.file ?? "";
3510
- if (file.startsWith("src/")) {
3767
+ if (hasConfigCandidate) {
3768
+ return "config";
3769
+ }
3770
+ if (hasAppCandidate) {
3511
3771
  return "app_code";
3512
3772
  }
3513
- if (file.startsWith("test/") || file.startsWith("tests/")) {
3773
+ if (hasTestCandidate) {
3514
3774
  return "test";
3515
3775
  }
3516
3776
  return "unknown";
@@ -3563,6 +3823,10 @@ function buildStandardBucketSupport(args) {
3563
3823
  }
3564
3824
  function renderStandard(args) {
3565
3825
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
3826
+ const remainingPassLine = buildRemainingPassLine(args.contract);
3827
+ if (remainingPassLine) {
3828
+ lines.push(remainingPassLine);
3829
+ }
3566
3830
  if (args.contract.main_buckets.length > 0) {
3567
3831
  for (const bucket of args.contract.main_buckets.slice(0, 3)) {
3568
3832
  const rawBucket = args.buckets[bucket.bucket_index - 1];
@@ -3590,13 +3854,19 @@ function renderStandard(args) {
3590
3854
  }
3591
3855
  }
3592
3856
  lines.push(buildDecisionLine(args.contract));
3593
- lines.push(`- Likely owner: ${formatSuspectKindLabel(args.contract.primary_suspect_kind)}`);
3857
+ if (args.contract.main_buckets.length > 0 && args.contract.primary_suspect_kind !== "unknown") {
3858
+ lines.push(`- Likely owner: ${formatSuspectKindLabel(args.contract.primary_suspect_kind)}`);
3859
+ }
3594
3860
  lines.push(`- Next: ${args.contract.next_best_action.note}`);
3595
3861
  lines.push(buildStopSignal(args.contract));
3596
3862
  return lines.join("\n");
3597
3863
  }
3598
3864
  function renderFocused(args) {
3599
3865
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
3866
+ const remainingPassLine = buildRemainingPassLine(args.contract);
3867
+ if (remainingPassLine) {
3868
+ lines.push(remainingPassLine);
3869
+ }
3600
3870
  for (const bucket of args.contract.main_buckets) {
3601
3871
  const rawBucket = args.buckets[bucket.bucket_index - 1];
3602
3872
  lines.push(
@@ -3616,6 +3886,10 @@ function renderFocused(args) {
3616
3886
  }
3617
3887
  function renderVerbose(args) {
3618
3888
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
3889
+ const remainingPassLine = buildRemainingPassLine(args.contract);
3890
+ if (remainingPassLine) {
3891
+ lines.push(remainingPassLine);
3892
+ }
3619
3893
  for (const bucket of args.contract.main_buckets) {
3620
3894
  const rawBucket = args.buckets[bucket.bucket_index - 1];
3621
3895
  lines.push(
@@ -3665,7 +3939,9 @@ function buildTestStatusDiagnoseContract(args) {
3665
3939
  count: residuals.remainingFailed
3666
3940
  })
3667
3941
  ].filter((bucket) => Boolean(bucket));
3668
- const buckets = prioritizeBuckets([...combinedBuckets, ...unknownBuckets]).slice(0, 3);
3942
+ const buckets = prioritizeBuckets(
3943
+ [...combinedBuckets, ...unknownBuckets].map((bucket) => specializeBucket(bucket))
3944
+ ).slice(0, 3);
3669
3945
  const simpleCollectionFailure = args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && buckets.length === 0;
3670
3946
  const dominantBucket = buckets.map((bucket, index) => ({
3671
3947
  bucket,
@@ -3713,9 +3989,9 @@ function buildTestStatusDiagnoseContract(args) {
3713
3989
  mini_diff: extractMiniDiff(args.input, bucket)
3714
3990
  };
3715
3991
  });
3716
- const resolvedTests = unique(args.resolvedTests ?? []);
3717
- const remainingTests = unique(
3718
- args.remainingTests ?? unique([...args.analysis.visibleErrorLabels, ...args.analysis.visibleFailedLabels])
3992
+ const resolvedTests = unique2(args.resolvedTests ?? []);
3993
+ const remainingTests = unique2(
3994
+ args.remainingTests ?? unique2([...args.analysis.visibleErrorLabels, ...args.analysis.visibleFailedLabels])
3719
3995
  );
3720
3996
  const primarySuspectKind = derivePrimarySuspectKind({
3721
3997
  mainBuckets,
@@ -3765,6 +4041,7 @@ function buildTestStatusDiagnoseContract(args) {
3765
4041
  raw_needed: rawNeeded,
3766
4042
  additional_source_read_likely_low_value: diagnosisComplete && !rawNeeded,
3767
4043
  read_raw_only_if: rawNeeded ? "you still need exact traceback lines after focused or verbose detail" : null,
4044
+ remaining_mode: args.remainingMode ?? "none",
3768
4045
  dominant_blocker_bucket_index: dominantBlockerBucketIndex,
3769
4046
  primary_suspect_kind: primarySuspectKind,
3770
4047
  confidence_reason: "Unknown or low-confidence buckets remain; one deeper sift pass is justified.",
@@ -3795,7 +4072,8 @@ function buildTestStatusDiagnoseContract(args) {
3795
4072
  readTargets,
3796
4073
  hasSecondaryVisibleBucket: mainBuckets.some(
3797
4074
  (bucket) => bucket.secondary_visible_despite_blocker
3798
- )
4075
+ ),
4076
+ remainingMode: args.contractOverrides?.remaining_mode ?? baseContract.remaining_mode
3799
4077
  })
3800
4078
  }
3801
4079
  };
@@ -3860,6 +4138,7 @@ function buildTestStatusAnalysisContext(args) {
3860
4138
  `- diagnosis_complete=${args.contract.diagnosis_complete}`,
3861
4139
  `- raw_needed=${args.contract.raw_needed}`,
3862
4140
  `- decision=${args.contract.decision}`,
4141
+ `- remaining_mode=${args.contract.remaining_mode}`,
3863
4142
  `- provider_used=${args.contract.provider_used}`,
3864
4143
  `- provider_failed=${args.contract.provider_failed}`,
3865
4144
  `- raw_slice_strategy=${args.contract.raw_slice_strategy}`,
@@ -4427,7 +4706,7 @@ function detectTestRunner(input) {
4427
4706
  if (/^\s*Test Suites:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m.test(input) || /^\s*Tests:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m.test(input)) {
4428
4707
  return "jest";
4429
4708
  }
4430
- if (/\bpytest\b/i.test(input) || /^\s*=+.*\b\d+\s+failed\b.*=+\s*$/m.test(input) || /\bcollected\s+\d+\s+items\b/i.test(input)) {
4709
+ if (/\bpytest\b/i.test(input) || /^\s*(?:FAILED|ERROR)\s+[A-Za-z0-9_./-]+::[^\n]+$/m.test(input) || /^\s*=+.*\b\d+\s+failed\b.*=+\s*$/m.test(input) || /\bcollected\s+\d+\s+items\b/i.test(input)) {
4431
4710
  return "pytest";
4432
4711
  }
4433
4712
  return "unknown";
@@ -5632,6 +5911,9 @@ function classifyBucketTypeFromReason(reason) {
5632
5911
  if (reason.startsWith("missing module:")) {
5633
5912
  return "import_dependency_failure";
5634
5913
  }
5914
+ if (reason.startsWith("golden output drift:")) {
5915
+ return "golden_output_drift";
5916
+ }
5635
5917
  if (reason.startsWith("assertion failed:")) {
5636
5918
  return "assertion_failure";
5637
5919
  }
@@ -6888,7 +7170,7 @@ function prepareInput(raw, config) {
6888
7170
  function escapeRegExp2(value) {
6889
7171
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
6890
7172
  }
6891
- function unique2(values) {
7173
+ function unique3(values) {
6892
7174
  return [...new Set(values)];
6893
7175
  }
6894
7176
  var genericBucketSearchTerms = /* @__PURE__ */ new Set([
@@ -6989,7 +7271,7 @@ function extractBucketSearchTerms(args) {
6989
7271
  ...args.bucket.evidence,
6990
7272
  ...args.readTargets.filter((target) => target.bucket_index === args.bucket.bucket_index).flatMap((target) => [target.context_hint.search_hint ?? "", target.file])
6991
7273
  ];
6992
- const prioritized = unique2(
7274
+ const prioritized = unique3(
6993
7275
  sources.flatMap((value) => collectCandidateSearchTerms(value)).filter(isHighSignalSearchTerm)
6994
7276
  ).sort((left, right) => {
6995
7277
  const delta = scoreSearchTerm(right) - scoreSearchTerm(left);
@@ -7001,7 +7283,7 @@ function extractBucketSearchTerms(args) {
7001
7283
  if (prioritized.length > 0) {
7002
7284
  return prioritized.slice(0, 6);
7003
7285
  }
7004
- const fallbackTerms = unique2(
7286
+ const fallbackTerms = unique3(
7005
7287
  [...args.bucket.evidence, args.bucket.root_cause].flatMap((value) => value.split(/->|:/).map((part) => normalizeSearchTerm(part))).filter(isHighSignalSearchTerm)
7006
7288
  );
7007
7289
  return fallbackTerms.slice(0, 4);
@@ -7039,7 +7321,7 @@ function buildLineWindows(args) {
7039
7321
  return [...selected].sort((left, right) => left - right).map((index) => args.lines[index]);
7040
7322
  }
7041
7323
  function buildPriorityLineGroup(args) {
7042
- return unique2([
7324
+ return unique3([
7043
7325
  ...args.indexes.map((index) => args.lines[index]).filter(Boolean),
7044
7326
  ...buildLineWindows(args)
7045
7327
  ]);
@@ -7048,7 +7330,7 @@ function collapseSelectedLines(args) {
7048
7330
  if (args.lines.length === 0) {
7049
7331
  return args.fallback();
7050
7332
  }
7051
- const joined = unique2(args.lines).join("\n").trim();
7333
+ const joined = unique3(args.lines).join("\n").trim();
7052
7334
  if (joined.length === 0) {
7053
7335
  return args.fallback();
7054
7336
  }
@@ -7199,7 +7481,7 @@ function buildTestStatusRawSlice(args) {
7199
7481
  const indexes = lines.map(
7200
7482
  (line, index) => bucketTerms.some((term) => new RegExp(escapeRegExp2(term), "i").test(line)) ? index : -1
7201
7483
  ).filter((index) => index >= 0);
7202
- return unique2([
7484
+ return unique3([
7203
7485
  ...indexes.map((index) => lines[index]).filter(Boolean),
7204
7486
  ...buildPriorityLineGroup({
7205
7487
  lines,
@@ -7242,7 +7524,7 @@ function buildTestStatusRawSlice(args) {
7242
7524
  return [
7243
7525
  buildPriorityLineGroup({
7244
7526
  lines,
7245
- indexes: unique2([...searchHintIndexes, ...fileIndexes]),
7527
+ indexes: unique3([...searchHintIndexes, ...fileIndexes]),
7246
7528
  radius,
7247
7529
  maxLines
7248
7530
  })
@@ -7261,7 +7543,7 @@ function buildTestStatusRawSlice(args) {
7261
7543
  const selected = collapseSelectedLineGroups({
7262
7544
  groups: [
7263
7545
  ...targetGroups,
7264
- unique2([
7546
+ unique3([
7265
7547
  ...summaryIndexes.map((index) => lines[index]).filter(Boolean),
7266
7548
  ...buildLineWindows({
7267
7549
  lines,
@@ -7459,6 +7741,34 @@ function hasRecognizableTestStatusSignal(input) {
7459
7741
  const analysis = analyzeTestStatus(input);
7460
7742
  return analysis.collectionErrorCount !== void 0 || analysis.noTestsCollected || analysis.interrupted || analysis.failed > 0 || analysis.errors > 0 || analysis.passed > 0 || analysis.inlineItems.length > 0 || analysis.buckets.length > 0;
7461
7743
  }
7744
+ function shouldUseCompactTestStatusBypass(args) {
7745
+ if (args.request.policyName !== "test-status") {
7746
+ return false;
7747
+ }
7748
+ if (args.request.detail && args.request.detail !== "standard") {
7749
+ return false;
7750
+ }
7751
+ if (args.request.goal === "diagnose" && args.request.format === "json") {
7752
+ return false;
7753
+ }
7754
+ if (args.request.testStatusContext?.resolvedTests?.length || args.request.testStatusContext?.remainingTests?.length || args.request.testStatusContext?.remainingSubsetAvailable || args.request.testStatusContext?.remainingMode && args.request.testStatusContext.remainingMode !== "none") {
7755
+ return false;
7756
+ }
7757
+ return args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && args.analysis.inlineItems.length === 0 && args.analysis.buckets.length === 0 || args.analysis.noTestsCollected || args.analysis.interrupted && args.analysis.failed === 0 && args.analysis.errors === 0;
7758
+ }
7759
+ function sanitizeProviderFailureReason(reason) {
7760
+ const normalized = reason.trim();
7761
+ const httpStatus = normalized.match(/\bHTTP\s+(\d{3})\b/i)?.[1];
7762
+ if (httpStatus) {
7763
+ return `provider follow-up unavailable (HTTP ${httpStatus})`;
7764
+ }
7765
+ if (/unterminated string|invalid json|unexpected token|json at position|schema|zod|parse/i.test(
7766
+ normalized
7767
+ )) {
7768
+ return "provider follow-up returned unusable structured output";
7769
+ }
7770
+ return "provider follow-up failed";
7771
+ }
7462
7772
  function renderTestStatusDecisionOutput(args) {
7463
7773
  if (args.request.goal === "diagnose" && args.request.format === "json") {
7464
7774
  return JSON.stringify(
@@ -7480,6 +7790,7 @@ function renderTestStatusDecisionOutput(args) {
7480
7790
  return args.decision.standardText;
7481
7791
  }
7482
7792
  function buildTestStatusProviderFailureDecision(args) {
7793
+ const sanitizedReason = sanitizeProviderFailureReason(args.reason);
7483
7794
  const concreteReadTarget = args.baseDecision.contract.read_targets.find(
7484
7795
  (target) => Boolean(target.file)
7485
7796
  );
@@ -7492,6 +7803,7 @@ function buildTestStatusProviderFailureDecision(args) {
7492
7803
  analysis: args.analysis,
7493
7804
  resolvedTests: args.baseDecision.contract.resolved_tests,
7494
7805
  remainingTests: args.baseDecision.contract.remaining_tests,
7806
+ remainingMode: args.request.testStatusContext?.remainingMode,
7495
7807
  contractOverrides: {
7496
7808
  ...args.baseDecision.contract,
7497
7809
  diagnosis_complete: false,
@@ -7507,7 +7819,9 @@ function buildTestStatusProviderFailureDecision(args) {
7507
7819
  next_best_action: {
7508
7820
  code: "read_source_for_bucket",
7509
7821
  bucket_index: args.baseDecision.contract.dominant_blocker_bucket_index ?? concreteReadTarget.bucket_index,
7510
- note: `Provider follow-up failed (${args.reason}). The heuristic anchor is concrete enough to inspect source for the current bucket before reading raw traceback.`
7822
+ note: `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
7823
+ 1
7824
+ )}. The heuristic anchor is concrete enough to inspect source for the current bucket before reading raw traceback.`
7511
7825
  }
7512
7826
  }
7513
7827
  });
@@ -7518,6 +7832,7 @@ function buildTestStatusProviderFailureDecision(args) {
7518
7832
  analysis: args.analysis,
7519
7833
  resolvedTests: args.baseDecision.contract.resolved_tests,
7520
7834
  remainingTests: args.baseDecision.contract.remaining_tests,
7835
+ remainingMode: args.request.testStatusContext?.remainingMode,
7521
7836
  contractOverrides: {
7522
7837
  ...args.baseDecision.contract,
7523
7838
  diagnosis_complete: false,
@@ -7533,7 +7848,11 @@ function buildTestStatusProviderFailureDecision(args) {
7533
7848
  next_best_action: {
7534
7849
  code: shouldZoomFirst ? "insufficient_signal" : "read_raw_for_exact_traceback",
7535
7850
  bucket_index: args.baseDecision.contract.dominant_blocker_bucket_index ?? args.baseDecision.contract.main_buckets[0]?.bucket_index ?? null,
7536
- note: shouldZoomFirst ? `Provider follow-up failed (${args.reason}). Use one deeper sift pass on the same cached output before reading raw traceback lines.` : `Provider follow-up failed (${args.reason}). Read raw traceback only if exact stack lines are still needed.`
7851
+ note: shouldZoomFirst ? `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
7852
+ 1
7853
+ )}. Use one deeper sift pass on the same cached output before reading raw traceback lines.` : `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
7854
+ 1
7855
+ )}. Read raw traceback only if exact stack lines are still needed.`
7537
7856
  }
7538
7857
  }
7539
7858
  });
@@ -7554,23 +7873,28 @@ async function runSiftCore(request, recorder) {
7554
7873
  const provider = createProvider(request.config);
7555
7874
  const hasTestStatusSignal = request.policyName === "test-status" && hasRecognizableTestStatusSignal(heuristicInput);
7556
7875
  const testStatusAnalysis = hasTestStatusSignal ? analyzeTestStatus(heuristicInput) : null;
7557
- const testStatusDecision = hasTestStatusSignal && testStatusAnalysis ? buildTestStatusDiagnoseContract({
7876
+ const useCompactTestStatusOutput = hasTestStatusSignal && testStatusAnalysis ? shouldUseCompactTestStatusBypass({
7877
+ request,
7878
+ analysis: testStatusAnalysis
7879
+ }) : false;
7880
+ const testStatusDecision = hasTestStatusSignal && testStatusAnalysis && !useCompactTestStatusOutput ? buildTestStatusDiagnoseContract({
7558
7881
  input: heuristicInput,
7559
7882
  analysis: testStatusAnalysis,
7560
7883
  resolvedTests: request.testStatusContext?.resolvedTests,
7561
- remainingTests: request.testStatusContext?.remainingTests
7884
+ remainingTests: request.testStatusContext?.remainingTests,
7885
+ remainingMode: request.testStatusContext?.remainingMode
7562
7886
  }) : null;
7563
7887
  const testStatusHeuristicOutput = testStatusDecision ? renderTestStatusDecisionOutput({
7564
7888
  request,
7565
7889
  decision: testStatusDecision
7566
- }) : null;
7890
+ }) : useCompactTestStatusOutput ? applyHeuristicPolicy("test-status", heuristicInput, "standard") : null;
7567
7891
  if (request.config.runtime.verbose) {
7568
7892
  process.stderr.write(
7569
7893
  `${pc2.dim("sift")} provider=${provider.name} model=${request.config.provider.model} base_url=${request.config.provider.baseUrl} input_chars=${prepared.meta.finalLength}
7570
7894
  `
7571
7895
  );
7572
7896
  }
7573
- const heuristicOutput = request.policyName === "test-status" ? testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
7897
+ const heuristicOutput = request.policyName === "test-status" ? useCompactTestStatusOutput ? testStatusHeuristicOutput : testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
7574
7898
  if (heuristicOutput) {
7575
7899
  if (request.config.runtime.verbose) {
7576
7900
  process.stderr.write(`${pc2.dim("sift")} heuristic=${request.policyName}
@@ -7694,6 +8018,7 @@ async function runSiftCore(request, recorder) {
7694
8018
  analysis: testStatusAnalysis,
7695
8019
  resolvedTests: request.testStatusContext?.resolvedTests,
7696
8020
  remainingTests: request.testStatusContext?.remainingTests,
8021
+ remainingMode: request.testStatusContext?.remainingMode,
7697
8022
  providerBucketSupplements: supplement.bucket_supplements,
7698
8023
  contractOverrides: {
7699
8024
  diagnosis_complete: supplement.diagnosis_complete,
@@ -7939,6 +8264,7 @@ var failureBucketTypeSchema = z3.enum([
7939
8264
  "import_dependency_failure",
7940
8265
  "collection_failure",
7941
8266
  "assertion_failure",
8267
+ "golden_output_drift",
7942
8268
  "runtime_failure",
7943
8269
  "interrupted_run",
7944
8270
  "no_tests_collected",
@@ -7979,7 +8305,19 @@ var cachedPytestStateSchema = z3.object({
7979
8305
  failingNodeIds: z3.array(z3.string()),
7980
8306
  remainingNodeIds: z3.array(z3.string()).optional()
7981
8307
  }).optional();
7982
- var cachedRunSchema = z3.object({
8308
+ var testRunnerSchema = z3.enum(["pytest", "vitest", "jest", "unknown"]);
8309
+ var cachedRunnerSubsetSchema = z3.object({
8310
+ available: z3.boolean(),
8311
+ strategy: z3.enum(["pytest-node-ids", "none"]),
8312
+ baseArgv: z3.array(z3.string()).min(1).optional()
8313
+ });
8314
+ var cachedRunnerStateSchema = z3.object({
8315
+ name: testRunnerSchema,
8316
+ failingTargets: z3.array(z3.string()),
8317
+ baselineCommand: cachedCommandSchema,
8318
+ subset: cachedRunnerSubsetSchema
8319
+ });
8320
+ var cachedRunV1Schema = z3.object({
7983
8321
  version: z3.literal(1),
7984
8322
  timestamp: z3.string(),
7985
8323
  presetName: z3.literal("test-status"),
@@ -7997,6 +8335,25 @@ var cachedRunSchema = z3.object({
7997
8335
  analysis: cachedAnalysisSchema,
7998
8336
  pytest: cachedPytestStateSchema
7999
8337
  });
8338
+ var cachedRunV2Schema = z3.object({
8339
+ version: z3.literal(2),
8340
+ timestamp: z3.string(),
8341
+ presetName: z3.literal("test-status"),
8342
+ cwd: z3.string(),
8343
+ commandKey: z3.string(),
8344
+ commandPreview: z3.string(),
8345
+ command: cachedCommandSchema,
8346
+ detail: detailSchema,
8347
+ exitCode: z3.number().int(),
8348
+ rawOutput: z3.string(),
8349
+ capture: z3.object({
8350
+ originalChars: countSchema,
8351
+ truncatedApplied: z3.boolean()
8352
+ }),
8353
+ analysis: cachedAnalysisSchema,
8354
+ runner: cachedRunnerStateSchema
8355
+ });
8356
+ var cachedRunSchema = z3.discriminatedUnion("version", [cachedRunV1Schema, cachedRunV2Schema]);
8000
8357
  var MissingCachedTestStatusRunError = class extends Error {
8001
8358
  constructor() {
8002
8359
  super(
@@ -8045,6 +8402,37 @@ function isPytestExecutable(value) {
8045
8402
  function isPythonExecutable(value) {
8046
8403
  return basenameMatches(value, /^python(?:\d+(?:\.\d+)*)?(?:\.exe)?$/i);
8047
8404
  }
8405
+ function detectRunnerFromCommand(command) {
8406
+ if (!command) {
8407
+ return "unknown";
8408
+ }
8409
+ if (command.mode === "argv") {
8410
+ const [first, second, third] = command.argv;
8411
+ if (first && isPytestExecutable(first)) {
8412
+ return "pytest";
8413
+ }
8414
+ if (first && isPythonExecutable(first) && second === "-m" && third === "pytest") {
8415
+ return "pytest";
8416
+ }
8417
+ if (first && basenameMatches(first, /^vitest(?:\.exe)?$/i)) {
8418
+ return "vitest";
8419
+ }
8420
+ if (first && basenameMatches(first, /^jest(?:\.exe)?$/i)) {
8421
+ return "jest";
8422
+ }
8423
+ return "unknown";
8424
+ }
8425
+ if (/\bpython(?:\d+(?:\.\d+)*)?\s+-m\s+pytest\b|\bpytest\b/i.test(command.shellCommand)) {
8426
+ return "pytest";
8427
+ }
8428
+ if (/\bvitest\b/i.test(command.shellCommand)) {
8429
+ return "vitest";
8430
+ }
8431
+ if (/\bjest\b/i.test(command.shellCommand)) {
8432
+ return "jest";
8433
+ }
8434
+ return "unknown";
8435
+ }
8048
8436
  var shortPytestOptionsWithValue = /* @__PURE__ */ new Set([
8049
8437
  "-c",
8050
8438
  "-k",
@@ -8139,26 +8527,52 @@ function buildCachedCommand(args) {
8139
8527
  }
8140
8528
  return void 0;
8141
8529
  }
8142
- function buildFailingNodeIds(analysis) {
8530
+ function buildFailingTargets(analysis) {
8531
+ const runner = analysis.runner;
8143
8532
  const values = [];
8144
8533
  for (const value of [...analysis.visibleErrorLabels, ...analysis.visibleFailedLabels]) {
8145
- if (value.length > 0 && !values.includes(value)) {
8146
- values.push(value);
8534
+ const normalized = normalizeFailingTarget(value, runner);
8535
+ if (normalized.length > 0 && !values.includes(normalized)) {
8536
+ values.push(normalized);
8147
8537
  }
8148
8538
  }
8149
8539
  return values;
8150
8540
  }
8151
- function buildCachedPytestState(args) {
8541
+ function buildCachedRunnerState(args) {
8152
8542
  const baseArgv = args.command?.mode === "argv" && isSubsetCapablePytestArgv(args.command.argv) ? [...args.command.argv] : void 0;
8543
+ const runnerName = args.analysis.runner !== "unknown" ? args.analysis.runner : detectRunnerFromCommand(args.command);
8153
8544
  return {
8154
- subsetCapable: Boolean(baseArgv),
8155
- baseArgv,
8156
- failingNodeIds: buildFailingNodeIds(args.analysis),
8157
- remainingNodeIds: args.remainingNodeIds
8545
+ name: runnerName,
8546
+ failingTargets: buildFailingTargets(args.analysis),
8547
+ baselineCommand: args.command,
8548
+ subset: {
8549
+ available: runnerName === "pytest" && Boolean(baseArgv),
8550
+ strategy: runnerName === "pytest" && baseArgv ? "pytest-node-ids" : "none",
8551
+ ...runnerName === "pytest" && baseArgv ? { baseArgv } : {}
8552
+ }
8158
8553
  };
8159
8554
  }
8555
+ function normalizeCwd(value) {
8556
+ return path7.resolve(value).replace(/\\/g, "/");
8557
+ }
8558
+ function buildTestStatusBaselineIdentity(args) {
8559
+ const cwd = normalizeCwd(args.cwd);
8560
+ const command = args.command ?? buildCachedCommand({
8561
+ shellCommand: args.shellCommand,
8562
+ command: args.shellCommand ? void 0 : args.commandPreview?.split(" ")
8563
+ });
8564
+ const mode = command?.mode ?? (args.shellCommand ? "shell" : "argv");
8565
+ const normalizedCommand = command?.mode === "argv" ? command.argv.join("") : command?.mode === "shell" ? command.shellCommand.trim().replace(/\s+/g, " ") : (args.commandPreview ?? "").trim().replace(/\s+/g, " ");
8566
+ return [cwd, args.runner, mode, normalizedCommand].join("");
8567
+ }
8160
8568
  function buildTestStatusCommandKey(args) {
8161
- return `${args.shellCommand ? "shell" : "argv"}:${args.commandPreview}`;
8569
+ return buildTestStatusBaselineIdentity({
8570
+ cwd: args.cwd ?? process.cwd(),
8571
+ runner: args.runner ?? "unknown",
8572
+ command: args.command,
8573
+ commandPreview: args.commandPreview,
8574
+ shellCommand: args.shellCommand
8575
+ });
8162
8576
  }
8163
8577
  function snapshotTestStatusAnalysis(analysis) {
8164
8578
  return {
@@ -8184,13 +8598,22 @@ function createCachedTestStatusRun(args) {
8184
8598
  command: args.command,
8185
8599
  shellCommand: args.shellCommand
8186
8600
  });
8601
+ const runnerName = args.analysis.runner !== "unknown" ? args.analysis.runner : detectRunnerFromCommand(command);
8602
+ const commandPreview = args.commandPreview ?? args.shellCommand ?? (args.command ?? []).join(" ");
8603
+ const commandKey = args.commandKey ?? buildTestStatusBaselineIdentity({
8604
+ cwd: args.cwd,
8605
+ runner: runnerName,
8606
+ command,
8607
+ commandPreview,
8608
+ shellCommand: args.shellCommand
8609
+ });
8187
8610
  return {
8188
- version: 1,
8611
+ version: 2,
8189
8612
  timestamp: args.timestamp ?? (/* @__PURE__ */ new Date()).toISOString(),
8190
8613
  presetName: "test-status",
8191
8614
  cwd: args.cwd,
8192
- commandKey: args.commandKey,
8193
- commandPreview: args.commandPreview,
8615
+ commandKey,
8616
+ commandPreview,
8194
8617
  command,
8195
8618
  detail: args.detail,
8196
8619
  exitCode: args.exitCode,
@@ -8200,13 +8623,61 @@ function createCachedTestStatusRun(args) {
8200
8623
  truncatedApplied: args.truncatedApplied
8201
8624
  },
8202
8625
  analysis: snapshotTestStatusAnalysis(args.analysis),
8203
- pytest: buildCachedPytestState({
8626
+ runner: buildCachedRunnerState({
8204
8627
  command,
8205
- analysis: args.analysis,
8206
- remainingNodeIds: args.remainingNodeIds
8628
+ analysis: args.analysis
8207
8629
  })
8208
8630
  };
8209
8631
  }
8632
+ function migrateCachedTestStatusRun(state) {
8633
+ if (state.version === 2) {
8634
+ return state;
8635
+ }
8636
+ const runnerFromOutput = detectTestRunner(state.rawOutput);
8637
+ const runner = runnerFromOutput !== "unknown" ? runnerFromOutput : detectRunnerFromCommand(state.command);
8638
+ const storedCommand = state.command;
8639
+ const fallbackBaseArgv = !storedCommand && state.pytest?.baseArgv ? {
8640
+ mode: "argv",
8641
+ argv: [...state.pytest.baseArgv]
8642
+ } : void 0;
8643
+ const baselineCommand = storedCommand ?? fallbackBaseArgv;
8644
+ const commandPreview = state.commandPreview ?? (baselineCommand?.mode === "argv" ? baselineCommand.argv.join(" ") : baselineCommand?.mode === "shell" ? baselineCommand.shellCommand : "");
8645
+ const commandKey = buildTestStatusBaselineIdentity({
8646
+ cwd: state.cwd,
8647
+ runner,
8648
+ command: baselineCommand,
8649
+ commandPreview
8650
+ });
8651
+ return {
8652
+ version: 2,
8653
+ timestamp: state.timestamp,
8654
+ presetName: state.presetName,
8655
+ cwd: state.cwd,
8656
+ commandKey,
8657
+ commandPreview,
8658
+ command: state.command,
8659
+ detail: state.detail,
8660
+ exitCode: state.exitCode,
8661
+ rawOutput: state.rawOutput,
8662
+ capture: state.capture,
8663
+ analysis: state.analysis,
8664
+ runner: {
8665
+ name: runner,
8666
+ failingTargets: [...new Set((state.pytest?.failingNodeIds ?? []).map(
8667
+ (target) => normalizeFailingTarget(target, runner)
8668
+ ))],
8669
+ baselineCommand,
8670
+ subset: {
8671
+ available: runner === "pytest" && Boolean(state.pytest?.baseArgv),
8672
+ strategy: runner === "pytest" && state.pytest?.baseArgv ? "pytest-node-ids" : "none",
8673
+ ...runner === "pytest" && state.pytest?.baseArgv ? {
8674
+ baseArgv: [...state.pytest.baseArgv]
8675
+ } : {}
8676
+ }
8677
+ },
8678
+ ...fallbackBaseArgv ? { runnerMigrationFallbackUsed: true } : {}
8679
+ };
8680
+ }
8210
8681
  function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
8211
8682
  let raw = "";
8212
8683
  try {
@@ -8218,7 +8689,7 @@ function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
8218
8689
  throw new InvalidCachedTestStatusRunError();
8219
8690
  }
8220
8691
  try {
8221
- return cachedRunSchema.parse(JSON.parse(raw));
8692
+ return migrateCachedTestStatusRun(cachedRunSchema.parse(JSON.parse(raw)));
8222
8693
  } catch {
8223
8694
  throw new InvalidCachedTestStatusRunError();
8224
8695
  }
@@ -8247,7 +8718,7 @@ function getNextEscalationDetail(detail) {
8247
8718
  return null;
8248
8719
  }
8249
8720
  function buildTargetDelta(args) {
8250
- if (args.previous.presetName !== "test-status" || args.current.presetName !== "test-status" || args.previous.cwd !== args.current.cwd || args.previous.commandKey !== args.current.commandKey) {
8721
+ if (args.previous.presetName !== "test-status" || args.current.presetName !== "test-status" || args.previous.cwd !== args.current.cwd || args.previous.commandKey !== args.current.commandKey || args.previous.runner.name !== args.current.runner.name || args.previous.runner.name === "unknown") {
8251
8722
  return {
8252
8723
  comparable: false,
8253
8724
  resolved: [],
@@ -8255,16 +8726,8 @@ function buildTargetDelta(args) {
8255
8726
  introduced: []
8256
8727
  };
8257
8728
  }
8258
- if (!args.previous.pytest || !args.current.pytest) {
8259
- return {
8260
- comparable: false,
8261
- resolved: [],
8262
- remaining: [],
8263
- introduced: []
8264
- };
8265
- }
8266
- const previousTargets = args.previous.pytest.failingNodeIds;
8267
- const currentTargets = args.current.pytest.failingNodeIds;
8729
+ const previousTargets = args.previous.runner.failingTargets;
8730
+ const currentTargets = args.current.runner.failingTargets;
8268
8731
  const currentTargetSet = new Set(currentTargets);
8269
8732
  const previousTargetSet = new Set(previousTargets);
8270
8733
  return {
@@ -8277,8 +8740,11 @@ function buildTargetDelta(args) {
8277
8740
  function diffTestStatusTargets(args) {
8278
8741
  return buildTargetDelta(args);
8279
8742
  }
8743
+ function isRemainingSubsetAvailable(state) {
8744
+ return state.runner.name === "pytest" && state.runner.subset.available;
8745
+ }
8280
8746
  function getRemainingPytestNodeIds(state) {
8281
- return state.pytest?.remainingNodeIds ?? state.pytest?.failingNodeIds ?? [];
8747
+ return state.runner.name === "pytest" ? state.runner.failingTargets : [];
8282
8748
  }
8283
8749
  function diffTestStatusRuns(args) {
8284
8750
  const targetDelta = buildTargetDelta(args);
@@ -8289,21 +8755,45 @@ function diffTestStatusRuns(args) {
8289
8755
  args.current.analysis.buckets.map((bucket) => [buildBucketSignature(bucket), bucket])
8290
8756
  );
8291
8757
  const lines = [];
8292
- if (targetDelta.resolved.length > 0) {
8293
- lines.push(
8294
- `- Resolved: ${formatCount3(targetDelta.resolved.length, "failing test/module", "failing tests/modules")} no longer appear${appendPreview(targetDelta.resolved)}.`
8295
- );
8296
- }
8297
- if (targetDelta.remaining.length > 0) {
8298
- lines.push(
8299
- `- Remaining: ${formatCount3(targetDelta.remaining.length, "failing test/module", "failing tests/modules")} still appear${appendPreview(targetDelta.remaining)}.`
8300
- );
8301
- }
8302
- if (targetDelta.introduced.length > 0) {
8758
+ const resolvedSummary = buildTestTargetSummary(targetDelta.resolved);
8759
+ const remainingSummary = buildTestTargetSummary(targetDelta.remaining);
8760
+ const introducedSummary = buildTestTargetSummary(targetDelta.introduced);
8761
+ const pushTargetLine = (args2) => {
8762
+ if (args2.summary.count === 0) {
8763
+ return;
8764
+ }
8765
+ const summaryText = describeTargetSummary(args2.summary);
8766
+ if (summaryText) {
8767
+ lines.push(
8768
+ `- ${args2.kind}: ${formatCount3(args2.summary.count, args2.countLabel, `${args2.countLabel}s`)} ${args2.verb} ${summaryText}.`
8769
+ );
8770
+ return;
8771
+ }
8303
8772
  lines.push(
8304
- `- New: ${formatCount3(targetDelta.introduced.length, "failing test/module", "failing tests/modules")} appeared${appendPreview(targetDelta.introduced)}.`
8773
+ `- ${args2.kind}: ${formatCount3(args2.summary.count, args2.countLabel, `${args2.countLabel}s`)} ${args2.verb}${appendPreview(args2.fallbackValues)}.`
8305
8774
  );
8306
- }
8775
+ };
8776
+ pushTargetLine({
8777
+ kind: "Resolved",
8778
+ summary: resolvedSummary,
8779
+ countLabel: "failing target",
8780
+ fallbackValues: targetDelta.resolved,
8781
+ verb: "no longer appear"
8782
+ });
8783
+ pushTargetLine({
8784
+ kind: "Remaining",
8785
+ summary: remainingSummary,
8786
+ countLabel: "failing target",
8787
+ fallbackValues: targetDelta.remaining,
8788
+ verb: "still appear"
8789
+ });
8790
+ pushTargetLine({
8791
+ kind: "New",
8792
+ summary: introducedSummary,
8793
+ countLabel: "failing target",
8794
+ fallbackValues: targetDelta.introduced,
8795
+ verb: "appeared"
8796
+ });
8307
8797
  for (const bucket of args.current.analysis.buckets) {
8308
8798
  const signature = buildBucketSignature(bucket);
8309
8799
  const previous = previousBuckets.get(signature);
@@ -8331,19 +8821,19 @@ function diffTestStatusRuns(args) {
8331
8821
  }
8332
8822
  }
8333
8823
  return {
8334
- lines: lines.slice(0, 4),
8335
- remainingNodeIds: targetDelta.comparable ? targetDelta.remaining : void 0
8824
+ lines: lines.slice(0, 4)
8336
8825
  };
8337
8826
  }
8338
8827
  function getCachedRerunCommand(state) {
8339
- if (state.command?.mode === "argv") {
8828
+ const baselineCommand = state.runner.baselineCommand ?? state.command;
8829
+ if (baselineCommand?.mode === "argv") {
8340
8830
  return {
8341
- command: [...state.command.argv]
8831
+ command: [...baselineCommand.argv]
8342
8832
  };
8343
8833
  }
8344
- if (state.command?.mode === "shell") {
8834
+ if (baselineCommand?.mode === "shell") {
8345
8835
  return {
8346
- shellCommand: state.command.shellCommand
8836
+ shellCommand: baselineCommand.shellCommand
8347
8837
  };
8348
8838
  }
8349
8839
  throw new Error(
@@ -8351,13 +8841,13 @@ function getCachedRerunCommand(state) {
8351
8841
  );
8352
8842
  }
8353
8843
  function getRemainingPytestRerunCommand(state) {
8354
- if (!state.pytest?.subsetCapable || !state.pytest.baseArgv) {
8844
+ if (!isRemainingSubsetAvailable(state) || !state.runner.subset.baseArgv) {
8355
8845
  throw new Error(
8356
8846
  "Cached test-status run cannot use `sift rerun --remaining`. Automatic remaining-subset reruns currently support only argv-mode `pytest ...` or `python -m pytest ...` commands. Run a narrowed command manually with `sift exec --preset test-status -- <narrowed pytest command>`."
8357
8847
  );
8358
8848
  }
8359
8849
  const remainingNodeIds = getRemainingPytestNodeIds(state);
8360
- return [...state.pytest.baseArgv, ...remainingNodeIds];
8850
+ return [...state.runner.subset.baseArgv, ...remainingNodeIds];
8361
8851
  }
8362
8852
 
8363
8853
  // src/core/escalate.ts
@@ -8405,7 +8895,8 @@ async function runEscalate(request) {
8405
8895
  outputContract: request.outputContract,
8406
8896
  fallbackJson: request.fallbackJson,
8407
8897
  testStatusContext: {
8408
- remainingSubsetAvailable: Boolean(state.pytest?.subsetCapable) && (state.pytest?.failingNodeIds.length ?? 0) > 0
8898
+ remainingSubsetAvailable: isRemainingSubsetAvailable(state) && state.runner.failingTargets.length > 0,
8899
+ remainingMode: "none"
8409
8900
  }
8410
8901
  });
8411
8902
  let output = result.output;
@@ -8619,8 +9110,9 @@ async function runTestStatusWatch(request, cycles) {
8619
9110
  testStatusContext: {
8620
9111
  ...request.testStatusContext,
8621
9112
  resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
8622
- remainingTests: targetDelta?.remaining ?? currentRun.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
8623
- remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (Boolean(currentRun.pytest?.subsetCapable) && (currentRun.pytest?.failingNodeIds.length ?? 0) > 0)
9113
+ remainingTests: targetDelta?.remaining ?? currentRun.runner.failingTargets ?? request.testStatusContext?.remainingTests,
9114
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (isRemainingSubsetAvailable(currentRun) && currentRun.runner.failingTargets.length > 0),
9115
+ remainingMode: request.testStatusContext?.remainingMode ?? "none"
8624
9116
  }
8625
9117
  });
8626
9118
  if (request.goal === "diagnose" && request.format === "json") {
@@ -8767,8 +9259,10 @@ async function runExec(request) {
8767
9259
  const shellPath = process.env.SHELL || "/bin/bash";
8768
9260
  const commandPreview = buildCommandPreview(request);
8769
9261
  const commandCwd = request.cwd ?? process.cwd();
8770
- const shouldCacheTestStatusBase = request.presetName === "test-status" && !request.skipCacheWrite;
8771
- const previousCachedRun = shouldCacheTestStatusBase ? tryReadCachedTestStatusRun() : null;
9262
+ const isTestStatusPreset = request.presetName === "test-status";
9263
+ const readCachedBaseline = isTestStatusPreset && (request.readCachedBaseline ?? true);
9264
+ const writeCachedBaselineRequested = isTestStatusPreset && (request.writeCachedBaseline ?? (request.skipCacheWrite ? false : true));
9265
+ const previousCachedRun = readCachedBaseline ? tryReadCachedTestStatusRun() : null;
8772
9266
  if (request.config.runtime.verbose) {
8773
9267
  process.stderr.write(
8774
9268
  `${pc5.dim("sift")} exec mode=${hasShellCommand ? "shell" : "argv"} command=${commandPreview}
@@ -8826,7 +9320,8 @@ async function runExec(request) {
8826
9320
  const capturedOutput = capture.render();
8827
9321
  const autoWatchDetected = !request.watch && looksLikeWatchStream(capturedOutput);
8828
9322
  const useWatchFlow = Boolean(request.watch) || autoWatchDetected;
8829
- const shouldCacheTestStatus = shouldCacheTestStatusBase && !useWatchFlow;
9323
+ const shouldBuildTestStatusState = isTestStatusPreset && !useWatchFlow;
9324
+ const shouldWriteCachedBaseline = writeCachedBaselineRequested && !useWatchFlow;
8830
9325
  if (request.config.runtime.verbose) {
8831
9326
  process.stderr.write(
8832
9327
  `${pc5.dim("sift")} child_exit=${exitCode} captured_chars=${capture.getTotalChars()} capture_truncated=${capture.wasTruncated()}
@@ -8889,10 +9384,19 @@ async function runExec(request) {
8889
9384
  `);
8890
9385
  return exitCode;
8891
9386
  }
8892
- const analysis = shouldCacheTestStatus ? analyzeTestStatus(capturedOutput) : null;
8893
- let currentCachedRun = shouldCacheTestStatus && analysis ? createCachedTestStatusRun({
9387
+ const analysis = shouldBuildTestStatusState ? analyzeTestStatus(capturedOutput) : null;
9388
+ let currentCachedRun = shouldBuildTestStatusState && analysis ? createCachedTestStatusRun({
8894
9389
  cwd: commandCwd,
8895
9390
  commandKey: buildTestStatusCommandKey({
9391
+ cwd: commandCwd,
9392
+ runner: analysis.runner,
9393
+ command: Array.isArray(request.command) && request.command.length > 0 ? {
9394
+ mode: "argv",
9395
+ argv: [...request.command]
9396
+ } : request.shellCommand ? {
9397
+ mode: "shell",
9398
+ shellCommand: request.shellCommand
9399
+ } : void 0,
8896
9400
  commandPreview,
8897
9401
  shellCommand: request.shellCommand
8898
9402
  }),
@@ -8906,31 +9410,32 @@ async function runExec(request) {
8906
9410
  truncatedApplied: capture.wasTruncated(),
8907
9411
  analysis
8908
9412
  }) : null;
8909
- const targetDelta = request.diff && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
9413
+ const targetDelta = (request.diff || request.testStatusContext?.remainingMode === "subset_rerun" || request.testStatusContext?.remainingMode === "full_rerun_diff") && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
8910
9414
  previous: previousCachedRun,
8911
9415
  current: currentCachedRun
8912
9416
  }) : null;
8913
9417
  const result = await runSiftWithStats({
8914
9418
  ...request,
8915
9419
  stdin: capturedOutput,
8916
- analysisContext: request.skipCacheWrite && request.presetName === "test-status" ? [
9420
+ analysisContext: request.testStatusContext?.remainingMode && request.testStatusContext.remainingMode !== "none" && request.presetName === "test-status" ? [
8917
9421
  request.analysisContext,
8918
9422
  "Zoom context:",
8919
9423
  "- This pass is remaining-only.",
8920
9424
  "- The full-suite truth already exists from the cached full run.",
8921
9425
  "- Do not reintroduce resolved tests into the diagnosis."
8922
9426
  ].filter((value) => Boolean(value)).join("\n") : request.analysisContext,
8923
- testStatusContext: shouldCacheTestStatus && analysis ? {
9427
+ testStatusContext: shouldBuildTestStatusState && analysis ? {
8924
9428
  ...request.testStatusContext,
8925
9429
  resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
8926
- remainingTests: targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
9430
+ remainingTests: targetDelta?.remaining ?? currentCachedRun?.runner.failingTargets ?? request.testStatusContext?.remainingTests,
8927
9431
  remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? Boolean(
8928
- currentCachedRun?.pytest?.subsetCapable && (targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? []).length > 0
8929
- )
9432
+ currentCachedRun && isRemainingSubsetAvailable(currentCachedRun) && (targetDelta?.remaining ?? currentCachedRun?.runner.failingTargets ?? []).length > 0
9433
+ ),
9434
+ remainingMode: request.testStatusContext?.remainingMode ?? "none"
8930
9435
  } : request.testStatusContext
8931
9436
  });
8932
9437
  let output = result.output;
8933
- if (shouldCacheTestStatus) {
9438
+ if (shouldBuildTestStatusState) {
8934
9439
  if (isInsufficientSignalOutput(output)) {
8935
9440
  output = buildInsufficientSignalOutput({
8936
9441
  presetName: request.presetName,
@@ -8945,26 +9450,12 @@ async function runExec(request) {
8945
9450
  previous: previousCachedRun,
8946
9451
  current: currentCachedRun
8947
9452
  });
8948
- currentCachedRun = createCachedTestStatusRun({
8949
- cwd: commandCwd,
8950
- commandKey: currentCachedRun.commandKey,
8951
- commandPreview,
8952
- command: request.command,
8953
- shellCommand: request.shellCommand,
8954
- detail: request.detail ?? "standard",
8955
- exitCode,
8956
- rawOutput: capturedOutput,
8957
- originalChars: capture.getTotalChars(),
8958
- truncatedApplied: capture.wasTruncated(),
8959
- analysis,
8960
- remainingNodeIds: delta.remainingNodeIds
8961
- });
8962
9453
  if (delta.lines.length > 0) {
8963
9454
  output = `${delta.lines.join("\n")}
8964
9455
  ${output}`;
8965
9456
  }
8966
9457
  }
8967
- if (currentCachedRun) {
9458
+ if (currentCachedRun && shouldWriteCachedBaseline) {
8968
9459
  try {
8969
9460
  writeCachedTestStatusRun(currentCachedRun);
8970
9461
  } catch (error) {
@@ -9011,25 +9502,60 @@ async function runRerun(request) {
9011
9502
  diff: true,
9012
9503
  presetName: "test-status",
9013
9504
  detail: "standard",
9014
- showRaw: false
9505
+ showRaw: false,
9506
+ readCachedBaseline: true,
9507
+ writeCachedBaseline: true,
9508
+ testStatusContext: {
9509
+ ...request.testStatusContext,
9510
+ remainingMode: "none"
9511
+ }
9015
9512
  });
9016
9513
  }
9017
- const remainingNodeIds = getRemainingPytestNodeIds(state);
9018
- if (remainingNodeIds.length === 0) {
9019
- process.stdout.write("No remaining failing pytest targets.\n");
9020
- return 0;
9514
+ if (state.runner.name === "pytest") {
9515
+ const remainingNodeIds = getRemainingPytestNodeIds(state);
9516
+ if (remainingNodeIds.length === 0) {
9517
+ process.stdout.write("No remaining failing pytest targets.\n");
9518
+ return 0;
9519
+ }
9520
+ return runExec({
9521
+ ...request,
9522
+ command: getRemainingPytestRerunCommand(state),
9523
+ cwd: state.cwd,
9524
+ diff: false,
9525
+ presetName: "test-status",
9526
+ readCachedBaseline: true,
9527
+ writeCachedBaseline: false,
9528
+ testStatusContext: {
9529
+ ...request.testStatusContext,
9530
+ remainingSubsetAvailable: isRemainingSubsetAvailable(state),
9531
+ remainingMode: "subset_rerun"
9532
+ }
9533
+ });
9021
9534
  }
9022
- return runExec({
9023
- ...request,
9024
- command: getRemainingPytestRerunCommand(state),
9025
- cwd: state.cwd,
9026
- diff: false,
9027
- presetName: "test-status",
9028
- skipCacheWrite: true,
9029
- testStatusContext: {
9030
- remainingSubsetAvailable: true
9535
+ if (state.runner.name === "vitest" || state.runner.name === "jest") {
9536
+ if (!state.runner.baselineCommand || state.runnerMigrationFallbackUsed) {
9537
+ throw new Error(
9538
+ "Cached test-status run cannot use `sift rerun --remaining` yet because the original full command is unavailable from cache. Refresh the baseline with `sift exec --preset test-status -- <test command>` and retry."
9539
+ );
9031
9540
  }
9032
- });
9541
+ return runExec({
9542
+ ...request,
9543
+ ...getCachedRerunCommand(state),
9544
+ cwd: state.cwd,
9545
+ diff: false,
9546
+ presetName: "test-status",
9547
+ readCachedBaseline: true,
9548
+ writeCachedBaseline: false,
9549
+ testStatusContext: {
9550
+ ...request.testStatusContext,
9551
+ remainingSubsetAvailable: false,
9552
+ remainingMode: "full_rerun_diff"
9553
+ }
9554
+ });
9555
+ }
9556
+ throw new Error(
9557
+ "Cached test-status run cannot use `sift rerun --remaining` for this runner. Refresh with `sift exec --preset test-status -- <test command>` or rerun a narrowed command manually."
9558
+ );
9033
9559
  }
9034
9560
 
9035
9561
  // src/core/stdin.ts
@@ -9542,8 +10068,14 @@ function createCliApp(args = {}) {
9542
10068
  });
9543
10069
  });
9544
10070
  applySharedOptions(
9545
- cli.command("rerun", "Rerun the cached test-status command or only the remaining pytest subset")
9546
- ).usage("rerun [options]").example("rerun").example("rerun --remaining").example("rerun --remaining --detail focused").example("rerun --remaining --detail verbose --show-raw").option("--remaining", "Rerun only the remaining failing pytest node IDs from the cached full run").action(async (options) => {
10071
+ cli.command(
10072
+ "rerun",
10073
+ "Rerun the cached test-status command or focus on what still fails from the cached baseline"
10074
+ )
10075
+ ).usage("rerun [options]").example("rerun").example("rerun --remaining").example("rerun --remaining --detail focused").example("rerun --remaining --detail verbose --show-raw").option(
10076
+ "--remaining",
10077
+ "Focus on what still fails from the cached baseline; narrows automatically for pytest and diffs a full rerun for vitest/jest"
10078
+ ).action(async (options) => {
9547
10079
  const remaining = Boolean(options.remaining);
9548
10080
  if (!remaining && Boolean(options.showRaw)) {
9549
10081
  throw new Error("--show-raw is supported only with `sift rerun --remaining`.");