@bilalimamoglu/sift 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1262,7 +1262,8 @@ function renderInstructionBody() {
1262
1262
  "- After making or planning a fix, refresh the truth with `sift rerun` so the same full suite runs again at `standard` and shows what is resolved or still remaining.",
1263
1263
  "- The normal stop budget is `standard` first, then at most one zoom step before raw.",
1264
1264
  "- Only if more detail is still needed after `sift rerun`, use `sift rerun --remaining --detail focused`, then `sift rerun --remaining --detail verbose`, then `sift rerun --remaining --detail verbose --show-raw`.",
1265
- "- `sift rerun --remaining` currently supports only argv-mode `pytest ...` or `python -m pytest ...` runs; otherwise rerun a narrowed command manually with `sift exec --preset test-status -- <narrowed pytest command>`.",
1265
+ "- `sift rerun --remaining` narrows automatically for `pytest` and reruns the full original command for `vitest` and `jest` while keeping the diagnosis focused on what still fails.",
1266
+ "- For other runners, rerun a narrowed command manually with `sift exec --preset test-status -- <narrowed test command>` if you need a smaller surface.",
1266
1267
  "- Start with `standard` text. Use diagnose JSON only when automation or machine branching truly needs it.",
1267
1268
  "- If `standard` already shows bucket-level root cause, anchor, and fix lines, trust it and report from it directly.",
1268
1269
  "- In that case, do not re-verify the same bucket with raw pytest; at most do one targeted source read before you edit.",
@@ -2124,7 +2125,125 @@ function createProvider(config) {
2124
2125
 
2125
2126
  // src/core/testStatusDecision.ts
2126
2127
  import { z as z2 } from "zod";
2127
- var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","primary_suspect_kind":"test|app_code|config|environment|tooling|unknown","confidence_reason":string,"dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"suspect_kind":"test|app_code|config|environment|tooling|unknown","fix_hint":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
2128
+
2129
+ // src/core/testStatusTargets.ts
2130
+ function unique(values) {
2131
+ return [...new Set(values)];
2132
+ }
2133
+ function normalizeTestId(value) {
2134
+ return value.replace(/\\/g, "/").replace(/\s+/g, " ").trim();
2135
+ }
2136
+ function stripMatcherProse(value) {
2137
+ return value.replace(/\s+-\s+.*$/, "").trim();
2138
+ }
2139
+ function extractJsFile(value) {
2140
+ const match = value.match(/([A-Za-z0-9_./-]+\.(?:test|spec)\.[cm]?[jt]sx?)/i);
2141
+ return match ? normalizeTestId(match[1]) : null;
2142
+ }
2143
+ function normalizeFailingTarget(label, runner) {
2144
+ const normalized = normalizeTestId(label).replace(/^['"]|['"]$/g, "");
2145
+ if (runner === "pytest") {
2146
+ return stripMatcherProse(normalized);
2147
+ }
2148
+ if (runner === "vitest" || runner === "jest") {
2149
+ const compact = normalized.replace(/^FAIL\s+/i, "").replace(/^[❯×]\s*/, "").replace(/\s+\[[^\]]+\]\s*$/, "").trim();
2150
+ const file = extractJsFile(compact);
2151
+ if (!file) {
2152
+ return stripMatcherProse(compact);
2153
+ }
2154
+ const fileIndex = compact.indexOf(file);
2155
+ const suffix = compact.slice(fileIndex + file.length).trim();
2156
+ if (!suffix) {
2157
+ return file;
2158
+ }
2159
+ if (suffix.startsWith(">")) {
2160
+ const testName = stripMatcherProse(suffix.replace(/^>\s*/, ""));
2161
+ return testName.length > 0 ? `${file} > ${testName}` : file;
2162
+ }
2163
+ return file;
2164
+ }
2165
+ return normalized;
2166
+ }
2167
+ function extractFamilyPrefix(value) {
2168
+ const normalized = normalizeTestId(value);
2169
+ const filePart = normalized.split("::")[0]?.split(" > ")[0]?.trim() ?? normalized;
2170
+ const workflowMatch = filePart.match(/^(\.github\/workflows\/)/);
2171
+ if (workflowMatch) {
2172
+ return workflowMatch[1];
2173
+ }
2174
+ const testsMatch = filePart.match(/^((?:test|tests)\/[^/]+\/)/);
2175
+ if (testsMatch) {
2176
+ return testsMatch[1];
2177
+ }
2178
+ const srcMatch = filePart.match(/^(src\/[^/]+\/)/);
2179
+ if (srcMatch) {
2180
+ return srcMatch[1];
2181
+ }
2182
+ const configMatch = filePart.match(
2183
+ /^((?:[^/]+\/)*(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|conftest\.py|(?:vitest|jest)\.config\.[^/]+|tsconfig(?:\.[^/]+)?\.json|[^/]*config[^/]*\.(?:json|ya?ml)))$/i
2184
+ );
2185
+ if (configMatch) {
2186
+ return configMatch[1];
2187
+ }
2188
+ const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
2189
+ if (segments.length >= 2) {
2190
+ return `${segments[0]}/${segments[1]}/`;
2191
+ }
2192
+ if (segments.length === 1) {
2193
+ return segments[0];
2194
+ }
2195
+ return "other";
2196
+ }
2197
+ function buildTestTargetSummary(values) {
2198
+ const uniqueValues = unique(values);
2199
+ const counts = /* @__PURE__ */ new Map();
2200
+ for (const value of uniqueValues) {
2201
+ const prefix = extractFamilyPrefix(value);
2202
+ counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
2203
+ }
2204
+ const families = [...counts.entries()].map(([prefix, count]) => ({
2205
+ prefix,
2206
+ count
2207
+ })).sort((left, right) => {
2208
+ if (right.count !== left.count) {
2209
+ return right.count - left.count;
2210
+ }
2211
+ return left.prefix.localeCompare(right.prefix);
2212
+ }).slice(0, 5);
2213
+ return {
2214
+ count: uniqueValues.length,
2215
+ families
2216
+ };
2217
+ }
2218
+ function formatTargetSummary(summary) {
2219
+ if (summary.count === 0) {
2220
+ return "count=0";
2221
+ }
2222
+ const families = summary.families.length > 0 ? summary.families.map((family) => `${family.prefix}${family.count}`).join(", ") : "none";
2223
+ return `count=${summary.count}; families=${families}`;
2224
+ }
2225
+ function joinFamilies(families) {
2226
+ if (families.length === 0) {
2227
+ return "";
2228
+ }
2229
+ if (families.length === 1) {
2230
+ return families[0];
2231
+ }
2232
+ if (families.length === 2) {
2233
+ return `${families[0]} and ${families[1]}`;
2234
+ }
2235
+ return `${families.slice(0, -1).join(", ")}, and ${families.at(-1)}`;
2236
+ }
2237
+ function describeTargetSummary(summary) {
2238
+ if (summary.count === 0 || summary.families.length === 0) {
2239
+ return null;
2240
+ }
2241
+ const families = summary.families.map((family) => `${family.prefix} (${family.count})`);
2242
+ return `across ${joinFamilies(families)}`;
2243
+ }
2244
+
2245
+ // src/core/testStatusDecision.ts
2246
+ var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","remaining_mode":"none|subset_rerun|full_rerun_diff","primary_suspect_kind":"test|app_code|config|environment|tooling|unknown","confidence_reason":string,"dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"suspect_kind":"test|app_code|config|environment|tooling|unknown","fix_hint":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
2128
2247
  var TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT = '{"diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","provider_confidence":number|null,"bucket_supplements":[{"label":string,"count":number,"root_cause":string,"anchor":{"file":string|null,"line":number|null,"search_hint":string|null},"fix_hint":string|null,"confidence":number}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string}}';
2129
2248
  var nextBestActionSchema = z2.object({
2130
2249
  code: z2.enum([
@@ -2166,6 +2285,7 @@ var testStatusDiagnoseContractSchema = z2.object({
2166
2285
  additional_source_read_likely_low_value: z2.boolean(),
2167
2286
  read_raw_only_if: z2.string().nullable(),
2168
2287
  decision: z2.enum(["stop", "zoom", "read_source", "read_raw"]),
2288
+ remaining_mode: z2.enum(["none", "subset_rerun", "full_rerun_diff"]),
2169
2289
  primary_suspect_kind: z2.enum([
2170
2290
  "test",
2171
2291
  "app_code",
@@ -2499,54 +2619,127 @@ function extractReasonDetail(reason, prefix) {
2499
2619
  function formatCount(count, singular, plural = `${singular}s`) {
2500
2620
  return `${count} ${count === 1 ? singular : plural}`;
2501
2621
  }
2502
- function unique(values) {
2622
+ function unique2(values) {
2503
2623
  return [...new Set(values)];
2504
2624
  }
2505
- function normalizeTestId(value) {
2625
+ function normalizeTestId2(value) {
2506
2626
  return value.replace(/\\/g, "/").trim();
2507
2627
  }
2508
- function extractTestFamilyPrefix(value) {
2509
- const normalized = normalizeTestId(value);
2510
- const testsMatch = normalized.match(/^(tests\/[^/]+\/)/);
2511
- if (testsMatch) {
2512
- return testsMatch[1];
2628
+ function normalizePathCandidate(value) {
2629
+ if (!value) {
2630
+ return null;
2513
2631
  }
2514
- const filePart = normalized.split("::")[0]?.trim() ?? "";
2515
- if (!filePart.includes("/")) {
2516
- return "other";
2632
+ let normalized = value.replace(/\\/g, "/").trim();
2633
+ normalized = normalized.replace(/^[("'`<\[]+/, "").replace(/[>"'`\]),:;]+$/, "");
2634
+ normalized = normalized.replace(/^<repo>\//, "").replace(/^\.\//, "");
2635
+ if (normalized.includes("::")) {
2636
+ normalized = normalized.split("::")[0]?.trim() ?? normalized;
2517
2637
  }
2518
- const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
2519
- if (segments.length === 0) {
2520
- return "other";
2638
+ if (normalized.startsWith("/") && !normalized.startsWith("/tmp/") && !normalized.startsWith("/var/tmp/")) {
2639
+ return null;
2640
+ }
2641
+ if (/^\.github\/workflows\/.+\.(?:yml|yaml)$/i.test(normalized)) {
2642
+ return normalized;
2643
+ }
2644
+ if (/^(?:src|test|tests)\/.+\.[A-Za-z0-9._-]+$/i.test(normalized)) {
2645
+ return normalized;
2521
2646
  }
2522
- return `${segments[0]}/`;
2647
+ if (/^(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py)$/i.test(
2648
+ normalized
2649
+ )) {
2650
+ return normalized;
2651
+ }
2652
+ if (/^(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+$/i.test(normalized)) {
2653
+ return normalized;
2654
+ }
2655
+ if (/^(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json$/i.test(normalized)) {
2656
+ return normalized;
2657
+ }
2658
+ if (/^[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)$/i.test(normalized)) {
2659
+ return normalized;
2660
+ }
2661
+ return null;
2523
2662
  }
2524
- function buildTestTargetSummary(values) {
2525
- const counts = /* @__PURE__ */ new Map();
2526
- for (const value of values) {
2527
- const prefix = extractTestFamilyPrefix(value);
2528
- counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
2663
+ function addPathCandidatesFromText(target, text) {
2664
+ if (!text) {
2665
+ return;
2529
2666
  }
2530
- const families = [...counts.entries()].map(([prefix, count]) => ({
2531
- prefix,
2532
- count
2533
- })).sort((left, right) => {
2534
- if (right.count !== left.count) {
2535
- return right.count - left.count;
2667
+ const pattern = /(?:^|[\s("'`])((?:\.github\/workflows\/[A-Za-z0-9._/-]+\.(?:yml|yaml)|(?:src|test|tests)\/[A-Za-z0-9._/-]+\.[A-Za-z0-9._-]+|package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py|(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+|(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json|[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)))/g;
2668
+ for (const match of text.matchAll(pattern)) {
2669
+ const normalized = normalizePathCandidate(match[1] ?? null);
2670
+ if (normalized) {
2671
+ target.add(normalized);
2672
+ }
2673
+ }
2674
+ }
2675
+ function extractBucketPathCandidates(args) {
2676
+ const candidates = /* @__PURE__ */ new Set();
2677
+ const push = (value) => {
2678
+ const normalized = normalizePathCandidate(value);
2679
+ if (normalized) {
2680
+ candidates.add(normalized);
2536
2681
  }
2537
- return left.prefix.localeCompare(right.prefix);
2538
- }).slice(0, 5);
2539
- return {
2540
- count: values.length,
2541
- families
2542
2682
  };
2683
+ push(args.readTarget?.file);
2684
+ for (const item of args.bucket.representativeItems) {
2685
+ push(item.file);
2686
+ addPathCandidatesFromText(candidates, item.label);
2687
+ addPathCandidatesFromText(candidates, item.reason);
2688
+ }
2689
+ addPathCandidatesFromText(candidates, args.bucket.reason);
2690
+ addPathCandidatesFromText(candidates, args.bucket.headline);
2691
+ for (const line of args.bucket.summaryLines) {
2692
+ addPathCandidatesFromText(candidates, line);
2693
+ }
2694
+ return [...candidates];
2543
2695
  }
2544
- function formatTargetSummary(summary) {
2545
- if (summary.count === 0) {
2546
- return "count=0";
2696
+ function isConfigPathCandidate(path8) {
2697
+ return /^\.github\/workflows\/.+\.(?:yml|yaml)$/i.test(path8) || /^(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py)$/i.test(
2698
+ path8
2699
+ ) || /^(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+$/i.test(path8) || /^(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json$/i.test(path8) || /^[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)$/i.test(path8);
2700
+ }
2701
+ function isAppPathCandidate(path8) {
2702
+ return path8.startsWith("src/");
2703
+ }
2704
+ function isTestPathCandidate(path8) {
2705
+ return path8.startsWith("test/") || path8.startsWith("tests/");
2706
+ }
2707
+ function looksLikeMatcherLiteralComparison(detail) {
2708
+ return /\bexpected\b[\s\S]*\bto (?:be|contain)\b/i.test(detail);
2709
+ }
2710
+ function looksLikeGoldenLiteralDrift(detail) {
2711
+ return /\\n/.test(detail) || /-\s+(?:Tests|Decision|Likely owner|Next|Stop signal)\b/.test(detail) || /\b(?:node-version|workflow_dispatch|run-name|matrix|registry-url)\b/i.test(detail);
2712
+ }
2713
+ function isGoldenOutputDriftBucket(bucket) {
2714
+ if (bucket.type !== "assertion_failure") {
2715
+ return false;
2547
2716
  }
2548
- const families = summary.families.length > 0 ? summary.families.map((family) => `${family.prefix}${family.count}`).join(", ") : "none";
2549
- return `count=${summary.count}; families=${families}`;
2717
+ const detail = extractReasonDetail(bucket.reason, "assertion failed:") ?? bucket.reason;
2718
+ if (!looksLikeMatcherLiteralComparison(detail)) {
2719
+ return false;
2720
+ }
2721
+ if (bucket.reason.startsWith("snapshot mismatch:")) {
2722
+ return false;
2723
+ }
2724
+ if (!looksLikeGoldenLiteralDrift(detail)) {
2725
+ return false;
2726
+ }
2727
+ const candidates = extractBucketPathCandidates({
2728
+ bucket
2729
+ });
2730
+ return candidates.some((candidate) => isConfigPathCandidate(candidate) || isTestPathCandidate(candidate));
2731
+ }
2732
+ function specializeBucket(bucket) {
2733
+ if (!isGoldenOutputDriftBucket(bucket)) {
2734
+ return bucket;
2735
+ }
2736
+ return {
2737
+ ...bucket,
2738
+ type: "golden_output_drift",
2739
+ reason: "golden output drift: expected literal or golden output no longer matches current output",
2740
+ labelOverride: "golden output drift",
2741
+ hint: "Update the expected literal or golden output if the new output is intentional; otherwise fix the generated output and rerun."
2742
+ };
2550
2743
  }
2551
2744
  function classifyGenericBucketType(reason) {
2552
2745
  const extended = findExtendedBucketSpec(reason);
@@ -2571,6 +2764,9 @@ function classifyGenericBucketType(reason) {
2571
2764
  if (reason.startsWith("missing module:")) {
2572
2765
  return "import_dependency_failure";
2573
2766
  }
2767
+ if (reason.startsWith("golden output drift:")) {
2768
+ return "golden_output_drift";
2769
+ }
2574
2770
  if (reason.startsWith("assertion failed:")) {
2575
2771
  return "assertion_failure";
2576
2772
  }
@@ -2723,7 +2919,7 @@ function mergeBucketDetails(existing, incoming) {
2723
2919
  count,
2724
2920
  confidence: Math.max(existing.confidence, incoming.confidence),
2725
2921
  representativeItems,
2726
- entities: unique([...existing.entities, ...incoming.entities]),
2922
+ entities: unique2([...existing.entities, ...incoming.entities]),
2727
2923
  hint: existing.hint ?? incoming.hint,
2728
2924
  overflowCount: Math.max(
2729
2925
  existing.overflowCount,
@@ -2915,6 +3111,9 @@ function labelForBucket(bucket) {
2915
3111
  if (bucket.type === "import_dependency_failure") {
2916
3112
  return "import dependency failure";
2917
3113
  }
3114
+ if (bucket.type === "golden_output_drift") {
3115
+ return "golden output drift";
3116
+ }
2918
3117
  if (bucket.type === "assertion_failure") {
2919
3118
  return "assertion failure";
2920
3119
  }
@@ -2949,6 +3148,9 @@ function rootCauseConfidenceFor(bucket) {
2949
3148
  if (bucket.type === "contract_snapshot_drift") {
2950
3149
  return bucket.entities.length > 0 ? 0.92 : 0.76;
2951
3150
  }
3151
+ if (bucket.type === "golden_output_drift") {
3152
+ return 0.78;
3153
+ }
2952
3154
  if (bucket.source === "provider") {
2953
3155
  return Math.max(0.6, Math.min(bucket.confidence, 0.82));
2954
3156
  }
@@ -3023,6 +3225,9 @@ function buildReadTargetWhy(args) {
3023
3225
  if (args.bucket.type === "import_dependency_failure") {
3024
3226
  return "it is the first visible failing module in this missing dependency bucket";
3025
3227
  }
3228
+ if (args.bucket.type === "golden_output_drift") {
3229
+ return "it is the first visible golden or literal drift anchor for this bucket";
3230
+ }
3026
3231
  if (args.bucket.type === "assertion_failure") {
3027
3232
  return "it is the first visible failing test in this bucket";
3028
3233
  }
@@ -3100,6 +3305,9 @@ function buildReadTargetSearchHint(bucket, anchor) {
3100
3305
  if (assertionText) {
3101
3306
  return assertionText;
3102
3307
  }
3308
+ if (bucket.type === "golden_output_drift") {
3309
+ return bucket.representativeItems.map((item) => item.reason.match(/^assertion failed:\s+(.+)$/)?.[1] ?? item.reason).find(Boolean) ?? anchor.label.split("::")[1]?.trim() ?? null;
3310
+ }
3103
3311
  if (bucket.reason.startsWith("unknown ")) {
3104
3312
  return anchor.reason;
3105
3313
  }
@@ -3154,18 +3362,36 @@ function buildConcreteNextNote(args) {
3154
3362
  }
3155
3363
  const lead = primaryTarget.context_hint.start_line !== null && primaryTarget.context_hint.end_line !== null ? `Read ${primaryTarget.file} lines ${primaryTarget.context_hint.start_line}-${primaryTarget.context_hint.end_line} first; ${primaryTarget.why}.` : primaryTarget.context_hint.search_hint ? `Search for ${primaryTarget.context_hint.search_hint} in ${primaryTarget.file} first; ${primaryTarget.why}.` : `Read ${formatReadTargetLocation(primaryTarget)} first; ${primaryTarget.why}.`;
3156
3364
  if (args.nextBestAction.code === "fix_dominant_blocker") {
3365
+ if (args.remainingMode === "subset_rerun") {
3366
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
3367
+ }
3368
+ if (args.remainingMode === "full_rerun_diff") {
3369
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
3370
+ }
3157
3371
  if (args.nextBestAction.bucket_index === 1 && args.hasSecondaryVisibleBucket) {
3158
3372
  return "Fix bucket 1 first, then rerun the full suite at standard. Secondary buckets are already visible behind it.";
3159
3373
  }
3160
3374
  return `Fix bucket ${args.nextBestAction.bucket_index ?? 1} first, then rerun the full suite at standard.`;
3161
3375
  }
3162
3376
  if (args.nextBestAction.code === "read_source_for_bucket") {
3377
+ if (args.remainingMode === "subset_rerun") {
3378
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
3379
+ }
3380
+ if (args.remainingMode === "full_rerun_diff") {
3381
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
3382
+ }
3163
3383
  return lead;
3164
3384
  }
3165
3385
  if (args.nextBestAction.code === "insufficient_signal") {
3166
- if (args.nextBestAction.note.startsWith("Provider follow-up failed")) {
3386
+ if (args.nextBestAction.note.startsWith("Provider follow-up")) {
3167
3387
  return args.nextBestAction.note;
3168
3388
  }
3389
+ if (args.remainingMode === "subset_rerun") {
3390
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
3391
+ }
3392
+ if (args.remainingMode === "full_rerun_diff") {
3393
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
3394
+ }
3169
3395
  return `${lead} Then take one deeper sift pass before raw traceback.`;
3170
3396
  }
3171
3397
  return args.nextBestAction.note;
@@ -3174,13 +3400,13 @@ function extractMiniDiff(input, bucket) {
3174
3400
  if (bucket.type !== "contract_snapshot_drift") {
3175
3401
  return null;
3176
3402
  }
3177
- const addedPaths = unique(
3403
+ const addedPaths = unique2(
3178
3404
  [...input.matchAll(/[+-]\s+'(\/api\/[^']+)'/g)].map((match) => match[1])
3179
3405
  ).length;
3180
- const removedModels = unique(
3406
+ const removedModels = unique2(
3181
3407
  [...input.matchAll(/[+-]\s+'([A-Za-z0-9._/-]+-[A-Za-z0-9._-]+)'/g)].map((match) => match[1])
3182
3408
  ).length;
3183
- const changedTaskMappings = unique(
3409
+ const changedTaskMappings = unique2(
3184
3410
  [...input.matchAll(/[+-]\s+'([a-z]+(?:_[a-z0-9]+)+)'/g)].map((match) => match[1])
3185
3411
  ).length;
3186
3412
  if (addedPaths === 0 && removedModels === 0 && changedTaskMappings === 0) {
@@ -3281,7 +3507,7 @@ function pickUnknownAnchor(args) {
3281
3507
  }
3282
3508
  const label = args.kind === "error" ? args.analysis.visibleErrorLabels[0] : args.analysis.visibleFailedLabels[0];
3283
3509
  if (label) {
3284
- const normalizedLabel = normalizeTestId(label);
3510
+ const normalizedLabel = normalizeTestId2(label);
3285
3511
  const fileMatch = normalizedLabel.match(/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+)\b/);
3286
3512
  const file = fileMatch?.[1] ?? normalizedLabel.split("::")[0] ?? null;
3287
3513
  return {
@@ -3409,16 +3635,29 @@ function buildDecisionLine(contract) {
3409
3635
  }
3410
3636
  return "- Decision: raw only if exact traceback is required.";
3411
3637
  }
3638
+ function buildRemainingPassLine(contract) {
3639
+ if (contract.remaining_mode === "subset_rerun") {
3640
+ return "- Remaining pass: showing only what is still failing from the cached baseline.";
3641
+ }
3642
+ if (contract.remaining_mode === "full_rerun_diff") {
3643
+ return "- Remaining pass: full rerun analyzed against the cached baseline because narrowed rerun is not available for this runner.";
3644
+ }
3645
+ return null;
3646
+ }
3412
3647
  function buildComparisonLines(contract) {
3413
3648
  const lines = [];
3649
+ const resolvedSummary = buildTestTargetSummary(contract.resolved_tests);
3650
+ const remainingSummary = buildTestTargetSummary(contract.remaining_tests);
3414
3651
  if (contract.resolved_tests.length > 0) {
3652
+ const summaryText = describeTargetSummary(resolvedSummary);
3415
3653
  lines.push(
3416
- `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set.`
3654
+ `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set${summaryText ? ` ${summaryText}` : ""}.`
3417
3655
  );
3418
3656
  }
3419
- if (contract.resolved_tests.length > 0 && contract.remaining_tests.length > 0) {
3657
+ if (contract.remaining_tests.length > 0 && (contract.resolved_tests.length > 0 || contract.remaining_mode !== "none")) {
3658
+ const summaryText = describeTargetSummary(remainingSummary);
3420
3659
  lines.push(
3421
- `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}.`
3660
+ `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}${summaryText ? ` ${summaryText}` : ""}.`
3422
3661
  );
3423
3662
  }
3424
3663
  return lines;
@@ -3490,6 +3729,13 @@ function resolveBucketFixHint(args) {
3490
3729
  return "Inspect the first visible anchor for this bucket, apply the smallest fix that explains it, then rerun the full suite at standard.";
3491
3730
  }
3492
3731
  function deriveBucketSuspectKind(args) {
3732
+ const pathCandidates = extractBucketPathCandidates({
3733
+ bucket: args.bucket,
3734
+ readTarget: args.readTarget
3735
+ });
3736
+ const hasConfigCandidate = pathCandidates.some((candidate) => isConfigPathCandidate(candidate));
3737
+ const hasAppCandidate = pathCandidates.some((candidate) => isAppPathCandidate(candidate));
3738
+ const hasTestCandidate = pathCandidates.some((candidate) => isTestPathCandidate(candidate));
3493
3739
  if (args.bucket.type === "shared_environment_blocker" || args.bucket.type === "fixture_guard_failure" || args.bucket.type === "permission_denied_failure" || args.bucket.type === "django_db_access_denied" || args.bucket.type === "network_failure" || args.bucket.type === "service_unavailable" || args.bucket.type === "db_connection_failure" || args.bucket.type === "auth_bypass_absent" || args.bucket.type === "fixture_teardown_failure") {
3494
3740
  return "environment";
3495
3741
  }
@@ -3499,6 +3745,18 @@ function deriveBucketSuspectKind(args) {
3499
3745
  if (args.bucket.type === "contract_snapshot_drift" || args.bucket.type === "snapshot_mismatch" || args.bucket.type === "flaky_test_detected" || args.bucket.type === "xfail_strict_unexpected_pass") {
3500
3746
  return "test";
3501
3747
  }
3748
+ if (args.bucket.type === "golden_output_drift") {
3749
+ if (hasConfigCandidate) {
3750
+ return "config";
3751
+ }
3752
+ if (hasAppCandidate) {
3753
+ return "app_code";
3754
+ }
3755
+ if (hasTestCandidate) {
3756
+ return "test";
3757
+ }
3758
+ return "unknown";
3759
+ }
3502
3760
  if (args.bucket.type === "xdist_worker_crash" || args.bucket.type === "timeout_failure" || args.bucket.type === "async_event_loop_failure" || args.bucket.type === "subprocess_crash_segfault" || args.bucket.type === "memory_error" || args.bucket.type === "resource_leak_warning" || args.bucket.type === "interrupted_run") {
3503
3761
  return "tooling";
3504
3762
  }
@@ -3506,11 +3764,13 @@ function deriveBucketSuspectKind(args) {
3506
3764
  return "unknown";
3507
3765
  }
3508
3766
  if (args.bucket.type === "assertion_failure" || args.bucket.type === "runtime_failure" || args.bucket.type === "type_error_failure" || args.bucket.type === "serialization_encoding_failure") {
3509
- const file = args.readTarget?.file ?? "";
3510
- if (file.startsWith("src/")) {
3767
+ if (hasConfigCandidate) {
3768
+ return "config";
3769
+ }
3770
+ if (hasAppCandidate) {
3511
3771
  return "app_code";
3512
3772
  }
3513
- if (file.startsWith("test/") || file.startsWith("tests/")) {
3773
+ if (hasTestCandidate) {
3514
3774
  return "test";
3515
3775
  }
3516
3776
  return "unknown";
@@ -3563,6 +3823,10 @@ function buildStandardBucketSupport(args) {
3563
3823
  }
3564
3824
  function renderStandard(args) {
3565
3825
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
3826
+ const remainingPassLine = buildRemainingPassLine(args.contract);
3827
+ if (remainingPassLine) {
3828
+ lines.push(remainingPassLine);
3829
+ }
3566
3830
  if (args.contract.main_buckets.length > 0) {
3567
3831
  for (const bucket of args.contract.main_buckets.slice(0, 3)) {
3568
3832
  const rawBucket = args.buckets[bucket.bucket_index - 1];
@@ -3590,13 +3854,19 @@ function renderStandard(args) {
3590
3854
  }
3591
3855
  }
3592
3856
  lines.push(buildDecisionLine(args.contract));
3593
- lines.push(`- Likely owner: ${formatSuspectKindLabel(args.contract.primary_suspect_kind)}`);
3857
+ if (args.contract.main_buckets.length > 0 && args.contract.primary_suspect_kind !== "unknown") {
3858
+ lines.push(`- Likely owner: ${formatSuspectKindLabel(args.contract.primary_suspect_kind)}`);
3859
+ }
3594
3860
  lines.push(`- Next: ${args.contract.next_best_action.note}`);
3595
3861
  lines.push(buildStopSignal(args.contract));
3596
3862
  return lines.join("\n");
3597
3863
  }
3598
3864
  function renderFocused(args) {
3599
3865
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
3866
+ const remainingPassLine = buildRemainingPassLine(args.contract);
3867
+ if (remainingPassLine) {
3868
+ lines.push(remainingPassLine);
3869
+ }
3600
3870
  for (const bucket of args.contract.main_buckets) {
3601
3871
  const rawBucket = args.buckets[bucket.bucket_index - 1];
3602
3872
  lines.push(
@@ -3616,6 +3886,10 @@ function renderFocused(args) {
3616
3886
  }
3617
3887
  function renderVerbose(args) {
3618
3888
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
3889
+ const remainingPassLine = buildRemainingPassLine(args.contract);
3890
+ if (remainingPassLine) {
3891
+ lines.push(remainingPassLine);
3892
+ }
3619
3893
  for (const bucket of args.contract.main_buckets) {
3620
3894
  const rawBucket = args.buckets[bucket.bucket_index - 1];
3621
3895
  lines.push(
@@ -3665,7 +3939,9 @@ function buildTestStatusDiagnoseContract(args) {
3665
3939
  count: residuals.remainingFailed
3666
3940
  })
3667
3941
  ].filter((bucket) => Boolean(bucket));
3668
- const buckets = prioritizeBuckets([...combinedBuckets, ...unknownBuckets]).slice(0, 3);
3942
+ const buckets = prioritizeBuckets(
3943
+ [...combinedBuckets, ...unknownBuckets].map((bucket) => specializeBucket(bucket))
3944
+ ).slice(0, 3);
3669
3945
  const simpleCollectionFailure = args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && buckets.length === 0;
3670
3946
  const dominantBucket = buckets.map((bucket, index) => ({
3671
3947
  bucket,
@@ -3713,9 +3989,9 @@ function buildTestStatusDiagnoseContract(args) {
3713
3989
  mini_diff: extractMiniDiff(args.input, bucket)
3714
3990
  };
3715
3991
  });
3716
- const resolvedTests = unique(args.resolvedTests ?? []);
3717
- const remainingTests = unique(
3718
- args.remainingTests ?? unique([...args.analysis.visibleErrorLabels, ...args.analysis.visibleFailedLabels])
3992
+ const resolvedTests = unique2(args.resolvedTests ?? []);
3993
+ const remainingTests = unique2(
3994
+ args.remainingTests ?? unique2([...args.analysis.visibleErrorLabels, ...args.analysis.visibleFailedLabels])
3719
3995
  );
3720
3996
  const primarySuspectKind = derivePrimarySuspectKind({
3721
3997
  mainBuckets,
@@ -3765,6 +4041,7 @@ function buildTestStatusDiagnoseContract(args) {
3765
4041
  raw_needed: rawNeeded,
3766
4042
  additional_source_read_likely_low_value: diagnosisComplete && !rawNeeded,
3767
4043
  read_raw_only_if: rawNeeded ? "you still need exact traceback lines after focused or verbose detail" : null,
4044
+ remaining_mode: args.remainingMode ?? "none",
3768
4045
  dominant_blocker_bucket_index: dominantBlockerBucketIndex,
3769
4046
  primary_suspect_kind: primarySuspectKind,
3770
4047
  confidence_reason: "Unknown or low-confidence buckets remain; one deeper sift pass is justified.",
@@ -3795,7 +4072,8 @@ function buildTestStatusDiagnoseContract(args) {
3795
4072
  readTargets,
3796
4073
  hasSecondaryVisibleBucket: mainBuckets.some(
3797
4074
  (bucket) => bucket.secondary_visible_despite_blocker
3798
- )
4075
+ ),
4076
+ remainingMode: args.contractOverrides?.remaining_mode ?? baseContract.remaining_mode
3799
4077
  })
3800
4078
  }
3801
4079
  };
@@ -3860,6 +4138,7 @@ function buildTestStatusAnalysisContext(args) {
3860
4138
  `- diagnosis_complete=${args.contract.diagnosis_complete}`,
3861
4139
  `- raw_needed=${args.contract.raw_needed}`,
3862
4140
  `- decision=${args.contract.decision}`,
4141
+ `- remaining_mode=${args.contract.remaining_mode}`,
3863
4142
  `- provider_used=${args.contract.provider_used}`,
3864
4143
  `- provider_failed=${args.contract.provider_failed}`,
3865
4144
  `- raw_slice_strategy=${args.contract.raw_slice_strategy}`,
@@ -4427,7 +4706,7 @@ function detectTestRunner(input) {
4427
4706
  if (/^\s*Test Suites:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m.test(input) || /^\s*Tests:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m.test(input)) {
4428
4707
  return "jest";
4429
4708
  }
4430
- if (/\bpytest\b/i.test(input) || /^\s*=+.*\b\d+\s+failed\b.*=+\s*$/m.test(input) || /\bcollected\s+\d+\s+items\b/i.test(input)) {
4709
+ if (/\bpytest\b/i.test(input) || /^\s*(?:FAILED|ERROR)\s+[A-Za-z0-9_./-]+::[^\n]+$/m.test(input) || /^\s*=+.*\b\d+\s+failed\b.*=+\s*$/m.test(input) || /\bcollected\s+\d+\s+items\b/i.test(input)) {
4431
4710
  return "pytest";
4432
4711
  }
4433
4712
  return "unknown";
@@ -5632,6 +5911,9 @@ function classifyBucketTypeFromReason(reason) {
5632
5911
  if (reason.startsWith("missing module:")) {
5633
5912
  return "import_dependency_failure";
5634
5913
  }
5914
+ if (reason.startsWith("golden output drift:")) {
5915
+ return "golden_output_drift";
5916
+ }
5635
5917
  if (reason.startsWith("assertion failed:")) {
5636
5918
  return "assertion_failure";
5637
5919
  }
@@ -6888,7 +7170,7 @@ function prepareInput(raw, config) {
6888
7170
  function escapeRegExp2(value) {
6889
7171
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
6890
7172
  }
6891
- function unique2(values) {
7173
+ function unique3(values) {
6892
7174
  return [...new Set(values)];
6893
7175
  }
6894
7176
  var genericBucketSearchTerms = /* @__PURE__ */ new Set([
@@ -6989,7 +7271,7 @@ function extractBucketSearchTerms(args) {
6989
7271
  ...args.bucket.evidence,
6990
7272
  ...args.readTargets.filter((target) => target.bucket_index === args.bucket.bucket_index).flatMap((target) => [target.context_hint.search_hint ?? "", target.file])
6991
7273
  ];
6992
- const prioritized = unique2(
7274
+ const prioritized = unique3(
6993
7275
  sources.flatMap((value) => collectCandidateSearchTerms(value)).filter(isHighSignalSearchTerm)
6994
7276
  ).sort((left, right) => {
6995
7277
  const delta = scoreSearchTerm(right) - scoreSearchTerm(left);
@@ -7001,7 +7283,7 @@ function extractBucketSearchTerms(args) {
7001
7283
  if (prioritized.length > 0) {
7002
7284
  return prioritized.slice(0, 6);
7003
7285
  }
7004
- const fallbackTerms = unique2(
7286
+ const fallbackTerms = unique3(
7005
7287
  [...args.bucket.evidence, args.bucket.root_cause].flatMap((value) => value.split(/->|:/).map((part) => normalizeSearchTerm(part))).filter(isHighSignalSearchTerm)
7006
7288
  );
7007
7289
  return fallbackTerms.slice(0, 4);
@@ -7039,7 +7321,7 @@ function buildLineWindows(args) {
7039
7321
  return [...selected].sort((left, right) => left - right).map((index) => args.lines[index]);
7040
7322
  }
7041
7323
  function buildPriorityLineGroup(args) {
7042
- return unique2([
7324
+ return unique3([
7043
7325
  ...args.indexes.map((index) => args.lines[index]).filter(Boolean),
7044
7326
  ...buildLineWindows(args)
7045
7327
  ]);
@@ -7048,7 +7330,7 @@ function collapseSelectedLines(args) {
7048
7330
  if (args.lines.length === 0) {
7049
7331
  return args.fallback();
7050
7332
  }
7051
- const joined = unique2(args.lines).join("\n").trim();
7333
+ const joined = unique3(args.lines).join("\n").trim();
7052
7334
  if (joined.length === 0) {
7053
7335
  return args.fallback();
7054
7336
  }
@@ -7199,7 +7481,7 @@ function buildTestStatusRawSlice(args) {
7199
7481
  const indexes = lines.map(
7200
7482
  (line, index) => bucketTerms.some((term) => new RegExp(escapeRegExp2(term), "i").test(line)) ? index : -1
7201
7483
  ).filter((index) => index >= 0);
7202
- return unique2([
7484
+ return unique3([
7203
7485
  ...indexes.map((index) => lines[index]).filter(Boolean),
7204
7486
  ...buildPriorityLineGroup({
7205
7487
  lines,
@@ -7242,7 +7524,7 @@ function buildTestStatusRawSlice(args) {
7242
7524
  return [
7243
7525
  buildPriorityLineGroup({
7244
7526
  lines,
7245
- indexes: unique2([...searchHintIndexes, ...fileIndexes]),
7527
+ indexes: unique3([...searchHintIndexes, ...fileIndexes]),
7246
7528
  radius,
7247
7529
  maxLines
7248
7530
  })
@@ -7261,7 +7543,7 @@ function buildTestStatusRawSlice(args) {
7261
7543
  const selected = collapseSelectedLineGroups({
7262
7544
  groups: [
7263
7545
  ...targetGroups,
7264
- unique2([
7546
+ unique3([
7265
7547
  ...summaryIndexes.map((index) => lines[index]).filter(Boolean),
7266
7548
  ...buildLineWindows({
7267
7549
  lines,
@@ -7312,7 +7594,7 @@ function buildGenericRawSlice(args) {
7312
7594
 
7313
7595
  // src/core/run.ts
7314
7596
  var RETRY_DELAY_MS = 300;
7315
- var PROVIDER_PENDING_NOTICE_DELAY_MS = 150;
7597
+ var PENDING_NOTICE_DELAY_MS = 150;
7316
7598
  function estimateTokenCount(text) {
7317
7599
  return Math.max(1, Math.ceil(text.length / 4));
7318
7600
  }
@@ -7393,17 +7675,16 @@ function buildDryRunOutput(args) {
7393
7675
  async function delay(ms) {
7394
7676
  await new Promise((resolve) => setTimeout(resolve, ms));
7395
7677
  }
7396
- function startProviderPendingNotice() {
7397
- if (!process.stderr.isTTY) {
7678
+ function startPendingNotice(message, enabled) {
7679
+ if (!enabled) {
7398
7680
  return () => {
7399
7681
  };
7400
7682
  }
7401
- const message = "sift waiting for provider...";
7402
7683
  let shown = false;
7403
7684
  const timer = setTimeout(() => {
7404
7685
  shown = true;
7405
7686
  process.stderr.write(`${message}\r`);
7406
- }, PROVIDER_PENDING_NOTICE_DELAY_MS);
7687
+ }, PENDING_NOTICE_DELAY_MS);
7407
7688
  return () => {
7408
7689
  clearTimeout(timer);
7409
7690
  if (!shown) {
@@ -7433,7 +7714,10 @@ async function generateWithRetry(args) {
7433
7714
  responseMode: args.responseMode,
7434
7715
  jsonResponseFormat: args.request.config.provider.jsonResponseFormat
7435
7716
  });
7436
- const stopPendingNotice = startProviderPendingNotice();
7717
+ const stopPendingNotice = startPendingNotice(
7718
+ "sift waiting for provider...",
7719
+ Boolean(process.stderr.isTTY)
7720
+ );
7437
7721
  try {
7438
7722
  try {
7439
7723
  return await generate();
@@ -7459,6 +7743,34 @@ function hasRecognizableTestStatusSignal(input) {
7459
7743
  const analysis = analyzeTestStatus(input);
7460
7744
  return analysis.collectionErrorCount !== void 0 || analysis.noTestsCollected || analysis.interrupted || analysis.failed > 0 || analysis.errors > 0 || analysis.passed > 0 || analysis.inlineItems.length > 0 || analysis.buckets.length > 0;
7461
7745
  }
7746
+ function shouldUseCompactTestStatusBypass(args) {
7747
+ if (args.request.policyName !== "test-status") {
7748
+ return false;
7749
+ }
7750
+ if (args.request.detail && args.request.detail !== "standard") {
7751
+ return false;
7752
+ }
7753
+ if (args.request.goal === "diagnose" && args.request.format === "json") {
7754
+ return false;
7755
+ }
7756
+ if (args.request.testStatusContext?.resolvedTests?.length || args.request.testStatusContext?.remainingTests?.length || args.request.testStatusContext?.remainingSubsetAvailable || args.request.testStatusContext?.remainingMode && args.request.testStatusContext.remainingMode !== "none") {
7757
+ return false;
7758
+ }
7759
+ return args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && args.analysis.inlineItems.length === 0 && args.analysis.buckets.length === 0 || args.analysis.noTestsCollected || args.analysis.interrupted && args.analysis.failed === 0 && args.analysis.errors === 0;
7760
+ }
7761
+ function sanitizeProviderFailureReason(reason) {
7762
+ const normalized = reason.trim();
7763
+ const httpStatus = normalized.match(/\bHTTP\s+(\d{3})\b/i)?.[1];
7764
+ if (httpStatus) {
7765
+ return `provider follow-up unavailable (HTTP ${httpStatus})`;
7766
+ }
7767
+ if (/unterminated string|invalid json|unexpected token|json at position|schema|zod|parse/i.test(
7768
+ normalized
7769
+ )) {
7770
+ return "provider follow-up returned unusable structured output";
7771
+ }
7772
+ return "provider follow-up failed";
7773
+ }
7462
7774
  function renderTestStatusDecisionOutput(args) {
7463
7775
  if (args.request.goal === "diagnose" && args.request.format === "json") {
7464
7776
  return JSON.stringify(
@@ -7480,6 +7792,7 @@ function renderTestStatusDecisionOutput(args) {
7480
7792
  return args.decision.standardText;
7481
7793
  }
7482
7794
  function buildTestStatusProviderFailureDecision(args) {
7795
+ const sanitizedReason = sanitizeProviderFailureReason(args.reason);
7483
7796
  const concreteReadTarget = args.baseDecision.contract.read_targets.find(
7484
7797
  (target) => Boolean(target.file)
7485
7798
  );
@@ -7492,6 +7805,7 @@ function buildTestStatusProviderFailureDecision(args) {
7492
7805
  analysis: args.analysis,
7493
7806
  resolvedTests: args.baseDecision.contract.resolved_tests,
7494
7807
  remainingTests: args.baseDecision.contract.remaining_tests,
7808
+ remainingMode: args.request.testStatusContext?.remainingMode,
7495
7809
  contractOverrides: {
7496
7810
  ...args.baseDecision.contract,
7497
7811
  diagnosis_complete: false,
@@ -7507,7 +7821,9 @@ function buildTestStatusProviderFailureDecision(args) {
7507
7821
  next_best_action: {
7508
7822
  code: "read_source_for_bucket",
7509
7823
  bucket_index: args.baseDecision.contract.dominant_blocker_bucket_index ?? concreteReadTarget.bucket_index,
7510
- note: `Provider follow-up failed (${args.reason}). The heuristic anchor is concrete enough to inspect source for the current bucket before reading raw traceback.`
7824
+ note: `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
7825
+ 1
7826
+ )}. The heuristic anchor is concrete enough to inspect source for the current bucket before reading raw traceback.`
7511
7827
  }
7512
7828
  }
7513
7829
  });
@@ -7518,6 +7834,7 @@ function buildTestStatusProviderFailureDecision(args) {
7518
7834
  analysis: args.analysis,
7519
7835
  resolvedTests: args.baseDecision.contract.resolved_tests,
7520
7836
  remainingTests: args.baseDecision.contract.remaining_tests,
7837
+ remainingMode: args.request.testStatusContext?.remainingMode,
7521
7838
  contractOverrides: {
7522
7839
  ...args.baseDecision.contract,
7523
7840
  diagnosis_complete: false,
@@ -7533,7 +7850,11 @@ function buildTestStatusProviderFailureDecision(args) {
7533
7850
  next_best_action: {
7534
7851
  code: shouldZoomFirst ? "insufficient_signal" : "read_raw_for_exact_traceback",
7535
7852
  bucket_index: args.baseDecision.contract.dominant_blocker_bucket_index ?? args.baseDecision.contract.main_buckets[0]?.bucket_index ?? null,
7536
- note: shouldZoomFirst ? `Provider follow-up failed (${args.reason}). Use one deeper sift pass on the same cached output before reading raw traceback lines.` : `Provider follow-up failed (${args.reason}). Read raw traceback only if exact stack lines are still needed.`
7853
+ note: shouldZoomFirst ? `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
7854
+ 1
7855
+ )}. Use one deeper sift pass on the same cached output before reading raw traceback lines.` : `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
7856
+ 1
7857
+ )}. Read raw traceback only if exact stack lines are still needed.`
7537
7858
  }
7538
7859
  }
7539
7860
  });
@@ -7554,23 +7875,28 @@ async function runSiftCore(request, recorder) {
7554
7875
  const provider = createProvider(request.config);
7555
7876
  const hasTestStatusSignal = request.policyName === "test-status" && hasRecognizableTestStatusSignal(heuristicInput);
7556
7877
  const testStatusAnalysis = hasTestStatusSignal ? analyzeTestStatus(heuristicInput) : null;
7557
- const testStatusDecision = hasTestStatusSignal && testStatusAnalysis ? buildTestStatusDiagnoseContract({
7878
+ const useCompactTestStatusOutput = hasTestStatusSignal && testStatusAnalysis ? shouldUseCompactTestStatusBypass({
7879
+ request,
7880
+ analysis: testStatusAnalysis
7881
+ }) : false;
7882
+ const testStatusDecision = hasTestStatusSignal && testStatusAnalysis && !useCompactTestStatusOutput ? buildTestStatusDiagnoseContract({
7558
7883
  input: heuristicInput,
7559
7884
  analysis: testStatusAnalysis,
7560
7885
  resolvedTests: request.testStatusContext?.resolvedTests,
7561
- remainingTests: request.testStatusContext?.remainingTests
7886
+ remainingTests: request.testStatusContext?.remainingTests,
7887
+ remainingMode: request.testStatusContext?.remainingMode
7562
7888
  }) : null;
7563
7889
  const testStatusHeuristicOutput = testStatusDecision ? renderTestStatusDecisionOutput({
7564
7890
  request,
7565
7891
  decision: testStatusDecision
7566
- }) : null;
7892
+ }) : useCompactTestStatusOutput ? applyHeuristicPolicy("test-status", heuristicInput, "standard") : null;
7567
7893
  if (request.config.runtime.verbose) {
7568
7894
  process.stderr.write(
7569
7895
  `${pc2.dim("sift")} provider=${provider.name} model=${request.config.provider.model} base_url=${request.config.provider.baseUrl} input_chars=${prepared.meta.finalLength}
7570
7896
  `
7571
7897
  );
7572
7898
  }
7573
- const heuristicOutput = request.policyName === "test-status" ? testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
7899
+ const heuristicOutput = request.policyName === "test-status" ? useCompactTestStatusOutput ? testStatusHeuristicOutput : testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
7574
7900
  if (heuristicOutput) {
7575
7901
  if (request.config.runtime.verbose) {
7576
7902
  process.stderr.write(`${pc2.dim("sift")} heuristic=${request.policyName}
@@ -7694,6 +8020,7 @@ async function runSiftCore(request, recorder) {
7694
8020
  analysis: testStatusAnalysis,
7695
8021
  resolvedTests: request.testStatusContext?.resolvedTests,
7696
8022
  remainingTests: request.testStatusContext?.remainingTests,
8023
+ remainingMode: request.testStatusContext?.remainingMode,
7697
8024
  providerBucketSupplements: supplement.bucket_supplements,
7698
8025
  contractOverrides: {
7699
8026
  diagnosis_complete: supplement.diagnosis_complete,
@@ -7939,6 +8266,7 @@ var failureBucketTypeSchema = z3.enum([
7939
8266
  "import_dependency_failure",
7940
8267
  "collection_failure",
7941
8268
  "assertion_failure",
8269
+ "golden_output_drift",
7942
8270
  "runtime_failure",
7943
8271
  "interrupted_run",
7944
8272
  "no_tests_collected",
@@ -7979,7 +8307,19 @@ var cachedPytestStateSchema = z3.object({
7979
8307
  failingNodeIds: z3.array(z3.string()),
7980
8308
  remainingNodeIds: z3.array(z3.string()).optional()
7981
8309
  }).optional();
7982
- var cachedRunSchema = z3.object({
8310
+ var testRunnerSchema = z3.enum(["pytest", "vitest", "jest", "unknown"]);
8311
+ var cachedRunnerSubsetSchema = z3.object({
8312
+ available: z3.boolean(),
8313
+ strategy: z3.enum(["pytest-node-ids", "none"]),
8314
+ baseArgv: z3.array(z3.string()).min(1).optional()
8315
+ });
8316
+ var cachedRunnerStateSchema = z3.object({
8317
+ name: testRunnerSchema,
8318
+ failingTargets: z3.array(z3.string()),
8319
+ baselineCommand: cachedCommandSchema,
8320
+ subset: cachedRunnerSubsetSchema
8321
+ });
8322
+ var cachedRunV1Schema = z3.object({
7983
8323
  version: z3.literal(1),
7984
8324
  timestamp: z3.string(),
7985
8325
  presetName: z3.literal("test-status"),
@@ -7997,6 +8337,25 @@ var cachedRunSchema = z3.object({
7997
8337
  analysis: cachedAnalysisSchema,
7998
8338
  pytest: cachedPytestStateSchema
7999
8339
  });
8340
+ var cachedRunV2Schema = z3.object({
8341
+ version: z3.literal(2),
8342
+ timestamp: z3.string(),
8343
+ presetName: z3.literal("test-status"),
8344
+ cwd: z3.string(),
8345
+ commandKey: z3.string(),
8346
+ commandPreview: z3.string(),
8347
+ command: cachedCommandSchema,
8348
+ detail: detailSchema,
8349
+ exitCode: z3.number().int(),
8350
+ rawOutput: z3.string(),
8351
+ capture: z3.object({
8352
+ originalChars: countSchema,
8353
+ truncatedApplied: z3.boolean()
8354
+ }),
8355
+ analysis: cachedAnalysisSchema,
8356
+ runner: cachedRunnerStateSchema
8357
+ });
8358
+ var cachedRunSchema = z3.discriminatedUnion("version", [cachedRunV1Schema, cachedRunV2Schema]);
8000
8359
  var MissingCachedTestStatusRunError = class extends Error {
8001
8360
  constructor() {
8002
8361
  super(
@@ -8045,6 +8404,37 @@ function isPytestExecutable(value) {
8045
8404
  function isPythonExecutable(value) {
8046
8405
  return basenameMatches(value, /^python(?:\d+(?:\.\d+)*)?(?:\.exe)?$/i);
8047
8406
  }
8407
+ function detectRunnerFromCommand(command) {
8408
+ if (!command) {
8409
+ return "unknown";
8410
+ }
8411
+ if (command.mode === "argv") {
8412
+ const [first, second, third] = command.argv;
8413
+ if (first && isPytestExecutable(first)) {
8414
+ return "pytest";
8415
+ }
8416
+ if (first && isPythonExecutable(first) && second === "-m" && third === "pytest") {
8417
+ return "pytest";
8418
+ }
8419
+ if (first && basenameMatches(first, /^vitest(?:\.exe)?$/i)) {
8420
+ return "vitest";
8421
+ }
8422
+ if (first && basenameMatches(first, /^jest(?:\.exe)?$/i)) {
8423
+ return "jest";
8424
+ }
8425
+ return "unknown";
8426
+ }
8427
+ if (/\bpython(?:\d+(?:\.\d+)*)?\s+-m\s+pytest\b|\bpytest\b/i.test(command.shellCommand)) {
8428
+ return "pytest";
8429
+ }
8430
+ if (/\bvitest\b/i.test(command.shellCommand)) {
8431
+ return "vitest";
8432
+ }
8433
+ if (/\bjest\b/i.test(command.shellCommand)) {
8434
+ return "jest";
8435
+ }
8436
+ return "unknown";
8437
+ }
8048
8438
  var shortPytestOptionsWithValue = /* @__PURE__ */ new Set([
8049
8439
  "-c",
8050
8440
  "-k",
@@ -8139,26 +8529,52 @@ function buildCachedCommand(args) {
8139
8529
  }
8140
8530
  return void 0;
8141
8531
  }
8142
- function buildFailingNodeIds(analysis) {
8532
+ function buildFailingTargets(analysis) {
8533
+ const runner = analysis.runner;
8143
8534
  const values = [];
8144
8535
  for (const value of [...analysis.visibleErrorLabels, ...analysis.visibleFailedLabels]) {
8145
- if (value.length > 0 && !values.includes(value)) {
8146
- values.push(value);
8536
+ const normalized = normalizeFailingTarget(value, runner);
8537
+ if (normalized.length > 0 && !values.includes(normalized)) {
8538
+ values.push(normalized);
8147
8539
  }
8148
8540
  }
8149
8541
  return values;
8150
8542
  }
8151
- function buildCachedPytestState(args) {
8543
+ function buildCachedRunnerState(args) {
8152
8544
  const baseArgv = args.command?.mode === "argv" && isSubsetCapablePytestArgv(args.command.argv) ? [...args.command.argv] : void 0;
8545
+ const runnerName = args.analysis.runner !== "unknown" ? args.analysis.runner : detectRunnerFromCommand(args.command);
8153
8546
  return {
8154
- subsetCapable: Boolean(baseArgv),
8155
- baseArgv,
8156
- failingNodeIds: buildFailingNodeIds(args.analysis),
8157
- remainingNodeIds: args.remainingNodeIds
8547
+ name: runnerName,
8548
+ failingTargets: buildFailingTargets(args.analysis),
8549
+ baselineCommand: args.command,
8550
+ subset: {
8551
+ available: runnerName === "pytest" && Boolean(baseArgv),
8552
+ strategy: runnerName === "pytest" && baseArgv ? "pytest-node-ids" : "none",
8553
+ ...runnerName === "pytest" && baseArgv ? { baseArgv } : {}
8554
+ }
8158
8555
  };
8159
8556
  }
8557
+ function normalizeCwd(value) {
8558
+ return path7.resolve(value).replace(/\\/g, "/");
8559
+ }
8560
+ function buildTestStatusBaselineIdentity(args) {
8561
+ const cwd = normalizeCwd(args.cwd);
8562
+ const command = args.command ?? buildCachedCommand({
8563
+ shellCommand: args.shellCommand,
8564
+ command: args.shellCommand ? void 0 : args.commandPreview?.split(" ")
8565
+ });
8566
+ const mode = command?.mode ?? (args.shellCommand ? "shell" : "argv");
8567
+ const normalizedCommand = command?.mode === "argv" ? command.argv.join("") : command?.mode === "shell" ? command.shellCommand.trim().replace(/\s+/g, " ") : (args.commandPreview ?? "").trim().replace(/\s+/g, " ");
8568
+ return [cwd, args.runner, mode, normalizedCommand].join("");
8569
+ }
8160
8570
  function buildTestStatusCommandKey(args) {
8161
- return `${args.shellCommand ? "shell" : "argv"}:${args.commandPreview}`;
8571
+ return buildTestStatusBaselineIdentity({
8572
+ cwd: args.cwd ?? process.cwd(),
8573
+ runner: args.runner ?? "unknown",
8574
+ command: args.command,
8575
+ commandPreview: args.commandPreview,
8576
+ shellCommand: args.shellCommand
8577
+ });
8162
8578
  }
8163
8579
  function snapshotTestStatusAnalysis(analysis) {
8164
8580
  return {
@@ -8184,13 +8600,22 @@ function createCachedTestStatusRun(args) {
8184
8600
  command: args.command,
8185
8601
  shellCommand: args.shellCommand
8186
8602
  });
8603
+ const runnerName = args.analysis.runner !== "unknown" ? args.analysis.runner : detectRunnerFromCommand(command);
8604
+ const commandPreview = args.commandPreview ?? args.shellCommand ?? (args.command ?? []).join(" ");
8605
+ const commandKey = args.commandKey ?? buildTestStatusBaselineIdentity({
8606
+ cwd: args.cwd,
8607
+ runner: runnerName,
8608
+ command,
8609
+ commandPreview,
8610
+ shellCommand: args.shellCommand
8611
+ });
8187
8612
  return {
8188
- version: 1,
8613
+ version: 2,
8189
8614
  timestamp: args.timestamp ?? (/* @__PURE__ */ new Date()).toISOString(),
8190
8615
  presetName: "test-status",
8191
8616
  cwd: args.cwd,
8192
- commandKey: args.commandKey,
8193
- commandPreview: args.commandPreview,
8617
+ commandKey,
8618
+ commandPreview,
8194
8619
  command,
8195
8620
  detail: args.detail,
8196
8621
  exitCode: args.exitCode,
@@ -8200,13 +8625,61 @@ function createCachedTestStatusRun(args) {
8200
8625
  truncatedApplied: args.truncatedApplied
8201
8626
  },
8202
8627
  analysis: snapshotTestStatusAnalysis(args.analysis),
8203
- pytest: buildCachedPytestState({
8628
+ runner: buildCachedRunnerState({
8204
8629
  command,
8205
- analysis: args.analysis,
8206
- remainingNodeIds: args.remainingNodeIds
8630
+ analysis: args.analysis
8207
8631
  })
8208
8632
  };
8209
8633
  }
8634
+ function migrateCachedTestStatusRun(state) {
8635
+ if (state.version === 2) {
8636
+ return state;
8637
+ }
8638
+ const runnerFromOutput = detectTestRunner(state.rawOutput);
8639
+ const runner = runnerFromOutput !== "unknown" ? runnerFromOutput : detectRunnerFromCommand(state.command);
8640
+ const storedCommand = state.command;
8641
+ const fallbackBaseArgv = !storedCommand && state.pytest?.baseArgv ? {
8642
+ mode: "argv",
8643
+ argv: [...state.pytest.baseArgv]
8644
+ } : void 0;
8645
+ const baselineCommand = storedCommand ?? fallbackBaseArgv;
8646
+ const commandPreview = state.commandPreview ?? (baselineCommand?.mode === "argv" ? baselineCommand.argv.join(" ") : baselineCommand?.mode === "shell" ? baselineCommand.shellCommand : "");
8647
+ const commandKey = buildTestStatusBaselineIdentity({
8648
+ cwd: state.cwd,
8649
+ runner,
8650
+ command: baselineCommand,
8651
+ commandPreview
8652
+ });
8653
+ return {
8654
+ version: 2,
8655
+ timestamp: state.timestamp,
8656
+ presetName: state.presetName,
8657
+ cwd: state.cwd,
8658
+ commandKey,
8659
+ commandPreview,
8660
+ command: state.command,
8661
+ detail: state.detail,
8662
+ exitCode: state.exitCode,
8663
+ rawOutput: state.rawOutput,
8664
+ capture: state.capture,
8665
+ analysis: state.analysis,
8666
+ runner: {
8667
+ name: runner,
8668
+ failingTargets: [...new Set((state.pytest?.failingNodeIds ?? []).map(
8669
+ (target) => normalizeFailingTarget(target, runner)
8670
+ ))],
8671
+ baselineCommand,
8672
+ subset: {
8673
+ available: runner === "pytest" && Boolean(state.pytest?.baseArgv),
8674
+ strategy: runner === "pytest" && state.pytest?.baseArgv ? "pytest-node-ids" : "none",
8675
+ ...runner === "pytest" && state.pytest?.baseArgv ? {
8676
+ baseArgv: [...state.pytest.baseArgv]
8677
+ } : {}
8678
+ }
8679
+ },
8680
+ ...fallbackBaseArgv ? { runnerMigrationFallbackUsed: true } : {}
8681
+ };
8682
+ }
8210
8683
  function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
8211
8684
  let raw = "";
8212
8685
  try {
@@ -8218,7 +8691,7 @@ function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
8218
8691
  throw new InvalidCachedTestStatusRunError();
8219
8692
  }
8220
8693
  try {
8221
- return cachedRunSchema.parse(JSON.parse(raw));
8694
+ return migrateCachedTestStatusRun(cachedRunSchema.parse(JSON.parse(raw)));
8222
8695
  } catch {
8223
8696
  throw new InvalidCachedTestStatusRunError();
8224
8697
  }
@@ -8247,7 +8720,7 @@ function getNextEscalationDetail(detail) {
8247
8720
  return null;
8248
8721
  }
8249
8722
  function buildTargetDelta(args) {
8250
- if (args.previous.presetName !== "test-status" || args.current.presetName !== "test-status" || args.previous.cwd !== args.current.cwd || args.previous.commandKey !== args.current.commandKey) {
8723
+ if (args.previous.presetName !== "test-status" || args.current.presetName !== "test-status" || args.previous.cwd !== args.current.cwd || args.previous.commandKey !== args.current.commandKey || args.previous.runner.name !== args.current.runner.name || args.previous.runner.name === "unknown") {
8251
8724
  return {
8252
8725
  comparable: false,
8253
8726
  resolved: [],
@@ -8255,16 +8728,8 @@ function buildTargetDelta(args) {
8255
8728
  introduced: []
8256
8729
  };
8257
8730
  }
8258
- if (!args.previous.pytest || !args.current.pytest) {
8259
- return {
8260
- comparable: false,
8261
- resolved: [],
8262
- remaining: [],
8263
- introduced: []
8264
- };
8265
- }
8266
- const previousTargets = args.previous.pytest.failingNodeIds;
8267
- const currentTargets = args.current.pytest.failingNodeIds;
8731
+ const previousTargets = args.previous.runner.failingTargets;
8732
+ const currentTargets = args.current.runner.failingTargets;
8268
8733
  const currentTargetSet = new Set(currentTargets);
8269
8734
  const previousTargetSet = new Set(previousTargets);
8270
8735
  return {
@@ -8277,8 +8742,11 @@ function buildTargetDelta(args) {
8277
8742
  function diffTestStatusTargets(args) {
8278
8743
  return buildTargetDelta(args);
8279
8744
  }
8745
+ function isRemainingSubsetAvailable(state) {
8746
+ return state.runner.name === "pytest" && state.runner.subset.available;
8747
+ }
8280
8748
  function getRemainingPytestNodeIds(state) {
8281
- return state.pytest?.remainingNodeIds ?? state.pytest?.failingNodeIds ?? [];
8749
+ return state.runner.name === "pytest" ? state.runner.failingTargets : [];
8282
8750
  }
8283
8751
  function diffTestStatusRuns(args) {
8284
8752
  const targetDelta = buildTargetDelta(args);
@@ -8289,21 +8757,45 @@ function diffTestStatusRuns(args) {
8289
8757
  args.current.analysis.buckets.map((bucket) => [buildBucketSignature(bucket), bucket])
8290
8758
  );
8291
8759
  const lines = [];
8292
- if (targetDelta.resolved.length > 0) {
8293
- lines.push(
8294
- `- Resolved: ${formatCount3(targetDelta.resolved.length, "failing test/module", "failing tests/modules")} no longer appear${appendPreview(targetDelta.resolved)}.`
8295
- );
8296
- }
8297
- if (targetDelta.remaining.length > 0) {
8298
- lines.push(
8299
- `- Remaining: ${formatCount3(targetDelta.remaining.length, "failing test/module", "failing tests/modules")} still appear${appendPreview(targetDelta.remaining)}.`
8300
- );
8301
- }
8302
- if (targetDelta.introduced.length > 0) {
8760
+ const resolvedSummary = buildTestTargetSummary(targetDelta.resolved);
8761
+ const remainingSummary = buildTestTargetSummary(targetDelta.remaining);
8762
+ const introducedSummary = buildTestTargetSummary(targetDelta.introduced);
8763
+ const pushTargetLine = (args2) => {
8764
+ if (args2.summary.count === 0) {
8765
+ return;
8766
+ }
8767
+ const summaryText = describeTargetSummary(args2.summary);
8768
+ if (summaryText) {
8769
+ lines.push(
8770
+ `- ${args2.kind}: ${formatCount3(args2.summary.count, args2.countLabel, `${args2.countLabel}s`)} ${args2.verb} ${summaryText}.`
8771
+ );
8772
+ return;
8773
+ }
8303
8774
  lines.push(
8304
- `- New: ${formatCount3(targetDelta.introduced.length, "failing test/module", "failing tests/modules")} appeared${appendPreview(targetDelta.introduced)}.`
8775
+ `- ${args2.kind}: ${formatCount3(args2.summary.count, args2.countLabel, `${args2.countLabel}s`)} ${args2.verb}${appendPreview(args2.fallbackValues)}.`
8305
8776
  );
8306
- }
8777
+ };
8778
+ pushTargetLine({
8779
+ kind: "Resolved",
8780
+ summary: resolvedSummary,
8781
+ countLabel: "failing target",
8782
+ fallbackValues: targetDelta.resolved,
8783
+ verb: "no longer appear"
8784
+ });
8785
+ pushTargetLine({
8786
+ kind: "Remaining",
8787
+ summary: remainingSummary,
8788
+ countLabel: "failing target",
8789
+ fallbackValues: targetDelta.remaining,
8790
+ verb: "still appear"
8791
+ });
8792
+ pushTargetLine({
8793
+ kind: "New",
8794
+ summary: introducedSummary,
8795
+ countLabel: "failing target",
8796
+ fallbackValues: targetDelta.introduced,
8797
+ verb: "appeared"
8798
+ });
8307
8799
  for (const bucket of args.current.analysis.buckets) {
8308
8800
  const signature = buildBucketSignature(bucket);
8309
8801
  const previous = previousBuckets.get(signature);
@@ -8331,19 +8823,19 @@ function diffTestStatusRuns(args) {
8331
8823
  }
8332
8824
  }
8333
8825
  return {
8334
- lines: lines.slice(0, 4),
8335
- remainingNodeIds: targetDelta.comparable ? targetDelta.remaining : void 0
8826
+ lines: lines.slice(0, 4)
8336
8827
  };
8337
8828
  }
8338
8829
  function getCachedRerunCommand(state) {
8339
- if (state.command?.mode === "argv") {
8830
+ const baselineCommand = state.runner.baselineCommand ?? state.command;
8831
+ if (baselineCommand?.mode === "argv") {
8340
8832
  return {
8341
- command: [...state.command.argv]
8833
+ command: [...baselineCommand.argv]
8342
8834
  };
8343
8835
  }
8344
- if (state.command?.mode === "shell") {
8836
+ if (baselineCommand?.mode === "shell") {
8345
8837
  return {
8346
- shellCommand: state.command.shellCommand
8838
+ shellCommand: baselineCommand.shellCommand
8347
8839
  };
8348
8840
  }
8349
8841
  throw new Error(
@@ -8351,13 +8843,13 @@ function getCachedRerunCommand(state) {
8351
8843
  );
8352
8844
  }
8353
8845
  function getRemainingPytestRerunCommand(state) {
8354
- if (!state.pytest?.subsetCapable || !state.pytest.baseArgv) {
8846
+ if (!isRemainingSubsetAvailable(state) || !state.runner.subset.baseArgv) {
8355
8847
  throw new Error(
8356
8848
  "Cached test-status run cannot use `sift rerun --remaining`. Automatic remaining-subset reruns currently support only argv-mode `pytest ...` or `python -m pytest ...` commands. Run a narrowed command manually with `sift exec --preset test-status -- <narrowed pytest command>`."
8357
8849
  );
8358
8850
  }
8359
8851
  const remainingNodeIds = getRemainingPytestNodeIds(state);
8360
- return [...state.pytest.baseArgv, ...remainingNodeIds];
8852
+ return [...state.runner.subset.baseArgv, ...remainingNodeIds];
8361
8853
  }
8362
8854
 
8363
8855
  // src/core/escalate.ts
@@ -8405,7 +8897,8 @@ async function runEscalate(request) {
8405
8897
  outputContract: request.outputContract,
8406
8898
  fallbackJson: request.fallbackJson,
8407
8899
  testStatusContext: {
8408
- remainingSubsetAvailable: Boolean(state.pytest?.subsetCapable) && (state.pytest?.failingNodeIds.length ?? 0) > 0
8900
+ remainingSubsetAvailable: isRemainingSubsetAvailable(state) && state.runner.failingTargets.length > 0,
8901
+ remainingMode: "none"
8409
8902
  }
8410
8903
  });
8411
8904
  let output = result.output;
@@ -8619,8 +9112,9 @@ async function runTestStatusWatch(request, cycles) {
8619
9112
  testStatusContext: {
8620
9113
  ...request.testStatusContext,
8621
9114
  resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
8622
- remainingTests: targetDelta?.remaining ?? currentRun.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
8623
- remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (Boolean(currentRun.pytest?.subsetCapable) && (currentRun.pytest?.failingNodeIds.length ?? 0) > 0)
9115
+ remainingTests: targetDelta?.remaining ?? currentRun.runner.failingTargets ?? request.testStatusContext?.remainingTests,
9116
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (isRemainingSubsetAvailable(currentRun) && currentRun.runner.failingTargets.length > 0),
9117
+ remainingMode: request.testStatusContext?.remainingMode ?? "none"
8624
9118
  }
8625
9119
  });
8626
9120
  if (request.goal === "diagnose" && request.format === "json") {
@@ -8767,8 +9261,10 @@ async function runExec(request) {
8767
9261
  const shellPath = process.env.SHELL || "/bin/bash";
8768
9262
  const commandPreview = buildCommandPreview(request);
8769
9263
  const commandCwd = request.cwd ?? process.cwd();
8770
- const shouldCacheTestStatusBase = request.presetName === "test-status" && !request.skipCacheWrite;
8771
- const previousCachedRun = shouldCacheTestStatusBase ? tryReadCachedTestStatusRun() : null;
9264
+ const isTestStatusPreset = request.presetName === "test-status";
9265
+ const readCachedBaseline = isTestStatusPreset && (request.readCachedBaseline ?? true);
9266
+ const writeCachedBaselineRequested = isTestStatusPreset && (request.writeCachedBaseline ?? (request.skipCacheWrite ? false : true));
9267
+ const previousCachedRun = readCachedBaseline ? tryReadCachedTestStatusRun() : null;
8772
9268
  if (request.config.runtime.verbose) {
8773
9269
  process.stderr.write(
8774
9270
  `${pc5.dim("sift")} exec mode=${hasShellCommand ? "shell" : "argv"} command=${commandPreview}
@@ -8787,6 +9283,10 @@ async function runExec(request) {
8787
9283
  cwd: commandCwd,
8788
9284
  stdio: ["inherit", "pipe", "pipe"]
8789
9285
  });
9286
+ const stopChildPendingNotice = startPendingNotice(
9287
+ "sift waiting for child command...",
9288
+ Boolean(process.stderr.isTTY) && !request.quiet
9289
+ );
8790
9290
  const handleChunk = (chunk) => {
8791
9291
  const text = Buffer.isBuffer(chunk) ? chunk.toString("utf8") : String(chunk);
8792
9292
  if (bypassed) {
@@ -8807,26 +9307,31 @@ async function runExec(request) {
8807
9307
  };
8808
9308
  child.stdout.on("data", handleChunk);
8809
9309
  child.stderr.on("data", handleChunk);
8810
- await new Promise((resolve, reject) => {
8811
- child.on("error", (error) => {
8812
- reject(error);
8813
- });
8814
- child.on("close", (status, signal) => {
8815
- childStatus = status;
8816
- childSignal = signal;
8817
- resolve();
9310
+ try {
9311
+ await new Promise((resolve, reject) => {
9312
+ child.on("error", (error) => {
9313
+ reject(error);
9314
+ });
9315
+ child.on("close", (status, signal) => {
9316
+ childStatus = status;
9317
+ childSignal = signal;
9318
+ resolve();
9319
+ });
8818
9320
  });
8819
- }).catch((error) => {
9321
+ } catch (error) {
8820
9322
  if (error instanceof Error) {
8821
9323
  throw error;
8822
9324
  }
8823
9325
  throw new Error("Failed to start child process.");
8824
- });
9326
+ } finally {
9327
+ stopChildPendingNotice();
9328
+ }
8825
9329
  const exitCode = normalizeChildExitCode(childStatus, childSignal);
8826
9330
  const capturedOutput = capture.render();
8827
9331
  const autoWatchDetected = !request.watch && looksLikeWatchStream(capturedOutput);
8828
9332
  const useWatchFlow = Boolean(request.watch) || autoWatchDetected;
8829
- const shouldCacheTestStatus = shouldCacheTestStatusBase && !useWatchFlow;
9333
+ const shouldBuildTestStatusState = isTestStatusPreset && !useWatchFlow;
9334
+ const shouldWriteCachedBaseline = writeCachedBaselineRequested && !useWatchFlow;
8830
9335
  if (request.config.runtime.verbose) {
8831
9336
  process.stderr.write(
8832
9337
  `${pc5.dim("sift")} child_exit=${exitCode} captured_chars=${capture.getTotalChars()} capture_truncated=${capture.wasTruncated()}
@@ -8889,10 +9394,19 @@ async function runExec(request) {
8889
9394
  `);
8890
9395
  return exitCode;
8891
9396
  }
8892
- const analysis = shouldCacheTestStatus ? analyzeTestStatus(capturedOutput) : null;
8893
- let currentCachedRun = shouldCacheTestStatus && analysis ? createCachedTestStatusRun({
9397
+ const analysis = shouldBuildTestStatusState ? analyzeTestStatus(capturedOutput) : null;
9398
+ let currentCachedRun = shouldBuildTestStatusState && analysis ? createCachedTestStatusRun({
8894
9399
  cwd: commandCwd,
8895
9400
  commandKey: buildTestStatusCommandKey({
9401
+ cwd: commandCwd,
9402
+ runner: analysis.runner,
9403
+ command: Array.isArray(request.command) && request.command.length > 0 ? {
9404
+ mode: "argv",
9405
+ argv: [...request.command]
9406
+ } : request.shellCommand ? {
9407
+ mode: "shell",
9408
+ shellCommand: request.shellCommand
9409
+ } : void 0,
8896
9410
  commandPreview,
8897
9411
  shellCommand: request.shellCommand
8898
9412
  }),
@@ -8906,31 +9420,32 @@ async function runExec(request) {
8906
9420
  truncatedApplied: capture.wasTruncated(),
8907
9421
  analysis
8908
9422
  }) : null;
8909
- const targetDelta = request.diff && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
9423
+ const targetDelta = (request.diff || request.testStatusContext?.remainingMode === "subset_rerun" || request.testStatusContext?.remainingMode === "full_rerun_diff") && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
8910
9424
  previous: previousCachedRun,
8911
9425
  current: currentCachedRun
8912
9426
  }) : null;
8913
9427
  const result = await runSiftWithStats({
8914
9428
  ...request,
8915
9429
  stdin: capturedOutput,
8916
- analysisContext: request.skipCacheWrite && request.presetName === "test-status" ? [
9430
+ analysisContext: request.testStatusContext?.remainingMode && request.testStatusContext.remainingMode !== "none" && request.presetName === "test-status" ? [
8917
9431
  request.analysisContext,
8918
9432
  "Zoom context:",
8919
9433
  "- This pass is remaining-only.",
8920
9434
  "- The full-suite truth already exists from the cached full run.",
8921
9435
  "- Do not reintroduce resolved tests into the diagnosis."
8922
9436
  ].filter((value) => Boolean(value)).join("\n") : request.analysisContext,
8923
- testStatusContext: shouldCacheTestStatus && analysis ? {
9437
+ testStatusContext: shouldBuildTestStatusState && analysis ? {
8924
9438
  ...request.testStatusContext,
8925
9439
  resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
8926
- remainingTests: targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
9440
+ remainingTests: targetDelta?.remaining ?? currentCachedRun?.runner.failingTargets ?? request.testStatusContext?.remainingTests,
8927
9441
  remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? Boolean(
8928
- currentCachedRun?.pytest?.subsetCapable && (targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? []).length > 0
8929
- )
9442
+ currentCachedRun && isRemainingSubsetAvailable(currentCachedRun) && (targetDelta?.remaining ?? currentCachedRun?.runner.failingTargets ?? []).length > 0
9443
+ ),
9444
+ remainingMode: request.testStatusContext?.remainingMode ?? "none"
8930
9445
  } : request.testStatusContext
8931
9446
  });
8932
9447
  let output = result.output;
8933
- if (shouldCacheTestStatus) {
9448
+ if (shouldBuildTestStatusState) {
8934
9449
  if (isInsufficientSignalOutput(output)) {
8935
9450
  output = buildInsufficientSignalOutput({
8936
9451
  presetName: request.presetName,
@@ -8945,26 +9460,12 @@ async function runExec(request) {
8945
9460
  previous: previousCachedRun,
8946
9461
  current: currentCachedRun
8947
9462
  });
8948
- currentCachedRun = createCachedTestStatusRun({
8949
- cwd: commandCwd,
8950
- commandKey: currentCachedRun.commandKey,
8951
- commandPreview,
8952
- command: request.command,
8953
- shellCommand: request.shellCommand,
8954
- detail: request.detail ?? "standard",
8955
- exitCode,
8956
- rawOutput: capturedOutput,
8957
- originalChars: capture.getTotalChars(),
8958
- truncatedApplied: capture.wasTruncated(),
8959
- analysis,
8960
- remainingNodeIds: delta.remainingNodeIds
8961
- });
8962
9463
  if (delta.lines.length > 0) {
8963
9464
  output = `${delta.lines.join("\n")}
8964
9465
  ${output}`;
8965
9466
  }
8966
9467
  }
8967
- if (currentCachedRun) {
9468
+ if (currentCachedRun && shouldWriteCachedBaseline) {
8968
9469
  try {
8969
9470
  writeCachedTestStatusRun(currentCachedRun);
8970
9471
  } catch (error) {
@@ -9011,25 +9512,60 @@ async function runRerun(request) {
9011
9512
  diff: true,
9012
9513
  presetName: "test-status",
9013
9514
  detail: "standard",
9014
- showRaw: false
9515
+ showRaw: false,
9516
+ readCachedBaseline: true,
9517
+ writeCachedBaseline: true,
9518
+ testStatusContext: {
9519
+ ...request.testStatusContext,
9520
+ remainingMode: "none"
9521
+ }
9015
9522
  });
9016
9523
  }
9017
- const remainingNodeIds = getRemainingPytestNodeIds(state);
9018
- if (remainingNodeIds.length === 0) {
9019
- process.stdout.write("No remaining failing pytest targets.\n");
9020
- return 0;
9524
+ if (state.runner.name === "pytest") {
9525
+ const remainingNodeIds = getRemainingPytestNodeIds(state);
9526
+ if (remainingNodeIds.length === 0) {
9527
+ process.stdout.write("No remaining failing pytest targets.\n");
9528
+ return 0;
9529
+ }
9530
+ return runExec({
9531
+ ...request,
9532
+ command: getRemainingPytestRerunCommand(state),
9533
+ cwd: state.cwd,
9534
+ diff: false,
9535
+ presetName: "test-status",
9536
+ readCachedBaseline: true,
9537
+ writeCachedBaseline: false,
9538
+ testStatusContext: {
9539
+ ...request.testStatusContext,
9540
+ remainingSubsetAvailable: isRemainingSubsetAvailable(state),
9541
+ remainingMode: "subset_rerun"
9542
+ }
9543
+ });
9021
9544
  }
9022
- return runExec({
9023
- ...request,
9024
- command: getRemainingPytestRerunCommand(state),
9025
- cwd: state.cwd,
9026
- diff: false,
9027
- presetName: "test-status",
9028
- skipCacheWrite: true,
9029
- testStatusContext: {
9030
- remainingSubsetAvailable: true
9545
+ if (state.runner.name === "vitest" || state.runner.name === "jest") {
9546
+ if (!state.runner.baselineCommand || state.runnerMigrationFallbackUsed) {
9547
+ throw new Error(
9548
+ "Cached test-status run cannot use `sift rerun --remaining` yet because the original full command is unavailable from cache. Refresh the baseline with `sift exec --preset test-status -- <test command>` and retry."
9549
+ );
9031
9550
  }
9032
- });
9551
+ return runExec({
9552
+ ...request,
9553
+ ...getCachedRerunCommand(state),
9554
+ cwd: state.cwd,
9555
+ diff: false,
9556
+ presetName: "test-status",
9557
+ readCachedBaseline: true,
9558
+ writeCachedBaseline: false,
9559
+ testStatusContext: {
9560
+ ...request.testStatusContext,
9561
+ remainingSubsetAvailable: false,
9562
+ remainingMode: "full_rerun_diff"
9563
+ }
9564
+ });
9565
+ }
9566
+ throw new Error(
9567
+ "Cached test-status run cannot use `sift rerun --remaining` for this runner. Refresh with `sift exec --preset test-status -- <test command>` or rerun a narrowed command manually."
9568
+ );
9033
9569
  }
9034
9570
 
9035
9571
  // src/core/stdin.ts
@@ -9542,8 +10078,14 @@ function createCliApp(args = {}) {
9542
10078
  });
9543
10079
  });
9544
10080
  applySharedOptions(
9545
- cli.command("rerun", "Rerun the cached test-status command or only the remaining pytest subset")
9546
- ).usage("rerun [options]").example("rerun").example("rerun --remaining").example("rerun --remaining --detail focused").example("rerun --remaining --detail verbose --show-raw").option("--remaining", "Rerun only the remaining failing pytest node IDs from the cached full run").action(async (options) => {
10081
+ cli.command(
10082
+ "rerun",
10083
+ "Rerun the cached test-status command or focus on what still fails from the cached baseline"
10084
+ )
10085
+ ).usage("rerun [options]").example("rerun").example("rerun --remaining").example("rerun --remaining --detail focused").example("rerun --remaining --detail verbose --show-raw").option(
10086
+ "--remaining",
10087
+ "Focus on what still fails from the cached baseline; narrows automatically for pytest and diffs a full rerun for vitest/jest"
10088
+ ).action(async (options) => {
9547
10089
  const remaining = Boolean(options.remaining);
9548
10090
  if (!remaining && Boolean(options.showRaw)) {
9549
10091
  throw new Error("--show-raw is supported only with `sift rerun --remaining`.");