@bilalimamoglu/sift 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  // src/core/exec.ts
2
2
  import { spawn } from "child_process";
3
3
  import { constants as osConstants } from "os";
4
- import pc2 from "picocolors";
4
+ import pc3 from "picocolors";
5
5
 
6
6
  // src/constants.ts
7
7
  import os from "os";
@@ -61,7 +61,125 @@ function evaluateGate(args) {
61
61
 
62
62
  // src/core/testStatusDecision.ts
63
63
  import { z } from "zod";
64
- var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
64
+
65
+ // src/core/testStatusTargets.ts
66
+ function unique(values) {
67
+ return [...new Set(values)];
68
+ }
69
+ function normalizeTestId(value) {
70
+ return value.replace(/\\/g, "/").replace(/\s+/g, " ").trim();
71
+ }
72
+ function stripMatcherProse(value) {
73
+ return value.replace(/\s+-\s+.*$/, "").trim();
74
+ }
75
+ function extractJsFile(value) {
76
+ const match = value.match(/([A-Za-z0-9_./-]+\.(?:test|spec)\.[cm]?[jt]sx?)/i);
77
+ return match ? normalizeTestId(match[1]) : null;
78
+ }
79
+ function normalizeFailingTarget(label, runner) {
80
+ const normalized = normalizeTestId(label).replace(/^['"]|['"]$/g, "");
81
+ if (runner === "pytest") {
82
+ return stripMatcherProse(normalized);
83
+ }
84
+ if (runner === "vitest" || runner === "jest") {
85
+ const compact = normalized.replace(/^FAIL\s+/i, "").replace(/^[❯×]\s*/, "").replace(/\s+\[[^\]]+\]\s*$/, "").trim();
86
+ const file = extractJsFile(compact);
87
+ if (!file) {
88
+ return stripMatcherProse(compact);
89
+ }
90
+ const fileIndex = compact.indexOf(file);
91
+ const suffix = compact.slice(fileIndex + file.length).trim();
92
+ if (!suffix) {
93
+ return file;
94
+ }
95
+ if (suffix.startsWith(">")) {
96
+ const testName = stripMatcherProse(suffix.replace(/^>\s*/, ""));
97
+ return testName.length > 0 ? `${file} > ${testName}` : file;
98
+ }
99
+ return file;
100
+ }
101
+ return normalized;
102
+ }
103
+ function extractFamilyPrefix(value) {
104
+ const normalized = normalizeTestId(value);
105
+ const filePart = normalized.split("::")[0]?.split(" > ")[0]?.trim() ?? normalized;
106
+ const workflowMatch = filePart.match(/^(\.github\/workflows\/)/);
107
+ if (workflowMatch) {
108
+ return workflowMatch[1];
109
+ }
110
+ const testsMatch = filePart.match(/^((?:test|tests)\/[^/]+\/)/);
111
+ if (testsMatch) {
112
+ return testsMatch[1];
113
+ }
114
+ const srcMatch = filePart.match(/^(src\/[^/]+\/)/);
115
+ if (srcMatch) {
116
+ return srcMatch[1];
117
+ }
118
+ const configMatch = filePart.match(
119
+ /^((?:[^/]+\/)*(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|conftest\.py|(?:vitest|jest)\.config\.[^/]+|tsconfig(?:\.[^/]+)?\.json|[^/]*config[^/]*\.(?:json|ya?ml)))$/i
120
+ );
121
+ if (configMatch) {
122
+ return configMatch[1];
123
+ }
124
+ const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
125
+ if (segments.length >= 2) {
126
+ return `${segments[0]}/${segments[1]}/`;
127
+ }
128
+ if (segments.length === 1) {
129
+ return segments[0];
130
+ }
131
+ return "other";
132
+ }
133
+ function buildTestTargetSummary(values) {
134
+ const uniqueValues = unique(values);
135
+ const counts = /* @__PURE__ */ new Map();
136
+ for (const value of uniqueValues) {
137
+ const prefix = extractFamilyPrefix(value);
138
+ counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
139
+ }
140
+ const families = [...counts.entries()].map(([prefix, count]) => ({
141
+ prefix,
142
+ count
143
+ })).sort((left, right) => {
144
+ if (right.count !== left.count) {
145
+ return right.count - left.count;
146
+ }
147
+ return left.prefix.localeCompare(right.prefix);
148
+ }).slice(0, 5);
149
+ return {
150
+ count: uniqueValues.length,
151
+ families
152
+ };
153
+ }
154
+ function formatTargetSummary(summary) {
155
+ if (summary.count === 0) {
156
+ return "count=0";
157
+ }
158
+ const families = summary.families.length > 0 ? summary.families.map((family) => `${family.prefix}${family.count}`).join(", ") : "none";
159
+ return `count=${summary.count}; families=${families}`;
160
+ }
161
+ function joinFamilies(families) {
162
+ if (families.length === 0) {
163
+ return "";
164
+ }
165
+ if (families.length === 1) {
166
+ return families[0];
167
+ }
168
+ if (families.length === 2) {
169
+ return `${families[0]} and ${families[1]}`;
170
+ }
171
+ return `${families.slice(0, -1).join(", ")}, and ${families.at(-1)}`;
172
+ }
173
+ function describeTargetSummary(summary) {
174
+ if (summary.count === 0 || summary.families.length === 0) {
175
+ return null;
176
+ }
177
+ const families = summary.families.map((family) => `${family.prefix} (${family.count})`);
178
+ return `across ${joinFamilies(families)}`;
179
+ }
180
+
181
+ // src/core/testStatusDecision.ts
182
+ var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","remaining_mode":"none|subset_rerun|full_rerun_diff","primary_suspect_kind":"test|app_code|config|environment|tooling|unknown","confidence_reason":string,"dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"suspect_kind":"test|app_code|config|environment|tooling|unknown","fix_hint":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
65
183
  var TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT = '{"diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","provider_confidence":number|null,"bucket_supplements":[{"label":string,"count":number,"root_cause":string,"anchor":{"file":string|null,"line":number|null,"search_hint":string|null},"fix_hint":string|null,"confidence":number}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string}}';
66
184
  var nextBestActionSchema = z.object({
67
185
  code: z.enum([
@@ -103,6 +221,16 @@ var testStatusDiagnoseContractSchema = z.object({
103
221
  additional_source_read_likely_low_value: z.boolean(),
104
222
  read_raw_only_if: z.string().nullable(),
105
223
  decision: z.enum(["stop", "zoom", "read_source", "read_raw"]),
224
+ remaining_mode: z.enum(["none", "subset_rerun", "full_rerun_diff"]),
225
+ primary_suspect_kind: z.enum([
226
+ "test",
227
+ "app_code",
228
+ "config",
229
+ "environment",
230
+ "tooling",
231
+ "unknown"
232
+ ]),
233
+ confidence_reason: z.string().min(1),
106
234
  dominant_blocker_bucket_index: z.number().int().nullable(),
107
235
  provider_used: z.boolean(),
108
236
  provider_confidence: z.number().min(0).max(1).nullable(),
@@ -117,6 +245,15 @@ var testStatusDiagnoseContractSchema = z.object({
117
245
  label: z.string(),
118
246
  count: z.number().int(),
119
247
  root_cause: z.string(),
248
+ suspect_kind: z.enum([
249
+ "test",
250
+ "app_code",
251
+ "config",
252
+ "environment",
253
+ "tooling",
254
+ "unknown"
255
+ ]),
256
+ fix_hint: z.string().min(1),
120
257
  evidence: z.array(z.string()).max(2),
121
258
  bucket_confidence: z.number(),
122
259
  root_cause_confidence: z.number(),
@@ -167,6 +304,42 @@ function parseTestStatusProviderSupplement(input) {
167
304
  return testStatusProviderSupplementSchema.parse(JSON.parse(input));
168
305
  }
169
306
  var extendedBucketSpecs = [
307
+ {
308
+ prefix: "service unavailable:",
309
+ type: "service_unavailable",
310
+ label: "service unavailable",
311
+ genericTitle: "Service unavailable failures",
312
+ defaultCoverage: "error",
313
+ rootCauseConfidence: 0.9,
314
+ dominantPriority: 2,
315
+ dominantBlocker: true,
316
+ why: "it contains the dependency service or API path that is unavailable in the test environment",
317
+ fix: "Restore the dependency service or test double before rerunning the full suite."
318
+ },
319
+ {
320
+ prefix: "db refused:",
321
+ type: "db_connection_failure",
322
+ label: "database connection",
323
+ genericTitle: "Database connection failures",
324
+ defaultCoverage: "error",
325
+ rootCauseConfidence: 0.9,
326
+ dominantPriority: 2,
327
+ dominantBlocker: true,
328
+ why: "it contains the database host, DSN, or startup path that is refusing connections",
329
+ fix: "Restore the test database connectivity before rerunning the full suite."
330
+ },
331
+ {
332
+ prefix: "auth bypass absent:",
333
+ type: "auth_bypass_absent",
334
+ label: "auth bypass missing",
335
+ genericTitle: "Auth bypass setup failures",
336
+ defaultCoverage: "error",
337
+ rootCauseConfidence: 0.86,
338
+ dominantPriority: 2,
339
+ dominantBlocker: true,
340
+ why: "it contains the auth bypass fixture or setup path that tests expected to be active",
341
+ fix: "Restore the test auth bypass fixture or mock before rerunning the full suite."
342
+ },
170
343
  {
171
344
  prefix: "snapshot mismatch:",
172
345
  type: "snapshot_mismatch",
@@ -351,6 +524,16 @@ var extendedBucketSpecs = [
351
524
  why: "it contains the deprecated API or warning filter that is failing the test run",
352
525
  fix: "Update the deprecated call site or relax the warning policy only if that is intentional."
353
526
  },
527
+ {
528
+ prefix: "assertion failed:",
529
+ type: "assertion_failure",
530
+ label: "assertion failure",
531
+ genericTitle: "Assertion failures",
532
+ defaultCoverage: "failed",
533
+ rootCauseConfidence: 0.76,
534
+ why: "it contains the expected-versus-actual assertion that failed inside the visible test",
535
+ fix: "Read the assertion diff or expectation and fix the code or expected value before rerunning."
536
+ },
354
537
  {
355
538
  prefix: "xfail strict:",
356
539
  type: "xfail_strict_unexpected_pass",
@@ -372,54 +555,127 @@ function extractReasonDetail(reason, prefix) {
372
555
  function formatCount(count, singular, plural = `${singular}s`) {
373
556
  return `${count} ${count === 1 ? singular : plural}`;
374
557
  }
375
- function unique(values) {
558
+ function unique2(values) {
376
559
  return [...new Set(values)];
377
560
  }
378
- function normalizeTestId(value) {
561
+ function normalizeTestId2(value) {
379
562
  return value.replace(/\\/g, "/").trim();
380
563
  }
381
- function extractTestFamilyPrefix(value) {
382
- const normalized = normalizeTestId(value);
383
- const testsMatch = normalized.match(/^(tests\/[^/]+\/)/);
384
- if (testsMatch) {
385
- return testsMatch[1];
564
+ function normalizePathCandidate(value) {
565
+ if (!value) {
566
+ return null;
386
567
  }
387
- const filePart = normalized.split("::")[0]?.trim() ?? "";
388
- if (!filePart.includes("/")) {
389
- return "other";
568
+ let normalized = value.replace(/\\/g, "/").trim();
569
+ normalized = normalized.replace(/^[("'`<\[]+/, "").replace(/[>"'`\]),:;]+$/, "");
570
+ normalized = normalized.replace(/^<repo>\//, "").replace(/^\.\//, "");
571
+ if (normalized.includes("::")) {
572
+ normalized = normalized.split("::")[0]?.trim() ?? normalized;
390
573
  }
391
- const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
392
- if (segments.length === 0) {
393
- return "other";
574
+ if (normalized.startsWith("/") && !normalized.startsWith("/tmp/") && !normalized.startsWith("/var/tmp/")) {
575
+ return null;
576
+ }
577
+ if (/^\.github\/workflows\/.+\.(?:yml|yaml)$/i.test(normalized)) {
578
+ return normalized;
579
+ }
580
+ if (/^(?:src|test|tests)\/.+\.[A-Za-z0-9._-]+$/i.test(normalized)) {
581
+ return normalized;
582
+ }
583
+ if (/^(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py)$/i.test(
584
+ normalized
585
+ )) {
586
+ return normalized;
587
+ }
588
+ if (/^(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+$/i.test(normalized)) {
589
+ return normalized;
590
+ }
591
+ if (/^(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json$/i.test(normalized)) {
592
+ return normalized;
394
593
  }
395
- return `${segments[0]}/`;
594
+ if (/^[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)$/i.test(normalized)) {
595
+ return normalized;
596
+ }
597
+ return null;
396
598
  }
397
- function buildTestTargetSummary(values) {
398
- const counts = /* @__PURE__ */ new Map();
399
- for (const value of values) {
400
- const prefix = extractTestFamilyPrefix(value);
401
- counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
599
+ function addPathCandidatesFromText(target, text) {
600
+ if (!text) {
601
+ return;
402
602
  }
403
- const families = [...counts.entries()].map(([prefix, count]) => ({
404
- prefix,
405
- count
406
- })).sort((left, right) => {
407
- if (right.count !== left.count) {
408
- return right.count - left.count;
603
+ const pattern = /(?:^|[\s("'`])((?:\.github\/workflows\/[A-Za-z0-9._/-]+\.(?:yml|yaml)|(?:src|test|tests)\/[A-Za-z0-9._/-]+\.[A-Za-z0-9._-]+|package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py|(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+|(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json|[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)))/g;
604
+ for (const match of text.matchAll(pattern)) {
605
+ const normalized = normalizePathCandidate(match[1] ?? null);
606
+ if (normalized) {
607
+ target.add(normalized);
608
+ }
609
+ }
610
+ }
611
+ function extractBucketPathCandidates(args) {
612
+ const candidates = /* @__PURE__ */ new Set();
613
+ const push = (value) => {
614
+ const normalized = normalizePathCandidate(value);
615
+ if (normalized) {
616
+ candidates.add(normalized);
409
617
  }
410
- return left.prefix.localeCompare(right.prefix);
411
- }).slice(0, 5);
412
- return {
413
- count: values.length,
414
- families
415
618
  };
619
+ push(args.readTarget?.file);
620
+ for (const item of args.bucket.representativeItems) {
621
+ push(item.file);
622
+ addPathCandidatesFromText(candidates, item.label);
623
+ addPathCandidatesFromText(candidates, item.reason);
624
+ }
625
+ addPathCandidatesFromText(candidates, args.bucket.reason);
626
+ addPathCandidatesFromText(candidates, args.bucket.headline);
627
+ for (const line of args.bucket.summaryLines) {
628
+ addPathCandidatesFromText(candidates, line);
629
+ }
630
+ return [...candidates];
416
631
  }
417
- function formatTargetSummary(summary) {
418
- if (summary.count === 0) {
419
- return "count=0";
632
+ function isConfigPathCandidate(path4) {
633
+ return /^\.github\/workflows\/.+\.(?:yml|yaml)$/i.test(path4) || /^(?:package\.json|pytest\.ini|pyproject\.toml|tox\.ini|(?:[A-Za-z0-9._/-]+\/)?conftest\.py)$/i.test(
634
+ path4
635
+ ) || /^(?:[A-Za-z0-9._/-]+\/)?(?:vitest|jest)\.config\.[A-Za-z0-9._-]+$/i.test(path4) || /^(?:[A-Za-z0-9._/-]+\/)?tsconfig(?:\.[A-Za-z0-9_-]+)?\.json$/i.test(path4) || /^[A-Za-z0-9._/-]*config[A-Za-z0-9._/-]*\.(?:json|yml|yaml)$/i.test(path4);
636
+ }
637
+ function isAppPathCandidate(path4) {
638
+ return path4.startsWith("src/");
639
+ }
640
+ function isTestPathCandidate(path4) {
641
+ return path4.startsWith("test/") || path4.startsWith("tests/");
642
+ }
643
+ function looksLikeMatcherLiteralComparison(detail) {
644
+ return /\bexpected\b[\s\S]*\bto (?:be|contain)\b/i.test(detail);
645
+ }
646
+ function looksLikeGoldenLiteralDrift(detail) {
647
+ return /\\n/.test(detail) || /-\s+(?:Tests|Decision|Likely owner|Next|Stop signal)\b/.test(detail) || /\b(?:node-version|workflow_dispatch|run-name|matrix|registry-url)\b/i.test(detail);
648
+ }
649
+ function isGoldenOutputDriftBucket(bucket) {
650
+ if (bucket.type !== "assertion_failure") {
651
+ return false;
420
652
  }
421
- const families = summary.families.length > 0 ? summary.families.map((family) => `${family.prefix}${family.count}`).join(", ") : "none";
422
- return `count=${summary.count}; families=${families}`;
653
+ const detail = extractReasonDetail(bucket.reason, "assertion failed:") ?? bucket.reason;
654
+ if (!looksLikeMatcherLiteralComparison(detail)) {
655
+ return false;
656
+ }
657
+ if (bucket.reason.startsWith("snapshot mismatch:")) {
658
+ return false;
659
+ }
660
+ if (!looksLikeGoldenLiteralDrift(detail)) {
661
+ return false;
662
+ }
663
+ const candidates = extractBucketPathCandidates({
664
+ bucket
665
+ });
666
+ return candidates.some((candidate) => isConfigPathCandidate(candidate) || isTestPathCandidate(candidate));
667
+ }
668
+ function specializeBucket(bucket) {
669
+ if (!isGoldenOutputDriftBucket(bucket)) {
670
+ return bucket;
671
+ }
672
+ return {
673
+ ...bucket,
674
+ type: "golden_output_drift",
675
+ reason: "golden output drift: expected literal or golden output no longer matches current output",
676
+ labelOverride: "golden output drift",
677
+ hint: "Update the expected literal or golden output if the new output is intentional; otherwise fix the generated output and rerun."
678
+ };
423
679
  }
424
680
  function classifyGenericBucketType(reason) {
425
681
  const extended = findExtendedBucketSpec(reason);
@@ -444,6 +700,9 @@ function classifyGenericBucketType(reason) {
444
700
  if (reason.startsWith("missing module:")) {
445
701
  return "import_dependency_failure";
446
702
  }
703
+ if (reason.startsWith("golden output drift:")) {
704
+ return "golden_output_drift";
705
+ }
447
706
  if (reason.startsWith("assertion failed:")) {
448
707
  return "assertion_failure";
449
708
  }
@@ -596,7 +855,7 @@ function mergeBucketDetails(existing, incoming) {
596
855
  count,
597
856
  confidence: Math.max(existing.confidence, incoming.confidence),
598
857
  representativeItems,
599
- entities: unique([...existing.entities, ...incoming.entities]),
858
+ entities: unique2([...existing.entities, ...incoming.entities]),
600
859
  hint: existing.hint ?? incoming.hint,
601
860
  overflowCount: Math.max(
602
861
  existing.overflowCount,
@@ -788,6 +1047,9 @@ function labelForBucket(bucket) {
788
1047
  if (bucket.type === "import_dependency_failure") {
789
1048
  return "import dependency failure";
790
1049
  }
1050
+ if (bucket.type === "golden_output_drift") {
1051
+ return "golden output drift";
1052
+ }
791
1053
  if (bucket.type === "assertion_failure") {
792
1054
  return "assertion failure";
793
1055
  }
@@ -822,6 +1084,9 @@ function rootCauseConfidenceFor(bucket) {
822
1084
  if (bucket.type === "contract_snapshot_drift") {
823
1085
  return bucket.entities.length > 0 ? 0.92 : 0.76;
824
1086
  }
1087
+ if (bucket.type === "golden_output_drift") {
1088
+ return 0.78;
1089
+ }
825
1090
  if (bucket.source === "provider") {
826
1091
  return Math.max(0.6, Math.min(bucket.confidence, 0.82));
827
1092
  }
@@ -896,6 +1161,9 @@ function buildReadTargetWhy(args) {
896
1161
  if (args.bucket.type === "import_dependency_failure") {
897
1162
  return "it is the first visible failing module in this missing dependency bucket";
898
1163
  }
1164
+ if (args.bucket.type === "golden_output_drift") {
1165
+ return "it is the first visible golden or literal drift anchor for this bucket";
1166
+ }
899
1167
  if (args.bucket.type === "assertion_failure") {
900
1168
  return "it is the first visible failing test in this bucket";
901
1169
  }
@@ -973,6 +1241,9 @@ function buildReadTargetSearchHint(bucket, anchor) {
973
1241
  if (assertionText) {
974
1242
  return assertionText;
975
1243
  }
1244
+ if (bucket.type === "golden_output_drift") {
1245
+ return bucket.representativeItems.map((item) => item.reason.match(/^assertion failed:\s+(.+)$/)?.[1] ?? item.reason).find(Boolean) ?? anchor.label.split("::")[1]?.trim() ?? null;
1246
+ }
976
1247
  if (bucket.reason.startsWith("unknown ")) {
977
1248
  return anchor.reason;
978
1249
  }
@@ -1027,18 +1298,36 @@ function buildConcreteNextNote(args) {
1027
1298
  }
1028
1299
  const lead = primaryTarget.context_hint.start_line !== null && primaryTarget.context_hint.end_line !== null ? `Read ${primaryTarget.file} lines ${primaryTarget.context_hint.start_line}-${primaryTarget.context_hint.end_line} first; ${primaryTarget.why}.` : primaryTarget.context_hint.search_hint ? `Search for ${primaryTarget.context_hint.search_hint} in ${primaryTarget.file} first; ${primaryTarget.why}.` : `Read ${formatReadTargetLocation(primaryTarget)} first; ${primaryTarget.why}.`;
1029
1300
  if (args.nextBestAction.code === "fix_dominant_blocker") {
1301
+ if (args.remainingMode === "subset_rerun") {
1302
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
1303
+ }
1304
+ if (args.remainingMode === "full_rerun_diff") {
1305
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
1306
+ }
1030
1307
  if (args.nextBestAction.bucket_index === 1 && args.hasSecondaryVisibleBucket) {
1031
1308
  return "Fix bucket 1 first, then rerun the full suite at standard. Secondary buckets are already visible behind it.";
1032
1309
  }
1033
1310
  return `Fix bucket ${args.nextBestAction.bucket_index ?? 1} first, then rerun the full suite at standard.`;
1034
1311
  }
1035
1312
  if (args.nextBestAction.code === "read_source_for_bucket") {
1313
+ if (args.remainingMode === "subset_rerun") {
1314
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
1315
+ }
1316
+ if (args.remainingMode === "full_rerun_diff") {
1317
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
1318
+ }
1036
1319
  return lead;
1037
1320
  }
1038
1321
  if (args.nextBestAction.code === "insufficient_signal") {
1039
- if (args.nextBestAction.note.startsWith("Provider follow-up failed")) {
1322
+ if (args.nextBestAction.note.startsWith("Provider follow-up")) {
1040
1323
  return args.nextBestAction.note;
1041
1324
  }
1325
+ if (args.remainingMode === "subset_rerun") {
1326
+ return "Fix the remaining bucket first, then refresh the full-suite truth with sift rerun.";
1327
+ }
1328
+ if (args.remainingMode === "full_rerun_diff") {
1329
+ return "Fix the remaining bucket first. The cached full-suite baseline is still preserved; use sift rerun when you want to refresh it.";
1330
+ }
1042
1331
  return `${lead} Then take one deeper sift pass before raw traceback.`;
1043
1332
  }
1044
1333
  return args.nextBestAction.note;
@@ -1047,13 +1336,13 @@ function extractMiniDiff(input, bucket) {
1047
1336
  if (bucket.type !== "contract_snapshot_drift") {
1048
1337
  return null;
1049
1338
  }
1050
- const addedPaths = unique(
1339
+ const addedPaths = unique2(
1051
1340
  [...input.matchAll(/[+-]\s+'(\/api\/[^']+)'/g)].map((match) => match[1])
1052
1341
  ).length;
1053
- const removedModels = unique(
1342
+ const removedModels = unique2(
1054
1343
  [...input.matchAll(/[+-]\s+'([A-Za-z0-9._/-]+-[A-Za-z0-9._-]+)'/g)].map((match) => match[1])
1055
1344
  ).length;
1056
- const changedTaskMappings = unique(
1345
+ const changedTaskMappings = unique2(
1057
1346
  [...input.matchAll(/[+-]\s+'([a-z]+(?:_[a-z0-9]+)+)'/g)].map((match) => match[1])
1058
1347
  ).length;
1059
1348
  if (addedPaths === 0 && removedModels === 0 && changedTaskMappings === 0) {
@@ -1140,7 +1429,7 @@ function buildProviderSupplementBuckets(args) {
1140
1429
  });
1141
1430
  }
1142
1431
  function pickUnknownAnchor(args) {
1143
- const fromStatusItems = args.kind === "error" ? args.analysis.visibleErrorItems[0] : null;
1432
+ const fromStatusItems = args.kind === "error" ? args.analysis.visibleErrorItems[0] : args.analysis.visibleFailedItems[0];
1144
1433
  if (fromStatusItems) {
1145
1434
  return {
1146
1435
  label: fromStatusItems.label,
@@ -1154,7 +1443,7 @@ function pickUnknownAnchor(args) {
1154
1443
  }
1155
1444
  const label = args.kind === "error" ? args.analysis.visibleErrorLabels[0] : args.analysis.visibleFailedLabels[0];
1156
1445
  if (label) {
1157
- const normalizedLabel = normalizeTestId(label);
1446
+ const normalizedLabel = normalizeTestId2(label);
1158
1447
  const fileMatch = normalizedLabel.match(/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+)\b/);
1159
1448
  const file = fileMatch?.[1] ?? normalizedLabel.split("::")[0] ?? null;
1160
1449
  return {
@@ -1177,12 +1466,14 @@ function buildUnknownBucket(args) {
1177
1466
  const isError = args.kind === "error";
1178
1467
  const label = isError ? "unknown setup blocker" : "unknown failure family";
1179
1468
  const reason = isError ? "unknown setup blocker: setup failures share a repeated but unclassified pattern" : "unknown failure family: failing tests share a repeated but unclassified pattern";
1469
+ const firstConcreteSignal = anchor && anchor.reason !== reason && anchor.reason !== "setup failures share a repeated but unclassified pattern" && anchor.reason !== "failing tests share a repeated but unclassified pattern" ? `First concrete signal: ${anchor.reason}` : null;
1180
1470
  return {
1181
1471
  type: "unknown_failure",
1182
1472
  headline: `${label}: ${formatCount(args.count, "visible failure")} share a repeated but unclassified pattern.`,
1183
1473
  summaryLines: [
1184
- `${label}: ${formatCount(args.count, "visible failure")} share a repeated but unclassified pattern.`
1185
- ],
1474
+ `${label}: ${formatCount(args.count, "visible failure")} share a repeated but unclassified pattern.`,
1475
+ firstConcreteSignal
1476
+ ].filter((value) => Boolean(value)),
1186
1477
  reason,
1187
1478
  count: args.count,
1188
1479
  confidence: 0.45,
@@ -1280,16 +1571,29 @@ function buildDecisionLine(contract) {
1280
1571
  }
1281
1572
  return "- Decision: raw only if exact traceback is required.";
1282
1573
  }
1574
+ function buildRemainingPassLine(contract) {
1575
+ if (contract.remaining_mode === "subset_rerun") {
1576
+ return "- Remaining pass: showing only what is still failing from the cached baseline.";
1577
+ }
1578
+ if (contract.remaining_mode === "full_rerun_diff") {
1579
+ return "- Remaining pass: full rerun analyzed against the cached baseline because narrowed rerun is not available for this runner.";
1580
+ }
1581
+ return null;
1582
+ }
1283
1583
  function buildComparisonLines(contract) {
1284
1584
  const lines = [];
1585
+ const resolvedSummary = buildTestTargetSummary(contract.resolved_tests);
1586
+ const remainingSummary = buildTestTargetSummary(contract.remaining_tests);
1285
1587
  if (contract.resolved_tests.length > 0) {
1588
+ const summaryText = describeTargetSummary(resolvedSummary);
1286
1589
  lines.push(
1287
- `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set.`
1590
+ `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set${summaryText ? ` ${summaryText}` : ""}.`
1288
1591
  );
1289
1592
  }
1290
- if (contract.resolved_tests.length > 0 && contract.remaining_tests.length > 0) {
1593
+ if (contract.remaining_tests.length > 0 && (contract.resolved_tests.length > 0 || contract.remaining_mode !== "none")) {
1594
+ const summaryText = describeTargetSummary(remainingSummary);
1291
1595
  lines.push(
1292
- `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}.`
1596
+ `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}${summaryText ? ` ${summaryText}` : ""}.`
1293
1597
  );
1294
1598
  }
1295
1599
  return lines;
@@ -1309,7 +1613,7 @@ function buildStandardAnchorText(target) {
1309
1613
  }
1310
1614
  return formatReadTargetLocation(target);
1311
1615
  }
1312
- function buildStandardFixText(args) {
1616
+ function resolveBucketFixHint(args) {
1313
1617
  if (args.bucket.hint) {
1314
1618
  return args.bucket.hint;
1315
1619
  }
@@ -1358,13 +1662,96 @@ function buildStandardFixText(args) {
1358
1662
  if (args.bucket.type === "runtime_failure") {
1359
1663
  return `Fix the visible ${args.bucketLabel} and rerun the full suite at standard.`;
1360
1664
  }
1361
- return null;
1665
+ return "Inspect the first visible anchor for this bucket, apply the smallest fix that explains it, then rerun the full suite at standard.";
1666
+ }
1667
/**
 * Maps a failure bucket to the kind of suspect that most likely owns it.
 *
 * Most bucket types map directly to a fixed suspect kind. For drift, assertion,
 * runtime, type-error and serialization buckets the answer depends on the path
 * candidates extracted for the bucket: config paths win over application paths,
 * which win over test paths.
 *
 * @param args - `bucket` (its `type` drives the decision) and the optional `readTarget` for that bucket.
 * @returns One of "environment", "config", "test", "app_code", "tooling" or "unknown".
 */
function deriveBucketSuspectKind(args) {
  const candidates = extractBucketPathCandidates({
    bucket: args.bucket,
    readTarget: args.readTarget
  });
  const touchesConfig = candidates.some((candidate) => isConfigPathCandidate(candidate));
  const touchesApp = candidates.some((candidate) => isAppPathCandidate(candidate));
  const touchesTest = candidates.some((candidate) => isTestPathCandidate(candidate));
  switch (args.bucket.type) {
    case "shared_environment_blocker":
    case "fixture_guard_failure":
    case "permission_denied_failure":
    case "django_db_access_denied":
    case "network_failure":
    case "service_unavailable":
    case "db_connection_failure":
    case "auth_bypass_absent":
    case "fixture_teardown_failure":
      return "environment";
    case "configuration_error":
    case "db_migration_failure":
    case "import_dependency_failure":
    case "collection_failure":
    case "no_tests_collected":
    case "deprecation_warning_as_error":
    case "file_not_found_failure":
      return "config";
    case "contract_snapshot_drift":
    case "snapshot_mismatch":
    case "flaky_test_detected":
    case "xfail_strict_unexpected_pass":
      return "test";
    case "xdist_worker_crash":
    case "timeout_failure":
    case "async_event_loop_failure":
    case "subprocess_crash_segfault":
    case "memory_error":
    case "resource_leak_warning":
    case "interrupted_run":
      return "tooling";
    case "golden_output_drift":
    case "assertion_failure":
    case "runtime_failure":
    case "type_error_failure":
    case "serialization_encoding_failure":
      // Path-driven buckets: the first matching category wins, in this order.
      if (touchesConfig) {
        return "config";
      }
      if (touchesApp) {
        return "app_code";
      }
      if (touchesTest) {
        return "test";
      }
      return "unknown";
    case "unknown_failure":
    default:
      return "unknown";
  }
}
1716
/**
 * Picks the suspect kind of the bucket that should be treated as primary.
 *
 * The bucket whose `bucket_index` equals `dominantBlockerBucketIndex` is used
 * when that index is not null and a matching bucket exists; otherwise the first
 * bucket is used. Returns "unknown" when there is no bucket or it carries no
 * `suspect_kind`.
 *
 * @param args - `mainBuckets` (array of contract buckets) and `dominantBlockerBucketIndex` (number or null).
 * @returns The primary bucket's `suspect_kind`, or "unknown".
 */
function derivePrimarySuspectKind(args) {
  const { mainBuckets, dominantBlockerBucketIndex } = args;
  let primary;
  if (dominantBlockerBucketIndex !== null) {
    primary = mainBuckets.find(
      (candidate) => candidate.bucket_index === dominantBlockerBucketIndex
    );
  }
  primary = primary ?? mainBuckets[0];
  return primary?.suspect_kind ?? "unknown";
}
1720
/**
 * Produces the one-sentence explanation that accompanies a diagnose decision.
 *
 * A "stop" decision gets the "dominant blocker" sentence only when a lead
 * bucket exists (the one flagged `dominant`, else the first) and the primary
 * suspect kind is known; every other case falls through to a fixed sentence
 * chosen by the decision value.
 *
 * @param args - `decision`, `mainBuckets` and `primarySuspectKind`.
 * @returns The explanatory sentence.
 */
function buildConfidenceReason(args) {
  const { decision, mainBuckets, primarySuspectKind } = args;
  const leadBucket = mainBuckets.find((candidate) => candidate.dominant) ?? mainBuckets[0];
  const anchoredStop = decision === "stop" && Boolean(leadBucket) && primarySuspectKind !== "unknown";
  if (anchoredStop) {
    return `Dominant blocker (${leadBucket.label}) is anchored and actionable.`;
  }
  switch (decision) {
    case "zoom":
      return "Unknown or low-confidence buckets remain; one deeper sift pass is justified.";
    case "read_source":
      return "The bucket is identified, but source context is still needed to make the next fix clear.";
    default:
      // Includes "read_raw" and a "stop" that could not be anchored.
      return "Heuristic signal is still insufficient; exact traceback lines are needed.";
  }
}
1733
/**
 * Converts a suspect-kind code into the phrase shown to the reader.
 *
 * @param kind - Suspect kind code such as "test", "app_code" or "config".
 * @returns The display phrase, or "unknown" for any unrecognised value.
 */
function formatSuspectKindLabel(kind) {
  // A Map avoids accidental hits on inherited object properties.
  const phrases = new Map([
    ["test", "test code"],
    ["app_code", "application code"],
    ["config", "test or project configuration"],
    ["environment", "environment setup"],
    ["tooling", "test runner or tooling"]
  ]);
  return phrases.get(kind) ?? "unknown";
}
1363
1749
  function buildStandardBucketSupport(args) {
1364
1750
  return {
1365
1751
  headline: args.bucket.summaryLines[0] ? `- ${args.bucket.summaryLines[0]}` : renderBucketHeadline(args.contractBucket),
1752
+ firstConcreteSignalText: args.bucket.source === "unknown" ? args.bucket.summaryLines[1] ?? null : null,
1366
1753
  anchorText: buildStandardAnchorText(args.readTarget),
1367
- fixText: buildStandardFixText({
1754
+ fixText: resolveBucketFixHint({
1368
1755
  bucket: args.bucket,
1369
1756
  bucketLabel: args.contractBucket.label
1370
1757
  })
@@ -1372,6 +1759,10 @@ function buildStandardBucketSupport(args) {
1372
1759
  }
1373
1760
  function renderStandard(args) {
1374
1761
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
1762
+ const remainingPassLine = buildRemainingPassLine(args.contract);
1763
+ if (remainingPassLine) {
1764
+ lines.push(remainingPassLine);
1765
+ }
1375
1766
  if (args.contract.main_buckets.length > 0) {
1376
1767
  for (const bucket of args.contract.main_buckets.slice(0, 3)) {
1377
1768
  const rawBucket = args.buckets[bucket.bucket_index - 1];
@@ -1387,6 +1778,9 @@ function renderStandard(args) {
1387
1778
  )
1388
1779
  });
1389
1780
  lines.push(support.headline);
1781
+ if (support.firstConcreteSignalText) {
1782
+ lines.push(`- ${support.firstConcreteSignalText}`);
1783
+ }
1390
1784
  if (support.anchorText) {
1391
1785
  lines.push(`- Anchor: ${support.anchorText}`);
1392
1786
  }
@@ -1396,12 +1790,19 @@ function renderStandard(args) {
1396
1790
  }
1397
1791
  }
1398
1792
  lines.push(buildDecisionLine(args.contract));
1793
+ if (args.contract.main_buckets.length > 0 && args.contract.primary_suspect_kind !== "unknown") {
1794
+ lines.push(`- Likely owner: ${formatSuspectKindLabel(args.contract.primary_suspect_kind)}`);
1795
+ }
1399
1796
  lines.push(`- Next: ${args.contract.next_best_action.note}`);
1400
1797
  lines.push(buildStopSignal(args.contract));
1401
1798
  return lines.join("\n");
1402
1799
  }
1403
1800
  function renderFocused(args) {
1404
1801
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
1802
+ const remainingPassLine = buildRemainingPassLine(args.contract);
1803
+ if (remainingPassLine) {
1804
+ lines.push(remainingPassLine);
1805
+ }
1405
1806
  for (const bucket of args.contract.main_buckets) {
1406
1807
  const rawBucket = args.buckets[bucket.bucket_index - 1];
1407
1808
  lines.push(
@@ -1421,6 +1822,10 @@ function renderFocused(args) {
1421
1822
  }
1422
1823
  function renderVerbose(args) {
1423
1824
  const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
1825
+ const remainingPassLine = buildRemainingPassLine(args.contract);
1826
+ if (remainingPassLine) {
1827
+ lines.push(remainingPassLine);
1828
+ }
1424
1829
  for (const bucket of args.contract.main_buckets) {
1425
1830
  const rawBucket = args.buckets[bucket.bucket_index - 1];
1426
1831
  lines.push(
@@ -1470,7 +1875,9 @@ function buildTestStatusDiagnoseContract(args) {
1470
1875
  count: residuals.remainingFailed
1471
1876
  })
1472
1877
  ].filter((bucket) => Boolean(bucket));
1473
- const buckets = prioritizeBuckets([...combinedBuckets, ...unknownBuckets]).slice(0, 3);
1878
+ const buckets = prioritizeBuckets(
1879
+ [...combinedBuckets, ...unknownBuckets].map((bucket) => specializeBucket(bucket))
1880
+ ).slice(0, 3);
1474
1881
  const simpleCollectionFailure = args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && buckets.length === 0;
1475
1882
  const dominantBucket = buckets.map((bucket, index) => ({
1476
1883
  bucket,
@@ -1483,29 +1890,49 @@ function buildTestStatusDiagnoseContract(args) {
1483
1890
  })[0] ?? null;
1484
1891
  const hasUnknownBucket = buckets.some((bucket) => isUnknownBucket(bucket));
1485
1892
  const hasConcreteCoverage = args.analysis.failed === 0 && args.analysis.errors === 0 ? true : residuals.remainingErrors === 0 && residuals.remainingFailed === 0;
1486
- const diagnosisComplete = args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || simpleCollectionFailure || buckets.length > 0 && hasConcreteCoverage && !hasUnknownBucket && (dominantBucket?.bucket.confidence ?? 0) >= 0.6;
1487
- const rawNeeded = buckets.length === 0 ? !(args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || simpleCollectionFailure) : !diagnosisComplete && !hasUnknownBucket && buckets.every((bucket) => bucket.confidence < 0.7);
1488
1893
  const dominantBlockerBucketIndex = dominantBucket && isDominantBlockerType(dominantBucket.bucket.type) ? dominantBucket.index + 1 : null;
1489
1894
  const readTargets = buildReadTargets({
1490
1895
  buckets,
1491
1896
  dominantBucketIndex: dominantBlockerBucketIndex
1492
1897
  });
1493
- const mainBuckets = buckets.map((bucket, index) => ({
1494
- bucket_index: index + 1,
1495
- label: labelForBucket(bucket),
1496
- count: bucket.count,
1497
- root_cause: bucket.reason,
1498
- evidence: buildBucketEvidence(bucket),
1499
- bucket_confidence: Number(bucket.confidence.toFixed(2)),
1500
- root_cause_confidence: Number(rootCauseConfidenceFor(bucket).toFixed(2)),
1501
- dominant: dominantBucket?.index === index,
1502
- secondary_visible_despite_blocker: dominantBlockerBucketIndex !== null && dominantBlockerBucketIndex !== index + 1,
1503
- mini_diff: extractMiniDiff(args.input, bucket)
1504
- }));
1505
- const resolvedTests = unique(args.resolvedTests ?? []);
1506
- const remainingTests = unique(
1507
- args.remainingTests ?? unique([...args.analysis.visibleErrorLabels, ...args.analysis.visibleFailedLabels])
1898
+ const dominantBucketHasConcreteAnchor = dominantBucket !== null && (readTargets.some((target) => target.bucket_index === dominantBucket.index + 1 && target.file.length > 0) || dominantBucket.bucket.representativeItems.some((item) => item.anchor_kind !== "none"));
1899
+ const smallConcreteSuite = args.analysis.failed + args.analysis.errors <= 2 && residuals.remainingErrors === 0 && residuals.remainingFailed === 0 && buckets.length === 1 && !hasUnknownBucket && dominantBucket !== null && dominantBucketHasConcreteAnchor;
1900
+ const dominantConfidenceThreshold = smallConcreteSuite ? 0.55 : 0.6;
1901
+ const diagnosisComplete = args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || simpleCollectionFailure || buckets.length > 0 && hasConcreteCoverage && !hasUnknownBucket && (dominantBucket?.bucket.confidence ?? 0) >= dominantConfidenceThreshold;
1902
+ const rawNeeded = buckets.length === 0 ? !(args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || simpleCollectionFailure) : !diagnosisComplete && !hasUnknownBucket && buckets.every((bucket) => bucket.confidence < 0.7);
1903
+ const mainBuckets = buckets.map((bucket, index) => {
1904
+ const bucketIndex = index + 1;
1905
+ const label = labelForBucket(bucket);
1906
+ const readTarget = readTargets.find((target) => target.bucket_index === bucketIndex);
1907
+ return {
1908
+ bucket_index: bucketIndex,
1909
+ label,
1910
+ count: bucket.count,
1911
+ root_cause: bucket.reason,
1912
+ suspect_kind: deriveBucketSuspectKind({
1913
+ bucket,
1914
+ readTarget
1915
+ }),
1916
+ fix_hint: resolveBucketFixHint({
1917
+ bucket,
1918
+ bucketLabel: label
1919
+ }),
1920
+ evidence: buildBucketEvidence(bucket),
1921
+ bucket_confidence: Number(bucket.confidence.toFixed(2)),
1922
+ root_cause_confidence: Number(rootCauseConfidenceFor(bucket).toFixed(2)),
1923
+ dominant: dominantBucket?.index === index,
1924
+ secondary_visible_despite_blocker: dominantBlockerBucketIndex !== null && dominantBlockerBucketIndex !== bucketIndex,
1925
+ mini_diff: extractMiniDiff(args.input, bucket)
1926
+ };
1927
+ });
1928
+ const resolvedTests = unique2(args.resolvedTests ?? []);
1929
+ const remainingTests = unique2(
1930
+ args.remainingTests ?? unique2([...args.analysis.visibleErrorLabels, ...args.analysis.visibleFailedLabels])
1508
1931
  );
1932
+ const primarySuspectKind = derivePrimarySuspectKind({
1933
+ mainBuckets,
1934
+ dominantBlockerBucketIndex
1935
+ });
1509
1936
  let nextBestAction;
1510
1937
  if (args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0) {
1511
1938
  nextBestAction = {
@@ -1550,7 +1977,10 @@ function buildTestStatusDiagnoseContract(args) {
1550
1977
  raw_needed: rawNeeded,
1551
1978
  additional_source_read_likely_low_value: diagnosisComplete && !rawNeeded,
1552
1979
  read_raw_only_if: rawNeeded ? "you still need exact traceback lines after focused or verbose detail" : null,
1980
+ remaining_mode: args.remainingMode ?? "none",
1553
1981
  dominant_blocker_bucket_index: dominantBlockerBucketIndex,
1982
+ primary_suspect_kind: primarySuspectKind,
1983
+ confidence_reason: "Unknown or low-confidence buckets remain; one deeper sift pass is justified.",
1554
1984
  provider_used: false,
1555
1985
  provider_confidence: null,
1556
1986
  provider_failed: false,
@@ -1578,13 +2008,21 @@ function buildTestStatusDiagnoseContract(args) {
1578
2008
  readTargets,
1579
2009
  hasSecondaryVisibleBucket: mainBuckets.some(
1580
2010
  (bucket) => bucket.secondary_visible_despite_blocker
1581
- )
2011
+ ),
2012
+ remainingMode: args.contractOverrides?.remaining_mode ?? baseContract.remaining_mode
1582
2013
  })
1583
2014
  }
1584
2015
  };
2016
+ const resolvedDecision = effectiveDecision ?? deriveDecision(mergedContractWithoutDecision);
2017
+ const resolvedConfidenceReason = buildConfidenceReason({
2018
+ decision: resolvedDecision,
2019
+ mainBuckets,
2020
+ primarySuspectKind: mergedContractWithoutDecision.primary_suspect_kind
2021
+ });
1585
2022
  const contract = testStatusDiagnoseContractSchema.parse({
1586
2023
  ...mergedContractWithoutDecision,
1587
- decision: effectiveDecision ?? deriveDecision(mergedContractWithoutDecision)
2024
+ confidence_reason: resolvedConfidenceReason,
2025
+ decision: resolvedDecision
1588
2026
  });
1589
2027
  return {
1590
2028
  contract,
@@ -1636,6 +2074,7 @@ function buildTestStatusAnalysisContext(args) {
1636
2074
  `- diagnosis_complete=${args.contract.diagnosis_complete}`,
1637
2075
  `- raw_needed=${args.contract.raw_needed}`,
1638
2076
  `- decision=${args.contract.decision}`,
2077
+ `- remaining_mode=${args.contract.remaining_mode}`,
1639
2078
  `- provider_used=${args.contract.provider_used}`,
1640
2079
  `- provider_failed=${args.contract.provider_failed}`,
1641
2080
  `- raw_slice_strategy=${args.contract.raw_slice_strategy}`,
@@ -1656,6 +2095,27 @@ function buildTestStatusAnalysisContext(args) {
1656
2095
  var RISK_LINE_PATTERN = /(destroy|delete|drop|recreate|replace|revoke|deny|downtime|data loss|iam|network exposure)/i;
1657
2096
  var ZERO_DESTRUCTIVE_SUMMARY_PATTERN = /\b0\s+to\s+(destroy|delete|drop|recreate|replace|revoke)\b/i;
1658
2097
  var SAFE_LINE_PATTERN = /(no changes|up-to-date|up to date|no risky changes|safe to apply)/i;
2098
// Plan comment header for a resource that is going to be removed or recreated.
// Both "will be ..." and "must be ..." phrasings are accepted: forced
// replacements are announced as "must be replaced", which the previous
// "will be"-only pattern never matched.
var RESOURCE_DESTROY_HEADER_PATTERN = /^#\s+.+\b(?:will|must) be (destroyed|deleted|replaced)\b/i;
// Phrases that indicate a destroy was refused or that the change is disruptive.
var DESTROY_ERROR_PATTERN = /(instance cannot be destroyed|prevent_destroy|downtime|data loss)/i;
// A line consisting solely of a "- destroy" action marker.
var ACTION_DESTROY_PATTERN = /^-\s+destroy$/i;
2101
+ var TSC_CODE_LABELS = {
2102
+ TS1002: "syntax error",
2103
+ TS1005: "syntax error",
2104
+ TS2304: "cannot find name",
2105
+ TS2307: "cannot find module",
2106
+ TS2322: "type mismatch",
2107
+ TS2339: "missing property on type",
2108
+ TS2345: "argument type mismatch",
2109
+ TS2554: "wrong argument count",
2110
+ TS2741: "missing required property",
2111
+ TS2769: "no matching overload",
2112
+ TS5083: "config file error",
2113
+ TS6133: "declared but unused",
2114
+ TS7006: "implicit any",
2115
+ TS18003: "no inputs were found",
2116
+ TS18046: "unknown type",
2117
+ TS18048: "possibly undefined"
2118
+ };
1659
2119
/**
 * Gathers trimmed, non-empty input lines that satisfy `matcher`.
 *
 * Every line is tested before the result is truncated, so the returned array
 * is the first `limit` matches in input order.
 *
 * @param input - Raw multi-line text.
 * @param matcher - Regular expression applied to each trimmed line.
 * @param limit - Maximum number of lines to return (default 3).
 * @returns The matching lines, trimmed.
 */
function collectEvidence(input, matcher, limit = 3) {
  const hits = [];
  for (const rawLine of input.split("\n")) {
    const text = rawLine.trim();
    if (text.length > 0 && matcher.test(text)) {
      hits.push(text);
    }
  }
  return hits.slice(0, limit);
}
@@ -1669,66 +2129,234 @@ function inferPackage(line) {
1669
2129
/**
 * Builds the standard remediation sentence for a vulnerable package.
 *
 * @param pkg - Package name to mention in the sentence.
 * @returns The sentence "Upgrade <pkg> to a patched version."
 */
function inferRemediation(pkg) {
  return "Upgrade ".concat(pkg, " to a patched version.");
}
1672
- function getCount(input, label) {
1673
- const matches = [...input.matchAll(new RegExp(`(\\d+)\\s+${label}`, "gi"))];
1674
- const lastMatch = matches.at(-1);
1675
- return lastMatch ? Number(lastMatch[1]) : 0;
1676
- }
1677
- function detectTestRunner(input) {
1678
- if (/^\s*Test Files?\s+(?:\d+\s+failed\s*\|\s*)?\d+\s+passed/m.test(input) || /^\s*Tests?\s+(?:\d+\s+failed\s*\|\s*)?\d+\s+passed/m.test(input) || /^\s*Snapshots?\s+(?:\d+\s+failed\s*\|\s*)?\d+\s+passed/m.test(input) || /⎯{2,}\s+Failed Tests?\s+\d+\s+⎯{2,}/.test(input)) {
1679
- return "vitest";
1680
- }
1681
- if (/^\s*Test Suites:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m.test(input) || /^\s*Tests:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m.test(input)) {
1682
- return "jest";
1683
- }
1684
- if (/\bpytest\b/i.test(input) || /^\s*=+.*\b\d+\s+failed\b.*=+\s*$/m.test(input) || /\bcollected\s+\d+\s+items\b/i.test(input)) {
1685
- return "pytest";
1686
- }
1687
- return "unknown";
1688
- }
1689
- function extractVitestLineCount(input, label, metric) {
1690
- const matcher = new RegExp(`^\\s*${label}\\s+(.+)$`, "gmi");
1691
- const lines = [...input.matchAll(matcher)];
1692
- const line = lines.at(-1)?.[1];
1693
- if (!line) {
2132
+ function parseCompactAuditVulnerability(line) {
2133
+ if (/^Severity:\s*/i.test(line)) {
1694
2134
  return null;
1695
2135
  }
1696
- const metricMatch = line.match(new RegExp(`(\\d+)\\s+${metric}`, "i"));
1697
- return metricMatch ? Number(metricMatch[1]) : null;
1698
- }
1699
- function extractJestLineCount(input, label, metric) {
1700
- const matcher = new RegExp(`^\\s*${label}:\\s+(.+)$`, "gmi");
1701
- const lines = [...input.matchAll(matcher)];
1702
- const line = lines.at(-1)?.[1];
1703
- if (!line) {
2136
+ if (!/\b(critical|high)\b/i.test(line)) {
1704
2137
  return null;
1705
2138
  }
1706
- const metricMatch = line.match(new RegExp(`(\\d+)\\s+${metric}`, "i"));
1707
- return metricMatch ? Number(metricMatch[1]) : null;
1708
- }
1709
- function extractTestStatusCounts(input, runner) {
1710
- if (runner === "vitest") {
1711
- return {
1712
- passed: extractVitestLineCount(input, "Tests?", "passed") ?? getCount(input, "passed"),
1713
- failed: extractVitestLineCount(input, "Tests?", "failed") ?? getCount(input, "failed"),
1714
- errors: extractVitestLineCount(input, "Errors?", "error") ?? extractVitestLineCount(input, "Errors?", "errors") ?? Math.max(getCount(input, "errors"), getCount(input, "error")),
1715
- skipped: extractVitestLineCount(input, "Tests?", "skipped") ?? getCount(input, "skipped"),
1716
- snapshotFailures: extractVitestLineCount(input, "Snapshots?", "failed") ?? void 0
1717
- };
1718
- }
1719
- if (runner === "jest") {
1720
- return {
1721
- passed: extractJestLineCount(input, "Tests", "passed") ?? getCount(input, "passed"),
1722
- failed: extractJestLineCount(input, "Tests", "failed") ?? getCount(input, "failed"),
1723
- errors: Math.max(getCount(input, "errors"), getCount(input, "error")),
1724
- skipped: extractJestLineCount(input, "Tests", "skipped") ?? getCount(input, "skipped")
1725
- };
2139
+ const pkg = inferPackage(line);
2140
+ if (!pkg) {
2141
+ return null;
1726
2142
  }
1727
2143
  return {
1728
- passed: getCount(input, "passed"),
1729
- failed: getCount(input, "failed"),
1730
- errors: Math.max(getCount(input, "errors"), getCount(input, "error")),
1731
- skipped: getCount(input, "skipped")
2144
+ package: pkg,
2145
+ severity: inferSeverity(line),
2146
+ remediation: inferRemediation(pkg)
2147
+ };
2148
+ }
2149
/**
 * Extracts a package name from a line that looks like an audit report header.
 *
 * A header is a leading package-like token followed either by two or more
 * whitespace characters or by whitespace and a version-range character or
 * digit. Blank lines, lines starting with "#", lines containing ":" and lines
 * starting with "node_modules/" are rejected.
 *
 * @param line - One line of audit output.
 * @returns The package token, or null when the line is not a header.
 */
function inferAuditPackageHeader(line) {
  const text = line.trim();
  if (text === "") {
    return null;
  }
  if (text.startsWith("#") || text.includes(":")) {
    return null;
  }
  if (/^node_modules\//i.test(text)) {
    return null;
  }
  const header = /^([@a-z0-9._/-]+)(?:\s{2,}|\s+(?:[<>=~^*]|\d))/i.exec(text);
  return header === null ? null : header[1];
}
2157
/**
 * Scans audit output for critical/high vulnerabilities and returns one entry
 * per distinct package + severity pair, in first-seen order.
 *
 * Two line shapes are recognised:
 *  - a compact single-line entry, handled by `parseCompactAuditVulnerability`;
 *  - a package header line followed, within the next four lines, by a
 *    "Severity: critical|high" line. The look-ahead stops early when another
 *    header or compact entry is reached.
 *
 * @param input - Raw audit output.
 * @returns Array of `{ package, severity, remediation }` objects.
 */
function collectAuditCriticalVulnerabilities(input) {
  const rows = input.split("\n");
  const found = [];
  const recorded = new Set();
  const record = (pkg, severity) => {
    const dedupeKey = `${pkg}:${severity}`;
    if (!recorded.has(dedupeKey)) {
      recorded.add(dedupeKey);
      found.push({
        package: pkg,
        severity,
        remediation: inferRemediation(pkg)
      });
    }
  };
  rows.forEach((rawRow, position) => {
    const text = rawRow.trim();
    if (!text) {
      return;
    }
    const compactEntry = parseCompactAuditVulnerability(text);
    if (compactEntry) {
      record(compactEntry.package, compactEntry.severity);
      return;
    }
    const headerPackage = inferAuditPackageHeader(text);
    if (!headerPackage) {
      return;
    }
    // Look at most four lines ahead for the severity belonging to this header.
    const windowEnd = Math.min(rows.length, position + 5);
    for (let ahead = position + 1; ahead < windowEnd; ahead += 1) {
      const following = rows[ahead].trim();
      if (!following) {
        continue;
      }
      const severityHit = /^Severity:\s*(critical|high)\b/i.exec(following);
      if (severityHit) {
        record(headerPackage, severityHit[1].toLowerCase());
        break;
      }
      // A new entry begins here, so this header has no qualifying severity.
      if (inferAuditPackageHeader(following) || parseCompactAuditVulnerability(following)) {
        break;
      }
    }
  });
  return found;
}
2204
/**
 * Reads the number that precedes the last occurrence of `label` in `input`.
 *
 * `label` is interpolated into a case-insensitive regular expression as-is,
 * so it is treated as regex source rather than literal text.
 *
 * @param input - Text to search.
 * @param label - Regex fragment that must follow the digits and whitespace.
 * @returns The number from the final match, or 0 when nothing matches.
 */
function getCount(input, label) {
  const pattern = new RegExp(`(\\d+)\\s+${label}`, "gi");
  let finalDigits = null;
  for (const hit of input.matchAll(pattern)) {
    finalDigits = hit[1];
  }
  return finalDigits === null ? 0 : Number(finalDigits);
}
2209
/**
 * Selects up to four distinct lines from infrastructure plan output that
 * evidence risky changes.
 *
 * Patterns are applied in a fixed priority order: the plan summary, resource
 * destroy headers, destroy errors, destroy action lines, and finally the
 * generic risk pattern. Each later stage only runs while fewer than four
 * lines have been collected.
 *
 * @param input - Raw plan output.
 * @returns At most four trimmed evidence lines, in selection order.
 */
function collectInfraRiskEvidence(input) {
  const candidates = input.split("\n").map((line) => line.trim()).filter((line) => line.length > 0);
  const picked = [];
  const alreadyPicked = new Set();
  // Adds lines matching `matcher`, honouring the optional exclude pattern,
  // per-pattern `limit` and overall `maxEvidence` cap (default 4).
  const gather = (matcher, options) => {
    const perPatternCap = options?.limit;
    const overallCap = options?.maxEvidence ?? 4;
    let taken = 0;
    for (const text of candidates) {
      const eligible = matcher.test(text) && !options?.exclude?.test(text) && !alreadyPicked.has(text);
      if (!eligible) {
        continue;
      }
      picked.push(text);
      alreadyPicked.add(text);
      taken += 1;
      if (perPatternCap && taken >= perPatternCap || picked.length >= overallCap) {
        return;
      }
    }
  };
  gather(/Plan:/i, {
    exclude: ZERO_DESTRUCTIVE_SUMMARY_PATTERN,
    limit: 1
  });
  const laterStages = [
    [RESOURCE_DESTROY_HEADER_PATTERN, { limit: 2 }],
    [DESTROY_ERROR_PATTERN, { limit: 1 }],
    [ACTION_DESTROY_PATTERN, { limit: 1 }],
    [
      RISK_LINE_PATTERN,
      {
        exclude: /->\s+null$|\b0\s+to\s+(destroy|delete|drop|recreate|replace|revoke)\b/i,
        maxEvidence: 4
      }
    ]
  ];
  for (const [matcher, options] of laterStages) {
    if (picked.length < 4) {
      gather(matcher, options);
    }
  }
  return picked.slice(0, 4);
}
2257
/**
 * Lists the resources that a plan announces as destroyed, deleted or replaced.
 *
 * A target is read from comment headers of the form
 * "# <address> will be destroyed" / "# <address> must be replaced". The
 * "must be" phrasing and the "is tainted, so must be replaced" variant are
 * accepted as well; matching only "will be" silently dropped forced
 * replacements from the result.
 *
 * @param input - Raw plan output.
 * @returns Distinct resource addresses in first-seen order.
 */
function collectInfraDestroyTargets(input) {
  // The optional "is tainted, so" clause is consumed outside the capture so
  // the address stays clean for tainted resources.
  const headerPattern = /^#\s+(.+?)(?:\s+is tainted, so)?\s+(?:will|must) be (destroyed|deleted|replaced)\b/i;
  // A Set keeps insertion order and removes repeated headers.
  const targets = new Set();
  for (const rawLine of input.split("\n")) {
    const target = headerPattern.exec(rawLine.trim())?.[1]?.trim();
    if (target) {
      targets.add(target);
    }
  }
  return [...targets];
}
2271
/**
 * Determines how many resources a plan intends to destroy.
 *
 * The number from the last "<n> to destroy|delete|drop|recreate|replace|revoke"
 * phrase in the output is used; when no such phrase exists the count falls
 * back to the number of collected destroy targets.
 *
 * @param input - Raw plan output.
 * @param destroyTargets - Targets already collected from the plan.
 * @returns The destroy count.
 */
function inferInfraDestroyCount(input, destroyTargets) {
  const summaryPattern = /\b(\d+)\s+to\s+(destroy|delete|drop|recreate|replace|revoke)\b/gi;
  let reported = null;
  for (const hit of input.matchAll(summaryPattern)) {
    reported = hit[1];
  }
  return reported === null ? destroyTargets.length : Number(reported);
}
2278
/**
 * Extracts "Error: ..." entries from plan output and classifies each one.
 *
 * For every error line (optionally prefixed by a box-drawing or pipe
 * character) the error line and the seven lines after it are searched for a
 * "Resource <address> has lifecycle.prevent_destroy set" sentence. When found
 * the blocker is typed "prevent_destroy" with that address as target;
 * otherwise it is typed "destroy_blocked" with a null target. Entries with the
 * same type, target and message are reported once.
 *
 * @param input - Raw plan output.
 * @returns Array of `{ type, target, message }` in first-seen order.
 */
function collectInfraBlockers(input) {
  const rows = input.split("\n");
  // Keyed by type/target/message; Map iteration preserves first-seen order.
  const found = new Map();
  rows.forEach((row, position) => {
    const errorHit = /^(?:[│|]\s*)?Error:\s+(.+)$/.exec(row.trim());
    if (errorHit === null) {
      return;
    }
    const message = errorHit[1].trim();
    const context = rows.slice(position, position + 8).join("\n");
    const guardHit = /Resource\s+([^\s]+)\s+has lifecycle\.prevent_destroy set/i.exec(context);
    const target = guardHit === null ? null : guardHit[1];
    const type = target ? "prevent_destroy" : "destroy_blocked";
    const dedupeKey = `${type}:${target ?? ""}:${message}`;
    if (!found.has(dedupeKey)) {
      found.set(dedupeKey, { type, target, message });
    }
  });
  return [...found.values()];
}
2305
/**
 * Guesses which test runner produced the given output.
 *
 * Runner signatures are checked in order (vitest, then jest, then pytest) and
 * the first runner with any matching pattern wins.
 *
 * @param input - Raw test runner output.
 * @returns "vitest", "jest", "pytest" or "unknown".
 */
function detectTestRunner(input) {
  const signatures = [
    [
      "vitest",
      [
        /^\s*Test Files?\s+(?:\d+\s+failed\s*\|\s*)?\d+\s+passed/m,
        /^\s*Tests?\s+(?:\d+\s+failed\s*\|\s*)?\d+\s+passed/m,
        /^\s*Snapshots?\s+(?:\d+\s+failed\s*\|\s*)?\d+\s+passed/m,
        /⎯{2,}\s+Failed Tests?\s+\d+\s+⎯{2,}/
      ]
    ],
    [
      "jest",
      [
        /^\s*Test Suites:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m,
        /^\s*Tests:\s+\d+\s+failed,\s+\d+\s+passed(?:,\s+\d+\s+total)?/m
      ]
    ],
    [
      "pytest",
      [
        /\bpytest\b/i,
        /^\s*(?:FAILED|ERROR)\s+[A-Za-z0-9_./-]+::[^\n]+$/m,
        /^\s*=+.*\b\d+\s+failed\b.*=+\s*$/m,
        /\bcollected\s+\d+\s+items\b/i
      ]
    ]
  ];
  for (const [runner, patterns] of signatures) {
    if (patterns.some((pattern) => pattern.test(input))) {
      return runner;
    }
  }
  return "unknown";
}
2317
/**
 * Reads one metric from the last summary line that starts with `label`
 * followed by whitespace.
 *
 * Both `label` and `metric` are interpolated into regular expressions as regex
 * source, so a caller may pass fragments such as "Tests?".
 *
 * @param input - Raw test output.
 * @param label - Regex fragment for the summary line label.
 * @param metric - Regex fragment for the word that follows the number.
 * @returns The number, or null when the line or the metric is absent.
 */
function extractVitestLineCount(input, label, metric) {
  const linePattern = new RegExp(`^\\s*${label}\\s+(.+)$`, "gmi");
  let summary;
  for (const hit of input.matchAll(linePattern)) {
    // Keep overwriting so the final summary line wins.
    summary = hit[1];
  }
  if (!summary) {
    return null;
  }
  const counted = new RegExp(`(\\d+)\\s+${metric}`, "i").exec(summary);
  return counted === null ? null : Number(counted[1]);
}
2327
/**
 * Reads one metric from the last summary line of the form "<label>: ...".
 *
 * Both `label` and `metric` are interpolated into regular expressions as
 * regex source.
 *
 * @param input - Raw test output.
 * @param label - Regex fragment for the label that precedes the colon.
 * @param metric - Regex fragment for the word that follows the number.
 * @returns The number, or null when the line or the metric is absent.
 */
function extractJestLineCount(input, label, metric) {
  const linePattern = new RegExp(`^\\s*${label}:\\s+(.+)$`, "gmi");
  let summary;
  for (const hit of input.matchAll(linePattern)) {
    // Keep overwriting so the final summary line wins.
    summary = hit[1];
  }
  if (!summary) {
    return null;
  }
  const counted = new RegExp(`(\\d+)\\s+${metric}`, "i").exec(summary);
  return counted === null ? null : Number(counted[1]);
}
2337
/**
 * Extracts passed/failed/errors/skipped counts from test output.
 *
 * For vitest and jest the runner-specific summary line is preferred and the
 * generic "last <n> <label>" scan is used only when that line or metric is
 * missing. Any other runner uses the generic scan for every count. Vitest
 * results additionally carry `snapshotFailures` (undefined when absent).
 *
 * @param input - Raw test output.
 * @param runner - Runner name as returned by `detectTestRunner`.
 * @returns Object with `passed`, `failed`, `errors`, `skipped` and, for vitest, `snapshotFailures`.
 */
function extractTestStatusCounts(input, runner) {
  const genericErrors = () => Math.max(getCount(input, "errors"), getCount(input, "error"));
  switch (runner) {
    case "vitest": {
      const fromTestsLine = (metric) => extractVitestLineCount(input, "Tests?", metric);
      return {
        passed: fromTestsLine("passed") ?? getCount(input, "passed"),
        failed: fromTestsLine("failed") ?? getCount(input, "failed"),
        errors: extractVitestLineCount(input, "Errors?", "error") ?? extractVitestLineCount(input, "Errors?", "errors") ?? genericErrors(),
        skipped: fromTestsLine("skipped") ?? getCount(input, "skipped"),
        snapshotFailures: extractVitestLineCount(input, "Snapshots?", "failed") ?? undefined
      };
    }
    case "jest": {
      const fromTestsLine = (metric) => extractJestLineCount(input, "Tests", metric);
      return {
        passed: fromTestsLine("passed") ?? getCount(input, "passed"),
        failed: fromTestsLine("failed") ?? getCount(input, "failed"),
        errors: genericErrors(),
        skipped: fromTestsLine("skipped") ?? getCount(input, "skipped")
      };
    }
    default:
      return {
        passed: getCount(input, "passed"),
        failed: getCount(input, "failed"),
        errors: genericErrors(),
        skipped: getCount(input, "skipped")
      };
  }
}
1734
2362
  function formatCount2(count, singular, plural = `${singular}s`) {
@@ -1751,6 +2379,21 @@ function collectUniqueMatches(input, matcher, limit = 6) {
1751
2379
  }
1752
2380
  return values;
1753
2381
  }
2382
/**
 * Shortens a file path for display.
 *
 * Backslashes are converted to forward slashes and the result is trimmed.
 * Relative paths of at most 60 characters are returned in that normalised
 * form; absolute paths (leading "/" or a drive letter) and longer paths are
 * reduced to their final segment. A path that is blank after normalisation is
 * returned unchanged, and a path ending in "/" keeps its normalised form.
 *
 * @param file - Path as it appeared in the output.
 * @returns The display form of the path.
 */
function compactDisplayFile(file) {
  const posixPath = file.replace(/\\/g, "/").trim();
  if (posixPath === "") {
    return file;
  }
  const isAbsolute = posixPath.startsWith("/") || /^[A-Za-z]:\//.test(posixPath);
  const isShortRelative = !isAbsolute && posixPath.length <= 60;
  if (isShortRelative) {
    return posixPath;
  }
  // Everything after the last "/" (the whole string when there is none).
  const tail = posixPath.slice(posixPath.lastIndexOf("/") + 1);
  return tail.length > 0 ? tail : posixPath;
}
2394
/**
 * Prepares a short, ordered list of file names for display.
 *
 * Entries are trimmed, blanks and duplicates are dropped, the remainder is
 * sorted with `localeCompare`, cut to `limit` items and each survivor is
 * passed through `compactDisplayFile`.
 *
 * @param files - Iterable of file paths.
 * @param limit - Maximum number of entries to return (default 3).
 * @returns The display names.
 */
function formatDisplayedFiles(files, limit = 3) {
  const cleaned = Array.from(files, (file) => file.trim()).filter((file) => file.length > 0);
  const ordered = Array.from(new Set(cleaned)).sort((left, right) => left.localeCompare(right));
  return ordered.slice(0, limit).map((file) => compactDisplayFile(file));
}
1754
2397
  function emptyAnchor() {
1755
2398
  return {
1756
2399
  file: null,
@@ -2030,6 +2673,31 @@ function classifyFailureReason(line, options) {
2030
2673
  group: "permission or locked resource failures"
2031
2674
  };
2032
2675
  }
2676
+ const osDiskFullFailure = normalized.match(
2677
+ /(OSError:\s*\[Errno 28\][^$]*|No space left on device)/i
2678
+ );
2679
+ if (osDiskFullFailure) {
2680
+ return {
2681
+ reason: buildClassifiedReason(
2682
+ "configuration",
2683
+ `disk full (${buildExcerptDetail(
2684
+ osDiskFullFailure[1] ?? normalized,
2685
+ "No space left on device"
2686
+ )})`
2687
+ ),
2688
+ group: "test configuration failures"
2689
+ };
2690
+ }
2691
+ const osPermissionFailure = normalized.match(/OSError:\s*\[Errno 13\][^$]*/i);
2692
+ if (osPermissionFailure) {
2693
+ return {
2694
+ reason: buildClassifiedReason(
2695
+ "permission",
2696
+ buildExcerptDetail(osPermissionFailure[0] ?? normalized, "permission denied")
2697
+ ),
2698
+ group: "permission or locked resource failures"
2699
+ };
2700
+ }
2033
2701
  const xdistWorkerCrash = normalized.match(
2034
2702
  /(worker ['"][^'"]+['"] crashed|node down:\s*[^,;]+|WorkerLost[^,;]*|Worker exited unexpectedly[^,;]*|worker exited unexpectedly[^,;]*)/i
2035
2703
  );
@@ -2055,7 +2723,7 @@ function classifyFailureReason(line, options) {
2055
2723
  };
2056
2724
  }
2057
2725
  const networkFailure = normalized.match(
2058
- /(Max retries exceeded[^,;]*|gaierror[^,;]*|SSLCertVerificationError[^,;]*|Network is unreachable)/i
2726
+ /(Max retries exceeded[^,;]*|gaierror[^,;]*|SSLCertVerificationError[^,;]*|Network is unreachable|ConnectionResetError[^,;]*|BrokenPipeError[^,;]*|HTTPError:\s*[45]\d\d[^,;]*)/i
2059
2727
  );
2060
2728
  if (networkFailure) {
2061
2729
  return {
@@ -2066,6 +2734,15 @@ function classifyFailureReason(line, options) {
2066
2734
  group: "network dependency failures"
2067
2735
  };
2068
2736
  }
2737
+ const matcherAssertionFailure = normalized.match(
2738
+ /(expect\(received\)\.(?:toBe|toEqual|toStrictEqual|toMatchObject)\(expected\))/i
2739
+ );
2740
+ if (matcherAssertionFailure) {
2741
+ return {
2742
+ reason: `assertion failed: ${matcherAssertionFailure[1]}`.slice(0, 120),
2743
+ group: "assertion failures"
2744
+ };
2745
+ }
2069
2746
  const relationMigration = normalized.match(/relation ["'`]([^"'`]+)["'`] does not exist/i);
2070
2747
  if (relationMigration) {
2071
2748
  return {
@@ -2104,6 +2781,34 @@ function classifyFailureReason(line, options) {
2104
2781
  group: "memory exhaustion failures"
2105
2782
  };
2106
2783
  }
2784
+ const propertySetterOverrideFailure = normalized.match(
2785
+ /AttributeError:\s*(property ['"][^'"]+['"] of ['"][^'"]+['"] object has no setter|can't set attribute|readonly attribute|read-only attribute)/i
2786
+ );
2787
+ if (propertySetterOverrideFailure) {
2788
+ return {
2789
+ reason: buildClassifiedReason(
2790
+ "configuration",
2791
+ `invalid test setup override (${buildExcerptDetail(
2792
+ `AttributeError: ${propertySetterOverrideFailure[1] ?? normalized}`,
2793
+ "AttributeError: can't set attribute"
2794
+ )})`
2795
+ ),
2796
+ group: "test configuration failures"
2797
+ };
2798
+ }
2799
+ const setupOverrideFailure = normalized.match(/\b(AttributeError|TypeError):\s*(.+)$/i);
2800
+ if (setupOverrideFailure && /(monkeypatch|patch|fixture|settings|conftest)/i.test(normalized)) {
2801
+ return {
2802
+ reason: buildClassifiedReason(
2803
+ "configuration",
2804
+ `invalid test setup override (${buildExcerptDetail(
2805
+ `${setupOverrideFailure[1]}: ${setupOverrideFailure[2] ?? ""}`,
2806
+ `${setupOverrideFailure[1]}`
2807
+ )})`
2808
+ ),
2809
+ group: "test configuration failures"
2810
+ };
2811
+ }
2107
2812
  const typeErrorFailure = normalized.match(/TypeError:\s*(.+)$/i);
2108
2813
  if (typeErrorFailure) {
2109
2814
  return {
@@ -2809,6 +3514,9 @@ function classifyBucketTypeFromReason(reason) {
2809
3514
  if (reason.startsWith("missing module:")) {
2810
3515
  return "import_dependency_failure";
2811
3516
  }
3517
+ if (reason.startsWith("golden output drift:")) {
3518
+ return "golden_output_drift";
3519
+ }
2812
3520
  if (reason.startsWith("assertion failed:")) {
2813
3521
  return "assertion_failure";
2814
3522
  }
@@ -3185,13 +3893,17 @@ function analyzeTestStatus(input) {
3185
3893
  const interrupted = /\binterrupted\b/i.test(input) || /\bKeyboardInterrupt\b/i.test(input);
3186
3894
  const collectionItems = chooseStrongestFailureItems(collectCollectionFailureItems(input));
3187
3895
  const inlineItems = chooseStrongestFailureItems(collectInlineFailureItems(input));
3896
+ const statusItems = collectInlineFailureItemsWithStatus(input);
3188
3897
  const visibleErrorItems = chooseStrongestStatusFailureItems([
3189
3898
  ...collectionItems.map((item) => ({
3190
3899
  ...item,
3191
3900
  status: "error"
3192
3901
  })),
3193
- ...collectInlineFailureItemsWithStatus(input).filter((item) => item.status === "error")
3902
+ ...statusItems.filter((item) => item.status === "error")
3194
3903
  ]);
3904
+ const visibleFailedItems = chooseStrongestStatusFailureItems(
3905
+ statusItems.filter((item) => item.status === "failed")
3906
+ );
3195
3907
  const labels = collectFailureLabels(input);
3196
3908
  const visibleErrorLabels = labels.filter((item) => item.status === "error").map((item) => item.label);
3197
3909
  const visibleFailedLabels = labels.filter((item) => item.status === "failed").map((item) => item.label);
@@ -3250,6 +3962,7 @@ function analyzeTestStatus(input) {
3250
3962
  visibleErrorLabels,
3251
3963
  visibleFailedLabels,
3252
3964
  visibleErrorItems,
3965
+ visibleFailedItems,
3253
3966
  buckets
3254
3967
  };
3255
3968
  }
@@ -3310,74 +4023,626 @@ function testStatusHeuristic(input, detail = "standard") {
3310
4023
  return null;
3311
4024
  }
3312
4025
/**
 * Heuristic for the audit preset: produces a pretty-printed JSON report
 * without involving a provider when the input is clear enough.
 *
 * @param input Captured audit output.
 * @returns A JSON string (`status`, `vulnerabilities`, `summary`), or `null`
 *   when no vulnerability could be collected and no explicit zero-count
 *   statement is present.
 */
function auditCriticalHeuristic(input) {
  const toReport = (vulnerabilities, summary) =>
    JSON.stringify({ status: "ok", vulnerabilities, summary }, null, 2);

  // An explicit "0 vulnerabilities" statement short-circuits everything else.
  const zeroStatements = [/\bfound\s+0\s+vulnerabilities\b/i, /\b0\s+vulnerabilities\b/i];
  if (zeroStatements.some((pattern) => pattern.test(input))) {
    return toReport([], "No high or critical vulnerabilities found in the provided input.");
  }

  const vulnerabilities = collectAuditCriticalVulnerabilities(input);
  const total = vulnerabilities.length;
  if (total === 0) {
    return null;
  }

  const [firstVulnerability] = vulnerabilities;
  let summary;
  if (total === 1) {
    summary = `One ${firstVulnerability.severity} vulnerability found in ${firstVulnerability.package}.`;
  } else {
    summary = `${total} high or critical vulnerabilities found in the provided input.`;
  }
  return toReport(vulnerabilities, summary);
}
4052
/**
 * Heuristic for the infrastructure-risk preset.
 *
 * Precedence: any collected risk evidence yields a "fail" verdict; otherwise
 * an explicit zero-destructive summary line yields "pass"; otherwise lines
 * matching the safe pattern yield "pass"; otherwise the heuristic abstains.
 *
 * @param input Captured plan/apply output.
 * @returns A pretty-printed JSON verdict string, or `null` when undecided.
 */
function infraRiskHeuristic(input) {
  const toVerdict = (verdict, reason, evidence, destroyCount, targets, blockerList) =>
    JSON.stringify(
      {
        verdict,
        reason,
        evidence,
        destroy_count: destroyCount,
        destroy_targets: targets,
        blockers: blockerList
      },
      null,
      2
    );

  const destroyTargets = collectInfraDestroyTargets(input);
  const blockers = collectInfraBlockers(input);
  const trimmedLines = input.split("\n").map((line) => line.trim());
  // Keep at most three summary lines as evidence.
  const zeroDestructiveEvidence = trimmedLines
    .filter((line) => line.length > 0 && ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line))
    .slice(0, 3);
  const riskEvidence = collectInfraRiskEvidence(input);

  if (riskEvidence.length > 0) {
    return toVerdict(
      "fail",
      "Destructive or clearly risky infrastructure change signals are present.",
      riskEvidence,
      inferInfraDestroyCount(input, destroyTargets),
      destroyTargets,
      blockers
    );
  }

  if (zeroDestructiveEvidence.length > 0) {
    return toVerdict(
      "pass",
      "The provided input explicitly indicates zero destructive changes.",
      zeroDestructiveEvidence,
      0,
      [],
      []
    );
  }

  // Only consulted once the two stronger signals are absent.
  const safeEvidence = collectEvidence(input, SAFE_LINE_PATTERN);
  if (safeEvidence.length > 0) {
    return toVerdict(
      "pass",
      "The provided input explicitly indicates no risky infrastructure changes.",
      safeEvidence,
      0,
      [],
      []
    );
  }

  return null;
}
4102
/**
 * Parses TypeScript compiler error lines out of captured output.
 *
 * Recognised shapes (after removing ANSI colour sequences):
 *   - `file(line,col): error TSxxxx: message`
 *   - `file:line:col - error TSxxxx: message`
 *   - `error TSxxxx: message` (no location; file/line/column are `null`)
 *
 * @param input Raw compiler output.
 * @returns Diagnostics in input order; file paths use forward slashes.
 */
function parseTscErrors(input) {
  const locatedPatterns = [
    /^(.+)\((\d+),(\d+)\):\s+error\s+(TS\d+):\s+(.+)$/,
    /^(.+):(\d+):(\d+)\s+-\s+error\s+(TS\d+):\s+(.+)$/
  ];
  const barePattern = /^\s*error\s+(TS\d+):\s+(.+)$/;
  const diagnostics = [];

  input.split("\n").forEach((rawLine) => {
    const line = rawLine.replace(/\u001b\[[0-9;]*m/g, "").trimEnd();
    if (line.trim() === "") {
      return;
    }

    // The parenthesised form is tried before the colon-separated form.
    let located = null;
    for (const pattern of locatedPatterns) {
      located = line.match(pattern);
      if (located) {
        break;
      }
    }
    if (located) {
      const [, file, row, col, code, message] = located;
      diagnostics.push({
        file: file.replace(/\\/g, "/").trim(),
        line: Number(row),
        column: Number(col),
        code,
        message: message.trim()
      });
      return;
    }

    const bare = line.match(barePattern);
    if (bare) {
      diagnostics.push({
        file: null,
        line: null,
        column: null,
        code: bare[1],
        message: bare[2].trim()
      });
    }
  });

  return diagnostics;
}
4144
/**
 * Reads the last "Found N error(s) [in M file(s)]" line from compiler output.
 *
 * @param input Raw compiler output.
 * @returns `{ errorCount, fileCount }` (`fileCount` is `null` when the
 *   "in M files" part is absent), or `null` when no summary line exists.
 */
function extractTscSummary(input) {
  const summaryPattern = /\bFound\s+(\d+)\s+errors?\b(?:\s+in\s+(\d+)\s+files?)?\.?/gi;
  // Later summaries win, so keep only the final match.
  let last = null;
  for (const hit of input.matchAll(summaryPattern)) {
    last = hit;
  }
  if (last === null) {
    return null;
  }
  const [, errors, files] = last;
  return {
    errorCount: Number(errors),
    fileCount: files ? Number(files) : null
  };
}
4157
/**
 * Renders one bullet line for a group of diagnostics sharing an error code.
 *
 * @param args Object with `code`, `count` and `files` for the group.
 * @returns A line of the form `- CODE (label): <count> across <files>.`; the
 *   label and the file list are omitted when unavailable.
 */
function formatTscGroup(args) {
  const label = TSC_CODE_LABELS[args.code];
  const displayFiles = formatDisplayedFiles(args.files);
  const pieces = [`- ${args.code}`];
  if (label) {
    pieces.push(` (${label})`);
  }
  pieces.push(`: ${formatCount2(args.count, "occurrence")}`);
  if (displayFiles.length > 0) {
    pieces.push(` across ${displayFiles.join(", ")}`);
  }
  pieces.push(".");
  return pieces.join("");
}
4170
/**
 * Heuristic for the typecheck-summary preset: condenses tsc output into a
 * headline plus up to three per-error-code bullets and one overflow bullet.
 *
 * @param input Raw compiler output.
 * @returns The bullet list, `"No type errors."` when the summary reports zero
 *   errors, or `null` when the input carries no usable tsc signal.
 */
function typecheckSummaryHeuristic(input) {
  if (input.trim().length === 0) {
    return null;
  }
  const diagnostics = parseTscErrors(input);
  const summary = extractTscSummary(input);
  const sawTscOutput = diagnostics.length > 0 || summary !== null || /\berror\s+TS\d+:/m.test(input);
  if (!sawTscOutput) {
    return null;
  }
  if (summary?.errorCount === 0) {
    return "No type errors.";
  }
  if (diagnostics.length === 0 && summary === null) {
    return null;
  }

  // Prefer the compiler's own totals; fall back to what was parsed.
  const errorCount = summary?.errorCount ?? diagnostics.length;
  const allFiles = new Set();
  const byCode = /* @__PURE__ */ new Map();
  for (const { code, file } of diagnostics) {
    let entry = byCode.get(code);
    if (!entry) {
      entry = { count: 0, files: /* @__PURE__ */ new Set() };
      byCode.set(code, entry);
    }
    entry.count += 1;
    if (file) {
      entry.files.add(file);
      allFiles.add(file);
    }
  }
  const fileCount = summary?.fileCount ?? (allFiles.size > 0 ? allFiles.size : null);

  const fileSuffix = fileCount ? ` in ${formatCount2(fileCount, "file")}` : "";
  const bullets = [`- Typecheck failed: ${formatCount2(errorCount, "error")}${fileSuffix}.`];

  // Most frequent codes first; ties broken by code.
  const ranked = Array.from(byCode, ([code, entry]) => ({
    code,
    count: entry.count,
    files: entry.files
  })).sort((left, right) => right.count - left.count || left.code.localeCompare(right.code));

  const shown = ranked.slice(0, 3);
  const hidden = ranked.slice(3);
  shown.forEach((group) => {
    bullets.push(formatTscGroup(group));
  });

  if (hidden.length > 0) {
    const overflowFiles = /* @__PURE__ */ new Set();
    hidden.forEach((group) => {
      group.files.forEach((file) => overflowFiles.add(file));
    });
    let overflow = `- ${formatCount2(ranked.length - 3, "more error code")}`;
    if (overflowFiles.size > 0) {
      overflow += ` across ${formatCount2(overflowFiles.size, "file")}`;
    }
    bullets.push(`${overflow}.`);
  }

  return bullets.join("\n");
}
4229
/**
 * Decides whether a line is a file-path header in stylish lint output.
 *
 * The line must be non-empty with no surrounding whitespace, must not be a
 * problem-count or "potentially fixable" summary line, must look like a path
 * and must end in a file extension.
 *
 * @param line Candidate line; callers are expected to pass it pre-trimmed.
 * @returns `true` when the line looks like a file header.
 */
function looksLikeEslintFileHeader(line) {
  const trimmed = line.trim();
  if (trimmed === "" || trimmed !== line) {
    return false;
  }

  const summaryPatterns = [
    /^\s*[✖×x]\s+\d+\s+problems?\b/i,
    /potentially\s+fixable/i,
    /^\d+\s+problems?\b/i
  ];
  if (summaryPatterns.some((pattern) => pattern.test(line))) {
    return false;
  }

  const normalized = line.replace(/\\/g, "/");
  const hasPathPrefix = ["/", "./", "../"].some((prefix) => normalized.startsWith(prefix));
  const pathLike = hasPathPrefix || /^[A-Za-z]:\//.test(normalized) || /^[A-Za-z0-9_.-]+\//.test(normalized);
  if (!pathLike) {
    return false;
  }
  return /\.[A-Za-z0-9]+$/.test(normalized);
}
4240
/**
 * Picks the grouping key for a lint violation.
 *
 * @param rule Rule id taken from the output line, or `null` when absent.
 * @param message Violation message, used to classify rule-less entries.
 * @returns The trimmed rule id, or one of `"parsing error"`, `"fatal error"`,
 *   `"unclassified lint error"` when no rule id is available.
 */
function normalizeEslintRule(rule, message) {
  const explicit = rule ? rule.trim() : "";
  if (explicit.length > 0) {
    return explicit;
  }
  // Parsing errors are checked before the broader "fatal" wording.
  const fallbacks = [
    [/parsing error/i, "parsing error"],
    [/fatal/i, "fatal error"]
  ];
  for (const [pattern, label] of fallbacks) {
    if (pattern.test(message)) {
      return label;
    }
  }
  return "unclassified lint error";
}
4252
/**
 * Parses stylish-format lint output into a flat list of violations.
 *
 * A file-header line sets the file for the `line:col severity message [rule]`
 * entries that follow; entries seen before any header are attributed to
 * `"(unknown file)"`.
 *
 * @param input Raw lint output.
 * @returns Violations in input order.
 */
function parseEslintStylish(input) {
  const withRule = /^\s*(\d+):(\d+)\s+(error|warning)\s+(.+?)\s{2,}(\S+)\s*$/;
  const withoutRule = /^\s*(\d+):(\d+)\s+(error|warning)\s+(.+?)\s*$/;
  const violations = [];
  let currentFile = null;

  const record = (hit, rule) => {
    violations.push({
      file: currentFile ?? "(unknown file)",
      line: Number(hit[1]),
      column: Number(hit[2]),
      severity: hit[3],
      message: hit[4].trim(),
      rule: normalizeEslintRule(rule, hit[4])
    });
  };

  for (const rawLine of input.split("\n")) {
    // Drop colour codes and a Windows line ending before matching.
    const line = rawLine.replace(/\u001b\[[0-9;]*m/g, "").replace(/\r$/, "");
    const trimmed = line.trim();
    if (looksLikeEslintFileHeader(trimmed)) {
      currentFile = trimmed.replace(/\\/g, "/");
      continue;
    }

    const ruled = line.match(withRule);
    if (ruled) {
      record(ruled, ruled[5]);
      continue;
    }

    // Entries without a trailing rule id (e.g. parsing errors).
    const unruled = line.match(withoutRule);
    if (unruled) {
      record(unruled, null);
    }
  }

  return violations;
}
4287
/**
 * Extracts the totals from the last "N problems (E errors, W warnings)" line
 * of stylish lint output, plus the auto-fixable count when reported.
 *
 * The fixable line is matched both with and without a linking verb:
 * ESLint's stylish formatter prints
 * "1 error and 0 warnings potentially fixable with the `--fix` option."
 * (no "are"/"is"), so requiring the verb would never match real output.
 *
 * @param input Raw lint output.
 * @returns `{ problems, errors, warnings, fixableProblems }`
 *   (`fixableProblems` is `null` when no fixable line is present), or `null`
 *   when there is no summary line at all.
 */
function extractEslintSummary(input) {
  const summaryMatches = [
    ...input.matchAll(
      /^\s*[✖×x]?\s*(\d+)\s+problems?\s+\((\d+)\s+errors?,\s+(\d+)\s+warnings?\)/gim
    )
  ];
  // Later summaries win when the output contains several runs.
  const summary = summaryMatches[summaryMatches.length - 1];
  if (!summary) {
    return null;
  }
  const fixableMatch = input.match(
    /(\d+)\s+errors?\s+and\s+(\d+)\s+warnings?\s+(?:(?:are|is)\s+)?potentially\s+fixable/i
  );
  return {
    problems: Number(summary[1]),
    errors: Number(summary[2]),
    warnings: Number(summary[3]),
    fixableProblems: fixableMatch ? Number(fixableMatch[1]) + Number(fixableMatch[2]) : null
  };
}
4307
/**
 * Renders one bullet line for a group of lint violations sharing a rule.
 *
 * @param args Object with `rule`, `errors`, `warnings` and `files`.
 * @returns A line of the form `- rule: <counts> across <files>.`; the counts
 *   show errors and/or warnings, and the file list is omitted when empty.
 */
function formatLintGroup(args) {
  const { rule, errors, warnings, files } = args;
  const displayFiles = formatDisplayedFiles(files);

  const counts = [];
  const hasErrors = errors > 0;
  if (hasErrors) {
    counts.push(formatCount2(errors, "error"));
  }
  // Warnings are shown alongside errors, or alone when there are no errors.
  if (!hasErrors || warnings > 0) {
    counts.push(formatCount2(warnings, "warning"));
  }

  const location = displayFiles.length > 0 ? ` across ${displayFiles.join(", ")}` : "";
  return `- ${rule}: ${counts.join(", ")}${location}.`;
}
4325
/**
 * Heuristic for the lint-failures preset: condenses stylish lint output into
 * a headline plus up to three per-rule bullets and one overflow bullet.
 *
 * @param input Raw lint output. Input that starts with `[` or `{` is left
 *   alone (returns `null`).
 * @returns The bullet list, `"No lint failures."` when the summary reports
 *   zero problems, or `null` when nothing lint-like was recognised.
 */
function lintFailuresHeuristic(input) {
  const trimmed = input.trim();
  const looksStructured = trimmed.startsWith("[") || trimmed.startsWith("{");
  if (trimmed.length === 0 || looksStructured) {
    return null;
  }

  const summary = extractEslintSummary(input);
  const violations = parseEslintStylish(input);
  if (summary === null && violations.length === 0) {
    return null;
  }
  if (summary?.problems === 0) {
    return "No lint failures.";
  }

  // Prefer the tool's own totals; otherwise count what is visible.
  const problems = summary?.problems ?? violations.length;
  const errors = summary?.errors ?? countPattern(input, /^\s*\d+:\d+\s+error\b/gm);
  const warnings = summary?.warnings ?? countPattern(input, /^\s*\d+:\d+\s+warning\b/gm);

  const bullets = [];
  if (errors > 0) {
    const fixable = summary?.fixableProblems ?? 0;
    const fixNote = fixable > 0 ? ` ${formatCount2(summary.fixableProblems, "problem")} potentially fixable with --fix.` : "";
    bullets.push(
      `- Lint failed: ${formatCount2(problems, "problem")} (${formatCount2(errors, "error")}, ${formatCount2(warnings, "warning")}).${fixNote}`
    );
  } else {
    bullets.push(`- No lint errors visible: ${formatCount2(warnings, "warning")}.`);
  }

  const byRule = /* @__PURE__ */ new Map();
  for (const { rule, severity, file } of violations) {
    let tally = byRule.get(rule);
    if (!tally) {
      tally = { errors: 0, warnings: 0, files: /* @__PURE__ */ new Set() };
      byRule.set(rule, tally);
    }
    if (severity === "error") {
      tally.errors += 1;
    } else {
      tally.warnings += 1;
    }
    tally.files.add(file);
  }

  // Rules with errors first, then by total count, then alphabetically.
  const ranked = Array.from(byRule, ([rule, tally]) => ({
    rule,
    errors: tally.errors,
    warnings: tally.warnings,
    total: tally.errors + tally.warnings,
    files: tally.files
  })).sort((left, right) => {
    const leftHasErrors = left.errors > 0 ? 1 : 0;
    const rightHasErrors = right.errors > 0 ? 1 : 0;
    return rightHasErrors - leftHasErrors || right.total - left.total || left.rule.localeCompare(right.rule);
  });

  ranked.slice(0, 3).forEach((group) => {
    bullets.push(formatLintGroup(group));
  });

  const hidden = ranked.slice(3);
  if (hidden.length > 0) {
    const overflowFiles = /* @__PURE__ */ new Set();
    hidden.forEach((group) => {
      group.files.forEach((file) => overflowFiles.add(file));
    });
    let overflow = `- ${formatCount2(ranked.length - 3, "more rule")}`;
    if (overflowFiles.size > 0) {
      overflow += ` across ${formatCount2(overflowFiles.size, "file")}`;
    }
    bullets.push(`${overflow}.`);
  }

  return bullets.join("\n");
}
4395
/**
 * Removes ANSI colour/style sequences (`ESC [ ... m`) from text.
 * Other escape sequences are left untouched.
 *
 * @param input Text that may contain colour codes.
 * @returns The text with those sequences removed.
 */
function stripAnsiText(input) {
  const colourSequence = /\u001b\[[0-9;]*m/g;
  const cleaned = input.replace(colourSequence, "");
  return cleaned;
}
4398
/**
 * Normalizes a file path taken from build output: surrounding whitespace is
 * removed, backslashes become forward slashes and a leading `./` is dropped.
 *
 * Whitespace is trimmed before the `./` check so that a padded capture such
 * as `"  ./src/a.ts"` still loses its prefix; the final trim keeps results
 * for previously well-formed inputs unchanged.
 *
 * @param file Path as captured from the build log.
 * @returns The normalized path.
 */
function normalizeBuildPath(file) {
  return file.trim().replace(/\\/g, "/").replace(/^\.\//, "").trim();
}
4401
/**
 * Drops a trailing run of `.` / `:` characters, then trims whitespace.
 * Punctuation followed by trailing whitespace is not removed, because the
 * strip happens before the trim.
 *
 * @param input Message text.
 * @returns The text without trailing sentence punctuation.
 */
function trimTrailingSentencePunctuation(input) {
  const withoutTail = input.replace(/[.:]+$/, "");
  return withoutTail.trim();
}
4404
/**
 * Checks whether the text contains any recognised build-failure marker
 * (bundler "ERROR in" / "[ERROR]" lines, `error:` / `error[E…]:` lines,
 * Go and C-family `file:line:col:` diagnostics, `error TSxxxx:`, and
 * npm / pnpm / "error Command failed" wrapper errors).
 *
 * @param input Raw build output.
 * @returns `true` when at least one marker is present.
 */
function containsKnownBuildFailureSignal(input) {
  const failureMarkers = [
    /^ERROR in /m,
    /^(?:[✘✗]\s*)?\[ERROR\]\s+/m,
    /^error(?:\[E\d+\])?:\s+/m,
    /^.+?\.go:\d+:\d+:\s+\S+/m,
    /^.+?\.(?:c|cc|cpp|cxx|h|hpp|m|mm):\d+:\d+:\s*error:\s+/m,
    /\berror\s+TS\d+:/m,
    /^\s*npm ERR!/m,
    /\bERR_PNPM_/m,
    /^\s*error Command failed/m
  ];
  return failureMarkers.some((marker) => marker.test(input));
}
4407
/**
 * Reports whether the output explicitly states that the build succeeded.
 * Any known failure marker vetoes a success statement.
 *
 * @param input Raw build output.
 * @returns `true` only when a success phrase is present and no failure
 *   marker is.
 */
function detectExplicitBuildSuccess(input) {
  if (containsKnownBuildFailureSignal(input)) {
    return false;
  }
  const successPhrases = [
    /\bcompiled successfully\b/i,
    /^\s*Build succeeded\.?\s*$/im,
    /\bcompiled with 0 errors?\b/i
  ];
  return successPhrases.some((phrase) => phrase.test(input));
}
4413
/**
 * Classifies a build error message into a coarse category.
 *
 * Rules are evaluated in order and the first hit wins, so e.g. a
 * "TS2304: Cannot find name" message is an undefined identifier rather than
 * a type error.
 *
 * @param message Error message text.
 * @returns One of `"module-resolution"`, `"missing-export"`,
 *   `"undefined-identifier"`, `"syntax"`, `"type"`, or `"generic"`.
 */
function inferBuildFailureCategory(message) {
  const rules = [
    {
      category: "module-resolution",
      patterns: [
        /module not found|can't resolve|could not resolve|cannot find module|no required module provides package/i
      ]
    },
    {
      category: "missing-export",
      patterns: [/no matching export|does not provide an export named|missing export/i]
    },
    {
      category: "undefined-identifier",
      patterns: [
        /cannot find name|cannot find value|not found in this scope|undefined:|undeclared identifier/i
      ]
    },
    {
      category: "syntax",
      patterns: [/syntax error|unexpected token|expected ['"`;)]|expected .* after expression/i]
    },
    {
      category: "type",
      patterns: [/\bTS\d+\b/, /type .* is not assignable|type error|no matching overload/i]
    }
  ];
  const hit = rules.find((rule) => rule.patterns.some((pattern) => pattern.test(message)));
  return hit ? hit.category : "generic";
}
4435
/**
 * Maps a build failure category to a one-sentence fix suggestion.
 *
 * @param category Category such as `"module-resolution"` or `"wrapper"`.
 * @returns The suggestion text; unknown categories get the generic advice.
 */
function buildFailureSuggestion(category) {
  const suggestions = new Map([
    ["module-resolution", "Install the missing package or fix the import path."],
    ["missing-export", "Check the export name in the source module."],
    ["undefined-identifier", "Define or import the missing identifier."],
    ["syntax", "Fix the syntax error at the indicated location."],
    ["type", "Fix the type error at the indicated location."],
    ["wrapper", "Check the underlying build tool output above."]
  ]);
  return suggestions.get(category) ?? "Fix the first reported error and rebuild.";
}
4453
/**
 * Renders an extracted build failure as a single sentence.
 *
 * @param match Failure record with `message`, `file`, `line`, `category`.
 * @returns `Build failed: <message>[ in <file>[:<line>]]. Fix: <suggestion>`.
 */
function formatBuildFailureOutput(match) {
  const message = trimTrailingSentencePunctuation(match.message);
  const suggestion = buildFailureSuggestion(match.category);
  const displayFile = match.file ? compactDisplayFile(match.file) : null;

  // The location is only shown when a display file is available.
  let location = "";
  if (displayFile) {
    location = match.line !== null ? ` in ${displayFile}:${match.line}` : ` in ${displayFile}`;
  }
  return `Build failed: ${message}${location}. Fix: ${suggestion}`;
}
4465
/**
 * Extracts the first webpack-style failure ("ERROR in <file> [line:col]").
 *
 * The message is chosen from the up-to-five lines after the header: code
 * frame lines are ignored, scanning stops at the next header or at a
 * "compiled with N errors" line, and a candidate with a recognisable
 * category is preferred over loader boilerplate.
 *
 * @param input Raw build output.
 * @returns A failure record, or `null` when no "ERROR in" header exists.
 */
function extractWebpackBuildFailure(input) {
  const headerPattern = /^ERROR in (.+?)(?:\s+(\d+):(\d+))?$/;
  const lines = stripAnsiText(input).split("\n").map((text) => text.trimEnd());

  const headerIndex = lines.findIndex((text) => headerPattern.test(text));
  if (headerIndex === -1) {
    return null;
  }
  const header = lines[headerIndex].match(headerPattern);

  const isBoilerplate = (text) =>
    /^Module build failed\b/i.test(text) || /^Error:\s+TypeScript compilation failed\b/i.test(text);

  const candidates = [];
  for (const following of lines.slice(headerIndex + 1, headerIndex + 6)) {
    const candidate = following.trim();
    if (!candidate) {
      continue;
    }
    if (/^ERROR in /.test(candidate) || /compiled with \d+ errors?/i.test(candidate)) {
      break;
    }
    // Skip code-frame lines such as "> 1 | ..." or "12 | ...".
    if (/^(?:>|\|)|^\d+\s+\|/.test(candidate)) {
      continue;
    }
    candidates.push(candidate);
  }

  let message = "Compilation error";
  if (candidates.length > 0) {
    const categorised = candidates.find(
      (text) => !isBoilerplate(text) && inferBuildFailureCategory(text) !== "generic"
    );
    const preferred = categorised ?? candidates.find((text) => !isBoilerplate(text)) ?? candidates[0];
    message = preferred ?? message;
  }

  const [, file, row, col] = header;
  return {
    message,
    file: normalizeBuildPath(file),
    line: row ? Number(row) : null,
    column: col ? Number(col) : null,
    category: inferBuildFailureCategory(message)
  };
}
4505
/**
 * Extracts the first Vite import-analysis failure
 * (`[plugin:vite:import-analysis] Failed to resolve import "x" from "y"`).
 *
 * @param input Raw build output.
 * @returns A module-resolution failure record without line/column, or `null`.
 */
function extractViteImportAnalysisBuildFailure(input) {
  const pattern =
    /^\[plugin:vite:import-analysis\]\s+Failed to resolve import\s+"([^"]+)"\s+from\s+"([^"]+)"/i;
  const hit = stripAnsiText(input)
    .split("\n")
    .map((text) => text.trim().match(pattern))
    .find((found) => found !== null);
  if (!hit) {
    return null;
  }
  const [, specifier, importer] = hit;
  return {
    message: `Failed to resolve import "${specifier}"`,
    file: normalizeBuildPath(importer),
    line: null,
    column: null,
    category: "module-resolution"
  };
}
4524
/**
 * Extracts the first esbuild-style failure (`[ERROR] message`, optionally
 * prefixed by a cross mark), with the location taken from the first
 * `file:line:col:` line among the five lines that follow.
 *
 * @param input Raw build output.
 * @returns A failure record (location fields are `null` when not found), or
 *   `null` when no `[ERROR]` line exists.
 */
function extractEsbuildBuildFailure(input) {
  const errorPattern = /^(?:[✘✗]\s*)?\[ERROR\]\s*(.+)$/;
  const locationPattern = /^(.+?):(\d+):(\d+):$/;
  const lines = stripAnsiText(input).split("\n").map((text) => text.trimEnd());

  const errorIndex = lines.findIndex((text) => errorPattern.test(text));
  if (errorIndex === -1) {
    return null;
  }
  const [, rawMessage] = lines[errorIndex].match(errorPattern);
  // Drop a leading "[vite]" tag from the message.
  const message = rawMessage.replace(/^\[vite\]\s*/i, "").trim();

  const location = lines
    .slice(errorIndex + 1, errorIndex + 6)
    .map((text) => text.trim().match(locationPattern))
    .find((found) => found !== null);

  return {
    message,
    file: location ? normalizeBuildPath(location[1]) : null,
    line: location ? Number(location[2]) : null,
    column: location ? Number(location[3]) : null,
    category: inferBuildFailureCategory(message)
  };
}
4555
/**
 * Extracts the first cargo/rustc-style failure (`error[E0000]: message`
 * followed by a `--> file:line:col` pointer).
 *
 * ANSI colour codes are stripped once, up front, and the stripped text is
 * used both for the gate checks and for parsing. Gating on the raw input
 * would reject colourised output, where escape codes precede `error`, even
 * though the parser below could handle it.
 *
 * @param input Raw build output.
 * @returns A failure record (location fields are `null` when no `-->` line
 *   follows within six lines), or `null` when the output does not look like
 *   a cargo failure.
 */
function extractCargoBuildFailure(input) {
  const text = stripAnsiText(input);
  const hasErrorLine = /^error(?:\[E\d+\])?:\s+/m.test(text);
  // Require cargo-specific context so generic "error:" lines are not claimed.
  const hasCargoContext = /^\s*-->\s+/m.test(text) || /could not compile/i.test(text);
  if (!hasErrorLine || !hasCargoContext) {
    return null;
  }
  const lines = text.split("\n").map((line) => line.trimEnd());
  for (let index = 0; index < lines.length; index += 1) {
    const match = lines[index]?.match(/^error(?:\[(E\d+)\])?:\s+(.+)$/);
    if (!match) {
      continue;
    }
    const code = match[1];
    const locationMatch = lines.slice(index + 1, index + 7).join("\n").match(/^\s*-->\s+(.+?):(\d+):(\d+)/m);
    return {
      message: code ? `${code}: ${match[2].trim()}` : match[2].trim(),
      file: locationMatch ? normalizeBuildPath(locationMatch[1]) : null,
      line: locationMatch ? Number(locationMatch[2]) : null,
      column: locationMatch ? Number(locationMatch[3]) : null,
      category: inferBuildFailureCategory(match[2])
    };
  }
  return null;
}
4577
/**
 * Extracts the first `file:line:col:` compiler diagnostic: C-family files
 * need an explicit `error:` tag, while `.go` files accept any message that
 * is not a warning.
 *
 * @param input Raw build output.
 * @returns A failure record, or `null` when no such diagnostic is found.
 */
function extractCompilerStyleBuildFailure(input) {
  const cFamilyPattern = /^(.+?\.(?:c|cc|cpp|cxx|h|hpp|m|mm)):([0-9]+):([0-9]+):\s*error:\s+(.+)$/;
  const goPattern = /^(.+?\.go):([0-9]+):([0-9]+):\s+(.+)$/;
  const toFailure = ([, file, row, col, text]) => ({
    message: text.trim(),
    file: normalizeBuildPath(file),
    line: Number(row),
    column: Number(col),
    category: inferBuildFailureCategory(text)
  });

  const lines = stripAnsiText(input).split("\n").map((text) => text.trimEnd());
  for (const rawLine of lines) {
    const cFamilyHit = rawLine.match(cFamilyPattern);
    if (cFamilyHit) {
      return toFailure(cFamilyHit);
    }
    const goHit = rawLine.match(goPattern);
    if (goHit && !/^\s*warning:/i.test(goHit[4])) {
      return toFailure(goHit);
    }
  }
  return null;
}
4605
/**
 * Turns the first parsed tsc diagnostic into a build failure record.
 *
 * @param input Raw build output.
 * @returns A failure record whose message is `CODE: message`, or `null` when
 *   no tsc diagnostic was parsed.
 */
function extractTscBuildFailure(input) {
  const [first] = parseTscErrors(input);
  if (!first) {
    return null;
  }
  const { file, line, column } = first;
  const message = `${first.code}: ${first.message}`;
  return {
    message,
    file,
    line,
    column,
    category: inferBuildFailureCategory(message)
  };
}
4619
/**
 * Fallback extractor for package-manager wrapper errors (npm ERR!,
 * ERR_PNPM_*, "error Command failed"). Tries to name the failing build
 * script from a "build: `command`" fragment.
 *
 * @param input Raw build output.
 * @returns A `"wrapper"` failure record without location, or `null` when no
 *   wrapper error marker is present.
 */
function extractWrapperBuildFailure(input) {
  const wrapperMarker = /^\s*npm ERR!|\bERR_PNPM_|^\s*error Command failed/m;
  if (!wrapperMarker.test(input)) {
    return null;
  }

  // The npm-specific form is preferred over the generic one.
  const commandPatterns = [
    /^\s*npm ERR!\s+.*?\bbuild:\s+`([^`]+)`/m,
    /^\s*.+?\s+build:\s+`([^`]+)`/m
  ];
  let command = null;
  for (const pattern of commandPatterns) {
    const captured = input.match(pattern)?.[1];
    if (captured !== undefined && captured !== null) {
      command = captured;
      break;
    }
  }

  return {
    message: command ? `build script \`${command}\` failed` : "the build script failed",
    file: null,
    line: null,
    column: null,
    category: "wrapper"
  };
}
3341
- function infraRiskHeuristic(input) {
3342
- const zeroDestructiveEvidence = input.split("\n").map((line) => line.trim()).filter((line) => line.length > 0 && ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line)).slice(0, 3);
3343
- const riskEvidence = input.split("\n").map((line) => line.trim()).filter(
3344
- (line) => line.length > 0 && RISK_LINE_PATTERN.test(line) && !ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line)
3345
- ).slice(0, 3);
3346
- if (riskEvidence.length > 0) {
3347
- return JSON.stringify(
3348
- {
3349
- verdict: "fail",
3350
- reason: "Destructive or clearly risky infrastructure change signals are present.",
3351
- evidence: riskEvidence
3352
- },
3353
- null,
3354
- 2
3355
- );
4634
/**
 * Heuristic for the build-failure preset.
 *
 * Extractors are tried in a fixed order, from the most specific tool format
 * to the package-manager wrapper fallback, and the first one that yields a
 * result wins.
 *
 * @param input Raw build output.
 * @returns `"Build succeeded."`, a one-sentence failure description, or
 *   `null` when the output is empty or nothing was recognised.
 */
function buildFailureHeuristic(input) {
  if (input.trim().length === 0) {
    return null;
  }
  if (detectExplicitBuildSuccess(input)) {
    return "Build succeeded.";
  }

  const extractors = [
    extractViteImportAnalysisBuildFailure,
    extractWebpackBuildFailure,
    extractEsbuildBuildFailure,
    extractCargoBuildFailure,
    extractCompilerStyleBuildFailure,
    extractTscBuildFailure,
    extractWrapperBuildFailure
  ];
  let match = null;
  for (const extract of extractors) {
    const found = extract(input);
    if (found !== null && found !== undefined) {
      match = found;
      break;
    }
  }
  return match ? formatBuildFailureOutput(match) : null;
}
3382
4647
  function applyHeuristicPolicy(policyName, input, detail) {
3383
4648
  if (!policyName) {
@@ -3392,6 +4657,15 @@ function applyHeuristicPolicy(policyName, input, detail) {
3392
4657
  if (policyName === "test-status") {
3393
4658
  return testStatusHeuristic(input, detail);
3394
4659
  }
4660
+ if (policyName === "typecheck-summary") {
4661
+ return typecheckSummaryHeuristic(input);
4662
+ }
4663
+ if (policyName === "lint-failures") {
4664
+ return lintFailuresHeuristic(input);
4665
+ }
4666
+ if (policyName === "build-failure") {
4667
+ return buildFailureHeuristic(input);
4668
+ }
3395
4669
  return null;
3396
4670
  }
3397
4671
 
@@ -3414,8 +4688,8 @@ function buildInsufficientSignalOutput(input) {
3414
4688
  } else {
3415
4689
  hint = "Hint: the captured output did not contain a clear answer for this preset.";
3416
4690
  }
3417
- return `${INSUFFICIENT_SIGNAL_TEXT}
3418
- ${hint}`;
4691
+ const presetSuggestion = input.recognizedRunner && input.recognizedRunner !== "unknown" && input.presetName !== "test-status" ? `Hint: captured output looks like ${input.recognizedRunner} test output; try --preset test-status.` : null;
4692
+ return [INSUFFICIENT_SIGNAL_TEXT, hint, presetSuggestion].filter((value) => Boolean(value)).join("\n");
3419
4693
  }
3420
4694
 
3421
4695
  // src/core/run.ts
@@ -4076,9 +5350,141 @@ function prepareInput(raw, config) {
4076
5350
/**
 * Backslash-escape every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // "$&" re-inserts the matched metacharacter after the added backslash.
  const escaped = value.replace(metaCharacters, "\\$&");
  return escaped;
}
4079
/**
 * Return a new array holding the distinct entries of `values`,
 * keeping the position of each entry's first occurrence.
 */
function unique3(values) {
  const seen = new Set(values);
  return Array.from(seen);
}
5356
// Lower-case labels that are too generic to be useful as raw-output search
// terms. isHighSignalSearchTerm lower-cases a normalized term and rejects it
// when it appears in this set.
var genericBucketSearchTerms = /* @__PURE__ */ new Set([
  "runtimeerror",
  "typeerror",
  "error",
  "exception",
  "failed",
  "failure",
  "visible failure",
  "failing tests",
  "setup failures",
  "runtime failure",
  "assertion failed",
  "network",
  "permission",
  "configuration"
]);
5372
/**
 * Strip runs of quote characters (', ", `) from both ends of `value`, then
 * trim surrounding whitespace. Quotes hidden behind leading/trailing
 * whitespace are not removed because the quote strip runs before the trim.
 */
function normalizeSearchTerm(value) {
  const withoutEdgeQuotes = value.replace(/^['"`]+|['"`]+$/g, "");
  return withoutEdgeQuotes.trim();
}
5375
/**
 * Decide whether `term` is specific enough to search for in raw output.
 * A term is rejected when, after normalization, it is shorter than four
 * characters, is one of the generic bucket labels, or begins with a generic
 * failure-family word (runtime, type, assertion, network, permission,
 * configuration).
 */
function isHighSignalSearchTerm(term) {
  const candidate = normalizeSearchTerm(term);
  const tooShort = candidate.length < 4;
  const isGenericLabel = genericBucketSearchTerms.has(candidate.toLowerCase());
  const startsWithGenericFamily = /^(runtime|type|assertion|network|permission|configuration)\b/i.test(candidate);
  return !(tooShort || isGenericLabel || startsWithGenericFamily);
}
5389
/**
 * Rank a search term: the base score is the normalized term's length, and
 * each matching trait below adds a fixed bonus. Higher scores sort first in
 * extractBucketSearchTerms.
 */
function scoreSearchTerm(term) {
  const normalized = normalizeSearchTerm(term);
  const traits = [
    // ALL_CAPS identifier (e.g. an env var or error constant).
    { applies: /^[A-Z][A-Z0-9_]{2,}$/.test(normalized), bonus: 80 },
    // TypeScript diagnostic code such as TS2322.
    { applies: /^TS\d+$/.test(normalized), bonus: 70 },
    // Leading 4xx/5xx status or an "HTTPError: 4xx/5xx" fragment.
    {
      applies: /^[45]\d\d\b/.test(normalized) || /\bHTTPError:\s*[45]\d\d\b/i.test(normalized),
      bonus: 60
    },
    // Contains a path separator.
    { applies: normalized.includes("/") || normalized.includes("\\"), bonus: 50 },
    // Contains a dotted token (file name, module path, ...).
    { applies: /\b[A-Za-z0-9_.-]+\.[A-Za-z0-9_.-]+\b/.test(normalized), bonus: 40 },
    // The raw (un-normalized) term carried quote characters.
    { applies: /['"`]/.test(term), bonus: 30 },
    // Contains a "::" scope separator.
    { applies: normalized.includes("::"), bonus: 25 }
  ];
  return traits.reduce(
    (score, trait) => trait.applies ? score + trait.bonus : score,
    normalized.length
  );
}
5415
/**
 * Pull candidate search terms out of one free-text line.
 *
 * Candidates are emitted in this order: quoted fragments (quotes removed),
 * ALL_CAPS identifiers, TS diagnostic codes, "HTTPError: 4xx/5xx" fragments,
 * slash-prefixed paths, relative paths, dotted tokens, and finally everything
 * after the first colon when that remainder is at least 8 characters long.
 * Duplicates are not removed here.
 */
function collectCandidateSearchTerms(value) {
  const text = value.trim();
  if (text.length === 0) {
    return [];
  }
  // Quoted fragments contribute their inner capture group only.
  const quotedFragments = Array.from(
    text.matchAll(/['"`]([^'"`]{4,})['"`]/g),
    (match) => match[1]
  );
  // Every remaining pattern contributes its whole match.
  const wholeMatchPatterns = [
    /\b[A-Z][A-Z0-9_]{2,}\b/g,
    /\bTS\d+\b/g,
    /\bHTTPError:\s*[45]\d\d\b/gi,
    /\/[A-Za-z0-9_./:{}-]{4,}/g,
    /\b(?:[A-Za-z0-9_.-]+\/)+[A-Za-z0-9_.-]+\b/g,
    /\b[A-Za-z0-9_.-]+\.[A-Za-z0-9_.-]+\b/g
  ];
  const tokens = wholeMatchPatterns.flatMap(
    (pattern) => Array.from(text.matchAll(pattern), (match) => match[0])
  );
  const firstColon = text.indexOf(":");
  const detail = firstColon === -1 ? "" : text.slice(firstColon + 1).trim();
  const candidates = [...quotedFragments, ...tokens];
  if (detail.length >= 8) {
    candidates.push(detail);
  }
  return candidates;
}
5448
/**
 * Build the list of search terms used to locate a bucket's lines in raw output.
 *
 * Sources are the bucket's root cause, its evidence lines, and the search
 * hint and file of every read target pointing at this bucket. High-signal
 * candidates are de-duplicated, ordered by descending score (ties broken by
 * localeCompare) and capped at 6. When none qualify, the evidence and root
 * cause are split on "->" / ":" and up to 4 high-signal pieces are returned.
 */
function extractBucketSearchTerms(args) {
  const { bucket, readTargets } = args;
  const targetHints = readTargets
    .filter((target) => target.bucket_index === bucket.bucket_index)
    .flatMap((target) => [target.context_hint.search_hint ?? "", target.file]);
  const sources = [bucket.root_cause, ...bucket.evidence, ...targetHints];
  const ranked = unique3(
    sources
      .flatMap((source) => collectCandidateSearchTerms(source))
      .filter((candidate) => isHighSignalSearchTerm(candidate))
  ).sort(
    (left, right) => scoreSearchTerm(right) - scoreSearchTerm(left) || left.localeCompare(right)
  );
  if (ranked.length > 0) {
    return ranked.slice(0, 6);
  }
  // Fallback: no structured candidate survived, so use coarse text fragments.
  const fragments = [...bucket.evidence, bucket.root_cause].flatMap(
    (text) => text.split(/->|:/).map((piece) => normalizeSearchTerm(piece))
  );
  return unique3(fragments.filter((piece) => isHighSignalSearchTerm(piece))).slice(0, 4);
}
5471
/**
 * Group consecutive entries of `indexes` into clusters: an index joins the
 * current cluster when it is at most `maxGap` above that cluster's last
 * entry, otherwise it opens a new cluster. Entries are processed in the
 * order given; an empty input yields an empty list.
 */
function clusterIndexes(indexes, maxGap = 12) {
  const clusters = [];
  for (const index of indexes) {
    const openCluster = clusters[clusters.length - 1];
    const extendsOpenCluster = openCluster !== void 0 && index - openCluster[openCluster.length - 1] <= maxGap;
    if (extendsOpenCluster) {
      openCluster.push(index);
    } else {
      clusters.push([index]);
    }
  }
  return clusters;
}
4082
5488
  function buildLineWindows(args) {
4083
5489
  const selected = /* @__PURE__ */ new Set();
4084
5490
  for (const index of args.indexes) {
@@ -4094,11 +5500,17 @@ function buildLineWindows(args) {
4094
5500
  }
4095
5501
  return [...selected].sort((left, right) => left - right).map((index) => args.lines[index]);
4096
5502
  }
5503
/**
 * Build a de-duplicated line group in which the directly matched lines
 * (those at `args.indexes`) come first, followed by the context window that
 * buildLineWindows produces for the same arguments.
 */
function buildPriorityLineGroup(args) {
  const directHits = args.indexes.map((index) => args.lines[index]).filter(Boolean);
  const contextWindow = buildLineWindows(args);
  return unique3([...directHits, ...contextWindow]);
}
4097
5509
  function collapseSelectedLines(args) {
4098
5510
  if (args.lines.length === 0) {
4099
5511
  return args.fallback();
4100
5512
  }
4101
- const joined = unique2(args.lines).join("\n").trim();
5513
+ const joined = unique3(args.lines).join("\n").trim();
4102
5514
  if (joined.length === 0) {
4103
5515
  return args.fallback();
4104
5516
  }
@@ -4242,15 +5654,16 @@ function buildTestStatusRawSlice(args) {
4242
5654
  ) ? index : -1
4243
5655
  ).filter((index) => index >= 0);
4244
5656
  const bucketGroups = args.contract.main_buckets.map((bucket) => {
4245
- const bucketTerms = unique2(
4246
- [bucket.root_cause, ...bucket.evidence].map((value) => value.split(":").at(-1)?.trim() ?? value.trim()).filter((value) => value.length >= 4)
4247
- );
5657
+ const bucketTerms = extractBucketSearchTerms({
5658
+ bucket,
5659
+ readTargets: args.contract.read_targets
5660
+ });
4248
5661
  const indexes = lines.map(
4249
5662
  (line, index) => bucketTerms.some((term) => new RegExp(escapeRegExp(term), "i").test(line)) ? index : -1
4250
5663
  ).filter((index) => index >= 0);
4251
- return unique2([
5664
+ return unique3([
4252
5665
  ...indexes.map((index) => lines[index]).filter(Boolean),
4253
- ...buildLineWindows({
5666
+ ...buildPriorityLineGroup({
4254
5667
  lines,
4255
5668
  indexes,
4256
5669
  radius: 2,
@@ -4258,30 +5671,59 @@ function buildTestStatusRawSlice(args) {
4258
5671
  })
4259
5672
  ]);
4260
5673
  });
4261
- const targetGroups = args.contract.read_targets.map(
4262
- (target) => buildLineWindows({
5674
+ const targetGroups = args.contract.read_targets.flatMap((target) => {
5675
+ const searchHintIndexes = findSearchHintIndexes({
4263
5676
  lines,
4264
- indexes: unique2([
4265
- ...findReadTargetIndexes({
4266
- lines,
4267
- file: target.file,
4268
- line: target.line,
4269
- contextHint: target.context_hint
4270
- }),
4271
- ...findSearchHintIndexes({
4272
- lines,
4273
- searchHint: target.context_hint.search_hint
4274
- })
4275
- ]),
4276
- radius: target.line === null ? 1 : 2,
4277
- maxLines: target.line === null ? 6 : 8
5677
+ searchHint: target.context_hint.search_hint
5678
+ });
5679
+ const fileIndexes = findReadTargetIndexes({
5680
+ lines,
5681
+ file: target.file,
5682
+ line: target.line,
5683
+ contextHint: target.context_hint
5684
+ });
5685
+ const radius = target.line === null ? 1 : 2;
5686
+ const maxLines = target.line === null ? 6 : 8;
5687
+ const groups = [
5688
+ searchHintIndexes.length > 0 ? buildPriorityLineGroup({
5689
+ lines,
5690
+ indexes: searchHintIndexes,
5691
+ radius,
5692
+ maxLines
5693
+ }) : null,
5694
+ fileIndexes.length > 0 ? buildPriorityLineGroup({
5695
+ lines,
5696
+ indexes: fileIndexes,
5697
+ radius,
5698
+ maxLines
5699
+ }) : null
5700
+ ].filter((group) => group !== null && group.length > 0);
5701
+ if (groups.length > 0) {
5702
+ return groups;
5703
+ }
5704
+ return [
5705
+ buildPriorityLineGroup({
5706
+ lines,
5707
+ indexes: unique3([...searchHintIndexes, ...fileIndexes]),
5708
+ radius,
5709
+ maxLines
5710
+ })
5711
+ ];
5712
+ });
5713
+ const failureHeaderIndexes = lines.map((line, index) => /\b(FAILED|ERROR)\b/.test(line) ? index : -1).filter((index) => index >= 0);
5714
+ const failureIndexes = (failureHeaderIndexes.length > 0 ? failureHeaderIndexes : lines.map((line, index) => /^E\s/.test(line) ? index : -1).filter((index) => index >= 0)).filter((index) => index >= 0);
5715
+ const failureHeaderGroups = clusterIndexes(failureIndexes).slice(0, 8).map(
5716
+ (cluster) => buildPriorityLineGroup({
5717
+ lines,
5718
+ indexes: cluster,
5719
+ radius: 1,
5720
+ maxLines: 8
4278
5721
  })
4279
- );
4280
- const failureIndexes = lines.map((line, index) => /\b(FAILED|ERROR)\b/.test(line) || /^E\s/.test(line) ? index : -1).filter((index) => index >= 0);
5722
+ ).filter((group) => group.length > 0);
4281
5723
  const selected = collapseSelectedLineGroups({
4282
5724
  groups: [
4283
5725
  ...targetGroups,
4284
- unique2([
5726
+ unique3([
4285
5727
  ...summaryIndexes.map((index) => lines[index]).filter(Boolean),
4286
5728
  ...buildLineWindows({
4287
5729
  lines,
@@ -4291,12 +5733,14 @@ function buildTestStatusRawSlice(args) {
4291
5733
  })
4292
5734
  ]),
4293
5735
  ...bucketGroups,
4294
- buildLineWindows({
4295
- lines,
4296
- indexes: failureIndexes,
4297
- radius: 1,
4298
- maxLines: 24
4299
- })
5736
+ ...failureHeaderGroups.length > 0 ? failureHeaderGroups : [
5737
+ buildLineWindows({
5738
+ lines,
5739
+ indexes: failureIndexes,
5740
+ radius: 1,
5741
+ maxLines: 24
5742
+ })
5743
+ ]
4300
5744
  ],
4301
5745
  maxInputChars: args.config.maxInputChars,
4302
5746
  fallback: () => truncateInput(args.input, {
@@ -4437,7 +5881,8 @@ function withInsufficientHint(args) {
4437
5881
  return buildInsufficientSignalOutput({
4438
5882
  presetName: args.request.presetName,
4439
5883
  originalLength: args.prepared.meta.originalLength,
4440
- truncatedApplied: args.prepared.meta.truncatedApplied
5884
+ truncatedApplied: args.prepared.meta.truncatedApplied,
5885
+ recognizedRunner: detectTestRunner(args.prepared.redacted)
4441
5886
  });
4442
5887
  }
4443
5888
  async function generateWithRetry(args) {
@@ -4476,6 +5921,34 @@ function hasRecognizableTestStatusSignal(input) {
4476
5921
  const analysis = analyzeTestStatus(input);
4477
5922
  return analysis.collectionErrorCount !== void 0 || analysis.noTestsCollected || analysis.interrupted || analysis.failed > 0 || analysis.errors > 0 || analysis.passed > 0 || analysis.inlineItems.length > 0 || analysis.buckets.length > 0;
4478
5923
  }
5924
/**
 * Decide whether a test-status request can skip the diagnose contract and
 * use the compact heuristic output instead.
 *
 * The bypass is refused when the policy is not "test-status", when a
 * non-standard detail level is requested, when the caller asks for
 * diagnose/json output, or when any rerun context (resolved/remaining tests,
 * remaining subset, a remaining mode other than "none") is present.
 * Otherwise it applies to: a passing run with no failures or errors, a
 * collection error with no itemized detail, a run that collected no tests,
 * or an interrupted run without failures or errors.
 */
function shouldUseCompactTestStatusBypass(args) {
  const { request, analysis } = args;
  if (request.policyName !== "test-status") {
    return false;
  }
  if (request.detail && request.detail !== "standard") {
    return false;
  }
  if (request.goal === "diagnose" && request.format === "json") {
    return false;
  }
  const context = request.testStatusContext;
  if (context?.resolvedTests?.length) {
    return false;
  }
  if (context?.remainingTests?.length) {
    return false;
  }
  if (context?.remainingSubsetAvailable) {
    return false;
  }
  if (context?.remainingMode && context.remainingMode !== "none") {
    return false;
  }
  // Evaluated lazily so the short-circuit order of the checks is preserved.
  const hasNoFailuresOrErrors = () => analysis.failed === 0 && analysis.errors === 0;
  const isBareCollectionError = () =>
    analysis.collectionErrorCount !== void 0 &&
    analysis.collectionItems.length === 0 &&
    analysis.inlineItems.length === 0 &&
    analysis.buckets.length === 0;
  return (
    (hasNoFailuresOrErrors() && analysis.passed > 0) ||
    isBareCollectionError() ||
    analysis.noTestsCollected ||
    (analysis.interrupted && hasNoFailuresOrErrors())
  );
}
5939
/**
 * Map a raw provider error message onto one of three fixed, user-safe
 * phrases so raw provider text never reaches the rendered note:
 *  - an "HTTP <3 digits>" fragment  -> unavailable, with the status code
 *  - JSON / schema / parse wording  -> unusable structured output
 *  - anything else                  -> generic failure
 */
function sanitizeProviderFailureReason(reason) {
  const message = reason.trim();
  const statusMatch = /\bHTTP\s+(\d{3})\b/i.exec(message);
  if (statusMatch?.[1]) {
    return `provider follow-up unavailable (HTTP ${statusMatch[1]})`;
  }
  const looksLikeParseFailure = /unterminated string|invalid json|unexpected token|json at position|schema|zod|parse/i.test(
    message
  );
  return looksLikeParseFailure ? "provider follow-up returned unusable structured output" : "provider follow-up failed";
}
4479
5952
  function renderTestStatusDecisionOutput(args) {
4480
5953
  if (args.request.goal === "diagnose" && args.request.format === "json") {
4481
5954
  return JSON.stringify(
@@ -4497,12 +5970,49 @@ function renderTestStatusDecisionOutput(args) {
4497
5970
  return args.decision.standardText;
4498
5971
  }
4499
5972
  function buildTestStatusProviderFailureDecision(args) {
5973
+ const sanitizedReason = sanitizeProviderFailureReason(args.reason);
5974
+ const concreteReadTarget = args.baseDecision.contract.read_targets.find(
5975
+ (target) => Boolean(target.file)
5976
+ );
5977
+ const hasUnknownBucket = args.baseDecision.contract.main_buckets.some(
5978
+ (bucket) => bucket.root_cause.startsWith("unknown ")
5979
+ );
5980
+ if (concreteReadTarget && !hasUnknownBucket) {
5981
+ return buildTestStatusDiagnoseContract({
5982
+ input: args.input,
5983
+ analysis: args.analysis,
5984
+ resolvedTests: args.baseDecision.contract.resolved_tests,
5985
+ remainingTests: args.baseDecision.contract.remaining_tests,
5986
+ remainingMode: args.request.testStatusContext?.remainingMode,
5987
+ contractOverrides: {
5988
+ ...args.baseDecision.contract,
5989
+ diagnosis_complete: false,
5990
+ raw_needed: false,
5991
+ additional_source_read_likely_low_value: false,
5992
+ read_raw_only_if: null,
5993
+ decision: "read_source",
5994
+ provider_used: true,
5995
+ provider_confidence: null,
5996
+ provider_failed: true,
5997
+ raw_slice_used: args.rawSliceUsed,
5998
+ raw_slice_strategy: args.rawSliceStrategy,
5999
+ next_best_action: {
6000
+ code: "read_source_for_bucket",
6001
+ bucket_index: args.baseDecision.contract.dominant_blocker_bucket_index ?? concreteReadTarget.bucket_index,
6002
+ note: `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
6003
+ 1
6004
+ )}. The heuristic anchor is concrete enough to inspect source for the current bucket before reading raw traceback.`
6005
+ }
6006
+ }
6007
+ });
6008
+ }
4500
6009
  const shouldZoomFirst = args.request.detail !== "verbose";
4501
6010
  return buildTestStatusDiagnoseContract({
4502
6011
  input: args.input,
4503
6012
  analysis: args.analysis,
4504
6013
  resolvedTests: args.baseDecision.contract.resolved_tests,
4505
6014
  remainingTests: args.baseDecision.contract.remaining_tests,
6015
+ remainingMode: args.request.testStatusContext?.remainingMode,
4506
6016
  contractOverrides: {
4507
6017
  ...args.baseDecision.contract,
4508
6018
  diagnosis_complete: false,
@@ -4518,12 +6028,16 @@ function buildTestStatusProviderFailureDecision(args) {
4518
6028
  next_best_action: {
4519
6029
  code: shouldZoomFirst ? "insufficient_signal" : "read_raw_for_exact_traceback",
4520
6030
  bucket_index: args.baseDecision.contract.dominant_blocker_bucket_index ?? args.baseDecision.contract.main_buckets[0]?.bucket_index ?? null,
4521
- note: shouldZoomFirst ? `Provider follow-up failed (${args.reason}). Use one deeper sift pass on the same cached output before reading raw traceback lines.` : `Provider follow-up failed (${args.reason}). Read raw traceback only if exact stack lines are still needed.`
6031
+ note: shouldZoomFirst ? `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
6032
+ 1
6033
+ )}. Use one deeper sift pass on the same cached output before reading raw traceback lines.` : `${sanitizedReason[0].toUpperCase()}${sanitizedReason.slice(
6034
+ 1
6035
+ )}. Read raw traceback only if exact stack lines are still needed.`
4522
6036
  }
4523
6037
  }
4524
6038
  });
4525
6039
  }
4526
- async function runSift(request) {
6040
+ async function runSiftCore(request, recorder) {
4527
6041
  const prepared = prepareInput(request.stdin, request.config.input);
4528
6042
  const heuristicInput = prepared.redacted;
4529
6043
  const heuristicInputTruncated = false;
@@ -4539,23 +6053,28 @@ async function runSift(request) {
4539
6053
  const provider = createProvider(request.config);
4540
6054
  const hasTestStatusSignal = request.policyName === "test-status" && hasRecognizableTestStatusSignal(heuristicInput);
4541
6055
  const testStatusAnalysis = hasTestStatusSignal ? analyzeTestStatus(heuristicInput) : null;
4542
- const testStatusDecision = hasTestStatusSignal && testStatusAnalysis ? buildTestStatusDiagnoseContract({
6056
+ const useCompactTestStatusOutput = hasTestStatusSignal && testStatusAnalysis ? shouldUseCompactTestStatusBypass({
6057
+ request,
6058
+ analysis: testStatusAnalysis
6059
+ }) : false;
6060
+ const testStatusDecision = hasTestStatusSignal && testStatusAnalysis && !useCompactTestStatusOutput ? buildTestStatusDiagnoseContract({
4543
6061
  input: heuristicInput,
4544
6062
  analysis: testStatusAnalysis,
4545
6063
  resolvedTests: request.testStatusContext?.resolvedTests,
4546
- remainingTests: request.testStatusContext?.remainingTests
6064
+ remainingTests: request.testStatusContext?.remainingTests,
6065
+ remainingMode: request.testStatusContext?.remainingMode
4547
6066
  }) : null;
4548
6067
  const testStatusHeuristicOutput = testStatusDecision ? renderTestStatusDecisionOutput({
4549
6068
  request,
4550
6069
  decision: testStatusDecision
4551
- }) : null;
6070
+ }) : useCompactTestStatusOutput ? applyHeuristicPolicy("test-status", heuristicInput, "standard") : null;
4552
6071
  if (request.config.runtime.verbose) {
4553
6072
  process.stderr.write(
4554
6073
  `${pc.dim("sift")} provider=${provider.name} model=${request.config.provider.model} base_url=${request.config.provider.baseUrl} input_chars=${prepared.meta.finalLength}
4555
6074
  `
4556
6075
  );
4557
6076
  }
4558
- const heuristicOutput = request.policyName === "test-status" ? testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
6077
+ const heuristicOutput = request.policyName === "test-status" ? useCompactTestStatusOutput ? testStatusHeuristicOutput : testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
4559
6078
  if (heuristicOutput) {
4560
6079
  if (request.config.runtime.verbose) {
4561
6080
  process.stderr.write(`${pc.dim("sift")} heuristic=${request.policyName}
@@ -4609,6 +6128,7 @@ async function runSift(request) {
4609
6128
  finalOutput
4610
6129
  });
4611
6130
  }
6131
+ recorder?.heuristic();
4612
6132
  return finalOutput;
4613
6133
  }
4614
6134
  if (testStatusDecision && testStatusAnalysis) {
@@ -4678,6 +6198,7 @@ async function runSift(request) {
4678
6198
  analysis: testStatusAnalysis,
4679
6199
  resolvedTests: request.testStatusContext?.resolvedTests,
4680
6200
  remainingTests: request.testStatusContext?.remainingTests,
6201
+ remainingMode: request.testStatusContext?.remainingMode,
4681
6202
  providerBucketSupplements: supplement.bucket_supplements,
4682
6203
  contractOverrides: {
4683
6204
  diagnosis_complete: supplement.diagnosis_complete,
@@ -4708,6 +6229,7 @@ async function runSift(request) {
4708
6229
  providerInputChars: providerPrepared2.truncated.length,
4709
6230
  providerOutputChars: result.text.length
4710
6231
  });
6232
+ recorder?.provider(result.usage);
4711
6233
  return finalOutput;
4712
6234
  } catch (error) {
4713
6235
  const reason = error instanceof Error ? error.message : "unknown_error";
@@ -4742,6 +6264,7 @@ async function runSift(request) {
4742
6264
  rawSliceChars: rawSlice.text.length,
4743
6265
  providerInputChars: providerPrepared2.truncated.length
4744
6266
  });
6267
+ recorder?.fallback();
4745
6268
  return finalOutput;
4746
6269
  }
4747
6270
  }
@@ -4798,6 +6321,7 @@ async function runSift(request) {
4798
6321
  })) {
4799
6322
  throw new Error("Model output rejected by quality gate");
4800
6323
  }
6324
+ recorder?.provider(result.usage);
4801
6325
  return withInsufficientHint({
4802
6326
  output: normalizeOutput(result.text, providerPrompt.responseMode),
4803
6327
  request,
@@ -4805,6 +6329,7 @@ async function runSift(request) {
4805
6329
  });
4806
6330
  } catch (error) {
4807
6331
  const reason = error instanceof Error ? error.message : "unknown_error";
6332
+ recorder?.fallback();
4808
6333
  return withInsufficientHint({
4809
6334
  output: buildFallbackOutput({
4810
6335
  format: request.format,
@@ -4818,6 +6343,72 @@ async function runSift(request) {
4818
6343
  });
4819
6344
  }
4820
6345
  }
6346
/**
 * Run the sift pipeline for `request` and resolve with its output.
 * Delegates to runSiftCore without a recorder, so no run statistics are
 * collected on this path.
 */
async function runSift(request) {
  return runSiftCore(request);
}
6349
/**
 * Run the sift pipeline and report which layer produced the output.
 *
 * Dry runs return `stats: null`. Otherwise a recorder is handed to
 * runSiftCore; the last recorder callback invoked determines `layer`,
 * `providerCalled` and `totalTokens`. If no callback fires, the defaults
 * (layer "fallback", providerCalled false, totalTokens null) are reported.
 */
async function runSiftWithStats(request) {
  if (request.dryRun) {
    const dryRunOutput = await runSiftCore(request);
    return { output: dryRunOutput, stats: null };
  }
  const startedAt = Date.now();
  let layer = "fallback";
  let providerCalled = false;
  let totalTokens = null;
  // Single place where a recorder callback overwrites the outcome.
  const mark = (nextLayer, calledProvider, tokens) => {
    layer = nextLayer;
    providerCalled = calledProvider;
    totalTokens = tokens;
  };
  const output = await runSiftCore(request, {
    heuristic() {
      mark("heuristic", false, null);
    },
    provider(usage) {
      mark("provider", true, usage?.totalTokens ?? null);
    },
    fallback() {
      mark("fallback", true, null);
    }
  });
  const durationMs = Date.now() - startedAt;
  return {
    output,
    stats: {
      layer,
      providerCalled,
      totalTokens,
      durationMs,
      presetName: request.presetName
    }
  };
}
6388
+
6389
+ // src/core/stats.ts
6390
+ import pc2 from "picocolors";
6391
/**
 * Render a millisecond duration for the stats footer: values of one second
 * or more become seconds with one decimal ("1.5s"); anything else is
 * printed as raw milliseconds ("250ms").
 */
function formatDuration(durationMs) {
  if (durationMs >= 1e3) {
    const seconds = durationMs / 1e3;
    return `${seconds.toFixed(1)}s`;
  }
  return `${durationMs}ms`;
}
6394
/**
 * Build the one-line "[sift: ...]" footer describing which layer produced
 * the summary and how long it took. Provider footers include the token
 * count when one was reported; any layer other than "heuristic" or
 * "provider" is rendered as the fallback footer.
 */
function formatStatsFooter(stats) {
  const duration = formatDuration(stats.durationMs);
  switch (stats.layer) {
    case "heuristic":
      return `[sift: heuristic \u2022 LLM skipped \u2022 summary ${duration}]`;
    case "provider": {
      const tokenSegment = stats.totalTokens !== null ? ` \u2022 ${stats.totalTokens} tokens` : "";
      return `[sift: provider \u2022 LLM used${tokenSegment} \u2022 summary ${duration}]`;
    }
    default:
      return `[sift: fallback \u2022 provider failed \u2022 summary ${duration}]`;
  }
}
6405
/**
 * Write the dimmed stats footer to stderr, followed by a newline.
 * Nothing is written when `args.quiet` is set, when there are no stats, or
 * when stderr is not a TTY.
 */
function emitStatsFooter(args) {
  const shouldEmit = !args.quiet && Boolean(args.stats) && Boolean(process.stderr.isTTY);
  if (!shouldEmit) {
    return;
  }
  const footer = pc2.dim(formatStatsFooter(args.stats));
  process.stderr.write(`${footer}\n`);
}
4821
6412
 
4822
6413
  // src/core/testStatusState.ts
4823
6414
  import fs from "fs";
@@ -4853,6 +6444,7 @@ var failureBucketTypeSchema = z2.enum([
4853
6444
  "import_dependency_failure",
4854
6445
  "collection_failure",
4855
6446
  "assertion_failure",
6447
+ "golden_output_drift",
4856
6448
  "runtime_failure",
4857
6449
  "interrupted_run",
4858
6450
  "no_tests_collected",
@@ -4893,7 +6485,19 @@ var cachedPytestStateSchema = z2.object({
4893
6485
  failingNodeIds: z2.array(z2.string()),
4894
6486
  remainingNodeIds: z2.array(z2.string()).optional()
4895
6487
  }).optional();
4896
- var cachedRunSchema = z2.object({
6488
// Test runners a cached run can be attributed to.
var testRunnerSchema = z2.enum(["pytest", "vitest", "jest", "unknown"]);
// Describes whether (and how) a failing subset can be re-run for a cached run.
// `baseArgv`, when present, must contain at least one entry.
var cachedRunnerSubsetSchema = z2.object({
  available: z2.boolean(),
  strategy: z2.enum(["pytest-node-ids", "none"]),
  baseArgv: z2.array(z2.string()).min(1).optional()
});
// Runner-specific portion of a version 2 cached run: runner name, failing
// targets, the baseline command and the subset re-run description.
var cachedRunnerStateSchema = z2.object({
  name: testRunnerSchema,
  failingTargets: z2.array(z2.string()),
  baselineCommand: cachedCommandSchema,
  subset: cachedRunnerSubsetSchema
});
6500
+ var cachedRunV1Schema = z2.object({
4897
6501
  version: z2.literal(1),
4898
6502
  timestamp: z2.string(),
4899
6503
  presetName: z2.literal("test-status"),
@@ -4911,6 +6515,25 @@ var cachedRunSchema = z2.object({
4911
6515
  analysis: cachedAnalysisSchema,
4912
6516
  pytest: cachedPytestStateSchema
4913
6517
  });
6518
// Version 2 cached test-status run. It carries a generic `runner` section
// validated by cachedRunnerStateSchema.
var cachedRunV2Schema = z2.object({
  version: z2.literal(2),
  timestamp: z2.string(),
  presetName: z2.literal("test-status"),
  cwd: z2.string(),
  commandKey: z2.string(),
  commandPreview: z2.string(),
  command: cachedCommandSchema,
  detail: detailSchema,
  exitCode: z2.number().int(),
  rawOutput: z2.string(),
  capture: z2.object({
    originalChars: countSchema,
    truncatedApplied: z2.boolean()
  }),
  analysis: cachedAnalysisSchema,
  runner: cachedRunnerStateSchema
});
// Accepts either cache format; the `version` literal selects which schema applies.
var cachedRunSchema = z2.discriminatedUnion("version", [cachedRunV1Schema, cachedRunV2Schema]);
4914
6537
  var MissingCachedTestStatusRunError = class extends Error {
4915
6538
  constructor() {
4916
6539
  super(
@@ -4959,6 +6582,37 @@ function isPytestExecutable(value) {
4959
6582
  function isPythonExecutable(value) {
4960
6583
  return basenameMatches(value, /^python(?:\d+(?:\.\d+)*)?(?:\.exe)?$/i);
4961
6584
  }
6585
/**
 * Infer the test runner from a cached command.
 *
 * For argv commands only the leading tokens are inspected: a pytest
 * executable, `python -m pytest`, or an executable named vitest / jest.
 * For any other command mode the `shellCommand` text is searched for the
 * runner names, pytest first. Returns "unknown" when the command is missing
 * or nothing matches.
 */
function detectRunnerFromCommand(command) {
  if (!command) {
    return "unknown";
  }
  if (command.mode !== "argv") {
    const shellRunners = [
      { pattern: /\bpython(?:\d+(?:\.\d+)*)?\s+-m\s+pytest\b|\bpytest\b/i, runner: "pytest" },
      { pattern: /\bvitest\b/i, runner: "vitest" },
      { pattern: /\bjest\b/i, runner: "jest" }
    ];
    const matched = shellRunners.find((entry) => entry.pattern.test(command.shellCommand));
    return matched ? matched.runner : "unknown";
  }
  const [executable, flag, moduleName] = command.argv;
  if (!executable) {
    return "unknown";
  }
  if (isPytestExecutable(executable)) {
    return "pytest";
  }
  if (isPythonExecutable(executable) && flag === "-m" && moduleName === "pytest") {
    return "pytest";
  }
  if (basenameMatches(executable, /^vitest(?:\.exe)?$/i)) {
    return "vitest";
  }
  if (basenameMatches(executable, /^jest(?:\.exe)?$/i)) {
    return "jest";
  }
  return "unknown";
}
4962
6616
  var shortPytestOptionsWithValue = /* @__PURE__ */ new Set([
4963
6617
  "-c",
4964
6618
  "-k",
@@ -5053,26 +6707,52 @@ function buildCachedCommand(args) {
5053
6707
  }
5054
6708
  return void 0;
5055
6709
  }
5056
/**
 * Collect the distinct failing targets of an analysis: visible error labels
 * first, then visible failed labels, each passed through
 * normalizeFailingTarget for the analysis' runner. Empty results are dropped
 * and first-seen order is kept.
 */
function buildFailingTargets(analysis) {
  const runner = analysis.runner;
  const targets = /* @__PURE__ */ new Set();
  const labels = [...analysis.visibleErrorLabels, ...analysis.visibleFailedLabels];
  for (const label of labels) {
    const target = normalizeFailingTarget(label, runner);
    if (target.length > 0) {
      // A Set keeps insertion order and ignores repeats.
      targets.add(target);
    }
  }
  return [...targets];
}
5065
/**
 * Build the `runner` section of a cached run.
 *
 * The runner name comes from the analysis, falling back to command-based
 * detection when the analysis reports "unknown". Subset re-runs are only
 * offered for pytest, and only when the command is an argv command that
 * isSubsetCapablePytestArgv accepts; in that case a copy of the argv is
 * stored as `subset.baseArgv`.
 */
function buildCachedRunnerState(args) {
  const { command, analysis } = args;
  const subsetCapableArgv = command?.mode === "argv" && isSubsetCapablePytestArgv(command.argv) ? [...command.argv] : void 0;
  const runnerName = analysis.runner !== "unknown" ? analysis.runner : detectRunnerFromCommand(command);
  // Only pytest runs may expose the argv for node-id based subset re-runs.
  const pytestBaseArgv = runnerName === "pytest" ? subsetCapableArgv : void 0;
  const subset = {
    available: Boolean(pytestBaseArgv),
    strategy: pytestBaseArgv ? "pytest-node-ids" : "none",
    ...pytestBaseArgv ? { baseArgv: pytestBaseArgv } : {}
  };
  return {
    name: runnerName,
    failingTargets: buildFailingTargets(analysis),
    baselineCommand: command,
    subset
  };
}
6735
/**
 * Resolve `value` to an absolute path and use forward slashes throughout so
 * the same directory yields the same string regardless of separator style.
 */
function normalizeCwd(value) {
  return path2.resolve(value).replace(/\\/g, "/");
}
/**
 * Build the identity string used to decide whether two cached test-status
 * runs share a baseline (same directory, runner, command mode and command).
 *
 * @param args.cwd            Working directory of the run.
 * @param args.runner         Runner name ("pytest", "vitest", "jest", "unknown").
 * @param args.command        Optional cached command; when absent one is built
 *                            from `shellCommand` or the space-split `commandPreview`.
 * @param args.commandPreview Optional human-readable command text.
 * @param args.shellCommand   Optional shell command text.
 * @returns The four identity fields joined with the U+001F unit separator.
 */
function buildTestStatusBaselineIdentity(args) {
  // U+001F (unit separator) does not occur in ordinary paths or command
  // lines. Joining with an empty string would let different argv lists
  // collapse into one key (["-k", "ab"] vs ["-ka", "b"]) and would blur the
  // boundaries between cwd, runner, mode and command. The character is
  // written as an escape so it survives tools that strip control characters.
  const fieldSeparator = "\u001F";
  const cwd = normalizeCwd(args.cwd);
  const command = args.command ?? buildCachedCommand({
    shellCommand: args.shellCommand,
    command: args.shellCommand ? void 0 : args.commandPreview?.split(" ")
  });
  const mode = command?.mode ?? (args.shellCommand ? "shell" : "argv");
  let normalizedCommand;
  if (command?.mode === "argv") {
    normalizedCommand = command.argv.join(fieldSeparator);
  } else if (command?.mode === "shell") {
    // Collapse whitespace so cosmetic spacing does not change the identity.
    normalizedCommand = command.shellCommand.trim().replace(/\s+/g, " ");
  } else {
    normalizedCommand = (args.commandPreview ?? "").trim().replace(/\s+/g, " ");
  }
  return [cwd, args.runner, mode, normalizedCommand].join(fieldSeparator);
}
5074
6748
/**
 * Compute the cache command key for a test-status run by delegating to
 * buildTestStatusBaselineIdentity. Missing values are defaulted here:
 * `cwd` falls back to the current process directory and `runner` to
 * "unknown".
 */
function buildTestStatusCommandKey(args) {
  return buildTestStatusBaselineIdentity({
    cwd: args.cwd ?? process.cwd(),
    runner: args.runner ?? "unknown",
    command: args.command,
    commandPreview: args.commandPreview,
    shellCommand: args.shellCommand
  });
}
5077
6757
  function snapshotTestStatusAnalysis(analysis) {
5078
6758
  return {
@@ -5098,13 +6778,22 @@ function createCachedTestStatusRun(args) {
5098
6778
  command: args.command,
5099
6779
  shellCommand: args.shellCommand
5100
6780
  });
6781
+ const runnerName = args.analysis.runner !== "unknown" ? args.analysis.runner : detectRunnerFromCommand(command);
6782
+ const commandPreview = args.commandPreview ?? args.shellCommand ?? (args.command ?? []).join(" ");
6783
+ const commandKey = args.commandKey ?? buildTestStatusBaselineIdentity({
6784
+ cwd: args.cwd,
6785
+ runner: runnerName,
6786
+ command,
6787
+ commandPreview,
6788
+ shellCommand: args.shellCommand
6789
+ });
5101
6790
  return {
5102
- version: 1,
6791
+ version: 2,
5103
6792
  timestamp: args.timestamp ?? (/* @__PURE__ */ new Date()).toISOString(),
5104
6793
  presetName: "test-status",
5105
6794
  cwd: args.cwd,
5106
- commandKey: args.commandKey,
5107
- commandPreview: args.commandPreview,
6795
+ commandKey,
6796
+ commandPreview,
5108
6797
  command,
5109
6798
  detail: args.detail,
5110
6799
  exitCode: args.exitCode,
@@ -5114,13 +6803,61 @@ function createCachedTestStatusRun(args) {
5114
6803
  truncatedApplied: args.truncatedApplied
5115
6804
  },
5116
6805
  analysis: snapshotTestStatusAnalysis(args.analysis),
5117
- pytest: buildCachedPytestState({
6806
+ runner: buildCachedRunnerState({
5118
6807
  command,
5119
- analysis: args.analysis,
5120
- remainingNodeIds: args.remainingNodeIds
6808
+ analysis: args.analysis
5121
6809
  })
5122
6810
  };
5123
6811
  }
6812
/**
 * Upgrade a parsed cached run to the version 2 shape.
 *
 * Version 2 states are returned unchanged. For any other state the runner is
 * detected from the stored raw output first and from the stored command
 * second, the command key is recomputed with buildTestStatusBaselineIdentity,
 * and the legacy `pytest` section is translated into the `runner` section.
 */
function migrateCachedTestStatusRun(state) {
  if (state.version === 2) {
    return state;
  }
  const runnerFromOutput = detectTestRunner(state.rawOutput);
  const runner = runnerFromOutput !== "unknown" ? runnerFromOutput : detectRunnerFromCommand(state.command);
  const storedCommand = state.command;
  // When the old state has no stored command, reuse the legacy pytest
  // baseArgv (if any) as an argv-mode baseline command.
  const fallbackBaseArgv = !storedCommand && state.pytest?.baseArgv ? {
    mode: "argv",
    argv: [...state.pytest.baseArgv]
  } : void 0;
  const baselineCommand = storedCommand ?? fallbackBaseArgv;
  const commandPreview = state.commandPreview ?? (baselineCommand?.mode === "argv" ? baselineCommand.argv.join(" ") : baselineCommand?.mode === "shell" ? baselineCommand.shellCommand : "");
  // The stored commandKey is not carried over; it is rebuilt from the
  // migrated cwd / runner / command values.
  const commandKey = buildTestStatusBaselineIdentity({
    cwd: state.cwd,
    runner,
    command: baselineCommand,
    commandPreview
  });
  return {
    version: 2,
    timestamp: state.timestamp,
    presetName: state.presetName,
    cwd: state.cwd,
    commandKey,
    commandPreview,
    // Top-level `command` keeps the originally stored value; only
    // `runner.baselineCommand` receives the fallback.
    command: state.command,
    detail: state.detail,
    exitCode: state.exitCode,
    rawOutput: state.rawOutput,
    capture: state.capture,
    analysis: state.analysis,
    runner: {
      name: runner,
      // Legacy failing node ids, normalized for the detected runner and de-duplicated.
      failingTargets: [...new Set((state.pytest?.failingNodeIds ?? []).map(
        (target) => normalizeFailingTarget(target, runner)
      ))],
      baselineCommand,
      subset: {
        available: runner === "pytest" && Boolean(state.pytest?.baseArgv),
        strategy: runner === "pytest" && state.pytest?.baseArgv ? "pytest-node-ids" : "none",
        ...runner === "pytest" && state.pytest?.baseArgv ? {
          baseArgv: [...state.pytest.baseArgv]
        } : {}
      }
    },
    // Marker added only when the baseline command came from the legacy baseArgv.
    ...fallbackBaseArgv ? { runnerMigrationFallbackUsed: true } : {}
  };
}
5124
6861
  function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
5125
6862
  let raw = "";
5126
6863
  try {
@@ -5132,7 +6869,7 @@ function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
5132
6869
  throw new InvalidCachedTestStatusRunError();
5133
6870
  }
5134
6871
  try {
5135
- return cachedRunSchema.parse(JSON.parse(raw));
6872
+ return migrateCachedTestStatusRun(cachedRunSchema.parse(JSON.parse(raw)));
5136
6873
  } catch {
5137
6874
  throw new InvalidCachedTestStatusRunError();
5138
6875
  }
@@ -5152,15 +6889,7 @@ function writeCachedTestStatusRun(state, statePath = getDefaultTestStatusStatePa
5152
6889
  `, "utf8");
5153
6890
  }
5154
6891
  function buildTargetDelta(args) {
5155
- if (args.previous.presetName !== "test-status" || args.current.presetName !== "test-status" || args.previous.cwd !== args.current.cwd || args.previous.commandKey !== args.current.commandKey) {
5156
- return {
5157
- comparable: false,
5158
- resolved: [],
5159
- remaining: [],
5160
- introduced: []
5161
- };
5162
- }
5163
- if (!args.previous.pytest || !args.current.pytest) {
6892
+ if (args.previous.presetName !== "test-status" || args.current.presetName !== "test-status" || args.previous.cwd !== args.current.cwd || args.previous.commandKey !== args.current.commandKey || args.previous.runner.name !== args.current.runner.name || args.previous.runner.name === "unknown") {
5164
6893
  return {
5165
6894
  comparable: false,
5166
6895
  resolved: [],
@@ -5168,8 +6897,8 @@ function buildTargetDelta(args) {
5168
6897
  introduced: []
5169
6898
  };
5170
6899
  }
5171
- const previousTargets = args.previous.pytest.failingNodeIds;
5172
- const currentTargets = args.current.pytest.failingNodeIds;
6900
+ const previousTargets = args.previous.runner.failingTargets;
6901
+ const currentTargets = args.current.runner.failingTargets;
5173
6902
  const currentTargetSet = new Set(currentTargets);
5174
6903
  const previousTargetSet = new Set(previousTargets);
5175
6904
  return {
@@ -5182,6 +6911,9 @@ function buildTargetDelta(args) {
5182
6911
  function diffTestStatusTargets(args) {
5183
6912
  return buildTargetDelta(args);
5184
6913
  }
6914
/**
 * Reports whether a remaining-failures subset rerun is possible for a cached run.
 * Evaluates to `false` unless the runner name is "pytest"; for pytest it yields the
 * value of `state.runner.subset.available` (presumably a boolean — confirm against the schema).
 */
+ function isRemainingSubsetAvailable(state) {
6915
+ return state.runner.name === "pytest" && state.runner.subset.available;
6916
+ }
5185
6917
  function diffTestStatusRuns(args) {
5186
6918
  const targetDelta = buildTargetDelta(args);
5187
6919
  const previousBuckets = new Map(
@@ -5191,21 +6923,45 @@ function diffTestStatusRuns(args) {
5191
6923
  args.current.analysis.buckets.map((bucket) => [buildBucketSignature(bucket), bucket])
5192
6924
  );
5193
6925
  const lines = [];
5194
- if (targetDelta.resolved.length > 0) {
5195
- lines.push(
5196
- `- Resolved: ${formatCount3(targetDelta.resolved.length, "failing test/module", "failing tests/modules")} no longer appear${appendPreview(targetDelta.resolved)}.`
5197
- );
5198
- }
5199
- if (targetDelta.remaining.length > 0) {
5200
- lines.push(
5201
- `- Remaining: ${formatCount3(targetDelta.remaining.length, "failing test/module", "failing tests/modules")} still appear${appendPreview(targetDelta.remaining)}.`
5202
- );
5203
- }
5204
- if (targetDelta.introduced.length > 0) {
6926
+ const resolvedSummary = buildTestTargetSummary(targetDelta.resolved);
6927
+ const remainingSummary = buildTestTargetSummary(targetDelta.remaining);
6928
+ const introducedSummary = buildTestTargetSummary(targetDelta.introduced);
6929
+ const pushTargetLine = (args2) => {
6930
+ if (args2.summary.count === 0) {
6931
+ return;
6932
+ }
6933
+ const summaryText = describeTargetSummary(args2.summary);
6934
+ if (summaryText) {
6935
+ lines.push(
6936
+ `- ${args2.kind}: ${formatCount3(args2.summary.count, args2.countLabel, `${args2.countLabel}s`)} ${args2.verb} ${summaryText}.`
6937
+ );
6938
+ return;
6939
+ }
5205
6940
  lines.push(
5206
- `- New: ${formatCount3(targetDelta.introduced.length, "failing test/module", "failing tests/modules")} appeared${appendPreview(targetDelta.introduced)}.`
6941
+ `- ${args2.kind}: ${formatCount3(args2.summary.count, args2.countLabel, `${args2.countLabel}s`)} ${args2.verb}${appendPreview(args2.fallbackValues)}.`
5207
6942
  );
5208
- }
6943
+ };
6944
+ pushTargetLine({
6945
+ kind: "Resolved",
6946
+ summary: resolvedSummary,
6947
+ countLabel: "failing target",
6948
+ fallbackValues: targetDelta.resolved,
6949
+ verb: "no longer appear"
6950
+ });
6951
+ pushTargetLine({
6952
+ kind: "Remaining",
6953
+ summary: remainingSummary,
6954
+ countLabel: "failing target",
6955
+ fallbackValues: targetDelta.remaining,
6956
+ verb: "still appear"
6957
+ });
6958
+ pushTargetLine({
6959
+ kind: "New",
6960
+ summary: introducedSummary,
6961
+ countLabel: "failing target",
6962
+ fallbackValues: targetDelta.introduced,
6963
+ verb: "appeared"
6964
+ });
5209
6965
  for (const bucket of args.current.analysis.buckets) {
5210
6966
  const signature = buildBucketSignature(bucket);
5211
6967
  const previous = previousBuckets.get(signature);
@@ -5233,8 +6989,7 @@ function diffTestStatusRuns(args) {
5233
6989
  }
5234
6990
  }
5235
6991
  return {
5236
- lines: lines.slice(0, 4),
5237
- remainingNodeIds: targetDelta.comparable ? targetDelta.remaining : void 0
6992
+ lines: lines.slice(0, 4)
5238
6993
  };
5239
6994
  }
5240
6995
 
@@ -5367,8 +7122,9 @@ async function runTestStatusWatch(request, cycles) {
5367
7122
  testStatusContext: {
5368
7123
  ...request.testStatusContext,
5369
7124
  resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
5370
- remainingTests: targetDelta?.remaining ?? currentRun.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
5371
- remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (Boolean(currentRun.pytest?.subsetCapable) && (currentRun.pytest?.failingNodeIds.length ?? 0) > 0)
7125
+ remainingTests: targetDelta?.remaining ?? currentRun.runner.failingTargets ?? request.testStatusContext?.remainingTests,
7126
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (isRemainingSubsetAvailable(currentRun) && currentRun.runner.failingTargets.length > 0),
7127
+ remainingMode: request.testStatusContext?.remainingMode ?? "none"
5372
7128
  }
5373
7129
  });
5374
7130
  if (request.goal === "diagnose" && request.format === "json") {
@@ -5515,11 +7271,13 @@ async function runExec(request) {
5515
7271
  const shellPath = process.env.SHELL || "/bin/bash";
5516
7272
  const commandPreview = buildCommandPreview(request);
5517
7273
  const commandCwd = request.cwd ?? process.cwd();
5518
- const shouldCacheTestStatusBase = request.presetName === "test-status" && !request.skipCacheWrite;
5519
- const previousCachedRun = shouldCacheTestStatusBase ? tryReadCachedTestStatusRun() : null;
7274
+ const isTestStatusPreset = request.presetName === "test-status";
7275
+ const readCachedBaseline = isTestStatusPreset && (request.readCachedBaseline ?? true);
7276
+ const writeCachedBaselineRequested = isTestStatusPreset && (request.writeCachedBaseline ?? (request.skipCacheWrite ? false : true));
7277
+ const previousCachedRun = readCachedBaseline ? tryReadCachedTestStatusRun() : null;
5520
7278
  if (request.config.runtime.verbose) {
5521
7279
  process.stderr.write(
5522
- `${pc2.dim("sift")} exec mode=${hasShellCommand ? "shell" : "argv"} command=${commandPreview}
7280
+ `${pc3.dim("sift")} exec mode=${hasShellCommand ? "shell" : "argv"} command=${commandPreview}
5523
7281
  `
5524
7282
  );
5525
7283
  }
@@ -5548,7 +7306,7 @@ async function runExec(request) {
5548
7306
  }
5549
7307
  bypassed = true;
5550
7308
  if (request.config.runtime.verbose) {
5551
- process.stderr.write(`${pc2.dim("sift")} bypass=interactive-prompt
7309
+ process.stderr.write(`${pc3.dim("sift")} bypass=interactive-prompt
5552
7310
  `);
5553
7311
  }
5554
7312
  process.stderr.write(capture.render());
@@ -5574,18 +7332,20 @@ async function runExec(request) {
5574
7332
  const capturedOutput = capture.render();
5575
7333
  const autoWatchDetected = !request.watch && looksLikeWatchStream(capturedOutput);
5576
7334
  const useWatchFlow = Boolean(request.watch) || autoWatchDetected;
5577
- const shouldCacheTestStatus = shouldCacheTestStatusBase && !useWatchFlow;
7335
+ const shouldBuildTestStatusState = isTestStatusPreset && !useWatchFlow;
7336
+ const shouldWriteCachedBaseline = writeCachedBaselineRequested && !useWatchFlow;
5578
7337
  if (request.config.runtime.verbose) {
5579
7338
  process.stderr.write(
5580
- `${pc2.dim("sift")} child_exit=${exitCode} captured_chars=${capture.getTotalChars()} capture_truncated=${capture.wasTruncated()}
7339
+ `${pc3.dim("sift")} child_exit=${exitCode} captured_chars=${capture.getTotalChars()} capture_truncated=${capture.wasTruncated()}
5581
7340
  `
5582
7341
  );
5583
7342
  }
5584
7343
  if (autoWatchDetected) {
5585
- process.stderr.write(`${pc2.dim("sift")} auto-watch=detected
7344
+ process.stderr.write(`${pc3.dim("sift")} auto-watch=detected
5586
7345
  `);
5587
7346
  }
5588
7347
  if (!bypassed) {
7348
+ const reductionStartedAt = Date.now();
5589
7349
  if (request.showRaw && capturedOutput.length > 0) {
5590
7350
  process.stderr.write(capturedOutput);
5591
7351
  if (!capturedOutput.endsWith("\n")) {
@@ -5600,12 +7360,22 @@ async function runExec(request) {
5600
7360
  if (execSuccessShortcut && !request.dryRun) {
5601
7361
  if (request.config.runtime.verbose) {
5602
7362
  process.stderr.write(
5603
- `${pc2.dim("sift")} exec_shortcut=${request.presetName}
7363
+ `${pc3.dim("sift")} exec_shortcut=${request.presetName}
5604
7364
  `
5605
7365
  );
5606
7366
  }
5607
7367
  process.stdout.write(`${execSuccessShortcut}
5608
7368
  `);
7369
+ emitStatsFooter({
7370
+ stats: {
7371
+ layer: "heuristic",
7372
+ providerCalled: false,
7373
+ totalTokens: null,
7374
+ durationMs: Date.now() - reductionStartedAt,
7375
+ presetName: request.presetName
7376
+ },
7377
+ quiet: Boolean(request.quiet)
7378
+ });
5609
7379
  return exitCode;
5610
7380
  }
5611
7381
  if (useWatchFlow) {
@@ -5618,17 +7388,27 @@ async function runExec(request) {
5618
7388
  presetName: request.presetName,
5619
7389
  originalLength: capture.getTotalChars(),
5620
7390
  truncatedApplied: capture.wasTruncated(),
5621
- exitCode
7391
+ exitCode,
7392
+ recognizedRunner: detectTestRunner(capturedOutput)
5622
7393
  });
5623
7394
  }
5624
7395
  process.stdout.write(`${output2}
5625
7396
  `);
5626
7397
  return exitCode;
5627
7398
  }
5628
- const analysis = shouldCacheTestStatus ? analyzeTestStatus(capturedOutput) : null;
5629
- let currentCachedRun = shouldCacheTestStatus && analysis ? createCachedTestStatusRun({
7399
+ const analysis = shouldBuildTestStatusState ? analyzeTestStatus(capturedOutput) : null;
7400
+ let currentCachedRun = shouldBuildTestStatusState && analysis ? createCachedTestStatusRun({
5630
7401
  cwd: commandCwd,
5631
7402
  commandKey: buildTestStatusCommandKey({
7403
+ cwd: commandCwd,
7404
+ runner: analysis.runner,
7405
+ command: Array.isArray(request.command) && request.command.length > 0 ? {
7406
+ mode: "argv",
7407
+ argv: [...request.command]
7408
+ } : request.shellCommand ? {
7409
+ mode: "shell",
7410
+ shellCommand: request.shellCommand
7411
+ } : void 0,
5632
7412
  commandPreview,
5633
7413
  shellCommand: request.shellCommand
5634
7414
  }),
@@ -5642,36 +7422,39 @@ async function runExec(request) {
5642
7422
  truncatedApplied: capture.wasTruncated(),
5643
7423
  analysis
5644
7424
  }) : null;
5645
- const targetDelta = request.diff && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
7425
+ const targetDelta = (request.diff || request.testStatusContext?.remainingMode === "subset_rerun" || request.testStatusContext?.remainingMode === "full_rerun_diff") && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
5646
7426
  previous: previousCachedRun,
5647
7427
  current: currentCachedRun
5648
7428
  }) : null;
5649
- let output = await runSift({
7429
+ const result = await runSiftWithStats({
5650
7430
  ...request,
5651
7431
  stdin: capturedOutput,
5652
- analysisContext: request.skipCacheWrite && request.presetName === "test-status" ? [
7432
+ analysisContext: request.testStatusContext?.remainingMode && request.testStatusContext.remainingMode !== "none" && request.presetName === "test-status" ? [
5653
7433
  request.analysisContext,
5654
7434
  "Zoom context:",
5655
7435
  "- This pass is remaining-only.",
5656
7436
  "- The full-suite truth already exists from the cached full run.",
5657
7437
  "- Do not reintroduce resolved tests into the diagnosis."
5658
7438
  ].filter((value) => Boolean(value)).join("\n") : request.analysisContext,
5659
- testStatusContext: shouldCacheTestStatus && analysis ? {
7439
+ testStatusContext: shouldBuildTestStatusState && analysis ? {
5660
7440
  ...request.testStatusContext,
5661
7441
  resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
5662
- remainingTests: targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
7442
+ remainingTests: targetDelta?.remaining ?? currentCachedRun?.runner.failingTargets ?? request.testStatusContext?.remainingTests,
5663
7443
  remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? Boolean(
5664
- currentCachedRun?.pytest?.subsetCapable && (targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? []).length > 0
5665
- )
7444
+ currentCachedRun && isRemainingSubsetAvailable(currentCachedRun) && (targetDelta?.remaining ?? currentCachedRun?.runner.failingTargets ?? []).length > 0
7445
+ ),
7446
+ remainingMode: request.testStatusContext?.remainingMode ?? "none"
5666
7447
  } : request.testStatusContext
5667
7448
  });
5668
- if (shouldCacheTestStatus) {
7449
+ let output = result.output;
7450
+ if (shouldBuildTestStatusState) {
5669
7451
  if (isInsufficientSignalOutput(output)) {
5670
7452
  output = buildInsufficientSignalOutput({
5671
7453
  presetName: request.presetName,
5672
7454
  originalLength: capture.getTotalChars(),
5673
7455
  truncatedApplied: capture.wasTruncated(),
5674
- exitCode
7456
+ exitCode,
7457
+ recognizedRunner: detectTestRunner(capturedOutput)
5675
7458
  });
5676
7459
  }
5677
7460
  if (request.diff && !request.dryRun && previousCachedRun && currentCachedRun) {
@@ -5679,32 +7462,18 @@ async function runExec(request) {
5679
7462
  previous: previousCachedRun,
5680
7463
  current: currentCachedRun
5681
7464
  });
5682
- currentCachedRun = createCachedTestStatusRun({
5683
- cwd: commandCwd,
5684
- commandKey: currentCachedRun.commandKey,
5685
- commandPreview,
5686
- command: request.command,
5687
- shellCommand: request.shellCommand,
5688
- detail: request.detail ?? "standard",
5689
- exitCode,
5690
- rawOutput: capturedOutput,
5691
- originalChars: capture.getTotalChars(),
5692
- truncatedApplied: capture.wasTruncated(),
5693
- analysis,
5694
- remainingNodeIds: delta.remainingNodeIds
5695
- });
5696
7465
  if (delta.lines.length > 0) {
5697
7466
  output = `${delta.lines.join("\n")}
5698
7467
  ${output}`;
5699
7468
  }
5700
7469
  }
5701
- if (currentCachedRun) {
7470
+ if (currentCachedRun && shouldWriteCachedBaseline) {
5702
7471
  try {
5703
7472
  writeCachedTestStatusRun(currentCachedRun);
5704
7473
  } catch (error) {
5705
7474
  if (request.config.runtime.verbose) {
5706
7475
  const reason = error instanceof Error ? error.message : "unknown_error";
5707
- process.stderr.write(`${pc2.dim("sift")} cache_write=failed reason=${reason}
7476
+ process.stderr.write(`${pc3.dim("sift")} cache_write=failed reason=${reason}
5708
7477
  `);
5709
7478
  }
5710
7479
  }
@@ -5714,11 +7483,16 @@ ${output}`;
5714
7483
  presetName: request.presetName,
5715
7484
  originalLength: capture.getTotalChars(),
5716
7485
  truncatedApplied: capture.wasTruncated(),
5717
- exitCode
7486
+ exitCode,
7487
+ recognizedRunner: detectTestRunner(capturedOutput)
5718
7488
  });
5719
7489
  }
5720
7490
  process.stdout.write(`${output}
5721
7491
  `);
7492
+ emitStatsFooter({
7493
+ stats: result.stats,
7494
+ quiet: Boolean(request.quiet)
7495
+ });
5722
7496
  if (request.failOn && !request.dryRun && exitCode === 0 && supportsFailOnPreset(request.presetName) && evaluateGate({
5723
7497
  presetName: request.presetName,
5724
7498
  output
@@ -6078,5 +7852,6 @@ export {
6078
7852
  normalizeChildExitCode,
6079
7853
  resolveConfig,
6080
7854
  runExec,
6081
- runSift
7855
+ runSift,
7856
+ runSiftWithStats
6082
7857
  };