@bilalimamoglu/sift 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -6,8 +6,14 @@ import pc2 from "picocolors";
6
6
  // src/constants.ts
7
7
  import os from "os";
8
8
  import path from "path";
9
- function getDefaultGlobalConfigPath() {
10
- return path.join(os.homedir(), ".config", "sift", "config.yaml");
9
/**
 * Resolve the default location of the global sift config file.
 *
 * @param homeDir Base directory to resolve from; defaults to `os.homedir()`.
 * @returns `<homeDir>/.config/sift/config.yaml`, joined with the platform separator.
 */
function getDefaultGlobalConfigPath(homeDir = os.homedir()) {
  const siftConfigDir = path.join(homeDir, ".config", "sift");
  return path.join(siftConfigDir, "config.yaml");
}
12
/**
 * Resolve the default directory for sift's persisted state.
 *
 * @param homeDir Base directory to resolve from; defaults to `os.homedir()`.
 * @returns `<homeDir>/.config/sift/state`, joined with the platform separator.
 */
function getDefaultGlobalStateDir(homeDir = os.homedir()) {
  const siftConfigDir = path.join(homeDir, ".config", "sift");
  return path.join(siftConfigDir, "state");
}
15
+ function getDefaultTestStatusStatePath(homeDir = os.homedir()) {
16
+ return path.join(getDefaultGlobalStateDir(homeDir), "last-test-status.json");
11
17
  }
12
18
  function getDefaultConfigSearchPaths() {
13
19
  return [
@@ -53,539 +59,955 @@ function evaluateGate(args) {
53
59
  return { shouldFail: false };
54
60
  }
55
61
 
56
- // src/core/insufficient.ts
57
- function isInsufficientSignalOutput(output) {
58
- const trimmed = output.trim();
59
- return trimmed === INSUFFICIENT_SIGNAL_TEXT || trimmed.startsWith(`${INSUFFICIENT_SIGNAL_TEXT}
60
- Hint:`);
62
+ // src/core/testStatusDecision.ts
63
+ import { z } from "zod";
64
// JSON-shape description string for the full diagnose payload. Its field list lines up with
// testStatusPublicDiagnoseContractSchema (resolved/remaining summaries plus optional
// resolved_tests/remaining_tests).
// NOTE(review): this string is maintained separately from the zod schema — confirm both are
// updated together when fields change.
+ var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
65
// JSON-shape description string for the reduced provider supplement; its fields are the same
// seven keys validated by testStatusProviderSupplementSchema.
+ var TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT = '{"diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","provider_confidence":number|null,"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string}}';
66
// Schema for a `next_best_action` object; reused by both the provider supplement schema and
// the full diagnose contract schema.
var nextBestActionSchema = z.object({
  // One of four fixed action codes.
  code: z.enum([
    "fix_dominant_blocker",
    "read_source_for_bucket",
    "read_raw_for_exact_traceback",
    "insufficient_signal"
  ]),
  // Integer bucket index, or null.
  bucket_index: z.number().int().nullable(),
  // Non-empty free-text note.
  note: z.string().min(1)
});
76
// Schema for the provider supplement object: the decision-related subset of the diagnose
// contract (see TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT for the matching shape string).
var testStatusProviderSupplementSchema = z.object({
  diagnosis_complete: z.boolean(),
  raw_needed: z.boolean(),
  additional_source_read_likely_low_value: z.boolean(),
  read_raw_only_if: z.string().nullable(),
  decision: z.enum(["stop", "zoom", "read_source", "read_raw"]),
  // Must lie in [0, 1] when present; null is accepted.
  provider_confidence: z.number().min(0).max(1).nullable(),
  next_best_action: nextBestActionSchema
});
85
// Schema for the full diagnose contract, including the complete `resolved_tests` and
// `remaining_tests` lists (testStatusPublicDiagnoseContractSchema makes those optional).
var testStatusDiagnoseContractSchema = z.object({
  status: z.enum(["ok", "insufficient"]),
  diagnosis_complete: z.boolean(),
  raw_needed: z.boolean(),
  additional_source_read_likely_low_value: z.boolean(),
  read_raw_only_if: z.string().nullable(),
  decision: z.enum(["stop", "zoom", "read_source", "read_raw"]),
  dominant_blocker_bucket_index: z.number().int().nullable(),
  provider_used: z.boolean(),
  // Must lie in [0, 1] when present; null is accepted.
  provider_confidence: z.number().min(0).max(1).nullable(),
  provider_failed: z.boolean(),
  raw_slice_used: z.boolean(),
  raw_slice_strategy: z.enum(["none", "bucket_evidence", "traceback_window", "head_tail"]),
  resolved_tests: z.array(z.string()),
  remaining_tests: z.array(z.string()),
  main_buckets: z.array(
    z.object({
      bucket_index: z.number().int(),
      label: z.string(),
      count: z.number().int(),
      root_cause: z.string(),
      // At most two evidence strings per bucket.
      evidence: z.array(z.string()).max(2),
      // Unlike provider_confidence, these two confidences carry no range constraint.
      bucket_confidence: z.number(),
      root_cause_confidence: z.number(),
      dominant: z.boolean(),
      secondary_visible_despite_blocker: z.boolean(),
      // Either null or an object whose three integer counters are each optional.
      mini_diff: z
        .object({
          added_paths: z.number().int().optional(),
          removed_models: z.number().int().optional(),
          changed_task_mappings: z.number().int().optional()
        })
        .nullable()
    })
  ),
  // At most five read targets.
  read_targets: z
    .array(
      z.object({
        file: z.string().min(1),
        line: z.number().int().nullable(),
        why: z.string().min(1),
        bucket_index: z.number().int(),
        context_hint: z.object({
          start_line: z.number().int().nullable(),
          end_line: z.number().int().nullable(),
          search_hint: z.string().nullable()
        })
      })
    )
    .max(5),
  next_best_action: nextBestActionSchema
});
133
// Schema for a compact test-target summary: a total plus up to five per-prefix family counts
// (the same shape buildTestTargetSummary returns).
var testStatusTargetSummarySchema = z.object({
  count: z.number().int().nonnegative(),
  families: z
    .array(
      z.object({
        prefix: z.string().min(1),
        count: z.number().int().nonnegative()
      })
    )
    .max(5)
});
142
// Public variant of the diagnose contract: the required test-id lists are dropped and
// re-added as optional, and summary fields are added alongside them.
var testStatusPublicDiagnoseContractSchema = testStatusDiagnoseContractSchema
  .omit({
    resolved_tests: true,
    remaining_tests: true
  })
  .extend({
    resolved_summary: testStatusTargetSummarySchema,
    remaining_summary: testStatusTargetSummarySchema,
    remaining_subset_available: z.boolean(),
    resolved_tests: z.array(z.string()).optional(),
    remaining_tests: z.array(z.string()).optional()
  });
152
+ function parseTestStatusProviderSupplement(input) {
153
+ return testStatusProviderSupplementSchema.parse(JSON.parse(input));
61
154
  }
62
- function buildInsufficientSignalOutput(input) {
63
- let hint;
64
- if (input.originalLength === 0) {
65
- hint = "Hint: no command output was captured.";
66
- } else if (input.truncatedApplied) {
67
- hint = "Hint: captured output was truncated before a clear summary was found.";
68
- } else if (input.presetName === "test-status" && input.exitCode === 0) {
69
- hint = "Hint: command succeeded, but no recognizable test summary was found.";
70
- } else if (input.presetName === "test-status" && typeof input.exitCode === "number") {
71
- hint = "Hint: command failed, but the captured output did not include a recognizable test summary.";
72
- } else {
73
- hint = "Hint: the captured output did not contain a clear answer for this preset.";
74
- }
75
- return `${INSUFFICIENT_SIGNAL_TEXT}
76
- ${hint}`;
155
/**
 * Render a count followed by the matching noun form.
 *
 * @param count Number to display.
 * @param singular Noun used when `count === 1`.
 * @param plural Noun used otherwise; defaults to `singular` with an "s" appended.
 * @returns `"<count> <noun>"`.
 */
function formatCount(count, singular, plural = `${singular}s`) {
  const noun = count === 1 ? singular : plural;
  return `${count} ${noun}`;
}
78
-
79
- // src/core/run.ts
80
- import pc from "picocolors";
81
-
82
- // src/providers/systemInstruction.ts
83
- var REDUCTION_SYSTEM_INSTRUCTION = "You reduce noisy command output into compact answers for agents and automation.";
84
-
85
- // src/providers/openai.ts
86
- function usesNativeJsonResponseFormat(mode) {
87
- return mode !== "off";
158
/**
 * Remove duplicates while keeping first-seen order.
 *
 * @param values Iterable of values (compared with `Set` semantics).
 * @returns A new array containing each distinct value once.
 */
function unique(values) {
  const distinct = new Set(values);
  return Array.from(distinct);
}
89
- function extractResponseText(payload) {
90
- if (typeof payload?.output_text === "string") {
91
- return payload.output_text.trim();
161
/**
 * Canonicalize a test id: every backslash becomes a forward slash and surrounding
 * whitespace is trimmed.
 *
 * @param value Raw test id string.
 * @returns The normalized id.
 */
function normalizeTestId(value) {
  const withForwardSlashes = value.split("\\").join("/");
  return withForwardSlashes.trim();
}
164
+ function extractTestFamilyPrefix(value) {
165
+ const normalized = normalizeTestId(value);
166
+ const testsMatch = normalized.match(/^(tests\/[^/]+\/)/);
167
+ if (testsMatch) {
168
+ return testsMatch[1];
92
169
  }
93
- if (!Array.isArray(payload?.output)) {
94
- return "";
170
+ const filePart = normalized.split("::")[0]?.trim() ?? "";
171
+ if (!filePart.includes("/")) {
172
+ return "other";
95
173
  }
96
- return payload.output.flatMap((item) => Array.isArray(item?.content) ? item.content : []).map((item) => item?.type === "output_text" ? item.text : "").filter((text) => typeof text === "string" && text.trim().length > 0).join("").trim();
174
+ const segments = filePart.replace(/^\/+/, "").split("/").filter(Boolean);
175
+ if (segments.length === 0) {
176
+ return "other";
177
+ }
178
+ return `${segments[0]}/`;
97
179
  }
98
- async function buildOpenAIError(response) {
99
- let detail = `Provider returned HTTP ${response.status}`;
100
- try {
101
- const data = await response.json();
102
- const message = data?.error?.message;
103
- if (typeof message === "string" && message.trim().length > 0) {
104
- detail = `${detail}: ${message.trim()}`;
180
+ function buildTestTargetSummary(values) {
181
+ const counts = /* @__PURE__ */ new Map();
182
+ for (const value of values) {
183
+ const prefix = extractTestFamilyPrefix(value);
184
+ counts.set(prefix, (counts.get(prefix) ?? 0) + 1);
185
+ }
186
+ const families = [...counts.entries()].map(([prefix, count]) => ({
187
+ prefix,
188
+ count
189
+ })).sort((left, right) => {
190
+ if (right.count !== left.count) {
191
+ return right.count - left.count;
105
192
  }
106
- } catch {
193
+ return left.prefix.localeCompare(right.prefix);
194
+ }).slice(0, 5);
195
+ return {
196
+ count: values.length,
197
+ families
198
+ };
199
+ }
200
/**
 * Render a target summary as a single line.
 *
 * @param summary Object with `count` and `families` (`{ prefix, count }` entries).
 * @returns `"count=0"` for an empty summary; otherwise `"count=N; families=..."`, where each
 *   family is the prefix immediately followed by its count, or `none` when there are none.
 */
function formatTargetSummary(summary) {
  if (summary.count === 0) {
    return "count=0";
  }
  const rendered = summary.families.map((family) => `${family.prefix}${family.count}`);
  const families = rendered.length > 0 ? rendered.join(", ") : "none";
  return `count=${summary.count}; families=${families}`;
}
110
- var OpenAIProvider = class {
111
- name = "openai";
112
- baseUrl;
113
- apiKey;
114
- constructor(options) {
115
- this.baseUrl = options.baseUrl.replace(/\/$/, "");
116
- this.apiKey = options.apiKey;
207
/**
 * Map a normalized failure reason to a generic bucket type.
 *
 * Known reason prefixes are checked first, in order; after that, any reason starting with a
 * capitalized `...Error:` / `...Exception:` name is a runtime failure.
 *
 * @param reason Normalized reason string.
 * @returns The bucket type, or `"unknown_failure"` when nothing matches.
 */
function classifyGenericBucketType(reason) {
  const prefixRules = [
    ["missing test env:", "shared_environment_blocker"],
    ["fixture guard:", "collection_failure"],
    ["service unavailable:", "runtime_failure"],
    ["db refused:", "runtime_failure"],
    ["auth bypass absent:", "runtime_failure"],
    ["missing module:", "import_dependency_failure"],
    ["assertion failed:", "assertion_failure"]
  ];
  for (const [prefix, bucketType] of prefixRules) {
    if (reason.startsWith(prefix)) {
      return bucketType;
    }
  }
  const looksLikeException = /^[A-Z][A-Za-z]+(?:Error|Exception):/.test(reason);
  return looksLikeException ? "runtime_failure" : "unknown_failure";
}
197
- async function buildOpenAICompatibleError(response) {
198
- let detail = `Provider returned HTTP ${response.status}`;
199
- try {
200
- const data = await response.json();
201
- const message = data?.error?.message;
202
- if (typeof message === "string" && message.trim().length > 0) {
203
- detail = `${detail}: ${message.trim()}`;
234
+ function buildGenericBuckets(analysis) {
235
+ const buckets = [];
236
+ const grouped = /* @__PURE__ */ new Map();
237
+ const push = (reason, item) => {
238
+ const key = `${classifyGenericBucketType(reason)}:${reason}`;
239
+ const existing = grouped.get(key);
240
+ if (existing) {
241
+ existing.count += 1;
242
+ if (!existing.representativeItems.some((entry) => entry.label === item.label) && existing.representativeItems.length < 6) {
243
+ existing.representativeItems.push(item);
244
+ }
245
+ return;
204
246
  }
205
- } catch {
247
+ grouped.set(key, {
248
+ type: classifyGenericBucketType(reason),
249
+ headline: "",
250
+ summaryLines: [],
251
+ reason,
252
+ count: 1,
253
+ confidence: reason.startsWith("assertion failed:") || /^[A-Z][A-Za-z]+(?:Error|Exception):/.test(reason) ? 0.74 : 0.62,
254
+ representativeItems: [item],
255
+ entities: [],
256
+ hint: void 0,
257
+ overflowCount: 0,
258
+ overflowLabel: "failing tests/modules"
259
+ });
260
+ };
261
+ for (const item of [...analysis.collectionItems, ...analysis.inlineItems]) {
262
+ push(item.reason, item);
206
263
  }
207
- return new Error(detail);
264
+ for (const bucket of grouped.values()) {
265
+ const title = bucket.type === "assertion_failure" ? "Assertion failures" : bucket.type === "import_dependency_failure" ? "Import/dependency failures" : bucket.type === "collection_failure" ? "Collection or fixture failures" : "Runtime failures";
266
+ bucket.headline = `${title}: ${formatCount(bucket.count, "visible failure")} share ${bucket.reason}.`;
267
+ bucket.summaryLines = [bucket.headline];
268
+ bucket.overflowCount = Math.max(bucket.count - bucket.representativeItems.length, 0);
269
+ buckets.push(bucket);
270
+ }
271
+ return buckets.sort((left, right) => right.count - left.count);
208
272
  }
209
- var OpenAICompatibleProvider = class {
210
- name = "openai-compatible";
211
- baseUrl;
212
- apiKey;
213
- constructor(options) {
214
- this.baseUrl = options.baseUrl.replace(/\/$/, "");
215
- this.apiKey = options.apiKey;
273
/**
 * Build the identity key under which buckets are merged.
 *
 * @param bucket Object with `type` and `reason`.
 * @returns `<type>:<reason>`, with the reason lower-cased, whitespace runs collapsed to a
 *   single space, and trimmed.
 */
function normalizeBucketIdentity(bucket) {
  const canonicalReason = bucket.reason.toLowerCase().replace(/\s+/g, " ").trim();
  return `${bucket.type}:${canonicalReason}`;
}
276
+ function mergeRepresentativeItems(left, right) {
277
+ const merged = [...left];
278
+ for (const item of right) {
279
+ if (merged.some(
280
+ (existing) => existing.label === item.label && existing.reason === item.reason
281
+ )) {
282
+ continue;
283
+ }
284
+ if (merged.length >= 6) {
285
+ break;
286
+ }
287
+ merged.push(item);
216
288
  }
217
- async generate(input) {
218
- const controller = new AbortController();
219
- const timeout = setTimeout(() => controller.abort(), input.timeoutMs);
220
- try {
221
- const url = new URL("chat/completions", `${this.baseUrl}/`);
222
- const response = await fetch(url, {
223
- method: "POST",
224
- signal: controller.signal,
225
- headers: {
226
- "content-type": "application/json",
227
- ...this.apiKey ? { authorization: `Bearer ${this.apiKey}` } : {}
228
- },
229
- body: JSON.stringify({
230
- model: input.model,
231
- temperature: input.temperature,
232
- max_tokens: input.maxOutputTokens,
233
- ...input.responseMode === "json" && supportsNativeJsonResponseFormat(this.baseUrl, input.jsonResponseFormat) ? { response_format: { type: "json_object" } } : {},
234
- messages: [
235
- {
236
- role: "system",
237
- content: REDUCTION_SYSTEM_INSTRUCTION
238
- },
239
- {
240
- role: "user",
241
- content: input.prompt
242
- }
243
- ]
244
- })
245
- });
246
- if (!response.ok) {
247
- throw await buildOpenAICompatibleError(response);
248
- }
249
- const data = await response.json();
250
- const text = extractMessageText(data);
251
- if (!text.trim()) {
252
- throw new Error("Provider returned an empty response");
253
- }
254
- const result = {
255
- text,
256
- usage: data?.usage ? {
257
- inputTokens: data.usage.prompt_tokens,
258
- outputTokens: data.usage.completion_tokens,
259
- totalTokens: data.usage.total_tokens
260
- } : void 0,
261
- raw: data
262
- };
263
- clearTimeout(timeout);
264
- return result;
265
- } catch (error) {
266
- clearTimeout(timeout);
267
- if (error.name === "AbortError") {
268
- throw new Error("Provider request timed out");
289
+ return merged;
290
+ }
291
+ function mergeBucketDetails(existing, incoming) {
292
+ const representativeItems = mergeRepresentativeItems(
293
+ existing.representativeItems,
294
+ incoming.representativeItems
295
+ );
296
+ const count = Math.max(existing.count, incoming.count);
297
+ return {
298
+ ...existing,
299
+ headline: existing.summaryLines.length >= incoming.summaryLines.length && existing.headline.length >= incoming.headline.length ? existing.headline : incoming.headline,
300
+ summaryLines: existing.summaryLines.length >= incoming.summaryLines.length ? existing.summaryLines : incoming.summaryLines,
301
+ count,
302
+ confidence: Math.max(existing.confidence, incoming.confidence),
303
+ representativeItems,
304
+ entities: unique([...existing.entities, ...incoming.entities]),
305
+ hint: existing.hint ?? incoming.hint,
306
+ overflowCount: Math.max(
307
+ existing.overflowCount,
308
+ incoming.overflowCount,
309
+ count - representativeItems.length
310
+ ),
311
+ overflowLabel: existing.overflowLabel || incoming.overflowLabel
312
+ };
313
+ }
314
+ function mergeBuckets(analysis) {
315
+ const mergedByIdentity = /* @__PURE__ */ new Map();
316
+ const merged = [];
317
+ const pushBucket = (bucket) => {
318
+ const identity = normalizeBucketIdentity(bucket);
319
+ const existing = mergedByIdentity.get(identity);
320
+ if (existing) {
321
+ const replacement = mergeBucketDetails(existing, bucket);
322
+ const index = merged.indexOf(existing);
323
+ if (index >= 0) {
324
+ merged[index] = replacement;
269
325
  }
270
- throw error;
326
+ mergedByIdentity.set(identity, replacement);
327
+ return;
271
328
  }
329
+ merged.push(bucket);
330
+ mergedByIdentity.set(identity, bucket);
331
+ };
332
+ for (const bucket of analysis.buckets.map((bucket2) => ({
333
+ type: bucket2.type,
334
+ headline: bucket2.headline,
335
+ summaryLines: [...bucket2.summaryLines],
336
+ reason: bucket2.reason,
337
+ count: bucket2.countClaimed ?? bucket2.countVisible,
338
+ confidence: bucket2.confidence,
339
+ representativeItems: [...bucket2.representativeItems],
340
+ entities: [...bucket2.entities],
341
+ hint: bucket2.hint,
342
+ overflowCount: bucket2.overflowCount,
343
+ overflowLabel: bucket2.overflowLabel
344
+ }))) {
345
+ pushBucket(bucket);
272
346
  }
273
- };
274
-
275
- // src/providers/factory.ts
276
- function createProvider(config) {
277
- if (config.provider.provider === "openai") {
278
- return new OpenAIProvider({
279
- baseUrl: config.provider.baseUrl,
280
- apiKey: config.provider.apiKey
281
- });
282
- }
283
- if (config.provider.provider === "openai-compatible") {
284
- return new OpenAICompatibleProvider({
285
- baseUrl: config.provider.baseUrl,
286
- apiKey: config.provider.apiKey
347
+ const coveredLabels = new Set(
348
+ merged.flatMap((bucket) => bucket.representativeItems.map((item) => item.label))
349
+ );
350
+ for (const bucket of buildGenericBuckets(analysis)) {
351
+ const identity = normalizeBucketIdentity(bucket);
352
+ const unseenItems = bucket.representativeItems.filter(
353
+ (item) => !coveredLabels.has(item.label)
354
+ );
355
+ if (!mergedByIdentity.has(identity) && unseenItems.length === 0) {
356
+ continue;
357
+ }
358
+ pushBucket({
359
+ ...bucket,
360
+ count: Math.max(bucket.count, unseenItems.length),
361
+ representativeItems: mergedByIdentity.has(identity) || unseenItems.length === 0 ? bucket.representativeItems : unseenItems
287
362
  });
363
+ for (const item of bucket.representativeItems) {
364
+ coveredLabels.add(item.label);
365
+ }
288
366
  }
289
- throw new Error(`Unsupported provider: ${config.provider.provider}`);
367
+ return merged;
290
368
  }
291
-
292
- // src/prompts/formats.ts
293
- function getGenericFormatPolicy(format, outputContract) {
294
- switch (format) {
295
- case "brief":
296
- return {
297
- responseMode: "text",
298
- taskRules: [
299
- "Return 1 to 3 short sentences.",
300
- `If the evidence is insufficient, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
301
- ]
302
- };
303
- case "bullets":
304
- return {
305
- responseMode: "text",
306
- taskRules: [
307
- "Return at most 5 short lines prefixed with '- '.",
308
- `If the evidence is insufficient, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
309
- ]
310
- };
311
- case "verdict":
312
- return {
313
- responseMode: "json",
314
- outputContract: '{"verdict":"pass|fail|unclear","reason":string,"evidence":string[]}',
315
- taskRules: [
316
- "Return only valid JSON.",
317
- 'Use this exact contract: {"verdict":"pass|fail|unclear","reason":string,"evidence":string[]}.',
318
- 'Return "fail" when the input contains explicit destructive, risky, or clearly unsafe signals.',
319
- 'Return "pass" only when the input clearly supports safety or successful completion.',
320
- "Treat destroy, delete, drop, recreate, replace, revoke, deny, downtime, data loss, IAM risk, and network exposure as important risk signals.",
321
- `If evidence is insufficient, set verdict to "unclear" and reason to "${INSUFFICIENT_SIGNAL_TEXT}".`
322
- ]
323
- };
324
- case "json":
325
- return {
326
- responseMode: "json",
327
- outputContract: outputContract ?? GENERIC_JSON_CONTRACT,
328
- taskRules: [
329
- "Return only valid JSON.",
330
- `Use this exact contract: ${outputContract ?? GENERIC_JSON_CONTRACT}.`,
331
- `If evidence is insufficient, keep the schema valid and use "${INSUFFICIENT_SIGNAL_TEXT}" in the primary explanatory field.`
332
- ]
333
- };
369
/**
 * Rank how strongly a bucket should be treated as the dominant blocker (higher wins).
 *
 * A reason starting with "missing test env:" always ranks highest, regardless of type;
 * otherwise the rank depends on the bucket type alone.
 *
 * @param bucket Object with `reason` and `type`.
 * @returns An integer from 0 (no special priority) to 5.
 */
function dominantBucketPriority(bucket) {
  if (bucket.reason.startsWith("missing test env:")) {
    return 5;
  }
  switch (bucket.type) {
    case "shared_environment_blocker":
      return 4;
    case "import_dependency_failure":
      return 3;
    case "collection_failure":
      return 2;
    case "contract_snapshot_drift":
      return 1;
    default:
      return 0;
  }
}
467
-
468
- // src/prompts/buildPrompt.ts
469
- function buildPrompt(args) {
470
- const policy = resolvePromptPolicy({
471
- format: args.format,
472
- policyName: args.policyName,
473
- outputContract: args.outputContract
387
+ function prioritizeBuckets(buckets) {
388
+ return [...buckets].sort((left, right) => {
389
+ const priorityDelta = dominantBucketPriority(right) - dominantBucketPriority(left);
390
+ if (priorityDelta !== 0) {
391
+ return priorityDelta;
392
+ }
393
+ if (right.count !== left.count) {
394
+ return right.count - left.count;
395
+ }
396
+ if (right.confidence !== left.confidence) {
397
+ return right.confidence - left.confidence;
398
+ }
399
+ return left.reason.localeCompare(right.reason);
474
400
  });
475
- const detailRules = args.policyName === "test-status" && args.detail === "focused" ? [
476
- "Use a focused failure view.",
477
- "When the output clearly maps failures to specific tests or modules, group them by dominant error type first.",
478
- "Within each error group, prefer compact bullets in the form '- test-or-module -> dominant reason'.",
479
- "Cap focused entries at 6 per error group and end with '- and N more failing modules' if more clear mappings are visible.",
480
- "If per-test or per-module mapping is unclear, fall back to grouped root causes instead of guessing."
481
- ] : args.policyName === "test-status" && args.detail === "verbose" ? [
482
- "Use a verbose failure view.",
483
- "When the output clearly maps failures to specific tests or modules, list each visible failing test or module on its own line in the form '- test-or-module -> normalized reason'.",
484
- "Preserve the original file or module order when the mapping is visible.",
485
- "Prefer concrete normalized reasons such as missing modules or assertion failures over traceback plumbing.",
486
- "If per-test or per-module mapping is unclear, fall back to the focused grouped-cause view instead of guessing."
487
- ] : [];
488
- const prompt = [
489
- "You are Sift, a CLI output reduction assistant for downstream agents and automation.",
490
- "Hard rules:",
491
- ...policy.sharedRules.map((rule) => `- ${rule}`),
492
- "",
493
- `Task policy: ${policy.name}`,
494
- ...policy.taskRules.map((rule) => `- ${rule}`),
495
- ...detailRules.map((rule) => `- ${rule}`),
496
- ...policy.outputContract ? ["", `Output contract: ${policy.outputContract}`] : [],
497
- "",
498
- `Question: ${args.question}`,
499
- "",
500
- "Command output:",
501
- '"""',
502
- args.input,
503
- '"""'
504
- ].join("\n");
505
- return {
506
- prompt,
507
- responseMode: policy.responseMode
508
- };
509
- }
510
-
511
- // src/core/quality.ts
512
- var META_PATTERNS = [
513
- /please provide/i,
514
- /need more (?:input|context|information|details)/i,
515
- /provided command output/i,
516
- /based on the provided/i,
517
- /as an ai/i,
518
- /here(?:'s| is) (?:the )?(?:json|answer)/i,
519
- /cannot determine without/i
520
- ];
521
- function normalizeForComparison(input) {
522
- return input.replace(/\r\n/g, "\n").replace(/\r/g, "\n").replace(/\s+/g, " ").trim();
523
401
  }
524
- function isRetriableReason(reason) {
525
- return /timed out|http 408|http 409|http 425|http 429|http 5\d\d|network/i.test(
526
- reason.toLowerCase()
527
- );
402
+ function isDominantBlockerType(type) {
403
+ return type === "shared_environment_blocker" || type === "import_dependency_failure" || type === "collection_failure";
528
404
  }
529
- function looksLikeRejectedModelOutput(args) {
530
- const source = normalizeForComparison(args.source);
531
- const candidate = normalizeForComparison(args.candidate);
532
- if (!candidate) {
533
- return true;
405
+ function labelForBucket(bucket) {
406
+ if (bucket.reason.startsWith("missing test env:")) {
407
+ return "missing test env";
534
408
  }
535
- if (candidate === INSUFFICIENT_SIGNAL_TEXT) {
536
- return false;
409
+ if (bucket.reason.startsWith("fixture guard:")) {
410
+ return "fixture guard";
537
411
  }
538
- if (candidate.includes("```")) {
539
- return true;
412
+ if (bucket.reason.startsWith("service unavailable:")) {
413
+ return "service unavailable";
540
414
  }
541
- if (META_PATTERNS.some((pattern) => pattern.test(candidate))) {
542
- return true;
415
+ if (bucket.reason.startsWith("db refused:")) {
416
+ return "db refused";
543
417
  }
544
- if (args.responseMode === "json") {
545
- const trimmed = args.candidate.trim();
546
- if (!trimmed.startsWith("{") && !trimmed.startsWith("[")) {
547
- return true;
548
- }
418
+ if (bucket.reason.startsWith("auth bypass absent:")) {
419
+ return "auth bypass absent";
549
420
  }
550
- if (source.length >= 800 && candidate.length > source.length * 0.8) {
551
- return true;
421
+ if (bucket.type === "contract_snapshot_drift") {
422
+ if (/openapi/i.test(bucket.headline) || bucket.entities.some((value) => value.startsWith("/api/"))) {
423
+ return "route drift";
424
+ }
425
+ if (/schema/i.test(bucket.headline)) {
426
+ return "schema freeze mismatch";
427
+ }
428
+ if (/model/i.test(bucket.headline)) {
429
+ return "model catalog drift";
430
+ }
431
+ return "stale snapshot";
552
432
  }
553
- if (source.length > 0 && source.length < 800 && candidate.length > source.length + 160) {
554
- return true;
433
+ if (bucket.type === "import_dependency_failure") {
434
+ return "import dependency failure";
555
435
  }
556
- return false;
436
+ if (bucket.type === "assertion_failure") {
437
+ return "assertion failure";
438
+ }
439
+ if (bucket.type === "collection_failure") {
440
+ return "collection failure";
441
+ }
442
+ if (bucket.type === "runtime_failure") {
443
+ return "runtime failure";
444
+ }
445
+ return "unknown failure";
557
446
  }
558
-
559
- // src/core/fallback.ts
560
- var RAW_FALLBACK_SLICE = 1200;
561
- function buildStructuredError(reason) {
447
/**
 * Scores how confident the root-cause statement for a bucket is.
 *
 * - Reasons starting with one of the explicit, self-describing prefixes score 0.95.
 * - Contract snapshot drift scores 0.92 when the bucket lists entities, else 0.76.
 * - Everything else uses the bucket's own confidence clamped into [0.6, 0.88].
 *
 * @param bucket Bucket exposing `reason`, `type`, `entities` and `confidence`.
 * @returns A confidence value between 0.6 and 0.95.
 */
function rootCauseConfidenceFor(bucket) {
  const selfExplainingPrefixes = [
    "missing test env:",
    "missing module:",
    "db refused:",
    "service unavailable:",
    "auth bypass absent:"
  ];
  if (selfExplainingPrefixes.some((prefix) => bucket.reason.startsWith(prefix))) {
    return 0.95;
  }
  if (bucket.type === "contract_snapshot_drift") {
    if (bucket.entities.length > 0) {
      return 0.92;
    }
    return 0.76;
  }
  // Clamp: cap at 0.88 first, then lift to the 0.6 floor.
  const capped = Math.min(bucket.confidence, 0.88);
  return Math.max(0.6, capped);
}
456
/**
 * Produces up to two short evidence strings for a bucket.
 *
 * The first two representative items are rendered as `label -> reason`.
 * When the bucket has no representative items, the first two entities
 * are returned instead.
 *
 * @param bucket Bucket exposing `representativeItems` and `entities`.
 * @returns An array with at most two evidence strings.
 */
function buildBucketEvidence(bucket) {
  const rendered = [];
  for (const item of bucket.representativeItems.slice(0, 2)) {
    rendered.push(`${item.label} -> ${item.reason}`);
  }
  return rendered.length === 0 ? bucket.entities.slice(0, 2) : rendered;
}
463
/**
 * Formats a read target as `file` or `file:line`.
 *
 * @param target Read target exposing `file` and a `line` that may be `null`.
 * @returns The bare file when `line` is `null`, otherwise `file:line`.
 */
function formatReadTargetLocation(target) {
  if (target.line === null) {
    return target.file;
  }
  return `${target.file}:${target.line}`;
}
466
+ function buildReadTargetContextHint(args) {
467
+ if (args.anchor.line !== null) {
468
+ return {
469
+ start_line: Math.max(1, args.anchor.line - 5),
470
+ end_line: args.anchor.line + 5,
471
+ search_hint: null
472
+ };
473
+ }
562
474
  return {
563
- status: "error",
564
- reason,
565
- retriable: isRetriableReason(reason)
475
+ start_line: null,
476
+ end_line: null,
477
+ search_hint: buildReadTargetSearchHint(args.bucket, args.anchor)
566
478
  };
567
479
  }
568
- function buildFallbackOutput(args) {
569
- if (args.format === "verdict") {
570
- return JSON.stringify(
480
/**
 * Explains why a read target is worth opening for a given bucket.
 *
 * Resolution order:
 *   1. a `missing test env: NAME` reason naming an upper-case variable,
 *   2. the known reason prefixes (fixture guard, db refused, ...),
 *   3. the bucket type (with the bucket label refining snapshot drift),
 *   4. a generic sentence built from the bucket label.
 *
 * @param args.bucket Bucket exposing `reason` and `type`.
 * @param args.bucketLabel Human-readable label already derived for the bucket.
 * @returns A sentence fragment starting with "it ...".
 */
function buildReadTargetWhy(args) {
  const { bucket, bucketLabel } = args;
  const envVar = bucket.reason.match(/^missing test env:\s+([A-Z][A-Z0-9_]{2,})$/)?.[1];
  if (envVar) {
    return `it contains the ${envVar} setup guard`;
  }
  const whyByReasonPrefix = [
    ["fixture guard:", "it contains the fixture/setup guard behind this bucket"],
    ["db refused:", "it contains the database connection setup behind this bucket"],
    ["service unavailable:", "it contains the dependency service call or setup behind this bucket"],
    ["auth bypass absent:", "it contains the auth bypass setup behind this bucket"]
  ];
  for (const [prefix, why] of whyByReasonPrefix) {
    if (bucket.reason.startsWith(prefix)) {
      return why;
    }
  }
  switch (bucket.type) {
    case "contract_snapshot_drift":
      switch (bucketLabel) {
        case "route drift":
          return "it maps to the visible route drift bucket";
        case "model catalog drift":
          return "it maps to the visible model drift bucket";
        case "schema freeze mismatch":
          return "it maps to the visible schema freeze mismatch";
        default:
          return "it maps to the visible stale snapshot expectation";
      }
    case "import_dependency_failure":
      return "it is the first visible failing module in this missing dependency bucket";
    case "assertion_failure":
      return "it is the first visible failing test in this bucket";
    case "collection_failure":
      return "it is the first visible collection/setup anchor for this bucket";
    default:
      return `it maps to the visible ${bucketLabel} bucket`;
  }
}
520
/**
 * Picks a search string to look for inside a read target that has no line number.
 *
 * Candidates are tried in this order: the env var named by a
 * `missing test env:` reason; for snapshot drift, the first `/api/` entity
 * (else the first entity, else `null`); the text after a `missing module:`,
 * `fixture guard:`, service/db/auth, or `assertion failed:` prefix; finally the
 * part of the anchor label after the first `::`.
 *
 * @param bucket Bucket exposing `reason`, `type` and `entities`.
 * @param anchor Representative item exposing `label`.
 * @returns The search hint, or `null` when nothing usable is found.
 */
function buildReadTargetSearchHint(bucket, anchor) {
  const captureFromReason = (pattern) => bucket.reason.match(pattern)?.[1];

  const envVar = captureFromReason(/^missing test env:\s+([A-Z][A-Z0-9_]{2,})$/);
  if (envVar) {
    return envVar;
  }
  if (bucket.type === "contract_snapshot_drift") {
    const apiPath = bucket.entities.find((value) => value.startsWith("/api/"));
    return apiPath ?? bucket.entities[0] ?? null;
  }
  const reasonPatterns = [
    /^missing module:\s+(.+)$/,
    /^fixture guard:\s+(.+)$/,
    /^(?:service unavailable|db refused|auth bypass absent):\s+(.+)$/,
    /^assertion failed:\s+(.+)$/
  ];
  for (const pattern of reasonPatterns) {
    const captured = captureFromReason(pattern);
    if (captured) {
      return captured;
    }
  }
  // Last resort: the segment after the first "::" in the anchor label.
  const labelTail = anchor.label.split("::")[1]?.trim();
  return labelTail ? labelTail : null;
}
549
+ function buildReadTargets(args) {
550
+ return args.buckets.map((bucket, index) => ({
551
+ bucket,
552
+ bucketIndex: index + 1,
553
+ bucketLabel: labelForBucket(bucket),
554
+ dominant: args.dominantBucketIndex === index + 1
555
+ })).sort((left, right) => {
556
+ if (left.dominant !== right.dominant) {
557
+ return left.dominant ? -1 : 1;
558
+ }
559
+ return left.bucketIndex - right.bucketIndex;
560
+ }).flatMap(({ bucket, bucketIndex, bucketLabel }) => {
561
+ const anchor = [...bucket.representativeItems].filter((item) => item.file).sort((left, right) => {
562
+ if (left.line !== null !== (right.line !== null)) {
563
+ return left.line !== null ? -1 : 1;
564
+ }
565
+ if (right.anchor_confidence !== left.anchor_confidence) {
566
+ return right.anchor_confidence - left.anchor_confidence;
567
+ }
568
+ return left.label.localeCompare(right.label);
569
+ })[0];
570
+ if (!anchor?.file) {
571
+ return [];
572
+ }
573
+ return [
571
574
  {
572
- ...buildStructuredError(args.reason),
573
- verdict: "unclear",
574
- reason: `Sift fallback: ${args.reason}`,
575
- evidence: []
576
- },
577
- null,
578
- 2
575
+ file: anchor.file,
576
+ line: anchor.line,
577
+ why: buildReadTargetWhy({
578
+ bucket,
579
+ bucketLabel
580
+ }),
581
+ bucket_index: bucketIndex,
582
+ context_hint: buildReadTargetContextHint({
583
+ bucket,
584
+ anchor
585
+ })
586
+ }
587
+ ];
588
+ }).slice(0, 5);
589
+ }
590
/**
 * Turns the generic next-best-action note into a concrete instruction.
 *
 * The primary read target is the one whose `bucket_index` matches the action,
 * falling back to the first read target. Without any read target the
 * original note is returned unchanged.
 *
 * @param args.nextBestAction Action with `code`, `bucket_index` and `note`.
 * @param args.readTargets Read targets already built for the contract.
 * @param args.hasSecondaryVisibleBucket Whether another bucket is visible behind the blocker.
 * @returns The note to show for the action.
 */
function buildConcreteNextNote(args) {
  const action = args.nextBestAction;
  const matchingTarget = args.readTargets.find((target) => target.bucket_index === action.bucket_index);
  const primaryTarget = matchingTarget ?? args.readTargets[0];
  if (!primaryTarget) {
    return action.note;
  }

  // Prefer an explicit line window, then a search hint, then the plain location.
  const hint = primaryTarget.context_hint;
  let lead;
  if (hint.start_line !== null && hint.end_line !== null) {
    lead = `Read ${primaryTarget.file} lines ${hint.start_line}-${hint.end_line} first; ${primaryTarget.why}.`;
  } else if (hint.search_hint) {
    lead = `Search for ${hint.search_hint} in ${primaryTarget.file} first; ${primaryTarget.why}.`;
  } else {
    lead = `Read ${formatReadTargetLocation(primaryTarget)} first; ${primaryTarget.why}.`;
  }

  switch (action.code) {
    case "fix_dominant_blocker":
      if (action.bucket_index === 1 && args.hasSecondaryVisibleBucket) {
        return "Fix bucket 1 first, then rerun the full suite at standard. Secondary buckets are already visible behind it.";
      }
      return `Fix bucket ${action.bucket_index ?? 1} first, then rerun the full suite at standard.`;
    case "read_source_for_bucket":
      return lead;
    default:
      return action.note;
  }
}
607
/**
 * Summarises a contract snapshot diff as three distinct-value counts.
 *
 * Only `contract_snapshot_drift` buckets produce a result. Each count is the
 * number of distinct single-quoted strings found on lines marked `+` or `-`
 * that match the respective pattern. Note that all three patterns accept
 * both markers, even though the keys are named `added_*` / `removed_*`.
 *
 * @param input Raw command output to scan.
 * @param bucket Bucket exposing `type`.
 * @returns An object holding only the non-zero counts, or `null` when the
 *   bucket is not snapshot drift or nothing matched.
 */
function extractMiniDiff(input, bucket) {
  if (bucket.type !== "contract_snapshot_drift") {
    return null;
  }
  const countDistinct = (pattern) => {
    const captured = [...input.matchAll(pattern)].map((match) => match[1]);
    return unique(captured).length;
  };
  const addedPaths = countDistinct(/[+-]\s+'(\/api\/[^']+)'/g);
  const removedModels = countDistinct(/[+-]\s+'([A-Za-z0-9._/-]+-[A-Za-z0-9._-]+)'/g);
  const changedTaskMappings = countDistinct(/[+-]\s+'([a-z]+(?:_[a-z0-9]+)+)'/g);

  const nothingFound = addedPaths === 0 && removedModels === 0 && changedTaskMappings === 0;
  if (nothingFound) {
    return null;
  }
  const miniDiff = {};
  if (addedPaths > 0) {
    miniDiff.added_paths = addedPaths;
  }
  if (removedModels > 0) {
    miniDiff.removed_models = removedModels;
  }
  if (changedTaskMappings > 0) {
    miniDiff.changed_task_mappings = changedTaskMappings;
  }
  return miniDiff;
}
629
/**
 * Builds the leading bullet lines describing the overall test outcome.
 *
 * Cases, in order: nothing collected; clean pass (with optional skip count);
 * collection errors without test failures; otherwise failure and error counts.
 *
 * @param analysis Analysis exposing `noTestsCollected`, `failed`, `errors`,
 *   `passed`, `skipped` and `collectionErrorCount`.
 * @returns One or two bullet lines.
 */
function buildOutcomeLines(analysis) {
  if (analysis.noTestsCollected) {
    return ["- Tests did not run.", "- Collected 0 items."];
  }
  const { failed, errors } = analysis;
  const cleanPass = failed === 0 && errors === 0 && analysis.passed > 0;
  if (cleanPass) {
    const summary = analysis.skipped > 0 ? [formatCount(analysis.passed, "test"), formatCount(analysis.skipped, "skip")] : [formatCount(analysis.passed, "test")];
    return ["- Tests passed.", `- ${summary.join(", ")}.`];
  }
  if (analysis.collectionErrorCount && failed === 0) {
    return [
      "- Tests did not complete.",
      `- ${formatCount(analysis.collectionErrorCount, "error")} occurred during collection.`
    ];
  }
  const counts = [
    ...(failed > 0 ? [formatCount(failed, "test failed", "tests failed")] : []),
    ...(errors > 0 ? [formatCount(errors, "error occurred", "errors occurred")] : [])
  ];
  return counts.length === 0 ? ["- Tests did not pass."] : ["- Tests did not pass.", `- ${counts.join(". ")}.`];
}
658
/**
 * Renders the "Stop signal" bullet for a diagnose contract.
 *
 * @param contract Contract exposing `diagnosis_complete`, `raw_needed`
 *   and `read_raw_only_if`.
 * @returns A single bullet line.
 */
function buildStopSignal(contract) {
  const { diagnosis_complete: complete, raw_needed: rawNeeded } = contract;
  if (complete && !rawNeeded) {
    return "- Stop signal: diagnosis complete; raw not needed.";
  }
  const rawCondition = contract.read_raw_only_if;
  return rawNeeded && rawCondition ? `- Stop signal: diagnosis incomplete; raw only if ${rawCondition}.` : "- Stop signal: diagnosis incomplete; provider or raw traceback may still help.";
}
667
/**
 * Derives the `decision` field from the rest of a diagnose contract.
 *
 * - `read_raw` when raw output is needed or the provider failed,
 * - `zoom` when the diagnosis is not complete,
 * - `read_source` when the next action is `read_source_for_bucket`
 *   (unless no buckets remain and the note says so),
 * - `stop` otherwise.
 *
 * @param contract Contract without its `decision` field.
 * @returns One of `"stop"`, `"zoom"`, `"read_source"`, `"read_raw"`.
 */
function deriveDecision(contract) {
  if (contract.raw_needed || contract.provider_failed) {
    return "read_raw";
  }
  if (!contract.diagnosis_complete) {
    return "zoom";
  }
  const nothingLeft = contract.main_buckets.length === 0 && contract.next_best_action.note === "No failing buckets remain.";
  if (nothingLeft) {
    return "stop";
  }
  return contract.next_best_action.code === "read_source_for_bucket" ? "read_source" : "stop";
}
682
/**
 * Renders the "Decision" bullet for a diagnose contract.
 *
 * @param contract Contract exposing `decision`.
 * @returns The bullet for `stop`, `read_source` or `zoom`; any other value
 *   yields the raw-traceback bullet.
 */
function buildDecisionLine(contract) {
  switch (contract.decision) {
    case "stop":
      return "- Decision: stop and act. Do not escalate unless you need exact traceback lines.";
    case "read_source":
      return "- Decision: read source next. Do not escalate unless exact traceback lines are still needed.";
    case "zoom":
      return "- Decision: zoom. One deeper sift pass is justified before raw.";
    default:
      return "- Decision: raw only if exact traceback is required.";
  }
}
694
+ function buildComparisonLines(contract) {
695
+ const lines = [];
696
+ if (contract.resolved_tests.length > 0) {
697
+ lines.push(
698
+ `- Resolved in this rerun: ${formatCount(contract.resolved_tests.length, "test")} dropped out of the failing set.`
579
699
  );
580
700
  }
581
- if (args.format === "json") {
582
- return JSON.stringify(buildStructuredError(args.reason), null, 2);
701
+ if (contract.resolved_tests.length > 0 && contract.remaining_tests.length > 0) {
702
+ lines.push(
703
+ `- Remaining failing targets: ${formatCount(contract.remaining_tests.length, "test/module", "tests/modules")}.`
704
+ );
583
705
  }
584
- const prefix = `Sift fallback triggered (${args.reason}).`;
585
- if (!args.rawFallback) {
586
- return prefix;
706
+ return lines;
707
+ }
708
/**
 * Renders the one-line headline for a contract bucket.
 *
 * @param bucket Contract bucket exposing `bucket_index`, `label`, `count`
 *   and `root_cause`.
 * @returns `- Bucket <index>: <label> (<count>) -> <root_cause>`.
 */
function renderBucketHeadline(bucket) {
  const { bucket_index: position, label, count, root_cause: rootCause } = bucket;
  return `- Bucket ${position}: ${label} (${count}) -> ${rootCause}`;
}
711
/**
 * Builds the text shown after "Anchor:" in the standard rendering.
 *
 * @param target Read target, or a falsy value when the bucket has none.
 * @returns `null` without a target; otherwise a line window, a search
 *   instruction, or the plain formatted location, in that order of preference.
 */
function buildStandardAnchorText(target) {
  if (!target) {
    return null;
  }
  const hint = target.context_hint;
  const hasLineWindow = hint.start_line !== null && hint.end_line !== null;
  if (hasLineWindow) {
    return `${target.file} lines ${hint.start_line}-${hint.end_line}`;
  }
  return hint.search_hint ? `search ${hint.search_hint} in ${target.file}` : formatReadTargetLocation(target);
}
723
/**
 * Suggests a fix sentence for a bucket in the standard rendering.
 *
 * An explicit bucket hint wins. Otherwise the reason is checked for a named
 * env var, a named missing module, then the known reason prefixes, and finally
 * the bucket type decides.
 *
 * @param args.bucket Raw bucket exposing `hint`, `reason` and `type`.
 * @param args.bucketLabel Label used in the runtime-failure sentence.
 * @returns The fix sentence, or `null` when no rule applies.
 */
function buildStandardFixText(args) {
  const { bucket } = args;
  if (bucket.hint) {
    return bucket.hint;
  }
  const envVar = bucket.reason.match(/^missing test env:\s+([A-Z][A-Z0-9_]{2,})$/)?.[1];
  if (envVar) {
    return `Set ${envVar} before rerunning the affected tests.`;
  }
  const missingModule = bucket.reason.match(/^missing module:\s+(.+)$/)?.[1];
  if (missingModule) {
    return `Install ${missingModule} and rerun the affected tests.`;
  }
  const fixByReasonPrefix = [
    ["fixture guard:", "Restore the missing fixture/setup guard and rerun the full suite at standard."],
    ["db refused:", "Fix the test database connectivity and rerun the full suite at standard."],
    ["service unavailable:", "Restore the dependency service or test double and rerun the full suite at standard."],
    ["auth bypass absent:", "Restore the test auth bypass setup and rerun the full suite at standard."]
  ];
  for (const [prefix, fix] of fixByReasonPrefix) {
    if (bucket.reason.startsWith(prefix)) {
      return fix;
    }
  }
  switch (bucket.type) {
    case "contract_snapshot_drift":
      return "Review the visible drift and regenerate the contract snapshots if the changes are intentional.";
    case "assertion_failure":
      return "Inspect the failing assertion and rerun the full suite at standard.";
    case "collection_failure":
      return "Fix the collection/setup failure and rerun the full suite at standard.";
    case "runtime_failure":
      return `Fix the visible ${args.bucketLabel} and rerun the full suite at standard.`;
    default:
      return null;
  }
}
761
/**
 * Collects the three pieces shown per bucket in the standard rendering.
 *
 * @param args.bucket Raw bucket exposing `summaryLines` (and fields used for the fix text).
 * @param args.contractBucket Contract bucket used for the fallback headline and label.
 * @param args.readTarget Read target for the bucket, if any.
 * @returns `{ headline, anchorText, fixText }`; the headline is the first
 *   summary line when present, otherwise the rendered bucket headline.
 */
function buildStandardBucketSupport(args) {
  const firstSummaryLine = args.bucket.summaryLines[0];
  const headline = firstSummaryLine ? `- ${firstSummaryLine}` : renderBucketHeadline(args.contractBucket);
  const anchorText = buildStandardAnchorText(args.readTarget);
  const fixText = buildStandardFixText({
    bucket: args.bucket,
    bucketLabel: args.contractBucket.label
  });
  return { headline, anchorText, fixText };
}
771
/**
 * Renders the standard-detail text for a diagnose contract.
 *
 * Output: outcome lines, comparison lines, up to three buckets (headline plus
 * optional "Anchor" and "Fix" bullets), then the decision, next-action and
 * stop-signal bullets, joined with newlines.
 *
 * @param args.analysis Test analysis passed to the outcome renderer.
 * @param args.contract Parsed diagnose contract.
 * @param args.buckets Raw buckets, indexed by `bucket_index - 1`.
 * @returns The rendered text.
 */
function renderStandard(args) {
  const { contract } = args;
  const output = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(contract)];
  contract.main_buckets.slice(0, 3).forEach((contractBucket) => {
    const rawBucket = args.buckets[contractBucket.bucket_index - 1];
    if (!rawBucket) {
      // No raw bucket behind this index: fall back to the bare headline.
      output.push(renderBucketHeadline(contractBucket));
      return;
    }
    const readTarget = contract.read_targets.find(
      (target) => target.bucket_index === contractBucket.bucket_index
    );
    const support = buildStandardBucketSupport({
      bucket: rawBucket,
      contractBucket,
      readTarget
    });
    output.push(support.headline);
    if (support.anchorText) {
      output.push(`- Anchor: ${support.anchorText}`);
    }
    if (support.fixText) {
      output.push(`- Fix: ${support.fixText}`);
    }
  });
  output.push(
    buildDecisionLine(contract),
    `- Next: ${contract.next_best_action.note}`,
    buildStopSignal(contract)
  );
  return output.join("\n");
}
801
/**
 * Renders the focused-detail text for a diagnose contract.
 *
 * Every contract bucket is listed with all of its summary lines (or the
 * rendered headline when the raw bucket has none), its evidence strings and
 * an optional hint, followed by the decision, next-action and stop-signal bullets.
 *
 * @param args.analysis Test analysis passed to the outcome renderer.
 * @param args.contract Parsed diagnose contract.
 * @param args.buckets Raw buckets, indexed by `bucket_index - 1`.
 * @returns The rendered text.
 */
function renderFocused(args) {
  const { contract } = args;
  const output = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(contract)];
  contract.main_buckets.forEach((contractBucket) => {
    const rawBucket = args.buckets[contractBucket.bucket_index - 1];
    if (rawBucket?.summaryLines.length) {
      output.push(...rawBucket.summaryLines.map((line) => `- ${line}`));
    } else {
      output.push(renderBucketHeadline(contractBucket));
    }
    for (const evidence of contractBucket.evidence) {
      output.push(`  - ${evidence}`);
    }
    if (rawBucket?.hint) {
      output.push(`  - Hint: ${rawBucket.hint}`);
    }
  });
  output.push(
    buildDecisionLine(contract),
    `- Next: ${contract.next_best_action.note}`,
    buildStopSignal(contract)
  );
  return output.join("\n");
}
820
+ function renderVerbose(args) {
821
+ const lines = [...buildOutcomeLines(args.analysis), ...buildComparisonLines(args.contract)];
822
+ for (const bucket of args.contract.main_buckets) {
823
+ const rawBucket = args.buckets[bucket.bucket_index - 1];
824
+ lines.push(
825
+ ...rawBucket?.summaryLines.length ? rawBucket.summaryLines.map((line) => `- ${line}`) : [renderBucketHeadline(bucket)]
826
+ );
827
+ for (const item of rawBucket?.representativeItems ?? []) {
828
+ lines.push(` - ${item.label} -> ${item.reason}`);
829
+ }
830
+ if (bucket.mini_diff) {
831
+ lines.push(` - mini-diff: ${JSON.stringify(bucket.mini_diff)}`);
832
+ }
833
+ if (rawBucket?.hint) {
834
+ lines.push(` - Hint: ${rawBucket.hint}`);
835
+ }
587
836
  }
588
- return [prefix, "", args.rawInput.slice(-RAW_FALLBACK_SLICE)].join("\n");
837
+ lines.push(buildDecisionLine(args.contract));
838
+ lines.push(`- Next: ${args.contract.next_best_action.note}`);
839
+ lines.push(buildStopSignal(args.contract));
840
+ return lines.join("\n");
841
+ }
842
/**
 * Builds the diagnose contract for a test run plus its three text renderings.
 *
 * Steps:
 *   1. merge and prioritise buckets, keeping at most three;
 *   2. pick the dominant bucket by count, then confidence;
 *   3. derive completeness / raw-needed flags (0.7 confidence threshold);
 *   4. build read targets, contract buckets and the next best action;
 *   5. apply `contractOverrides`, make the next-action note concrete, derive
 *      the decision and validate through the contract schema.
 *
 * @param args.analysis Heuristic test analysis.
 * @param args.input Raw command output (used for mini-diff extraction).
 * @param args.resolvedTests Optional list of tests resolved since the previous run.
 * @param args.remainingTests Optional list of still-failing tests; defaults to
 *   the visible error and failure labels of the analysis.
 * @param args.contractOverrides Optional partial contract spread over the base contract.
 * @returns `{ contract, standardText, focusedText, verboseText }`.
 */
function buildTestStatusDiagnoseContract(args) {
  const { analysis } = args;
  const overrides = args.contractOverrides;
  const buckets = prioritizeBuckets(mergeBuckets(analysis)).slice(0, 3);
  const hasBuckets = buckets.length > 0;
  const allPassed = analysis.failed === 0 && analysis.errors === 0 && analysis.passed > 0;
  const simpleCollectionFailure = analysis.collectionErrorCount !== void 0 && analysis.collectionItems.length === 0 && !hasBuckets;

  // Dominant bucket: highest count wins, confidence breaks ties.
  const ranked = buckets.map((bucket, index) => ({ bucket, index }));
  ranked.sort((left, right) => {
    if (right.bucket.count !== left.bucket.count) {
      return right.bucket.count - left.bucket.count;
    }
    return right.bucket.confidence - left.bucket.confidence;
  });
  const dominantBucket = ranked[0] ?? null;
  const dominantConfidence = dominantBucket?.bucket.confidence ?? 0;

  const diagnosisComplete = allPassed || simpleCollectionFailure || hasBuckets && dominantConfidence >= 0.7;
  const rawNeeded = hasBuckets ? buckets.every((bucket) => bucket.confidence < 0.7) : !(allPassed || simpleCollectionFailure);
  const dominantBlockerBucketIndex = dominantBucket && isDominantBlockerType(dominantBucket.bucket.type) ? dominantBucket.index + 1 : null;

  const readTargets = buildReadTargets({
    buckets,
    dominantBucketIndex: dominantBlockerBucketIndex
  });
  const mainBuckets = buckets.map((bucket, index) => {
    const bucketIndex = index + 1;
    return {
      bucket_index: bucketIndex,
      label: labelForBucket(bucket),
      count: bucket.count,
      root_cause: bucket.reason,
      evidence: buildBucketEvidence(bucket),
      bucket_confidence: Number(bucket.confidence.toFixed(2)),
      root_cause_confidence: Number(rootCauseConfidenceFor(bucket).toFixed(2)),
      dominant: dominantBucket?.index === index,
      secondary_visible_despite_blocker: dominantBlockerBucketIndex !== null && dominantBlockerBucketIndex !== bucketIndex,
      mini_diff: extractMiniDiff(args.input, bucket)
    };
  });
  const hasSecondaryVisibleBucket = mainBuckets.some((bucket) => bucket.secondary_visible_despite_blocker);

  const resolvedTests = unique(args.resolvedTests ?? []);
  const remainingTests = unique(
    args.remainingTests ?? unique([...analysis.visibleErrorLabels, ...analysis.visibleFailedLabels])
  );

  const chooseNextBestAction = () => {
    if (allPassed) {
      return {
        code: "read_source_for_bucket",
        bucket_index: null,
        note: "No failing buckets remain."
      };
    }
    if (simpleCollectionFailure) {
      return {
        code: "read_source_for_bucket",
        bucket_index: null,
        note: "Inspect the collection traceback or setup code next; the run failed before tests executed."
      };
    }
    if (!diagnosisComplete) {
      return {
        code: rawNeeded ? "read_raw_for_exact_traceback" : "insufficient_signal",
        bucket_index: dominantBucket ? dominantBucket.index + 1 : null,
        note: rawNeeded ? "Use focused or verbose detail, and read raw traceback only if exact stack lines are still needed." : "The visible output is not yet specific enough to diagnose reliably."
      };
    }
    if (dominantBlockerBucketIndex !== null) {
      return {
        code: "fix_dominant_blocker",
        bucket_index: dominantBlockerBucketIndex,
        note: dominantBlockerBucketIndex === 1 && hasSecondaryVisibleBucket ? "Fix bucket 1 first, then rerun the full suite at standard. Secondary buckets are already visible behind it." : `Fix bucket ${dominantBlockerBucketIndex} first, then rerun the full suite at standard.`
      };
    }
    return {
      code: rawNeeded ? "read_raw_for_exact_traceback" : "read_source_for_bucket",
      bucket_index: mainBuckets[0]?.bucket_index ?? null,
      note: rawNeeded ? "Read raw traceback only if exact stack lines are required after the current diagnosis." : `Read the source or test code for bucket ${mainBuckets[0]?.bucket_index ?? 1} next.`
    };
  };
  const nextBestAction = chooseNextBestAction();

  const baseContract = {
    status: diagnosisComplete ? "ok" : "insufficient",
    diagnosis_complete: diagnosisComplete,
    raw_needed: rawNeeded,
    additional_source_read_likely_low_value: diagnosisComplete && !rawNeeded,
    read_raw_only_if: rawNeeded ? "you still need exact traceback lines after focused or verbose detail" : null,
    dominant_blocker_bucket_index: dominantBlockerBucketIndex,
    provider_used: false,
    provider_confidence: null,
    provider_failed: false,
    raw_slice_used: false,
    raw_slice_strategy: "none",
    resolved_tests: resolvedTests,
    remaining_tests: remainingTests,
    main_buckets: mainBuckets,
    read_targets: readTargets,
    next_best_action: nextBestAction
  };

  // Overrides win over the base values; status and the concrete note are recomputed afterwards.
  const effectiveNextBestAction = overrides?.next_best_action ?? baseContract.next_best_action;
  const effectiveDiagnosisComplete = overrides?.diagnosis_complete ?? diagnosisComplete;
  const mergedContractWithoutDecision = {
    ...baseContract,
    ...overrides,
    status: effectiveDiagnosisComplete ? "ok" : "insufficient",
    next_best_action: {
      ...effectiveNextBestAction,
      note: buildConcreteNextNote({
        nextBestAction: effectiveNextBestAction,
        readTargets,
        hasSecondaryVisibleBucket
      })
    }
  };
  const contract = testStatusDiagnoseContractSchema.parse({
    ...mergedContractWithoutDecision,
    decision: overrides?.decision ?? deriveDecision(mergedContractWithoutDecision)
  });

  const standardText = renderStandard({ analysis, contract, buckets });
  const focusedText = renderFocused({ analysis, contract, buckets });
  const verboseText = renderVerbose({ analysis, contract, buckets });
  return { contract, standardText, focusedText, verboseText };
}
966
/**
 * Converts the internal diagnose contract into its public shape.
 *
 * The raw test-id lists are replaced by summaries; the ids themselves are
 * added back only when `includeTestIds` is truthy. The result is validated
 * through the public contract schema.
 *
 * @param args.contract Internal diagnose contract.
 * @param args.remainingSubsetAvailable Whether a remaining-subset rerun is available.
 * @param args.includeTestIds Whether to expose `resolved_tests` / `remaining_tests`.
 * @returns The parsed public contract.
 */
function buildTestStatusPublicDiagnoseContract(args) {
  const { resolved_tests, remaining_tests, ...rest } = args.contract;
  const publicShape = {
    ...rest,
    resolved_summary: buildTestTargetSummary(resolved_tests),
    remaining_summary: buildTestTargetSummary(remaining_tests),
    // Only advertise the subset when there is actually something left to rerun.
    remaining_subset_available: args.remainingSubsetAvailable ? remaining_tests.length > 0 : false
  };
  if (args.includeTestIds) {
    publicShape.resolved_tests = resolved_tests;
    publicShape.remaining_tests = remaining_tests;
  }
  return testStatusPublicDiagnoseContractSchema.parse(publicShape);
}
983
+ function buildTestStatusAnalysisContext(args) {
984
+ const publicContract = buildTestStatusPublicDiagnoseContract({
985
+ contract: args.contract,
986
+ includeTestIds: args.includeTestIds,
987
+ remainingSubsetAvailable: args.remainingSubsetAvailable
988
+ });
989
+ const bucketLines = args.contract.main_buckets.length === 0 ? ["- No failing buckets visible."] : args.contract.main_buckets.map(
990
+ (bucket) => `- Bucket ${bucket.bucket_index}: ${bucket.label}; count=${bucket.count}; root_cause=${bucket.root_cause}; dominant=${bucket.dominant}`
991
+ );
992
+ return [
993
+ "Heuristic extract:",
994
+ `- diagnosis_complete=${args.contract.diagnosis_complete}`,
995
+ `- raw_needed=${args.contract.raw_needed}`,
996
+ `- decision=${args.contract.decision}`,
997
+ `- provider_used=${args.contract.provider_used}`,
998
+ `- provider_failed=${args.contract.provider_failed}`,
999
+ `- raw_slice_strategy=${args.contract.raw_slice_strategy}`,
1000
+ `- resolved_summary=${formatTargetSummary(publicContract.resolved_summary)}`,
1001
+ `- remaining_summary=${formatTargetSummary(publicContract.remaining_summary)}`,
1002
+ `- remaining_subset_available=${publicContract.remaining_subset_available}`,
1003
+ ...args.includeTestIds && args.contract.resolved_tests.length > 0 ? [`- resolved_tests=${args.contract.resolved_tests.join(", ")}`] : [],
1004
+ ...args.includeTestIds && args.contract.remaining_tests.length > 0 ? [`- remaining_tests=${args.contract.remaining_tests.join(", ")}`] : [],
1005
+ ...args.contract.read_targets.length > 0 ? args.contract.read_targets.map(
1006
+ (target) => `- read_target[bucket=${target.bucket_index}]=${formatReadTargetLocation(target)} -> ${target.why}${target.context_hint.start_line !== null && target.context_hint.end_line !== null ? `; lines=${target.context_hint.start_line}-${target.context_hint.end_line}` : target.context_hint.search_hint ? `; search=${target.context_hint.search_hint}` : ""}`
1007
+ ) : [],
1008
+ ...bucketLines,
1009
+ `- next_best_action=${args.contract.next_best_action.code}`
1010
+ ].join("\n");
589
1011
  }
590
1012
 
591
1013
  // src/core/heuristics.ts
@@ -610,7 +1032,7 @@ function getCount(input, label) {
610
1032
  const lastMatch = matches.at(-1);
611
1033
  return lastMatch ? Number(lastMatch[1]) : 0;
612
1034
  }
613
- function formatCount(count, singular, plural = `${singular}s`) {
1035
+ function formatCount2(count, singular, plural = `${singular}s`) {
614
1036
  return `${count} ${count === 1 ? singular : plural}`;
615
1037
  }
616
1038
  function countPattern(input, matcher) {
@@ -630,6 +1052,78 @@ function collectUniqueMatches(input, matcher, limit = 6) {
630
1052
  }
631
1053
  return values;
632
1054
  }
1055
/**
 * Builds the placeholder anchor used when no source location is known.
 *
 * @returns {{file: null, line: null, anchor_kind: "none", anchor_confidence: 0}}
 *   A fresh object on every call, so callers may spread or mutate it safely.
 */
function emptyAnchor() {
  return {
    file: null,
    line: null,
    anchor_kind: "none",
    anchor_confidence: 0
  };
}
1063
/**
 * Normalizes a file path taken from test output: every backslash becomes a
 * forward slash and surrounding whitespace is removed.
 *
 * @param {string} value Raw path text.
 * @returns {string} The normalized path.
 */
function normalizeAnchorFile(value) {
  const forwardSlashed = value.replace(/\\/g, "/");
  return forwardSlashed.trim();
}
1066
/**
 * Guesses the file a failure label refers to by taking the text before the
 * first `::` separator of the cleaned label.
 *
 * @param {string} label Failure label as printed by the test runner.
 * @returns {string|null} The normalized file path, or `null` when the leading
 *   segment is empty or does not look like a path ending in a file extension.
 */
function inferFileFromLabel(label) {
  const candidate = cleanFailureLabel(label).split("::")[0]?.trim();
  if (!candidate) {
    return null;
  }
  // Require a path-like character and a trailing ".ext" before trusting the segment.
  const hasPathCharacter = /[./\\]/.test(candidate);
  const hasExtension = /\.[A-Za-z0-9]+$/.test(candidate);
  if (!hasPathCharacter || !hasExtension) {
    return null;
  }
  return normalizeAnchorFile(candidate);
}
1076
/**
 * Derives an anchor from a failure label alone.
 *
 * @param {string} label Failure label as printed by the test runner.
 * @returns {{file: string|null, line: null, anchor_kind: string, anchor_confidence: number}}
 *   A `test_label` anchor (confidence 0.72, no line number) when a file can be
 *   inferred from the label, otherwise the empty anchor.
 */
function buildLabelAnchor(label) {
  const file = inferFileFromLabel(label);
  if (file) {
    return {
      file,
      line: null,
      anchor_kind: "test_label",
      anchor_confidence: 0.72
    };
  }
  return emptyAnchor();
}
1088
/**
 * Extracts a source location from a single line of test output.
 *
 * Recognized shapes, tried in this order:
 *   1. `path/file.ext:LINE[:COL]: in ...`
 *   2. `File "path", line LINE`
 *   3. `ImportError while importing test module 'path/file.ext'` (no line number)
 *
 * @param {string} line One raw output line.
 * @returns {{file: string, line: number|null, anchor_kind: "traceback", anchor_confidence: number}|null}
 *   A traceback anchor, or `null` when the line is blank or matches no shape.
 */
function parseObservedAnchor(line) {
  const text = line.trim();
  if (text.length === 0) {
    return null;
  }
  const tracebackAnchor = (file, lineNumber, confidence) => ({
    file: normalizeAnchorFile(file),
    line: lineNumber,
    anchor_kind: "traceback",
    anchor_confidence: confidence
  });
  // Strict path charset first; the looser pattern is the fallback for paths
  // containing characters outside that set (it still forbids ":" in the path).
  const fileWithLine =
    text.match(/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+):(\d+)(?::\d+)?:\s+in\b/) ??
    text.match(/^([^:\s][^:]*\.[A-Za-z0-9]+):(\d+)(?::\d+)?:\s+in\b/);
  if (fileWithLine) {
    return tracebackAnchor(fileWithLine[1], Number(fileWithLine[2]), 1);
  }
  const pythonTraceback = text.match(/^File\s+"([^"]+)",\s+line\s+(\d+)/);
  if (pythonTraceback) {
    return tracebackAnchor(pythonTraceback[1], Number(pythonTraceback[2]), 1);
  }
  const importModule = text.match(
    /ImportError while importing test module ['"]([^'"]+\.[A-Za-z0-9]+)['"]/i
  );
  if (importModule) {
    // Only the module file is known here, hence the lower confidence.
    return tracebackAnchor(importModule[1], null, 0.92);
  }
  return null;
}
1124
/**
 * Chooses the anchor to attach to a failure item.
 *
 * @param {{label: string, observedAnchor?: object|null}} args
 *   `observedAnchor` wins when it is neither `null` nor `undefined`; otherwise
 *   an anchor is derived from `label`.
 * @returns {object} The selected anchor.
 */
function resolveAnchorForLabel(args) {
  const { observedAnchor, label } = args;
  return observedAnchor ?? buildLabelAnchor(label);
}
633
1127
  function cleanFailureLabel(label) {
634
1128
  return label.trim().replace(/^['"]|['"]$/g, "");
635
1129
  }
@@ -641,6 +1135,9 @@ function isLowValueInternalReason(normalized) {
641
1135
  ) || /\bpython\.py:\d+:\s+in\s+importtestmodule\b/i.test(normalized) || /\bpython\.py:\d+:\s+in\s+import_path\b/i.test(normalized);
642
1136
  }
643
1137
  function scoreFailureReason(reason) {
1138
+ if (reason.startsWith("missing test env:")) {
1139
+ return 6;
1140
+ }
644
1141
  if (reason.startsWith("missing module:")) {
645
1142
  return 5;
646
1143
  }
@@ -655,6 +1152,18 @@ function scoreFailureReason(reason) {
655
1152
  }
656
1153
  return 1;
657
1154
  }
1155
/**
 * Finds the upper-case environment-variable name that a "DB-isolated tests"
 * message says is required.
 *
 * @param {string} normalized A single, already trimmed output line.
 * @returns {string|null} The variable name, or `null` when the line does not
 *   mention one together with "DB-isolated tests".
 */
function extractEnvBlockerName(normalized) {
  // Preferred form: "DB-isolated tests require NAME".
  const direct = normalized.match(
    /\bDB-isolated tests require\s+([A-Z][A-Z0-9_]{2,})\b/
  );
  if (direct) {
    return direct[1];
  }
  // Fallback: NAME appears earlier in the same sentence, before "DB-isolated tests".
  const fallback = normalized.match(
    /\b([A-Z][A-Z0-9_]{2,})\b(?=[^.\n]*DB-isolated tests)/
  );
  return fallback?.[1] ?? null;
}
658
1167
  function classifyFailureReason(line, options) {
659
1168
  const normalized = line.trim().replace(/^[A-Z]\s+/, "");
660
1169
  if (normalized.length === 0) {
@@ -663,6 +1172,61 @@ function classifyFailureReason(line, options) {
663
1172
  if (isLowValueInternalReason(normalized)) {
664
1173
  return null;
665
1174
  }
1175
+ if (/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+):\d+(?::\d+)?:\s+in\b/.test(normalized) || /^([^:\s][^:]*\.[A-Za-z0-9]+):\d+(?::\d+)?:\s+in\b/.test(normalized) || /^File\s+"[^"]+",\s+line\s+\d+/.test(normalized)) {
1176
+ return null;
1177
+ }
1178
+ const envBlocker = extractEnvBlockerName(normalized);
1179
+ if (envBlocker) {
1180
+ return {
1181
+ reason: `missing test env: ${envBlocker}`,
1182
+ group: "DB-backed tests are blocked by missing test environment configuration"
1183
+ };
1184
+ }
1185
+ const missingEnv = normalized.match(
1186
+ /\b(?:environment variable|env(?:ironment)? var(?:iable)?|Missing required env(?:ironment)? variable)\s+([A-Z][A-Z0-9_]{2,})\b/i
1187
+ );
1188
+ if (missingEnv) {
1189
+ return {
1190
+ reason: `missing test env: ${missingEnv[1]}`,
1191
+ group: "tests are blocked by missing environment configuration"
1192
+ };
1193
+ }
1194
+ const keyErrorEnv = normalized.match(/KeyError:\s*['"]([A-Z][A-Z0-9_]{2,})['"]/);
1195
+ if (keyErrorEnv) {
1196
+ return {
1197
+ reason: `missing test env: ${keyErrorEnv[1]}`,
1198
+ group: "tests are blocked by missing environment configuration"
1199
+ };
1200
+ }
1201
+ const fixtureGuard = normalized.match(
1202
+ /(?:FixtureLookupError|fixture guard|requires fixture)\b[^A-Za-z0-9_'-]*([a-z_][a-z0-9_]*)?/i
1203
+ );
1204
+ if (fixtureGuard) {
1205
+ return {
1206
+ reason: `fixture guard: ${fixtureGuard[1] ?? "required fixture unavailable"}`.trim(),
1207
+ group: "fixture guards or setup gates"
1208
+ };
1209
+ }
1210
+ if (/(ECONNREFUSED|ConnectionRefusedError|connection refused|could not connect to server)/i.test(
1211
+ normalized
1212
+ ) && /(postgres|database|db|5432)/i.test(normalized)) {
1213
+ return {
1214
+ reason: "db refused: database connection was refused",
1215
+ group: "database connectivity failures"
1216
+ };
1217
+ }
1218
+ if (/(503\b|service unavailable|temporarily unavailable)/i.test(normalized)) {
1219
+ return {
1220
+ reason: "service unavailable: dependency service is unavailable",
1221
+ group: "service availability failures"
1222
+ };
1223
+ }
1224
+ if (/(auth bypass|test auth|bypass token)/i.test(normalized) && /(missing|absent|not configured|not set|unavailable)/i.test(normalized)) {
1225
+ return {
1226
+ reason: "auth bypass absent: test auth bypass is missing",
1227
+ group: "authentication test setup failures"
1228
+ };
1229
+ }
666
1230
  const pythonMissingModule = normalized.match(
667
1231
  /ModuleNotFoundError:\s+No module named ['"]([^'"]+)['"]/i
668
1232
  );
@@ -735,26 +1299,31 @@ function collectCollectionFailureItems(input) {
735
1299
  const lines = input.split("\n");
736
1300
  let currentLabel = null;
737
1301
  let pendingGenericReason = null;
1302
+ let currentAnchor = null;
738
1303
  for (const line of lines) {
739
1304
  const collecting = line.match(/^_+\s+ERROR collecting\s+(.+?)\s+_+\s*$/);
740
1305
  if (collecting) {
741
1306
  if (currentLabel && pendingGenericReason) {
742
- pushFocusedFailureItem(
743
- items,
744
- {
745
- label: currentLabel,
746
- reason: pendingGenericReason.reason,
747
- group: pendingGenericReason.group
748
- }
749
- );
1307
+ const anchor2 = resolveAnchorForLabel({
1308
+ label: currentLabel,
1309
+ observedAnchor: currentAnchor
1310
+ });
1311
+ pushFocusedFailureItem(items, {
1312
+ label: currentLabel,
1313
+ reason: pendingGenericReason.reason,
1314
+ group: pendingGenericReason.group,
1315
+ ...anchor2
1316
+ });
750
1317
  }
751
1318
  currentLabel = cleanFailureLabel(collecting[1]);
752
1319
  pendingGenericReason = null;
1320
+ currentAnchor = null;
753
1321
  continue;
754
1322
  }
755
1323
  if (!currentLabel) {
756
1324
  continue;
757
1325
  }
1326
+ currentAnchor = parseObservedAnchor(line) ?? currentAnchor;
758
1327
  const classification = classifyFailureReason(line, {
759
1328
  duringCollection: true
760
1329
  });
@@ -765,26 +1334,31 @@ function collectCollectionFailureItems(input) {
765
1334
  pendingGenericReason = classification;
766
1335
  continue;
767
1336
  }
768
- pushFocusedFailureItem(
769
- items,
770
- {
771
- label: currentLabel,
772
- reason: classification.reason,
773
- group: classification.group
774
- }
775
- );
1337
+ const anchor = resolveAnchorForLabel({
1338
+ label: currentLabel,
1339
+ observedAnchor: currentAnchor
1340
+ });
1341
+ pushFocusedFailureItem(items, {
1342
+ label: currentLabel,
1343
+ reason: classification.reason,
1344
+ group: classification.group,
1345
+ ...anchor
1346
+ });
776
1347
  currentLabel = null;
777
1348
  pendingGenericReason = null;
1349
+ currentAnchor = null;
778
1350
  }
779
1351
  if (currentLabel && pendingGenericReason) {
780
- pushFocusedFailureItem(
781
- items,
782
- {
783
- label: currentLabel,
784
- reason: pendingGenericReason.reason,
785
- group: pendingGenericReason.group
786
- }
787
- );
1352
+ const anchor = resolveAnchorForLabel({
1353
+ label: currentLabel,
1354
+ observedAnchor: currentAnchor
1355
+ });
1356
+ pushFocusedFailureItem(items, {
1357
+ label: currentLabel,
1358
+ reason: pendingGenericReason.reason,
1359
+ group: pendingGenericReason.group,
1360
+ ...anchor
1361
+ });
788
1362
  }
789
1363
  return items;
790
1364
  }
@@ -795,54 +1369,95 @@ function collectInlineFailureItems(input) {
795
1369
  if (!inlineFailure) {
796
1370
  continue;
797
1371
  }
1372
+ const cleanedLabel = cleanFailureLabel(inlineFailure[2]);
1373
+ if (!cleanedLabel) {
1374
+ continue;
1375
+ }
798
1376
  const classification = classifyFailureReason(inlineFailure[3], {
799
1377
  duringCollection: false
800
1378
  });
801
1379
  if (!classification) {
802
1380
  continue;
803
1381
  }
804
- pushFocusedFailureItem(
805
- items,
806
- {
807
- label: cleanFailureLabel(inlineFailure[2]),
808
- reason: classification.reason,
809
- group: classification.group
810
- }
811
- );
1382
+ pushFocusedFailureItem(items, {
1383
+ label: cleanedLabel,
1384
+ reason: classification.reason,
1385
+ group: classification.group,
1386
+ ...resolveAnchorForLabel({
1387
+ label: cleanedLabel,
1388
+ observedAnchor: parseObservedAnchor(inlineFailure[3])
1389
+ })
1390
+ });
812
1391
  }
813
1392
  return items;
814
1393
  }
815
- function formatFocusedFailureGroups(args) {
816
- const maxGroups = args.maxGroups ?? 3;
817
- const maxPerGroup = args.maxPerGroup ?? 6;
818
- const grouped = /* @__PURE__ */ new Map();
819
- for (const item of args.items) {
820
- const entries = grouped.get(item.group) ?? [];
821
- entries.push(item);
822
- grouped.set(item.group, entries);
823
- }
824
- const lines = [];
825
- const visibleGroups = [...grouped.entries()].slice(0, maxGroups);
826
- for (const [group, entries] of visibleGroups) {
827
- lines.push(`- ${group}`);
828
- for (const item of entries.slice(0, maxPerGroup)) {
829
- lines.push(` - ${item.label} -> ${item.reason}`);
830
- }
831
- const remaining = entries.length - Math.min(entries.length, maxPerGroup);
832
- if (remaining > 0) {
833
- lines.push(` - and ${remaining} more failing ${args.remainderLabel}`);
834
- }
1394
/**
 * Collects classified failure items from `FAILED <label> - <details>` and
 * `ERROR <label> - <details>` lines, recording which of the two keywords
 * introduced each item.
 *
 * Lines without a label, without details, or whose details cannot be
 * classified are ignored. Unlike `pushFocusedFailureItem`-based collectors,
 * every accepted line is appended as-is.
 *
 * @param {string} input Raw test-runner output.
 * @returns {Array<object>} Items with `label`, `reason`, `group`, `status`
 *   (`"failed"` or `"error"`) and the anchor fields.
 */
function collectInlineFailureItemsWithStatus(input) {
  const items = [];
  for (const line of input.split("\n")) {
    const inlineFailure = line.match(/^(FAILED|ERROR)\s+(.+?)(?:\s+-\s+(.+))?$/);
    if (!inlineFailure) {
      continue;
    }
    const [, keyword, rawLabel, rawDetails] = inlineFailure;
    const label = cleanFailureLabel(rawLabel);
    const details = rawDetails?.trim();
    if (!label || !details) {
      continue;
    }
    const classification = classifyFailureReason(details, {
      duringCollection: false
    });
    if (!classification) {
      continue;
    }
    const anchor = resolveAnchorForLabel({
      label,
      observedAnchor: parseObservedAnchor(details)
    });
    items.push({
      label,
      reason: classification.reason,
      group: classification.group,
      status: keyword === "FAILED" ? "failed" : "error",
      ...anchor
    });
  }
  return items;
}
1428
/**
 * Classifies every `E   <message>` line in the output, independent of which
 * test it belongs to.
 *
 * @param {string} input Raw test-runner output.
 * @returns {Array<{reason: string, group: string}>} One classification per
 *   matching line, in output order. Unclassifiable lines and the generic
 *   "import error during collection" reason are left out.
 */
function collectStandaloneErrorClassifications(input) {
  const classifications = [];
  for (const line of input.split("\n")) {
    const standalone = line.match(/^\s*E\s+(.+)$/);
    if (!standalone) {
      continue;
    }
    const classification = classifyFailureReason(standalone[1], {
      duringCollection: false
    });
    const isUseful =
      Boolean(classification) && classification.reason !== "import error during collection";
    if (isUseful) {
      classifications.push(classification);
    }
  }
  return classifications;
}
842
- function formatVerboseFailureItems(args) {
843
- return chooseStrongestFailureItems(args.items).map(
844
- (item) => `- ${item.label} -> ${item.reason}`
845
- );
1445
/**
 * De-duplicates failure items per `(status, label)` pair, keeping for each
 * pair the item whose reason scores highest via `scoreFailureReason`.
 * On equal scores the earlier item is kept.
 *
 * @param {Array<object>} items Items carrying `status`, `label` and `reason`.
 * @returns {Array<object>} One item per pair, ordered by first appearance.
 */
function chooseStrongestStatusFailureItems(items) {
  // A Map iterates in first-insertion order and overwriting an existing key
  // does not move it, so no separate ordering list is required.
  const strongest = /* @__PURE__ */ new Map();
  for (const item of items) {
    const key = `${item.status}:${item.label}`;
    const current = strongest.get(key);
    if (!current || scoreFailureReason(item.reason) > scoreFailureReason(current.reason)) {
      strongest.set(key, item);
    }
  }
  return Array.from(strongest.values());
}
847
1462
  function summarizeRepeatedTestCauses(input, options) {
848
1463
  const pythonMissingModules = collectUniqueMatches(
@@ -863,236 +1478,1162 @@ function summarizeRepeatedTestCauses(input, options) {
863
1478
  input,
864
1479
  /ModuleNotFoundError:\s+No module named ['"]([^'"]+)['"]/gi
865
1480
  ) + countPattern(input, /Cannot find module ['"]([^'"]+)['"]/gi);
1481
+ const envBlockers = [];
1482
+ let envBlockerHits = 0;
1483
+ for (const line of input.split("\n")) {
1484
+ const envBlocker = extractEnvBlockerName(line.trim().replace(/^[A-Z]\s+/, ""));
1485
+ if (!envBlocker) {
1486
+ continue;
1487
+ }
1488
+ envBlockerHits += 1;
1489
+ if (!envBlockers.includes(envBlocker) && envBlockers.length < 4) {
1490
+ envBlockers.push(envBlocker);
1491
+ }
1492
+ }
866
1493
  const importCollectionHits = countPattern(input, /ImportError while importing test module/gi) + countPattern(input, /^\s*_+\s+ERROR collecting\b/gim);
867
1494
  const genericErrorTypes = collectUniqueMatches(
868
1495
  input,
869
1496
  /\b((?:Assertion|Import|Type|Value|Runtime|Reference|Key|Attribute)[A-Za-z]*Error)\b/gi,
870
1497
  4
871
1498
  );
872
- const bullets = [];
873
- if (options.duringCollection && (importCollectionHits >= 2 || missingModuleHits >= 2) || !options.duringCollection && missingModuleHits >= 2) {
874
- bullets.push(
875
- options.duringCollection ? "- Most failures are import/dependency errors during test collection." : "- Most failures are import/dependency errors."
876
- );
877
- }
878
- if (missingModules.length > 1) {
879
- bullets.push(`- Missing modules include ${missingModules.join(", ")}.`);
880
- } else if (missingModules.length === 1 && missingModuleHits >= 2) {
881
- bullets.push(`- Missing module repeated across failures: ${missingModules[0]}.`);
882
- }
883
- if (bullets.length < 2 && genericErrorTypes.length >= 2) {
884
- bullets.push(`- Repeated error types include ${genericErrorTypes.join(", ")}.`);
885
- }
886
- return bullets.slice(0, 2);
1499
+ const bullets = [];
1500
+ if (envBlockers.length > 0 && envBlockerHits >= 2) {
1501
+ bullets.push(`- Shared test environment blocker detected: ${envBlockers.join(", ")}.`);
1502
+ }
1503
+ if (bullets.length < 2 && (options.duringCollection && (importCollectionHits >= 2 || missingModuleHits >= 2) || !options.duringCollection && missingModuleHits >= 2)) {
1504
+ bullets.push(
1505
+ options.duringCollection ? "- Most failures are import/dependency errors during test collection." : "- Most failures are import/dependency errors."
1506
+ );
1507
+ }
1508
+ if (bullets.length < 2) {
1509
+ if (missingModules.length > 1) {
1510
+ bullets.push(`- Missing modules include ${missingModules.join(", ")}.`);
1511
+ } else if (missingModules.length === 1 && missingModuleHits >= 2) {
1512
+ bullets.push(`- Missing module repeated across failures: ${missingModules[0]}.`);
1513
+ }
1514
+ }
1515
+ if (bullets.length < 2 && genericErrorTypes.length >= 2) {
1516
+ bullets.push(`- Repeated error types include ${genericErrorTypes.join(", ")}.`);
1517
+ }
1518
+ return bullets.slice(0, 2);
1519
+ }
1520
/**
 * Lists the distinct failing/erroring test labels visible in the output.
 *
 * Two line shapes are read:
 *   - progress lines: `tests/... FAILED|ERROR [ nn%]` (optionally with `<- origin`)
 *   - summary lines:  `FAILED|ERROR <label> [- details]`
 *
 * @param {string} input Raw test-runner output.
 * @returns {Array<{label: string, status: "failed"|"error"}>} Unique
 *   `(status, label)` pairs in order of first appearance.
 */
function collectFailureLabels(input) {
  const labels = [];
  const seenKeys = /* @__PURE__ */ new Set();
  const toStatus = (keyword) => (keyword === "FAILED" ? "failed" : "error");
  const remember = (rawLabel, status) => {
    const label = cleanFailureLabel(rawLabel);
    if (!label) {
      return;
    }
    const key = `${status}:${label}`;
    if (seenKeys.has(key)) {
      return;
    }
    seenKeys.add(key);
    labels.push({ label, status });
  };
  for (const line of input.split("\n")) {
    const progress = line.match(
      /^(tests\/.+?)(?:\s+<-\s+\S+)?\s+(FAILED|ERROR)\s+\[[^\]]+\]\s*$/
    );
    if (progress) {
      remember(progress[1], toStatus(progress[2]));
      continue;
    }
    const summary = line.match(/^(FAILED|ERROR)\s+(.+?)(?:\s+-\s+.*)?$/);
    if (summary) {
      remember(summary[2], toStatus(summary[1]));
    }
  }
  return labels;
}
1553
/**
 * Maps a classified failure reason to its bucket type.
 *
 * @param {string} reason Reason text such as `"missing module: foo"`.
 * @returns {string} The bucket type for the first matching known prefix;
 *   `"runtime_failure"` when the reason starts with a capitalized
 *   `...Error:` / `...Exception:` name; otherwise `"unknown_failure"`.
 */
function classifyBucketTypeFromReason(reason) {
  // Checked top to bottom; the first matching prefix wins.
  const prefixToBucketType = [
    ["missing test env:", "shared_environment_blocker"],
    ["fixture guard:", "fixture_guard_failure"],
    ["service unavailable:", "service_unavailable"],
    ["db refused:", "db_connection_failure"],
    ["auth bypass absent:", "auth_bypass_absent"],
    ["missing module:", "import_dependency_failure"],
    ["assertion failed:", "assertion_failure"]
  ];
  for (const [prefix, bucketType] of prefixToBucketType) {
    if (reason.startsWith(prefix)) {
      return bucketType;
    }
  }
  // "RuntimeError:" is covered by this pattern too, so no separate alternative is needed.
  if (/^[A-Z][A-Za-z]+(?:Error|Exception):/.test(reason)) {
    return "runtime_failure";
  }
  return "unknown_failure";
}
1580
/**
 * Builds a "shared blocker" bucket when several errors share one root cause.
 *
 * A reason qualifies when it occurs at least three times, either among the
 * visible error items or among standalone `E ...` lines; if both sources have
 * a qualifying reason, the standalone one is chosen only when its count is
 * strictly greater. A single visible "missing test env:" item also qualifies
 * when nothing else does.
 *
 * @param {object} args
 * @param {string} args.input Raw test-runner output.
 * @param {number} args.errors Error count reported by the runner.
 * @param {Array<object>} args.visibleErrorItems Classified error items.
 * @param {Array<string>} args.errorStatusLabels Labels of visible erroring tests.
 * @returns {object|null} The bucket, or `null` when there are no errors, no
 *   qualifying reason, or the reason is the generic
 *   "import error during collection".
 */
function synthesizeSharedBlockerBucket(args) {
  if (args.errors === 0) {
    return null;
  }
  // Most frequent entry with at least three hits; ties keep first-seen order.
  const pickTop = (groups) =>
    [...groups.entries()]
      .filter(([, stats]) => stats.count >= 3)
      .sort((left, right) => right[1].count - left[1].count)[0];

  const visibleReasonGroups = /* @__PURE__ */ new Map();
  for (const item of args.visibleErrorItems) {
    const stats = visibleReasonGroups.get(item.reason);
    if (stats) {
      stats.count += 1;
      stats.items.push(item);
    } else {
      visibleReasonGroups.set(item.reason, {
        count: 1,
        group: item.group,
        items: [item]
      });
    }
  }
  const standaloneReasonGroups = /* @__PURE__ */ new Map();
  for (const classification of collectStandaloneErrorClassifications(args.input)) {
    const stats = standaloneReasonGroups.get(classification.reason);
    if (stats) {
      stats.count += 1;
    } else {
      standaloneReasonGroups.set(classification.reason, {
        count: 1,
        group: classification.group
      });
    }
  }

  const visibleTop = pickTop(visibleReasonGroups);
  const standaloneTop = pickTop(standaloneReasonGroups);
  const visibleTopReason = visibleTop?.[0];
  const standaloneTopReason = standaloneTop?.[0];
  let chosenReason;
  if (visibleTopReason && standaloneTopReason) {
    chosenReason =
      standaloneTop[1].count > visibleTop[1].count ? standaloneTopReason : visibleTopReason;
  } else {
    chosenReason = visibleTopReason ?? standaloneTopReason;
  }

  // One lone env-blocker item is still treated as a shared blocker.
  const onlyItem = args.visibleErrorItems.length === 1 ? args.visibleErrorItems[0] : null;
  const singleEnvBlockerItem =
    !chosenReason && onlyItem && onlyItem.reason.startsWith("missing test env:") ? onlyItem : null;
  const effectiveReason = chosenReason ?? singleEnvBlockerItem?.reason;
  if (!effectiveReason || effectiveReason === "import error during collection") {
    return null;
  }

  const visibleStats = visibleReasonGroups.get(effectiveReason);
  const resolvedStats = visibleStats ?? standaloneReasonGroups.get(effectiveReason);
  const bucketType = classifyBucketTypeFromReason(effectiveReason);
  const countVisible = resolvedStats.count;
  const group = resolvedStats.group;

  // The runner's total is only claimed when nothing visible contradicts the reason.
  const visibleReasonsAreUniform =
    args.visibleErrorItems.length === 0 ||
    args.visibleErrorItems.every((item) => item.reason === effectiveReason);
  const canClaimAllErrors =
    (args.errorStatusLabels.length >= 3 || Boolean(singleEnvBlockerItem)) &&
    visibleReasonsAreUniform &&
    args.errors >= countVisible;
  const countClaimed = canClaimAllErrors ? args.errors : void 0;
  const countText = countClaimed ?? countVisible;
  const atLeastPrefix = countClaimed ? "" : "At least ";

  const representativeItems =
    visibleStats?.items.slice(0, 4).map((item) => ({
      label: item.label,
      reason: effectiveReason,
      group,
      file: item.file,
      line: item.line,
      anchor_kind: item.anchor_kind,
      anchor_confidence: item.anchor_confidence
    })) ??
    args.errorStatusLabels.slice(0, 4).map((label) => ({
      label,
      reason: effectiveReason,
      group,
      ...buildLabelAnchor(label)
    }));

  const envVar = effectiveReason.match(/^missing test env:\s+([A-Z][A-Z0-9_]{2,})$/)?.[1];
  const lead = `Shared blocker: ${atLeastPrefix}${countText} errors`;
  // Per-prefix wording, consulted only when no env variable name was extracted.
  const wordingByPrefix = [
    {
      prefix: "fixture guard:",
      hint: "Unblock the required fixture or setup guard before rerunning the affected tests.",
      headline: () => `${lead} are gated by the same fixture/setup guard.`
    },
    {
      prefix: "db refused:",
      hint: "Start the expected test database or fix the DSN before rerunning DB-backed tests.",
      headline: () => `${lead} are caused by refused database connections.`
    },
    {
      prefix: "service unavailable:",
      hint: "Restore the unavailable service dependency before rerunning the affected tests.",
      headline: () => `${lead} are caused by an unavailable service dependency.`
    },
    {
      prefix: "auth bypass absent:",
      hint: "Configure the expected auth bypass or test auth fixture before rerunning the affected tests.",
      headline: () => `${lead} are caused by missing auth bypass setup.`
    },
    {
      prefix: "missing module:",
      hint: "Install the missing dependency and rerun the affected tests.",
      headline: () =>
        `${lead} are caused by missing module ${effectiveReason.replace("missing module:", "").trim()}.`
    }
  ];
  let hint;
  let headline;
  if (envVar) {
    hint = `Set ${envVar} (or pass --pgtest-dsn) before rerunning DB-isolated tests.`;
    headline = `${lead} require ${envVar} for DB-isolated tests.`;
  } else {
    const wording = wordingByPrefix.find((entry) => effectiveReason.startsWith(entry.prefix));
    if (wording) {
      hint = wording.hint;
      headline = wording.headline();
    } else {
      // No hint is offered for reasons without dedicated wording.
      headline = `${lead} share ${effectiveReason}.`;
    }
  }

  return {
    type: bucketType,
    headline,
    countVisible,
    countClaimed,
    reason: effectiveReason,
    representativeItems,
    entities: envVar ? [envVar] : [],
    hint,
    confidence: countClaimed ? 0.95 : 0.75,
    summaryLines: [headline],
    overflowCount: Math.max(countText - representativeItems.length, 0),
    overflowLabel: "failing tests/modules"
  };
}
1693
+ function synthesizeImportDependencyBucket(args) {
1694
+ if (args.errors === 0) {
1695
+ return null;
1696
+ }
1697
+ const importItems = args.visibleErrorItems.filter((item) => item.reason.startsWith("missing module:"));
1698
+ if (importItems.length < 2) {
1699
+ return null;
1700
+ }
1701
+ const allVisibleErrorsAreImportRelated = args.visibleErrorItems.length > 0 && args.visibleErrorItems.every((item) => item.reason.startsWith("missing module:"));
1702
+ const countClaimed = allVisibleErrorsAreImportRelated && importItems.length >= 3 && args.errors >= importItems.length ? args.errors : void 0;
1703
+ const modules = Array.from(
1704
+ new Set(
1705
+ importItems.map((item) => item.reason.replace("missing module:", "").trim()).filter(Boolean)
1706
+ )
1707
+ ).slice(0, 6);
1708
+ const headlineCount = countClaimed ?? importItems.length;
1709
+ const headline = countClaimed ? `Import/dependency blocker: ${headlineCount} errors are caused by missing dependencies during test collection.` : `Import/dependency blocker: at least ${headlineCount} visible errors are caused by missing dependencies during test collection.`;
1710
+ const summaryLines = [headline];
1711
+ if (modules.length > 0) {
1712
+ summaryLines.push(`Missing modules include ${modules.join(", ")}.`);
1713
+ }
1714
+ return {
1715
+ type: "import_dependency_failure",
1716
+ headline,
1717
+ countVisible: importItems.length,
1718
+ countClaimed,
1719
+ reason: "missing dependencies during test collection",
1720
+ representativeItems: importItems.slice(0, 4).map((item) => ({
1721
+ label: item.label,
1722
+ reason: item.reason,
1723
+ group: item.group,
1724
+ file: item.file,
1725
+ line: item.line,
1726
+ anchor_kind: item.anchor_kind,
1727
+ anchor_confidence: item.anchor_confidence
1728
+ })),
1729
+ entities: modules,
1730
+ hint: modules.length === 1 ? `Install ${modules[0]} and rerun the affected tests.` : "Install the missing dependencies and rerun the affected tests.",
1731
+ confidence: countClaimed ? 0.95 : 0.8,
1732
+ summaryLines,
1733
+ overflowCount: Math.max((countClaimed ?? importItems.length) - Math.min(importItems.length, 4), 0),
1734
+ overflowLabel: "failing tests/modules"
1735
+ };
1736
+ }
1737
/**
 * Tells whether a test label mentions one of the contract/snapshot keywords
 * (freeze, snapshot, contract, manifest, openapi), case-insensitively.
 *
 * @param {string} label Test label.
 * @returns {boolean}
 */
function isContractDriftLabel(label) {
  const contractKeywords = /(freeze|snapshot|contract|manifest|openapi)/i;
  return contractKeywords.test(label);
}
1740
/**
 * Tells whether a value has the shape of a task key: letters followed by one
 * or more `_segment` parts of letters/digits (case-insensitive), and not an
 * `/api/` path.
 *
 * @param {string} value Candidate text.
 * @returns {boolean}
 */
function looksLikeTaskKey(value) {
  if (value.startsWith("/api/")) {
    return false;
  }
  return /^[a-z]+(?:_[a-z0-9]+)+$/i.test(value);
}
1743
/**
 * Tells whether a value has the shape of a model id: starts with a letter or
 * digit, contains at least one hyphen followed by more id characters
 * (case-insensitive), and is not an `/api/` path.
 *
 * @param {string} value Candidate text.
 * @returns {boolean}
 */
function looksLikeModelId(value) {
  if (value.startsWith("/api/")) {
    return false;
  }
  return /^[a-z0-9][a-z0-9._/-]*-[a-z0-9._-]+$/i.test(value);
}
1746
/**
 * Scans diff-style assertion output (`+ 'value'` / `- 'value'` lines,
 * optionally prefixed with `E`) for the entities that changed.
 *
 * Each quoted value is sorted into exactly one list: `/api/` paths, model ids,
 * task keys, or - when it matches none of those - snapshot keys. Every list
 * holds unique values in first-seen order and is capped at six entries. When
 * no diff line yields an API path, quoted `/api/...` strings anywhere in the
 * input are used instead.
 *
 * @param {string} input Raw test-runner output.
 * @returns {{apiPaths: string[], modelIds: string[], taskKeys: string[], snapshotKeys: string[]}}
 */
function extractContractDriftEntities(input) {
  const MAX_PER_LIST = 6;
  const apiPaths = [];
  const taskKeys = [];
  const modelIds = [];
  const snapshotKeys = [];
  const remember = (list, value) => {
    if (!list.includes(value) && list.length < MAX_PER_LIST) {
      list.push(value);
    }
  };
  for (const line of input.split("\n")) {
    // API paths are matched separately because their diff line may carry
    // trailing content that the stricter whole-line pattern below rejects.
    const diffPathMatch = line.match(/^\s*(?:E\s+)?[+-]\s+'(\/api\/[^']+)'/);
    if (diffPathMatch) {
      const candidatePath = diffPathMatch[1].trim();
      if (candidatePath) {
        remember(apiPaths, candidatePath);
      }
    }
    const diffMatch = line.match(/^\s*(?:E\s+)?[+-]\s+'([^']+)'[,]?\s*$/);
    if (!diffMatch) {
      continue;
    }
    const candidate = diffMatch[1].trim();
    if (!candidate || candidate.startsWith("/api/")) {
      continue;
    }
    if (looksLikeModelId(candidate)) {
      remember(modelIds, candidate);
    } else if (looksLikeTaskKey(candidate)) {
      remember(taskKeys, candidate);
    } else {
      remember(snapshotKeys, candidate);
    }
  }
  if (apiPaths.length === 0) {
    apiPaths.push(
      ...collectUniqueMatches(input, /['"](\/api\/[A-Za-z0-9_./{}:-]+)['"]/g, 6)
    );
  }
  return {
    apiPaths,
    modelIds,
    taskKeys,
    snapshotKeys
  };
}
1798
+ function buildContractRepresentativeReason(args) {
1799
+ if (/openapi/i.test(args.label) && args.entities.apiPaths.length > 0) {
1800
+ const nextPath = args.entities.apiPaths.find((path4) => !args.usedPaths.has(path4)) ?? args.entities.apiPaths[0];
1801
+ args.usedPaths.add(nextPath);
1802
+ return `added path: ${nextPath}`;
1803
+ }
1804
+ if (/(feature|task|manifest|snapshot)/i.test(args.label) && args.entities.modelIds.length > 0) {
1805
+ const nextModel = args.entities.modelIds.find((modelId) => !args.usedModels.has(modelId)) ?? args.entities.modelIds[0];
1806
+ args.usedModels.add(nextModel);
1807
+ return `removed model: ${nextModel}`;
1808
+ }
1809
+ if (args.entities.snapshotKeys.length > 0) {
1810
+ return `snapshot content changed: ${args.entities.snapshotKeys[0]}`;
1811
+ }
1812
+ return "snapshot content changed";
1813
+ }
1814
/**
 * Builds a "contract drift" bucket from failed tests whose labels look like
 * freeze/snapshot/contract tests.
 *
 * @param {object} args
 * @param {string} args.input Raw test-runner output.
 * @param {Array<string>} args.visibleFailedLabels Labels of visible failed tests.
 * @returns {object|null} The bucket, or `null` when no failed label matches
 *   `isContractDriftLabel`.
 */
function synthesizeContractDriftBucket(args) {
  const contractLabels = args.visibleFailedLabels.filter(isContractDriftLabel);
  if (contractLabels.length === 0) {
    return null;
  }
  const entities = extractContractDriftEntities(args.input);
  const { apiPaths, modelIds } = entities;

  // The "used" sets are shared across labels so representative reasons rotate
  // through distinct paths/models before repeating.
  const usedPaths = /* @__PURE__ */ new Set();
  const usedModels = /* @__PURE__ */ new Set();
  const representativeItems = contractLabels.slice(0, 4).map((label) => ({
    label,
    reason: buildContractRepresentativeReason({
      label,
      entities,
      usedPaths,
      usedModels
    }),
    group: "contract drift",
    ...buildLabelAnchor(label)
  }));

  const headline = `Contract drift: ${formatCount2(contractLabels.length, "freeze test")} ${contractLabels.length === 1 ? "is" : "are"} out of sync with current API/model state.`;
  const summaryLines = [headline];
  const hasPaths = apiPaths.length > 0;
  const hasModels = modelIds.length > 0;
  if (hasPaths && hasModels) {
    summaryLines.push(
      `Contract drift includes ${formatCount2(apiPaths.length, "added API path")} and removed model ids such as ${modelIds.slice(0, 3).join(", ")}.`
    );
  } else if (hasPaths) {
    summaryLines.push(
      `OpenAPI drift includes ${formatCount2(apiPaths.length, "added API path")}.`
    );
  } else if (hasModels) {
    summaryLines.push(
      `Snapshot drift includes removed model ids such as ${modelIds.slice(0, 3).join(", ")}.`
    );
  }

  // Quote the regeneration command back to the reader when the output names it.
  const explicitCommand = args.input.match(/python\s+scripts\/update_contract_snapshots\.py/);
  const hint = explicitCommand
    ? `If these changes are intentional, run ${explicitCommand[0]} and rerun the freeze tests.`
    : "If these API/model changes are intentional, regenerate the contract snapshots and rerun the freeze tests.";

  // Paths first, then models, task keys and snapshot keys; at most six overall.
  const listedEntities = [
    ...apiPaths,
    ...modelIds,
    ...entities.taskKeys,
    ...entities.snapshotKeys
  ].slice(0, 6);

  return {
    type: "contract_snapshot_drift",
    headline: summaryLines[0],
    countVisible: contractLabels.length,
    countClaimed: contractLabels.length,
    reason: "freeze snapshots are out of sync with current API/model state",
    representativeItems,
    entities: listedEntities,
    hint,
    confidence: hasPaths || hasModels ? 0.95 : 0.7,
    summaryLines,
    overflowCount: Math.max(listedEntities.length - representativeItems.length, 0),
    overflowLabel: "changed entities"
  };
}
1869
+ function analyzeTestStatus(input) {
1870
+ const passed = getCount(input, "passed");
1871
+ const failed = getCount(input, "failed");
1872
+ const errors = Math.max(getCount(input, "errors"), getCount(input, "error"));
1873
+ const skipped = getCount(input, "skipped");
1874
+ const collectionErrors = input.match(/(\d+)\s+errors?\s+during collection/i);
1875
+ const noTestsCollected = /\bcollected\s+0\s+items\b/i.test(input) || /\bno tests ran\b/i.test(input);
1876
+ const interrupted = /\binterrupted\b/i.test(input) || /\bKeyboardInterrupt\b/i.test(input);
1877
+ const collectionItems = chooseStrongestFailureItems(collectCollectionFailureItems(input));
1878
+ const inlineItems = chooseStrongestFailureItems(collectInlineFailureItems(input));
1879
+ const visibleErrorItems = chooseStrongestStatusFailureItems([
1880
+ ...collectionItems.map((item) => ({
1881
+ ...item,
1882
+ status: "error"
1883
+ })),
1884
+ ...collectInlineFailureItemsWithStatus(input).filter((item) => item.status === "error")
1885
+ ]);
1886
+ const labels = collectFailureLabels(input);
1887
+ const visibleErrorLabels = labels.filter((item) => item.status === "error").map((item) => item.label);
1888
+ const visibleFailedLabels = labels.filter((item) => item.status === "failed").map((item) => item.label);
1889
+ const buckets = [];
1890
+ const sharedBlocker = synthesizeSharedBlockerBucket({
1891
+ input,
1892
+ errors,
1893
+ visibleErrorItems,
1894
+ errorStatusLabels: visibleErrorLabels
1895
+ });
1896
+ if (sharedBlocker) {
1897
+ buckets.push(sharedBlocker);
1898
+ }
1899
+ if (!sharedBlocker) {
1900
+ const importDependencyBucket = synthesizeImportDependencyBucket({
1901
+ errors,
1902
+ visibleErrorItems
1903
+ });
1904
+ if (importDependencyBucket) {
1905
+ buckets.push(importDependencyBucket);
1906
+ }
1907
+ }
1908
+ const contractDrift = synthesizeContractDriftBucket({
1909
+ input,
1910
+ visibleFailedLabels
1911
+ });
1912
+ if (contractDrift) {
1913
+ buckets.push(contractDrift);
1914
+ }
1915
+ return {
1916
+ passed,
1917
+ failed,
1918
+ errors,
1919
+ skipped,
1920
+ noTestsCollected,
1921
+ interrupted,
1922
+ collectionErrorCount: collectionErrors ? Number(collectionErrors[1]) : void 0,
1923
+ inlineItems,
1924
+ collectionItems,
1925
+ visibleErrorLabels,
1926
+ visibleFailedLabels,
1927
+ visibleErrorItems,
1928
+ buckets
1929
+ };
1930
+ }
1931
+ function testStatusHeuristic(input, detail = "standard") {
1932
+ const normalized = input.trim();
1933
+ if (normalized === "") {
1934
+ return null;
1935
+ }
1936
+ const analysis = analyzeTestStatus(input);
1937
+ if (analysis.collectionErrorCount) {
1938
+ if (analysis.collectionItems.length > 0 || analysis.buckets.length > 0) {
1939
+ const decision = buildTestStatusDiagnoseContract({
1940
+ input,
1941
+ analysis
1942
+ });
1943
+ if (detail === "verbose") {
1944
+ return decision.verboseText;
1945
+ }
1946
+ if (detail === "focused") {
1947
+ return decision.focusedText;
1948
+ }
1949
+ return decision.standardText;
1950
+ }
1951
+ return [
1952
+ "- Tests did not complete.",
1953
+ `- ${formatCount2(analysis.collectionErrorCount, "error")} occurred during collection.`,
1954
+ ...summarizeRepeatedTestCauses(input, {
1955
+ duringCollection: true
1956
+ })
1957
+ ].join("\n");
1958
+ }
1959
+ if (analysis.noTestsCollected) {
1960
+ return ["- Tests did not run.", "- Collected 0 items."].join("\n");
1961
+ }
1962
+ if (analysis.interrupted && analysis.failed === 0 && analysis.errors === 0) {
1963
+ return "- Test run was interrupted.";
1964
+ }
1965
+ if (analysis.failed === 0 && analysis.errors === 0 && analysis.passed > 0) {
1966
+ const details = [formatCount2(analysis.passed, "test")];
1967
+ if (analysis.skipped > 0) {
1968
+ details.push(formatCount2(analysis.skipped, "skip"));
1969
+ }
1970
+ return ["- Tests passed.", `- ${details.join(", ")}.`].join("\n");
1971
+ }
1972
+ if (analysis.failed > 0 || analysis.errors > 0 || analysis.inlineItems.length > 0 || analysis.buckets.length > 0) {
1973
+ const decision = buildTestStatusDiagnoseContract({
1974
+ input,
1975
+ analysis
1976
+ });
1977
+ if (detail === "verbose") {
1978
+ return decision.verboseText;
1979
+ }
1980
+ if (detail === "focused") {
1981
+ return decision.focusedText;
1982
+ }
1983
+ return decision.standardText;
1984
+ }
1985
+ return null;
1986
+ }
1987
+ function auditCriticalHeuristic(input) {
1988
+ const vulnerabilities = input.split("\n").map((line) => line.trim()).filter(Boolean).map((line) => {
1989
+ if (!/\b(critical|high)\b/i.test(line)) {
1990
+ return null;
1991
+ }
1992
+ const pkg = inferPackage(line);
1993
+ if (!pkg) {
1994
+ return null;
1995
+ }
1996
+ return {
1997
+ package: pkg,
1998
+ severity: inferSeverity(line),
1999
+ remediation: inferRemediation(pkg)
2000
+ };
2001
+ }).filter((item) => item !== null);
2002
+ if (vulnerabilities.length === 0) {
2003
+ return null;
2004
+ }
2005
+ const firstVulnerability = vulnerabilities[0];
2006
+ return JSON.stringify(
2007
+ {
2008
+ status: "ok",
2009
+ vulnerabilities,
2010
+ summary: vulnerabilities.length === 1 ? `One ${firstVulnerability.severity} vulnerability found in ${firstVulnerability.package}.` : `${vulnerabilities.length} high or critical vulnerabilities found in the provided input.`
2011
+ },
2012
+ null,
2013
+ 2
2014
+ );
2015
+ }
2016
+ function infraRiskHeuristic(input) {
2017
+ const zeroDestructiveEvidence = input.split("\n").map((line) => line.trim()).filter((line) => line.length > 0 && ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line)).slice(0, 3);
2018
+ const riskEvidence = input.split("\n").map((line) => line.trim()).filter(
2019
+ (line) => line.length > 0 && RISK_LINE_PATTERN.test(line) && !ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line)
2020
+ ).slice(0, 3);
2021
+ if (riskEvidence.length > 0) {
2022
+ return JSON.stringify(
2023
+ {
2024
+ verdict: "fail",
2025
+ reason: "Destructive or clearly risky infrastructure change signals are present.",
2026
+ evidence: riskEvidence
2027
+ },
2028
+ null,
2029
+ 2
2030
+ );
2031
+ }
2032
+ if (zeroDestructiveEvidence.length > 0) {
2033
+ return JSON.stringify(
2034
+ {
2035
+ verdict: "pass",
2036
+ reason: "The provided input explicitly indicates zero destructive changes.",
2037
+ evidence: zeroDestructiveEvidence
2038
+ },
2039
+ null,
2040
+ 2
2041
+ );
2042
+ }
2043
+ const safeEvidence = collectEvidence(input, SAFE_LINE_PATTERN);
2044
+ if (safeEvidence.length > 0) {
2045
+ return JSON.stringify(
2046
+ {
2047
+ verdict: "pass",
2048
+ reason: "The provided input explicitly indicates no risky infrastructure changes.",
2049
+ evidence: safeEvidence
2050
+ },
2051
+ null,
2052
+ 2
2053
+ );
2054
+ }
2055
+ return null;
2056
+ }
2057
+ function applyHeuristicPolicy(policyName, input, detail) {
2058
+ if (!policyName) {
2059
+ return null;
2060
+ }
2061
+ if (policyName === "audit-critical") {
2062
+ return auditCriticalHeuristic(input);
2063
+ }
2064
+ if (policyName === "infra-risk") {
2065
+ return infraRiskHeuristic(input);
2066
+ }
2067
+ if (policyName === "test-status") {
2068
+ return testStatusHeuristic(input, detail);
2069
+ }
2070
+ return null;
2071
+ }
2072
+
2073
+ // src/core/insufficient.ts
2074
+ function isInsufficientSignalOutput(output) {
2075
+ const trimmed = output.trim();
2076
+ return trimmed === INSUFFICIENT_SIGNAL_TEXT || trimmed.startsWith(`${INSUFFICIENT_SIGNAL_TEXT}
2077
+ Hint:`);
2078
+ }
2079
+ function buildInsufficientSignalOutput(input) {
2080
+ let hint;
2081
+ if (input.originalLength === 0) {
2082
+ hint = "Hint: no command output was captured.";
2083
+ } else if (input.truncatedApplied) {
2084
+ hint = "Hint: captured output was truncated before a clear summary was found.";
2085
+ } else if (input.presetName === "test-status" && input.exitCode === 0) {
2086
+ hint = "Hint: command succeeded, but no recognizable test summary was found.";
2087
+ } else if (input.presetName === "test-status" && typeof input.exitCode === "number") {
2088
+ hint = "Hint: command failed, but the captured output did not include a recognizable test summary.";
2089
+ } else {
2090
+ hint = "Hint: the captured output did not contain a clear answer for this preset.";
2091
+ }
2092
+ return `${INSUFFICIENT_SIGNAL_TEXT}
2093
+ ${hint}`;
2094
+ }
2095
+
2096
+ // src/core/run.ts
2097
+ import pc from "picocolors";
2098
+
2099
+ // src/providers/systemInstruction.ts
2100
+ var REDUCTION_SYSTEM_INSTRUCTION = "You reduce noisy command output into compact answers for agents and automation.";
2101
+
2102
+ // src/providers/openai.ts
2103
+ function usesNativeJsonResponseFormat(mode) {
2104
+ return mode !== "off";
2105
+ }
2106
+ function extractResponseText(payload) {
2107
+ if (typeof payload?.output_text === "string") {
2108
+ return payload.output_text.trim();
2109
+ }
2110
+ if (!Array.isArray(payload?.output)) {
2111
+ return "";
2112
+ }
2113
+ return payload.output.flatMap((item) => Array.isArray(item?.content) ? item.content : []).map((item) => item?.type === "output_text" ? item.text : "").filter((text) => typeof text === "string" && text.trim().length > 0).join("").trim();
2114
+ }
2115
+ async function buildOpenAIError(response) {
2116
+ let detail = `Provider returned HTTP ${response.status}`;
2117
+ try {
2118
+ const data = await response.json();
2119
+ const message = data?.error?.message;
2120
+ if (typeof message === "string" && message.trim().length > 0) {
2121
+ detail = `${detail}: ${message.trim()}`;
2122
+ }
2123
+ } catch {
2124
+ }
2125
+ return new Error(detail);
2126
+ }
2127
+ var OpenAIProvider = class {
2128
+ name = "openai";
2129
+ baseUrl;
2130
+ apiKey;
2131
+ constructor(options) {
2132
+ this.baseUrl = options.baseUrl.replace(/\/$/, "");
2133
+ this.apiKey = options.apiKey;
2134
+ }
2135
+ async generate(input) {
2136
+ const controller = new AbortController();
2137
+ const timeout = setTimeout(() => controller.abort(), input.timeoutMs);
2138
+ try {
2139
+ const url = new URL("responses", `${this.baseUrl}/`);
2140
+ const response = await fetch(url, {
2141
+ method: "POST",
2142
+ signal: controller.signal,
2143
+ headers: {
2144
+ "content-type": "application/json",
2145
+ ...this.apiKey ? { authorization: `Bearer ${this.apiKey}` } : {}
2146
+ },
2147
+ body: JSON.stringify({
2148
+ model: input.model,
2149
+ instructions: REDUCTION_SYSTEM_INSTRUCTION,
2150
+ input: input.prompt,
2151
+ reasoning: {
2152
+ effort: "minimal"
2153
+ },
2154
+ text: {
2155
+ verbosity: "low",
2156
+ ...input.responseMode === "json" && usesNativeJsonResponseFormat(input.jsonResponseFormat) ? {
2157
+ format: {
2158
+ type: "json_object"
2159
+ }
2160
+ } : {}
2161
+ },
2162
+ max_output_tokens: input.maxOutputTokens
2163
+ })
2164
+ });
2165
+ if (!response.ok) {
2166
+ throw await buildOpenAIError(response);
2167
+ }
2168
+ const data = await response.json();
2169
+ const text = extractResponseText(data);
2170
+ if (!text) {
2171
+ throw new Error("Provider returned an empty response");
2172
+ }
2173
+ const result = {
2174
+ text,
2175
+ usage: data?.usage ? {
2176
+ inputTokens: data.usage.input_tokens,
2177
+ outputTokens: data.usage.output_tokens,
2178
+ totalTokens: data.usage.total_tokens
2179
+ } : void 0,
2180
+ raw: data
2181
+ };
2182
+ clearTimeout(timeout);
2183
+ return result;
2184
+ } catch (error) {
2185
+ clearTimeout(timeout);
2186
+ if (error.name === "AbortError") {
2187
+ throw new Error("Provider request timed out");
2188
+ }
2189
+ throw error;
2190
+ }
2191
+ }
2192
+ };
2193
+
2194
+ // src/providers/openaiCompatible.ts
2195
+ function supportsNativeJsonResponseFormat(baseUrl, mode) {
2196
+ if (mode === "off") {
2197
+ return false;
2198
+ }
2199
+ if (mode === "on") {
2200
+ return true;
2201
+ }
2202
+ return /^https:\/\/api\.openai\.com(?:\/|$)/i.test(baseUrl);
2203
+ }
2204
+ function extractMessageText(payload) {
2205
+ const content = payload?.choices?.[0]?.message?.content;
2206
+ if (typeof content === "string") {
2207
+ return content;
2208
+ }
2209
+ if (Array.isArray(content)) {
2210
+ return content.map((item) => typeof item?.text === "string" ? item.text : "").join("").trim();
2211
+ }
2212
+ return "";
2213
+ }
2214
+ async function buildOpenAICompatibleError(response) {
2215
+ let detail = `Provider returned HTTP ${response.status}`;
2216
+ try {
2217
+ const data = await response.json();
2218
+ const message = data?.error?.message;
2219
+ if (typeof message === "string" && message.trim().length > 0) {
2220
+ detail = `${detail}: ${message.trim()}`;
2221
+ }
2222
+ } catch {
2223
+ }
2224
+ return new Error(detail);
2225
+ }
2226
+ var OpenAICompatibleProvider = class {
2227
+ name = "openai-compatible";
2228
+ baseUrl;
2229
+ apiKey;
2230
+ constructor(options) {
2231
+ this.baseUrl = options.baseUrl.replace(/\/$/, "");
2232
+ this.apiKey = options.apiKey;
2233
+ }
2234
+ async generate(input) {
2235
+ const controller = new AbortController();
2236
+ const timeout = setTimeout(() => controller.abort(), input.timeoutMs);
2237
+ try {
2238
+ const url = new URL("chat/completions", `${this.baseUrl}/`);
2239
+ const response = await fetch(url, {
2240
+ method: "POST",
2241
+ signal: controller.signal,
2242
+ headers: {
2243
+ "content-type": "application/json",
2244
+ ...this.apiKey ? { authorization: `Bearer ${this.apiKey}` } : {}
2245
+ },
2246
+ body: JSON.stringify({
2247
+ model: input.model,
2248
+ temperature: input.temperature,
2249
+ max_tokens: input.maxOutputTokens,
2250
+ ...input.responseMode === "json" && supportsNativeJsonResponseFormat(this.baseUrl, input.jsonResponseFormat) ? { response_format: { type: "json_object" } } : {},
2251
+ messages: [
2252
+ {
2253
+ role: "system",
2254
+ content: REDUCTION_SYSTEM_INSTRUCTION
2255
+ },
2256
+ {
2257
+ role: "user",
2258
+ content: input.prompt
2259
+ }
2260
+ ]
2261
+ })
2262
+ });
2263
+ if (!response.ok) {
2264
+ throw await buildOpenAICompatibleError(response);
2265
+ }
2266
+ const data = await response.json();
2267
+ const text = extractMessageText(data);
2268
+ if (!text.trim()) {
2269
+ throw new Error("Provider returned an empty response");
2270
+ }
2271
+ const result = {
2272
+ text,
2273
+ usage: data?.usage ? {
2274
+ inputTokens: data.usage.prompt_tokens,
2275
+ outputTokens: data.usage.completion_tokens,
2276
+ totalTokens: data.usage.total_tokens
2277
+ } : void 0,
2278
+ raw: data
2279
+ };
2280
+ clearTimeout(timeout);
2281
+ return result;
2282
+ } catch (error) {
2283
+ clearTimeout(timeout);
2284
+ if (error.name === "AbortError") {
2285
+ throw new Error("Provider request timed out");
2286
+ }
2287
+ throw error;
2288
+ }
2289
+ }
2290
+ };
2291
+
2292
+ // src/providers/factory.ts
2293
+ function createProvider(config) {
2294
+ if (config.provider.provider === "openai") {
2295
+ return new OpenAIProvider({
2296
+ baseUrl: config.provider.baseUrl,
2297
+ apiKey: config.provider.apiKey
2298
+ });
2299
+ }
2300
+ if (config.provider.provider === "openai-compatible") {
2301
+ return new OpenAICompatibleProvider({
2302
+ baseUrl: config.provider.baseUrl,
2303
+ apiKey: config.provider.apiKey
2304
+ });
2305
+ }
2306
+ throw new Error(`Unsupported provider: ${config.provider.provider}`);
2307
+ }
2308
+
2309
+ // src/prompts/formats.ts
2310
+ function getGenericFormatPolicy(format, outputContract) {
2311
+ switch (format) {
2312
+ case "brief":
2313
+ return {
2314
+ responseMode: "text",
2315
+ taskRules: [
2316
+ "Return 1 to 3 short sentences.",
2317
+ `If the evidence is insufficient, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2318
+ ]
2319
+ };
2320
+ case "bullets":
2321
+ return {
2322
+ responseMode: "text",
2323
+ taskRules: [
2324
+ "Return at most 5 short lines prefixed with '- '.",
2325
+ `If the evidence is insufficient, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2326
+ ]
2327
+ };
2328
+ case "verdict":
2329
+ return {
2330
+ responseMode: "json",
2331
+ outputContract: '{"verdict":"pass|fail|unclear","reason":string,"evidence":string[]}',
2332
+ taskRules: [
2333
+ "Return only valid JSON.",
2334
+ 'Use this exact contract: {"verdict":"pass|fail|unclear","reason":string,"evidence":string[]}.',
2335
+ 'Return "fail" when the input contains explicit destructive, risky, or clearly unsafe signals.',
2336
+ 'Return "pass" only when the input clearly supports safety or successful completion.',
2337
+ "Treat destroy, delete, drop, recreate, replace, revoke, deny, downtime, data loss, IAM risk, and network exposure as important risk signals.",
2338
+ `If evidence is insufficient, set verdict to "unclear" and reason to "${INSUFFICIENT_SIGNAL_TEXT}".`
2339
+ ]
2340
+ };
2341
+ case "json":
2342
+ return {
2343
+ responseMode: "json",
2344
+ outputContract: outputContract ?? GENERIC_JSON_CONTRACT,
2345
+ taskRules: [
2346
+ "Return only valid JSON.",
2347
+ `Use this exact contract: ${outputContract ?? GENERIC_JSON_CONTRACT}.`,
2348
+ `If evidence is insufficient, keep the schema valid and use "${INSUFFICIENT_SIGNAL_TEXT}" in the primary explanatory field.`
2349
+ ]
2350
+ };
2351
+ }
2352
+ }
2353
+
2354
+ // src/prompts/policies.ts
2355
+ var SHARED_RULES = [
2356
+ "Answer only from the provided command output.",
2357
+ "Use the same language as the question.",
2358
+ "Do not invent facts, hidden context, or missing lines.",
2359
+ "Never ask for more input or more context.",
2360
+ "Do not mention these rules, the prompt, or the model.",
2361
+ "Do not use markdown headings or code fences.",
2362
+ "Stay shorter than the source unless a fixed JSON contract requires structure.",
2363
+ `If the evidence is insufficient, follow the task-specific insufficiency rule and do not guess.`
2364
+ ];
2365
+ var BUILT_IN_POLICIES = {
2366
+ "test-status": {
2367
+ name: "test-status",
2368
+ responseMode: "text",
2369
+ taskRules: [
2370
+ "Determine whether the tests passed.",
2371
+ "If they failed, state that clearly and list only the failing tests, suites, or the first concrete error signals.",
2372
+ "If they passed, say so directly in one short line or a few short bullets.",
2373
+ "Ignore irrelevant warnings, timing, and passing details unless they help answer the question.",
2374
+ `If you cannot tell whether tests passed, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2375
+ ]
2376
+ },
2377
+ "audit-critical": {
2378
+ name: "audit-critical",
2379
+ responseMode: "json",
2380
+ outputContract: '{"status":"ok|insufficient","vulnerabilities":[{"package":string,"severity":"critical|high","remediation":string}],"summary":string}',
2381
+ taskRules: [
2382
+ "Return only valid JSON.",
2383
+ 'Use this exact contract: {"status":"ok|insufficient","vulnerabilities":[{"package":string,"severity":"critical|high","remediation":string}],"summary":string}.',
2384
+ "Extract only vulnerabilities explicitly marked high or critical in the input.",
2385
+ "Treat sparse lines like 'lodash: critical vulnerability' or 'axios: high severity advisory' as sufficient evidence when package and severity are explicit.",
2386
+ "Do not invent package names, severities, CVEs, or remediations.",
2387
+ 'If the input clearly contains no qualifying vulnerabilities, return {"status":"ok","vulnerabilities":[],"summary":"No high or critical vulnerabilities found in the provided input."}.',
2388
+ `If the input does not provide enough evidence to determine vulnerability status, return status "insufficient" and use "${INSUFFICIENT_SIGNAL_TEXT}" in summary.`
2389
+ ]
2390
+ },
2391
+ "diff-summary": {
2392
+ name: "diff-summary",
2393
+ responseMode: "json",
2394
+ outputContract: '{"status":"ok|insufficient","answer":string,"evidence":string[],"risks":string[]}',
2395
+ taskRules: [
2396
+ "Return only valid JSON.",
2397
+ 'Use this exact contract: {"status":"ok|insufficient","answer":string,"evidence":string[],"risks":string[]}.',
2398
+ "Summarize what changed at a high level, grounded only in the visible diff or output.",
2399
+ "Evidence should cite the most important visible files, modules, resources, or actions.",
2400
+ "Risks should include migrations, config changes, security changes, destructive actions, or unknown impact when visible.",
2401
+ `If the change signal is incomplete, return status "insufficient" and use "${INSUFFICIENT_SIGNAL_TEXT}" in answer.`
2402
+ ]
2403
+ },
2404
+ "build-failure": {
2405
+ name: "build-failure",
2406
+ responseMode: "text",
2407
+ taskRules: [
2408
+ "Identify the most likely root cause of the build failure.",
2409
+ "Give the first concrete fix or next step in the same answer.",
2410
+ "Keep the response to 1 or 2 short sentences.",
2411
+ `If the root cause is not visible, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2412
+ ]
2413
+ },
2414
+ "log-errors": {
2415
+ name: "log-errors",
2416
+ responseMode: "text",
2417
+ taskRules: [
2418
+ "Return at most 5 short bullet points.",
2419
+ "Extract only the most relevant error or failure signals.",
2420
+ "Prefer recurring or top-level errors over long stack traces.",
2421
+ "Do not dump full traces unless a single trace line is the key signal.",
2422
+ `If there is no clear error signal, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2423
+ ]
2424
+ },
2425
+ "typecheck-summary": {
2426
+ name: "typecheck-summary",
2427
+ responseMode: "text",
2428
+ taskRules: [
2429
+ "Return at most 5 short bullet points.",
2430
+ "Determine whether the typecheck failed or passed.",
2431
+ "Group repeated diagnostics into root-cause buckets instead of echoing many duplicate lines.",
2432
+ "Mention the first concrete files, symbols, or error categories to fix when they are visible.",
2433
+ "Prefer compiler or type-system errors over timing, progress, or summary noise.",
2434
+ "If the output clearly indicates success, say that briefly and do not add extra bullets.",
2435
+ `If you cannot tell whether the typecheck failed, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2436
+ ]
2437
+ },
2438
+ "lint-failures": {
2439
+ name: "lint-failures",
2440
+ responseMode: "text",
2441
+ taskRules: [
2442
+ "Return at most 5 short bullet points.",
2443
+ "Determine whether lint failed or whether there are no blocking lint failures.",
2444
+ "Group repeated rule violations instead of listing the same rule many times.",
2445
+ "Mention the top offending files and rule names when they are visible.",
2446
+ "Distinguish blocking failures from warnings only when that distinction is clearly visible in the input.",
2447
+ "Do not invent autofixability; only mention autofix or --fix support when the tool output explicitly says so.",
2448
+ "If the output clearly indicates success or no blocking failures, say that briefly and stop.",
2449
+ `If there is not enough evidence to determine the lint result, reply exactly with: ${INSUFFICIENT_SIGNAL_TEXT}`
2450
+ ]
2451
+ },
2452
+ "infra-risk": {
2453
+ name: "infra-risk",
2454
+ responseMode: "json",
2455
+ outputContract: '{"verdict":"pass|fail|unclear","reason":string,"evidence":string[]}',
2456
+ taskRules: [
2457
+ "Return only valid JSON.",
2458
+ 'Use this exact contract: {"verdict":"pass|fail|unclear","reason":string,"evidence":string[]}.',
2459
+ 'Return "fail" when the input contains explicit destructive or clearly risky signals such as destroy, delete, drop, recreate, replace, revoke, deny, downtime, data loss, IAM risk, or network exposure.',
2460
+ 'Treat short plan summaries like "1 to destroy" or "resources to destroy" as enough evidence for "fail".',
2461
+ 'Return "pass" only when the input clearly shows no risky changes or explicitly safe behavior.',
2462
+ 'Return "unclear" when the input is incomplete, ambiguous, or does not show enough evidence to judge safety.',
2463
+ "Evidence should contain the shortest concrete lines or phrases that justify the verdict."
2464
+ ]
2465
+ }
2466
+ };
2467
+ function resolvePromptPolicy(args) {
2468
+ if (args.policyName === "test-status" && args.goal === "diagnose") {
2469
+ return {
2470
+ name: "test-status",
2471
+ responseMode: args.format === "json" ? "json" : "text",
2472
+ outputContract: args.format === "json" ? args.outputContract ?? TEST_STATUS_DIAGNOSE_JSON_CONTRACT : void 0,
2473
+ sharedRules: SHARED_RULES,
2474
+ taskRules: args.format === "json" ? [
2475
+ "Return only valid JSON.",
2476
+ `Use this exact contract: ${args.outputContract ?? TEST_STATUS_DIAGNOSE_JSON_CONTRACT}.`,
2477
+ "Treat the heuristic context as extraction guidance, but do not invent hidden failures.",
2478
+ "Use the heuristic extract as the bucket truth unless the visible command output clearly disproves it.",
2479
+ "Identify the dominant blocker, remaining visible failure buckets, the decision, and the next best action.",
2480
+ "Set diagnosis_complete to true only when the visible output is already sufficient to stop and act.",
2481
+ "Set raw_needed to true only when exact traceback lines are still required.",
2482
+ "Set provider_confidence to a number between 0 and 1, or null only when confidence cannot be estimated."
2483
+ ] : [
2484
+ "Produce a decision-complete diagnosis.",
2485
+ "Name the main failure buckets, include counts and dominant root cause, and end with an explicit Decision line plus an explicit stop signal.",
2486
+ "Prefer blocker-first ordering and keep evidence budget small.",
2487
+ "Do not ask for more context."
2488
+ ]
2489
+ };
2490
+ }
2491
+ if (args.policyName) {
2492
+ const policy = BUILT_IN_POLICIES[args.policyName];
2493
+ return {
2494
+ ...policy,
2495
+ sharedRules: SHARED_RULES
2496
+ };
2497
+ }
2498
+ const genericPolicy = getGenericFormatPolicy(args.format, args.outputContract);
2499
+ return {
2500
+ name: `generic-${args.format}`,
2501
+ responseMode: genericPolicy.responseMode,
2502
+ outputContract: genericPolicy.outputContract,
2503
+ sharedRules: SHARED_RULES,
2504
+ taskRules: genericPolicy.taskRules
2505
+ };
2506
+ }
2507
+
2508
+ // src/prompts/buildPrompt.ts
2509
+ function buildPrompt(args) {
2510
+ const policy = resolvePromptPolicy({
2511
+ format: args.format,
2512
+ goal: args.goal,
2513
+ policyName: args.policyName,
2514
+ outputContract: args.outputContract
2515
+ });
2516
+ const detailRules = args.policyName === "test-status" && args.detail === "focused" ? [
2517
+ "Use a focused failure view.",
2518
+ "When the output clearly maps failures to specific tests or modules, group them by dominant error type first.",
2519
+ "Within each error group, prefer compact bullets in the form '- test-or-module -> dominant reason'.",
2520
+ "Cap focused entries at 6 per error group and end with '- and N more failing modules' if more clear mappings are visible.",
2521
+ "If per-test or per-module mapping is unclear, fall back to grouped root causes instead of guessing."
2522
+ ] : args.policyName === "test-status" && args.detail === "verbose" ? [
2523
+ "Use a verbose failure view.",
2524
+ "When the output clearly maps failures to specific tests or modules, list each visible failing test or module on its own line in the form '- test-or-module -> normalized reason'.",
2525
+ "Preserve the original file or module order when the mapping is visible.",
2526
+ "Prefer concrete normalized reasons such as missing modules or assertion failures over traceback plumbing.",
2527
+ "If per-test or per-module mapping is unclear, fall back to the focused grouped-cause view instead of guessing."
2528
+ ] : [];
2529
+ const prompt = [
2530
+ "You are Sift, a CLI output reduction assistant for downstream agents and automation.",
2531
+ "Hard rules:",
2532
+ ...policy.sharedRules.map((rule) => `- ${rule}`),
2533
+ "",
2534
+ `Goal: ${args.goal ?? "summarize"}`,
2535
+ "",
2536
+ `Task policy: ${policy.name}`,
2537
+ ...policy.taskRules.map((rule) => `- ${rule}`),
2538
+ ...detailRules.map((rule) => `- ${rule}`),
2539
+ ...policy.outputContract ? ["", `Output contract: ${policy.outputContract}`] : [],
2540
+ ...args.analysisContext ? ["", "Visible heuristic context:", '"""', args.analysisContext, '"""'] : [],
2541
+ "",
2542
+ `Question: ${args.question}`,
2543
+ "",
2544
+ "Command output:",
2545
+ '"""',
2546
+ args.input,
2547
+ '"""'
2548
+ ].join("\n");
2549
+ return {
2550
+ prompt,
2551
+ responseMode: policy.responseMode
2552
+ };
2553
+ }
2554
+
2555
+ // src/core/quality.ts
2556
+ var META_PATTERNS = [
2557
+ /please provide/i,
2558
+ /need more (?:input|context|information|details)/i,
2559
+ /provided command output/i,
2560
+ /based on the provided/i,
2561
+ /as an ai/i,
2562
+ /here(?:'s| is) (?:the )?(?:json|answer)/i,
2563
+ /cannot determine without/i
2564
+ ];
2565
+ function normalizeForComparison(input) {
2566
+ return input.replace(/\r\n/g, "\n").replace(/\r/g, "\n").replace(/\s+/g, " ").trim();
2567
+ }
2568
+ function isRetriableReason(reason) {
2569
+ return /timed out|http 408|http 409|http 425|http 429|http 5\d\d|network/i.test(
2570
+ reason.toLowerCase()
2571
+ );
887
2572
  }
888
- function testStatusHeuristic(input, detail = "standard") {
889
- const normalized = input.trim();
890
- if (normalized === "") {
891
- return null;
2573
+ function looksLikeRejectedModelOutput(args) {
2574
+ const source = normalizeForComparison(args.source);
2575
+ const candidate = normalizeForComparison(args.candidate);
2576
+ if (!candidate) {
2577
+ return true;
892
2578
  }
893
- const passed = getCount(input, "passed");
894
- const failed = getCount(input, "failed");
895
- const errors = Math.max(
896
- getCount(input, "errors"),
897
- getCount(input, "error")
898
- );
899
- const skipped = getCount(input, "skipped");
900
- const collectionErrors = input.match(/(\d+)\s+errors?\s+during collection/i);
901
- const noTestsCollected = /\bcollected\s+0\s+items\b/i.test(input) || /\bno tests ran\b/i.test(input);
902
- const interrupted = /\binterrupted\b/i.test(input) || /\bKeyboardInterrupt\b/i.test(input);
903
- const inlineItems = collectInlineFailureItems(input);
904
- if (collectionErrors) {
905
- const count = Number(collectionErrors[1]);
906
- const items = chooseStrongestFailureItems(collectCollectionFailureItems(input));
907
- if (detail === "verbose") {
908
- if (items.length > 0) {
909
- return [
910
- "- Tests did not complete.",
911
- `- ${formatCount(count, "error")} occurred during collection.`,
912
- ...formatVerboseFailureItems({
913
- items
914
- })
915
- ].join("\n");
916
- }
917
- }
918
- if (detail === "focused") {
919
- if (items.length > 0) {
920
- const groupedLines = formatFocusedFailureGroups({
921
- items,
922
- remainderLabel: "modules"
923
- });
924
- if (groupedLines.length > 0) {
925
- return [
926
- "- Tests did not complete.",
927
- `- ${formatCount(count, "error")} occurred during collection.`,
928
- ...groupedLines
929
- ].join("\n");
930
- }
931
- }
932
- }
933
- const causes = summarizeRepeatedTestCauses(input, {
934
- duringCollection: true
935
- });
936
- return [
937
- "- Tests did not complete.",
938
- `- ${formatCount(count, "error")} occurred during collection.`,
939
- ...causes
940
- ].join("\n");
2579
+ if (candidate === INSUFFICIENT_SIGNAL_TEXT) {
2580
+ return false;
941
2581
  }
942
- if (noTestsCollected) {
943
- return ["- Tests did not run.", "- Collected 0 items."].join("\n");
2582
+ if (candidate.includes("```")) {
2583
+ return true;
944
2584
  }
945
- if (interrupted && failed === 0 && errors === 0) {
946
- return "- Test run was interrupted.";
2585
+ if (META_PATTERNS.some((pattern) => pattern.test(candidate))) {
2586
+ return true;
947
2587
  }
948
- if (failed === 0 && errors === 0 && passed > 0) {
949
- const details = [formatCount(passed, "test")];
950
- if (skipped > 0) {
951
- details.push(formatCount(skipped, "skip"));
2588
+ if (args.responseMode === "json") {
2589
+ const trimmed = args.candidate.trim();
2590
+ if (!trimmed.startsWith("{") && !trimmed.startsWith("[")) {
2591
+ return true;
952
2592
  }
953
- return [
954
- "- Tests passed.",
955
- `- ${details.join(", ")}.`
956
- ].join("\n");
957
2593
  }
958
- if (failed > 0 || errors > 0 || inlineItems.length > 0) {
959
- const summarizedInlineItems = chooseStrongestFailureItems(inlineItems);
960
- if (detail === "verbose") {
961
- if (summarizedInlineItems.length > 0) {
962
- const detailLines2 = [];
963
- if (failed > 0) {
964
- detailLines2.push(`- ${formatCount(failed, "test")} failed.`);
965
- }
966
- if (errors > 0) {
967
- detailLines2.push(`- ${formatCount(errors, "error")} occurred.`);
968
- }
969
- return [
970
- "- Tests did not pass.",
971
- ...detailLines2,
972
- ...formatVerboseFailureItems({
973
- items: summarizedInlineItems
974
- })
975
- ].join("\n");
976
- }
977
- }
978
- if (detail === "focused") {
979
- if (summarizedInlineItems.length > 0) {
980
- const detailLines2 = [];
981
- if (failed > 0) {
982
- detailLines2.push(`- ${formatCount(failed, "test")} failed.`);
983
- }
984
- if (errors > 0) {
985
- detailLines2.push(`- ${formatCount(errors, "error")} occurred.`);
986
- }
987
- return [
988
- "- Tests did not pass.",
989
- ...detailLines2,
990
- ...formatFocusedFailureGroups({
991
- items: summarizedInlineItems,
992
- remainderLabel: "tests or modules"
993
- })
994
- ].join("\n");
995
- }
996
- }
997
- const detailLines = [];
998
- const causes = summarizeRepeatedTestCauses(input, {
999
- duringCollection: false
1000
- });
1001
- if (failed > 0) {
1002
- detailLines.push(`- ${formatCount(failed, "test")} failed.`);
1003
- }
1004
- if (errors > 0) {
1005
- detailLines.push(`- ${formatCount(errors, "error")} occurred.`);
1006
- }
1007
- const evidence = input.split("\n").map((line) => line.trim()).filter((line) => /\b(FAILED|ERROR)\b/.test(line)).slice(0, 3).map((line) => `- ${line}`);
1008
- return ["- Tests did not pass.", ...detailLines, ...causes, ...evidence].join("\n");
2594
+ if (source.length >= 800 && candidate.length > source.length * 0.8) {
2595
+ return true;
1009
2596
  }
1010
- return null;
1011
- }
1012
- function auditCriticalHeuristic(input) {
1013
- const vulnerabilities = input.split("\n").map((line) => line.trim()).filter(Boolean).map((line) => {
1014
- if (!/\b(critical|high)\b/i.test(line)) {
1015
- return null;
1016
- }
1017
- const pkg = inferPackage(line);
1018
- if (!pkg) {
1019
- return null;
1020
- }
1021
- return {
1022
- package: pkg,
1023
- severity: inferSeverity(line),
1024
- remediation: inferRemediation(pkg)
1025
- };
1026
- }).filter((item) => item !== null);
1027
- if (vulnerabilities.length === 0) {
1028
- return null;
2597
+ if (source.length > 0 && source.length < 800 && candidate.length > source.length + 160) {
2598
+ return true;
1029
2599
  }
1030
- const firstVulnerability = vulnerabilities[0];
1031
- return JSON.stringify(
1032
- {
1033
- status: "ok",
1034
- vulnerabilities,
1035
- summary: vulnerabilities.length === 1 ? `One ${firstVulnerability.severity} vulnerability found in ${firstVulnerability.package}.` : `${vulnerabilities.length} high or critical vulnerabilities found in the provided input.`
1036
- },
1037
- null,
1038
- 2
1039
- );
2600
+ return false;
1040
2601
  }
1041
- function infraRiskHeuristic(input) {
1042
- const zeroDestructiveEvidence = input.split("\n").map((line) => line.trim()).filter((line) => line.length > 0 && ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line)).slice(0, 3);
1043
- const riskEvidence = input.split("\n").map((line) => line.trim()).filter(
1044
- (line) => line.length > 0 && RISK_LINE_PATTERN.test(line) && !ZERO_DESTRUCTIVE_SUMMARY_PATTERN.test(line)
1045
- ).slice(0, 3);
1046
- if (riskEvidence.length > 0) {
1047
- return JSON.stringify(
1048
- {
1049
- verdict: "fail",
1050
- reason: "Destructive or clearly risky infrastructure change signals are present.",
1051
- evidence: riskEvidence
1052
- },
1053
- null,
1054
- 2
1055
- );
1056
- }
1057
- if (zeroDestructiveEvidence.length > 0) {
1058
- return JSON.stringify(
1059
- {
1060
- verdict: "pass",
1061
- reason: "The provided input explicitly indicates zero destructive changes.",
1062
- evidence: zeroDestructiveEvidence
1063
- },
1064
- null,
1065
- 2
1066
- );
1067
- }
1068
- const safeEvidence = collectEvidence(input, SAFE_LINE_PATTERN);
1069
- if (safeEvidence.length > 0) {
2602
+
2603
+ // src/core/fallback.ts
2604
+ var RAW_FALLBACK_SLICE = 1200;
2605
/**
 * Builds the structured error payload used when the provider step fails.
 *
 * @param reason - Failure reason; echoed in the payload and passed to
 *   `isRetriableReason` to fill the `retriable` flag.
 * @returns An object with `status: "error"`, the reason, the retriable flag,
 *   and `provider_failed` / `raw_needed` both set to `true`.
 */
function buildStructuredError(reason) {
  const retriable = isRetriableReason(reason);
  // Key order is kept stable because callers serialize this object with JSON.stringify.
  const payload = {
    status: "error",
    reason,
    retriable,
    provider_failed: true,
    raw_needed: true,
    why_raw_needed: "Provider follow-up failed, so the reduced answer may still need exact raw evidence."
  };
  return payload;
}
2615
+ function buildFallbackOutput(args) {
2616
+ if (args.format === "verdict") {
1070
2617
  return JSON.stringify(
1071
2618
  {
1072
- verdict: "pass",
1073
- reason: "The provided input explicitly indicates no risky infrastructure changes.",
1074
- evidence: safeEvidence
2619
+ ...buildStructuredError(args.reason),
2620
+ verdict: "unclear",
2621
+ reason: `Sift fallback: ${args.reason}`,
2622
+ evidence: []
1075
2623
  },
1076
2624
  null,
1077
2625
  2
1078
2626
  );
1079
2627
  }
1080
- return null;
1081
- }
1082
- function applyHeuristicPolicy(policyName, input, detail) {
1083
- if (!policyName) {
1084
- return null;
1085
- }
1086
- if (policyName === "audit-critical") {
1087
- return auditCriticalHeuristic(input);
1088
- }
1089
- if (policyName === "infra-risk") {
1090
- return infraRiskHeuristic(input);
2628
+ if (args.format === "json") {
2629
+ return JSON.stringify(buildStructuredError(args.reason), null, 2);
1091
2630
  }
1092
- if (policyName === "test-status") {
1093
- return testStatusHeuristic(input, detail);
2631
+ const prefix = `Sift fallback triggered (${args.reason}).`;
2632
+ const rawHint = "Raw may still be needed because provider follow-up failed.";
2633
+ if (!args.rawFallback) {
2634
+ return `${prefix} ${rawHint}`;
1094
2635
  }
1095
- return null;
2636
+ return [prefix, rawHint, "", args.rawInput.slice(-RAW_FALLBACK_SLICE)].join("\n");
1096
2637
  }
1097
2638
 
1098
2639
  // src/core/redact.ts
@@ -1195,8 +2736,297 @@ function prepareInput(raw, config) {
1195
2736
  };
1196
2737
  }
1197
2738
 
2739
+ // src/core/rawSlice.ts
2740
/**
 * Escapes every regular-expression metacharacter in `value` so the result can
 * be embedded in a `RegExp` source and match the text literally.
 *
 * @param value - Text to escape.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (match) => `\\${match}`);
}
2743
/**
 * Removes duplicate entries from `values`, keeping the first occurrence of
 * each value in its original position.
 *
 * @param values - Iterable of values to de-duplicate.
 * @returns A new array containing each distinct value once.
 */
function unique2(values) {
  const distinct = new Set(values);
  return Array.from(distinct);
}
2746
/**
 * Collects the lines that fall inside a window of `radius` lines around each
 * index in `indexes`, returned in ascending line order.
 *
 * @param args.lines - All available lines.
 * @param args.indexes - Center line indexes, processed in the given order.
 * @param args.radius - Number of lines to include on each side of a center.
 * @param args.maxLines - Collection stops as soon as this many distinct line
 *   indexes have been gathered.
 * @returns The selected lines, sorted by their position in `args.lines`.
 */
function buildLineWindows(args) {
  const { lines, indexes, radius, maxLines } = args;
  const picked = new Set();
  const lastPosition = lines.length - 1;
  for (const center of indexes) {
    const from = Math.max(0, center - radius);
    const to = Math.min(lastPosition, center + radius);
    let position = from;
    while (position <= to) {
      picked.add(position);
      if (picked.size >= maxLines) {
        break;
      }
      position += 1;
    }
    // Checked after every window so the cap also ends the outer walk.
    if (picked.size >= maxLines) {
      break;
    }
  }
  const ordered = Array.from(picked).sort((left, right) => left - right);
  return ordered.map((position) => lines[position]);
}
2761
/**
 * Joins the de-duplicated selected lines into one trimmed text that fits
 * within `maxInputChars`.
 *
 * @param args.lines - Selected lines; duplicates are dropped via `unique2`.
 * @param args.maxInputChars - Maximum length of the returned text.
 * @param args.fallback - Called to produce the result when there are no lines
 *   or the joined text is empty after trimming.
 * @returns The joined text, the `truncateInput` result text when the joined
 *   text is too long, or the fallback value.
 */
function collapseSelectedLines(args) {
  const limit = args.maxInputChars;
  const text = args.lines.length === 0 ? "" : unique2(args.lines).join("\n").trim();
  if (text.length === 0) {
    return args.fallback();
  }
  if (text.length <= limit) {
    return text;
  }
  // Head/tail budgets: 55% and 20% of the limit, floored at 200 and 120
  // characters, and never larger than the limit itself.
  const headBudget = Math.min(Math.max(200, Math.floor(limit * 0.55)), limit);
  const tailBudget = Math.min(Math.max(120, Math.floor(limit * 0.2)), limit);
  const truncated = truncateInput(text, {
    maxInputChars: limit,
    headChars: headBudget,
    tailChars: tailBudget
  });
  return truncated.text;
}
2778
/**
 * Interleaves lines from several groups round-robin into one text that fits
 * within `maxInputChars`.
 *
 * Each pass takes at most one new line from every group, so earlier groups
 * cannot starve later ones. Lines are right-trimmed, empty lines are dropped,
 * and a line already emitted by any group is skipped. A line that would push
 * the text over the limit is discarded and that group waits for the next pass.
 *
 * @param args.groups - Arrays of candidate lines, in priority order.
 * @param args.maxInputChars - Maximum length of the joined result.
 * @param args.fallback - Called to produce the result when nothing was selected.
 * @returns The selected lines joined with "\n", or the fallback value.
 */
function collapseSelectedLineGroups(args) {
  const selected = [];
  const seen = new Set();
  const groups = args.groups.map(
    (group) => group.map((line) => line.trimEnd()).filter((line) => line.length > 0)
  );
  const cursors = groups.map(() => 0);
  // Length of selected.join("\n"), maintained incrementally so the text does
  // not have to be re-joined for every candidate line.
  let joinedLength = 0;
  let addedInPass = true;
  while (addedInPass) {
    addedInPass = false;
    for (const [groupIndex, group] of groups.entries()) {
      while (cursors[groupIndex] < group.length) {
        const line = group[cursors[groupIndex]];
        cursors[groupIndex] += 1;
        if (seen.has(line)) {
          continue;
        }
        const candidateLength = selected.length === 0 ? line.length : joinedLength + 1 + line.length;
        if (candidateLength > args.maxInputChars) {
          // The oversized line stays consumed; this group retries on the next pass.
          break;
        }
        selected.push(line);
        seen.add(line);
        joinedLength = candidateLength;
        addedInPass = true;
        break;
      }
    }
  }
  if (selected.length === 0) {
    return args.fallback();
  }
  return selected.join("\n");
}
2811
/**
 * Produces a raw slice by delegating to `truncateInput` with the configured
 * head/tail budgets.
 *
 * @param input - Full text to reduce.
 * @param config - Input config providing `maxInputChars`, `headChars` and `tailChars`.
 * @returns A slice descriptor with strategy "head_tail"; `used` mirrors the
 *   `truncatedApplied` flag reported by `truncateInput`.
 */
function buildHeadTailFallback(input, config) {
  const { maxInputChars, headChars, tailChars } = config;
  const { text, truncatedApplied } = truncateInput(input, {
    maxInputChars,
    headChars,
    tailChars
  });
  return { text, strategy: "head_tail", used: truncatedApplied };
}
2823
+ function findReadTargetIndexes(args) {
2824
+ const escapedFile = escapeRegExp(args.file);
2825
+ const exactPatterns = args.line === null ? [new RegExp(escapedFile)] : [
2826
+ new RegExp(`${escapedFile}:${args.line}(?::\\d+)?`),
2827
+ new RegExp(`File\\s+"${escapedFile}",\\s+line\\s+${args.line}\\b`),
2828
+ new RegExp(`['"]${escapedFile}['"].*\\b${args.line}\\b`)
2829
+ ];
2830
+ const matches = args.lines.map(
2831
+ (line, index) => exactPatterns.some((pattern) => pattern.test(line)) ? index : -1
2832
+ ).filter((index) => index >= 0);
2833
+ if (matches.length > 0) {
2834
+ return matches;
2835
+ }
2836
+ if (args.contextHint.start_line !== null && args.contextHint.end_line !== null) {
2837
+ const startLine = args.contextHint.start_line;
2838
+ const endLine = args.contextHint.end_line;
2839
+ const rangeMatches = args.lines.map((line, index) => {
2840
+ const fileWithLine = line.match(/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+):(\d+)(?::\d+)?:\s+in\b/) ?? line.match(/^([^:\s][^:]*\.[A-Za-z0-9]+):(\d+)(?::\d+)?:\s+in\b/) ?? line.match(/^File\s+"([^"]+)",\s+line\s+(\d+)/);
2841
+ if (!fileWithLine || !fileWithLine[1] || !fileWithLine[2]) {
2842
+ return -1;
2843
+ }
2844
+ if (fileWithLine[1].replace(/\\/g, "/") !== args.file) {
2845
+ return -1;
2846
+ }
2847
+ const lineNumber = Number(fileWithLine[2]);
2848
+ return lineNumber >= startLine && lineNumber <= endLine ? index : -1;
2849
+ }).filter((index) => index >= 0);
2850
+ if (rangeMatches.length > 0) {
2851
+ return rangeMatches;
2852
+ }
2853
+ }
2854
+ if (args.line !== null) {
2855
+ return [];
2856
+ }
2857
+ return args.lines.map((line, index) => line.includes(args.file) ? index : -1).filter((index) => index >= 0);
2858
+ }
2859
+ function findSearchHintIndexes(args) {
2860
+ if (!args.searchHint) {
2861
+ return [];
2862
+ }
2863
+ const pattern = new RegExp(escapeRegExp(args.searchHint), "i");
2864
+ return args.lines.map((line, index) => pattern.test(line) ? index : -1).filter((index) => index >= 0);
2865
+ }
2866
/**
 * Builds a raw slice made of small windows around lines that look like
 * failure output (traceback headers, "E " lines, error/failed/exception words).
 *
 * @param args.input - Full command output.
 * @param args.config - Input config providing `maxInputChars`, `headChars` and `tailChars`.
 * @returns The head/tail fallback slice when no line matches; otherwise a
 *   slice with strategy "traceback_window" and `used: true`.
 */
function buildTracebackSlice(args) {
  const { input, config } = args;
  const signalPattern = /(traceback|^E\s|error\b|failed\b|exception\b|assertionerror\b|runtimeerror\b)/i;
  const allLines = input.split("\n");
  const hitIndexes = [];
  allLines.forEach((line, position) => {
    if (signalPattern.test(line)) {
      hitIndexes.push(position);
    }
  });
  if (hitIndexes.length === 0) {
    return buildHeadTailFallback(input, config);
  }
  // Used by collapseSelectedLines when the windowed selection turns out empty.
  const headTailText = () => truncateInput(input, {
    maxInputChars: config.maxInputChars,
    headChars: config.headChars,
    tailChars: config.tailChars
  }).text;
  const windowedLines = buildLineWindows({
    lines: allLines,
    indexes: hitIndexes,
    radius: 3,
    maxLines: 80
  });
  const text = collapseSelectedLines({
    lines: windowedLines,
    maxInputChars: config.maxInputChars,
    fallback: headTailText
  });
  return { text, strategy: "traceback_window", used: true };
}
2894
/**
 * Builds the raw slice sent to the provider for the test-status policy.
 *
 * Input that already fits within `maxInputChars` is returned untouched.
 * Otherwise lines are gathered into groups and interleaved by
 * `collapseSelectedLineGroups`, in this priority order:
 *  1. one group per read target (file/line references and search-hint hits);
 *  2. test-run summary lines with one line of context;
 *  3. one group per bucket (lines mentioning the bucket's root cause or evidence);
 *  4. FAILED / ERROR / "E " lines with one line of context.
 *
 * @param args.input - Full (redacted) command output.
 * @param args.config - Input config providing `maxInputChars`, `headChars` and `tailChars`.
 * @param args.contract - Diagnose contract; `main_buckets` and `read_targets` are read.
 * @returns `{ text, strategy, used }`, where strategy is "none", "bucket_evidence",
 *   or whatever `buildTracebackSlice` reports when the selection is blank.
 */
function buildTestStatusRawSlice(args) {
  const { input, config, contract } = args;
  if (input.length <= config.maxInputChars) {
    return {
      text: input,
      strategy: "none",
      used: false
    };
  }
  const lines = input.split("\n");
  // Indexes of the lines accepted by `predicate`, in ascending order.
  const indexesWhere = (predicate) => {
    const found = [];
    lines.forEach((line, index) => {
      if (predicate(line)) {
        found.push(index);
      }
    });
    return found;
  };
  // The non-empty lines at the given indexes.
  const linesAt = (indexes) => indexes.map((index) => lines[index]).filter(Boolean);

  const summaryPattern = /(=+.*(?:failed|errors?|passed|no tests ran|interrupted).*=+|\b\d+\s+failed\b|\b\d+\s+errors?\b)/i;
  const summaryIndexes = indexesWhere((line) => summaryPattern.test(line));

  const bucketGroups = contract.main_buckets.map((bucket) => {
    // Only the text after the last ":" is searched for; terms shorter than
    // four characters are dropped.
    const bucketTerms = unique2(
      [bucket.root_cause, ...bucket.evidence]
        .map((value) => value.slice(value.lastIndexOf(":") + 1).trim())
        .filter((value) => value.length >= 4)
    );
    // Compiled once per bucket instead of once per (line, term) pair.
    const termPatterns = bucketTerms.map((term) => new RegExp(escapeRegExp(term), "i"));
    const indexes = indexesWhere((line) => termPatterns.some((pattern) => pattern.test(line)));
    return unique2([
      ...linesAt(indexes),
      ...buildLineWindows({
        lines,
        indexes,
        radius: 2,
        maxLines: 16
      })
    ]);
  });

  const targetGroups = contract.read_targets.map(
    (target) => buildLineWindows({
      lines,
      indexes: unique2([
        ...findReadTargetIndexes({
          lines,
          file: target.file,
          line: target.line,
          contextHint: target.context_hint
        }),
        ...findSearchHintIndexes({
          lines,
          searchHint: target.context_hint.search_hint
        })
      ]),
      // Targets without a line number get a tighter window.
      radius: target.line === null ? 1 : 2,
      maxLines: target.line === null ? 6 : 8
    })
  );

  const failureIndexes = indexesWhere((line) => /\b(FAILED|ERROR)\b/.test(line) || /^E\s/.test(line));

  const selected = collapseSelectedLineGroups({
    groups: [
      ...targetGroups,
      unique2([
        ...linesAt(summaryIndexes),
        ...buildLineWindows({
          lines,
          indexes: summaryIndexes,
          radius: 1,
          maxLines: 12
        })
      ]),
      ...bucketGroups,
      buildLineWindows({
        lines,
        indexes: failureIndexes,
        radius: 1,
        maxLines: 24
      })
    ],
    maxInputChars: config.maxInputChars,
    fallback: () => truncateInput(input, {
      maxInputChars: config.maxInputChars,
      headChars: config.headChars,
      tailChars: config.tailChars
    }).text
  });
  if (selected.trim().length === 0) {
    return buildTracebackSlice({
      input,
      config
    });
  }
  return {
    text: selected,
    strategy: "bucket_evidence",
    used: true
  };
}
2984
/**
 * Builds the raw slice for policies without test-status handling.
 *
 * @param args.input - Full (redacted) command output.
 * @param args.config - Input config; `maxInputChars` decides whether slicing is needed.
 * @returns The input unchanged (strategy "none", `used: false`) when it fits,
 *   otherwise the result of `buildTracebackSlice(args)`.
 */
function buildGenericRawSlice(args) {
  const fitsBudget = args.input.length <= args.config.maxInputChars;
  if (!fitsBudget) {
    return buildTracebackSlice(args);
  }
  return { text: args.input, strategy: "none", used: false };
}
2994
+
1198
2995
  // src/core/run.ts
1199
2996
  var RETRY_DELAY_MS = 300;
2997
/**
 * Estimates a token count as one token per four characters, rounded up,
 * with a minimum of one.
 *
 * @param text - Text to measure.
 * @returns The estimated token count (at least 1).
 */
function estimateTokenCount(text) {
  const charsPerToken = 4;
  const estimate = Math.ceil(text.length / charsPerToken);
  return Math.max(1, estimate);
}
3000
/**
 * Names the layer reported in verbose telemetry for a diagnose contract.
 *
 * @param contract - Contract with `raw_needed`, `provider_failed` and `provider_used` flags.
 * @returns "raw" when raw output is needed or the provider failed, "provider"
 *   when the provider was used, otherwise "heuristic".
 */
function getDiagnosisCompleteAtLayer(contract) {
  const needsRaw = contract.raw_needed || contract.provider_failed;
  if (needsRaw) {
    return "raw";
  }
  return contract.provider_used ? "provider" : "heuristic";
}
3009
/**
 * Writes one `key=value` telemetry line per metric to stderr, but only when
 * `request.config.runtime.verbose` is set.
 *
 * @param args.request - Current request (verbose flag, stdin, includeTestIds).
 * @param args.prepared - Prepared input; `meta.finalLength` is reported.
 * @param args.contract - Diagnose contract whose flags and counts are reported.
 * @param args.finalOutput - Text returned to the caller.
 * @param args.rawSliceChars - Optional; reported as 0 when absent.
 * @param args.providerInputChars - Optional; reported as 0 when absent.
 * @param args.providerOutputChars - Optional; reported as 0 when absent.
 */
function logVerboseTestStatusTelemetry(args) {
  const { request, contract } = args;
  if (!request.config.runtime.verbose) {
    return;
  }
  const entries = [
    ["diagnosis_complete_at_layer", getDiagnosisCompleteAtLayer(contract)],
    ["heuristic_short_circuit", !contract.provider_used && contract.diagnosis_complete && !contract.raw_needed && !contract.provider_failed],
    ["raw_input_chars", request.stdin.length],
    ["prepared_input_chars", args.prepared.meta.finalLength],
    ["raw_slice_chars", args.rawSliceChars ?? 0],
    ["provider_input_chars", args.providerInputChars ?? 0],
    ["provider_output_chars", args.providerOutputChars ?? 0],
    ["final_output_chars", args.finalOutput.length],
    ["final_output_tokens_est", estimateTokenCount(args.finalOutput)],
    ["read_targets_count", contract.read_targets.length],
    ["remaining_count", contract.remaining_tests.length],
    ["remaining_ids_exposed", Boolean(request.includeTestIds)]
  ];
  // Every line carries the dimmed "sift" prefix followed by key=value.
  const report = entries.map(([key, value]) => `${pc.dim("sift")} ${key}=${value}`).join("\n");
  process.stderr.write(`${report}\n`);
}
1200
3030
  function normalizeOutput(text, responseMode) {
1201
3031
  if (responseMode !== "json") {
1202
3032
  return text.trim();
@@ -1212,7 +3042,7 @@ function buildDryRunOutput(args) {
1212
3042
  return JSON.stringify(
1213
3043
  {
1214
3044
  status: "dry-run",
1215
- strategy: args.heuristicOutput ? "heuristic" : "provider",
3045
+ strategy: args.strategy ?? (args.heuristicOutput ? "heuristic" : "provider"),
1216
3046
  provider: {
1217
3047
  name: args.providerName,
1218
3048
  model: args.request.config.provider.model,
@@ -1278,77 +3108,303 @@ async function generateWithRetry(args) {
1278
3108
  }
1279
3109
  return generate();
1280
3110
  }
3111
/**
 * Reports whether `analyzeTestStatus` found anything test-related in `input`:
 * a collection error count, a "no tests collected" or interrupted run, any
 * failed/errored/passed count, inline items, or buckets.
 *
 * @param input - Command output to analyze.
 * @returns Truthy when at least one of those signals is present.
 */
function hasRecognizableTestStatusSignal(input) {
  const {
    collectionErrorCount,
    noTestsCollected,
    interrupted,
    failed,
    errors,
    passed,
    inlineItems,
    buckets
  } = analyzeTestStatus(input);
  const hasRunState = collectionErrorCount !== void 0 || noTestsCollected || interrupted;
  return hasRunState || failed > 0 || errors > 0 || passed > 0 || inlineItems.length > 0 || buckets.length > 0;
}
3115
/**
 * Picks the rendering of a test-status decision that matches the request.
 *
 * @param args.request - Current request (`goal`, `format`, `detail`,
 *   `includeTestIds`, optional `testStatusContext`).
 * @param args.decision - Decision holding `contract` plus the pre-rendered
 *   `standardText`, `focusedText` and `verboseText`.
 * @returns Pretty-printed public diagnose contract JSON for goal "diagnose"
 *   with format "json"; otherwise the text for the requested detail level,
 *   falling back to the standard text.
 */
function renderTestStatusDecisionOutput(args) {
  const { request, decision } = args;
  const wantsDiagnoseJson = request.goal === "diagnose" && request.format === "json";
  if (wantsDiagnoseJson) {
    const publicContract = buildTestStatusPublicDiagnoseContract({
      contract: decision.contract,
      includeTestIds: request.includeTestIds,
      remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable
    });
    return JSON.stringify(publicContract, null, 2);
  }
  switch (request.detail) {
    case "verbose":
      return decision.verboseText;
    case "focused":
      return decision.focusedText;
    default:
      return decision.standardText;
  }
}
3135
/**
 * Rebuilds the test-status decision after the provider follow-up failed.
 *
 * The base contract is reused with overrides that mark the diagnosis as
 * incomplete, raw output as needed and the provider as used-but-failed.
 * For every detail level except "verbose" the caller is steered towards one
 * deeper sift pass ("zoom"); for "verbose" the decision is "read_raw".
 *
 * @param args.request - Current request; only `detail` is read here.
 * @param args.baseDecision - Heuristic decision whose contract is extended.
 * @param args.input - Prepared input passed through to the contract builder.
 * @param args.analysis - Test-status analysis passed through to the contract builder.
 * @param args.reason - Provider failure reason, quoted in the action note.
 * @param args.rawSliceUsed - Whether a raw slice was used for the provider call.
 * @param args.rawSliceStrategy - Strategy name of that raw slice.
 * @returns The value returned by `buildTestStatusDiagnoseContract`.
 */
function buildTestStatusProviderFailureDecision(args) {
  const baseContract = args.baseDecision.contract;
  const shouldZoomFirst = args.request.detail !== "verbose";
  const readRawOnlyIf = shouldZoomFirst
    ? "the provider follow-up failed and one deeper sift pass still is not enough"
    : "the provider follow-up failed and you still need exact traceback lines";
  const actionNote = shouldZoomFirst
    ? `Provider follow-up failed (${args.reason}). Use one deeper sift pass on the same cached output before reading raw traceback lines.`
    : `Provider follow-up failed (${args.reason}). Read raw traceback only if exact stack lines are still needed.`;
  // Prefer the dominant blocker bucket, then the first bucket, then no bucket.
  const actionBucketIndex = baseContract.dominant_blocker_bucket_index ?? baseContract.main_buckets[0]?.bucket_index ?? null;
  return buildTestStatusDiagnoseContract({
    input: args.input,
    analysis: args.analysis,
    resolvedTests: baseContract.resolved_tests,
    remainingTests: baseContract.remaining_tests,
    contractOverrides: {
      ...baseContract,
      diagnosis_complete: false,
      raw_needed: true,
      additional_source_read_likely_low_value: false,
      read_raw_only_if: readRawOnlyIf,
      decision: shouldZoomFirst ? "zoom" : "read_raw",
      provider_used: true,
      provider_confidence: null,
      provider_failed: true,
      raw_slice_used: args.rawSliceUsed,
      raw_slice_strategy: args.rawSliceStrategy,
      next_best_action: {
        code: shouldZoomFirst ? "insufficient_signal" : "read_raw_for_exact_traceback",
        bucket_index: actionBucketIndex,
        note: actionNote
      }
    }
  });
}
1281
3162
  async function runSift(request) {
1282
3163
  const prepared = prepareInput(request.stdin, request.config.input);
1283
- const { prompt, responseMode } = buildPrompt({
1284
- question: request.question,
1285
- format: request.format,
1286
- input: prepared.truncated,
1287
- detail: request.detail,
1288
- policyName: request.policyName,
1289
- outputContract: request.outputContract
1290
- });
1291
3164
  const provider = createProvider(request.config);
3165
+ const hasTestStatusSignal = request.policyName === "test-status" && hasRecognizableTestStatusSignal(prepared.truncated);
3166
+ const testStatusAnalysis = hasTestStatusSignal ? analyzeTestStatus(prepared.truncated) : null;
3167
+ const testStatusDecision = hasTestStatusSignal && testStatusAnalysis ? buildTestStatusDiagnoseContract({
3168
+ input: prepared.truncated,
3169
+ analysis: testStatusAnalysis,
3170
+ resolvedTests: request.testStatusContext?.resolvedTests,
3171
+ remainingTests: request.testStatusContext?.remainingTests
3172
+ }) : null;
3173
+ const testStatusHeuristicOutput = testStatusDecision ? renderTestStatusDecisionOutput({
3174
+ request,
3175
+ decision: testStatusDecision
3176
+ }) : null;
1292
3177
  if (request.config.runtime.verbose) {
1293
3178
  process.stderr.write(
1294
3179
  `${pc.dim("sift")} provider=${provider.name} model=${request.config.provider.model} base_url=${request.config.provider.baseUrl} input_chars=${prepared.meta.finalLength}
1295
3180
  `
1296
3181
  );
1297
3182
  }
1298
- const heuristicOutput = applyHeuristicPolicy(
1299
- request.policyName,
1300
- prepared.truncated,
1301
- request.detail
1302
- );
3183
+ const heuristicOutput = request.policyName === "test-status" ? testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, prepared.truncated, request.detail);
1303
3184
  if (heuristicOutput) {
1304
3185
  if (request.config.runtime.verbose) {
1305
3186
  process.stderr.write(`${pc.dim("sift")} heuristic=${request.policyName}
1306
3187
  `);
1307
3188
  }
3189
+ const heuristicPrompt = buildPrompt({
3190
+ question: request.question,
3191
+ format: request.format,
3192
+ goal: request.goal,
3193
+ input: prepared.truncated,
3194
+ detail: request.detail,
3195
+ policyName: request.policyName,
3196
+ outputContract: request.policyName === "test-status" && request.goal === "diagnose" && request.format === "json" ? request.outputContract ?? TEST_STATUS_DIAGNOSE_JSON_CONTRACT : request.outputContract,
3197
+ analysisContext: [
3198
+ request.analysisContext,
3199
+ testStatusDecision ? buildTestStatusAnalysisContext({
3200
+ contract: testStatusDecision.contract,
3201
+ includeTestIds: request.includeTestIds,
3202
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable
3203
+ }) : void 0
3204
+ ].filter((value) => Boolean(value)).join("\n\n")
3205
+ });
1308
3206
  if (request.dryRun) {
1309
3207
  return buildDryRunOutput({
1310
3208
  request,
1311
3209
  providerName: provider.name,
1312
- prompt,
1313
- responseMode,
3210
+ prompt: heuristicPrompt.prompt,
3211
+ responseMode: heuristicPrompt.responseMode,
1314
3212
  prepared,
1315
- heuristicOutput
3213
+ heuristicOutput,
3214
+ strategy: "heuristic"
1316
3215
  });
1317
3216
  }
1318
- return withInsufficientHint({
3217
+ const finalOutput = withInsufficientHint({
1319
3218
  output: heuristicOutput,
1320
3219
  request,
1321
3220
  prepared
1322
3221
  });
3222
+ if (testStatusDecision) {
3223
+ logVerboseTestStatusTelemetry({
3224
+ request,
3225
+ prepared,
3226
+ contract: testStatusDecision.contract,
3227
+ finalOutput
3228
+ });
3229
+ }
3230
+ return finalOutput;
3231
+ }
3232
+ if (testStatusDecision && testStatusAnalysis) {
3233
+ const rawSlice = buildTestStatusRawSlice({
3234
+ input: prepared.redacted,
3235
+ config: request.config.input,
3236
+ contract: testStatusDecision.contract
3237
+ });
3238
+ const prompt = buildPrompt({
3239
+ question: "Complete the diagnosis. Use the heuristic extract as the bucket truth and only change the decision when the sliced command output proves it.",
3240
+ format: "json",
3241
+ goal: "diagnose",
3242
+ input: rawSlice.text,
3243
+ detail: request.detail,
3244
+ policyName: "test-status",
3245
+ outputContract: TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT,
3246
+ analysisContext: [
3247
+ request.analysisContext,
3248
+ buildTestStatusAnalysisContext({
3249
+ contract: {
3250
+ ...testStatusDecision.contract,
3251
+ provider_used: true,
3252
+ provider_failed: false,
3253
+ raw_slice_used: rawSlice.used,
3254
+ raw_slice_strategy: rawSlice.strategy
3255
+ },
3256
+ includeTestIds: request.includeTestIds,
3257
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable
3258
+ })
3259
+ ].filter((value) => Boolean(value)).join("\n\n")
3260
+ });
3261
+ const providerPrepared2 = {
3262
+ ...prepared,
3263
+ truncated: rawSlice.text,
3264
+ meta: {
3265
+ ...prepared.meta,
3266
+ finalLength: rawSlice.text.length,
3267
+ truncatedApplied: rawSlice.used || prepared.meta.truncatedApplied
3268
+ }
3269
+ };
3270
+ if (request.dryRun) {
3271
+ return buildDryRunOutput({
3272
+ request,
3273
+ providerName: provider.name,
3274
+ prompt: prompt.prompt,
3275
+ responseMode: prompt.responseMode,
3276
+ prepared: providerPrepared2,
3277
+ heuristicOutput: testStatusHeuristicOutput,
3278
+ strategy: "hybrid"
3279
+ });
3280
+ }
3281
+ try {
3282
+ const result = await generateWithRetry({
3283
+ provider,
3284
+ request,
3285
+ prompt: prompt.prompt,
3286
+ responseMode: prompt.responseMode
3287
+ });
3288
+ const supplement = parseTestStatusProviderSupplement(result.text);
3289
+ const mergedDecision = buildTestStatusDiagnoseContract({
3290
+ input: prepared.truncated,
3291
+ analysis: testStatusAnalysis,
3292
+ resolvedTests: request.testStatusContext?.resolvedTests,
3293
+ remainingTests: request.testStatusContext?.remainingTests,
3294
+ contractOverrides: {
3295
+ diagnosis_complete: supplement.diagnosis_complete,
3296
+ raw_needed: supplement.raw_needed,
3297
+ additional_source_read_likely_low_value: supplement.additional_source_read_likely_low_value,
3298
+ read_raw_only_if: supplement.read_raw_only_if,
3299
+ decision: supplement.decision,
3300
+ provider_used: true,
3301
+ provider_confidence: supplement.provider_confidence,
3302
+ provider_failed: false,
3303
+ raw_slice_used: rawSlice.used,
3304
+ raw_slice_strategy: rawSlice.strategy,
3305
+ next_best_action: supplement.next_best_action
3306
+ }
3307
+ });
3308
+ const finalOutput = renderTestStatusDecisionOutput({
3309
+ request,
3310
+ decision: mergedDecision
3311
+ });
3312
+ logVerboseTestStatusTelemetry({
3313
+ request,
3314
+ prepared,
3315
+ contract: mergedDecision.contract,
3316
+ finalOutput,
3317
+ rawSliceChars: rawSlice.text.length,
3318
+ providerInputChars: providerPrepared2.truncated.length,
3319
+ providerOutputChars: result.text.length
3320
+ });
3321
+ return finalOutput;
3322
+ } catch (error) {
3323
+ const reason = error instanceof Error ? error.message : "unknown_error";
3324
+ const failureDecision = buildTestStatusProviderFailureDecision({
3325
+ request,
3326
+ baseDecision: testStatusDecision,
3327
+ input: prepared.truncated,
3328
+ analysis: testStatusAnalysis,
3329
+ reason,
3330
+ rawSliceUsed: rawSlice.used,
3331
+ rawSliceStrategy: rawSlice.strategy
3332
+ });
3333
+ const finalOutput = request.goal === "diagnose" && request.format === "json" ? JSON.stringify(
3334
+ buildTestStatusPublicDiagnoseContract({
3335
+ contract: failureDecision.contract,
3336
+ includeTestIds: request.includeTestIds,
3337
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable
3338
+ }),
3339
+ null,
3340
+ 2
3341
+ ) : renderTestStatusDecisionOutput({
3342
+ request,
3343
+ decision: failureDecision
3344
+ });
3345
+ logVerboseTestStatusTelemetry({
3346
+ request,
3347
+ prepared,
3348
+ contract: failureDecision.contract,
3349
+ finalOutput,
3350
+ rawSliceChars: rawSlice.text.length,
3351
+ providerInputChars: providerPrepared2.truncated.length
3352
+ });
3353
+ return finalOutput;
3354
+ }
1323
3355
  }
3356
+ const genericRawSlice = buildGenericRawSlice({
3357
+ input: prepared.redacted,
3358
+ config: request.config.input
3359
+ });
3360
+ const providerPrompt = buildPrompt({
3361
+ question: request.question,
3362
+ format: request.format,
3363
+ goal: request.goal,
3364
+ input: genericRawSlice.text,
3365
+ detail: request.detail,
3366
+ policyName: request.policyName,
3367
+ outputContract: request.outputContract,
3368
+ analysisContext: request.analysisContext
3369
+ });
3370
+ const providerPrepared = {
3371
+ ...prepared,
3372
+ truncated: genericRawSlice.text,
3373
+ meta: {
3374
+ ...prepared.meta,
3375
+ finalLength: genericRawSlice.text.length,
3376
+ truncatedApplied: genericRawSlice.used || prepared.meta.truncatedApplied
3377
+ }
3378
+ };
1324
3379
  if (request.dryRun) {
1325
3380
  return buildDryRunOutput({
1326
3381
  request,
1327
3382
  providerName: provider.name,
1328
- prompt,
1329
- responseMode,
1330
- prepared,
1331
- heuristicOutput: null
3383
+ prompt: providerPrompt.prompt,
3384
+ responseMode: providerPrompt.responseMode,
3385
+ prepared: providerPrepared,
3386
+ heuristicOutput: testStatusDecision ? testStatusHeuristicOutput : null,
3387
+ strategy: testStatusDecision ? "hybrid" : "provider"
1332
3388
  });
1333
3389
  }
1334
3390
  try {
1335
3391
  const result = await generateWithRetry({
1336
3392
  provider,
1337
3393
  request,
1338
- prompt,
1339
- responseMode
3394
+ prompt: providerPrompt.prompt,
3395
+ responseMode: providerPrompt.responseMode
1340
3396
  });
1341
3397
  if (looksLikeRejectedModelOutput({
1342
- source: prepared.truncated,
3398
+ source: genericRawSlice.text,
1343
3399
  candidate: result.text,
1344
- responseMode
3400
+ responseMode: providerPrompt.responseMode
1345
3401
  })) {
1346
3402
  throw new Error("Model output rejected by quality gate");
1347
3403
  }
1348
3404
  return withInsufficientHint({
1349
- output: normalizeOutput(result.text, responseMode),
3405
+ output: normalizeOutput(result.text, providerPrompt.responseMode),
1350
3406
  request,
1351
- prepared
3407
+ prepared: providerPrepared
1352
3408
  });
1353
3409
  } catch (error) {
1354
3410
  const reason = error instanceof Error ? error.message : "unknown_error";
@@ -1356,14 +3412,595 @@ async function runSift(request) {
1356
3412
  output: buildFallbackOutput({
1357
3413
  format: request.format,
1358
3414
  reason,
1359
- rawInput: prepared.truncated,
3415
+ rawInput: providerPrepared.truncated,
1360
3416
  rawFallback: request.config.runtime.rawFallback,
1361
3417
  jsonFallback: request.fallbackJson
1362
3418
  }),
1363
3419
  request,
1364
- prepared
3420
+ prepared: providerPrepared
3421
+ });
3422
+ }
3423
+ }
3424
+
3425
+ // src/core/testStatusState.ts
3426
+ import fs from "fs";
3427
+ import path2 from "path";
3428
+ import { z as z2 } from "zod";
3429
// Detail levels a cached run may have been rendered with.
var detailSchema = z2.enum(["standard", "focused", "verbose"]);

// Closed set of failure bucket categories accepted in cached state.
var failureBucketTypeSchema = z2.enum([
  "shared_environment_blocker",
  "fixture_guard_failure",
  "service_unavailable",
  "db_connection_failure",
  "auth_bypass_absent",
  "contract_snapshot_drift",
  "import_dependency_failure",
  "collection_failure",
  "assertion_failure",
  "runtime_failure",
  "interrupted_run",
  "no_tests_collected",
  "unknown_failure"
]);

// Every counter in the cache must be a non-negative integer.
var countSchema = z2.number().int().nonnegative();

// One failure bucket as persisted in the cache.
var cachedBucketSchema = z2.object({
  type: failureBucketTypeSchema,
  headline: z2.string(),
  countVisible: countSchema,
  countClaimed: countSchema.optional(),
  reason: z2.string(),
  entities: z2.array(z2.string())
});

// Snapshot of the test-status analysis totals plus its buckets.
var cachedAnalysisSchema = z2.object({
  passed: countSchema,
  failed: countSchema,
  errors: countSchema,
  skipped: countSchema,
  noTestsCollected: z2.boolean(),
  interrupted: z2.boolean(),
  collectionErrorCount: countSchema.optional(),
  buckets: z2.array(cachedBucketSchema)
});

// The command that produced the run: either an argv vector or a shell string,
// discriminated by `mode`. The whole field may be absent.
var cachedCommandSchema = z2
  .discriminatedUnion("mode", [
    z2.object({
      mode: z2.literal("argv"),
      argv: z2.array(z2.string()).min(1)
    }),
    z2.object({
      mode: z2.literal("shell"),
      shellCommand: z2.string().min(1)
    })
  ])
  .optional();

// Optional pytest-specific state used for subset re-runs and target diffs.
var cachedPytestStateSchema = z2
  .object({
    subsetCapable: z2.boolean(),
    baseArgv: z2.array(z2.string()).min(1).optional(),
    failingNodeIds: z2.array(z2.string()),
    remainingNodeIds: z2.array(z2.string()).optional()
  })
  .optional();

// Top-level shape of the persisted test-status run (schema version 1).
var cachedRunSchema = z2.object({
  version: z2.literal(1),
  timestamp: z2.string(),
  presetName: z2.literal("test-status"),
  cwd: z2.string(),
  commandKey: z2.string(),
  commandPreview: z2.string(),
  command: cachedCommandSchema,
  detail: detailSchema,
  exitCode: z2.number().int(),
  rawOutput: z2.string(),
  capture: z2.object({
    originalChars: countSchema,
    truncatedApplied: z2.boolean()
  }),
  analysis: cachedAnalysisSchema,
  pytest: cachedPytestStateSchema
});
3498
/**
 * Error signalling that no cached test-status run could be found.
 * The message tells the user how to create one.
 */
var MissingCachedTestStatusRunError = class extends Error {
  constructor() {
    super(
      "No cached test-status run found. Start with `sift exec --preset test-status -- <test command>`."
    );
    // Give the subclass a distinguishing name; otherwise it reports as plain "Error".
    this.name = "MissingCachedTestStatusRunError";
  }
};
3505
/**
 * Error signalling that the cached test-status state exists but is unusable.
 * The message tells the user to re-run the test command to regenerate it.
 */
var InvalidCachedTestStatusRunError = class extends Error {
  constructor() {
    super(
      "Cached test-status state is invalid. Run `sift exec --preset test-status -- <test command>` again."
    );
    // Give the subclass a distinguishing name; otherwise it reports as plain "Error".
    this.name = "InvalidCachedTestStatusRunError";
  }
};
3512
/**
 * Canonicalizes a bucket reason: strips surrounding whitespace and collapses
 * every internal whitespace run into a single space.
 */
function normalizeBucketReason(reason) {
  const words = reason.trim().split(/\s+/);
  return words.join(" ");
}
3515
/**
 * Returns the bucket's claimed count when one is present (not null/undefined),
 * otherwise its visible count.
 */
function getBucketCount(bucket) {
  const { countClaimed, countVisible } = bucket;
  if (countClaimed === null || countClaimed === undefined) {
    return countVisible;
  }
  return countClaimed;
}
3518
/**
 * Renders "<count> <noun>", using the singular noun only when count is exactly 1.
 * The plural defaults to the singular with an "s" appended.
 */
function formatCount3(count, singular, plural = `${singular}s`) {
  let noun = plural;
  if (count === 1) {
    noun = singular;
  }
  return `${count} ${noun}`;
}
3521
/**
 * Builds a parenthesized preview of at most the first two values, followed by
 * ", and N more" when additional values exist. Returns "" for an empty list.
 */
function appendPreview(values) {
  const total = values.length;
  if (total === 0) {
    return "";
  }
  const shown = values.slice(0, 2);
  const hidden = total - shown.length;
  let text = shown.join(", ");
  if (hidden > 0) {
    text += `, and ${hidden} more`;
  }
  return ` (${text})`;
}
3530
/**
 * Produces a stable string key for a bucket from its type, its entities in
 * sorted order, and its whitespace-normalized reason. The bucket's own
 * entities array is not mutated.
 */
function buildBucketSignature(bucket) {
  const sortedEntities = Array.from(bucket.entities);
  sortedEntities.sort();
  // Same normalization as normalizeBucketReason: trim, then collapse whitespace runs.
  const reason = bucket.reason.trim().replace(/\s+/g, " ");
  return JSON.stringify([bucket.type, sortedEntities, reason]);
}
3537
/**
 * Tests the final path segment of `value` against the `matcher` regex.
 */
function basenameMatches(value, matcher) {
  const fileName = path2.basename(value);
  return matcher.test(fileName);
}
3540
/**
 * True when the file name of `value` is "pytest", optionally with a ".exe"
 * suffix, compared case-insensitively.
 */
function isPytestExecutable(value) {
  const fileName = path2.basename(value);
  return /^pytest(?:\.exe)?$/i.test(fileName);
}
3543
/**
 * True when the file name of `value` is "python", optionally followed by a
 * dotted numeric version and/or a ".exe" suffix, compared case-insensitively.
 */
function isPythonExecutable(value) {
  const fileName = path2.basename(value);
  return /^python(?:\d+(?:\.\d+)*)?(?:\.exe)?$/i.test(fileName);
}
3546
// Short pytest options treated as consuming a value, either attached ("-kexpr")
// or as the following argv entry ("-k expr").
var shortPytestOptionsWithValue = /* @__PURE__ */ new Set(["-c", "-k", "-m", "-n", "-o", "-p", "-W"]);
3555
// Long pytest options that consume the NEXT argv entry as their value when not
// written in the "--option=value" form.
//
// "--pyargs" is intentionally not listed: it is a boolean pytest flag. Treating
// it as value-taking made the following positional package name look like an
// option value, and made a trailing "--pyargs" look like a missing value.
var longPytestOptionsWithValue = /* @__PURE__ */ new Set([
  "--asyncio-mode",
  "--basetemp",
  "--capture",
  "--color",
  "--confcutdir",
  "--cov",
  "--cov-config",
  "--cov-report",
  "--deselect",
  "--durations",
  "--durations-min",
  "--ignore",
  "--ignore-glob",
  "--import-mode",
  "--junitxml",
  "--log-cli-level",
  "--log-date-format",
  "--log-file",
  "--log-file-level",
  "--log-format",
  "--log-level",
  "--maxfail",
  "--override-ini",
  "--rootdir",
  "--tb"
]);
3583
/**
 * Decides whether an argv vector is a pytest invocation made up solely of
 * options, with no positional arguments and no "--" separator.
 *
 * Accepted launch forms are `pytest ...` and `python -m pytest ...`. Options
 * known to take a separate value have that value skipped; a value-taking
 * option at the very end of argv makes the result false.
 */
function isSubsetCapablePytestArgv(argv) {
  const launchedDirectly = argv.length > 0 && isPytestExecutable(argv[0]);
  const launchedAsModule =
    !launchedDirectly &&
    argv.length > 2 &&
    isPythonExecutable(argv[0]) &&
    argv[1] === "-m" &&
    argv[2] === "pytest";
  if (!launchedDirectly && !launchedAsModule) {
    return false;
  }

  let cursor = launchedDirectly ? 1 : 3;
  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;

    // A separator or any positional argument rules the command out.
    if (token === "--" || !token.startsWith("-")) {
      return false;
    }

    let consumesNext;
    if (token.startsWith("--")) {
      // "--opt=value" carries its own value; only bare known options eat the next entry.
      consumesNext = !token.includes("=") && longPytestOptionsWithValue.has(token);
    } else {
      // "-kexpr" carries its own value; only an exact two-character option eats the next entry.
      consumesNext = token.length === 2 && shortPytestOptionsWithValue.has(token.slice(0, 2));
    }

    if (consumesNext) {
      if (cursor >= argv.length) {
        return false;
      }
      cursor += 1;
    }
  }
  return true;
}
3625
/**
 * Converts the exec request's command description into its cached form.
 *
 * A non-empty `command` array wins and is copied into an "argv" record;
 * otherwise a non-empty `shellCommand` string yields a "shell" record;
 * otherwise the result is undefined.
 */
function buildCachedCommand(args) {
  const { command, shellCommand } = args;
  if (Array.isArray(command) && command.length > 0) {
    return { mode: "argv", argv: Array.from(command) };
  }
  const hasShellCommand = typeof shellCommand === "string" && shellCommand.length > 0;
  return hasShellCommand ? { mode: "shell", shellCommand } : void 0;
}
3640
/**
 * Collects the analysis' visible error labels followed by its visible failed
 * labels, dropping empty entries and duplicates while keeping first-seen order.
 */
function buildFailingNodeIds(analysis) {
  const seen = /* @__PURE__ */ new Set();
  const labelGroups = [analysis.visibleErrorLabels, analysis.visibleFailedLabels];
  for (const group of labelGroups) {
    for (const label of group) {
      if (label.length > 0) {
        // A Set keeps insertion order, so the first occurrence fixes the position.
        seen.add(label);
      }
    }
  }
  return Array.from(seen);
}
3649
/**
 * Assembles the pytest section of a cached run.
 *
 * `baseArgv` is a copy of the argv only when the command is in "argv" mode and
 * passes isSubsetCapablePytestArgv; `subsetCapable` reflects whether it was set.
 */
function buildCachedPytestState(args) {
  const { command } = args;
  let baseArgv = void 0;
  if (command?.mode === "argv" && isSubsetCapablePytestArgv(command.argv)) {
    baseArgv = Array.from(command.argv);
  }
  return {
    subsetCapable: baseArgv !== void 0,
    baseArgv,
    failingNodeIds: buildFailingNodeIds(args.analysis),
    remainingNodeIds: args.remainingNodeIds
  };
}
3658
/**
 * Builds the cache key for a command: the mode ("shell" when a shell command
 * is present, else "argv"), a colon, then the command preview.
 */
function buildTestStatusCommandKey(args) {
  let mode = "argv";
  if (args.shellCommand) {
    mode = "shell";
  }
  return `${mode}:${args.commandPreview}`;
}
3661
/**
 * Copies the fields of a test-status analysis that are persisted in the cache:
 * the totals, the two status flags, the collection error count, and a trimmed
 * copy of each bucket (with its entities array cloned).
 */
function snapshotTestStatusAnalysis(analysis) {
  const buckets = analysis.buckets.map(
    ({ type, headline, countVisible, countClaimed, reason, entities }) => ({
      type,
      headline,
      countVisible,
      countClaimed,
      reason,
      entities: [...entities]
    })
  );
  const { passed, failed, errors, skipped, noTestsCollected, interrupted, collectionErrorCount } = analysis;
  return {
    passed,
    failed,
    errors,
    skipped,
    noTestsCollected,
    interrupted,
    collectionErrorCount,
    buckets
  };
}
3680
/**
 * Builds the version-1 cached run record for a test-status execution.
 *
 * The timestamp defaults to the current time in ISO form when the caller does
 * not supply one. The analysis is stored as a snapshot, and the pytest section
 * is derived from the cached command, the analysis, and any remaining node ids.
 */
function createCachedTestStatusRun(args) {
  const { analysis, remainingNodeIds } = args;
  const command = buildCachedCommand({
    command: args.command,
    shellCommand: args.shellCommand
  });
  const timestamp = args.timestamp ?? (/* @__PURE__ */ new Date()).toISOString();
  const capture = {
    originalChars: args.originalChars,
    truncatedApplied: args.truncatedApplied
  };
  return {
    version: 1,
    timestamp,
    presetName: "test-status",
    cwd: args.cwd,
    commandKey: args.commandKey,
    commandPreview: args.commandPreview,
    command,
    detail: args.detail,
    exitCode: args.exitCode,
    rawOutput: args.rawOutput,
    capture,
    analysis: snapshotTestStatusAnalysis(analysis),
    pytest: buildCachedPytestState({ command, analysis, remainingNodeIds })
  };
}
3708
/**
 * Loads and validates the cached test-status run from `statePath`.
 *
 * @throws MissingCachedTestStatusRunError when reading fails with code "ENOENT".
 * @throws InvalidCachedTestStatusRunError when reading fails for any other
 *   reason, or when the content is not valid JSON matching cachedRunSchema.
 */
function readCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
  let serialized;
  try {
    serialized = fs.readFileSync(statePath, "utf8");
  } catch (error) {
    const fileIsMissing = error.code === "ENOENT";
    throw fileIsMissing ? new MissingCachedTestStatusRunError() : new InvalidCachedTestStatusRunError();
  }

  let parsedRun;
  try {
    parsedRun = cachedRunSchema.parse(JSON.parse(serialized));
  } catch {
    // JSON syntax errors and schema violations are reported the same way.
    throw new InvalidCachedTestStatusRunError();
  }
  return parsedRun;
}
3724
/**
 * Best-effort variant of readCachedTestStatusRun: returns the cached run, or
 * null when reading or validation throws for any reason.
 */
function tryReadCachedTestStatusRun(statePath = getDefaultTestStatusStatePath()) {
  let cachedRun = null;
  try {
    cachedRun = readCachedTestStatusRun(statePath);
  } catch {
    // Deliberately swallowed: a missing or invalid cache is reported as null.
  }
  return cachedRun;
}
3731
/**
 * Persists `state` at `statePath` as 2-space-indented JSON followed by a
 * newline, creating the parent directory (recursively) first.
 */
function writeCachedTestStatusRun(state, statePath = getDefaultTestStatusStatePath()) {
  const parentDir = path2.dirname(statePath);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  fs.writeFileSync(statePath, serialized, "utf8");
}
3738
/**
 * Compares the failing node ids of two cached runs.
 *
 * The runs are comparable only when both are "test-status" runs with the same
 * cwd and command key, and both carry pytest state. For comparable runs:
 *   - resolved:   failing before, not failing now (previous order)
 *   - remaining:  failing now and before (current order)
 *   - introduced: failing now, not before (current order)
 * Otherwise all three lists are empty and `comparable` is false.
 */
function buildTargetDelta(args) {
  const { previous, current } = args;
  const sameInvocation =
    previous.presetName === "test-status" &&
    current.presetName === "test-status" &&
    previous.cwd === current.cwd &&
    previous.commandKey === current.commandKey;
  if (!sameInvocation || !previous.pytest || !current.pytest) {
    return { comparable: false, resolved: [], remaining: [], introduced: [] };
  }

  const failingBefore = previous.pytest.failingNodeIds;
  const failingNow = current.pytest.failingNodeIds;
  const nowSet = new Set(failingNow);
  const beforeSet = new Set(failingBefore);

  const resolved = failingBefore.filter((id) => !nowSet.has(id));
  const remaining = [];
  const introduced = [];
  for (const id of failingNow) {
    (beforeSet.has(id) ? remaining : introduced).push(id);
  }
  return { comparable: true, resolved, remaining, introduced };
}
3766
/**
 * Public entry point for the failing-target comparison between two cached
 * runs; delegates directly to buildTargetDelta.
 */
function diffTestStatusTargets(args) {
  const delta = buildTargetDelta(args);
  return delta;
}
3769
/**
 * Produces human-readable change lines between two cached test-status runs.
 *
 * Lines are built in this order: resolved / remaining / new failing targets,
 * then buckets present in both runs whose count changed. Only when none of
 * those produced a line are whole-bucket "Resolved" and "New" lines added.
 * At most the first four lines are returned.
 *
 * @returns {{ lines: string[], remainingNodeIds: string[] | undefined }}
 *   `remainingNodeIds` is set only when the two runs were comparable.
 */
function diffTestStatusRuns(args) {
  const { previous, current } = args;
  const targetDelta = buildTargetDelta(args);

  const indexBySignature = (buckets) =>
    new Map(buckets.map((bucket) => [buildBucketSignature(bucket), bucket]));
  const previousBuckets = indexBySignature(previous.analysis.buckets);
  const currentBuckets = indexBySignature(current.analysis.buckets);

  const lines = [];

  // Target-level changes: [label, targets, verb phrase].
  const targetSections = [
    ["Resolved", targetDelta.resolved, "no longer appear"],
    ["Remaining", targetDelta.remaining, "still appear"],
    ["New", targetDelta.introduced, "appeared"]
  ];
  for (const [label, targets, verb] of targetSections) {
    if (targets.length > 0) {
      lines.push(
        `- ${label}: ${formatCount3(targets.length, "failing test/module", "failing tests/modules")} ${verb}${appendPreview(targets)}.`
      );
    }
  }

  // Buckets that exist in both runs but whose count moved.
  for (const bucket of current.analysis.buckets) {
    const earlier = previousBuckets.get(buildBucketSignature(bucket));
    if (!earlier) {
      continue;
    }
    const countBefore = getBucketCount(earlier);
    const countNow = getBucketCount(bucket);
    if (countBefore !== countNow) {
      lines.push(`- Changed: ${bucket.headline} (${countBefore} -> ${countNow}).`);
    }
  }

  // Fall back to whole-bucket appearance/disappearance only if nothing else was reported.
  if (lines.length === 0) {
    for (const bucket of previous.analysis.buckets) {
      if (!currentBuckets.has(buildBucketSignature(bucket))) {
        lines.push(`- Resolved: ${bucket.headline} (${getBucketCount(bucket)}).`);
      }
    }
    for (const bucket of current.analysis.buckets) {
      if (!previousBuckets.has(buildBucketSignature(bucket))) {
        lines.push(`- New: ${bucket.headline} (${getBucketCount(bucket)}).`);
      }
    }
  }

  return {
    lines: lines.slice(0, 4),
    remainingNodeIds: targetDelta.comparable ? targetDelta.remaining : void 0
  };
}
3824
+
3825
+ // src/core/watch.ts
3826
// Matches terminal clear-screen sequences: ESC c, or ESC[2J optionally followed
// by ESC[H. Global flag so String.prototype.split cuts at every occurrence.
var CLEAR_SCREEN_PATTERN = /\u001bc|\u001b\[2J(?:\u001b\[H)?/g;

// Matches a "=====...=====" banner line that mentions a run outcome
// (passed, failed, error(s), no tests ran, interrupted), case-insensitively.
var SUMMARY_BOUNDARY_PATTERN = /^={5,}.*(?:passed|failed|errors?|no tests ran|interrupted).*={5,}\s*$/i;
3828
/**
 * Converts every CRLF pair in the input to a bare LF. Lone CR characters are
 * left untouched.
 */
function normalizeWatchInput(input) {
  const segments = input.split("\r\n");
  return segments.join("\n");
}
3831
/**
 * Reports whether at least one line of the input contains a non-whitespace
 * character.
 */
function hasVisibleContent(input) {
  for (const line of input.split("\n")) {
    if (line.trim().length > 0) {
      return true;
    }
  }
  return false;
}
3834
/**
 * Splits the input into cycles, closing a cycle after each line that matches
 * SUMMARY_BOUNDARY_PATTERN (the boundary line stays with the cycle it ends).
 * Each cycle is trimmed, and cycles that are empty after trimming are dropped.
 * Any text after the last boundary becomes a final cycle.
 */
function splitBySummaryBoundaries(input) {
  const cycles = [];
  let pending = [];

  // Emit the accumulated lines as one cycle (if non-empty) and start over.
  const flush = () => {
    const text = pending.join("\n").trim();
    if (text.length > 0) {
      cycles.push(text);
    }
    pending = [];
  };

  for (const line of input.split("\n")) {
    pending.push(line);
    if (SUMMARY_BOUNDARY_PATTERN.test(line.trim())) {
      flush();
    }
  }
  flush();
  return cycles;
}
3853
/**
 * Splits captured output into watch cycles.
 *
 * Strategy, in order: split on clear-screen sequences; if that yields at most
 * one chunk, split on summary banner lines; if that also yields at most one
 * chunk, return the whole trimmed input as a single cycle (or no cycles when
 * the input has no visible content).
 */
function splitWatchCycles(input) {
  const normalized = normalizeWatchInput(input);

  const clearScreenChunks = [];
  for (const piece of normalized.split(CLEAR_SCREEN_PATTERN)) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      clearScreenChunks.push(trimmed);
    }
  }
  if (clearScreenChunks.length > 1) {
    return clearScreenChunks;
  }

  const summaryChunks = splitBySummaryBoundaries(normalized);
  if (summaryChunks.length > 1) {
    return summaryChunks;
  }

  if (!hasVisibleContent(normalized)) {
    return [];
  }
  return [normalized.trim()];
}
3865
/**
 * Heuristic for deciding that captured output came from a watch-mode runner.
 *
 * The output must contain either a clear-screen sequence or one of the
 * watch-related phrases, and must split into more than one cycle.
 */
function looksLikeWatchStream(input) {
  const normalized = normalizeWatchInput(input);
  // A fresh non-global regex is used here so .test() carries no lastIndex state.
  const hasClearScreen = /\u001bc|\u001b\[2J(?:\u001b\[H)?/.test(normalized);
  const hasWatchHint =
    hasClearScreen ||
    /(watch(?:ing)?|waiting for file changes|rerunning|re-running)/i.test(normalized);
  if (!hasWatchHint) {
    return false;
  }
  return splitWatchCycles(input).length > 1;
}
3872
/**
 * Prefixes every non-empty line of `text` with the indent string; empty lines
 * are left as they are.
 */
function indentBlock(text) {
  const indented = [];
  for (const line of text.split("\n")) {
    indented.push(line.length > 0 ? ` ${line}` : line);
  }
  return indented.join("\n");
}
3875
/**
 * Renders a multi-cycle watch stream for non-test-status presets.
 *
 * Every cycle is summarized with runSift using the caller's request. From the
 * second cycle on, a further runSift call compares the previous and current
 * summaries to describe what changed; that change summary is shown above the
 * cycle's own summary. Cycles are processed sequentially because each one
 * depends on the summary of the one before it.
 *
 * @returns {Promise<string>} the per-cycle sections joined by blank lines.
 */
async function runGenericWatch(request, cycles) {
  const sections = [];
  let previousSummary = null;
  let cycleNumber = 0;

  for (const cycle of cycles) {
    cycleNumber += 1;
    const currentSummary = await runSift({
      ...request,
      stdin: cycle
    });

    if (cycleNumber === 1) {
      // The first cycle has nothing to be compared against.
      sections.push(`- Cycle 1\n${indentBlock(currentSummary)}`);
    } else {
      const comparisonInput = [
        "Previous cycle summary:",
        previousSummary ?? "",
        "",
        "Current cycle summary:",
        currentSummary
      ].join("\n");
      // Preset-specific settings are cleared so this pass is a plain bullet summary.
      const changeSummary = await runSift({
        ...request,
        goal: "summarize",
        format: "bullets",
        policyName: void 0,
        detail: void 0,
        outputContract: void 0,
        analysisContext: void 0,
        fallbackJson: void 0,
        question: "What changed since the previous cycle? Mention what resolved, what stayed, and the next best action.",
        stdin: comparisonInput
      });
      const header = `- Cycle ${cycleNumber}`;
      sections.push([header, indentBlock(changeSummary), indentBlock(currentSummary)].join("\n"));
    }

    previousSummary = currentSummary;
  }

  return sections.join("\n\n");
}
3914
/**
 * Renders a multi-cycle watch stream for the `test-status` preset.
 *
 * Each cycle is analyzed locally, wrapped in an in-memory cached-run record,
 * and compared with the previous cycle to derive resolved/remaining targets
 * and human-readable change lines. The cycle is then passed to runSift with
 * extra watch context. Nothing is written to the on-disk cache here.
 *
 * Output:
 *   - for `goal === "diagnose"` with `format === "json"`: a JSON document with
 *     an overall `status` ("insufficient" if any cycle's diagnosis says so,
 *     else "ok"), the per-cycle payloads, and the last cycle's
 *     `next_best_action` (or null);
 *   - otherwise: one text section per cycle, joined by blank lines.
 *
 * @param request the sift request; `question`, `detail`, `analysisContext`,
 *   `testStatusContext`, `goal` and `format` are read here.
 * @param cycles  the already-split cycle texts, in order.
 * @returns {Promise<string>}
 */
async function runTestStatusWatch(request, cycles) {
  const wantsJsonDiagnosis = request.goal === "diagnose" && request.format === "json";
  const rendered = [];
  const cyclePayloads = [];
  let previousRun = null;

  for (const [index, cycle] of cycles.entries()) {
    const analysis = analyzeTestStatus(cycle);
    const currentRun = createCachedTestStatusRun({
      cwd: process.cwd(),
      commandKey: `watch:${request.question}`,
      commandPreview: `watch:${request.question}`,
      detail: request.detail ?? "standard",
      // No real exit code exists per cycle; derive one from the analysis.
      exitCode: analysis.failed > 0 || analysis.errors > 0 || analysis.collectionErrorCount ? 1 : 0,
      rawOutput: cycle,
      originalChars: cycle.length,
      truncatedApplied: false,
      analysis
    });

    const targetDelta = previousRun === null ? null : diffTestStatusTargets({
      previous: previousRun,
      current: currentRun
    });
    const diffLines = previousRun === null ? [] : diffTestStatusRuns({
      previous: previousRun,
      current: currentRun
    }).lines;

    // Drop a missing/empty caller-supplied context so the joined text does not
    // start with a blank line (same filtering as the zoom context in runExec).
    const analysisContext = [
      request.analysisContext,
      "Watch context:",
      "- Treat this as a redraw/change cycle, not a fresh full-suite baseline.",
      ...previousRun === null ? [] : [
        "- Prefer what changed, what resolved, and what still remains.",
        "- Keep the current blocker and remaining failures in focus."
      ]
    ].filter((value) => Boolean(value)).join("\n");

    const output = await runSift({
      ...request,
      stdin: cycle,
      analysisContext,
      testStatusContext: {
        ...request.testStatusContext,
        resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
        remainingTests: targetDelta?.remaining ?? currentRun.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
        remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? (Boolean(currentRun.pytest?.subsetCapable) && (currentRun.pytest?.failingNodeIds.length ?? 0) > 0)
      }
    });

    if (wantsJsonDiagnosis) {
      // NOTE(review): JSON.parse throws if runSift ever returns non-JSON text
      // in this mode; confirm runSift guarantees JSON for diagnose/json.
      cyclePayloads.push({
        cycle: index + 1,
        diagnosis: JSON.parse(output),
        changes: diffLines
      });
    } else {
      const block = [`- Cycle ${index + 1}`];
      if (diffLines.length > 0) {
        block.push(...diffLines.map((line) => ` ${line}`));
      }
      block.push(indentBlock(output));
      rendered.push(block.join("\n"));
    }

    previousRun = currentRun;
  }

  if (wantsJsonDiagnosis) {
    const lastDiagnosis = cyclePayloads.at(-1)?.diagnosis;
    const anyInsufficient = cyclePayloads.some(
      (payload) => typeof payload.diagnosis === "object" && payload.diagnosis !== null && "status" in payload.diagnosis && payload.diagnosis.status === "insufficient"
    );
    return JSON.stringify(
      {
        status: anyInsufficient ? "insufficient" : "ok",
        cycles: cyclePayloads,
        next_best_action: lastDiagnosis?.next_best_action ?? null
      },
      null,
      2
    );
  }
  return rendered.join("\n\n");
}
3990
/**
 * Entry point for watch-mode processing of captured output.
 *
 * Input that splits into at most one cycle is handled as a normal runSift
 * call. Multi-cycle input goes to the test-status watch flow when the policy
 * is "test-status", and to the generic watch flow otherwise.
 *
 * @throws Error when `--goal diagnose --format json` is requested for a
 *   multi-cycle stream whose policy is not "test-status".
 */
async function runWatch(request) {
  const cycles = splitWatchCycles(request.stdin);
  if (cycles.length <= 1) {
    return runSift(request);
  }

  const isTestStatus = request.policyName === "test-status";
  const wantsJsonDiagnosis = request.goal === "diagnose" && request.format === "json";
  if (wantsJsonDiagnosis && !isTestStatus) {
    throw new Error(
      "`--goal diagnose --format json` is currently supported only for `test-status` watch flows."
    );
  }

  return isTestStatus ? runTestStatusWatch(request, cycles) : runGenericWatch(request, cycles);
}
1368
4005
 
1369
4006
  // src/core/exec.ts
@@ -1460,9 +4097,13 @@ async function runExec(request) {
1460
4097
  throw new Error("Provide either --shell <command> or -- <program> [args...].");
1461
4098
  }
1462
4099
  const shellPath = process.env.SHELL || "/bin/bash";
4100
+ const commandPreview = buildCommandPreview(request);
4101
+ const commandCwd = request.cwd ?? process.cwd();
4102
+ const shouldCacheTestStatusBase = request.presetName === "test-status" && !request.skipCacheWrite;
4103
+ const previousCachedRun = shouldCacheTestStatusBase ? tryReadCachedTestStatusRun() : null;
1463
4104
  if (request.config.runtime.verbose) {
1464
4105
  process.stderr.write(
1465
- `${pc2.dim("sift")} exec mode=${hasShellCommand ? "shell" : "argv"} command=${buildCommandPreview(request)}
4106
+ `${pc2.dim("sift")} exec mode=${hasShellCommand ? "shell" : "argv"} command=${commandPreview}
1466
4107
  `
1467
4108
  );
1468
4109
  }
@@ -1472,8 +4113,10 @@ async function runExec(request) {
1472
4113
  let childStatus = null;
1473
4114
  let childSignal = null;
1474
4115
  const child = hasShellCommand ? spawn(shellPath, ["-lc", request.shellCommand], {
4116
+ cwd: commandCwd,
1475
4117
  stdio: ["inherit", "pipe", "pipe"]
1476
4118
  }) : spawn(request.command[0], request.command.slice(1), {
4119
+ cwd: commandCwd,
1477
4120
  stdio: ["inherit", "pipe", "pipe"]
1478
4121
  });
1479
4122
  const handleChunk = (chunk) => {
@@ -1513,12 +4156,19 @@ async function runExec(request) {
1513
4156
  });
1514
4157
  const exitCode = normalizeChildExitCode(childStatus, childSignal);
1515
4158
  const capturedOutput = capture.render();
4159
+ const autoWatchDetected = !request.watch && looksLikeWatchStream(capturedOutput);
4160
+ const useWatchFlow = Boolean(request.watch) || autoWatchDetected;
4161
+ const shouldCacheTestStatus = shouldCacheTestStatusBase && !useWatchFlow;
1516
4162
  if (request.config.runtime.verbose) {
1517
4163
  process.stderr.write(
1518
4164
  `${pc2.dim("sift")} child_exit=${exitCode} captured_chars=${capture.getTotalChars()} capture_truncated=${capture.wasTruncated()}
1519
4165
  `
1520
4166
  );
1521
4167
  }
4168
+ if (autoWatchDetected) {
4169
+ process.stderr.write(`${pc2.dim("sift")} auto-watch=detected
4170
+ `);
4171
+ }
1522
4172
  if (!bypassed) {
1523
4173
  if (request.showRaw && capturedOutput.length > 0) {
1524
4174
  process.stderr.write(capturedOutput);
@@ -1526,7 +4176,7 @@ async function runExec(request) {
1526
4176
  process.stderr.write("\n");
1527
4177
  }
1528
4178
  }
1529
- const execSuccessShortcut = getExecSuccessShortcut({
4179
+ const execSuccessShortcut = useWatchFlow ? null : getExecSuccessShortcut({
1530
4180
  presetName: request.presetName,
1531
4181
  exitCode,
1532
4182
  capturedOutput
@@ -1542,11 +4192,108 @@ async function runExec(request) {
1542
4192
  `);
1543
4193
  return exitCode;
1544
4194
  }
4195
+ if (useWatchFlow) {
4196
+ let output2 = await runWatch({
4197
+ ...request,
4198
+ stdin: capturedOutput
4199
+ });
4200
+ if (isInsufficientSignalOutput(output2)) {
4201
+ output2 = buildInsufficientSignalOutput({
4202
+ presetName: request.presetName,
4203
+ originalLength: capture.getTotalChars(),
4204
+ truncatedApplied: capture.wasTruncated(),
4205
+ exitCode
4206
+ });
4207
+ }
4208
+ process.stdout.write(`${output2}
4209
+ `);
4210
+ return exitCode;
4211
+ }
4212
+ const analysis = shouldCacheTestStatus ? analyzeTestStatus(capturedOutput) : null;
4213
+ let currentCachedRun = shouldCacheTestStatus && analysis ? createCachedTestStatusRun({
4214
+ cwd: commandCwd,
4215
+ commandKey: buildTestStatusCommandKey({
4216
+ commandPreview,
4217
+ shellCommand: request.shellCommand
4218
+ }),
4219
+ commandPreview,
4220
+ command: request.command,
4221
+ shellCommand: request.shellCommand,
4222
+ detail: request.detail ?? "standard",
4223
+ exitCode,
4224
+ rawOutput: capturedOutput,
4225
+ originalChars: capture.getTotalChars(),
4226
+ truncatedApplied: capture.wasTruncated(),
4227
+ analysis
4228
+ }) : null;
4229
+ const targetDelta = request.diff && !request.dryRun && previousCachedRun && currentCachedRun ? diffTestStatusTargets({
4230
+ previous: previousCachedRun,
4231
+ current: currentCachedRun
4232
+ }) : null;
1545
4233
  let output = await runSift({
1546
4234
  ...request,
1547
- stdin: capturedOutput
4235
+ stdin: capturedOutput,
4236
+ analysisContext: request.skipCacheWrite && request.presetName === "test-status" ? [
4237
+ request.analysisContext,
4238
+ "Zoom context:",
4239
+ "- This pass is remaining-only.",
4240
+ "- The full-suite truth already exists from the cached full run.",
4241
+ "- Do not reintroduce resolved tests into the diagnosis."
4242
+ ].filter((value) => Boolean(value)).join("\n") : request.analysisContext,
4243
+ testStatusContext: shouldCacheTestStatus && analysis ? {
4244
+ ...request.testStatusContext,
4245
+ resolvedTests: targetDelta?.resolved ?? request.testStatusContext?.resolvedTests,
4246
+ remainingTests: targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? request.testStatusContext?.remainingTests,
4247
+ remainingSubsetAvailable: request.testStatusContext?.remainingSubsetAvailable ?? Boolean(
4248
+ currentCachedRun?.pytest?.subsetCapable && (targetDelta?.remaining ?? currentCachedRun?.pytest?.failingNodeIds ?? []).length > 0
4249
+ )
4250
+ } : request.testStatusContext
1548
4251
  });
1549
- if (isInsufficientSignalOutput(output)) {
4252
+ if (shouldCacheTestStatus) {
4253
+ if (isInsufficientSignalOutput(output)) {
4254
+ output = buildInsufficientSignalOutput({
4255
+ presetName: request.presetName,
4256
+ originalLength: capture.getTotalChars(),
4257
+ truncatedApplied: capture.wasTruncated(),
4258
+ exitCode
4259
+ });
4260
+ }
4261
+ if (request.diff && !request.dryRun && previousCachedRun && currentCachedRun) {
4262
+ const delta = diffTestStatusRuns({
4263
+ previous: previousCachedRun,
4264
+ current: currentCachedRun
4265
+ });
4266
+ currentCachedRun = createCachedTestStatusRun({
4267
+ cwd: commandCwd,
4268
+ commandKey: currentCachedRun.commandKey,
4269
+ commandPreview,
4270
+ command: request.command,
4271
+ shellCommand: request.shellCommand,
4272
+ detail: request.detail ?? "standard",
4273
+ exitCode,
4274
+ rawOutput: capturedOutput,
4275
+ originalChars: capture.getTotalChars(),
4276
+ truncatedApplied: capture.wasTruncated(),
4277
+ analysis,
4278
+ remainingNodeIds: delta.remainingNodeIds
4279
+ });
4280
+ if (delta.lines.length > 0) {
4281
+ output = `${delta.lines.join("\n")}
4282
+ ${output}`;
4283
+ }
4284
+ }
4285
+ if (currentCachedRun) {
4286
+ try {
4287
+ writeCachedTestStatusRun(currentCachedRun);
4288
+ } catch (error) {
4289
+ if (request.config.runtime.verbose) {
4290
+ const reason = error instanceof Error ? error.message : "unknown_error";
4291
+ process.stderr.write(`${pc2.dim("sift")} cache_write=failed reason=${reason}
4292
+ `);
4293
+ }
4294
+ }
4295
+ }
4296
+ } else if (isInsufficientSignalOutput(output)) {
1550
4297
  output = buildInsufficientSignalOutput({
1551
4298
  presetName: request.presetName,
1552
4299
  originalLength: capture.getTotalChars(),
@@ -1638,19 +4385,19 @@ var defaultConfig = {
1638
4385
  };
1639
4386
 
1640
4387
  // src/config/load.ts
1641
- import fs from "fs";
1642
- import path2 from "path";
4388
+ import fs2 from "fs";
4389
+ import path3 from "path";
1643
4390
  import YAML from "yaml";
1644
4391
  function findConfigPath(explicitPath) {
1645
4392
  if (explicitPath) {
1646
- const resolved = path2.resolve(explicitPath);
1647
- if (!fs.existsSync(resolved)) {
4393
+ const resolved = path3.resolve(explicitPath);
4394
+ if (!fs2.existsSync(resolved)) {
1648
4395
  throw new Error(`Config file not found: ${resolved}`);
1649
4396
  }
1650
4397
  return resolved;
1651
4398
  }
1652
4399
  for (const candidate of getDefaultConfigSearchPaths()) {
1653
- if (fs.existsSync(candidate)) {
4400
+ if (fs2.existsSync(candidate)) {
1654
4401
  return candidate;
1655
4402
  }
1656
4403
  }
@@ -1661,7 +4408,7 @@ function loadRawConfig(explicitPath) {
1661
4408
  if (!configPath) {
1662
4409
  return {};
1663
4410
  }
1664
- const content = fs.readFileSync(configPath, "utf8");
4411
+ const content = fs2.readFileSync(configPath, "utf8");
1665
4412
  return YAML.parse(content) ?? {};
1666
4413
  }
1667
4414
 
@@ -1715,17 +4462,17 @@ function resolveProviderApiKey(provider, baseUrl, env) {
1715
4462
  }
1716
4463
 
1717
4464
  // src/config/schema.ts
1718
- import { z } from "zod";
1719
- var providerNameSchema = z.enum(["openai", "openai-compatible"]);
1720
- var outputFormatSchema = z.enum([
4465
+ import { z as z3 } from "zod";
4466
+ var providerNameSchema = z3.enum(["openai", "openai-compatible"]);
4467
+ var outputFormatSchema = z3.enum([
1721
4468
  "brief",
1722
4469
  "bullets",
1723
4470
  "json",
1724
4471
  "verdict"
1725
4472
  ]);
1726
- var responseModeSchema = z.enum(["text", "json"]);
1727
- var jsonResponseFormatModeSchema = z.enum(["auto", "on", "off"]);
1728
- var promptPolicyNameSchema = z.enum([
4473
+ var responseModeSchema = z3.enum(["text", "json"]);
4474
+ var jsonResponseFormatModeSchema = z3.enum(["auto", "on", "off"]);
4475
+ var promptPolicyNameSchema = z3.enum([
1729
4476
  "test-status",
1730
4477
  "audit-critical",
1731
4478
  "diff-summary",
@@ -1735,41 +4482,41 @@ var promptPolicyNameSchema = z.enum([
1735
4482
  "typecheck-summary",
1736
4483
  "lint-failures"
1737
4484
  ]);
1738
- var providerConfigSchema = z.object({
4485
+ var providerConfigSchema = z3.object({
1739
4486
  provider: providerNameSchema,
1740
- model: z.string().min(1),
1741
- baseUrl: z.string().url(),
1742
- apiKey: z.string().optional(),
4487
+ model: z3.string().min(1),
4488
+ baseUrl: z3.string().url(),
4489
+ apiKey: z3.string().optional(),
1743
4490
  jsonResponseFormat: jsonResponseFormatModeSchema,
1744
- timeoutMs: z.number().int().positive(),
1745
- temperature: z.number().min(0).max(2),
1746
- maxOutputTokens: z.number().int().positive()
4491
+ timeoutMs: z3.number().int().positive(),
4492
+ temperature: z3.number().min(0).max(2),
4493
+ maxOutputTokens: z3.number().int().positive()
1747
4494
  });
1748
- var inputConfigSchema = z.object({
1749
- stripAnsi: z.boolean(),
1750
- redact: z.boolean(),
1751
- redactStrict: z.boolean(),
1752
- maxCaptureChars: z.number().int().positive(),
1753
- maxInputChars: z.number().int().positive(),
1754
- headChars: z.number().int().positive(),
1755
- tailChars: z.number().int().positive()
4495
+ var inputConfigSchema = z3.object({
4496
+ stripAnsi: z3.boolean(),
4497
+ redact: z3.boolean(),
4498
+ redactStrict: z3.boolean(),
4499
+ maxCaptureChars: z3.number().int().positive(),
4500
+ maxInputChars: z3.number().int().positive(),
4501
+ headChars: z3.number().int().positive(),
4502
+ tailChars: z3.number().int().positive()
1756
4503
  });
1757
- var runtimeConfigSchema = z.object({
1758
- rawFallback: z.boolean(),
1759
- verbose: z.boolean()
4504
+ var runtimeConfigSchema = z3.object({
4505
+ rawFallback: z3.boolean(),
4506
+ verbose: z3.boolean()
1760
4507
  });
1761
- var presetDefinitionSchema = z.object({
1762
- question: z.string().min(1),
4508
+ var presetDefinitionSchema = z3.object({
4509
+ question: z3.string().min(1),
1763
4510
  format: outputFormatSchema,
1764
4511
  policy: promptPolicyNameSchema.optional(),
1765
- outputContract: z.string().optional(),
1766
- fallbackJson: z.unknown().optional()
4512
+ outputContract: z3.string().optional(),
4513
+ fallbackJson: z3.unknown().optional()
1767
4514
  });
1768
- var siftConfigSchema = z.object({
4515
+ var siftConfigSchema = z3.object({
1769
4516
  provider: providerConfigSchema,
1770
4517
  input: inputConfigSchema,
1771
4518
  runtime: runtimeConfigSchema,
1772
- presets: z.record(presetDefinitionSchema)
4519
+ presets: z3.record(presetDefinitionSchema)
1773
4520
  });
1774
4521
 
1775
4522
  // src/config/resolve.ts