@bilalimamoglu/sift 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/dist/cli.js +888 -114
- package/dist/index.d.ts +11 -2
- package/dist/index.js +533 -44
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -62,7 +62,7 @@ function evaluateGate(args) {
|
|
|
62
62
|
// src/core/testStatusDecision.ts
|
|
63
63
|
import { z } from "zod";
|
|
64
64
|
// Textual description of the full test-status diagnose JSON shape.
// Interpolated into the provider prompt ("Use this exact contract: ...") as the
// default when no explicit output contract is supplied. This is a pseudo-schema
// for the model to read, not parseable JSON (note the `|` unions and `?` markers).
var TEST_STATUS_DIAGNOSE_JSON_CONTRACT = '{"status":"ok|insufficient","diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","dominant_blocker_bucket_index":number|null,"provider_used":boolean,"provider_confidence":number|null,"provider_failed":boolean,"raw_slice_used":boolean,"raw_slice_strategy":"none|bucket_evidence|traceback_window|head_tail","resolved_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_summary":{"count":number,"families":[{"prefix":string,"count":number}]},"remaining_subset_available":boolean,"main_buckets":[{"bucket_index":number,"label":string,"count":number,"root_cause":string,"evidence":string[],"bucket_confidence":number,"root_cause_confidence":number,"dominant":boolean,"secondary_visible_despite_blocker":boolean,"mini_diff":{"added_paths"?:number,"removed_models"?:number,"changed_task_mappings"?:number}|null}],"read_targets":[{"file":string,"line":number|null,"why":string,"bucket_index":number,"context_hint":{"start_line":number|null,"end_line":number|null,"search_hint":string|null}}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string},"resolved_tests"?:string[],"remaining_tests"?:string[]}';
|
|
65
|
-
var TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT = '{"diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","provider_confidence":number|null,"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string}}';
|
|
65
|
+
// Textual description of the reduced JSON shape requested from a provider when it
// only supplements the heuristic diagnosis. Includes `bucket_supplements` entries
// (label, count, root_cause, anchor, fix_hint, confidence).
// NOTE(review): presumably mirrors testStatusProviderSupplementSchema — keep the
// two in sync when fields change.
var TEST_STATUS_PROVIDER_SUPPLEMENT_JSON_CONTRACT = '{"diagnosis_complete":boolean,"raw_needed":boolean,"additional_source_read_likely_low_value":boolean,"read_raw_only_if":string|null,"decision":"stop|zoom|read_source|read_raw","provider_confidence":number|null,"bucket_supplements":[{"label":string,"count":number,"root_cause":string,"anchor":{"file":string|null,"line":number|null,"search_hint":string|null},"fix_hint":string|null,"confidence":number}],"next_best_action":{"code":"fix_dominant_blocker|read_source_for_bucket|read_raw_for_exact_traceback|insufficient_signal","bucket_index":number|null,"note":string}}';
|
|
66
66
|
var nextBestActionSchema = z.object({
|
|
67
67
|
code: z.enum([
|
|
68
68
|
"fix_dominant_blocker",
|
|
@@ -80,6 +80,20 @@ var testStatusProviderSupplementSchema = z.object({
|
|
|
80
80
|
read_raw_only_if: z.string().nullable(),
|
|
81
81
|
decision: z.enum(["stop", "zoom", "read_source", "read_raw"]),
|
|
82
82
|
provider_confidence: z.number().min(0).max(1).nullable(),
|
|
83
|
+
bucket_supplements: z.array(
|
|
84
|
+
z.object({
|
|
85
|
+
label: z.string().min(1),
|
|
86
|
+
count: z.number().int().positive(),
|
|
87
|
+
root_cause: z.string().min(1),
|
|
88
|
+
anchor: z.object({
|
|
89
|
+
file: z.string().nullable(),
|
|
90
|
+
line: z.number().int().nullable(),
|
|
91
|
+
search_hint: z.string().nullable()
|
|
92
|
+
}),
|
|
93
|
+
fix_hint: z.string().nullable(),
|
|
94
|
+
confidence: z.number().min(0).max(1)
|
|
95
|
+
})
|
|
96
|
+
).max(2),
|
|
83
97
|
next_best_action: nextBestActionSchema
|
|
84
98
|
});
|
|
85
99
|
var testStatusDiagnoseContractSchema = z.object({
|
|
@@ -231,14 +245,73 @@ function classifyGenericBucketType(reason) {
|
|
|
231
245
|
}
|
|
232
246
|
return "unknown_failure";
|
|
233
247
|
}
|
|
248
|
+
/**
 * Reports whether a bucket stands for failures that could not be classified.
 *
 * A bucket counts as unknown when its `source` is exactly "unknown" or when its
 * `reason` begins with the literal prefix "unknown " (trailing space included).
 *
 * @param {{source?: string, reason: string}} bucket - Bucket to inspect; `reason`
 *   is only read when `source` is not "unknown".
 * @returns {boolean} True for unknown buckets, false otherwise.
 */
function isUnknownBucket(bucket) {
  if (bucket.source === "unknown") {
    return true;
  }
  return bucket.reason.startsWith("unknown ");
}
|
|
251
|
+
/**
 * Classifies a test label by which visible status sets contain it.
 *
 * @param {object} args
 * @param {string} args.label - Label to look up.
 * @param {Set<string>} args.errorLabels - Labels visible as errors.
 * @param {Set<string>} args.failedLabels - Labels visible as failures.
 * @returns {"mixed"|"error"|"failed"|"unknown"} "mixed" when the label is in both
 *   sets, "error" or "failed" when it is in exactly one, "unknown" when in neither.
 */
function classifyVisibleStatusForLabel(args) {
  const inErrors = args.errorLabels.has(args.label);
  const inFailed = args.failedLabels.has(args.label);
  if (inErrors) {
    return inFailed ? "mixed" : "error";
  }
  return inFailed ? "failed" : "unknown";
}
|
|
265
|
+
/**
 * Guesses the coverage kind of a failure from the prefix of its reason string.
 *
 * @param {string} reason - Classified failure reason (e.g. "missing module: foo").
 * @returns {"error"|"failed"|"mixed"} "error" for the environment/setup-style
 *   prefixes listed below, "failed" for "assertion failed:", otherwise "mixed".
 */
function inferCoverageFromReason(reason) {
  // Reason prefixes that are counted against the error total.
  const errorPrefixes = [
    "missing test env:",
    "fixture guard:",
    "service unavailable:",
    "db refused:",
    "auth bypass absent:",
    "missing module:"
  ];
  if (errorPrefixes.some((prefix) => reason.startsWith(prefix))) {
    return "error";
  }
  if (reason.startsWith("assertion failed:")) {
    return "failed";
  }
  return "mixed";
}
|
|
274
|
+
/**
 * Builds an `{ error, failed }` coverage record that credits `count` to one side.
 *
 * @param {object} args
 * @param {number} args.count - Number of items to credit.
 * @param {string} args.coverageKind - "error" credits the error side, "failed"
 *   credits the failed side; any other value (e.g. "mixed") credits neither.
 * @returns {{error: number, failed: number}} Coverage counts.
 */
function buildCoverageCounts(args) {
  const { count, coverageKind } = args;
  return {
    error: coverageKind === "error" ? count : 0,
    failed: coverageKind === "failed" ? count : 0
  };
}
|
|
234
292
|
function buildGenericBuckets(analysis) {
|
|
235
293
|
const buckets = [];
|
|
236
294
|
const grouped = /* @__PURE__ */ new Map();
|
|
295
|
+
const errorLabels = new Set(analysis.visibleErrorLabels);
|
|
296
|
+
const failedLabels = new Set(analysis.visibleFailedLabels);
|
|
237
297
|
const push = (reason, item) => {
|
|
238
|
-
const
|
|
298
|
+
const coverageKind = (() => {
|
|
299
|
+
const status = classifyVisibleStatusForLabel({
|
|
300
|
+
label: item.label,
|
|
301
|
+
errorLabels,
|
|
302
|
+
failedLabels
|
|
303
|
+
});
|
|
304
|
+
return status === "unknown" ? inferCoverageFromReason(reason) : status;
|
|
305
|
+
})();
|
|
306
|
+
const key = `${classifyGenericBucketType(reason)}:${coverageKind}:${reason}`;
|
|
239
307
|
const existing = grouped.get(key);
|
|
240
308
|
if (existing) {
|
|
241
309
|
existing.count += 1;
|
|
310
|
+
if (coverageKind === "error") {
|
|
311
|
+
existing.coverage.error += 1;
|
|
312
|
+
} else if (coverageKind === "failed") {
|
|
313
|
+
existing.coverage.failed += 1;
|
|
314
|
+
}
|
|
242
315
|
if (!existing.representativeItems.some((entry) => entry.label === item.label) && existing.representativeItems.length < 6) {
|
|
243
316
|
existing.representativeItems.push(item);
|
|
244
317
|
}
|
|
@@ -255,7 +328,12 @@ function buildGenericBuckets(analysis) {
|
|
|
255
328
|
entities: [],
|
|
256
329
|
hint: void 0,
|
|
257
330
|
overflowCount: 0,
|
|
258
|
-
overflowLabel: "failing tests/modules"
|
|
331
|
+
overflowLabel: "failing tests/modules",
|
|
332
|
+
coverage: buildCoverageCounts({
|
|
333
|
+
count: 1,
|
|
334
|
+
coverageKind
|
|
335
|
+
}),
|
|
336
|
+
source: "heuristic"
|
|
259
337
|
});
|
|
260
338
|
};
|
|
261
339
|
for (const item of [...analysis.collectionItems, ...analysis.inlineItems]) {
|
|
@@ -308,10 +386,51 @@ function mergeBucketDetails(existing, incoming) {
|
|
|
308
386
|
incoming.overflowCount,
|
|
309
387
|
count - representativeItems.length
|
|
310
388
|
),
|
|
311
|
-
overflowLabel: existing.overflowLabel || incoming.overflowLabel
|
|
389
|
+
overflowLabel: existing.overflowLabel || incoming.overflowLabel,
|
|
390
|
+
labelOverride: existing.labelOverride ?? incoming.labelOverride,
|
|
391
|
+
coverage: {
|
|
392
|
+
error: Math.max(existing.coverage.error, incoming.coverage.error),
|
|
393
|
+
failed: Math.max(existing.coverage.failed, incoming.coverage.failed)
|
|
394
|
+
},
|
|
395
|
+
source: existing.source
|
|
396
|
+
};
|
|
397
|
+
}
|
|
398
|
+
/**
 * Estimates how many visible errors and failures a failure bucket accounts for.
 *
 * Representative items are tallied by their visible status (items that are
 * "mixed" or "unknown" are not counted). For bucket types treated as assertion
 * style the failed side is raised to at least the claimed count; for bucket types
 * treated as environment/setup style the error side is raised instead.
 *
 * @param {object} bucket - Bucket with `type`, `representativeItems` (each with a
 *   `label`), and `countClaimed` / `countVisible`.
 * @param {object} analysis - Analysis with `visibleErrorLabels` and
 *   `visibleFailedLabels` iterables.
 * @returns {{error: number, failed: number}} Coverage counts for the bucket.
 */
function inferFailureBucketCoverage(bucket, analysis) {
  const errorLabels = new Set(analysis.visibleErrorLabels);
  const failedLabels = new Set(analysis.visibleFailedLabels);
  let error = 0;
  let failed = 0;
  for (const item of bucket.representativeItems) {
    const status = classifyVisibleStatusForLabel({
      label: item.label,
      errorLabels,
      failedLabels
    });
    if (status === "error") {
      error += 1;
    } else if (status === "failed") {
      failed += 1;
    }
  }
  // Prefer the count the bucket claims; fall back to what is visible.
  const claimed = bucket.countClaimed ?? bucket.countVisible;
  const failedStyleTypes = ["contract_snapshot_drift", "assertion_failure"];
  if (failedStyleTypes.includes(bucket.type)) {
    return {
      error,
      failed: Math.max(failed, claimed)
    };
  }
  const errorStyleTypes = [
    "shared_environment_blocker",
    "import_dependency_failure",
    "collection_failure",
    "fixture_guard_failure",
    "service_unavailable",
    "db_connection_failure",
    "auth_bypass_absent"
  ];
  if (errorStyleTypes.includes(bucket.type)) {
    return {
      error: Math.max(error, claimed),
      failed
    };
  }
  return {
    error,
    failed
  };
}
|
|
314
|
-
function mergeBuckets(analysis) {
|
|
433
|
+
function mergeBuckets(analysis, extraBuckets = []) {
|
|
315
434
|
const mergedByIdentity = /* @__PURE__ */ new Map();
|
|
316
435
|
const merged = [];
|
|
317
436
|
const pushBucket = (bucket) => {
|
|
@@ -340,7 +459,9 @@ function mergeBuckets(analysis) {
|
|
|
340
459
|
entities: [...bucket2.entities],
|
|
341
460
|
hint: bucket2.hint,
|
|
342
461
|
overflowCount: bucket2.overflowCount,
|
|
343
|
-
overflowLabel: bucket2.overflowLabel
|
|
462
|
+
overflowLabel: bucket2.overflowLabel,
|
|
463
|
+
coverage: inferFailureBucketCoverage(bucket2, analysis),
|
|
464
|
+
source: "heuristic"
|
|
344
465
|
}))) {
|
|
345
466
|
pushBucket(bucket);
|
|
346
467
|
}
|
|
@@ -364,6 +485,9 @@ function mergeBuckets(analysis) {
|
|
|
364
485
|
coveredLabels.add(item.label);
|
|
365
486
|
}
|
|
366
487
|
}
|
|
488
|
+
for (const bucket of extraBuckets) {
|
|
489
|
+
pushBucket(bucket);
|
|
490
|
+
}
|
|
367
491
|
return merged;
|
|
368
492
|
}
|
|
369
493
|
function dominantBucketPriority(bucket) {
|
|
@@ -379,6 +503,9 @@ function dominantBucketPriority(bucket) {
|
|
|
379
503
|
if (bucket.type === "collection_failure") {
|
|
380
504
|
return 2;
|
|
381
505
|
}
|
|
506
|
+
if (isUnknownBucket(bucket)) {
|
|
507
|
+
return 2;
|
|
508
|
+
}
|
|
382
509
|
if (bucket.type === "contract_snapshot_drift") {
|
|
383
510
|
return 1;
|
|
384
511
|
}
|
|
@@ -403,6 +530,9 @@ function isDominantBlockerType(type) {
|
|
|
403
530
|
return type === "shared_environment_blocker" || type === "import_dependency_failure" || type === "collection_failure";
|
|
404
531
|
}
|
|
405
532
|
function labelForBucket(bucket) {
|
|
533
|
+
if (bucket.labelOverride) {
|
|
534
|
+
return bucket.labelOverride;
|
|
535
|
+
}
|
|
406
536
|
if (bucket.reason.startsWith("missing test env:")) {
|
|
407
537
|
return "missing test env";
|
|
408
538
|
}
|
|
@@ -442,15 +572,27 @@ function labelForBucket(bucket) {
|
|
|
442
572
|
if (bucket.type === "runtime_failure") {
|
|
443
573
|
return "runtime failure";
|
|
444
574
|
}
|
|
575
|
+
if (bucket.reason.startsWith("unknown setup blocker:")) {
|
|
576
|
+
return "unknown setup blocker";
|
|
577
|
+
}
|
|
578
|
+
if (bucket.reason.startsWith("unknown failure family:")) {
|
|
579
|
+
return "unknown failure family";
|
|
580
|
+
}
|
|
445
581
|
return "unknown failure";
|
|
446
582
|
}
|
|
447
583
|
/**
 * Returns the root-cause confidence reported for a bucket.
 *
 * Checks run in this order: unknown buckets get a fixed 0.52; buckets whose
 * reason has one of the listed environment-style prefixes get 0.95; contract
 * snapshot drift gets 0.92 when entities were extracted, else 0.76. Everything
 * else is the bucket's own confidence clamped to [0.6, ceiling], where the
 * ceiling is 0.82 for provider-sourced buckets and 0.88 otherwise.
 *
 * @param {object} bucket - Bucket with `reason`, `type`, `entities`,
 *   `confidence`, and optionally `source`.
 * @returns {number} Confidence value.
 */
function rootCauseConfidenceFor(bucket) {
  if (isUnknownBucket(bucket)) {
    return 0.52;
  }
  const highConfidencePrefixes = [
    "missing test env:",
    "missing module:",
    "db refused:",
    "service unavailable:",
    "auth bypass absent:"
  ];
  if (highConfidencePrefixes.some((prefix) => bucket.reason.startsWith(prefix))) {
    return 0.95;
  }
  if (bucket.type === "contract_snapshot_drift") {
    return bucket.entities.length > 0 ? 0.92 : 0.76;
  }
  // Provider-sourced buckets are capped lower than heuristic ones.
  const ceiling = bucket.source === "provider" ? 0.82 : 0.88;
  return Math.max(0.6, Math.min(bucket.confidence, ceiling));
}
|
|
456
598
|
function buildBucketEvidence(bucket) {
|
|
@@ -494,6 +636,12 @@ function buildReadTargetWhy(args) {
|
|
|
494
636
|
if (args.bucket.reason.startsWith("auth bypass absent:")) {
|
|
495
637
|
return "it contains the auth bypass setup behind this bucket";
|
|
496
638
|
}
|
|
639
|
+
if (args.bucket.reason.startsWith("unknown setup blocker:")) {
|
|
640
|
+
return "it is the first anchored setup failure in this unknown bucket";
|
|
641
|
+
}
|
|
642
|
+
if (args.bucket.reason.startsWith("unknown failure family:")) {
|
|
643
|
+
return "it is the first anchored failing test in this unknown bucket";
|
|
644
|
+
}
|
|
497
645
|
if (args.bucket.type === "contract_snapshot_drift") {
|
|
498
646
|
if (args.bucketLabel === "route drift") {
|
|
499
647
|
return "it maps to the visible route drift bucket";
|
|
@@ -543,6 +691,9 @@ function buildReadTargetSearchHint(bucket, anchor) {
|
|
|
543
691
|
if (assertionText) {
|
|
544
692
|
return assertionText;
|
|
545
693
|
}
|
|
694
|
+
if (bucket.reason.startsWith("unknown ")) {
|
|
695
|
+
return anchor.reason;
|
|
696
|
+
}
|
|
546
697
|
const fallbackLabel = anchor.label.split("::")[1]?.trim();
|
|
547
698
|
return fallbackLabel || null;
|
|
548
699
|
}
|
|
@@ -602,6 +753,12 @@ function buildConcreteNextNote(args) {
|
|
|
602
753
|
if (args.nextBestAction.code === "read_source_for_bucket") {
|
|
603
754
|
return lead;
|
|
604
755
|
}
|
|
756
|
+
if (args.nextBestAction.code === "insufficient_signal") {
|
|
757
|
+
if (args.nextBestAction.note.startsWith("Provider follow-up failed")) {
|
|
758
|
+
return args.nextBestAction.note;
|
|
759
|
+
}
|
|
760
|
+
return `${lead} Then take one deeper sift pass before raw traceback.`;
|
|
761
|
+
}
|
|
605
762
|
return args.nextBestAction.note;
|
|
606
763
|
}
|
|
607
764
|
function extractMiniDiff(input, bucket) {
|
|
@@ -626,6 +783,152 @@ function extractMiniDiff(input, bucket) {
|
|
|
626
783
|
...changedTaskMappings > 0 ? { changed_task_mappings: changedTaskMappings } : {}
|
|
627
784
|
};
|
|
628
785
|
}
|
|
786
|
+
/**
 * Decides whether a provider bucket supplement should be counted against the
 * remaining errors or the remaining failures.
 *
 * The label and root cause are lowercased and scanned for keywords: setup and
 * environment style keywords win first ("error"), then assertion/snapshot style
 * keywords ("failed"). With no keyword hit, the supplement is an "error" only
 * when errors remain and no failures remain; otherwise "failed".
 *
 * @param {object} args
 * @param {string} args.label - Supplement label.
 * @param {string} args.rootCause - Supplement root-cause text.
 * @param {number} args.remainingErrors - Unexplained error count.
 * @param {number} args.remainingFailed - Unexplained failure count.
 * @returns {"error"|"failed"} Coverage kind for the supplement.
 */
function inferSupplementCoverageKind(args) {
  const normalized = `${args.label} ${args.rootCause}`.toLowerCase();
  // "db" only counts at the start or end of a letter run, so words like
  // "sandbox" or "feedback" no longer trip the error classification.
  const errorPattern = /env|setup|fixture|import|dependency|service|(?<![a-z])db|db(?![a-z])|database|auth bypass|collection|connection refused/;
  if (errorPattern.test(normalized)) {
    return "error";
  }
  if (/snapshot|contract|drift|assertion|expected|actual|golden/.test(normalized)) {
    return "failed";
  }
  if (args.remainingErrors > 0 && args.remainingFailed === 0) {
    return "error";
  }
  return "failed";
}
|
|
801
|
+
/**
 * Turns provider-supplied bucket supplements into bucket objects, never letting
 * them claim more than the still-unexplained errors or failures.
 *
 * Supplements are processed in order. Each one is classified as "error" or
 * "failed", its count is clamped to the matching remaining budget, and that
 * budget is reduced for the supplements that follow. Supplements whose clamped
 * count is zero are dropped.
 *
 * @param {object} args
 * @param {Array<object>} args.supplements - Entries with `label`, `count`,
 *   `root_cause`, `anchor` ({ file, line, search_hint }), `fix_hint`, `confidence`.
 * @param {number} args.remainingErrors - Unexplained error budget.
 * @param {number} args.remainingFailed - Unexplained failure budget.
 * @returns {Array<object>} Buckets tagged with `source: "provider"`.
 */
function buildProviderSupplementBuckets(args) {
  let remainingErrors = args.remainingErrors;
  let remainingFailed = args.remainingFailed;
  return args.supplements.flatMap((supplement) => {
    const coverageKind = inferSupplementCoverageKind({
      label: supplement.label,
      rootCause: supplement.root_cause,
      remainingErrors,
      remainingFailed
    });
    const budget = coverageKind === "error" ? remainingErrors : remainingFailed;
    const count = Math.max(0, Math.min(supplement.count, budget));
    if (count === 0) {
      return [];
    }
    if (coverageKind === "error") {
      remainingErrors -= count;
    } else {
      remainingFailed -= count;
    }
    const { anchor } = supplement;
    // Provider confidence is clamped to [0.4, 0.82] for both the bucket and its anchor.
    const clampedConfidence = Math.max(0.4, Math.min(supplement.confidence, 0.82));
    let anchorKind = "none";
    if (anchor.file && anchor.line !== null) {
      anchorKind = "traceback";
    } else if (anchor.file) {
      anchorKind = "test_label";
    } else if (anchor.search_hint) {
      anchorKind = "entity";
    }
    const representativeItem = {
      label: anchor.file ?? `${supplement.label} supplement`,
      reason: supplement.root_cause,
      group: supplement.label,
      file: anchor.file,
      line: anchor.line,
      anchor_kind: anchorKind,
      anchor_confidence: clampedConfidence
    };
    const type = classifyGenericBucketType(supplement.root_cause);
    const headline = `${supplement.label}: ${formatCount(count, "visible failure")} share ${supplement.root_cause}.`;
    return [
      {
        type,
        headline,
        summaryLines: [headline],
        reason: supplement.root_cause,
        count,
        confidence: clampedConfidence,
        representativeItems: [representativeItem],
        entities: anchor.search_hint ? [anchor.search_hint] : [],
        hint: supplement.fix_hint ?? void 0,
        overflowCount: Math.max(count - 1, 0),
        overflowLabel: "failing tests/modules",
        labelOverride: supplement.label,
        coverage: buildCoverageCounts({
          count,
          coverageKind
        }),
        source: "provider"
      }
    ];
  });
}
|
|
856
|
+
/**
 * Picks a representative anchor item for an "unknown" bucket.
 *
 * For error buckets the first entry of `analysis.visibleErrorItems` is used when
 * present. Otherwise the first visible label of the requested kind is used and a
 * file path is derived from it when possible.
 *
 * @param {object} args
 * @param {"error"|"failed"} args.kind - Which residual family to anchor.
 * @param {object} args.analysis - Analysis with `visibleErrorItems`,
 *   `visibleErrorLabels`, and `visibleFailedLabels`.
 * @returns {object|null} Anchor item, or null when no label is visible.
 */
function pickUnknownAnchor(args) {
  const isError = args.kind === "error";
  const statusItem = isError ? args.analysis.visibleErrorItems[0] : null;
  if (statusItem) {
    // Copy only the anchor fields rather than sharing the analysis object.
    return {
      label: statusItem.label,
      reason: statusItem.reason,
      group: statusItem.group,
      file: statusItem.file,
      line: statusItem.line,
      anchor_kind: statusItem.anchor_kind,
      anchor_confidence: statusItem.anchor_confidence
    };
  }
  const label = isError ? args.analysis.visibleErrorLabels[0] : args.analysis.visibleFailedLabels[0];
  if (!label) {
    return null;
  }
  const normalizedLabel = normalizeTestId(label);
  const fileMatch = normalizedLabel.match(/^([A-Za-z0-9_./-]+\.[A-Za-z0-9]+)\b/);
  // Fall back to the text before "::" when the label has no path-like prefix.
  const candidate = fileMatch?.[1] ?? normalizedLabel.split("::")[0];
  // A candidate identical to the raw label adds no location information.
  const file = candidate && candidate !== label ? candidate : null;
  return {
    label,
    reason: isError ? "setup failures share a repeated but unclassified pattern" : "failing tests share a repeated but unclassified pattern",
    group: isError ? "unknown setup blocker" : "unknown failure family",
    file,
    line: null,
    anchor_kind: file ? "test_label" : "none",
    anchor_confidence: file ? 0.6 : 0
  };
}
|
|
886
|
+
/**
 * Builds a placeholder bucket for residual errors or failures that no other
 * bucket explains.
 *
 * @param {object} args
 * @param {object} args.analysis - Analysis forwarded to `pickUnknownAnchor`.
 * @param {"error"|"failed"} args.kind - Residual family the bucket represents.
 * @param {number} args.count - Residual count; non-positive values yield null.
 * @returns {object|null} Bucket tagged with `source: "unknown"`, or null.
 */
function buildUnknownBucket(args) {
  if (args.count <= 0) {
    return null;
  }
  const anchor = pickUnknownAnchor(args);
  const isError = args.kind === "error";
  const label = isError ? "unknown setup blocker" : "unknown failure family";
  const reason = isError ? "unknown setup blocker: setup failures share a repeated but unclassified pattern" : "unknown failure family: failing tests share a repeated but unclassified pattern";
  const headline = `${label}: ${formatCount(args.count, "visible failure")} share a repeated but unclassified pattern.`;
  return {
    type: "unknown_failure",
    headline,
    summaryLines: [headline],
    reason,
    count: args.count,
    confidence: 0.45,
    representativeItems: anchor ? [anchor] : [],
    entities: [],
    hint: isError ? "Take one deeper sift pass or inspect the first anchored setup failure." : "Take one deeper sift pass or inspect the first anchored failing test.",
    // The anchor, when present, is the one item that is not overflow.
    overflowCount: Math.max(args.count - (anchor ? 1 : 0), 0),
    overflowLabel: "failing tests/modules",
    labelOverride: label,
    coverage: buildCoverageCounts({
      count: args.count,
      coverageKind: isError ? "error" : "failed"
    }),
    source: "unknown"
  };
}
|
|
916
|
+
/**
 * Computes how many errors and failures remain unexplained after summing the
 * coverage of the given buckets.
 *
 * @param {object} args
 * @param {{errors: number, failed: number}} args.analysis - Totals from the run.
 * @param {Array<{coverage: {error: number, failed: number}}>} args.buckets -
 *   Buckets whose coverage is summed.
 * @returns {{remainingErrors: number, remainingFailed: number}} Residual counts,
 *   never below zero even when buckets over-claim.
 */
function buildCoverageResiduals(args) {
  let coveredErrors = 0;
  let coveredFailed = 0;
  for (const bucket of args.buckets) {
    coveredErrors += bucket.coverage.error;
    coveredFailed += bucket.coverage.failed;
  }
  return {
    remainingErrors: Math.max(args.analysis.errors - coveredErrors, 0),
    remainingFailed: Math.max(args.analysis.failed - coveredFailed, 0)
  };
}
|
|
629
932
|
function buildOutcomeLines(analysis) {
|
|
630
933
|
if (analysis.noTestsCollected) {
|
|
631
934
|
return ["- Tests did not run.", "- Collected 0 items."];
|
|
@@ -744,6 +1047,12 @@ function buildStandardFixText(args) {
|
|
|
744
1047
|
if (args.bucket.reason.startsWith("auth bypass absent:")) {
|
|
745
1048
|
return "Restore the test auth bypass setup and rerun the full suite at standard.";
|
|
746
1049
|
}
|
|
1050
|
+
if (args.bucket.reason.startsWith("unknown setup blocker:")) {
|
|
1051
|
+
return "Take one deeper sift pass or inspect the first anchored setup failure before rerunning.";
|
|
1052
|
+
}
|
|
1053
|
+
if (args.bucket.reason.startsWith("unknown failure family:")) {
|
|
1054
|
+
return "Take one deeper sift pass or inspect the first anchored failing test before rerunning.";
|
|
1055
|
+
}
|
|
747
1056
|
if (args.bucket.type === "contract_snapshot_drift") {
|
|
748
1057
|
return "Review the visible drift and regenerate the contract snapshots if the changes are intentional.";
|
|
749
1058
|
}
|
|
@@ -840,7 +1149,35 @@ function renderVerbose(args) {
|
|
|
840
1149
|
return lines.join("\n");
|
|
841
1150
|
}
|
|
842
1151
|
function buildTestStatusDiagnoseContract(args) {
|
|
843
|
-
const
|
|
1152
|
+
const heuristicBuckets = mergeBuckets(args.analysis);
|
|
1153
|
+
const preUnknownSimpleCollectionFailure = args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && heuristicBuckets.length === 0 && (args.providerBucketSupplements?.length ?? 0) === 0;
|
|
1154
|
+
const heuristicResiduals = buildCoverageResiduals({
|
|
1155
|
+
analysis: args.analysis,
|
|
1156
|
+
buckets: heuristicBuckets
|
|
1157
|
+
});
|
|
1158
|
+
const providerSupplementBuckets = buildProviderSupplementBuckets({
|
|
1159
|
+
supplements: args.providerBucketSupplements ?? [],
|
|
1160
|
+
remainingErrors: heuristicResiduals.remainingErrors,
|
|
1161
|
+
remainingFailed: heuristicResiduals.remainingFailed
|
|
1162
|
+
});
|
|
1163
|
+
const combinedBuckets = mergeBuckets(args.analysis, providerSupplementBuckets);
|
|
1164
|
+
const residuals = buildCoverageResiduals({
|
|
1165
|
+
analysis: args.analysis,
|
|
1166
|
+
buckets: combinedBuckets
|
|
1167
|
+
});
|
|
1168
|
+
const unknownBuckets = preUnknownSimpleCollectionFailure ? [] : [
|
|
1169
|
+
buildUnknownBucket({
|
|
1170
|
+
analysis: args.analysis,
|
|
1171
|
+
kind: "error",
|
|
1172
|
+
count: residuals.remainingErrors
|
|
1173
|
+
}),
|
|
1174
|
+
buildUnknownBucket({
|
|
1175
|
+
analysis: args.analysis,
|
|
1176
|
+
kind: "failed",
|
|
1177
|
+
count: residuals.remainingFailed
|
|
1178
|
+
})
|
|
1179
|
+
].filter((bucket) => Boolean(bucket));
|
|
1180
|
+
const buckets = prioritizeBuckets([...combinedBuckets, ...unknownBuckets]).slice(0, 3);
|
|
844
1181
|
const simpleCollectionFailure = args.analysis.collectionErrorCount !== void 0 && args.analysis.collectionItems.length === 0 && buckets.length === 0;
|
|
845
1182
|
const dominantBucket = buckets.map((bucket, index) => ({
|
|
846
1183
|
bucket,
|
|
@@ -851,8 +1188,10 @@ function buildTestStatusDiagnoseContract(args) {
|
|
|
851
1188
|
}
|
|
852
1189
|
return right.bucket.confidence - left.bucket.confidence;
|
|
853
1190
|
})[0] ?? null;
|
|
854
|
-
const
|
|
855
|
-
const
|
|
1191
|
+
const hasUnknownBucket = buckets.some((bucket) => isUnknownBucket(bucket));
|
|
1192
|
+
const hasConcreteCoverage = args.analysis.failed === 0 && args.analysis.errors === 0 ? true : residuals.remainingErrors === 0 && residuals.remainingFailed === 0;
|
|
1193
|
+
const diagnosisComplete = args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || simpleCollectionFailure || buckets.length > 0 && hasConcreteCoverage && !hasUnknownBucket && (dominantBucket?.bucket.confidence ?? 0) >= 0.6;
|
|
1194
|
+
const rawNeeded = buckets.length === 0 ? !(args.analysis.failed === 0 && args.analysis.errors === 0 && args.analysis.passed > 0 || simpleCollectionFailure) : !diagnosisComplete && !hasUnknownBucket && buckets.every((bucket) => bucket.confidence < 0.7);
|
|
856
1195
|
const dominantBlockerBucketIndex = dominantBucket && isDominantBlockerType(dominantBucket.bucket.type) ? dominantBucket.index + 1 : null;
|
|
857
1196
|
const readTargets = buildReadTargets({
|
|
858
1197
|
buckets,
|
|
@@ -887,6 +1226,12 @@ function buildTestStatusDiagnoseContract(args) {
|
|
|
887
1226
|
bucket_index: null,
|
|
888
1227
|
note: "Inspect the collection traceback or setup code next; the run failed before tests executed."
|
|
889
1228
|
};
|
|
1229
|
+
} else if (hasUnknownBucket) {
|
|
1230
|
+
nextBestAction = {
|
|
1231
|
+
code: "insufficient_signal",
|
|
1232
|
+
bucket_index: dominantBucket ? dominantBucket.index + 1 : null,
|
|
1233
|
+
note: "Take one deeper sift pass or inspect the first anchored failure before falling back to raw traceback."
|
|
1234
|
+
};
|
|
890
1235
|
} else if (!diagnosisComplete) {
|
|
891
1236
|
nextBestAction = {
|
|
892
1237
|
code: rawNeeded ? "read_raw_for_exact_traceback" : "insufficient_signal",
|
|
@@ -924,11 +1269,15 @@ function buildTestStatusDiagnoseContract(args) {
|
|
|
924
1269
|
read_targets: readTargets,
|
|
925
1270
|
next_best_action: nextBestAction
|
|
926
1271
|
};
|
|
1272
|
+
const effectiveDiagnosisComplete = Boolean(args.contractOverrides?.diagnosis_complete ?? diagnosisComplete) && !hasUnknownBucket;
|
|
1273
|
+
const requestedDecision = args.contractOverrides?.decision;
|
|
1274
|
+
const effectiveDecision = hasUnknownBucket && requestedDecision && (requestedDecision === "stop" || requestedDecision === "read_source") ? "zoom" : requestedDecision;
|
|
927
1275
|
const effectiveNextBestAction = args.contractOverrides?.next_best_action ?? baseContract.next_best_action;
|
|
928
1276
|
const mergedContractWithoutDecision = {
|
|
929
1277
|
...baseContract,
|
|
930
1278
|
...args.contractOverrides,
|
|
931
|
-
|
|
1279
|
+
diagnosis_complete: effectiveDiagnosisComplete,
|
|
1280
|
+
status: effectiveDiagnosisComplete ? "ok" : "insufficient",
|
|
932
1281
|
next_best_action: {
|
|
933
1282
|
...effectiveNextBestAction,
|
|
934
1283
|
note: buildConcreteNextNote({
|
|
@@ -942,7 +1291,7 @@ function buildTestStatusDiagnoseContract(args) {
|
|
|
942
1291
|
};
|
|
943
1292
|
const contract = testStatusDiagnoseContractSchema.parse({
|
|
944
1293
|
...mergedContractWithoutDecision,
|
|
945
|
-
decision:
|
|
1294
|
+
decision: effectiveDecision ?? deriveDecision(mergedContractWithoutDecision)
|
|
946
1295
|
});
|
|
947
1296
|
return {
|
|
948
1297
|
contract,
|
|
@@ -1162,7 +1511,25 @@ function extractEnvBlockerName(normalized) {
|
|
|
1162
1511
|
const fallbackMatch = normalized.match(
|
|
1163
1512
|
/\b([A-Z][A-Z0-9_]{2,})\b(?=[^.\n]*DB-isolated tests)/
|
|
1164
1513
|
);
|
|
1165
|
-
|
|
1514
|
+
if (fallbackMatch) {
|
|
1515
|
+
return fallbackMatch[1];
|
|
1516
|
+
}
|
|
1517
|
+
const leadingEnvMatch = normalized.match(
|
|
1518
|
+
/\b([A-Z][A-Z0-9_]{2,})\b(?=[^.\n]{0,80}\b(?:is\s+)?(?:missing|unset|not set|not configured|required)\b)/
|
|
1519
|
+
);
|
|
1520
|
+
if (leadingEnvMatch) {
|
|
1521
|
+
return leadingEnvMatch[1];
|
|
1522
|
+
}
|
|
1523
|
+
const trailingEnvMatch = normalized.match(
|
|
1524
|
+
/\b(?:missing|unset|not set|not configured|required)\b[^.\n]{0,80}\b([A-Z][A-Z0-9_]{2,})\b/
|
|
1525
|
+
);
|
|
1526
|
+
if (trailingEnvMatch) {
|
|
1527
|
+
return trailingEnvMatch[1];
|
|
1528
|
+
}
|
|
1529
|
+
const validationEnvMatch = normalized.match(
|
|
1530
|
+
/\bValidationError\b[^.\n]{0,120}\b([A-Z][A-Z0-9_]{2,})\b/
|
|
1531
|
+
);
|
|
1532
|
+
return validationEnvMatch?.[1] ?? null;
|
|
1166
1533
|
}
|
|
1167
1534
|
function classifyFailureReason(line, options) {
|
|
1168
1535
|
const normalized = line.trim().replace(/^[A-Z]\s+/, "");
|
|
@@ -1183,7 +1550,7 @@ function classifyFailureReason(line, options) {
|
|
|
1183
1550
|
};
|
|
1184
1551
|
}
|
|
1185
1552
|
const missingEnv = normalized.match(
|
|
1186
|
-
/\b(?:environment variable|env(?:ironment)? var(?:iable)?|
|
|
1553
|
+
/\b(?:environment variable|env(?:ironment)? var(?:iable)?|missing required env(?:ironment)? variable)\s+([A-Z][A-Z0-9_]{2,})\b/
|
|
1187
1554
|
);
|
|
1188
1555
|
if (missingEnv) {
|
|
1189
1556
|
return {
|
|
@@ -1215,6 +1582,12 @@ function classifyFailureReason(line, options) {
|
|
|
1215
1582
|
group: "database connectivity failures"
|
|
1216
1583
|
};
|
|
1217
1584
|
}
|
|
1585
|
+
if (/(ECONNREFUSED|ConnectionRefusedError|connection refused)/i.test(normalized)) {
|
|
1586
|
+
return {
|
|
1587
|
+
reason: "service unavailable: dependency connection was refused",
|
|
1588
|
+
group: "service availability failures"
|
|
1589
|
+
};
|
|
1590
|
+
}
|
|
1218
1591
|
if (/(503\b|service unavailable|temporarily unavailable)/i.test(normalized)) {
|
|
1219
1592
|
return {
|
|
1220
1593
|
reason: "service unavailable: dependency service is unavailable",
|
|
@@ -1699,7 +2072,7 @@ function synthesizeImportDependencyBucket(args) {
|
|
|
1699
2072
|
return null;
|
|
1700
2073
|
}
|
|
1701
2074
|
const allVisibleErrorsAreImportRelated = args.visibleErrorItems.length > 0 && args.visibleErrorItems.every((item) => item.reason.startsWith("missing module:"));
|
|
1702
|
-
const countClaimed = allVisibleErrorsAreImportRelated && importItems.length >=
|
|
2075
|
+
const countClaimed = allVisibleErrorsAreImportRelated && importItems.length >= 2 && args.errors >= importItems.length ? args.errors : void 0;
|
|
1703
2076
|
const modules = Array.from(
|
|
1704
2077
|
new Set(
|
|
1705
2078
|
importItems.map((item) => item.reason.replace("missing module:", "").trim()).filter(Boolean)
|
|
@@ -1735,7 +2108,7 @@ function synthesizeImportDependencyBucket(args) {
|
|
|
1735
2108
|
};
|
|
1736
2109
|
}
|
|
1737
2110
|
/**
 * Reports whether a test label looks like a contract/snapshot drift test.
 *
 * Matching is a case-insensitive substring search for any of: freeze, snapshot,
 * contract, manifest, openapi, golden.
 *
 * @param {string} label - Test label to inspect.
 * @returns {boolean} True when the label contains one of the keywords.
 */
function isContractDriftLabel(label) {
  return /(freeze|snapshot|contract|manifest|openapi|golden)/i.test(label);
}
|
|
1740
2113
|
function looksLikeTaskKey(value) {
|
|
1741
2114
|
return /^[a-z]+(?:_[a-z0-9]+)+$/i.test(value) && !value.startsWith("/api/");
|
|
@@ -2224,10 +2597,11 @@ async function buildOpenAICompatibleError(response) {
|
|
|
2224
2597
|
return new Error(detail);
|
|
2225
2598
|
}
|
|
2226
2599
|
var OpenAICompatibleProvider = class {
|
|
2227
|
-
name
|
|
2600
|
+
name;
|
|
2228
2601
|
baseUrl;
|
|
2229
2602
|
apiKey;
|
|
2230
2603
|
constructor(options) {
|
|
2604
|
+
this.name = options.name ?? "openai-compatible";
|
|
2231
2605
|
this.baseUrl = options.baseUrl.replace(/\/$/, "");
|
|
2232
2606
|
this.apiKey = options.apiKey;
|
|
2233
2607
|
}
|
|
@@ -2303,6 +2677,13 @@ function createProvider(config) {
|
|
|
2303
2677
|
apiKey: config.provider.apiKey
|
|
2304
2678
|
});
|
|
2305
2679
|
}
|
|
2680
|
+
if (config.provider.provider === "openrouter") {
|
|
2681
|
+
return new OpenAICompatibleProvider({
|
|
2682
|
+
baseUrl: config.provider.baseUrl,
|
|
2683
|
+
apiKey: config.provider.apiKey,
|
|
2684
|
+
name: "openrouter"
|
|
2685
|
+
});
|
|
2686
|
+
}
|
|
2306
2687
|
throw new Error(`Unsupported provider: ${config.provider.provider}`);
|
|
2307
2688
|
}
|
|
2308
2689
|
|
|
@@ -2475,9 +2856,12 @@ function resolvePromptPolicy(args) {
|
|
|
2475
2856
|
"Return only valid JSON.",
|
|
2476
2857
|
`Use this exact contract: ${args.outputContract ?? TEST_STATUS_DIAGNOSE_JSON_CONTRACT}.`,
|
|
2477
2858
|
"Treat the heuristic context as extraction guidance, but do not invent hidden failures.",
|
|
2478
|
-
"Use the heuristic extract as the bucket truth unless the visible command output clearly disproves it.",
|
|
2859
|
+
"Use the heuristic extract as the base bucket truth unless the visible command output clearly disproves it.",
|
|
2860
|
+
"If some visible failure or error families remain unexplained, add at most 2 bucket_supplements for the residual families only.",
|
|
2861
|
+
"Do not rewrite or delete heuristic buckets; only supplement missing residual coverage.",
|
|
2862
|
+
"Keep bucket_supplement counts within the unexplained residual failures or errors.",
|
|
2479
2863
|
"Identify the dominant blocker, remaining visible failure buckets, the decision, and the next best action.",
|
|
2480
|
-
"Set diagnosis_complete to true only when the visible output is already sufficient to stop and act.",
|
|
2864
|
+
"Set diagnosis_complete to true only when the visible output is already sufficient to stop and act and no unknown residual family remains.",
|
|
2481
2865
|
"Set raw_needed to true only when exact traceback lines are still required.",
|
|
2482
2866
|
"Set provider_confidence to a number between 0 and 1, or null only when confidence cannot be estimated."
|
|
2483
2867
|
] : [
|
|
@@ -2994,6 +3378,7 @@ function buildGenericRawSlice(args) {
|
|
|
2994
3378
|
|
|
2995
3379
|
// src/core/run.ts
|
|
2996
3380
|
var RETRY_DELAY_MS = 300;
|
|
3381
|
+
var PROVIDER_PENDING_NOTICE_DELAY_MS = 150;
|
|
2997
3382
|
function estimateTokenCount(text) {
|
|
2998
3383
|
return Math.max(1, Math.ceil(text.length / 4));
|
|
2999
3384
|
}
|
|
@@ -3014,6 +3399,8 @@ function logVerboseTestStatusTelemetry(args) {
|
|
|
3014
3399
|
`${pc.dim("sift")} diagnosis_complete_at_layer=${getDiagnosisCompleteAtLayer(args.contract)}`,
|
|
3015
3400
|
`${pc.dim("sift")} heuristic_short_circuit=${!args.contract.provider_used && args.contract.diagnosis_complete && !args.contract.raw_needed && !args.contract.provider_failed}`,
|
|
3016
3401
|
`${pc.dim("sift")} raw_input_chars=${args.request.stdin.length}`,
|
|
3402
|
+
`${pc.dim("sift")} heuristic_input_chars=${args.heuristicInputChars}`,
|
|
3403
|
+
`${pc.dim("sift")} heuristic_input_truncated=${args.heuristicInputTruncated}`,
|
|
3017
3404
|
`${pc.dim("sift")} prepared_input_chars=${args.prepared.meta.finalLength}`,
|
|
3018
3405
|
`${pc.dim("sift")} raw_slice_chars=${args.rawSliceChars ?? 0}`,
|
|
3019
3406
|
`${pc.dim("sift")} provider_input_chars=${args.providerInputChars ?? 0}`,
|
|
@@ -3055,6 +3442,7 @@ function buildDryRunOutput(args) {
|
|
|
3055
3442
|
responseMode: args.responseMode,
|
|
3056
3443
|
policy: args.request.policyName ?? null,
|
|
3057
3444
|
heuristicOutput: args.heuristicOutput ?? null,
|
|
3445
|
+
heuristicInput: args.heuristicInput,
|
|
3058
3446
|
input: {
|
|
3059
3447
|
originalLength: args.prepared.meta.originalLength,
|
|
3060
3448
|
finalLength: args.prepared.meta.finalLength,
|
|
@@ -3071,6 +3459,25 @@ function buildDryRunOutput(args) {
|
|
|
3071
3459
|
async function delay(ms) {
|
|
3072
3460
|
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
3073
3461
|
}
|
|
3462
|
+
function startProviderPendingNotice() {
|
|
3463
|
+
if (!process.stderr.isTTY) {
|
|
3464
|
+
return () => {
|
|
3465
|
+
};
|
|
3466
|
+
}
|
|
3467
|
+
const message = "sift waiting for provider...";
|
|
3468
|
+
let shown = false;
|
|
3469
|
+
const timer = setTimeout(() => {
|
|
3470
|
+
shown = true;
|
|
3471
|
+
process.stderr.write(`${message}\r`);
|
|
3472
|
+
}, PROVIDER_PENDING_NOTICE_DELAY_MS);
|
|
3473
|
+
return () => {
|
|
3474
|
+
clearTimeout(timer);
|
|
3475
|
+
if (!shown) {
|
|
3476
|
+
return;
|
|
3477
|
+
}
|
|
3478
|
+
process.stderr.write(`\r${" ".repeat(message.length)}\r`);
|
|
3479
|
+
};
|
|
3480
|
+
}
|
|
3074
3481
|
function withInsufficientHint(args) {
|
|
3075
3482
|
if (!isInsufficientSignalOutput(args.output)) {
|
|
3076
3483
|
return args.output;
|
|
@@ -3091,22 +3498,27 @@ async function generateWithRetry(args) {
|
|
|
3091
3498
|
responseMode: args.responseMode,
|
|
3092
3499
|
jsonResponseFormat: args.request.config.provider.jsonResponseFormat
|
|
3093
3500
|
});
|
|
3501
|
+
const stopPendingNotice = startProviderPendingNotice();
|
|
3094
3502
|
try {
|
|
3095
|
-
|
|
3096
|
-
|
|
3097
|
-
|
|
3098
|
-
|
|
3099
|
-
|
|
3100
|
-
|
|
3101
|
-
|
|
3102
|
-
|
|
3103
|
-
|
|
3503
|
+
try {
|
|
3504
|
+
return await generate();
|
|
3505
|
+
} catch (error) {
|
|
3506
|
+
const reason = error instanceof Error ? error.message : "unknown_error";
|
|
3507
|
+
if (!isRetriableReason(reason)) {
|
|
3508
|
+
throw error;
|
|
3509
|
+
}
|
|
3510
|
+
if (args.request.config.runtime.verbose) {
|
|
3511
|
+
process.stderr.write(
|
|
3512
|
+
`${pc.dim("sift")} retry=1 reason=${reason} delay_ms=${RETRY_DELAY_MS}
|
|
3104
3513
|
`
|
|
3105
|
-
|
|
3514
|
+
);
|
|
3515
|
+
}
|
|
3516
|
+
await delay(RETRY_DELAY_MS);
|
|
3106
3517
|
}
|
|
3107
|
-
await
|
|
3518
|
+
return await generate();
|
|
3519
|
+
} finally {
|
|
3520
|
+
stopPendingNotice();
|
|
3108
3521
|
}
|
|
3109
|
-
return generate();
|
|
3110
3522
|
}
|
|
3111
3523
|
function hasRecognizableTestStatusSignal(input) {
|
|
3112
3524
|
const analysis = analyzeTestStatus(input);
|
|
@@ -3161,11 +3573,22 @@ function buildTestStatusProviderFailureDecision(args) {
|
|
|
3161
3573
|
}
|
|
3162
3574
|
async function runSift(request) {
|
|
3163
3575
|
const prepared = prepareInput(request.stdin, request.config.input);
|
|
3576
|
+
const heuristicInput = prepared.redacted;
|
|
3577
|
+
const heuristicInputTruncated = false;
|
|
3578
|
+
const heuristicPrepared = {
|
|
3579
|
+
...prepared,
|
|
3580
|
+
truncated: heuristicInput,
|
|
3581
|
+
meta: {
|
|
3582
|
+
...prepared.meta,
|
|
3583
|
+
finalLength: heuristicInput.length,
|
|
3584
|
+
truncatedApplied: heuristicInputTruncated
|
|
3585
|
+
}
|
|
3586
|
+
};
|
|
3164
3587
|
const provider = createProvider(request.config);
|
|
3165
|
-
const hasTestStatusSignal = request.policyName === "test-status" && hasRecognizableTestStatusSignal(
|
|
3166
|
-
const testStatusAnalysis = hasTestStatusSignal ? analyzeTestStatus(
|
|
3588
|
+
const hasTestStatusSignal = request.policyName === "test-status" && hasRecognizableTestStatusSignal(heuristicInput);
|
|
3589
|
+
const testStatusAnalysis = hasTestStatusSignal ? analyzeTestStatus(heuristicInput) : null;
|
|
3167
3590
|
const testStatusDecision = hasTestStatusSignal && testStatusAnalysis ? buildTestStatusDiagnoseContract({
|
|
3168
|
-
input:
|
|
3591
|
+
input: heuristicInput,
|
|
3169
3592
|
analysis: testStatusAnalysis,
|
|
3170
3593
|
resolvedTests: request.testStatusContext?.resolvedTests,
|
|
3171
3594
|
remainingTests: request.testStatusContext?.remainingTests
|
|
@@ -3180,7 +3603,7 @@ async function runSift(request) {
|
|
|
3180
3603
|
`
|
|
3181
3604
|
);
|
|
3182
3605
|
}
|
|
3183
|
-
const heuristicOutput = request.policyName === "test-status" ? testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName,
|
|
3606
|
+
const heuristicOutput = request.policyName === "test-status" ? testStatusDecision?.contract.diagnosis_complete ? testStatusHeuristicOutput : null : applyHeuristicPolicy(request.policyName, heuristicInput, request.detail);
|
|
3184
3607
|
if (heuristicOutput) {
|
|
3185
3608
|
if (request.config.runtime.verbose) {
|
|
3186
3609
|
process.stderr.write(`${pc.dim("sift")} heuristic=${request.policyName}
|
|
@@ -3190,7 +3613,7 @@ async function runSift(request) {
|
|
|
3190
3613
|
question: request.question,
|
|
3191
3614
|
format: request.format,
|
|
3192
3615
|
goal: request.goal,
|
|
3193
|
-
input:
|
|
3616
|
+
input: heuristicInput,
|
|
3194
3617
|
detail: request.detail,
|
|
3195
3618
|
policyName: request.policyName,
|
|
3196
3619
|
outputContract: request.policyName === "test-status" && request.goal === "diagnose" && request.format === "json" ? request.outputContract ?? TEST_STATUS_DIAGNOSE_JSON_CONTRACT : request.outputContract,
|
|
@@ -3210,6 +3633,11 @@ async function runSift(request) {
|
|
|
3210
3633
|
prompt: heuristicPrompt.prompt,
|
|
3211
3634
|
responseMode: heuristicPrompt.responseMode,
|
|
3212
3635
|
prepared,
|
|
3636
|
+
heuristicInput: {
|
|
3637
|
+
length: heuristicInput.length,
|
|
3638
|
+
truncatedApplied: heuristicInputTruncated,
|
|
3639
|
+
strategy: "full-redacted"
|
|
3640
|
+
},
|
|
3213
3641
|
heuristicOutput,
|
|
3214
3642
|
strategy: "heuristic"
|
|
3215
3643
|
});
|
|
@@ -3223,6 +3651,8 @@ async function runSift(request) {
|
|
|
3223
3651
|
logVerboseTestStatusTelemetry({
|
|
3224
3652
|
request,
|
|
3225
3653
|
prepared,
|
|
3654
|
+
heuristicInputChars: heuristicInput.length,
|
|
3655
|
+
heuristicInputTruncated,
|
|
3226
3656
|
contract: testStatusDecision.contract,
|
|
3227
3657
|
finalOutput
|
|
3228
3658
|
});
|
|
@@ -3274,6 +3704,11 @@ async function runSift(request) {
|
|
|
3274
3704
|
prompt: prompt.prompt,
|
|
3275
3705
|
responseMode: prompt.responseMode,
|
|
3276
3706
|
prepared: providerPrepared2,
|
|
3707
|
+
heuristicInput: {
|
|
3708
|
+
length: heuristicInput.length,
|
|
3709
|
+
truncatedApplied: heuristicInputTruncated,
|
|
3710
|
+
strategy: "full-redacted"
|
|
3711
|
+
},
|
|
3277
3712
|
heuristicOutput: testStatusHeuristicOutput,
|
|
3278
3713
|
strategy: "hybrid"
|
|
3279
3714
|
});
|
|
@@ -3287,10 +3722,11 @@ async function runSift(request) {
|
|
|
3287
3722
|
});
|
|
3288
3723
|
const supplement = parseTestStatusProviderSupplement(result.text);
|
|
3289
3724
|
const mergedDecision = buildTestStatusDiagnoseContract({
|
|
3290
|
-
input:
|
|
3725
|
+
input: heuristicInput,
|
|
3291
3726
|
analysis: testStatusAnalysis,
|
|
3292
3727
|
resolvedTests: request.testStatusContext?.resolvedTests,
|
|
3293
3728
|
remainingTests: request.testStatusContext?.remainingTests,
|
|
3729
|
+
providerBucketSupplements: supplement.bucket_supplements,
|
|
3294
3730
|
contractOverrides: {
|
|
3295
3731
|
diagnosis_complete: supplement.diagnosis_complete,
|
|
3296
3732
|
raw_needed: supplement.raw_needed,
|
|
@@ -3312,6 +3748,8 @@ async function runSift(request) {
|
|
|
3312
3748
|
logVerboseTestStatusTelemetry({
|
|
3313
3749
|
request,
|
|
3314
3750
|
prepared,
|
|
3751
|
+
heuristicInputChars: heuristicInput.length,
|
|
3752
|
+
heuristicInputTruncated,
|
|
3315
3753
|
contract: mergedDecision.contract,
|
|
3316
3754
|
finalOutput,
|
|
3317
3755
|
rawSliceChars: rawSlice.text.length,
|
|
@@ -3324,7 +3762,7 @@ async function runSift(request) {
|
|
|
3324
3762
|
const failureDecision = buildTestStatusProviderFailureDecision({
|
|
3325
3763
|
request,
|
|
3326
3764
|
baseDecision: testStatusDecision,
|
|
3327
|
-
input:
|
|
3765
|
+
input: heuristicInput,
|
|
3328
3766
|
analysis: testStatusAnalysis,
|
|
3329
3767
|
reason,
|
|
3330
3768
|
rawSliceUsed: rawSlice.used,
|
|
@@ -3345,6 +3783,8 @@ async function runSift(request) {
|
|
|
3345
3783
|
logVerboseTestStatusTelemetry({
|
|
3346
3784
|
request,
|
|
3347
3785
|
prepared,
|
|
3786
|
+
heuristicInputChars: heuristicInput.length,
|
|
3787
|
+
heuristicInputTruncated,
|
|
3348
3788
|
contract: failureDecision.contract,
|
|
3349
3789
|
finalOutput,
|
|
3350
3790
|
rawSliceChars: rawSlice.text.length,
|
|
@@ -3383,6 +3823,11 @@ async function runSift(request) {
|
|
|
3383
3823
|
prompt: providerPrompt.prompt,
|
|
3384
3824
|
responseMode: providerPrompt.responseMode,
|
|
3385
3825
|
prepared: providerPrepared,
|
|
3826
|
+
heuristicInput: {
|
|
3827
|
+
length: heuristicInput.length,
|
|
3828
|
+
truncatedApplied: heuristicInputTruncated,
|
|
3829
|
+
strategy: "full-redacted"
|
|
3830
|
+
},
|
|
3386
3831
|
heuristicOutput: testStatusDecision ? testStatusHeuristicOutput : null,
|
|
3387
3832
|
strategy: testStatusDecision ? "hybrid" : "provider"
|
|
3388
3833
|
});
|
|
@@ -4419,13 +4864,16 @@ var OPENAI_COMPATIBLE_BASE_URL_ENV = [
|
|
|
4419
4864
|
{ prefix: "https://api.together.xyz/", envName: "TOGETHER_API_KEY" },
|
|
4420
4865
|
{ prefix: "https://api.groq.com/openai/", envName: "GROQ_API_KEY" }
|
|
4421
4866
|
];
|
|
4867
|
+
var NATIVE_PROVIDER_API_KEY_ENV = {
|
|
4868
|
+
openai: "OPENAI_API_KEY",
|
|
4869
|
+
openrouter: "OPENROUTER_API_KEY"
|
|
4870
|
+
};
|
|
4422
4871
|
var PROVIDER_API_KEY_ENV = {
|
|
4423
4872
|
anthropic: "ANTHROPIC_API_KEY",
|
|
4424
4873
|
claude: "ANTHROPIC_API_KEY",
|
|
4425
4874
|
groq: "GROQ_API_KEY",
|
|
4426
|
-
|
|
4427
|
-
|
|
4428
|
-
together: "TOGETHER_API_KEY"
|
|
4875
|
+
together: "TOGETHER_API_KEY",
|
|
4876
|
+
...NATIVE_PROVIDER_API_KEY_ENV
|
|
4429
4877
|
};
|
|
4430
4878
|
function normalizeBaseUrl(baseUrl) {
|
|
4431
4879
|
if (!baseUrl) {
|
|
@@ -4463,7 +4911,11 @@ function resolveProviderApiKey(provider, baseUrl, env) {
|
|
|
4463
4911
|
|
|
4464
4912
|
// src/config/schema.ts
|
|
4465
4913
|
import { z as z3 } from "zod";
|
|
4466
|
-
var providerNameSchema = z3.enum([
|
|
4914
|
+
var providerNameSchema = z3.enum([
|
|
4915
|
+
"openai",
|
|
4916
|
+
"openai-compatible",
|
|
4917
|
+
"openrouter"
|
|
4918
|
+
]);
|
|
4467
4919
|
var outputFormatSchema = z3.enum([
|
|
4468
4920
|
"brief",
|
|
4469
4921
|
"bullets",
|
|
@@ -4492,6 +4944,15 @@ var providerConfigSchema = z3.object({
|
|
|
4492
4944
|
temperature: z3.number().min(0).max(2),
|
|
4493
4945
|
maxOutputTokens: z3.number().int().positive()
|
|
4494
4946
|
});
|
|
4947
|
+
var providerProfileSchema = z3.object({
|
|
4948
|
+
model: z3.string().min(1).optional(),
|
|
4949
|
+
baseUrl: z3.string().url().optional(),
|
|
4950
|
+
apiKey: z3.string().optional()
|
|
4951
|
+
});
|
|
4952
|
+
var providerProfilesSchema = z3.object({
|
|
4953
|
+
openai: providerProfileSchema.optional(),
|
|
4954
|
+
openrouter: providerProfileSchema.optional()
|
|
4955
|
+
}).optional();
|
|
4495
4956
|
var inputConfigSchema = z3.object({
|
|
4496
4957
|
stripAnsi: z3.boolean(),
|
|
4497
4958
|
redact: z3.boolean(),
|
|
@@ -4516,10 +4977,19 @@ var siftConfigSchema = z3.object({
|
|
|
4516
4977
|
provider: providerConfigSchema,
|
|
4517
4978
|
input: inputConfigSchema,
|
|
4518
4979
|
runtime: runtimeConfigSchema,
|
|
4519
|
-
presets: z3.record(presetDefinitionSchema)
|
|
4980
|
+
presets: z3.record(presetDefinitionSchema),
|
|
4981
|
+
providerProfiles: providerProfilesSchema
|
|
4520
4982
|
});
|
|
4521
4983
|
|
|
4522
4984
|
// src/config/resolve.ts
|
|
4985
|
+
var PROVIDER_DEFAULT_OVERRIDES = {
|
|
4986
|
+
openrouter: {
|
|
4987
|
+
provider: {
|
|
4988
|
+
model: "openrouter/free",
|
|
4989
|
+
baseUrl: "https://openrouter.ai/api/v1"
|
|
4990
|
+
}
|
|
4991
|
+
}
|
|
4992
|
+
};
|
|
4523
4993
|
function isRecord(value) {
|
|
4524
4994
|
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
|
|
4525
4995
|
}
|
|
@@ -4582,13 +5052,32 @@ function buildCredentialEnvOverrides(env, context) {
|
|
|
4582
5052
|
}
|
|
4583
5053
|
};
|
|
4584
5054
|
}
|
|
5055
|
+
function getBaseConfigForProvider(provider) {
|
|
5056
|
+
return mergeDefined(defaultConfig, provider ? PROVIDER_DEFAULT_OVERRIDES[provider] : {});
|
|
5057
|
+
}
|
|
5058
|
+
function resolveProvisionalProvider(args) {
|
|
5059
|
+
const provisional = mergeDefined(
|
|
5060
|
+
mergeDefined(
|
|
5061
|
+
mergeDefined(defaultConfig, args.fileConfig),
|
|
5062
|
+
args.nonCredentialEnvConfig
|
|
5063
|
+
),
|
|
5064
|
+
stripApiKey(args.cliOverrides) ?? {}
|
|
5065
|
+
);
|
|
5066
|
+
return provisional.provider.provider;
|
|
5067
|
+
}
|
|
4585
5068
|
function resolveConfig(options = {}) {
|
|
4586
5069
|
const env = options.env ?? process.env;
|
|
4587
5070
|
const fileConfig = loadRawConfig(options.configPath);
|
|
4588
5071
|
const nonCredentialEnvConfig = buildNonCredentialEnvOverrides(env);
|
|
5072
|
+
const provisionalProvider = resolveProvisionalProvider({
|
|
5073
|
+
fileConfig,
|
|
5074
|
+
nonCredentialEnvConfig,
|
|
5075
|
+
cliOverrides: options.cliOverrides
|
|
5076
|
+
});
|
|
5077
|
+
const baseConfig = getBaseConfigForProvider(provisionalProvider);
|
|
4589
5078
|
const contextConfig = mergeDefined(
|
|
4590
5079
|
mergeDefined(
|
|
4591
|
-
mergeDefined(
|
|
5080
|
+
mergeDefined(baseConfig, fileConfig),
|
|
4592
5081
|
nonCredentialEnvConfig
|
|
4593
5082
|
),
|
|
4594
5083
|
stripApiKey(options.cliOverrides) ?? {}
|
|
@@ -4600,7 +5089,7 @@ function resolveConfig(options = {}) {
|
|
|
4600
5089
|
const merged = mergeDefined(
|
|
4601
5090
|
mergeDefined(
|
|
4602
5091
|
mergeDefined(
|
|
4603
|
-
mergeDefined(
|
|
5092
|
+
mergeDefined(baseConfig, fileConfig),
|
|
4604
5093
|
nonCredentialEnvConfig
|
|
4605
5094
|
),
|
|
4606
5095
|
credentialEnvConfig
|