@oscharko-dev/keiko-workflows 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bug-investigation/context.d.ts +7 -0
- package/dist/bug-investigation/context.d.ts.map +1 -0
- package/dist/bug-investigation/context.js +119 -0
- package/dist/bug-investigation/descriptor.d.ts +4 -0
- package/dist/bug-investigation/descriptor.d.ts.map +1 -0
- package/dist/bug-investigation/descriptor.js +46 -0
- package/dist/bug-investigation/emit.d.ts +13 -0
- package/dist/bug-investigation/emit.d.ts.map +1 -0
- package/dist/bug-investigation/emit.js +35 -0
- package/dist/bug-investigation/events.d.ts +2 -0
- package/dist/bug-investigation/events.d.ts.map +1 -0
- package/dist/bug-investigation/events.js +6 -0
- package/dist/bug-investigation/failure-parse.d.ts +4 -0
- package/dist/bug-investigation/failure-parse.d.ts.map +1 -0
- package/dist/bug-investigation/failure-parse.js +154 -0
- package/dist/bug-investigation/guard.d.ts +3 -0
- package/dist/bug-investigation/guard.d.ts.map +1 -0
- package/dist/bug-investigation/guard.js +69 -0
- package/dist/bug-investigation/index.d.ts +8 -0
- package/dist/bug-investigation/index.d.ts.map +1 -0
- package/dist/bug-investigation/index.js +13 -0
- package/dist/bug-investigation/internal.d.ts +39 -0
- package/dist/bug-investigation/internal.d.ts.map +1 -0
- package/dist/bug-investigation/internal.js +65 -0
- package/dist/bug-investigation/memory.d.ts +5 -0
- package/dist/bug-investigation/memory.d.ts.map +1 -0
- package/dist/bug-investigation/memory.js +91 -0
- package/dist/bug-investigation/model-loop.d.ts +5 -0
- package/dist/bug-investigation/model-loop.d.ts.map +1 -0
- package/dist/bug-investigation/model-loop.js +225 -0
- package/dist/bug-investigation/parse.d.ts +4 -0
- package/dist/bug-investigation/parse.d.ts.map +1 -0
- package/dist/bug-investigation/parse.js +125 -0
- package/dist/bug-investigation/prompt.d.ts +5 -0
- package/dist/bug-investigation/prompt.d.ts.map +1 -0
- package/dist/bug-investigation/prompt.js +122 -0
- package/dist/bug-investigation/report.d.ts +24 -0
- package/dist/bug-investigation/report.d.ts.map +1 -0
- package/dist/bug-investigation/report.js +151 -0
- package/dist/bug-investigation/stages.d.ts +14 -0
- package/dist/bug-investigation/stages.d.ts.map +1 -0
- package/dist/bug-investigation/stages.js +247 -0
- package/dist/bug-investigation/types.d.ts +88 -0
- package/dist/bug-investigation/types.d.ts.map +1 -0
- package/dist/bug-investigation/types.js +6 -0
- package/dist/bug-investigation/verify-stage.d.ts +11 -0
- package/dist/bug-investigation/verify-stage.d.ts.map +1 -0
- package/dist/bug-investigation/verify-stage.js +91 -0
- package/dist/bug-investigation/workflow.d.ts +3 -0
- package/dist/bug-investigation/workflow.d.ts.map +1 -0
- package/dist/bug-investigation/workflow.js +85 -0
- package/dist/contextpack/assemble.d.ts +35 -0
- package/dist/contextpack/assemble.d.ts.map +1 -0
- package/dist/contextpack/assemble.js +431 -0
- package/dist/contextpack/compaction.d.ts +23 -0
- package/dist/contextpack/compaction.d.ts.map +1 -0
- package/dist/contextpack/compaction.js +68 -0
- package/dist/contextpack/index.d.ts +9 -0
- package/dist/contextpack/index.d.ts.map +1 -0
- package/dist/contextpack/index.js +8 -0
- package/dist/contextpack/microIndex.d.ts +29 -0
- package/dist/contextpack/microIndex.d.ts.map +1 -0
- package/dist/contextpack/microIndex.js +98 -0
- package/dist/contextpack/reranker.d.ts +15 -0
- package/dist/contextpack/reranker.d.ts.map +1 -0
- package/dist/contextpack/reranker.js +31 -0
- package/dist/descriptor.d.ts +2 -0
- package/dist/descriptor.d.ts.map +1 -0
- package/dist/descriptor.js +1 -0
- package/dist/governed-handoff.d.ts +6 -0
- package/dist/governed-handoff.d.ts.map +1 -0
- package/dist/governed-handoff.js +86 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/planner/anchors.d.ts +17 -0
- package/dist/planner/anchors.d.ts.map +1 -0
- package/dist/planner/anchors.js +291 -0
- package/dist/planner/explorationPlanner.d.ts +9 -0
- package/dist/planner/explorationPlanner.d.ts.map +1 -0
- package/dist/planner/explorationPlanner.js +15 -0
- package/dist/planner/governor.d.ts +16 -0
- package/dist/planner/governor.d.ts.map +1 -0
- package/dist/planner/governor.js +106 -0
- package/dist/planner/index.d.ts +11 -0
- package/dist/planner/index.d.ts.map +1 -0
- package/dist/planner/index.js +8 -0
- package/dist/planner/intent.d.ts +8 -0
- package/dist/planner/intent.d.ts.map +1 -0
- package/dist/planner/intent.js +140 -0
- package/dist/planner/plan.d.ts +43 -0
- package/dist/planner/plan.d.ts.map +1 -0
- package/dist/planner/plan.js +237 -0
- package/dist/promptEnhancer/index.d.ts +23 -0
- package/dist/promptEnhancer/index.d.ts.map +1 -0
- package/dist/promptEnhancer/index.js +282 -0
- package/dist/qualityIntelligence/__tests__/fixtures/runEntryFixtures.d.ts +30 -0
- package/dist/qualityIntelligence/__tests__/fixtures/runEntryFixtures.d.ts.map +1 -0
- package/dist/qualityIntelligence/__tests__/fixtures/runEntryFixtures.js +114 -0
- package/dist/qualityIntelligence/cancellation.d.ts +20 -0
- package/dist/qualityIntelligence/cancellation.d.ts.map +1 -0
- package/dist/qualityIntelligence/cancellation.js +55 -0
- package/dist/qualityIntelligence/descriptors.d.ts +41 -0
- package/dist/qualityIntelligence/descriptors.d.ts.map +1 -0
- package/dist/qualityIntelligence/descriptors.js +105 -0
- package/dist/qualityIntelligence/index.d.ts +11 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +11 -0
- package/dist/qualityIntelligence/modelRoutedTestDesign.d.ts +100 -0
- package/dist/qualityIntelligence/modelRoutedTestDesign.d.ts.map +1 -0
- package/dist/qualityIntelligence/modelRoutedTestDesign.js +620 -0
- package/dist/qualityIntelligence/runEntries.d.ts +60 -0
- package/dist/qualityIntelligence/runEntries.d.ts.map +1 -0
- package/dist/qualityIntelligence/runEntries.js +243 -0
- package/dist/qualityIntelligence/runtimeCommon.d.ts +106 -0
- package/dist/qualityIntelligence/runtimeCommon.d.ts.map +1 -0
- package/dist/qualityIntelligence/runtimeCommon.js +258 -0
- package/dist/qualityIntelligence/scopedRegeneration.d.ts +26 -0
- package/dist/qualityIntelligence/scopedRegeneration.d.ts.map +1 -0
- package/dist/qualityIntelligence/scopedRegeneration.js +35 -0
- package/dist/ranking/filter.d.ts +20 -0
- package/dist/ranking/filter.d.ts.map +1 -0
- package/dist/ranking/filter.js +99 -0
- package/dist/ranking/index.d.ts +9 -0
- package/dist/ranking/index.d.ts.map +1 -0
- package/dist/ranking/index.js +8 -0
- package/dist/ranking/rank.d.ts +21 -0
- package/dist/ranking/rank.d.ts.map +1 -0
- package/dist/ranking/rank.js +160 -0
- package/dist/ranking/scoring.d.ts +13 -0
- package/dist/ranking/scoring.d.ts.map +1 -0
- package/dist/ranking/scoring.js +39 -0
- package/dist/ranking/signals.d.ts +20 -0
- package/dist/ranking/signals.d.ts.map +1 -0
- package/dist/ranking/signals.js +145 -0
- package/dist/unit-tests/context.d.ts +7 -0
- package/dist/unit-tests/context.d.ts.map +1 -0
- package/dist/unit-tests/context.js +129 -0
- package/dist/unit-tests/conventions.d.ts +5 -0
- package/dist/unit-tests/conventions.d.ts.map +1 -0
- package/dist/unit-tests/conventions.js +87 -0
- package/dist/unit-tests/descriptor.d.ts +5 -0
- package/dist/unit-tests/descriptor.d.ts.map +1 -0
- package/dist/unit-tests/descriptor.js +43 -0
- package/dist/unit-tests/emit.d.ts +13 -0
- package/dist/unit-tests/emit.d.ts.map +1 -0
- package/dist/unit-tests/emit.js +35 -0
- package/dist/unit-tests/events.d.ts +2 -0
- package/dist/unit-tests/events.d.ts.map +1 -0
- package/dist/unit-tests/events.js +6 -0
- package/dist/unit-tests/frontend.d.ts +42 -0
- package/dist/unit-tests/frontend.d.ts.map +1 -0
- package/dist/unit-tests/frontend.js +281 -0
- package/dist/unit-tests/index.d.ts +9 -0
- package/dist/unit-tests/index.d.ts.map +1 -0
- package/dist/unit-tests/index.js +15 -0
- package/dist/unit-tests/internal.d.ts +36 -0
- package/dist/unit-tests/internal.d.ts.map +1 -0
- package/dist/unit-tests/internal.js +43 -0
- package/dist/unit-tests/model-loop.d.ts +6 -0
- package/dist/unit-tests/model-loop.d.ts.map +1 -0
- package/dist/unit-tests/model-loop.js +98 -0
- package/dist/unit-tests/parse.d.ts +7 -0
- package/dist/unit-tests/parse.d.ts.map +1 -0
- package/dist/unit-tests/parse.js +68 -0
- package/dist/unit-tests/prompt.d.ts +6 -0
- package/dist/unit-tests/prompt.d.ts.map +1 -0
- package/dist/unit-tests/prompt.js +139 -0
- package/dist/unit-tests/report.d.ts +26 -0
- package/dist/unit-tests/report.d.ts.map +1 -0
- package/dist/unit-tests/report.js +104 -0
- package/dist/unit-tests/stages.d.ts +12 -0
- package/dist/unit-tests/stages.d.ts.map +1 -0
- package/dist/unit-tests/stages.js +202 -0
- package/dist/unit-tests/strategy.d.ts +6 -0
- package/dist/unit-tests/strategy.d.ts.map +1 -0
- package/dist/unit-tests/strategy.js +36 -0
- package/dist/unit-tests/target-guard.d.ts +5 -0
- package/dist/unit-tests/target-guard.d.ts.map +1 -0
- package/dist/unit-tests/target-guard.js +29 -0
- package/dist/unit-tests/types.d.ts +74 -0
- package/dist/unit-tests/types.d.ts.map +1 -0
- package/dist/unit-tests/types.js +6 -0
- package/dist/unit-tests/verify-stage.d.ts +10 -0
- package/dist/unit-tests/verify-stage.d.ts.map +1 -0
- package/dist/unit-tests/verify-stage.js +56 -0
- package/dist/unit-tests/workflow.d.ts +3 -0
- package/dist/unit-tests/workflow.d.ts.map +1 -0
- package/dist/unit-tests/workflow.js +69 -0
- package/package.json +38 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
// Quality Intelligence model-routed test-design run entry (Epic #270, Issue #272/#273/#279).
|
|
2
|
+
//
|
|
3
|
+
// The live generation path: real source evidence → Keiko Model Gateway → generated test-case
|
|
4
|
+
// candidates → pure-domain dedup / coverage / validation → evidence + candidate-artifact persist.
|
|
5
|
+
// Shares the run-lifecycle runtime with the scripted entries via `runtimeCommon.ts`. The model call
|
|
6
|
+
// is injected as an abstract `generate` port so this module stays free of provider SDKs and the
|
|
7
|
+
// server tier owns the gateway wiring (ADR-0023 D5/D6).
|
|
8
|
+
import { QualityIntelligence as QI } from "@oscharko-dev/keiko-contracts";
|
|
9
|
+
import { buildAtomCoverageStatuses, buildCoverageMap, buildRequirementExcerpt, computeCandidateEquivalenceSignature, deduplicateCandidates, deriveIntent, designTestCaseCandidates, scoreFromDimensions, TEST_QUALITY_WEAK_THRESHOLD, verdictFromDimensions, validateCandidates, QualityIntelligenceGeneration, } from "@oscharko-dev/keiko-quality-intelligence";
|
|
10
|
+
import { sha256Hex } from "@oscharko-dev/keiko-security";
|
|
11
|
+
import { QI_TEST_DESIGN_WORKFLOW_DESCRIPTOR } from "./descriptors.js";
|
|
12
|
+
import { emit, emitCandidateProposed, emitFindingsRecorded, emitQueuedAndStarted, finaliseFailureOrCancellation, makeContext, persistRun, safeReasonSummary, StageCancelledError, toCoverageMatrixRows, truncateCandidates, truncateFindings, withStage, } from "./runtimeCommon.js";
|
|
13
|
+
import { isCancelled } from "./cancellation.js";
|
|
14
|
+
/**
 * Error thrown when a run has no ingested evidence atoms to generate from
 * (`generateCandidates` raises it for an empty `ingestedAtoms` list).
 */
class EmptyEvidenceError extends Error {
    constructor() {
        const message = "No usable evidence atoms were ingested for the run";
        super(message);
        // Explicit name so the error is identifiable without relying on the class identity.
        this.name = "EmptyEvidenceError";
    }
}
|
|
20
|
+
/**
 * Error thrown when the raw model output cannot be recovered into test-case candidates
 * (`parseModelCandidates` raises it when the parser reports `recovered === false`).
 */
class UnparseableModelOutputError extends Error {
    constructor() {
        const message = "Model output could not be parsed into test cases";
        super(message);
        // Explicit name so the error is identifiable without relying on the class identity.
        this.name = "UnparseableModelOutputError";
    }
}
|
|
26
|
+
/**
 * Project each ingested atom onto a frozen evidence reference.
 *
 * @param ingestedAtoms Entries carrying an `atom` with `sourceEnvelopeId`, `id` and
 *   `lifecycleStatus`.
 * @returns A frozen array of frozen `{ envelopeId, atomId, lifecycleStatus }` records, in input
 *   order; both ids are stringified.
 */
function evidenceRefsFor(ingestedAtoms) {
    const refs = [];
    for (const { atom } of ingestedAtoms) {
        refs.push(Object.freeze({
            envelopeId: String(atom.sourceEnvelopeId),
            atomId: String(atom.id),
            lifecycleStatus: atom.lifecycleStatus,
        }));
    }
    return Object.freeze(refs);
}
|
|
33
|
+
/**
 * Build a frozen fingerprint record for every ingested atom.
 *
 * @param ingestedAtoms Entries carrying an `atom` (`id`, `sourceEnvelopeId`,
 *   `canonicalHashSha256Hex`) and optionally `replacementGroupId` / `replacementOrdinal`.
 * @returns A frozen array of frozen records. `replacementGroupId` and `replacementOrdinal` are
 *   present only when the entry defines them (i.e. they are not `undefined`).
 */
function atomFingerprintsFor(ingestedAtoms) {
    return Object.freeze(ingestedAtoms.map((entry) => {
        // Collected as key/value pairs so the optional fields can be appended conditionally
        // while keeping the key order stable.
        const fields = [
            ["atomId", String(entry.atom.id)],
            ["envelopeId", String(entry.atom.sourceEnvelopeId)],
            ["canonicalHashSha256Hex", entry.atom.canonicalHashSha256Hex],
        ];
        if (entry.replacementGroupId !== undefined) {
            fields.push(["replacementGroupId", entry.replacementGroupId]);
        }
        if (entry.replacementOrdinal !== undefined) {
            fields.push(["replacementOrdinal", entry.replacementOrdinal]);
        }
        return Object.freeze(Object.fromEntries(fields));
    }));
}
|
|
46
|
+
/**
 * Index a short excerpt of each ingested atom's canonical text by the atom's stringified id
 * (#790), so coverage rows and gap findings can name the requirement rather than only its
 * opaque id. Excerpts come from `buildRequirementExcerpt`; an atom for which that helper
 * returns `undefined` gets no entry, so the optional field is simply absent downstream.
 *
 * @param ingestedAtoms Entries carrying `atom.id` and `canonicalText`.
 * @returns A `Map` from atom id (string) to excerpt.
 */
export function excerptsByAtomId(ingestedAtoms) {
    const excerptByAtomId = new Map();
    for (const { atom, canonicalText } of ingestedAtoms) {
        const excerpt = buildRequirementExcerpt(canonicalText);
        if (excerpt === undefined) {
            continue;
        }
        excerptByAtomId.set(String(atom.id), excerpt);
    }
    return excerptByAtomId;
}
|
|
60
|
+
/**
 * Build the frozen `coverage-gap` finding for one atom coverage status.
 *
 * @param runId Run the finding belongs to.
 * @param atomStatus Coverage status of a single atom; reads `atomId` and `status`.
 * @param ordinal Position used as part of the deterministic finding id.
 * @param excerpt Optional excerpt of the atom text; when present it is quoted in the summary.
 * @returns A frozen finding whose id is `qi-finding-` plus the first 32 hex characters of the
 *   SHA-256 of the id payload.
 */
function buildCoverageGapFinding(runId, atomStatus, ordinal, excerpt) {
    const atomId = String(atomStatus.atomId);
    const isUncovered = atomStatus.status === "uncovered";
    // NOTE(review): the payload parts are concatenated without a separator, so different
    // (runId, atomId, ordinal) triples could in principle yield the same payload — confirm
    // this is intended before relying on id uniqueness.
    const idPayload = ["v1-cov-gap", String(runId), atomId, String(ordinal)].join("");
    const findingId = `qi-finding-${sha256Hex(idPayload).slice(0, 32)}`;
    // An atom with no tracing test at all is the headline audit gap (high). An atom that is only
    // weakly covered is a softer "strengthen this" signal (low), so a flood of weak findings
    // never drowns out the genuinely uncovered requirements.
    const severity = isUncovered ? "high" : "low";
    // Name the requirement, not just its id (#790). The excerpt is already redacted (and persist
    // redacts every leaf again), so the finding stays evidence-safe while being auditor-readable.
    let atomLabel = `Atom ${atomId}`;
    if (excerpt !== undefined) {
        atomLabel += ` ("${excerpt}")`;
    }
    let summary;
    if (isUncovered) {
        summary = `${atomLabel} hat keinen zugeordneten Test (uncovered).`;
    }
    else {
        summary = `${atomLabel} ist nur schwach abgedeckt (kein dedizierter Test referenziert dieses Atom).`;
    }
    return Object.freeze({
        kind: "coverage-gap",
        id: QI.asQualityIntelligenceValidationFindingId(findingId),
        runId,
        severity,
        summary,
        evidenceAtomIds: Object.freeze([atomStatus.atomId]),
    });
}
|
|
85
|
+
// Hard upper bound on how many candidates the model is asked for in one run.
const MODEL_DELTA_CANDIDATE_CEILING = 16;
// Lower bound of the evidence-derived cap, so very small evidence sets still get a few candidates.
const MODEL_DELTA_CANDIDATE_FLOOR = 3;
/**
 * Compute how many candidates to request from the model.
 *
 * The result is the smallest of: the run limit, the fixed ceiling, and an evidence-aware cap
 * (twice the evidence count, but at least the floor). Both inputs are truncated to integers and
 * treated as at least 1, and the result is never below 1.
 *
 * @param evidenceCount Number of evidence items offered to the model.
 * @param runLimit Per-run candidate limit.
 * @returns The candidate limit for the model call.
 */
function modelDeltaCandidateLimit(evidenceCount, runLimit) {
    const runCap = Math.max(1, Math.trunc(runLimit));
    const usableEvidence = Math.max(1, Math.trunc(evidenceCount));
    const evidenceCap = Math.max(MODEL_DELTA_CANDIDATE_FLOOR, usableEvidence * 2);
    const tightestCap = Math.min(runCap, MODEL_DELTA_CANDIDATE_CEILING, evidenceCap);
    return Math.max(1, tightestCap);
}
|
|
92
|
+
/**
 * Design the model-free baseline candidates for a run.
 *
 * Feeds the ingested atoms (and a map of their canonical text keyed by stringified atom id) to
 * `designTestCaseCandidates`, then deduplicates and truncates the result to
 * `ctx.limits.maxCandidatesPerRun`.
 *
 * @param ctx Run context; reads `profile` and `limits.maxCandidatesPerRun`.
 * @param input Run input; reads `plan.id`, `envelopes` and `ingestedAtoms`.
 * @returns The deduplicated, truncated baseline candidates.
 */
function deterministicBaselineCandidates(ctx, input) {
    const atomTextById = new Map();
    for (const entry of input.ingestedAtoms) {
        atomTextById.set(String(entry.atom.id), entry.canonicalText);
    }
    const designed = designTestCaseCandidates({
        runId: input.plan.id,
        intent: deriveIntent(input.envelopes, ctx.profile),
        atoms: input.ingestedAtoms.map((entry) => entry.atom),
        atomTextById,
        profile: ctx.profile,
    });
    const unique = deduplicateCandidates(designed);
    return truncateCandidates(unique, ctx.limits.maxCandidatesPerRun);
}
|
|
103
|
+
/**
 * Parse the model's raw text into candidates, then deduplicate and truncate them.
 *
 * @param result Generation result; reads `rawText`.
 * @param ctx Run context; reads `profile`.
 * @param input Run input; reads `plan.id` and `ingestedAtoms`.
 * @param maxCandidates Cap passed to the parser and to the final truncation.
 * @returns The deduplicated candidates, at most `maxCandidates` of them.
 * @throws UnparseableModelOutputError when the parser reports the output as not recovered.
 */
function parseModelCandidates(result, ctx, input, maxCandidates) {
    const parseOptions = {
        runId: input.plan.id,
        atomIds: input.ingestedAtoms.map((entry) => entry.atom.id),
        profile: ctx.profile,
        maxCandidates,
    };
    const { recovered, candidates } = QualityIntelligenceGeneration.parseGeneratedCandidates(result.rawText, parseOptions);
    if (!recovered) {
        throw new UnparseableModelOutputError();
    }
    const unique = deduplicateCandidates(candidates);
    return truncateCandidates(unique, maxCandidates);
}
|
|
115
|
+
/**
 * Merge the deterministic baseline with the model-generated delta under a total cap.
 *
 * When there is any delta and the cap is positive, one slot is reserved for it: the baseline is
 * cut to `limit - 1` entries first. Delta candidates whose equivalence signature is already
 * present are skipped. If no delta candidate could be appended (all were duplicates), the
 * reserved slot is given back to the baseline.
 *
 * A zero, negative or NaN `limit` is treated as a cap of 0 and yields an empty result. Without
 * that clamp a limit of 0 still let one delta candidate through, and a negative limit made
 * `slice` count from the end of the baseline instead of returning nothing.
 *
 * @param baseline Deterministic candidates, in priority order.
 * @param delta Model-generated candidates to append.
 * @param limit Maximum number of candidates in the result.
 * @returns A frozen array of at most `limit` candidates (baseline first, then unique delta).
 */
function appendModelDelta(baseline, delta, limit) {
    const cap = limit > 0 ? limit : 0;
    const baselineLimit = delta.length > 0 && cap > 0 ? Math.max(0, cap - 1) : cap;
    const out = [...baseline].slice(0, baselineLimit);
    const seen = new Set(out.map((candidate) => computeCandidateEquivalenceSignature(candidate)));
    let appendedDelta = 0;
    for (const candidate of delta) {
        // Check the cap BEFORE pushing so the result can never exceed it, even when the cap is 0.
        if (out.length >= cap)
            break;
        const signature = computeCandidateEquivalenceSignature(candidate);
        if (seen.has(signature))
            continue;
        seen.add(signature);
        out.push(candidate);
        appendedDelta += 1;
    }
    if (appendedDelta === 0) {
        // Nothing from the delta survived dedup: refill the reserved slot from the baseline.
        for (let index = baselineLimit; index < baseline.length && out.length < cap; index += 1) {
            const candidate = baseline[index];
            if (candidate === undefined)
                continue;
            out.push(candidate);
        }
    }
    return Object.freeze(out);
}
|
|
140
|
+
/**
 * Pick the candidates that should go through review.
 *
 * @param persisted Candidates that were kept for the run.
 * @param baseline Deterministic baseline candidates.
 * @param delta Model-generated candidates.
 * @returns A frozen empty array when there is no delta; otherwise a frozen array of the delta
 *   candidates whose id appears in `persisted`; or `baseline` itself (same reference) when no
 *   delta candidate was persisted.
 */
function selectReviewCandidates(persisted, baseline, delta) {
    if (delta.length === 0) {
        return Object.freeze([]);
    }
    const keptIds = new Set();
    for (const candidate of persisted) {
        keptIds.add(String(candidate.id));
    }
    const keptDelta = delta.filter((candidate) => keptIds.has(String(candidate.id)));
    if (keptDelta.length === 0) {
        return baseline;
    }
    return Object.freeze(keptDelta);
}
|
|
148
|
+
/**
 * Assemble the generation output for a run in which the model produced a result.
 *
 * Combines the deterministic baseline with the parsed model delta (capped at
 * `runCandidateLimit`) and selects the review candidates from the merged list.
 *
 * @param result Generation result; reads `modelId`, `seedUsed` and `modelParameters`.
 * @param ctx Run context.
 * @param input Run input.
 * @param runCandidateLimit Cap on the merged candidate list.
 * @param modelDeltaLimit Cap on the parsed model candidates.
 * @returns `{ candidates, reviewCandidates, modelId?, seedUsed?, modelParameters }`. When
 *   `modelId` is defined, `seedUsed` is always present (falling back to `null`); otherwise
 *   `seedUsed` is present only if the result defines it.
 */
function modelGenerationOutput(result, ctx, input, runCandidateLimit, modelDeltaLimit) {
    const baseline = deterministicBaselineCandidates(ctx, input);
    const delta = parseModelCandidates(result, ctx, input, modelDeltaLimit);
    const candidates = appendModelDelta(baseline, delta, runCandidateLimit);
    const reviewCandidates = selectReviewCandidates(candidates, baseline, delta);
    const hasModelId = result.modelId !== undefined;
    const modelIdField = hasModelId ? { modelId: result.modelId } : {};
    let seedField = {};
    if (hasModelId) {
        seedField = { seedUsed: result.seedUsed ?? null };
    }
    else if (result.seedUsed !== undefined) {
        seedField = { seedUsed: result.seedUsed };
    }
    return {
        candidates,
        reviewCandidates,
        ...modelIdField,
        ...seedField,
        modelParameters: result.modelParameters,
    };
}
|
|
164
|
+
/**
 * Build the generation output used when the model path failed and the run falls back to the
 * deterministic baseline.
 *
 * @param ctx Run context.
 * @param input Run input.
 * @param reasonSummary Summary of why the fallback happened; stored as `fallbackReason` and as
 *   `modelParameters.generationFallbackReason`.
 * @returns Output whose `candidates` and `reviewCandidates` are both the baseline, with
 *   `skipJudge` set to `true`.
 */
function baselineFallbackGenerationOutput(ctx, input, reasonSummary) {
    const baseline = deterministicBaselineCandidates(ctx, input);
    const modelParameters = { generationFallbackReason: reasonSummary };
    return {
        candidates: baseline,
        reviewCandidates: baseline,
        skipJudge: true,
        fallbackReason: reasonSummary,
        modelParameters,
    };
}
|
|
174
|
+
/**
 * Tell whether a thrown value is an `Error` carrying the given string `code`.
 *
 * @param error Any caught value.
 * @param code The code to look for.
 * @returns `true` only when `error` is an `Error` instance whose `code` property is a string
 *   equal to `code`; `false` for everything else.
 */
function hasQiCode(error, code) {
    if (error instanceof Error && "code" in error) {
        return typeof error.code === "string" && error.code === code;
    }
    return false;
}
|
|
180
|
+
/**
 * Decide whether a failed generation call should be counted as a gateway dispatch.
 *
 * Every failure counts except an error coded `QI_PROMPT_TOO_LARGE` — presumably because such a
 * prompt is rejected before anything is sent to the gateway (TODO confirm with the port).
 *
 * @param error The value caught from the generation call.
 * @returns `false` for a `QI_PROMPT_TOO_LARGE` error, `true` otherwise.
 */
function shouldCountRejectedGenerationDispatch(error) {
    const isPromptTooLarge = hasQiCode(error, "QI_PROMPT_TOO_LARGE");
    return !isPromptTooLarge;
}
|
|
183
|
+
/**
 * Run the generation stage: ask the injected `generate` port for candidates and merge them with
 * the deterministic baseline, falling back to the baseline alone when the call fails.
 *
 * @param ctx Run context; reads `limits`, `profile`, `signal` and updates
 *   `modelGatewayCallCount`.
 * @param input Run input; reads `ingestedAtoms` (and is passed on to the baseline helpers).
 * @param deps Dependencies; uses `deps.generate.generate`.
 * @returns The generation output (candidates, review candidates and model metadata).
 * @throws EmptyEvidenceError when there are no ingested atoms.
 * @throws StageCancelledError when the failure is a cancellation.
 */
async function generateCandidates(ctx, input, deps) {
    const { ingestedAtoms } = input;
    if (ingestedAtoms.length === 0) {
        throw new EmptyEvidenceError();
    }
    // Evidence is numbered from 1 for the prompt.
    const evidence = ingestedAtoms.map((entry, position) => ({
        index: position + 1,
        kind: entry.atom.kind,
        text: entry.canonicalText,
    }));
    const runCandidateLimit = ctx.limits.maxCandidatesPerRun;
    const modelDeltaLimit = modelDeltaCandidateLimit(evidence.length, runCandidateLimit);
    const instruction = QualityIntelligenceGeneration.buildTestDesignInstruction({
        evidenceCount: evidence.length,
        profile: ctx.profile,
        maxTestCases: modelDeltaLimit,
    });
    // Audit contract: a generation dispatch counts as an ATTEMPT, mirroring the judge. The port
    // makes at most one gateway dispatch per call, so a rejection (5xx / timeout / network /
    // abort) still means one call was attempted and billed. Counting only the reported
    // `modelCallCount` after a successful await would under-report a failed run as 0 gateway
    // calls (#273 audit; #843 undercount class). The deterministic baseline port never rejects
    // and reports `modelCallCount` 0, so it is unaffected.
    let dispatchAlreadyCounted = false;
    try {
        const result = await deps.generate.generate({
            systemPrompt: QualityIntelligenceGeneration.QI_TEST_DESIGN_SYSTEM_PROMPT,
            instruction,
            evidence,
            maxCandidates: modelDeltaLimit,
            signal: ctx.signal,
        });
        ctx.modelGatewayCallCount += result.modelCallCount;
        dispatchAlreadyCounted = result.modelCallCount > 0;
        const producedByModel = !(result.modelId === undefined && result.modelCallCount === 0);
        if (producedByModel) {
            return modelGenerationOutput(result, ctx, input, runCandidateLimit, modelDeltaLimit);
        }
        // No model id and no gateway call: the result carries no model output, so the baseline
        // is both the candidate list and the review list.
        const candidates = deterministicBaselineCandidates(ctx, input);
        return {
            candidates,
            reviewCandidates: candidates,
            ...(result.seedUsed !== undefined ? { seedUsed: result.seedUsed } : {}),
            modelParameters: result.modelParameters,
        };
    }
    catch (error) {
        if (isCancellationError(ctx, error)) {
            throw new StageCancelledError();
        }
        const mustCountAttempt = !dispatchAlreadyCounted && shouldCountRejectedGenerationDispatch(error);
        if (mustCountAttempt) {
            ctx.modelGatewayCallCount += 1;
        }
        return baselineFallbackGenerationOutput(ctx, input, safeReasonSummary(error));
    }
}
|
|
237
|
+
/**
 * Render a candidate as the four-line text handed to the judge.
 *
 * @param candidate Reads `title`, `preconditions`, `steps` and `expectedResults` (the last three
 *   are lists, joined with `"; "`).
 * @returns The lines for title, preconditions, steps and expected result, separated by newlines.
 */
function candidateSummaryText(candidate) {
    const title = `Titel: ${candidate.title}`;
    const preconditions = `Vorbedingungen: ${candidate.preconditions.join("; ")}`;
    const steps = `Schritte: ${candidate.steps.join("; ")}`;
    const expected = `Erwartetes Ergebnis: ${candidate.expectedResults.join("; ")}`;
    return [title, preconditions, steps, expected].join("\n");
}
|
|
246
|
+
// Maximum number of rubric dimensions quoted in a judge rationale summary.
const JUDGE_SUMMARY_DIMENSION_LIMIT = 2;
// Truncation priority per finding kind; kinds not listed here resolve to 1 in
// `findingTruncationPriority`. NOTE(review): presumably a lower number means "kept first" when
// findings are truncated — confirm against the truncation code.
const FINDING_KIND_TRUNCATION_PRIORITY = {
    "test-quality": 0,
};
// Display labels (German) for the judge rubric dimensions, keyed by dimension name.
const JUDGE_DIMENSION_LABEL = {
    verifiability: "Prüfbarkeit",
    atomicity: "Atomarität",
    determinism: "Determinismus",
    "ac-fidelity": "AC-Treue",
};
|
|
256
|
+
/**
 * Collect the source text a candidate should be judged against.
 *
 * @param candidate Reads `derivedFromAtomIds`.
 * @param ingestedAtoms Entries carrying `atom.id` and `canonicalText`.
 * @returns A frozen array of frozen `{ atomId, text }` records: the atoms the candidate says it
 *   was derived from (in that order, unknown ids skipped), or every ingested atom when none of
 *   those ids matches.
 */
function sourceContextForCandidate(candidate, ingestedAtoms) {
    const everyAtom = ingestedAtoms.map((entry) => Object.freeze({
        atomId: String(entry.atom.id),
        text: entry.canonicalText,
    }));
    const contextByAtomId = new Map(everyAtom.map((context) => [context.atomId, context]));
    const matched = [];
    for (const atomId of candidate.derivedFromAtomIds) {
        const context = contextByAtomId.get(String(atomId));
        if (context !== undefined) {
            matched.push(context);
        }
    }
    // With no resolvable trace, fall back to the whole evidence set.
    return Object.freeze(matched.length > 0 ? matched : everyAtom);
}
|
|
274
|
+
/**
 * Condense a judge verdict into a short rationale string.
 *
 * Describes the lowest-scoring dimensions first: those below `TEST_QUALITY_WEAK_THRESHOLD` when
 * any exist, otherwise all dimensions. At most `JUDGE_SUMMARY_DIMENSION_LIMIT` are included.
 *
 * @param verdict Reads `dimensions` (each with `name`, `score`, `rationale`); not mutated.
 * @returns `"<label>: <rationale>"` parts joined with `"; "`.
 */
function judgeRationaleSummary(verdict) {
    const byAscendingScore = (left, right) => left.score - right.score;
    const weakDimensions = verdict.dimensions.filter((dimension) => dimension.score < TEST_QUALITY_WEAK_THRESHOLD);
    // Both branches sort a copy, so the verdict's own dimension order is left untouched.
    const pool = weakDimensions.length > 0 ? weakDimensions : [...verdict.dimensions];
    pool.sort(byAscendingScore);
    const parts = [];
    for (const dimension of pool.slice(0, JUDGE_SUMMARY_DIMENSION_LIMIT)) {
        parts.push(`${JUDGE_DIMENSION_LABEL[dimension.name]}: ${dimension.rationale}`);
    }
    return parts.join("; ");
}
|
|
286
|
+
/**
 * Build the frozen `test-quality` finding for one candidate.
 *
 * @param runId Run the finding belongs to.
 * @param candidate Judged candidate; reads `id` and `derivedFromAtomIds`.
 * @param score Quality score; below 30 the finding is `high` severity, otherwise `medium`.
 * @param rationale Text used as the finding summary.
 * @param ordinal Position used as part of the deterministic finding id.
 * @returns A frozen finding whose id is `qi-finding-` plus the first 32 hex characters of the
 *   SHA-256 of the id payload.
 */
function buildTestQualityFinding(runId, candidate, score, rationale, ordinal) {
    // NOTE(review): the payload parts are concatenated without a separator, so different
    // (runId, candidateId, ordinal) triples could in principle yield the same payload — confirm
    // this is intended before relying on id uniqueness.
    const idPayload = ["v1-tq", String(runId), String(candidate.id), String(ordinal)].join("");
    const findingId = `qi-finding-${sha256Hex(idPayload).slice(0, 32)}`;
    let severity = "medium";
    if (score < 30) {
        severity = "high";
    }
    return Object.freeze({
        kind: "test-quality",
        id: QI.asQualityIntelligenceValidationFindingId(findingId),
        runId,
        candidateId: candidate.id,
        severity,
        summary: rationale,
        // Copied before freezing so the candidate's own array is not frozen.
        evidenceAtomIds: Object.freeze([...candidate.derivedFromAtomIds]),
    });
}
|
|
300
|
+
/**
 * Look up the truncation priority of a finding by its `kind`.
 *
 * @param finding Reads `kind`.
 * @returns The entry from `FINDING_KIND_TRUNCATION_PRIORITY`, or 1 when the kind has none.
 */
function findingTruncationPriority(finding) {
    const priority = FINDING_KIND_TRUNCATION_PRIORITY[finding.kind];
    return priority ?? 1;
}
|
|
303
|
+
// Judge-stage result with no findings, no quality score and no per-candidate verdicts.
// NOTE(review): the object and its `findings` array are frozen, but the `Map` is a single shared
// mutable instance — consumers must not write to it.
const EMPTY_JUDGE_RESULT = Object.freeze({
    findings: Object.freeze([]),
    qualityScore: null,
    candidateQualityVerdicts: new Map(),
});
|
|
308
|
+
// Number of judge workers that run at the same time. Judging costs one gateway call per
// candidate, so a bounded pool shortens the wall-clock time of a large run without flooding the
// gateway (which applies its own per-call retry/timeout). Findings are written into slots
// indexed by candidate position, so the persisted finding order stays deterministic no matter
// which judge call finishes first.
const JUDGE_CONCURRENCY = 4;
|
|
313
|
+
/**
 * Tell whether a caught failure should be treated as a cancellation.
 *
 * @param ctx Run context; reads `signal`.
 * @param error The caught value.
 * @returns `true` when `error` is a `StageCancelledError`; otherwise the result of
 *   `isCancelled(ctx.signal)`, so any failure seen after the run was cancelled also counts.
 */
function isCancellationError(ctx, error) {
    if (error instanceof StageCancelledError) {
        return true;
    }
    return isCancelled(ctx.signal);
}
|
|
316
|
+
// Rationale (German, user-facing) for a candidate whose judge call failed.
const JUDGE_ERROR_RATIONALE = "Der Quality-Judge konnte diesen Kandidaten nicht bewerten; er wird für das Audit als schwach behandelt.";
// Rationale (German, user-facing) for a candidate left unjudged because the judge budget ran out.
const JUDGE_BUDGET_RATIONALE = "Das Quality-Judge-Budget war vor der Bewertung dieses Kandidaten ausgeschöpft; er wird für das Audit als schwach behandelt.";
|
|
318
|
+
/**
 * Build a judge outcome for a candidate that was not actually scored by the judge.
 *
 * @param ctx Run context; reads `plan.id`.
 * @param candidate The candidate the outcome is for.
 * @param ordinal Candidate position, used in the finding id.
 * @param rationale Explanation used for both the finding and the verdict.
 * @returns A non-strong outcome with a score-0 test-quality finding and an all-zero weak verdict.
 */
function buildSyntheticWeakJudgeOutcome(ctx, candidate, ordinal, rationale) {
    const finding = buildTestQualityFinding(ctx.plan.id, candidate, 0, rationale, ordinal);
    const qualityVerdict = syntheticWeakQualityVerdict(rationale);
    return { strong: false, finding, qualityVerdict };
}
|
|
325
|
+
/**
 * Make a frozen shallow copy of a list of rubric dimensions.
 *
 * @param dimensions Dimension objects; the inputs themselves are neither frozen nor modified.
 * @returns A frozen array of frozen shallow copies, in the same order.
 */
function cloneDimensions(dimensions) {
    const copies = [];
    for (const dimension of dimensions) {
        copies.push(Object.freeze({ ...dimension }));
    }
    return Object.freeze(copies);
}
|
|
328
|
+
/**
 * Convert a raw judge verdict into the frozen quality verdict stored per candidate.
 *
 * @param verdict Reads `dimensions` and `overallRationale`.
 * @returns A frozen `{ verdict, score, dimensions, overallRationale }` where `verdict` and
 *   `score` are derived from the dimensions and `dimensions` is a frozen copy.
 */
function qualityVerdictFromJudge(verdict) {
    const { dimensions } = verdict;
    const score = scoreFromDimensions(dimensions);
    const label = verdictFromDimensions(dimensions);
    return Object.freeze({
        verdict: label,
        score,
        dimensions: cloneDimensions(dimensions),
        overallRationale: verdict.overallRationale,
    });
}
|
|
337
|
+
/**
 * Build a frozen "weak" quality verdict for a candidate that was never scored.
 *
 * @param rationale Text used as the overall rationale and as every dimension's rationale.
 * @returns A frozen verdict with `verdict: "weak"`, `score: 0`, and one zero-scored dimension
 *   per entry of `QI.TEST_QUALITY_RUBRIC_DIMENSIONS`.
 */
function syntheticWeakQualityVerdict(rationale) {
    const zeroScored = (name) => Object.freeze({ name, score: 0, rationale });
    const dimensions = Object.freeze(QI.TEST_QUALITY_RUBRIC_DIMENSIONS.map(zeroScored));
    return Object.freeze({
        verdict: "weak",
        score: 0,
        dimensions,
        overallRationale: rationale,
    });
}
|
|
350
|
+
/**
 * Judge a single candidate and turn the verdict into an outcome.
 *
 * Gateway accounting: after a successful call the dispatch count reported by the judge port
 * (`gatewayCallCount`, defaulting to 1) is added to `ctx.modelGatewayCallCount`. A failing call
 * is counted as one attempted dispatch, which also covers legacy/test ports that throw without
 * returning dispatch metadata.
 *
 * Failure handling: a transient judge error (rate limit / 5xx / timeout / network) does not
 * fail the run; it becomes an explicit weak outcome. A cancellation is re-raised as
 * `StageCancelledError` so the whole stage aborts.
 *
 * @param ctx Run context; reads `signal` and `plan.id`, updates `modelGatewayCallCount`.
 * @param candidate The candidate to judge.
 * @param ordinal Candidate position, used in finding ids.
 * @param ingestedAtoms Evidence used to build the source context.
 * @param judge Judge port exposing `judge(request, signal)`.
 * @returns `{ strong, finding, qualityVerdict }`; `finding` is `null` for a strong candidate.
 */
async function judgeOneCandidate(ctx, candidate, ordinal, ingestedAtoms, judge) {
    let verdict;
    try {
        const request = {
            candidateText: candidateSummaryText(candidate),
            sourceContext: sourceContextForCandidate(candidate, ingestedAtoms),
        };
        verdict = await judge.judge(request, ctx.signal);
        ctx.modelGatewayCallCount += verdict.gatewayCallCount ?? 1;
    }
    catch (error) {
        if (isCancellationError(ctx, error)) {
            throw new StageCancelledError();
        }
        ctx.modelGatewayCallCount += 1;
        return buildSyntheticWeakJudgeOutcome(ctx, candidate, ordinal, JUDGE_ERROR_RATIONALE);
    }
    const score = scoreFromDimensions(verdict.dimensions);
    const qualityVerdict = qualityVerdictFromJudge(verdict);
    const isStrong = qualityVerdict.verdict === "strong";
    if (isStrong) {
        return { strong: true, finding: null, qualityVerdict };
    }
    const rationale = judgeRationaleSummary(verdict);
    const finding = buildTestQualityFinding(ctx.plan.id, candidate, score, rationale, ordinal);
    return { strong: false, finding, qualityVerdict };
}
|
|
383
|
+
/**
 * Allocate the per-candidate result slots for the judge stage.
 *
 * @param candidateCount Number of candidates (one slot each).
 * @returns `{ findingSlots, verdictSlots }`: two separate arrays of `candidateCount` entries,
 *   every entry initialised to `undefined`.
 */
function makeJudgeSlots(candidateCount) {
    const emptySlots = () => Array.from({ length: candidateCount }, () => undefined);
    const findingSlots = emptySlots();
    const verdictSlots = emptySlots();
    return { findingSlots, verdictSlots };
}
|
|
389
|
+
/**
 * Store one judge outcome in the slot arrays.
 *
 * @param slots `{ findingSlots, verdictSlots }`, mutated in place.
 * @param index Slot position of the candidate.
 * @param outcome `{ strong, finding, qualityVerdict }`; the verdict is always stored, the
 *   finding only when it is not `null`.
 * @returns The outcome's `strong` flag.
 */
function recordJudgeOutcome(slots, index, outcome) {
    const { strong, finding, qualityVerdict } = outcome;
    slots.verdictSlots[index] = qualityVerdict;
    if (finding !== null) {
        slots.findingSlots[index] = finding;
    }
    return strong;
}
|
|
395
|
+
/**
 * Judge all given candidates with a bounded pool of concurrent workers.
 *
 * Up to `JUDGE_CONCURRENCY` workers pull the next candidate index from a shared cursor; each
 * outcome is written to the slot at the candidate's own index, so results do not depend on
 * completion order.
 *
 * @param ctx Run context, passed on to `judgeOneCandidate`.
 * @param candidates Candidates to judge; `undefined` entries are skipped.
 * @param ingestedAtoms Evidence passed on to `judgeOneCandidate`.
 * @param judge Judge port.
 * @param slots Slot arrays filled in place.
 * @returns `{ strongCount, verdictCount }` for the candidates that were judged.
 */
async function judgeCandidates(ctx, candidates, ingestedAtoms, judge, slots) {
    const tally = { strongCount: 0, verdictCount: 0 };
    let nextIndex = 0;
    const drainQueue = async () => {
        while (true) {
            // Claiming the index is synchronous, so two workers never take the same candidate.
            const index = nextIndex;
            nextIndex += 1;
            if (index >= candidates.length) {
                return;
            }
            const candidate = candidates[index];
            if (candidate === undefined) {
                continue;
            }
            const outcome = await judgeOneCandidate(ctx, candidate, index, ingestedAtoms, judge);
            tally.verdictCount += 1;
            if (recordJudgeOutcome(slots, index, outcome)) {
                tally.strongCount += 1;
            }
        }
    };
    const workerCount = Math.min(JUDGE_CONCURRENCY, candidates.length);
    if (workerCount > 0) {
        await Promise.all(Array.from({ length: workerCount }, () => drainQueue()));
    }
    return tally;
}
|
|
419
|
+
/**
 * Record a synthetic weak outcome for every candidate from `startIndex` onward, using the
 * budget-exhausted rationale.
 *
 * @param ctx Run context, passed on to `buildSyntheticWeakJudgeOutcome`.
 * @param candidates Full candidate list; `undefined` entries are skipped.
 * @param startIndex Index of the first candidate to mark.
 * @param slots Slot arrays filled in place.
 * @returns The number of synthetic verdicts recorded.
 */
function recordBudgetOverflow(ctx, candidates, startIndex, slots) {
    let synthesized = 0;
    for (let index = startIndex; index < candidates.length; index += 1) {
        const candidate = candidates[index];
        if (candidate !== undefined) {
            const outcome = buildSyntheticWeakJudgeOutcome(ctx, candidate, index, JUDGE_BUDGET_RATIONALE);
            synthesized += 1;
            recordJudgeOutcome(slots, index, outcome);
        }
    }
    return synthesized;
}
|
|
431
|
+
function candidateQualityVerdictMap(candidates, slots) {
|
|
432
|
+
const candidateQualityVerdicts = new Map();
|
|
433
|
+
for (let i = 0; i < candidates.length; i += 1) {
|
|
434
|
+
const candidate = candidates[i];
|
|
435
|
+
const qualityVerdict = slots.verdictSlots[i];
|
|
436
|
+
if (candidate !== undefined && qualityVerdict !== undefined) {
|
|
437
|
+
candidateQualityVerdicts.set(String(candidate.id), qualityVerdict);
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
return candidateQualityVerdicts;
|
|
441
|
+
}
|
|
442
|
+
/**
 * Assemble the judge-stage result from the filled slots and the outcome counts.
 *
 * @param candidates Candidate list, aligned by index with the slots.
 * @param slots      Object holding `findingSlots` and `verdictSlots`.
 * @param counts     `{ strongCount, verdictCount }` totals for the run.
 * @returns An object with the frozen list of defined findings, the quality score
 *          (`strongCount / verdictCount` as a percentage, or `null` when no verdicts were
 *          recorded) and the per-candidate verdict map.
 */
function buildJudgeStageResult(candidates, slots, counts) {
    const { strongCount, verdictCount } = counts;
    const recordedFindings = slots.findingSlots.filter((slot) => slot !== undefined);
    let qualityScore = null;
    if (verdictCount !== 0) {
        qualityScore = (strongCount / verdictCount) * 100;
    }
    return {
        findings: Object.freeze(recordedFindings),
        qualityScore,
        candidateQualityVerdicts: candidateQualityVerdictMap(candidates, slots),
    };
}
|
|
451
|
+
/**
 * Adversarially judge every candidate via the model-judge port (Epic #736, Issue #747).
 *
 * Resilience contract: the judge AUGMENTS generation and must never fail an otherwise
 * successful run — a transient per-candidate error becomes an explicit weak test-quality
 * finding and only cancellation aborts the stage.
 * Audit contract: every dispatch is counted into `ctx.modelGatewayCallCount`.
 * Budget contract: at most `ctx.limits.maxJudgeCallsPerRun` candidates make gateway calls;
 * overflow candidates receive deterministic weak findings so the persisted run still accounts
 * for every candidate.
 * Ordering: bounded-concurrency workers share a cursor, and findings land in candidate-indexed
 * slots so the persisted order stays deterministic regardless of completion order.
 *
 * @param ctx           Run context; `ctx.limits.maxJudgeCallsPerRun` caps the judged prefix.
 * @param candidates    Candidates to judge.
 * @param ingestedAtoms Forwarded to the per-candidate judge.
 * @param judge         Judge dependency forwarded to the per-candidate judge.
 * @returns `EMPTY_JUDGE_RESULT` for an empty candidate list, otherwise the assembled result.
 */
async function runJudgeStage(ctx, candidates, ingestedAtoms, judge) {
    if (candidates.length === 0) {
        return EMPTY_JUDGE_RESULT;
    }
    const callBudget = Math.max(0, ctx.limits.maxJudgeCallsPerRun);
    // Keep the `>=` direction: it decides which branch a NaN budget falls into.
    let withinBudget;
    if (callBudget >= candidates.length) {
        withinBudget = candidates;
    }
    else {
        withinBudget = candidates.slice(0, callBudget);
    }
    const slots = makeJudgeSlots(candidates.length);
    const { strongCount, verdictCount: judgedVerdictCount } = await judgeCandidates(ctx, withinBudget, ingestedAtoms, judge, slots);
    const overflowVerdictCount = recordBudgetOverflow(ctx, candidates, withinBudget.length, slots);
    // Per-run quality score = share of candidates with a strong judge outcome, as a percentage (#747).
    // Gateway errors and budget overflow produce explicit weak outcomes, so unverified candidates
    // can neither pass for strong ones nor inflate the run score.
    const counts = {
        strongCount,
        verdictCount: judgedVerdictCount + overflowVerdictCount,
    };
    return buildJudgeStageResult(candidates, slots, counts);
}
|
|
479
|
+
function candidatesWithQualityVerdicts(candidates, verdicts) {
|
|
480
|
+
if (verdicts.size === 0)
|
|
481
|
+
return candidates;
|
|
482
|
+
return Object.freeze(candidates.map((candidate) => {
|
|
483
|
+
const qualityVerdict = verdicts.get(String(candidate.id));
|
|
484
|
+
if (qualityVerdict === undefined)
|
|
485
|
+
return candidate;
|
|
486
|
+
return Object.freeze({
|
|
487
|
+
...candidate,
|
|
488
|
+
qualityVerdict,
|
|
489
|
+
});
|
|
490
|
+
}));
|
|
491
|
+
}
|
|
492
|
+
/**
 * Execute a model-routed QI test-design run end to end.
 *
 * Stages run strictly in this order: plan, candidates, judge, coverage, validate, finalize.
 * The finalize stage persists the run manifest and then records the candidates (decorated
 * with their quality verdicts) on `deps.candidatesSink`. Anything thrown inside the lifecycle
 * is handed to `finaliseFailureOrCancellation`, which produces the returned summary instead.
 *
 * @param input Run input; reads `plan`, `profile`, `ingestedAtoms`, `envelopes` and
 *              `provenanceRefs`.
 * @param deps  Run dependencies; reads `sink`, `clock`, `limits`, `signal`,
 *              `initialModelGatewayCallCount`, `judge`, `evidenceStore`, `redaction` and
 *              `candidatesSink`.
 * @returns On success, a frozen summary with `status: "succeeded"`; it carries `reasonSummary`
 *          only when generation reported `skipJudge === true`.
 */
// eslint-disable-next-line max-lines-per-function -- strict QI lifecycle: linear stage audit trail.
export async function runQualityIntelligenceModelRoutedTestDesign(input, deps) {
    const ctx = makeContext({
        descriptor: QI_TEST_DESIGN_WORKFLOW_DESCRIPTOR,
        plan: input.plan,
        sink: deps.sink,
        clock: deps.clock,
        limits: deps.limits,
        policyProfile: input.profile,
        signal: deps.signal,
    });
    ctx.modelGatewayCallCount += deps.initialModelGatewayCallCount ?? 0;
    const evidenceRefs = evidenceRefsFor(input.ingestedAtoms);
    emitQueuedAndStarted(ctx);
    try {
        await withStage(ctx, "plan", async () => Promise.resolve());
        const generation = await withStage(ctx, "candidates", async () => generateCandidates(ctx, input, deps));
        const { candidates, reviewCandidates } = generation;
        // A provider/parser failure is caught inside generateCandidates and degrades to the
        // deterministic baseline (skipJudge), keeping the run alive. That degradation MUST stay
        // visible: the redacted reason is threaded into the terminal summary (and surfaced on the
        // wire `done` frame as `degraded` + `reasonSummary`) so the run is never presented as an
        // authoritative model-backed result (regulated-delivery audit, QI-DEG-01).
        let degradedReason;
        if (generation.skipJudge === true) {
            degradedReason = generation.fallbackReason ?? "qi-generation-fallback";
        }
        emitCandidateProposed(ctx, candidates);
        const { judge } = deps;
        const judgeResult = await withStage(ctx, "judge", async () => {
            const judgeDisabled = judge === undefined || generation.skipJudge === true;
            if (judgeDisabled) {
                return EMPTY_JUDGE_RESULT;
            }
            try {
                return await runJudgeStage(ctx, reviewCandidates, input.ingestedAtoms, judge);
            }
            catch (judgeFailure) {
                // Cancellation must still abort the run; anything else is fail-soft so an optional
                // judge can never turn a successful generation into a failed run
                // (Epic #736 augments-not-harms).
                if (isCancellationError(ctx, judgeFailure)) {
                    throw judgeFailure;
                }
                return EMPTY_JUDGE_RESULT;
            }
        });
        const atoms = input.ingestedAtoms.map((ingested) => ingested.atom);
        const coverageMap = await withStage(ctx, "coverage", async () => Promise.resolve(buildCoverageMap({ runId: input.plan.id, atoms, candidates })));
        const atomStatuses = buildAtomCoverageStatuses(atoms, coverageMap);
        const excerptByAtomId = excerptsByAtomId(input.ingestedAtoms);
        const coverageMatrix = toCoverageMatrixRows(atomStatuses, excerptByAtomId);
        // Every atom that is not fully covered becomes a coverage-gap finding.
        const gapFindings = [];
        atomStatuses.forEach((atomStatus, position) => {
            if (atomStatus === undefined || atomStatus.status === "covered") {
                return;
            }
            const excerpt = excerptByAtomId.get(String(atomStatus.atomId));
            gapFindings.push(buildCoverageGapFinding(input.plan.id, atomStatus, position, excerpt));
        });
        const rawFindings = await withStage(ctx, "validate", async () => Promise.resolve(validateCandidates(input.plan.id, candidates)));
        // Order by severity (critical -> low) BEFORE truncation so that, if the run hits the
        // per-run findings cap, the most severe findings — uncovered-requirement gaps included —
        // always survive the cut rather than being dropped by array position. Within a severity
        // tier, keep test-quality findings first because #748 weak-test flags are projected
        // exclusively from those candidate-scoped findings; stable sort preserves original order
        // for all remaining ties.
        const bySeverityThenPriority = (left, right) => {
            const severityDelta = QI.QUALITY_INTELLIGENCE_SEVERITY_RANK[left.severity] -
                QI.QUALITY_INTELLIGENCE_SEVERITY_RANK[right.severity];
            return severityDelta || findingTruncationPriority(left) - findingTruncationPriority(right);
        };
        // The array literal is already a fresh copy, so sorting it in place touches no input.
        const allFindings = [...gapFindings, ...rawFindings, ...judgeResult.findings].sort(bySeverityThenPriority);
        const findings = truncateFindings(allFindings, ctx.limits.maxFindingsPerRun);
        emitFindingsRecorded(ctx, findings);
        const evidence = await withStage(ctx, "finalize", async () => {
            const completedAt = ctx.clock.nowIso();
            const sourceFingerprints = input.envelopes.map((envelope) => ({
                envelopeId: String(envelope.id),
                integrityHashSha256Hex: envelope.provenance.integrityHashSha256Hex,
            }));
            const atomFingerprints = atomFingerprintsFor(input.ingestedAtoms);
            // Optional manifest fields are included only when they carry a value.
            const optionalManifestFields = {
                ...(deps.redaction !== undefined ? { redaction: deps.redaction } : {}),
                ...(sourceFingerprints.length > 0 ? { sourceFingerprints } : {}),
                ...(atomFingerprints.length > 0 ? { atomFingerprints } : {}),
                ...(generation.modelId !== undefined ? { modelId: generation.modelId } : {}),
                ...(generation.seedUsed !== undefined ? { seedUsed: generation.seedUsed } : {}),
                ...(generation.modelParameters !== undefined
                    ? { modelParameters: generation.modelParameters }
                    : {}),
            };
            const persisted = persistRun({
                ctx,
                status: "succeeded",
                candidatesCount: candidates.length,
                findings,
                evidenceRefs,
                provenanceRefs: input.provenanceRefs,
                completedAt,
                evidenceStore: deps.evidenceStore,
                coverageMatrix,
                qualityScore: judgeResult.qualityScore,
                ...optionalManifestFields,
            });
            const decoratedCandidates = candidatesWithQualityVerdicts(candidates, judgeResult.candidateQualityVerdicts);
            deps.candidatesSink.record(decoratedCandidates, completedAt);
            return Promise.resolve(persisted);
        });
        emit(ctx, { kind: "run:succeeded" });
        const summary = {
            runId: input.plan.id,
            workflowId: ctx.descriptor.workflowId,
            status: "succeeded",
            eventsEmitted: ctx.sequence,
            modelGatewayCallCount: ctx.modelGatewayCallCount,
            evidence,
            qualityScore: judgeResult.qualityScore,
            ...(degradedReason !== undefined ? { reasonSummary: degradedReason } : {}),
        };
        return Object.freeze(summary);
    }
    catch (caught) {
        const failureManifest = {
            candidatesCount: 0,
            findings: Object.freeze([]),
            evidenceRefs,
            provenanceRefs: input.provenanceRefs,
            evidenceStore: deps.evidenceStore,
        };
        return finaliseFailureOrCancellation(ctx, caught, failureManifest);
    }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { QualityIntelligence as QI, type ModelCapability, type NormalizedResponse } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import { type PolicyProfile } from "@oscharko-dev/keiko-quality-intelligence";
|
|
3
|
+
import { type QualityIntelligenceBudgetState, type QualityIntelligenceDispatcherArgs, type QualityIntelligenceDispatcherResult, type QualityIntelligenceReplayCachePort } from "@oscharko-dev/keiko-model-gateway";
|
|
4
|
+
import { type QualityIntelligenceLocalStore } from "@oscharko-dev/keiko-evidence";
|
|
5
|
+
import { type QualityIntelligenceClock, type QualityIntelligenceProvenanceRefs, type QualityIntelligenceRunEventSink, type QualityIntelligenceRunSummary } from "./runtimeCommon.js";
|
|
6
|
+
import type { QualityIntelligenceWorkflowLimits } from "./descriptors.js";
|
|
7
|
+
export type { QualityIntelligenceClock, QualityIntelligenceProvenanceRefs, QualityIntelligenceRunEventSink, QualityIntelligenceRunStatus, QualityIntelligenceRunSummary, } from "./runtimeCommon.js";
|
|
8
|
+
/**
 * Optional model-routed dispatch surface.
 *
 * When omitted, judges/refinements skip the model call and fall through to the pure-domain
 * validators only. When supplied, the dispatcher is invoked through the gateway seam from #279.
 */
export interface QualityIntelligenceDispatchPort {
    /** Sends one dispatcher request and resolves with the dispatcher result. */
    readonly dispatch: (
        args: QualityIntelligenceDispatcherArgs,
    ) => Promise<QualityIntelligenceDispatcherResult>;
}
|
|
16
|
+
/**
 * Dependencies bundled under `QualityIntelligenceRunEntryDeps.modelRouted`.
 *
 * NOTE(review): field semantics below are inferred from the types only — confirm against the
 * model-gateway package.
 */
export interface QualityIntelligenceModelRoutedDeps {
    /** Dispatch port used to reach the model. */
    readonly dispatch: QualityIntelligenceDispatchPort;
    /** Capability descriptor of the target model. */
    readonly model: ModelCapability;
    /** Same shape as the dispatcher's `providerConfig` argument. */
    readonly providerConfig: QualityIntelligenceDispatcherArgs["providerConfig"];
    /** Same shape as the dispatcher's `port` argument. */
    readonly port: QualityIntelligenceDispatcherArgs["port"];
    /** Replay cache holding normalized responses. */
    readonly cache: QualityIntelligenceReplayCachePort<NormalizedResponse>;
    /** Budget state supplied at the start of the run. */
    readonly initialBudget: QualityIntelligenceBudgetState;
}
|
|
24
|
+
/**
 * Dependencies accepted by every run entry point declared in this file.
 *
 * Only `sink` and `evidenceStore` are required; every other member may be omitted or passed
 * as `undefined`.
 */
export interface QualityIntelligenceRunEntryDeps {
    /** Run-event sink. */
    readonly sink: QualityIntelligenceRunEventSink;
    /** Local evidence store. */
    readonly evidenceStore: QualityIntelligenceLocalStore;
    /** Optional clock. */
    readonly clock?: QualityIntelligenceClock | undefined;
    /** Optional abort signal. */
    readonly signal?: AbortSignal | undefined;
    /** Optional workflow limits. */
    readonly limits?: QualityIntelligenceWorkflowLimits | undefined;
    /** Optional policy profile. */
    readonly policyProfile?: PolicyProfile | undefined;
    /** Optional model-routed dependencies. */
    readonly modelRouted?: QualityIntelligenceModelRoutedDeps | undefined;
}
|
|
33
|
+
/** Input accepted by `runQualityIntelligenceTestDesign`. */
export interface QualityIntelligenceTestDesignInput {
    /** Run plan for this run. */
    readonly plan: QI.QualityIntelligenceRunPlan;
    /** Source envelopes supplied to the run. */
    readonly envelopes: readonly QI.QualityIntelligenceSourceEnvelope[];
    /** Evidence atoms supplied to the run. */
    readonly atoms: readonly QI.QualityIntelligenceEvidenceAtom[];
    /** Provenance references supplied with the run. */
    readonly provenanceRefs: QualityIntelligenceProvenanceRefs;
}
|
|
39
|
+
/** Input accepted by `runQualityIntelligenceCoverageReview`. */
export interface QualityIntelligenceCoverageReviewInput {
    /** Run plan for this run. */
    readonly plan: QI.QualityIntelligenceRunPlan;
    /** Evidence atoms supplied to the run. */
    readonly atoms: readonly QI.QualityIntelligenceEvidenceAtom[];
    /** Test-case candidates supplied to the run. */
    readonly candidates: readonly QI.QualityIntelligenceTestCaseCandidate[];
    /** Provenance references supplied with the run. */
    readonly provenanceRefs: QualityIntelligenceProvenanceRefs;
}
|
|
45
|
+
/** Input accepted by `runQualityIntelligenceValidation`. */
export interface QualityIntelligenceValidationInput {
    /** Run plan for this run. */
    readonly plan: QI.QualityIntelligenceRunPlan;
    /** Test-case candidates supplied to the run. */
    readonly candidates: readonly QI.QualityIntelligenceTestCaseCandidate[];
    /** Provenance references supplied with the run. */
    readonly provenanceRefs: QualityIntelligenceProvenanceRefs;
}
|
|
50
|
+
/** Input accepted by `runQualityIntelligenceArtifactRefinement`. */
export interface QualityIntelligenceArtifactRefinementInput {
    /** Run plan for this run. */
    readonly plan: QI.QualityIntelligenceRunPlan;
    /** Evidence atoms supplied to the run. */
    readonly atoms: readonly QI.QualityIntelligenceEvidenceAtom[];
    /** Test-case candidates supplied to the run. */
    readonly candidates: readonly QI.QualityIntelligenceTestCaseCandidate[];
    /** Provenance references supplied with the run. */
    readonly provenanceRefs: QualityIntelligenceProvenanceRefs;
}
|
|
56
|
+
/**
 * Test-design run entry point.
 *
 * @param input Test-design input.
 * @param deps  Shared run-entry dependencies.
 * @returns A promise for the run summary.
 */
export declare function runQualityIntelligenceTestDesign(
    input: QualityIntelligenceTestDesignInput,
    deps: QualityIntelligenceRunEntryDeps,
): Promise<QualityIntelligenceRunSummary>;
/**
 * Coverage-review run entry point.
 *
 * @param input Coverage-review input.
 * @param deps  Shared run-entry dependencies.
 * @returns A promise for the run summary.
 */
export declare function runQualityIntelligenceCoverageReview(
    input: QualityIntelligenceCoverageReviewInput,
    deps: QualityIntelligenceRunEntryDeps,
): Promise<QualityIntelligenceRunSummary>;
/**
 * Validation run entry point.
 *
 * @param input Validation input.
 * @param deps  Shared run-entry dependencies.
 * @returns A promise for the run summary.
 */
export declare function runQualityIntelligenceValidation(
    input: QualityIntelligenceValidationInput,
    deps: QualityIntelligenceRunEntryDeps,
): Promise<QualityIntelligenceRunSummary>;
/**
 * Artifact-refinement run entry point.
 *
 * @param input Artifact-refinement input.
 * @param deps  Shared run-entry dependencies.
 * @returns A promise for the run summary.
 */
export declare function runQualityIntelligenceArtifactRefinement(
    input: QualityIntelligenceArtifactRefinementInput,
    deps: QualityIntelligenceRunEntryDeps,
): Promise<QualityIntelligenceRunSummary>;
|
|
60
|
+
//# sourceMappingURL=runEntries.d.ts.map
|