scene-capability-engine 3.3.25 → 3.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +68 -0
- package/README.md +81 -719
- package/README.zh.md +85 -586
- package/bin/scene-capability-engine.js +105 -0
- package/docs/README.md +47 -249
- package/docs/command-reference.md +80 -4
- package/docs/spec-workflow.md +35 -4
- package/docs/zh/README.md +44 -331
- package/lib/adoption/adoption-strategy.js +4 -0
- package/lib/adoption/detection-engine.js +4 -0
- package/lib/adoption/file-classifier.js +5 -1
- package/lib/adoption/smart-orchestrator.js +4 -0
- package/lib/commands/adopt.js +32 -0
- package/lib/commands/errorbook.js +409 -2
- package/lib/commands/session.js +27 -0
- package/lib/commands/spec-domain.js +78 -2
- package/lib/commands/studio.js +283 -12
- package/lib/commands/timeline.js +287 -0
- package/lib/commands/upgrade.js +16 -0
- package/lib/problem/problem-evaluator.js +620 -0
- package/lib/runtime/project-timeline.js +598 -0
- package/lib/spec/domain-modeling.js +217 -1
- package/lib/workspace/takeover-baseline.js +446 -0
- package/package.json +1 -1
- package/template/.sce/config/problem-eval-policy.json +36 -0
- package/template/.sce/config/session-governance.json +8 -0
- package/template/.sce/config/spec-domain-policy.json +6 -0
- package/template/.sce/config/takeover-baseline.json +33 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const fs = require('fs-extra');
|
|
3
|
+
|
|
4
|
+
/** API version tag written into every problem-eval report. */
const PROBLEM_EVAL_API_VERSION = 'sce.problem-eval/v0.1';
/** Location of the optional policy override file, relative to the project root. */
const DEFAULT_POLICY_PATH = '.sce/config/problem-eval-policy.json';
/** Directory that receives evaluation reports, relative to the project root. */
const DEFAULT_REPORT_DIR = '.sce/reports/problem-eval';
/** Stage names accepted by the evaluator. */
const STUDIO_STAGES = Object.freeze(['plan', 'generate', 'apply', 'verify', 'release']);
/** Attempt tags that count as debug evidence. */
const DEBUG_EVIDENCE_TAGS = Object.freeze(['debug-evidence', 'diagnostic-evidence', 'debug-log']);

/**
 * Built-in policy used when no override file exists or a field is missing/invalid.
 *
 * Object.freeze is shallow, so every nested array/object is frozen as well;
 * otherwise a consumer of this exported constant could mutate the shared
 * defaults for the whole process.
 */
const DEFAULT_PROBLEM_EVAL_POLICY = Object.freeze({
  schema_version: '1.0',
  enabled: true,
  mode: 'required',
  enforce_on_stages: Object.freeze([...STUDIO_STAGES]),
  block_on_stages: Object.freeze(['apply', 'release']),
  min_confidence_by_stage: Object.freeze({
    plan: 20,
    generate: 25,
    apply: 30,
    verify: 35,
    release: 40
  }),
  high_risk_requires_debug_evidence: true,
  high_risk_keywords: Object.freeze([
    'auth',
    'payment',
    'security',
    'delete',
    'rollback',
    'production',
    'migrate',
    'compliance',
    'data-loss'
  ]),
  recommendation_limit: 6
});
|
|
37
|
+
|
|
38
|
+
/**
 * Trim a string; every non-string input collapses to the empty string.
 *
 * @param {*} value - Candidate text.
 * @returns {string} Trimmed text, or '' when `value` is not a string.
 */
function normalizeText(value) {
  return typeof value === 'string' ? value.trim() : '';
}
|
|
44
|
+
|
|
45
|
+
/**
 * Trimmed, lower-cased form of a string; non-strings become ''.
 *
 * @param {*} value - Candidate text.
 * @returns {string} Lower-cased trimmed text.
 */
function normalizeLowerText(value) {
  const trimmed = normalizeText(value);
  return trimmed.toLowerCase();
}
|
|
48
|
+
|
|
49
|
+
/**
 * Coerce a loosely-typed flag (boolean, number or string) to a boolean.
 *
 * Recognised truthy tokens: 1, true, yes, y, on. Recognised falsy tokens:
 * 0, false, no, n, off. Matching is case-insensitive and ignores surrounding
 * whitespace. Anything else, including null/undefined/'', yields `fallback`.
 *
 * @param {*} value - Raw flag value.
 * @param {boolean} [fallback=false] - Result when the value is not recognised.
 * @returns {boolean} The interpreted flag.
 */
function normalizeBoolean(value, fallback = false) {
  if (typeof value === 'boolean') {
    return value;
  }
  // Nullish coalescing (not `||`) so that a numeric 0 is stringified to '0'
  // and recognised as false instead of silently becoming the fallback.
  const token = String(value ?? '').trim().toLowerCase();
  if (['1', 'true', 'yes', 'y', 'on'].includes(token)) {
    return true;
  }
  if (['0', 'false', 'no', 'n', 'off'].includes(token)) {
    return false;
  }
  return fallback;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Parse a base-10 integer and clamp it into [min, max].
 *
 * @param {*} value - Raw value; it is stringified before parsing.
 * @param {number} [fallback=0] - Returned when parsing does not yield a finite number.
 * @param {number} [min=0] - Lower bound (inclusive).
 * @param {number} [max=Number.MAX_SAFE_INTEGER] - Upper bound (inclusive).
 * @returns {number} The clamped integer, or `fallback`.
 */
function normalizeInteger(value, fallback = 0, min = 0, max = Number.MAX_SAFE_INTEGER) {
  const candidate = Number.parseInt(`${value}`, 10);
  if (!Number.isFinite(candidate)) {
    return fallback;
  }
  // The lower bound is checked first, exactly as a sequential clamp would.
  return candidate < min ? min : candidate > max ? max : candidate;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Reduce a value to an array of non-empty trimmed strings.
 *
 * @param {*} [value=[]] - Candidate array; anything that is not an array yields [].
 * @returns {string[]} Trimmed string entries; non-string and blank entries are dropped.
 */
function normalizeArray(value = []) {
  const cleaned = [];
  if (Array.isArray(value)) {
    for (const entry of value) {
      const text = normalizeText(entry);
      if (text) {
        cleaned.push(text);
      }
    }
  }
  return cleaned;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Map a raw incident state to 'open' or 'resolved'.
 *
 * @param {*} value - Raw state value.
 * @param {string} [fallback='open'] - Returned for blank or unrecognised states.
 * @returns {string} 'open', 'resolved', or `fallback`.
 */
function normalizeIncidentState(value, fallback = 'open') {
  const state = normalizeLowerText(value);
  return ['open', 'resolved'].includes(state) ? state : fallback;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Decide whether a single incident attempt carries debug evidence.
 *
 * Evidence is recognised from any of:
 *  - a tag listed in DEBUG_EVIDENCE_TAGS (case-insensitive),
 *  - a `verification_evidence` entry starting with "debug:",
 *  - free-text `notes` mentioning debug/trace/diagnostic/observability/telemetry
 *    or one of the Chinese terms in the pattern below.
 *
 * @param {object} [attempt={}] - Attempt record; null and non-object values are
 *   treated as an empty record instead of throwing.
 * @returns {boolean} True when debug evidence is present.
 */
function hasDebugEvidenceInAttempt(attempt = {}) {
  // A default parameter only covers `undefined`; guard null/primitives explicitly.
  const record = attempt !== null && typeof attempt === 'object' ? attempt : {};

  const tags = normalizeArray(record.tags).map((tag) => tag.toLowerCase());
  if (tags.some((tag) => DEBUG_EVIDENCE_TAGS.includes(tag))) {
    return true;
  }

  const verification = normalizeArray(record.verification_evidence);
  if (verification.some((item) => /^debug:/i.test(item))) {
    return true;
  }

  // The pattern is case-insensitive, so the notes do not need lower-casing first.
  const notes = normalizeText(record.notes);
  return /(debug|trace|diagnostic|observability|telemetry|日志|埋点|观测)/i.test(notes);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Build a complete, validated policy from a (possibly partial) raw policy.
 *
 * Environment overrides:
 *  - SCE_PROBLEM_EVAL_MODE: 'off' | 'advisory' | 'required' wins over `policy.mode`.
 *  - SCE_PROBLEM_EVAL_DISABLED: a truthy flag forces `enabled` to false.
 * A resulting mode of 'off' also forces `enabled` to false.
 *
 * Missing or invalid fields fall back to DEFAULT_PROBLEM_EVAL_POLICY. An
 * unrecognised `policy.mode` falls back to the default mode, matching the
 * validation already applied to the environment value.
 *
 * @param {object} [policy={}] - Raw policy, e.g. parsed from JSON. Null and
 *   non-object values are treated as an empty policy.
 * @param {object} [env=process.env] - Environment variable map.
 * @returns {object} Normalized policy with every field populated.
 */
function normalizePolicy(policy = {}, env = process.env) {
  // Default parameters do not cover null (a JSON file may legitimately contain `null`).
  const source = policy !== null && typeof policy === 'object' ? policy : {};
  const environment = env !== null && typeof env === 'object' ? env : {};
  const defaults = DEFAULT_PROBLEM_EVAL_POLICY;
  const knownModes = ['off', 'advisory', 'required'];

  const envMode = normalizeLowerText(environment.SCE_PROBLEM_EVAL_MODE);
  const fileMode = normalizeLowerText(source.mode);
  let mode = defaults.mode;
  if (knownModes.includes(envMode)) {
    mode = envMode;
  } else if (knownModes.includes(fileMode)) {
    mode = fileMode;
  }

  const envDisabled = normalizeBoolean(environment.SCE_PROBLEM_EVAL_DISABLED, false);
  const enabled = !envDisabled
    && mode !== 'off'
    && normalizeBoolean(source.enabled, defaults.enabled);

  // Lower-case a string list once; an empty or invalid list falls back to the defaults.
  const lowerCasedList = (value, fallback) => {
    const items = normalizeArray(value).map((item) => item.toLowerCase());
    return items.length > 0 ? items : [...fallback];
  };

  const thresholdOverrides = source.min_confidence_by_stage
    && typeof source.min_confidence_by_stage === 'object'
    ? source.min_confidence_by_stage
    : {};
  const minConfidenceByStage = {};
  for (const [stage, fallback] of Object.entries(defaults.min_confidence_by_stage)) {
    minConfidenceByStage[stage] = normalizeInteger(thresholdOverrides[stage], fallback, 0, 100);
  }

  return {
    schema_version: normalizeText(source.schema_version) || defaults.schema_version,
    enabled,
    mode,
    enforce_on_stages: lowerCasedList(source.enforce_on_stages, defaults.enforce_on_stages),
    block_on_stages: lowerCasedList(source.block_on_stages, defaults.block_on_stages),
    min_confidence_by_stage: minConfidenceByStage,
    high_risk_requires_debug_evidence: normalizeBoolean(
      source.high_risk_requires_debug_evidence,
      defaults.high_risk_requires_debug_evidence
    ),
    high_risk_keywords: lowerCasedList(source.high_risk_keywords, defaults.high_risk_keywords),
    recommendation_limit: normalizeInteger(
      source.recommendation_limit,
      defaults.recommendation_limit,
      1,
      20
    )
  };
}
|
|
166
|
+
|
|
167
|
+
/**
 * Load the project's problem-eval policy file (if present) and normalize it.
 *
 * @param {string} [projectPath=process.cwd()] - Project root directory.
 * @param {object} [fileSystem=fs] - Object exposing async `pathExists` and `readJson`.
 * @param {object} [env=process.env] - Environment map forwarded to normalizePolicy.
 * @returns {Promise<{policy_path: string, policy: object}>} The resolved policy
 *   file path (whether or not the file exists) and the normalized policy.
 * @throws {Error} When the policy file exists but cannot be read or parsed; the
 *   underlying error is attached as `cause`.
 */
async function loadProblemEvalPolicy(projectPath = process.cwd(), fileSystem = fs, env = process.env) {
  const policyPath = path.join(projectPath, DEFAULT_POLICY_PATH);
  let payload = {};

  if (await fileSystem.pathExists(policyPath)) {
    let loaded;
    try {
      loaded = await fileSystem.readJson(policyPath);
    } catch (error) {
      // Keep the original error reachable for diagnostics instead of discarding it.
      throw new Error(`Failed to read problem-eval policy: ${error?.message ?? error}`, { cause: error });
    }
    // Valid JSON can still be a non-object (null, number, string); treat that as "no overrides".
    if (loaded !== null && typeof loaded === 'object') {
      payload = loaded;
    }
  }

  return {
    policy_path: policyPath,
    policy: normalizePolicy(payload, env)
  };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Compute a 0-100 risk score and a low/medium/high level.
 *
 * Contributions: high-risk keyword hits in `text` (6 each, capped at 30), a
 * per-stage weight, +18 for the 'prod' release channel, open incidents
 * (3 each, capped at 20) and +16 once any incident has three or more attempts.
 *
 * @param {string} stage - Normalized stage name.
 * @param {string} text - Lower-cased text searched for keywords.
 * @param {object} policy - Policy supplying `high_risk_keywords`.
 * @param {object} [incidentSignals={}] - Provides `open_incident_count` and `max_attempt_count`.
 * @param {string} [releaseChannel=''] - Release channel name.
 * @returns {{score: number, level: string, signals: string[]}} Risk dimension.
 */
function scoreRisk(stage, text, policy, incidentSignals = {}, releaseChannel = '') {
  const signals = [];
  let total = 0;
  const bump = (points, label) => {
    total += points;
    signals.push(label);
  };

  const keywordList = Array.isArray(policy.high_risk_keywords) ? policy.high_risk_keywords : [];
  const keywordHits = keywordList.filter((keyword) => keyword && text.includes(keyword)).length;
  if (keywordHits > 0) {
    bump(Math.min(30, keywordHits * 6), `high-risk-keywords:${keywordHits}`);
  }

  // 'plan' intentionally has no entry: it adds no stage risk.
  const stageWeights = new Map([
    ['release', 28],
    ['verify', 18],
    ['apply', 14],
    ['generate', 8]
  ]);
  if (stageWeights.has(stage)) {
    bump(stageWeights.get(stage), `stage-${stage}`);
  }

  if (normalizeLowerText(releaseChannel) === 'prod') {
    bump(18, 'channel-prod');
  }

  const openIncidents = Number(incidentSignals.open_incident_count || 0);
  const maxAttempts = Number(incidentSignals.max_attempt_count || 0);
  if (openIncidents > 0) {
    bump(Math.min(20, openIncidents * 3), `open-incidents:${openIncidents}`);
  }
  if (maxAttempts >= 3) {
    bump(16, `repeat-attempts:${maxAttempts}`);
  }

  const score = Math.max(0, Math.min(100, Math.round(total)));
  const level = score >= 70 ? 'high' : score >= 40 ? 'medium' : 'low';
  return { score, level, signals };
}
|
|
244
|
+
|
|
245
|
+
/**
 * Compute a 0-100 evidence score from domain-chain, related-spec,
 * debug-evidence and stage-readiness signals.
 *
 * @param {object} [context={}] - Evaluation context; reads `domain_chain`,
 *   `related_specs_count` and `stage_readiness`.
 * @param {object} [incidentSignals={}] - Reads `has_debug_evidence`.
 * @returns {{score: number, signals: string[]}} Evidence dimension.
 */
function scoreEvidence(context = {}, incidentSignals = {}) {
  const signals = [];
  let total = 0;
  const award = (points, label) => {
    total += points;
    signals.push(label);
  };
  const asObject = (candidate) => (candidate && typeof candidate === 'object' ? candidate : {});

  const chain = asObject(context.domain_chain);
  const chainSummary = asObject(chain.summary);

  if (chain.resolved === true) {
    award(20, 'domain-chain-resolved');
  }

  const steps = Number(chainSummary.decision_path_steps || 0);
  if (steps >= 3) {
    award(15, `decision-path:${steps}`);
  } else if (steps > 0) {
    award(8, `decision-path-partial:${steps}`);
  }

  const gateCount = Array.isArray(chainSummary.verification_gates)
    ? chainSummary.verification_gates.length
    : 0;
  if (gateCount > 0) {
    award(Math.min(12, gateCount * 3), `verification-gates:${gateCount}`);
  }

  const relatedSpecs = Number(context.related_specs_count || 0);
  if (relatedSpecs > 0) {
    award(Math.min(15, 8 + relatedSpecs), `related-specs:${relatedSpecs}`);
  }

  if (incidentSignals.has_debug_evidence === true) {
    award(15, 'debug-evidence-present');
  }

  const readiness = asObject(context.stage_readiness);
  if (readiness.prerequisites_ready === true) {
    award(8, 'stage-prerequisites-ready');
  }
  if (readiness.rollback_ready === true) {
    award(10, 'rollback-ready');
  }
  if (readiness.gate_required_ready === true) {
    award(6, 'required-gates-available');
  }

  return { score: Math.max(0, Math.min(100, Math.round(total))), signals };
}
|
|
300
|
+
|
|
301
|
+
/**
 * Compute a 0-100 readiness score from context identity fields, stage
 * readiness flags and required-gate availability.
 *
 * @param {object} [context={}] - Evaluation context; reads `scene_id`, `goal`,
 *   `spec_id`, `stage_readiness` and `gate_signals`.
 * @returns {{score: number, signals: string[]}} Readiness dimension.
 */
function scoreReadiness(context = {}) {
  const signals = [];
  let total = 0;
  const credit = (condition, points, label) => {
    if (condition) {
      total += points;
      signals.push(label);
    }
  };

  const readiness = context.stage_readiness && typeof context.stage_readiness === 'object'
    ? context.stage_readiness
    : {};

  credit(normalizeText(context.scene_id), 20, 'scene-defined');
  credit(normalizeText(context.goal), 10, 'goal-defined');
  credit(normalizeText(context.spec_id), 10, 'spec-bound');
  credit(readiness.prerequisites_ready === true, 25, 'prerequisites-ready');
  credit(readiness.patch_bundle_ready === true, 15, 'patch-bundle-ready');
  credit(readiness.verify_report_ready === true, 10, 'verify-report-ready');

  const gates = context.gate_signals && typeof context.gate_signals === 'object'
    ? context.gate_signals
    : {};
  const requiredTotal = Number(gates.required_total || 0);
  const requiredEnabled = Number(gates.required_enabled || 0);
  if (requiredTotal > 0) {
    // Up to 10 points, proportional to the share of required gates that are enabled.
    const share = requiredEnabled / requiredTotal;
    total += Math.round(Math.max(0, Math.min(10, share * 10)));
    signals.push(`gate-availability:${requiredEnabled}/${requiredTotal}`);
  }

  return { score: Math.max(0, Math.min(100, Math.round(total))), signals };
}
|
|
346
|
+
|
|
347
|
+
/**
 * Pick an execution strategy; the first matching rule wins.
 *
 *  1. debug-first: three or more attempts, policy requires debug evidence, none present.
 *  2. evidence-first: high risk with an evidence score below 55.
 *  3. explore-and-validate: confidence below 45.
 *  4. controlled-execution: release stage with non-low risk.
 *  5. direct-execution: otherwise.
 *
 * @param {string} stage - Normalized stage name.
 * @param {{level: string}} risk - Risk dimension.
 * @param {{score: number}} evidence - Evidence dimension.
 * @param {number} confidence - Overall confidence score.
 * @param {object} [incidentSignals={}] - Reads `max_attempt_count` and `has_debug_evidence`.
 * @param {object} [policy=DEFAULT_PROBLEM_EVAL_POLICY] - Reads `high_risk_requires_debug_evidence`.
 * @returns {{strategy: string, reasons: string[]}} Chosen strategy and its single reason.
 */
function deriveStrategy(stage, risk, evidence, confidence, incidentSignals = {}, policy = DEFAULT_PROBLEM_EVAL_POLICY) {
  const outcome = (strategy, reason) => ({ strategy, reasons: [reason] });

  const attemptCount = Number(incidentSignals.max_attempt_count || 0);
  const lacksDebugEvidence = incidentSignals.has_debug_evidence !== true;
  if (attemptCount >= 3 && policy.high_risk_requires_debug_evidence && lacksDebugEvidence) {
    return outcome('debug-first', 'repeated-failures-without-debug-evidence');
  }

  const highRisk = risk.level === 'high';
  if (highRisk && evidence.score < 55) {
    return outcome('evidence-first', 'high-risk-insufficient-evidence');
  }

  if (confidence < 45) {
    return outcome('explore-and-validate', 'low-confidence');
  }

  if (stage === 'release' && risk.level !== 'low') {
    return outcome('controlled-execution', 'release-risk-control');
  }

  return outcome('direct-execution', 'confidence-sufficient');
}
|
|
375
|
+
|
|
376
|
+
/**
 * Evaluate a problem context against a policy and produce a report.
 *
 * The report carries the risk/evidence/readiness/strategy dimensions, a
 * confidence score (45% evidence, 35% readiness, 20% inverse risk), plus
 * warnings, blockers and recommendations. `blocked` is true only when the
 * policy is enforced for the stage, the stage is a blocking stage, the policy
 * mode is not 'advisory', and at least one blocker was recorded.
 *
 * @param {object} [context={}] - Evaluation context; `stage` is required.
 * @param {object} [policy=DEFAULT_PROBLEM_EVAL_POLICY] - Normalized policy.
 * @returns {object} Problem-eval report.
 * @throws {Error} When `context.stage` is not one of STUDIO_STAGES.
 */
function evaluateProblemContext(context = {}, policy = DEFAULT_PROBLEM_EVAL_POLICY) {
  const stage = normalizeLowerText(context.stage);
  if (!STUDIO_STAGES.includes(stage)) {
    throw new Error(`Unsupported problem-eval stage: ${context.stage || 'unknown'}`);
  }

  // Free text scanned for high-risk keywords.
  const textForRisk = [
    context.goal,
    context.scene_id,
    context.spec_id,
    context?.domain_chain?.reason,
    context.release_channel
  ].map((part) => normalizeLowerText(part)).join(' ');

  const hasIncidentSignals = context.incident_signals && typeof context.incident_signals === 'object';
  const incidentSignals = hasIncidentSignals ? context.incident_signals : {};

  const risk = scoreRisk(stage, textForRisk, policy, incidentSignals, context.release_channel);
  const evidence = scoreEvidence(context, incidentSignals);
  const readiness = scoreReadiness(context);
  const weighted = evidence.score * 0.45 + readiness.score * 0.35 + (100 - risk.score) * 0.20;
  const confidenceScore = Math.max(0, Math.min(100, Math.round(weighted)));

  const minConfidence = Number(policy?.min_confidence_by_stage?.[stage] || 0);
  const strategy = deriveStrategy(stage, risk, evidence, confidenceScore, incidentSignals, policy);

  const enforced = policy.enabled === true
    && Array.isArray(policy.enforce_on_stages)
    && policy.enforce_on_stages.includes(stage);
  const blockStage = Array.isArray(policy.block_on_stages) && policy.block_on_stages.includes(stage);
  const advisoryMode = policy.mode === 'advisory';
  const highRisk = risk.level === 'high';

  const warnings = [];
  const blockers = [];
  // Every finding is a warning; it is also a blocker when `blocks` is true.
  const record = (warning, blocker, blocks) => {
    warnings.push(warning);
    if (blocks) {
      blockers.push(blocker);
    }
  };

  if (confidenceScore < minConfidence) {
    record(
      `confidence ${confidenceScore} below threshold ${minConfidence}`,
      `confidence-too-low:${confidenceScore}<${minConfidence}`,
      blockStage
    );
  }

  const repeatedFailuresWithoutDebug = policy.high_risk_requires_debug_evidence
    && highRisk
    && Number(incidentSignals.max_attempt_count || 0) >= 3
    && incidentSignals.has_debug_evidence !== true;
  if (repeatedFailuresWithoutDebug) {
    record(
      'high risk with repeated failed attempts and no debug evidence',
      'missing-debug-evidence-after-repeated-failures',
      blockStage
    );
  }

  if (evidence.score < 35) {
    record(
      `evidence score ${evidence.score} is low`,
      `high-risk-low-evidence:${evidence.score}`,
      blockStage && highRisk
    );
  }

  const recommendationRules = [
    [
      strategy.strategy === 'debug-first',
      ['Capture debug trace/log evidence before the next patch attempt.']
    ],
    [
      strategy.strategy === 'evidence-first' || evidence.score < 45,
      [
        'Refresh domain artifacts and verify ontology coverage before execution.',
        'Load related historical specs and compare successful remediation paths.'
      ]
    ],
    [
      risk.level !== 'low',
      ['Prefer guarded execution with rollback checkpoints and release gates enabled.']
    ],
    [
      Number(incidentSignals.open_incident_count || 0) > 0,
      ['Review staging incident attempts to avoid repeating failed actions.']
    ]
  ];
  const recommendations = recommendationRules.flatMap(([applies, lines]) => (applies ? lines : []));
  if (recommendations.length === 0) {
    recommendations.push('Proceed with direct execution and keep gate verification enabled.');
  }

  const blocked = enforced && blockStage && !advisoryMode && blockers.length > 0;

  return {
    mode: 'problem-eval',
    api_version: PROBLEM_EVAL_API_VERSION,
    generated_at: new Date().toISOString(),
    stage,
    scene_id: normalizeText(context.scene_id),
    spec_id: normalizeText(context.spec_id),
    job_id: normalizeText(context.job_id),
    policy: {
      enabled: policy.enabled === true,
      mode: policy.mode,
      enforced,
      block_stage: blockStage,
      min_confidence: minConfidence
    },
    dimensions: {
      risk,
      evidence,
      readiness,
      strategy
    },
    incident_signals: {
      ...incidentSignals
    },
    confidence_score: confidenceScore,
    warnings,
    blockers,
    recommendations: recommendations.slice(0, policy.recommendation_limit || 6),
    passed: !blocked,
    blocked
  };
}
|
|
486
|
+
|
|
487
|
+
/**
 * Decide whether a staged incident relates to the evaluation context.
 *
 * With no spec id, scene id or goal in the context every incident is relevant.
 * Otherwise the incident matches when an attempt's `source.spec` equals the
 * spec id, or when its title/symptom contains the scene id or the goal
 * (both compared case-insensitively).
 *
 * @param {object} [incident={}] - Incident record; reads `title`, `symptom`, `attempts`.
 * @param {object} [context={}] - Evaluation context; reads `spec_id`, `scene_id`, `goal`.
 * @returns {boolean} True when the incident is relevant.
 */
function isIncidentRelevantToContext(incident = {}, context = {}) {
  const specId = normalizeText(context.spec_id);
  const sceneId = normalizeText(context.scene_id);
  const goal = normalizeLowerText(context.goal);
  if (!(specId || sceneId || goal)) {
    return true;
  }

  const haystacks = [normalizeLowerText(incident.title), normalizeLowerText(incident.symptom)];
  const mentions = (needle) => haystacks.some((text) => text.includes(needle));

  const specMatches = Boolean(specId)
    && Array.isArray(incident.attempts)
    && incident.attempts.some((attempt) => normalizeText(attempt?.source?.spec) === specId);
  if (specMatches) {
    return true;
  }
  if (sceneId && mentions(sceneId.toLowerCase())) {
    return true;
  }
  return Boolean(goal && mentions(goal));
}
|
|
505
|
+
|
|
506
|
+
/**
 * Build the zeroed signal set returned when no usable staging index exists.
 *
 * @param {boolean} hasStagingData - Whether the staging index file was found.
 * @returns {object} Incident signals with all counters at zero.
 */
function buildEmptyIncidentSignals(hasStagingData) {
  return {
    has_staging_data: hasStagingData,
    total_incident_count: 0,
    open_incident_count: 0,
    resolved_incident_count: 0,
    relevant_incident_count: 0,
    max_attempt_count: 0,
    has_debug_evidence: false
  };
}

/**
 * Summarise the errorbook staging area into incident signals.
 *
 * Reads `.sce/errorbook/staging/index.json` and, for at most the first 200
 * index entries, the matching `incidents/<id>.json` file. Unreadable or
 * unparsable files are skipped (best effort). Open/resolved totals are taken
 * from the whole index; relevance, max attempt count and debug evidence are
 * taken only from incidents relevant to `context`.
 *
 * @param {string} [projectPath=process.cwd()] - Project root directory.
 * @param {object} [context={}] - Evaluation context used for relevance filtering.
 * @param {object} [fileSystem=fs] - Object exposing async `pathExists` and `readJson`.
 * @returns {Promise<object>} Incident signal counters and flags.
 */
async function collectIncidentSignals(projectPath = process.cwd(), context = {}, fileSystem = fs) {
  const stagingDir = path.join(projectPath, '.sce', 'errorbook', 'staging');
  const indexPath = path.join(stagingDir, 'index.json');
  if (!await fileSystem.pathExists(indexPath)) {
    return buildEmptyIncidentSignals(false);
  }

  // Best effort: a corrupt index is reported as "staging data present, nothing usable".
  const indexPayload = await fileSystem.readJson(indexPath).catch(() => null);
  if (!indexPayload || !Array.isArray(indexPayload.incidents)) {
    return buildEmptyIncidentSignals(true);
  }

  const incidentsDir = path.join(stagingDir, 'incidents');
  let relevantCount = 0;
  let maxAttemptCount = 0;
  let hasDebugEvidence = false;

  for (const summary of indexPayload.incidents.slice(0, 200)) {
    // Optional chaining: a null/primitive index entry is skipped instead of throwing.
    const incidentId = normalizeText(summary?.id);
    if (!incidentId) {
      continue;
    }
    const incidentPath = path.join(incidentsDir, `${incidentId}.json`);
    if (!await fileSystem.pathExists(incidentPath)) {
      continue;
    }
    const incident = await fileSystem.readJson(incidentPath).catch(() => null);
    if (!incident || !isIncidentRelevantToContext(incident, context)) {
      continue;
    }

    relevantCount += 1;
    const attempts = Array.isArray(incident.attempts) ? incident.attempts : [];
    const attemptCount = Number(incident.attempt_count || attempts.length || 0);
    if (attemptCount > maxAttemptCount) {
      maxAttemptCount = attemptCount;
    }
    // Skip null/empty attempt entries so one malformed record cannot abort the scan.
    if (attempts.some((attempt) => Boolean(attempt) && hasDebugEvidenceInAttempt(attempt))) {
      hasDebugEvidence = true;
    }
  }

  // Single pass over the full index; unknown or missing states count as 'open'.
  let openCount = 0;
  let resolvedCount = 0;
  for (const item of indexPayload.incidents) {
    const state = normalizeIncidentState(item?.state, 'open');
    if (state === 'open') {
      openCount += 1;
    } else if (state === 'resolved') {
      resolvedCount += 1;
    }
  }

  return {
    has_staging_data: true,
    total_incident_count: indexPayload.incidents.length,
    open_incident_count: openCount,
    resolved_incident_count: resolvedCount,
    relevant_incident_count: relevantCount,
    max_attempt_count: maxAttemptCount,
    has_debug_evidence: hasDebugEvidence
  };
}
|
|
571
|
+
|
|
572
|
+
/**
 * Express a path relative to the project root using forward slashes.
 *
 * @param {string} projectPath - Base directory.
 * @param {string} absolutePath - Target path.
 * @returns {string} Relative path with every backslash replaced by '/'.
 */
function toRelativePosix(projectPath, absolutePath) {
  const relativePath = path.relative(projectPath, absolutePath);
  return relativePath.split('\\').join('/');
}
|
|
575
|
+
|
|
576
|
+
/**
 * Turn arbitrary text into a file-name-safe segment.
 *
 * Runs of characters outside [a-zA-Z0-9._-] become a single '-', and leading
 * and trailing dashes are stripped.
 *
 * @param {*} value - Raw segment text.
 * @param {string} [fallback='adhoc'] - Used when nothing usable remains.
 * @returns {string} Sanitized segment, or `fallback`.
 */
function sanitizeSegment(value, fallback = 'adhoc') {
  const text = normalizeText(value);
  const slug = text
    .replace(/[^a-zA-Z0-9._-]+/g, '-')
    .replace(/^-+|-+$/g, '');
  return slug || fallback;
}
|
|
583
|
+
|
|
584
|
+
/**
 * Run a full problem evaluation: resolve the policy, gather incident signals,
 * evaluate the context and (unless disabled) persist the report as JSON.
 *
 * @param {object} [context={}] - Evaluation context. A supplied
 *   `incident_signals` value is used as-is instead of scanning the staging area.
 * @param {object} [dependencies={}] - Optional overrides: `projectPath`,
 *   `fileSystem`, `env`, `policyBundle`, and `writeReport` (pass false to skip
 *   writing the report file).
 * @returns {Promise<object>} The report; `report_file` holds the project-relative
 *   report path when a file was written.
 */
async function runProblemEvaluation(context = {}, dependencies = {}) {
  const projectPath = dependencies.projectPath || process.cwd();
  const fileSystem = dependencies.fileSystem || fs;
  const env = dependencies.env || process.env;
  const shouldWriteReport = dependencies.writeReport !== false;

  let policyBundle = dependencies.policyBundle;
  if (!policyBundle) {
    policyBundle = await loadProblemEvalPolicy(projectPath, fileSystem, env);
  }

  let incidentSignals = context.incident_signals;
  if (!incidentSignals) {
    incidentSignals = await collectIncidentSignals(projectPath, context, fileSystem);
  }

  const report = evaluateProblemContext(
    { ...context, incident_signals: incidentSignals },
    policyBundle.policy
  );
  if (!shouldWriteReport) {
    return report;
  }

  const stageSegment = sanitizeSegment(report.stage, 'stage');
  // Reports without a job id get a timestamp-based name.
  const jobSegment = sanitizeSegment(report.job_id, `adhoc-${Date.now()}`);
  const reportPath = path.join(projectPath, DEFAULT_REPORT_DIR, `${jobSegment}-${stageSegment}.json`);
  await fileSystem.ensureDir(path.dirname(reportPath));
  await fileSystem.writeJson(reportPath, report, { spaces: 2 });
  // Set after writing, so the file on disk does not contain its own path.
  report.report_file = toRelativePosix(projectPath, reportPath);
  return report;
}
|
|
609
|
+
|
|
610
|
+
module.exports = {
|
|
611
|
+
PROBLEM_EVAL_API_VERSION,
|
|
612
|
+
DEFAULT_POLICY_PATH,
|
|
613
|
+
DEFAULT_REPORT_DIR,
|
|
614
|
+
DEFAULT_PROBLEM_EVAL_POLICY,
|
|
615
|
+
normalizePolicy,
|
|
616
|
+
loadProblemEvalPolicy,
|
|
617
|
+
collectIncidentSignals,
|
|
618
|
+
evaluateProblemContext,
|
|
619
|
+
runProblemEvaluation
|
|
620
|
+
};
|