scene-capability-engine 3.3.26 → 3.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1035 @@
1
+ const path = require('path');
2
+ const fs = require('fs-extra');
3
+
4
const PROBLEM_EVAL_API_VERSION = 'sce.problem-eval/v0.1';
// Project-relative location of the policy file read by loadProblemEvalPolicy.
const DEFAULT_POLICY_PATH = '.sce/config/problem-eval-policy.json';
const DEFAULT_REPORT_DIR = '.sce/reports/problem-eval';
// Recognised stage names; anything else is rejected or dropped by the normalizers.
const STUDIO_STAGES = Object.freeze(['plan', 'generate', 'apply', 'verify', 'release']);
// Attempt tags that count as debug evidence (see hasDebugEvidenceInAttempt).
const DEBUG_EVIDENCE_TAGS = Object.freeze(['debug-evidence', 'diagnostic-evidence', 'debug-log']);
// Ontology axes that alignment checks can require.
const ONTOLOGY_AXES = Object.freeze(['entity', 'relation', 'business_rule', 'decision_policy', 'execution_flow']);

/**
 * Built-in policy used when no policy file exists or a field is missing/invalid.
 *
 * Object.freeze is shallow, so every nested array/object is frozen individually.
 * The evaluators use this object as their default `policy` argument and hand
 * some of its arrays back to callers; freezing the nested values keeps an
 * accidental mutation there from silently changing the defaults for the whole
 * process.
 */
const DEFAULT_PROBLEM_EVAL_POLICY = Object.freeze({
  schema_version: '1.0',
  enabled: true,
  mode: 'required',
  enforce_on_stages: Object.freeze([...STUDIO_STAGES]),
  block_on_stages: Object.freeze(['apply', 'release']),
  min_confidence_by_stage: Object.freeze({
    plan: 20,
    generate: 25,
    apply: 30,
    verify: 35,
    release: 40
  }),
  high_risk_requires_debug_evidence: true,
  high_risk_keywords: Object.freeze([
    'auth',
    'payment',
    'security',
    'delete',
    'rollback',
    'production',
    'migrate',
    'compliance',
    'data-loss'
  ]),
  recommendation_limit: 6,
  max_failed_rounds_before_debug: 2,
  problem_contract_required_stages: Object.freeze([...STUDIO_STAGES]),
  problem_contract_block_stages: Object.freeze(['plan', 'apply', 'release']),
  ontology_alignment_required_stages: Object.freeze([...STUDIO_STAGES]),
  ontology_alignment_block_stages: Object.freeze(['apply', 'release']),
  ontology_required_axes: Object.freeze([...ONTOLOGY_AXES]),
  require_ontology_evidence_binding: true,
  ontology_evidence_min_bindings: 1,
  convergence_required_stages: Object.freeze(['verify', 'release']),
  convergence_block_stages: Object.freeze(['release']),
  release_block_on_high_alerts: true,
  release_require_governance_report: false
});
50
+
51
/**
 * Coerce a value into a trimmed string.
 *
 * @param {*} value - Candidate value.
 * @returns {string} The trimmed string, or '' when `value` is not a string.
 */
function normalizeText(value) {
  return typeof value === 'string' ? value.trim() : '';
}
57
+
58
/**
 * Trimmed, lower-cased variant of normalizeText.
 *
 * @param {*} value - Candidate value.
 * @returns {string} Lower-cased trimmed string, or '' for non-strings.
 */
function normalizeLowerText(value) {
  const trimmed = normalizeText(value);
  return trimmed.toLowerCase();
}
61
+
62
/**
 * Interpret a loosely-typed flag (boolean, number, or string) as a boolean.
 *
 * Accepted truthy tokens: 1, true, yes, y, on. Accepted falsy tokens:
 * 0, false, no, n, off. Matching is case-insensitive and ignores surrounding
 * whitespace.
 *
 * @param {*} value - Raw flag value (e.g. from JSON or an environment variable).
 * @param {boolean} [fallback=false] - Returned when `value` is null/undefined,
 *   blank, or not one of the recognised tokens.
 * @returns {boolean} The parsed flag, or `fallback`.
 */
function normalizeBoolean(value, fallback = false) {
  if (typeof value === 'boolean') {
    return value;
  }
  // Only null/undefined mean "not provided". Using `value || ''` here would
  // also discard the number 0, so `enabled: 0` would fall back to the default
  // instead of being read as false.
  if (value === null || value === undefined) {
    return fallback;
  }
  const token = String(value).trim().toLowerCase();
  if (['1', 'true', 'yes', 'y', 'on'].includes(token)) {
    return true;
  }
  if (['0', 'false', 'no', 'n', 'off'].includes(token)) {
    return false;
  }
  return fallback;
}
78
+
79
/**
 * Parse a base-10 integer and clamp it into [min, max].
 *
 * @param {*} value - Raw value; it is stringified before parsing.
 * @param {number} [fallback=0] - Returned as-is when parsing yields no finite number.
 * @param {number} [min=0] - Lower bound applied to a parsed value.
 * @param {number} [max=Number.MAX_SAFE_INTEGER] - Upper bound applied to a parsed value.
 * @returns {number} The clamped integer, or `fallback`.
 */
function normalizeInteger(value, fallback = 0, min = 0, max = Number.MAX_SAFE_INTEGER) {
  const candidate = Number.parseInt(`${value}`, 10);
  if (!Number.isFinite(candidate)) {
    return fallback;
  }
  // The lower bound is checked first, matching the original precedence.
  return candidate < min ? min : candidate > max ? max : candidate;
}
92
+
93
/**
 * Reduce an arbitrary value to a list of non-empty trimmed strings.
 *
 * @param {*} [value=[]] - Expected to be an array; anything else yields [].
 * @returns {string[]} Trimmed string entries; non-strings and blanks are dropped.
 */
function normalizeArray(value = []) {
  if (!Array.isArray(value)) {
    return [];
  }
  const cleaned = [];
  for (const entry of value) {
    const text = normalizeText(entry);
    if (text) {
      cleaned.push(text);
    }
  }
  return cleaned;
}
99
+
100
/**
 * Build a bounded list of text labels from strings and/or descriptor objects.
 *
 * String items are trimmed. Object items contribute the first truthy value
 * among `step`, `description`, `id`, `name` (kept only if it is a string).
 * Everything else is ignored.
 *
 * @param {*} [value=[]] - Expected to be an array; anything else yields [].
 * @param {number} [limit=20] - Maximum number of labels returned.
 * @returns {string[]} Up to `limit` non-empty labels in input order.
 */
function normalizeTextList(value = [], limit = 20) {
  if (!Array.isArray(value)) {
    return [];
  }
  const labels = [];
  for (const item of value) {
    let label = '';
    if (typeof item === 'string') {
      label = normalizeText(item);
    } else if (item && typeof item === 'object') {
      label = normalizeText(item.step || item.description || item.id || item.name || '');
    }
    if (label) {
      labels.push(label);
    }
  }
  return labels.slice(0, limit);
}
117
+
118
/**
 * Keep only known stage names, lower-cased and de-duplicated in first-seen order.
 *
 * @param {*} value - Candidate list of stage names.
 * @param {string[]} [fallback=[]] - Copied and returned when no valid stage remains.
 * @returns {string[]} Valid stages, or a copy of `fallback`.
 */
function normalizeStageArray(value, fallback = []) {
  const seen = new Set();
  const stages = [];
  for (const entry of normalizeArray(value)) {
    const stage = entry.toLowerCase();
    if (STUDIO_STAGES.includes(stage) && !seen.has(stage)) {
      seen.add(stage);
      stages.push(stage);
    }
  }
  return stages.length > 0 ? stages : [...fallback];
}
123
+
124
/**
 * Keep only known ontology axes, lower-cased and de-duplicated in first-seen order.
 *
 * @param {*} value - Candidate list of axis names.
 * @param {string[]} [fallback=[]] - Copied and returned when no valid axis remains.
 * @returns {string[]} Valid axes, or a copy of `fallback`.
 */
function normalizeOntologyAxisArray(value, fallback = []) {
  const seen = new Set();
  const axes = [];
  for (const entry of normalizeArray(value)) {
    const axis = entry.toLowerCase();
    if (ONTOLOGY_AXES.includes(axis) && !seen.has(axis)) {
      seen.add(axis);
      axes.push(axis);
    }
  }
  return axes.length > 0 ? axes : [...fallback];
}
129
+
130
/**
 * Normalize an incident state to 'open' or 'resolved'.
 *
 * @param {*} value - Raw state value.
 * @param {string} [fallback='open'] - Returned for blank or unrecognised input.
 * @returns {string} 'open', 'resolved', or `fallback`.
 */
function normalizeIncidentState(value, fallback = 'open') {
  const state = normalizeLowerText(value);
  return state === 'open' || state === 'resolved' ? state : fallback;
}
140
+
141
/**
 * Decide whether an attempt record carries debug evidence.
 *
 * Checked in order: a tag listed in DEBUG_EVIDENCE_TAGS, a verification
 * evidence entry starting with "debug:", then a debug-related keyword in the
 * free-text notes.
 *
 * @param {object} [attempt={}] - Attempt record; reads `tags`,
 *   `verification_evidence` and `notes`.
 * @returns {boolean} True when any of the three signals is present.
 */
function hasDebugEvidenceInAttempt(attempt = {}) {
  const taggedAsDebug = normalizeArray(attempt.tags)
    .some((tag) => DEBUG_EVIDENCE_TAGS.includes(tag.toLowerCase()));
  if (taggedAsDebug) {
    return true;
  }

  const hasDebugVerification = normalizeArray(attempt.verification_evidence)
    .some((entry) => /^debug:/i.test(entry));
  if (hasDebugVerification) {
    return true;
  }

  const notes = normalizeLowerText(attempt.notes);
  return notes !== '' && /(debug|trace|diagnostic|observability|telemetry|日志|埋点|观测)/i.test(notes);
}
156
+
157
/**
 * Produce a fully-populated problem-eval policy from a (possibly partial or
 * malformed) policy object plus environment overrides.
 *
 * Environment overrides:
 * - SCE_PROBLEM_EVAL_MODE: 'off' | 'advisory' | 'required' takes precedence
 *   over `policy.mode`.
 * - SCE_PROBLEM_EVAL_DISABLED: a truthy flag forces `enabled` to false.
 *
 * Every field falls back to DEFAULT_PROBLEM_EVAL_POLICY when it is missing or
 * invalid. `policy.mode` is validated against the same three values as the
 * environment variable, so an unknown mode resolves to the default rather
 * than leaking through to consumers.
 *
 * @param {object} [policy={}] - Raw policy, typically parsed JSON. null,
 *   arrays and scalars are treated as "no overrides".
 * @param {object} [env=process.env] - Environment variable source.
 * @returns {object} Normalized policy with the same keys as
 *   DEFAULT_PROBLEM_EVAL_POLICY.
 */
function normalizePolicy(policy = {}, env = process.env) {
  // Default parameters only cover `undefined`; a JSON file containing `null`
  // (or a scalar) would otherwise throw on the first property access.
  const source = policy && typeof policy === 'object' && !Array.isArray(policy) ? policy : {};
  const envSource = env && typeof env === 'object' ? env : {};
  const defaults = DEFAULT_PROBLEM_EVAL_POLICY;
  const allowedModes = ['off', 'advisory', 'required'];

  const envMode = normalizeLowerText(envSource.SCE_PROBLEM_EVAL_MODE);
  const policyMode = normalizeLowerText(source.mode);
  let mode = defaults.mode;
  if (allowedModes.includes(envMode)) {
    mode = envMode;
  } else if (allowedModes.includes(policyMode)) {
    mode = policyMode;
  }

  const envDisabled = normalizeBoolean(envSource.SCE_PROBLEM_EVAL_DISABLED, false);
  const enabled = !envDisabled
    && mode !== 'off'
    && normalizeBoolean(source.enabled, defaults.enabled);

  const minOverrides = source.min_confidence_by_stage && typeof source.min_confidence_by_stage === 'object'
    ? source.min_confidence_by_stage
    : {};
  const minConfidenceByStage = {};
  for (const stage of STUDIO_STAGES) {
    minConfidenceByStage[stage] = normalizeInteger(
      minOverrides[stage],
      defaults.min_confidence_by_stage[stage],
      0,
      100
    );
  }

  const keywords = normalizeArray(source.high_risk_keywords).map((item) => item.toLowerCase());

  // Per-field helpers: read the override by key, fall back to the default of the same key.
  const stages = (key) => normalizeStageArray(source[key], defaults[key]);
  const flag = (key) => normalizeBoolean(source[key], defaults[key]);
  const integer = (key, min, max) => normalizeInteger(source[key], defaults[key], min, max);

  return {
    schema_version: normalizeText(source.schema_version) || defaults.schema_version,
    enabled,
    mode,
    enforce_on_stages: stages('enforce_on_stages'),
    block_on_stages: stages('block_on_stages'),
    min_confidence_by_stage: minConfidenceByStage,
    high_risk_requires_debug_evidence: flag('high_risk_requires_debug_evidence'),
    high_risk_keywords: keywords.length > 0 ? keywords : [...defaults.high_risk_keywords],
    recommendation_limit: integer('recommendation_limit', 1, 20),
    max_failed_rounds_before_debug: integer('max_failed_rounds_before_debug', 1, 10),
    problem_contract_required_stages: stages('problem_contract_required_stages'),
    problem_contract_block_stages: stages('problem_contract_block_stages'),
    ontology_alignment_required_stages: stages('ontology_alignment_required_stages'),
    ontology_alignment_block_stages: stages('ontology_alignment_block_stages'),
    ontology_required_axes: normalizeOntologyAxisArray(
      source.ontology_required_axes,
      defaults.ontology_required_axes
    ),
    require_ontology_evidence_binding: flag('require_ontology_evidence_binding'),
    ontology_evidence_min_bindings: integer('ontology_evidence_min_bindings', 0, 20),
    convergence_required_stages: stages('convergence_required_stages'),
    convergence_block_stages: stages('convergence_block_stages'),
    release_block_on_high_alerts: flag('release_block_on_high_alerts'),
    release_require_governance_report: flag('release_require_governance_report')
  };
}
263
+
264
/**
 * Load the project's problem-eval policy file (if present) and normalize it.
 *
 * @param {string} [projectPath=process.cwd()] - Project root that
 *   DEFAULT_POLICY_PATH is resolved against.
 * @param {object} [fileSystem=fs] - Object providing async `pathExists(path)`
 *   and `readJson(path)`.
 * @param {object} [env=process.env] - Environment passed to normalizePolicy.
 * @returns {Promise<{policy_path: string, policy: object}>} Resolved policy
 *   path and the normalized policy (defaults when the file is absent).
 * @throws {Error} When the policy file exists but cannot be read or parsed;
 *   the underlying error is attached as `cause`.
 */
async function loadProblemEvalPolicy(projectPath = process.cwd(), fileSystem = fs, env = process.env) {
  const policyPath = path.join(projectPath, DEFAULT_POLICY_PATH);
  let payload = {};
  if (await fileSystem.pathExists(policyPath)) {
    try {
      payload = await fileSystem.readJson(policyPath);
    } catch (error) {
      // Keep the original error (stack, code such as EACCES) reachable for callers.
      throw new Error(`Failed to read problem-eval policy: ${error.message}`, { cause: error });
    }
  }

  // `null`, numbers and strings are valid JSON documents; treat them as an
  // empty policy instead of letting property access on them throw.
  const overrides = payload && typeof payload === 'object' ? payload : {};
  return {
    policy_path: policyPath,
    policy: normalizePolicy(overrides, env)
  };
}
281
+
282
/**
 * Compute a 0-100 risk score with a coarse level and the contributing signals.
 *
 * Contributions: high-risk keyword hits in `text` (6 each, capped at 30), a
 * per-stage weight, +18 for the 'prod' release channel, open incidents
 * (3 each, capped at 20) and +16 once the attempt count reaches
 * `max_failed_rounds_before_debug + 1`.
 *
 * @param {string} stage - Stage name.
 * @param {string} text - Text searched for keywords using `includes`.
 * @param {object} policy - Reads `high_risk_keywords` and
 *   `max_failed_rounds_before_debug`.
 * @param {object} [incidentSignals={}] - Reads `open_incident_count` and
 *   `max_attempt_count`.
 * @param {string} [releaseChannel=''] - Release channel name.
 * @returns {{score: number, level: string, signals: string[]}} `level` is
 *   'high' at 70+, 'medium' at 40+, otherwise 'low'.
 */
function scoreRisk(stage, text, policy, incidentSignals = {}, releaseChannel = '') {
  const signals = [];
  let total = 0;

  const keywordList = Array.isArray(policy.high_risk_keywords) ? policy.high_risk_keywords : [];
  const keywordHits = keywordList.filter((keyword) => keyword && text.includes(keyword)).length;
  if (keywordHits > 0) {
    total += Math.min(30, keywordHits * 6);
    signals.push(`high-risk-keywords:${keywordHits}`);
  }

  // 'plan' (and any unknown stage) contributes nothing.
  const stageWeights = new Map([
    ['release', 28],
    ['verify', 18],
    ['apply', 14],
    ['generate', 8]
  ]);
  const stageWeight = stageWeights.get(stage);
  if (stageWeight !== undefined) {
    total += stageWeight;
    signals.push(`stage-${stage}`);
  }

  if (normalizeLowerText(releaseChannel) === 'prod') {
    total += 18;
    signals.push('channel-prod');
  }

  const openIncidents = Number(incidentSignals.open_incident_count || 0);
  const maxAttempts = Number(incidentSignals.max_attempt_count || 0);
  if (openIncidents > 0) {
    total += Math.min(20, openIncidents * 3);
    signals.push(`open-incidents:${openIncidents}`);
  }
  const debugRoundThreshold = Number(policy.max_failed_rounds_before_debug || 2) + 1;
  if (maxAttempts >= debugRoundThreshold) {
    total += 16;
    signals.push(`repeat-attempts:${maxAttempts}`);
  }

  const score = Math.max(0, Math.min(100, Math.round(total)));
  const level = score >= 70 ? 'high' : score >= 40 ? 'medium' : 'low';
  return { score, level, signals };
}
342
+
343
/**
 * Test whether a stage is listed in a policy stage list.
 *
 * @param {string} stage - Stage name to look up.
 * @param {*} [list=[]] - Policy list; non-arrays never match.
 * @returns {boolean} True when `list` is an array containing `stage`.
 */
function stageInPolicy(stage, list = []) {
  if (!Array.isArray(list)) {
    return false;
  }
  return list.includes(stage);
}
346
+
347
/**
 * Count the evidence bindings attached to a domain chain.
 *
 * A positive, finite `summary.evidence_binding_count` wins outright. Otherwise
 * the count is the sum of `evidence_bindings` entries, per-axis
 * `ontology_evidence` labels (up to 50 per axis) and evidence labels on the
 * first 30 hypotheses (up to 20 each).
 *
 * @param {object} [domainChain={}] - Domain chain payload.
 * @param {object} [summary={}] - Pre-computed summary, if any.
 * @returns {number} Number of evidence bindings.
 */
function countOntologyEvidenceBindings(domainChain = {}, summary = {}) {
  const reported = Number(summary.evidence_binding_count || 0);
  if (Number.isFinite(reported) && reported > 0) {
    return reported;
  }

  const explicitCount = Array.isArray(domainChain.evidence_bindings)
    ? domainChain.evidence_bindings.length
    : 0;

  const evidenceByAxis = domainChain.ontology_evidence && typeof domainChain.ontology_evidence === 'object'
    ? domainChain.ontology_evidence
    : {};
  const axisCount = ONTOLOGY_AXES.reduce(
    (sum, axis) => sum + normalizeTextList(evidenceByAxis[axis], 50).length,
    0
  );

  const hypothesisList = Array.isArray(domainChain.hypotheses) ? domainChain.hypotheses : [];
  const hypothesisCount = hypothesisList
    .slice(0, 30)
    .reduce((sum, hypothesis) => sum + normalizeTextList(hypothesis && hypothesis.evidence, 20).length, 0);

  return explicitCount + axisCount + hypothesisCount;
}
373
+
374
/**
 * Derive summary metrics for a domain chain, preferring values already present
 * in `domainChain.summary` and falling back to counting the raw payload.
 *
 * @param {object} [domainChain={}] - Domain chain payload.
 * @returns {{ontology_counts: object, decision_path_steps: number,
 *   verification_gates: Array, hypothesis_count: number, risk_count: number,
 *   evidence_binding_count: number}} Summary metrics.
 */
function extractDomainChainSummary(domainChain = {}) {
  const asObject = (candidate) => (candidate && typeof candidate === 'object' ? candidate : {});
  // Reported value first, then the length of the raw list, then 0.
  const countOrLength = (reported, list) => Number(reported || (Array.isArray(list) ? list.length : 0) || 0);

  const summary = asObject(domainChain.summary);
  const payloadOntology = asObject(domainChain.ontology);
  const summaryCounts = asObject(summary.ontology_counts);

  const ontologyCounts = {};
  for (const axis of ['entity', 'relation', 'business_rule', 'decision_policy', 'execution_flow']) {
    const fallbackCount = normalizeTextList(payloadOntology[axis], 50).length;
    ontologyCounts[axis] = Number(summaryCounts[axis] || fallbackCount || 0);
  }

  let verificationGates = [];
  if (Array.isArray(summary.verification_gates)) {
    verificationGates = summary.verification_gates;
  } else if (Array.isArray(domainChain?.verification?.gates)) {
    verificationGates = domainChain.verification.gates;
  }

  return {
    ontology_counts: ontologyCounts,
    decision_path_steps: countOrLength(summary.decision_path_steps, domainChain.decision_execution_path),
    verification_gates: verificationGates,
    hypothesis_count: countOrLength(summary.hypothesis_count, domainChain.hypotheses),
    risk_count: countOrLength(summary.risk_count, domainChain.risks),
    evidence_binding_count: countOntologyEvidenceBindings(domainChain, summary)
  };
}
416
+
417
/**
 * Assemble a problem contract from an explicit `context.problem_contract`,
 * falling back to domain-chain data, the goal, the scene id, and finally
 * built-in default text.
 *
 * @param {object} [context={}] - Evaluation context; reads `problem_contract`,
 *   `domain_chain`, `goal` and `scene_id`.
 * @returns {{issue_statement: string, expected_outcome: string,
 *   reproduction_steps: string[], impact_scope: string,
 *   forbidden_workarounds: string[]}} The derived contract.
 */
function deriveProblemContract(context = {}) {
  const asObject = (candidate) => (candidate && typeof candidate === 'object' ? candidate : {});
  const domainChain = asObject(context.domain_chain);
  const chainContext = asObject(domainChain.context);
  const rawContract = asObject(context.problem_contract);

  // Scene-based fallbacks apply whenever scene_id is truthy, even if it is not a string.
  const hasScene = Boolean(context.scene_id);
  const sceneLabel = normalizeText(context.scene_id);
  const issueFallback = hasScene ? `Stabilize scene ${sceneLabel} execution.` : '';
  const outcomeFallback = hasScene ? `Scene ${sceneLabel} reaches deterministic verification gates.` : '';

  const issueStatement = normalizeText(
    rawContract.issue_statement
    || rawContract.issue
    || rawContract.problem_statement
    || chainContext?.problem?.statement
    || domainChain?.problem?.statement
    || context.goal
    || issueFallback
  );

  const expectedOutcome = normalizeText(
    rawContract.expected_outcome
    || rawContract.expected
    || rawContract.success_criteria
    || chainContext?.verification?.plan
    || domainChain?.verification?.plan
    || outcomeFallback
  );

  let reproductionSteps = normalizeTextList(
    rawContract.reproduction_steps || rawContract.repro_steps || rawContract.steps,
    20
  );
  if (reproductionSteps.length === 0) {
    reproductionSteps = [
      normalizeText(context.goal) || 'Reproduce the reported failure path in the target scene.',
      'Capture execution trace and gate evidence for the failing path.'
    ];
  }

  const impactScope = normalizeText(
    rawContract.impact_scope
    || rawContract.scope
    || chainContext?.problem?.scope
    || domainChain?.problem?.scope
    || context.scene_id
  );

  let forbiddenWorkarounds = normalizeTextList(
    rawContract.forbidden_workarounds
    || rawContract.prohibited_workarounds
    || rawContract.disallowed_workarounds,
    20
  );
  if (forbiddenWorkarounds.length === 0) {
    forbiddenWorkarounds = [
      'Do not bypass mandatory gates or tests.',
      'Do not silence errors without root-cause remediation.'
    ];
  }

  return {
    issue_statement: issueStatement,
    expected_outcome: expectedOutcome,
    reproduction_steps: reproductionSteps,
    impact_scope: impactScope,
    forbidden_workarounds: forbiddenWorkarounds
  };
}
484
+
485
/**
 * Score how completely the derived problem contract is filled in.
 *
 * @param {object} [context={}] - Evaluation context passed to deriveProblemContract.
 * @returns {{contract: object, checks: object, total: number, covered: number,
 *   missing: string[], score: number, passed: boolean}} `score` is the
 *   percentage of satisfied checks; `passed` requires all of them.
 */
function evaluateProblemContract(context = {}) {
  const contract = deriveProblemContract(context);
  const hasText = (candidate) => normalizeText(candidate).length > 0;
  const hasItems = (candidate) => Array.isArray(candidate) && candidate.length > 0;

  const checks = {
    issue_statement: hasText(contract.issue_statement),
    expected_outcome: hasText(contract.expected_outcome),
    reproduction_steps: hasItems(contract.reproduction_steps),
    impact_scope: hasText(contract.impact_scope),
    forbidden_workarounds: hasItems(contract.forbidden_workarounds)
  };

  const names = Object.keys(checks);
  const missing = names.filter((name) => !checks[name]);
  const total = names.length;
  // Every check is a strict boolean, so satisfied = total - missing.
  const covered = total - missing.length;

  return {
    contract,
    checks,
    total,
    covered,
    missing,
    score: Math.round((covered / total) * 100),
    passed: covered === total
  };
}
506
+
507
/**
 * Check that the domain chain covers the required ontology axes and carries
 * enough evidence bindings.
 *
 * The check is skipped (reported as passed with score 100 and `skipped: true`)
 * when there is no domain material: the chain is not resolved, no axis has a
 * positive count, and no spec id is set. Otherwise axis coverage is worth up
 * to 80 points and satisfied evidence another 20.
 *
 * @param {object} [context={}] - Reads `domain_chain` and `spec_id`.
 * @param {object} [policy=DEFAULT_PROBLEM_EVAL_POLICY] - Reads
 *   `ontology_required_axes`, `ontology_evidence_min_bindings` and
 *   `require_ontology_evidence_binding`.
 * @returns {object} Alignment report with `missing_axes`, `score`, `passed`
 *   and, when skipped, `skipped: true`.
 */
function evaluateOntologyAlignment(context = {}, policy = DEFAULT_PROBLEM_EVAL_POLICY) {
  const domainChain = context.domain_chain && typeof context.domain_chain === 'object'
    ? context.domain_chain
    : {};
  const summary = extractDomainChainSummary(domainChain);
  const axisCount = (axis) => Number(summary?.ontology_counts?.[axis] || 0);

  const policyAxes = policy.ontology_required_axes;
  const requiredAxes = Array.isArray(policyAxes) && policyAxes.length > 0
    ? policyAxes
    : [...ONTOLOGY_AXES];
  const missingAxes = requiredAxes.filter((axis) => axisCount(axis) <= 0);
  const evidenceBindingCount = Number(summary.evidence_binding_count || 0);
  const minBindings = Number(policy.ontology_evidence_min_bindings || 0);

  const hasDomainMaterial = domainChain.resolved === true
    || ONTOLOGY_AXES.some((axis) => axisCount(axis) > 0)
    || normalizeText(context.spec_id).length > 0;
  if (!hasDomainMaterial) {
    return {
      required_axes: requiredAxes,
      missing_axes: [],
      ontology_counts: summary.ontology_counts,
      evidence_binding_count: evidenceBindingCount,
      required_evidence_bindings: minBindings,
      evidence_satisfied: true,
      score: 100,
      passed: true,
      skipped: true
    };
  }

  const evidenceSatisfied = policy.require_ontology_evidence_binding !== true
    || evidenceBindingCount >= minBindings;

  const coveredAxes = requiredAxes.length - missingAxes.length;
  const axisPoints = requiredAxes.length > 0
    ? Math.round((coveredAxes / requiredAxes.length) * 80)
    : 0;
  const evidencePoints = evidenceSatisfied ? 20 : 0;
  const score = Math.max(0, Math.min(100, axisPoints + evidencePoints));

  return {
    required_axes: requiredAxes,
    missing_axes: missingAxes,
    ontology_counts: summary.ontology_counts,
    evidence_binding_count: evidenceBindingCount,
    required_evidence_bindings: minBindings,
    evidence_satisfied: evidenceSatisfied,
    score,
    passed: missingAxes.length === 0 && evidenceSatisfied
  };
}
558
+
559
/**
 * Evaluate the convergence checklist for a stage from `context.stage_readiness`.
 *
 * Every stage checks `prerequisites_ready`. The 'release' stage also checks
 * the verify report, the verify stage result, regression status, high alerts
 * (only when the policy blocks on them) and, if the policy requires it, the
 * governance report.
 *
 * @param {object} [context={}] - Reads `stage_readiness`.
 * @param {string} [stage=''] - Stage name.
 * @param {object} [policy=DEFAULT_PROBLEM_EVAL_POLICY] - Reads
 *   `release_block_on_high_alerts` and `release_require_governance_report`.
 * @returns {{checks: object, missing: string[], score: number, passed: boolean}}
 *   `score` is the percentage of satisfied checks.
 */
function evaluateConvergence(context = {}, stage = '', policy = DEFAULT_PROBLEM_EVAL_POLICY) {
  const readiness = context.stage_readiness && typeof context.stage_readiness === 'object'
    ? context.stage_readiness
    : {};

  const checks = { prerequisites_ready: readiness.prerequisites_ready === true };

  if (stage === 'release') {
    const highAlertCount = Number(readiness.high_alert_count || 0);
    Object.assign(checks, {
      verify_report_ready: readiness.verify_report_ready === true,
      // These two pass unless explicitly reported as false.
      verify_stage_passed: readiness.verify_stage_passed !== false,
      regression_passed: readiness.regression_passed !== false,
      high_alerts_clear: policy.release_block_on_high_alerts !== true || highAlertCount <= 0
    });
    if (policy.release_require_governance_report === true) {
      checks.governance_report_ready = readiness.governance_report_ready === true;
    }
  }

  const names = Object.keys(checks);
  const missing = names.filter((name) => !checks[name]);
  const total = names.length;
  const covered = total - missing.length;

  return {
    checks,
    missing,
    score: total > 0 ? Math.round((covered / total) * 100) : 100,
    passed: missing.length === 0
  };
}
591
+
592
/**
 * Compute a 0-100 evidence score and the signals that contributed to it.
 *
 * Points come from a resolved domain chain, decision path length,
 * verification gates, related specs, debug evidence, stage readiness flags
 * and ontology evidence bindings.
 *
 * @param {object} [context={}] - Reads `domain_chain`, `related_specs_count`
 *   and `stage_readiness`.
 * @param {object} [incidentSignals={}] - Reads `has_debug_evidence`.
 * @returns {{score: number, signals: string[]}} Clamped score and signal labels.
 */
function scoreEvidence(context = {}, incidentSignals = {}) {
  const signals = [];
  let total = 0;
  const award = (points, signal) => {
    total += points;
    signals.push(signal);
  };

  const domainChain = context.domain_chain && typeof context.domain_chain === 'object'
    ? context.domain_chain
    : {};
  const summary = extractDomainChainSummary(domainChain);

  if (domainChain.resolved === true) {
    award(20, 'domain-chain-resolved');
  }

  const decisionSteps = Number(summary.decision_path_steps || 0);
  if (decisionSteps >= 3) {
    award(15, `decision-path:${decisionSteps}`);
  } else if (decisionSteps > 0) {
    award(8, `decision-path-partial:${decisionSteps}`);
  }

  const gateCount = Array.isArray(summary.verification_gates) ? summary.verification_gates.length : 0;
  if (gateCount > 0) {
    award(Math.min(12, gateCount * 3), `verification-gates:${gateCount}`);
  }

  const relatedSpecsCount = Number(context.related_specs_count || 0);
  if (relatedSpecsCount > 0) {
    award(Math.min(15, 8 + relatedSpecsCount), `related-specs:${relatedSpecsCount}`);
  }

  if (incidentSignals.has_debug_evidence === true) {
    award(15, 'debug-evidence-present');
  }

  const stageReadiness = context.stage_readiness && typeof context.stage_readiness === 'object'
    ? context.stage_readiness
    : {};
  if (stageReadiness.prerequisites_ready === true) {
    award(8, 'stage-prerequisites-ready');
  }
  if (stageReadiness.rollback_ready === true) {
    award(10, 'rollback-ready');
  }
  if (stageReadiness.gate_required_ready === true) {
    award(6, 'required-gates-available');
  }

  const evidenceBindingCount = Number(summary.evidence_binding_count || 0);
  if (evidenceBindingCount > 0) {
    award(Math.min(10, evidenceBindingCount), `ontology-evidence-bindings:${evidenceBindingCount}`);
  }

  return { score: Math.max(0, Math.min(100, Math.round(total))), signals };
}
650
+
651
/**
 * Compute a 0-100 readiness score and the signals that contributed to it.
 *
 * Points: scene id (20), goal (10), spec id (10), prerequisites ready (25),
 * patch bundle ready (15), verify report ready (10), plus up to 10 for the
 * share of required gates that are enabled.
 *
 * @param {object} [context={}] - Reads `scene_id`, `goal`, `spec_id`,
 *   `stage_readiness` and `gate_signals` (`required_total`, `required_enabled`).
 * @returns {{score: number, signals: string[]}} Clamped score and signal
 *   labels. The score is always a finite number.
 */
function scoreReadiness(context = {}) {
  const signals = [];
  let score = 0;
  const stageReadiness = context.stage_readiness && typeof context.stage_readiness === 'object'
    ? context.stage_readiness
    : {};

  if (normalizeText(context.scene_id)) {
    score += 20;
    signals.push('scene-defined');
  }
  if (normalizeText(context.goal)) {
    score += 10;
    signals.push('goal-defined');
  }
  if (normalizeText(context.spec_id)) {
    score += 10;
    signals.push('spec-bound');
  }
  if (stageReadiness.prerequisites_ready === true) {
    score += 25;
    signals.push('prerequisites-ready');
  }
  if (stageReadiness.patch_bundle_ready === true) {
    score += 15;
    signals.push('patch-bundle-ready');
  }
  if (stageReadiness.verify_report_ready === true) {
    score += 10;
    signals.push('verify-report-ready');
  }

  const gateSignals = context.gate_signals && typeof context.gate_signals === 'object'
    ? context.gate_signals
    : {};
  const requiredTotal = Number(gateSignals.required_total || 0);
  const enabledRaw = Number(gateSignals.required_enabled || 0);
  // A non-numeric `required_enabled` converts to NaN. Math.min/Math.max/
  // Math.round all propagate NaN, which would turn the whole score into NaN
  // and make every later `score < threshold` comparison false. Count it as 0.
  const requiredEnabled = Number.isNaN(enabledRaw) ? 0 : enabledRaw;
  if (requiredTotal > 0) {
    const ratio = requiredEnabled / requiredTotal;
    // Infinity / Infinity is still NaN; award nothing in that case.
    const gatePoints = Number.isNaN(ratio) ? 0 : Math.round(Math.max(0, Math.min(10, ratio * 10)));
    score += gatePoints;
    signals.push(`gate-availability:${requiredEnabled}/${requiredTotal}`);
  }

  score = Math.max(0, Math.min(100, Math.round(score)));
  return { score, signals };
}
696
+
697
/**
 * Pick an execution strategy; the first matching rule wins.
 *
 * 1. 'debug-first' - attempts reached `max_failed_rounds_before_debug + 1`,
 *    the policy requires debug evidence, and none is present.
 * 2. 'evidence-first' - high risk with evidence score below 55.
 * 3. 'explore-and-validate' - confidence below 45.
 * 4. 'controlled-execution' - release stage with non-low risk.
 * 5. 'direct-execution' - otherwise.
 *
 * @param {string} stage - Stage name.
 * @param {{level: string}} risk - Risk result; reads `level`.
 * @param {{score: number}} evidence - Evidence result; reads `score`.
 * @param {number} confidence - Confidence score.
 * @param {object} [incidentSignals={}] - Reads `max_attempt_count` and
 *   `has_debug_evidence`.
 * @param {object} [policy=DEFAULT_PROBLEM_EVAL_POLICY] - Reads
 *   `max_failed_rounds_before_debug` and `high_risk_requires_debug_evidence`.
 * @returns {{strategy: string, reasons: string[]}} Strategy and its single reason.
 */
function deriveStrategy(stage, risk, evidence, confidence, incidentSignals = {}, policy = DEFAULT_PROBLEM_EVAL_POLICY) {
  const decide = (strategy, reason) => ({ strategy, reasons: [reason] });

  const attemptThreshold = Number(policy.max_failed_rounds_before_debug || 2) + 1;
  const attempts = Number(incidentSignals.max_attempt_count || 0);
  const repeatedFailuresUndiagnosed = attempts >= attemptThreshold
    && policy.high_risk_requires_debug_evidence
    && incidentSignals.has_debug_evidence !== true;

  if (repeatedFailuresUndiagnosed) {
    return decide('debug-first', 'repeated-failures-without-debug-evidence');
  }
  if (risk.level === 'high' && evidence.score < 55) {
    return decide('evidence-first', 'high-risk-insufficient-evidence');
  }
  if (confidence < 45) {
    return decide('explore-and-validate', 'low-confidence');
  }
  if (stage === 'release' && risk.level !== 'low') {
    return decide('controlled-execution', 'release-risk-control');
  }
  return decide('direct-execution', 'confidence-sufficient');
}
726
+
727
+ function evaluateProblemContext(context = {}, policy = DEFAULT_PROBLEM_EVAL_POLICY) {
728
+ const stage = normalizeLowerText(context.stage);
729
+ if (!STUDIO_STAGES.includes(stage)) {
730
+ throw new Error(`Unsupported problem-eval stage: ${context.stage || 'unknown'}`);
731
+ }
732
+
733
+ const textForRisk = [
734
+ normalizeLowerText(context.goal),
735
+ normalizeLowerText(context.scene_id),
736
+ normalizeLowerText(context.spec_id),
737
+ normalizeLowerText(context?.domain_chain?.reason),
738
+ normalizeLowerText(context.release_channel)
739
+ ].join(' ');
740
+
741
+ const incidentSignals = context.incident_signals && typeof context.incident_signals === 'object'
742
+ ? context.incident_signals
743
+ : {};
744
+ const risk = scoreRisk(stage, textForRisk, policy, incidentSignals, context.release_channel);
745
+ const evidence = scoreEvidence(context, incidentSignals);
746
+ const readiness = scoreReadiness(context);
747
+ const problemContract = evaluateProblemContract(context);
748
+ const ontologyAlignment = evaluateOntologyAlignment(context, policy);
749
+ const convergence = evaluateConvergence(context, stage, policy);
750
+ const confidenceScore = Math.max(0, Math.min(100, Math.round(
751
+ evidence.score * 0.32
752
+ + readiness.score * 0.24
753
+ + (100 - risk.score) * 0.14
754
+ + problemContract.score * 0.15
755
+ + ontologyAlignment.score * 0.10
756
+ + convergence.score * 0.05
757
+ )));
758
+
759
+ const minConfidence = Number(policy?.min_confidence_by_stage?.[stage] || 0);
760
+ const strategy = deriveStrategy(stage, risk, evidence, confidenceScore, incidentSignals, policy);
761
+ const blockers = [];
762
+ const warnings = [];
763
+
764
+ const enforced = policy.enabled === true && Array.isArray(policy.enforce_on_stages) && policy.enforce_on_stages.includes(stage);
765
+ const blockStage = Array.isArray(policy.block_on_stages) && policy.block_on_stages.includes(stage);
766
+ const advisoryMode = policy.mode === 'advisory';
767
+
768
+ if (confidenceScore < minConfidence) {
769
+ warnings.push(`confidence ${confidenceScore} below threshold ${minConfidence}`);
770
+ if (blockStage) {
771
+ blockers.push(`confidence-too-low:${confidenceScore}<${minConfidence}`);
772
+ }
773
+ }
774
+
775
+ const debugAttemptThreshold = Number(policy.max_failed_rounds_before_debug || 2) + 1;
776
+ if (policy.high_risk_requires_debug_evidence
777
+ && risk.level === 'high'
778
+ && Number(incidentSignals.max_attempt_count || 0) >= debugAttemptThreshold
779
+ && incidentSignals.has_debug_evidence !== true) {
780
+ warnings.push('high risk with repeated failed attempts and no debug evidence');
781
+ if (blockStage) {
782
+ blockers.push('missing-debug-evidence-after-repeated-failures');
783
+ }
784
+ }
785
+
786
+ if (evidence.score < 35) {
787
+ warnings.push(`evidence score ${evidence.score} is low`);
788
+ if (blockStage && risk.level === 'high') {
789
+ blockers.push(`high-risk-low-evidence:${evidence.score}`);
790
+ }
791
+ }
792
+
793
+ const problemContractRequired = stageInPolicy(stage, policy.problem_contract_required_stages);
794
+ const problemContractBlockedStage = stageInPolicy(stage, policy.problem_contract_block_stages);
795
+ if (problemContractRequired && !problemContract.passed) {
796
+ warnings.push(`problem contract incomplete: ${problemContract.missing.join(', ')}`);
797
+ if (problemContractBlockedStage) {
798
+ blockers.push(`problem-contract-incomplete:${problemContract.missing.join('|')}`);
799
+ }
800
+ }
801
+
802
+ const ontologyRequired = stageInPolicy(stage, policy.ontology_alignment_required_stages);
803
+ const ontologyBlockedStage = stageInPolicy(stage, policy.ontology_alignment_block_stages);
804
+ if (ontologyRequired && !ontologyAlignment.passed) {
805
+ if (ontologyAlignment.missing_axes.length > 0) {
806
+ warnings.push(`ontology alignment missing axes: ${ontologyAlignment.missing_axes.join(', ')}`);
807
+ }
808
+ if (!ontologyAlignment.evidence_satisfied) {
809
+ warnings.push(
810
+ `ontology evidence binding below threshold: ${ontologyAlignment.evidence_binding_count}<${ontologyAlignment.required_evidence_bindings}`
811
+ );
812
+ }
813
+ if (ontologyBlockedStage) {
814
+ if (ontologyAlignment.missing_axes.length > 0) {
815
+ blockers.push(`ontology-alignment-missing:${ontologyAlignment.missing_axes.join('|')}`);
816
+ }
817
+ if (!ontologyAlignment.evidence_satisfied) {
818
+ blockers.push(
819
+ `ontology-evidence-binding-low:${ontologyAlignment.evidence_binding_count}<${ontologyAlignment.required_evidence_bindings}`
820
+ );
821
+ }
822
+ }
823
+ }
824
+
825
+ const convergenceRequired = stageInPolicy(stage, policy.convergence_required_stages);
826
+ const convergenceBlockedStage = stageInPolicy(stage, policy.convergence_block_stages);
827
+ if (convergenceRequired && !convergence.passed) {
828
+ warnings.push(`convergence checks pending: ${convergence.missing.join(', ')}`);
829
+ if (convergenceBlockedStage) {
830
+ blockers.push(`convergence-gate-missing:${convergence.missing.join('|')}`);
831
+ }
832
+ }
833
+
834
+ const recommendations = [];
835
+ if (strategy.strategy === 'debug-first') {
836
+ recommendations.push('Capture debug trace/log evidence before the next patch attempt.');
837
+ }
838
+ if (strategy.strategy === 'evidence-first' || evidence.score < 45) {
839
+ recommendations.push('Refresh domain artifacts and verify ontology coverage before execution.');
840
+ recommendations.push('Load related historical specs and compare successful remediation paths.');
841
+ }
842
+ if (risk.level !== 'low') {
843
+ recommendations.push('Prefer guarded execution with rollback checkpoints and release gates enabled.');
844
+ }
845
+ if (Number(incidentSignals.open_incident_count || 0) > 0) {
846
+ recommendations.push('Review staging incident attempts to avoid repeating failed actions.');
847
+ }
848
+ if (!problemContract.passed) {
849
+ recommendations.push('Complete the problem contract: issue, expected outcome, reproduction steps, impact scope, and forbidden workarounds.');
850
+ }
851
+ if (!ontologyAlignment.passed) {
852
+ recommendations.push('Fill missing ontology axes and bind evidence references before further remediation.');
853
+ }
854
+ if (!convergence.passed) {
855
+ recommendations.push('Close convergence checks (verify pass, regression pass, high-alert clear) before release.');
856
+ }
857
+ if (recommendations.length === 0) {
858
+ recommendations.push('Proceed with direct execution and keep gate verification enabled.');
859
+ }
860
+
861
+ const cappedRecommendations = recommendations.slice(0, policy.recommendation_limit || 6);
862
+ const hardBlockStage = blockStage || problemContractBlockedStage || ontologyBlockedStage || convergenceBlockedStage;
863
+ const blocked = enforced && hardBlockStage && !advisoryMode && blockers.length > 0;
864
+
865
+ return {
866
+ mode: 'problem-eval',
867
+ api_version: PROBLEM_EVAL_API_VERSION,
868
+ generated_at: new Date().toISOString(),
869
+ stage,
870
+ scene_id: normalizeText(context.scene_id),
871
+ spec_id: normalizeText(context.spec_id),
872
+ job_id: normalizeText(context.job_id),
873
+ policy: {
874
+ enabled: policy.enabled === true,
875
+ mode: policy.mode,
876
+ enforced,
877
+ block_stage: blockStage,
878
+ hard_block_stage: hardBlockStage,
879
+ min_confidence: minConfidence
880
+ },
881
+ dimensions: {
882
+ risk,
883
+ evidence,
884
+ readiness,
885
+ strategy,
886
+ problem_contract: problemContract,
887
+ ontology_alignment: ontologyAlignment,
888
+ convergence
889
+ },
890
+ incident_signals: {
891
+ ...incidentSignals
892
+ },
893
+ confidence_score: confidenceScore,
894
+ warnings,
895
+ blockers,
896
+ recommendations: cappedRecommendations,
897
+ passed: !blocked,
898
+ blocked
899
+ };
900
+ }
901
+
902
/**
 * Decide whether a staged incident should be counted for the given
 * evaluation context.
 *
 * When the context carries no spec id, no scene id and no goal, every
 * incident is considered relevant. Otherwise the incident is relevant when
 * at least one of the following holds:
 *   - one of its attempts has `source.spec` equal to the context spec id;
 *   - its title or symptom contains the lower-cased scene id;
 *   - its title or symptom contains the context goal.
 *
 * `normalizeText` / `normalizeLowerText` are defined elsewhere in this file;
 * presumably they return a trimmed (and lower-cased) string and '' for
 * missing values — confirm against their definitions.
 *
 * @param {object} [incident] - Incident record (reads `title`, `symptom`, `attempts`).
 * @param {object} [context] - Evaluation context (reads `spec_id`, `scene_id`, `goal`).
 * @returns {boolean} True when the incident matches the context.
 */
function isIncidentRelevantToContext(incident = {}, context = {}) {
  const specId = normalizeText(context.spec_id);
  const sceneId = normalizeText(context.scene_id);
  const goal = normalizeLowerText(context.goal);

  // Nothing to filter on: treat every incident as relevant.
  const hasFilter = Boolean(specId || sceneId || goal);
  if (!hasFilter) {
    return true;
  }

  const searchableTexts = [
    normalizeLowerText(incident.title),
    normalizeLowerText(incident.symptom)
  ];
  const mentions = (needle) => searchableTexts.some((text) => text.includes(needle));

  if (specId && Array.isArray(incident.attempts)) {
    const specMatched = incident.attempts.some(
      (attempt) => normalizeText(attempt?.source?.spec) === specId
    );
    if (specMatched) {
      return true;
    }
  }

  if (sceneId && mentions(sceneId.toLowerCase())) {
    return true;
  }

  return Boolean(goal && mentions(goal));
}
920
+
921
/**
 * Build the zeroed incident-signal record returned when the staging index
 * is missing or unusable.
 *
 * @param {boolean} hasStagingData - Whether the staging index file exists.
 * @returns {object} Incident signals with every counter at zero.
 */
function buildEmptyIncidentSignals(hasStagingData) {
  return {
    has_staging_data: hasStagingData,
    total_incident_count: 0,
    open_incident_count: 0,
    resolved_incident_count: 0,
    relevant_incident_count: 0,
    max_attempt_count: 0,
    has_debug_evidence: false
  };
}

/**
 * Summarise the errorbook staging area of a project into incident signals.
 *
 * Reads `<projectPath>/.sce/errorbook/staging/index.json` and, for at most
 * the first 200 indexed incidents, the matching file under
 * `staging/incidents/<id>.json`. Incidents that are relevant to `context`
 * (per `isIncidentRelevantToContext`) contribute to the relevant count, the
 * maximum attempt count and the debug-evidence flag.
 *
 * The function is best-effort: a missing index yields zeroed signals with
 * `has_staging_data: false`; an unreadable index or one without an
 * `incidents` array yields zeroed signals with `has_staging_data: true`;
 * missing or unreadable incident files are skipped. Index entries that are
 * not objects (for example `null`) are ignored in every count instead of
 * raising a TypeError.
 *
 * @param {string} [projectPath] - Project root; defaults to `process.cwd()`.
 * @param {object} [context] - Evaluation context passed to the relevance check.
 * @param {object} [fileSystem] - Object providing async `pathExists` and
 *   `readJson`; defaults to the module-level `fs` (fs-extra).
 * @returns {Promise<object>} Incident signal record.
 */
async function collectIncidentSignals(projectPath = process.cwd(), context = {}, fileSystem = fs) {
  const stagingDir = path.join(projectPath, '.sce', 'errorbook', 'staging');
  const indexPath = path.join(stagingDir, 'index.json');
  if (!await fileSystem.pathExists(indexPath)) {
    return buildEmptyIncidentSignals(false);
  }

  const indexPayload = await fileSystem.readJson(indexPath).catch(() => null);
  if (!indexPayload || !Array.isArray(indexPayload.incidents)) {
    return buildEmptyIncidentSignals(true);
  }

  // A hand-edited or partially written index may contain null / scalar
  // entries; only object entries can be incident summaries.
  const summaries = indexPayload.incidents.filter(
    (entry) => entry !== null && typeof entry === 'object'
  );

  // Single pass over the index for the state counters (a missing state
  // is passed to normalizeIncidentState with the 'open' fallback).
  let openCount = 0;
  let resolvedCount = 0;
  for (const summary of summaries) {
    const state = normalizeIncidentState(summary.state, 'open');
    if (state === 'open') {
      openCount += 1;
    } else if (state === 'resolved') {
      resolvedCount += 1;
    }
  }

  const incidentsDir = path.join(stagingDir, 'incidents');
  let relevantCount = 0;
  let maxAttemptCount = 0;
  let hasDebugEvidence = false;

  // Only the first 200 indexed incidents are opened, bounding the file reads.
  for (const summary of summaries.slice(0, 200)) {
    const incidentId = normalizeText(summary.id);
    if (!incidentId) {
      continue;
    }
    const incidentPath = path.join(incidentsDir, `${incidentId}.json`);
    if (!await fileSystem.pathExists(incidentPath)) {
      continue;
    }
    const incident = await fileSystem.readJson(incidentPath).catch(() => null);
    if (!incident || !isIncidentRelevantToContext(incident, context)) {
      continue;
    }

    relevantCount += 1;

    // Prefer the recorded attempt_count; fall back to the attempts array length.
    const recordedAttempts = Array.isArray(incident.attempts) ? incident.attempts : [];
    const attemptCount = Number(incident.attempt_count || recordedAttempts.length || 0);
    if (attemptCount > maxAttemptCount) {
      maxAttemptCount = attemptCount;
    }
    if (recordedAttempts.some((attempt) => hasDebugEvidenceInAttempt(attempt))) {
      hasDebugEvidence = true;
    }
  }

  return {
    has_staging_data: true,
    total_incident_count: summaries.length,
    open_incident_count: openCount,
    resolved_incident_count: resolvedCount,
    relevant_incident_count: relevantCount,
    max_attempt_count: maxAttemptCount,
    has_debug_evidence: hasDebugEvidence
  };
}
986
+
987
/**
 * Express `absolutePath` relative to `projectPath`, using forward slashes.
 *
 * The relative path comes from `path.relative`; every backslash in the
 * result is then replaced with '/'.
 *
 * @param {string} projectPath - Base directory.
 * @param {string} absolutePath - Path to express relative to the base.
 * @returns {string} Relative path with '/' separators.
 */
function toRelativePosix(projectPath, absolutePath) {
  const relativePath = path.relative(projectPath, absolutePath);
  return relativePath.split('\\').join('/');
}
990
+
991
/**
 * Turn an arbitrary value into a token usable inside a file name.
 *
 * The value is passed through `normalizeText` (defined elsewhere in this
 * file); each run of characters outside `[a-zA-Z0-9._-]` becomes a single
 * '-', and leading/trailing '-' runs are removed. When the value normalizes
 * to nothing, or nothing is left after cleaning, `fallback` is returned.
 *
 * @param {*} value - Raw segment value.
 * @param {string} [fallback='adhoc'] - Returned when no usable text remains.
 * @returns {string} Sanitized segment or the fallback.
 */
function sanitizeSegment(value, fallback = 'adhoc') {
  const text = normalizeText(value);
  const collapsed = text ? text.replace(/[^a-zA-Z0-9._-]+/g, '-') : '';
  const trimmed = collapsed.replace(/^-+|-+$/g, '');
  return trimmed === '' ? fallback : trimmed;
}
998
+
999
/**
 * Evaluate a problem context against the problem-eval policy and, unless
 * disabled, persist the resulting report as JSON.
 *
 * Resolution of inputs:
 *   - `dependencies.projectPath` falls back to `process.cwd()`;
 *   - `dependencies.fileSystem` falls back to the module-level `fs`;
 *   - `dependencies.policyBundle` falls back to `loadProblemEvalPolicy(...)`,
 *     which receives `dependencies.env` or `process.env`;
 *   - `context.incident_signals` falls back to `collectIncidentSignals(...)`.
 *
 * The report is written to
 * `<projectPath>/<DEFAULT_REPORT_DIR>/<job>-<stage>.json` unless
 * `dependencies.writeReport` is exactly `false`. After writing, the
 * project-relative path is attached to the returned object as `report_file`;
 * the file on disk is written before that property is set.
 *
 * @param {object} [context] - Evaluation context forwarded to `evaluateProblemContext`.
 * @param {object} [dependencies] - Optional overrides: `projectPath`,
 *   `fileSystem`, `env`, `writeReport`, `policyBundle`.
 * @returns {Promise<object>} The evaluation report.
 */
async function runProblemEvaluation(context = {}, dependencies = {}) {
  const projectPath = dependencies.projectPath || process.cwd();
  const fileSystem = dependencies.fileSystem || fs;

  let policyBundle = dependencies.policyBundle;
  if (!policyBundle) {
    const env = dependencies.env || process.env;
    policyBundle = await loadProblemEvalPolicy(projectPath, fileSystem, env);
  }
  const { policy } = policyBundle;

  let incidentSignals = context.incident_signals;
  if (!incidentSignals) {
    incidentSignals = await collectIncidentSignals(projectPath, context, fileSystem);
  }

  const evaluationInput = { ...context, incident_signals: incidentSignals };
  const report = evaluateProblemContext(evaluationInput, policy);

  // Only an explicit `false` turns persistence off.
  if (dependencies.writeReport === false) {
    return report;
  }

  const stageSegment = sanitizeSegment(report.stage, 'stage');
  // Reports without a job id get a timestamp-based name.
  const jobSegment = sanitizeSegment(report.job_id, `adhoc-${Date.now()}`);
  const reportPath = path.join(projectPath, DEFAULT_REPORT_DIR, `${jobSegment}-${stageSegment}.json`);

  await fileSystem.ensureDir(path.dirname(reportPath));
  await fileSystem.writeJson(reportPath, report, { spaces: 2 });
  report.report_file = toRelativePosix(projectPath, reportPath);

  return report;
}
1024
+
1025
+ module.exports = {
1026
+ PROBLEM_EVAL_API_VERSION,
1027
+ DEFAULT_POLICY_PATH,
1028
+ DEFAULT_REPORT_DIR,
1029
+ DEFAULT_PROBLEM_EVAL_POLICY,
1030
+ normalizePolicy,
1031
+ loadProblemEvalPolicy,
1032
+ collectIncidentSignals,
1033
+ evaluateProblemContext,
1034
+ runProblemEvaluation
1035
+ };