@veraxhq/verax 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -88
- package/bin/verax.js +11 -452
- package/package.json +24 -36
- package/src/cli/commands/default.js +681 -0
- package/src/cli/commands/doctor.js +197 -0
- package/src/cli/commands/inspect.js +109 -0
- package/src/cli/commands/run.js +586 -0
- package/src/cli/entry.js +196 -0
- package/src/cli/util/atomic-write.js +37 -0
- package/src/cli/util/detection-engine.js +297 -0
- package/src/cli/util/env-url.js +33 -0
- package/src/cli/util/errors.js +44 -0
- package/src/cli/util/events.js +110 -0
- package/src/cli/util/expectation-extractor.js +388 -0
- package/src/cli/util/findings-writer.js +32 -0
- package/src/cli/util/idgen.js +87 -0
- package/src/cli/util/learn-writer.js +39 -0
- package/src/cli/util/observation-engine.js +412 -0
- package/src/cli/util/observe-writer.js +25 -0
- package/src/cli/util/paths.js +30 -0
- package/src/cli/util/project-discovery.js +297 -0
- package/src/cli/util/project-writer.js +26 -0
- package/src/cli/util/redact.js +128 -0
- package/src/cli/util/run-id.js +30 -0
- package/src/cli/util/runtime-budget.js +147 -0
- package/src/cli/util/summary-writer.js +43 -0
- package/src/types/global.d.ts +28 -0
- package/src/types/ts-ast.d.ts +24 -0
- package/src/verax/cli/ci-summary.js +35 -0
- package/src/verax/cli/context-explanation.js +89 -0
- package/src/verax/cli/doctor.js +277 -0
- package/src/verax/cli/error-normalizer.js +154 -0
- package/src/verax/cli/explain-output.js +105 -0
- package/src/verax/cli/finding-explainer.js +130 -0
- package/src/verax/cli/init.js +237 -0
- package/src/verax/cli/run-overview.js +163 -0
- package/src/verax/cli/url-safety.js +111 -0
- package/src/verax/cli/wizard.js +109 -0
- package/src/verax/cli/zero-findings-explainer.js +57 -0
- package/src/verax/cli/zero-interaction-explainer.js +127 -0
- package/src/verax/core/action-classifier.js +86 -0
- package/src/verax/core/budget-engine.js +218 -0
- package/src/verax/core/canonical-outcomes.js +157 -0
- package/src/verax/core/decision-snapshot.js +335 -0
- package/src/verax/core/determinism-model.js +432 -0
- package/src/verax/core/incremental-store.js +245 -0
- package/src/verax/core/invariants.js +356 -0
- package/src/verax/core/promise-model.js +230 -0
- package/src/verax/core/replay-validator.js +350 -0
- package/src/verax/core/replay.js +222 -0
- package/src/verax/core/run-id.js +175 -0
- package/src/verax/core/run-manifest.js +99 -0
- package/src/verax/core/silence-impact.js +369 -0
- package/src/verax/core/silence-model.js +523 -0
- package/src/verax/detect/comparison.js +7 -34
- package/src/verax/detect/confidence-engine.js +764 -329
- package/src/verax/detect/detection-engine.js +293 -0
- package/src/verax/detect/evidence-index.js +127 -0
- package/src/verax/detect/expectation-model.js +241 -168
- package/src/verax/detect/explanation-helpers.js +187 -0
- package/src/verax/detect/finding-detector.js +450 -0
- package/src/verax/detect/findings-writer.js +41 -12
- package/src/verax/detect/flow-detector.js +366 -0
- package/src/verax/detect/index.js +200 -288
- package/src/verax/detect/interactive-findings.js +612 -0
- package/src/verax/detect/signal-mapper.js +308 -0
- package/src/verax/detect/skip-classifier.js +4 -4
- package/src/verax/detect/verdict-engine.js +561 -0
- package/src/verax/evidence-index-writer.js +61 -0
- package/src/verax/flow/flow-engine.js +3 -2
- package/src/verax/flow/flow-spec.js +1 -2
- package/src/verax/index.js +103 -15
- package/src/verax/intel/effect-detector.js +368 -0
- package/src/verax/intel/handler-mapper.js +249 -0
- package/src/verax/intel/index.js +281 -0
- package/src/verax/intel/route-extractor.js +280 -0
- package/src/verax/intel/ts-program.js +256 -0
- package/src/verax/intel/vue-navigation-extractor.js +642 -0
- package/src/verax/intel/vue-router-extractor.js +325 -0
- package/src/verax/learn/action-contract-extractor.js +338 -104
- package/src/verax/learn/ast-contract-extractor.js +148 -6
- package/src/verax/learn/flow-extractor.js +172 -0
- package/src/verax/learn/index.js +36 -2
- package/src/verax/learn/manifest-writer.js +122 -58
- package/src/verax/learn/project-detector.js +40 -0
- package/src/verax/learn/route-extractor.js +28 -97
- package/src/verax/learn/route-validator.js +8 -7
- package/src/verax/learn/state-extractor.js +212 -0
- package/src/verax/learn/static-extractor-navigation.js +114 -0
- package/src/verax/learn/static-extractor-validation.js +88 -0
- package/src/verax/learn/static-extractor.js +119 -10
- package/src/verax/learn/truth-assessor.js +24 -21
- package/src/verax/learn/ts-contract-resolver.js +14 -12
- package/src/verax/observe/aria-sensor.js +211 -0
- package/src/verax/observe/browser.js +30 -6
- package/src/verax/observe/console-sensor.js +2 -18
- package/src/verax/observe/domain-boundary.js +10 -1
- package/src/verax/observe/expectation-executor.js +513 -0
- package/src/verax/observe/flow-matcher.js +143 -0
- package/src/verax/observe/focus-sensor.js +196 -0
- package/src/verax/observe/human-driver.js +660 -273
- package/src/verax/observe/index.js +910 -26
- package/src/verax/observe/interaction-discovery.js +378 -15
- package/src/verax/observe/interaction-runner.js +562 -197
- package/src/verax/observe/loading-sensor.js +145 -0
- package/src/verax/observe/navigation-sensor.js +255 -0
- package/src/verax/observe/network-sensor.js +55 -7
- package/src/verax/observe/observed-expectation-deriver.js +186 -0
- package/src/verax/observe/observed-expectation.js +305 -0
- package/src/verax/observe/page-frontier.js +234 -0
- package/src/verax/observe/settle.js +38 -17
- package/src/verax/observe/state-sensor.js +393 -0
- package/src/verax/observe/state-ui-sensor.js +7 -1
- package/src/verax/observe/timing-sensor.js +228 -0
- package/src/verax/observe/traces-writer.js +73 -21
- package/src/verax/observe/ui-signal-sensor.js +143 -17
- package/src/verax/scan-summary-writer.js +80 -15
- package/src/verax/shared/artifact-manager.js +111 -9
- package/src/verax/shared/budget-profiles.js +136 -0
- package/src/verax/shared/caching.js +1 -1
- package/src/verax/shared/ci-detection.js +39 -0
- package/src/verax/shared/config-loader.js +169 -0
- package/src/verax/shared/dynamic-route-utils.js +224 -0
- package/src/verax/shared/expectation-coverage.js +44 -0
- package/src/verax/shared/expectation-prover.js +81 -0
- package/src/verax/shared/expectation-tracker.js +201 -0
- package/src/verax/shared/expectations-writer.js +60 -0
- package/src/verax/shared/first-run.js +44 -0
- package/src/verax/shared/progress-reporter.js +171 -0
- package/src/verax/shared/retry-policy.js +9 -1
- package/src/verax/shared/root-artifacts.js +49 -0
- package/src/verax/shared/scan-budget.js +86 -0
- package/src/verax/shared/url-normalizer.js +162 -0
- package/src/verax/shared/zip-artifacts.js +66 -0
- package/src/verax/validate/context-validator.js +244 -0
|
@@ -1,498 +1,933 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* WAVE 4: CONFIDENCE ENGINE
|
|
2
|
+
* WAVE 4: CONFIDENCE ENGINE (PRODUCTION GRADE)
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
4
|
+
* Deterministic, evidence-based scoring for findings.
|
|
5
|
+
* Output: { score, level, explain, factors }
|
|
6
|
+
*
|
|
7
|
+
* MANDATORY RULES:
|
|
8
|
+
* 1. Same inputs always produce same score and explanations
|
|
9
|
+
* 2. HIGH level requires PROVEN expectation AND sensors present WITH DATA
|
|
10
|
+
* 3. All scores clamped to [0, 100]
|
|
11
|
+
* 4. Explanations ordered by importance, max 8 items
|
|
12
|
+
*
|
|
13
|
+
* PHASE 3: EVIDENCE INTEGRITY
|
|
14
|
+
* - Sensors must contain NON-TRIVIAL data to count as "present"
|
|
15
|
+
* - Empty/placeholder sensor data does NOT count
|
|
16
|
+
* - Sensor failures tracked as silence events
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
 * Check if network sensor contains non-trivial data.
 * STRICT: Must have actual network activity captured.
 *
 * The summary counts as "present" when any request counter is positive or any
 * of the URL lists is non-empty. The slow-request count is accepted under both
 * `slowRequests` and `slowRequestsCount`, because other code in this file
 * (extractEvidenceSignals) reads the latter; a counter may be a number or a
 * list of entries.
 *
 * @param {Object} networkSummary - Network sensor summary (may be null/undefined).
 * @returns {boolean} true when the summary holds at least one piece of real data.
 */
function hasNetworkData(networkSummary) {
  if (!networkSummary || typeof networkSummary !== 'object') return false;

  // Numbers keep the original `(value || 0) > 0` semantics; arrays count by length
  // (the original coerced arrays to strings, which gave NaN for most lists).
  const isPositive = (value) => (Array.isArray(value) ? value.length > 0 : (value || 0) > 0);
  const isNonEmptyList = (value) => Array.isArray(value) && value.length > 0;

  return (
    isPositive(networkSummary.totalRequests) ||
    isPositive(networkSummary.failedRequests) ||
    isPositive(networkSummary.slowRequests) ||
    isPositive(networkSummary.slowRequestsCount) ||
    isNonEmptyList(networkSummary.topFailedUrls) ||
    isNonEmptyList(networkSummary.topSlowUrls)
  );
}
|
|
35
|
+
|
|
36
|
+
/**
 * Check if console sensor contains non-trivial data.
 * STRICT: Must have actual console messages captured.
 *
 * Besides the message/error/warning counters and the `entries` list, an
 * explicit `hasErrors === true` flag also counts as data: other code in this
 * file (extractEvidenceSignals) derives its console-error signal from that
 * flag, so a summary carrying it must not be reported as an absent sensor.
 *
 * @param {Object} consoleSummary - Console sensor summary (may be null/undefined).
 * @returns {boolean} true when the summary holds at least one piece of real data.
 */
function hasConsoleData(consoleSummary) {
  if (!consoleSummary || typeof consoleSummary !== 'object') return false;

  // Numbers keep the original `(value || 0) > 0` semantics; arrays count by length.
  const isPositive = (value) => (Array.isArray(value) ? value.length > 0 : (value || 0) > 0);

  return (
    isPositive(consoleSummary.totalMessages) ||
    isPositive(consoleSummary.errors) ||
    isPositive(consoleSummary.warnings) ||
    consoleSummary.hasErrors === true ||
    (Array.isArray(consoleSummary.entries) && consoleSummary.entries.length > 0)
  );
}
|
|
51
|
+
|
|
52
|
+
/**
 * Check if UI sensor contains non-trivial data.
 * STRICT: Must have meaningful UI changes captured.
 *
 * The change flags are read from `uiSignals.diff` when that property is
 * truthy, otherwise from `uiSignals` itself. A flag only counts when it is
 * strictly `true`.
 *
 * @param {Object} uiSignals - UI sensor output (may be null/undefined).
 * @returns {boolean} true when at least one change flag is set.
 */
function hasUiData(uiSignals) {
  const isObject = Boolean(uiSignals) && typeof uiSignals === 'object';
  if (!isObject) {
    return false;
  }

  const source = uiSignals.diff || uiSignals;
  const changeFlags = [
    'hasAnyDelta',
    'changed',
    'domChanged',
    'visibleChanged',
    'ariaChanged',
    'focusChanged',
    'textChanged'
  ];

  return changeFlags.some((flag) => source[flag] === true);
}
|
|
10
72
|
|
|
11
73
|
// Starting confidence score per finding type, before boosts and penalties.
// computeConfidence() falls back to 50 for types not listed here and replaces
// this value whenever the expectation strength yields a non-zero base score.
const BASE_SCORES = {
  network_silent_failure: 70,
  validation_silent_failure: 60, // VALIDATION INTELLIGENCE v1
  missing_feedback_failure: 55,
  no_effect_silent_failure: 50,
  missing_network_action: 65,
  missing_state_action: 60,
  navigation_silent_failure: 75, // NAVIGATION INTELLIGENCE v2
  partial_navigation_failure: 65, // NAVIGATION INTELLIGENCE v2
  flow_silent_failure: 70, // FLOW INTELLIGENCE v1
  observed_break: 50 // OBSERVED expectations (runtime-derived, lower confidence)
};
|
|
24
85
|
|
|
25
|
-
|
|
86
|
+
/**
 * Get base score from expectation strength.
 *
 * @param {string} expectationStrength - 'PROVEN', 'OBSERVED', 'WEAK' or anything else.
 * @returns {number} 70 / 55 / 50 for the three known strengths, 0 otherwise.
 */
function getBaseScoreFromExpectationStrength(expectationStrength) {
  switch (expectationStrength) {
    case 'PROVEN':
      return 70;
    case 'OBSERVED':
      return 55;
    case 'WEAK':
      return 50;
    default:
      return 0; // UNKNOWN
  }
}
|
|
26
101
|
|
|
27
102
|
/**
 * Main confidence computation function.
 *
 * Pipeline: determine expectation strength -> pick base score -> extract
 * evidence signals -> check which sensors carry real data -> apply
 * type-specific and global boosts/penalties -> clamp to [0, 100] -> map to a
 * level with the hard rules below -> build explanations.
 *
 * Level rules:
 *  - score >= 80 is HIGH only with a PROVEN expectation and all three sensors
 *    present; otherwise it is capped at MEDIUM / 79.
 *  - score >= 55 is MEDIUM, anything lower is LOW.
 *  - OBSERVED expectations are forced to LOW / <= 49 unless
 *    `attemptMeta.repeated` is truthy.
 *
 * @param {Object} params - { findingType, expectation, sensors, comparisons, attemptMeta }
 * @param {string} params.findingType - Finding type key (looked up in BASE_SCORES).
 * @param {Object} params.expectation - Expectation with proof metadata.
 * @param {Object} [params.sensors] - Sensor data: { network, console, uiSignals }.
 * @param {Object} [params.comparisons] - Comparison results (hasUrlChange, hasDomChange, hasVisibleChange).
 * @param {Object} [params.attemptMeta] - Metadata about the interaction attempt.
 * @returns {Object} - { score, level, explain, factors, confidenceExplanation, boundaryExplanation }
 */
export function computeConfidence({ findingType, expectation, sensors = {}, comparisons = {}, attemptMeta = {} } = {}) {
  const boosts = [];
  const penalties = [];

  // Extract sensor data (with defaults for missing sensors; tolerate sensors === null)
  const networkSummary = sensors?.network || {};
  const consoleSummary = sensors?.console || {};
  const uiSignals = sensors?.uiSignals || {};

  // === STEP 1: DETERMINE EXPECTATION STRENGTH ===
  const expectationStrength = determineExpectationStrength(expectation);

  // === STEP 1B: SET BASE SCORE FROM EXPECTATION STRENGTH ===
  // The per-type score is only the fallback: any known strength overrides it.
  let baseScore = BASE_SCORES[findingType] || 50;
  const strengthBasedScore = getBaseScoreFromExpectationStrength(expectationStrength);
  if (strengthBasedScore > 0) {
    baseScore = strengthBasedScore;
  }

  // === STEP 2: EXTRACT EVIDENCE SIGNALS ===
  const evidenceSignals = extractEvidenceSignals({
    networkSummary,
    consoleSummary,
    uiSignals,
    comparisons
  });

  // === STEP 3: SENSOR PRESENCE CHECK (STRICT - must contain data) ===
  // PHASE 3: Sensors only count as "present" if they contain non-trivial data
  const sensorsPresent = {
    network: hasNetworkData(networkSummary),
    console: hasConsoleData(consoleSummary),
    ui: hasUiData(uiSignals)
  };
  const allSensorsPresent = sensorsPresent.network && sensorsPresent.console && sensorsPresent.ui;

  // === STEP 4: COMPUTE BOOSTS AND PENALTIES (TYPE-SPECIFIC) ===
  // The scorer also appends human-readable reasons to `boosts` / `penalties`.
  const typeResults = scoreByFindingType({
    findingType,
    expectation,
    expectationStrength,
    networkSummary,
    consoleSummary,
    uiSignals,
    evidenceSignals,
    comparisons,
    attemptMeta,
    boosts,
    penalties
  });
  const totalBoosts = typeResults.totalBoosts;
  let totalPenalties = typeResults.totalPenalties;

  // === STEP 5: APPLY GLOBAL PENALTIES ===

  // -15 if sensors missing (can't trust silent failure claim without sensors)
  if (!allSensorsPresent) {
    const missingSensors = ['network', 'console', 'ui'].filter((name) => !sensorsPresent[name]);
    totalPenalties += 15;
    penalties.push(`Missing sensor data: ${missingSensors.join(', ')}`);
  }

  // -10 if expectation not proven
  if (expectationStrength !== 'PROVEN') {
    totalPenalties += 10;
    penalties.push(`Expectation strength is ${expectationStrength}, not PROVEN`);
  }

  // === STEP 6: COMPUTE FINAL SCORE ===
  const rawScore = baseScore + totalBoosts - totalPenalties; // unclamped, used in cap messages
  let score = Math.max(0, Math.min(100, rawScore)); // Clamp to [0, 100]

  // === STEP 7: DETERMINE LEVEL WITH HARD RULES ===
  let level = 'LOW';
  let boundaryExplanation = null; // Phase 3: Track near-threshold decisions

  if (score >= 80) {
    // HARD RULE: HIGH level requires PROVEN expectation AND all sensors present
    if (expectationStrength === 'PROVEN' && allSensorsPresent) {
      level = 'HIGH';

      // Phase 3: Near-threshold detection (within 2 points of boundary)
      if (score < 82) {
        boundaryExplanation = `Near threshold: score ${score.toFixed(1)} >= 80 threshold, assigned HIGH (proven expectation + all sensors)`;
      }
    } else {
      // Cap at MEDIUM if missing evidence
      level = 'MEDIUM';
      score = Math.min(score, 79);

      // Phase 3: Boundary explanation for capped score
      boundaryExplanation = `Capped at MEDIUM: score would be ${rawScore.toFixed(1)} but ${expectationStrength !== 'PROVEN' ? 'expectation not proven' : 'sensors missing'}, kept score <= 79`;
    }
  } else if (score >= 55) {
    level = 'MEDIUM';

    // Phase 3: Near-threshold detection
    if (score < 57) {
      boundaryExplanation = `Near threshold: score ${score.toFixed(1)} >= 55 threshold, assigned MEDIUM (above LOW boundary)`;
    } else if (score > 77) {
      boundaryExplanation = `Near threshold: score ${score.toFixed(1)} < 80 threshold, kept MEDIUM (below HIGH boundary)`;
    }
  } else if (score > 52) {
    // Level stays LOW. Phase 3: Near-threshold detection
    boundaryExplanation = `Near threshold: score ${score.toFixed(1)} < 55 threshold, kept LOW (below MEDIUM boundary)`;
  }

  // === STEP 8: GENERATE EXPLANATIONS (ORDERED BY IMPORTANCE) ===
  const explain = generateExplanations(boosts, penalties, expectationStrength, evidenceSignals);

  // OBSERVED expectations are conservatively capped
  if (expectationStrength === 'OBSERVED') {
    if (!attemptMeta?.repeated) {
      // Keep the boundary reasoning in sync with the cap: without this, the
      // output could report e.g. "assigned MEDIUM" (or a pre-cap score) next
      // to a LOW level / capped score.
      if (level !== 'LOW' || score > 49) {
        boundaryExplanation = `Capped at LOW: score would be ${rawScore.toFixed(1)} but OBSERVED expectation was not repeated, kept score <= 49`;
      }
      level = 'LOW';
      score = Math.min(score, 49);
    } else if (level === 'HIGH') {
      // Defensive only: HIGH already requires a PROVEN expectation above.
      level = 'MEDIUM';
      score = Math.min(score, 79);
      boundaryExplanation = `Capped at MEDIUM: score would be ${rawScore.toFixed(1)} but expectation not proven, kept score <= 79`;
    }
  }

  // === STEP 9: ASSEMBLE FINAL OUTPUT ===
  const finalExplain = explain.slice(0, 8); // Max 8 reasons
  const roundedScore = Math.round(score);

  // === STEP 10: GENERATE CONFIDENCE EXPLANATIONS (PHASE 9) ===
  const confidenceExplanation = generateConfidenceExplanation({
    level,
    score: roundedScore,
    expectationStrength,
    sensorsPresent,
    allSensorsPresent,
    evidenceSignals,
    boosts,
    penalties,
    attemptMeta,
    boundaryExplanation // Phase 3: Include boundary reasoning
  });

  return {
    score: roundedScore,
    level,
    explain: finalExplain,
    factors: {
      expectationStrength,
      sensorsPresent,
      evidenceSignals,
      penalties,
      boosts
    },
    confidenceExplanation,
    boundaryExplanation // Phase 3: Surface boundary reasoning in output
  };
}
|
|
275
|
+
|
|
276
|
+
/**
 * Determine expectation strength from proof metadata.
 *
 * @param {Object} [expectation] - Expectation record; empty or falsy means UNKNOWN.
 * @returns {string} 'UNKNOWN', 'OBSERVED', 'PROVEN' or 'WEAK'.
 */
function determineExpectationStrength(expectation = {}) {
  const isEmpty = !expectation || Object.keys(expectation).length === 0;
  if (isEmpty) return 'UNKNOWN';

  // An explicit OBSERVED marker wins over every proof indicator below.
  if (expectation.expectationStrength === 'OBSERVED') return 'OBSERVED';

  const proofIndicators = [
    // Explicit PROVEN_EXPECTATION marker
    expectation.proof === 'PROVEN_EXPECTATION',
    // Explicit source reference (AST analysis, TS cross-file, etc.)
    expectation.explicit === true || expectation.sourceRef,
    // Static expectations from HTML parsing that carry an evidence source
    expectation.evidence && expectation.evidence.source
  ];

  // Some metadata but no proof indicator: weak
  return proofIndicators.some(Boolean) ? 'PROVEN' : 'WEAK';
}
|
|
306
|
+
|
|
307
|
+
/**
 * Extract deterministic evidence signals from runtime data.
 *
 * Every signal is a strict boolean. Console errors are recognised from either
 * the `hasErrors` flag or a positive `errors` counter, and slow requests from
 * either `slowRequestsCount` or `slowRequests`, so that the signals agree
 * with the fields hasConsoleData() / hasNetworkData() in this file inspect.
 *
 * @param {Object} params
 * @param {Object} params.networkSummary - Network sensor summary.
 * @param {Object} params.consoleSummary - Console sensor summary.
 * @param {Object} params.uiSignals - UI sensor output (before/after snapshots).
 * @param {Object} params.comparisons - { hasUrlChange, hasDomChange, hasVisibleChange }.
 * @returns {Object} { urlChanged, domChanged, screenshotChanged, networkFailed,
 *                     consoleErrors, uiFeedbackDetected, slowRequests }
 */
function extractEvidenceSignals({ networkSummary, consoleSummary, uiSignals, comparisons }) {
  // A counter may be reported as a number or as a list of entries.
  const isPositive = (value) => (Array.isArray(value) ? value.length > 0 : (value || 0) > 0);

  return {
    urlChanged: comparisons?.hasUrlChange === true,
    domChanged: comparisons?.hasDomChange === true,
    screenshotChanged: comparisons?.hasVisibleChange === true,
    networkFailed: (networkSummary?.failedRequests || 0) > 0,
    consoleErrors: consoleSummary?.hasErrors === true || isPositive(consoleSummary?.errors),
    // `|| {}` guards against null, which a default parameter would not cover.
    uiFeedbackDetected: Boolean(hasAnyFeedback(uiSignals || {})),
    slowRequests: isPositive(networkSummary?.slowRequestsCount) || isPositive(networkSummary?.slowRequests)
  };
}
|
|
323
|
+
|
|
324
|
+
/**
 * Check if any UI feedback is present (error, loading, status, etc.).
 *
 * Looks at both the `before` and `after` snapshots and reports feedback when
 * either one has an error/loading/status signal, a live region, a dialog, or
 * at least one disabled element.
 *
 * @param {Object} [uiSignals] - UI sensor output; null/undefined is treated as empty.
 * @returns {boolean} Always a strict boolean.
 */
function hasAnyFeedback(uiSignals = {}) {
  // Optional chaining covers an explicit null, which the default parameter does not.
  const before = uiSignals?.before || {};
  const after = uiSignals?.after || {};

  const snapshotHasFeedback = (snapshot) =>
    Boolean(
      snapshot.hasErrorSignal ||
      snapshot.hasLoadingIndicator ||
      snapshot.hasStatusSignal ||
      snapshot.hasLiveRegion ||
      snapshot.hasDialog ||
      (snapshot.disabledElements?.length || 0) > 0
    );

  return snapshotHasFeedback(before) || snapshotHasFeedback(after);
}
|
|
341
|
+
|
|
342
|
+
/**
 * Type-specific scoring dispatch.
 *
 * Runs the boost scorer and then the penalty scorer registered for
 * `findingType`. Both receive the shared `boosts` / `penalties` arrays and may
 * append reason strings to them. Finding types without a registered pair
 * yield zero boosts and zero penalties.
 *
 * @param {Object} params - Scoring context assembled by computeConfidence().
 * @returns {{ totalBoosts: number, totalPenalties: number }}
 */
function scoreByFindingType({
  findingType,
  expectation,
  expectationStrength,
  networkSummary,
  consoleSummary,
  uiSignals: _uiSignals,
  evidenceSignals,
  comparisons: _comparisons,
  attemptMeta: _attemptMeta,
  boosts,
  penalties
}) {
  // Each entry is [boostThunk, penaltyThunk]; thunks keep evaluation lazy so
  // only the scorers of the selected finding type are ever touched.
  const dispatch = new Map([
    ['network_silent_failure', [
      () => scoreNetworkSilentFailure({ networkSummary, consoleSummary, evidenceSignals, boosts, penalties }),
      () => penalizeNetworkSilentFailure({ evidenceSignals, penalties })
    ]],
    ['validation_silent_failure', [
      () => scoreValidationSilentFailure({ networkSummary, consoleSummary, evidenceSignals, boosts, penalties }),
      () => penalizeValidationSilentFailure({ evidenceSignals, penalties })
    ]],
    ['missing_feedback_failure', [
      () => scoreMissingFeedbackFailure({ networkSummary, evidenceSignals, boosts, penalties }),
      () => penalizeMissingFeedbackFailure({ evidenceSignals, penalties })
    ]],
    ['no_effect_silent_failure', [
      () => scoreNoEffectSilentFailure({ expectation, evidenceSignals, boosts, penalties }),
      () => penalizeNoEffectSilentFailure({ evidenceSignals, penalties })
    ]],
    ['missing_network_action', [
      () => scoreMissingNetworkAction({ expectation, expectationStrength, evidenceSignals, boosts, penalties }),
      () => penalizeMissingNetworkAction({ evidenceSignals, penalties })
    ]],
    ['missing_state_action', [
      () => scoreMissingStateAction({ expectation, expectationStrength, evidenceSignals, boosts, penalties }),
      () => penalizeMissingStateAction({ evidenceSignals, penalties })
    ]],
    ['navigation_silent_failure', [
      () => scoreNavigationSilentFailure({ expectation, expectationStrength, evidenceSignals, boosts, penalties }),
      () => penalizeNavigationSilentFailure({ evidenceSignals, penalties })
    ]],
    ['partial_navigation_failure', [
      () => scorePartialNavigationFailure({ expectation, expectationStrength, evidenceSignals, boosts, penalties }),
      () => penalizePartialNavigationFailure({ evidenceSignals, penalties })
    ]]
  ]);

  const handlers = dispatch.get(findingType);
  if (!handlers) {
    return { totalBoosts: 0, totalPenalties: 0 };
  }

  // Boosts are computed before penalties, matching the order reasons are recorded in.
  const [computeBoosts, computePenalties] = handlers;
  const totalBoosts = computeBoosts();
  const totalPenalties = computePenalties();

  return { totalBoosts, totalPenalties };
}
|
|
475
|
+
|
|
476
|
+
// ============================================================
|
|
477
|
+
// TYPE-SPECIFIC SCORING FUNCTIONS
|
|
478
|
+
// ============================================================
|
|
479
|
+
|
|
480
|
+
/**
 * Boost score for a network silent failure.
 * Adds a reason string to `boosts` for every rule that applies.
 *
 * @returns {number} Sum of the applied boosts (0 to 24).
 */
function scoreNetworkSilentFailure({ networkSummary: _networkSummary, consoleSummary: _consoleSummary, evidenceSignals, boosts, penalties: _penalties }) {
  const { networkFailed, consoleErrors, uiFeedbackDetected } = evidenceSignals;

  // [condition, points, reason], evaluated in this order
  const rules = [
    [networkFailed, 10, 'Network request failed'],
    [consoleErrors, 8, 'Console errors present'],
    [networkFailed && !uiFeedbackDetected, 6, 'Silent failure: no user feedback on network error']
  ];

  let sum = 0;
  for (const [applies, points, reason] of rules) {
    if (applies) {
      sum += points;
      boosts.push(reason);
    }
  }
  return sum;
}
|
|
180
503
|
|
|
181
|
-
|
|
504
|
+
/**
 * Penalty for a network silent failure: 10 points when UI feedback was
 * detected (the failure was then presumably not silent), otherwise 0.
 * Records the reason in `penalties` when the penalty applies.
 */
function penalizeNetworkSilentFailure({ evidenceSignals, penalties }) {
  if (!evidenceSignals.uiFeedbackDetected) {
    return 0;
  }

  penalties.push('UI feedback detected (suggests not silent)');
  return 10;
}
|
|
182
515
|
|
|
183
|
-
function
|
|
516
|
+
/**
 * Boost score for a validation silent failure.
 * Reasons are appended to `boosts` only when that array was supplied.
 *
 * @returns {number} Sum of the applied boosts (0, 10 or 18).
 */
function scoreValidationSilentFailure({ networkSummary: _networkSummary, consoleSummary: _consoleSummary, evidenceSignals, boosts, penalties: _penalties }) {
  const { consoleErrors, uiFeedbackDetected } = evidenceSignals;

  // [condition, points, reason], evaluated in this order
  const rules = [
    // Validation errors were logged to the console
    [consoleErrors, 10, 'Validation errors in console'],
    // ...and nothing was shown to the user
    [consoleErrors && !uiFeedbackDetected, 8, 'Silent validation: errors logged but no visible feedback']
  ];

  let sum = 0;
  for (const [applies, points, reason] of rules) {
    if (!applies) continue;
    sum += points;
    if (boosts) boosts.push(reason);
  }
  return sum;
}
|
|
533
|
+
|
|
534
|
+
/**
 * Penalty for a validation silent failure: 10 points when UI feedback was
 * detected, otherwise 0. Records the reason in `penalties` when it applies.
 */
function penalizeValidationSilentFailure({ evidenceSignals, penalties }) {
  if (!evidenceSignals.uiFeedbackDetected) {
    return 0;
  }

  penalties.push('Error feedback visible (not silent)');
  return 10;
}
|
|
545
|
+
|
|
546
|
+
/**
 * Boost score for a missing-feedback failure.
 * Adds a reason string to `boosts` for every rule that applies.
 *
 * @returns {number} Sum of the applied boosts (0 to 18).
 */
function scoreMissingFeedbackFailure({ networkSummary: _networkSummary, evidenceSignals, boosts, penalties: _penalties }) {
  const { slowRequests, networkFailed, uiFeedbackDetected } = evidenceSignals;

  // [condition, points, reason], evaluated in this order
  const rules = [
    // Slow/pending requests
    [slowRequests, 10, 'Slow requests detected'],
    // NOTE(review): keyed on failed requests, not on network activity in
    // general, although the reason text says "activity" - confirm intent.
    [networkFailed && !uiFeedbackDetected, 8, 'Network activity without user feedback']
  ];

  let sum = 0;
  for (const [applies, points, reason] of rules) {
    if (applies) {
      sum += points;
      boosts.push(reason);
    }
  }
  return sum;
}
|
|
227
563
|
|
|
228
|
-
function
|
|
564
|
+
function penalizeMissingFeedbackFailure({ evidenceSignals, penalties }) {
|
|
229
565
|
let total = 0;
|
|
230
566
|
|
|
231
|
-
// -
|
|
232
|
-
if (
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
reasons.push('User feedback detected (reduces confidence)');
|
|
567
|
+
// -10 if loading feedback detected
|
|
568
|
+
if (evidenceSignals.uiFeedbackDetected) {
|
|
569
|
+
total += 10;
|
|
570
|
+
penalties.push('Loading indicator detected');
|
|
236
571
|
}
|
|
237
572
|
|
|
238
573
|
return total;
|
|
239
574
|
}
|
|
240
575
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
function scoreValidationSilentFailure({ networkSummary, consoleSummary, hasErrorFeedback, attemptMeta, points, reasons }) {
|
|
576
|
+
function scoreNoEffectSilentFailure({ expectation: _expectation, evidenceSignals, boosts, penalties: _penalties }) {
|
|
244
577
|
let total = 0;
|
|
245
578
|
|
|
246
|
-
// +
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
total += 15;
|
|
251
|
-
reasons.push(`${invalidFieldsCount} invalid form field(s) detected`);
|
|
579
|
+
// +10 if URL should have changed but didn't
|
|
580
|
+
if (!evidenceSignals.urlChanged) {
|
|
581
|
+
total += 10;
|
|
582
|
+
boosts.push('Expected URL change did not occur');
|
|
252
583
|
}
|
|
253
584
|
|
|
254
|
-
// +
|
|
255
|
-
if (
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
reasons.push('Validation errors logged to console');
|
|
585
|
+
// +6 if DOM unchanged
|
|
586
|
+
if (!evidenceSignals.domChanged) {
|
|
587
|
+
total += 6;
|
|
588
|
+
boosts.push('DOM state unchanged');
|
|
259
589
|
}
|
|
260
590
|
|
|
261
|
-
// +
|
|
262
|
-
if (!
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
reasons.push('No visible validation error message');
|
|
591
|
+
// +5 if screenshot unchanged
|
|
592
|
+
if (!evidenceSignals.screenshotChanged) {
|
|
593
|
+
total += 5;
|
|
594
|
+
boosts.push('No visible changes');
|
|
266
595
|
}
|
|
267
596
|
|
|
268
597
|
return total;
|
|
269
598
|
}
|
|
270
599
|
|
|
271
|
-
function
|
|
600
|
+
function penalizeNoEffectSilentFailure({ evidenceSignals, penalties }) {
|
|
272
601
|
let total = 0;
|
|
273
602
|
|
|
274
|
-
// -
|
|
275
|
-
if (
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
603
|
+
// -10 if network activity (might be a real effect)
|
|
604
|
+
if (evidenceSignals.networkFailed) {
|
|
605
|
+
total += 10;
|
|
606
|
+
penalties.push('Network activity detected (potential effect)');
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
// -8 if UI feedback changed
|
|
610
|
+
if (evidenceSignals.uiFeedbackDetected) {
|
|
611
|
+
total += 8;
|
|
612
|
+
penalties.push('UI feedback changed (potential effect)');
|
|
279
613
|
}
|
|
280
614
|
|
|
281
615
|
return total;
|
|
282
616
|
}
|
|
283
617
|
|
|
284
|
-
|
|
618
|
+
function scoreMissingNetworkAction({ expectation, expectationStrength, evidenceSignals, boosts, penalties: _penalties }) {
|
|
619
|
+
let total = 0;
|
|
620
|
+
|
|
621
|
+
// +10 if PROVEN expectation
|
|
622
|
+
if (expectationStrength === 'PROVEN') {
|
|
623
|
+
total += 10;
|
|
624
|
+
boosts.push('Code promise verified via AST analysis');
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
// +8 if zero network activity (strong evidence of missing action)
|
|
628
|
+
if (!evidenceSignals.networkFailed && (expectation?.totalRequests || 0) === 0) {
|
|
629
|
+
total += 8;
|
|
630
|
+
boosts.push('Zero network activity despite code promise');
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
// +6 if console errors may explain why action didn't fire
|
|
634
|
+
if (evidenceSignals.consoleErrors) {
|
|
635
|
+
total += 6;
|
|
636
|
+
boosts.push('Console errors may have prevented action');
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
return total;
|
|
640
|
+
}
|
|
285
641
|
|
|
286
|
-
function
|
|
642
|
+
function penalizeMissingNetworkAction({ evidenceSignals, penalties }) {
|
|
287
643
|
let total = 0;
|
|
288
644
|
|
|
289
|
-
//
|
|
290
|
-
if (
|
|
291
|
-
const slowestDuration = networkSummary.slowRequests?.[0]?.duration || 0;
|
|
292
|
-
points.plus.slowRequest = 15;
|
|
645
|
+
// -15 if there WAS network activity (promise may be fulfilled differently)
|
|
646
|
+
if (evidenceSignals.networkFailed) {
|
|
293
647
|
total += 15;
|
|
294
|
-
|
|
648
|
+
penalties.push('Other network requests occurred');
|
|
295
649
|
}
|
|
296
650
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
651
|
+
return total;
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
function scoreMissingStateAction({ expectation: _expectation, expectationStrength, evidenceSignals, boosts, penalties: _penalties }) {
|
|
655
|
+
let total = 0;
|
|
656
|
+
|
|
657
|
+
// +10 if PROVEN expectation
|
|
658
|
+
if (expectationStrength === 'PROVEN') {
|
|
300
659
|
total += 10;
|
|
301
|
-
|
|
660
|
+
boosts.push('State mutation proven via cross-file analysis');
|
|
302
661
|
}
|
|
303
662
|
|
|
304
|
-
// +
|
|
305
|
-
if (!
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
reasons.push('No loading indicator shown');
|
|
663
|
+
// +8 if no DOM changes
|
|
664
|
+
if (!evidenceSignals.domChanged) {
|
|
665
|
+
total += 8;
|
|
666
|
+
boosts.push('DOM unchanged (no state mutation visible)');
|
|
309
667
|
}
|
|
310
668
|
|
|
311
669
|
return total;
|
|
312
670
|
}
|
|
313
671
|
|
|
314
|
-
function
|
|
672
|
+
function penalizeMissingStateAction({ evidenceSignals, penalties }) {
|
|
315
673
|
let total = 0;
|
|
316
674
|
|
|
317
|
-
// -10 if
|
|
318
|
-
if (
|
|
319
|
-
points.minus.hasLoadingFeedback = 10;
|
|
675
|
+
// -10 if network activity (async state update possible)
|
|
676
|
+
if (evidenceSignals.networkFailed) {
|
|
320
677
|
total += 10;
|
|
321
|
-
|
|
678
|
+
penalties.push('Network activity (deferred state update possible)');
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
// -8 if UI feedback
|
|
682
|
+
if (evidenceSignals.uiFeedbackDetected) {
|
|
683
|
+
total += 8;
|
|
684
|
+
penalties.push('UI feedback suggests state managed differently');
|
|
322
685
|
}
|
|
323
686
|
|
|
324
687
|
return total;
|
|
325
688
|
}
|
|
326
689
|
|
|
327
|
-
//
|
|
328
|
-
|
|
329
|
-
function scoreNoEffectSilentFailure({ expectation, comparisons, networkSummary, hasAnyFeedback, points, reasons }) {
|
|
690
|
+
// NAVIGATION INTELLIGENCE v2: Navigation failure scoring
|
|
691
|
+
function scoreNavigationSilentFailure({ expectation: _expectation, expectationStrength: _expectationStrength, evidenceSignals, boosts, penalties: _penalties }) {
|
|
330
692
|
let total = 0;
|
|
331
693
|
|
|
332
|
-
// +
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
if (expectsNavigation && !comparisons.hasUrlChange) {
|
|
337
|
-
points.plus.expectedNavNoUrl = 15;
|
|
338
|
-
total += 15;
|
|
339
|
-
reasons.push('Expected navigation did not occur');
|
|
694
|
+
// +10 if URL should have changed but didn't
|
|
695
|
+
if (!evidenceSignals.urlChanged) {
|
|
696
|
+
total += 10;
|
|
697
|
+
boosts.push('Expected URL change did not occur');
|
|
340
698
|
}
|
|
341
699
|
|
|
342
|
-
// +
|
|
343
|
-
if (!
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
reasons.push('No DOM changes detected');
|
|
700
|
+
// +8 if no UI feedback
|
|
701
|
+
if (!evidenceSignals.uiFeedbackDetected) {
|
|
702
|
+
total += 8;
|
|
703
|
+
boosts.push('No user-visible feedback on navigation failure');
|
|
347
704
|
}
|
|
348
705
|
|
|
349
|
-
// +
|
|
350
|
-
if (
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
reasons.push('No visible changes in screenshot');
|
|
706
|
+
// +6 if console errors (navigation errors logged)
|
|
707
|
+
if (evidenceSignals.consoleErrors) {
|
|
708
|
+
total += 6;
|
|
709
|
+
boosts.push('Navigation errors in console');
|
|
354
710
|
}
|
|
355
711
|
|
|
356
712
|
return total;
|
|
357
713
|
}
|
|
358
714
|
|
|
359
|
-
function
|
|
715
|
+
function penalizeNavigationSilentFailure({ evidenceSignals, penalties }) {
|
|
360
716
|
let total = 0;
|
|
361
717
|
|
|
362
|
-
// -10 if
|
|
363
|
-
if (
|
|
364
|
-
points.minus.hasNetworkActivity = 10;
|
|
718
|
+
// -10 if UI feedback present (shouldn't be silent failure)
|
|
719
|
+
if (evidenceSignals.uiFeedbackDetected) {
|
|
365
720
|
total += 10;
|
|
366
|
-
|
|
721
|
+
penalties.push('UI feedback detected (suggests navigation feedback provided)');
|
|
367
722
|
}
|
|
368
723
|
|
|
369
|
-
// -
|
|
370
|
-
if (
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
reasons.push('UI signal changed (potential effect)');
|
|
724
|
+
// -5 if URL changed (navigation might have succeeded)
|
|
725
|
+
if (evidenceSignals.urlChanged) {
|
|
726
|
+
total += 5;
|
|
727
|
+
penalties.push('URL changed (navigation may have succeeded)');
|
|
374
728
|
}
|
|
375
729
|
|
|
376
730
|
return total;
|
|
377
731
|
}
|
|
378
732
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
function detectErrorFeedback(uiSignals) {
|
|
382
|
-
const before = uiSignals.before || {};
|
|
383
|
-
const after = uiSignals.after || {};
|
|
384
|
-
const changes = uiSignals.changes || {};
|
|
733
|
+
function scorePartialNavigationFailure({ expectation: _expectation, expectationStrength: _expectationStrength, evidenceSignals, boosts, penalties: _penalties }) {
|
|
734
|
+
let total = 0;
|
|
385
735
|
|
|
386
|
-
//
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
736
|
+
// +10 if history changed but target not reached
|
|
737
|
+
if (evidenceSignals.urlChanged && !evidenceSignals.uiFeedbackDetected) {
|
|
738
|
+
total += 10;
|
|
739
|
+
boosts.push('Navigation started but target not reached');
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
// +8 if no UI feedback
|
|
743
|
+
if (!evidenceSignals.uiFeedbackDetected) {
|
|
744
|
+
total += 8;
|
|
745
|
+
boosts.push('No user-visible feedback on partial navigation');
|
|
746
|
+
}
|
|
394
747
|
|
|
395
|
-
|
|
396
|
-
return after.hasLoadingIndicator ||
|
|
397
|
-
(before.hasLoadingIndicator !== after.hasLoadingIndicator);
|
|
748
|
+
return total;
|
|
398
749
|
}
|
|
399
750
|
|
|
400
|
-
function
|
|
401
|
-
|
|
751
|
+
function penalizePartialNavigationFailure({ evidenceSignals, penalties }) {
|
|
752
|
+
let total = 0;
|
|
402
753
|
|
|
403
|
-
//
|
|
404
|
-
|
|
754
|
+
// -10 if UI feedback present (shouldn't be partial failure)
|
|
755
|
+
if (evidenceSignals.uiFeedbackDetected) {
|
|
756
|
+
total += 10;
|
|
757
|
+
penalties.push('UI feedback detected (suggests navigation feedback provided)');
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
return total;
|
|
405
761
|
}
|
|
406
762
|
|
|
407
|
-
|
|
763
|
+
// ============================================================
|
|
764
|
+
// EXPLANATION GENERATION (ORDERED BY IMPORTANCE)
|
|
765
|
+
// ============================================================
|
|
408
766
|
|
|
409
|
-
|
|
410
|
-
|
|
767
|
+
function generateExplanations(boosts, penalties, expectationStrength, _evidenceSignals) {
|
|
768
|
+
const explain = [];
|
|
411
769
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
reasons.push('Code contract proven via AST analysis');
|
|
417
|
-
}
|
|
770
|
+
// Add penalties first (most important negatives)
|
|
771
|
+
if (penalties.length > 0) {
|
|
772
|
+
explain.push(...penalties);
|
|
773
|
+
}
|
|
418
774
|
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
reasons.push('JavaScript errors may have prevented request');
|
|
424
|
-
}
|
|
775
|
+
// Add boosts (evidence in favor)
|
|
776
|
+
if (boosts.length > 0) {
|
|
777
|
+
explain.push(...boosts);
|
|
778
|
+
}
|
|
425
779
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
reasons.push('Zero network activity despite code promise');
|
|
431
|
-
}
|
|
780
|
+
// Add expectation strength note if not proven
|
|
781
|
+
if (expectationStrength !== 'PROVEN') {
|
|
782
|
+
explain.push(`Expectation: ${expectationStrength}`);
|
|
783
|
+
}
|
|
432
784
|
|
|
433
|
-
|
|
785
|
+
// Remove duplicates while preserving order
|
|
786
|
+
const seen = new Set();
|
|
787
|
+
const unique = [];
|
|
788
|
+
for (const item of explain) {
|
|
789
|
+
if (!seen.has(item)) {
|
|
790
|
+
seen.add(item);
|
|
791
|
+
unique.push(item);
|
|
792
|
+
}
|
|
434
793
|
}
|
|
794
|
+
|
|
795
|
+
return unique;
|
|
796
|
+
}
|
|
435
797
|
|
|
436
|
-
|
|
437
|
-
|
|
798
|
+
/**
|
|
799
|
+
* Generate confidence explanation for Phase 9: Reality Confidence & Explanation Layer.
|
|
800
|
+
* Provides whyThisConfidence, whatWouldIncreaseConfidence, whatWouldReduceConfidence.
|
|
801
|
+
* Phase 3: Also includes boundaryExplanation for near-threshold decisions.
|
|
802
|
+
*/
|
|
803
|
+
function generateConfidenceExplanation({
|
|
804
|
+
level,
|
|
805
|
+
score: _score,
|
|
806
|
+
expectationStrength,
|
|
807
|
+
sensorsPresent,
|
|
808
|
+
allSensorsPresent,
|
|
809
|
+
evidenceSignals: _evidenceSignals,
|
|
810
|
+
boosts,
|
|
811
|
+
penalties,
|
|
812
|
+
attemptMeta,
|
|
813
|
+
boundaryExplanation = null // Phase 3: Optional boundary reasoning
|
|
814
|
+
}) {
|
|
815
|
+
const whyThisConfidence = [];
|
|
816
|
+
const whatWouldIncreaseConfidence = [];
|
|
817
|
+
const whatWouldReduceConfidence = [];
|
|
818
|
+
|
|
819
|
+
// Phase 3: If near threshold, include boundary reasoning first
|
|
820
|
+
if (boundaryExplanation) {
|
|
821
|
+
whyThisConfidence.push(boundaryExplanation);
|
|
822
|
+
}
|
|
438
823
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
824
|
+
// WHY THIS CONFIDENCE: Explain current level
|
|
825
|
+
if (level === 'HIGH') {
|
|
826
|
+
whyThisConfidence.push('High confidence: expectation is proven and all sensors captured evidence');
|
|
827
|
+
if (expectationStrength === 'PROVEN') {
|
|
828
|
+
whyThisConfidence.push('Expectation is proven from source code');
|
|
829
|
+
}
|
|
830
|
+
if (allSensorsPresent) {
|
|
831
|
+
whyThisConfidence.push('All sensors (network, console, UI) were active');
|
|
444
832
|
}
|
|
833
|
+
if (boosts.length > 0) {
|
|
834
|
+
whyThisConfidence.push(`Strong evidence: ${boosts.length} positive signal(s)`);
|
|
835
|
+
}
|
|
836
|
+
} else if (level === 'MEDIUM') {
|
|
837
|
+
whyThisConfidence.push('Medium confidence: some evidence suggests a failure, but uncertainty remains');
|
|
838
|
+
if (expectationStrength === 'PROVEN') {
|
|
839
|
+
whyThisConfidence.push('Expectation is proven from source code');
|
|
840
|
+
} else {
|
|
841
|
+
whyThisConfidence.push(`Expectation strength: ${expectationStrength} (not proven)`);
|
|
842
|
+
}
|
|
843
|
+
if (!allSensorsPresent) {
|
|
844
|
+
const missing = [];
|
|
845
|
+
if (!sensorsPresent.network) missing.push('network');
|
|
846
|
+
if (!sensorsPresent.console) missing.push('console');
|
|
847
|
+
if (!sensorsPresent.ui) missing.push('UI');
|
|
848
|
+
whyThisConfidence.push(`Missing sensor data: ${missing.join(', ')}`);
|
|
849
|
+
}
|
|
850
|
+
if (penalties.length > 0) {
|
|
851
|
+
whyThisConfidence.push(`Reducing factors: ${penalties.length} uncertainty signal(s)`);
|
|
852
|
+
}
|
|
853
|
+
} else {
|
|
854
|
+
whyThisConfidence.push('Low confidence: limited evidence or expectation not proven');
|
|
855
|
+
if (expectationStrength !== 'PROVEN') {
|
|
856
|
+
whyThisConfidence.push(`Expectation strength: ${expectationStrength} (not proven from code)`);
|
|
857
|
+
}
|
|
858
|
+
if (!allSensorsPresent) {
|
|
859
|
+
whyThisConfidence.push('Some sensors were not active, reducing confidence');
|
|
860
|
+
}
|
|
861
|
+
if (attemptMeta && !attemptMeta.repeated) {
|
|
862
|
+
whyThisConfidence.push('Not repeated (single observation may be unreliable)');
|
|
863
|
+
}
|
|
864
|
+
}
|
|
445
865
|
|
|
446
|
-
|
|
866
|
+
// WHAT WOULD INCREASE CONFIDENCE
|
|
867
|
+
if (level !== 'HIGH') {
|
|
868
|
+
if (expectationStrength !== 'PROVEN') {
|
|
869
|
+
whatWouldIncreaseConfidence.push('Make the expectation proven by adding explicit code that promises the behavior');
|
|
870
|
+
}
|
|
871
|
+
if (!allSensorsPresent) {
|
|
872
|
+
const missing = [];
|
|
873
|
+
if (!sensorsPresent.network) missing.push('network monitoring');
|
|
874
|
+
if (!sensorsPresent.console) missing.push('console error detection');
|
|
875
|
+
if (!sensorsPresent.ui) missing.push('UI change detection');
|
|
876
|
+
whatWouldIncreaseConfidence.push(`Enable missing sensors: ${missing.join(', ')}`);
|
|
877
|
+
}
|
|
878
|
+
if (attemptMeta && !attemptMeta.repeated && level === 'LOW') {
|
|
879
|
+
whatWouldIncreaseConfidence.push('Repeat the interaction multiple times to confirm consistency');
|
|
880
|
+
}
|
|
881
|
+
if (boosts.length === 0) {
|
|
882
|
+
whatWouldIncreaseConfidence.push('Add stronger evidence signals (network requests, console errors, UI changes)');
|
|
883
|
+
}
|
|
447
884
|
}
|
|
448
|
-
|
|
449
|
-
//
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
// +15 if PROVEN expectation with handlerRef (TS cross-file proof)
|
|
455
|
-
if (expectation?.proof === 'PROVEN_EXPECTATION' && attemptMeta.handlerRef) {
|
|
456
|
-
points.plus.provenHandlerRef = 15;
|
|
457
|
-
total += 15;
|
|
458
|
-
reasons.push('State mutation proven via TS cross-file analysis');
|
|
885
|
+
|
|
886
|
+
// WHAT WOULD REDUCE CONFIDENCE
|
|
887
|
+
if (level !== 'LOW') {
|
|
888
|
+
if (expectationStrength === 'PROVEN') {
|
|
889
|
+
whatWouldReduceConfidence.push('If expectation becomes unproven (code changes, expectation removed)');
|
|
459
890
|
}
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
const stateUI = sensors.stateUI || {};
|
|
463
|
-
if (stateUI.changed === false) {
|
|
464
|
-
points.plus.noStateChange = 10;
|
|
465
|
-
total += 10;
|
|
466
|
-
reasons.push('State UI signals show no change');
|
|
891
|
+
if (allSensorsPresent) {
|
|
892
|
+
whatWouldReduceConfidence.push('If sensors become unavailable or disabled');
|
|
467
893
|
}
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
if (comparisons.hasDomChange === false) {
|
|
471
|
-
points.plus.noDomChange = 5;
|
|
472
|
-
total += 5;
|
|
473
|
-
reasons.push('DOM unchanged despite promised state mutation');
|
|
894
|
+
if (boosts.length > 0) {
|
|
895
|
+
whatWouldReduceConfidence.push('If positive evidence signals disappear (network succeeds, UI feedback appears)');
|
|
474
896
|
}
|
|
475
|
-
|
|
476
|
-
return total;
|
|
477
897
|
}
|
|
898
|
+
if (penalties.length === 0 && level === 'HIGH') {
|
|
899
|
+
whatWouldReduceConfidence.push('If uncertainty factors appear (URL changes, partial effects, missing data)');
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
return {
|
|
903
|
+
whyThisConfidence: whyThisConfidence.length > 0 ? whyThisConfidence : ['Confidence based on available evidence'],
|
|
904
|
+
whatWouldIncreaseConfidence: whatWouldIncreaseConfidence.length > 0 ? whatWouldIncreaseConfidence : ['Already at maximum confidence for available evidence'],
|
|
905
|
+
whatWouldReduceConfidence: whatWouldReduceConfidence.length > 0 ? whatWouldReduceConfidence : ['No factors would reduce confidence further']
|
|
906
|
+
};
|
|
907
|
+
}
|
|
478
908
|
|
|
479
|
-
|
|
480
|
-
|
|
909
|
+
// ============================================================
|
|
910
|
+
// LEGACY EXPORTS (FOR BACKWARD COMPATIBILITY)
|
|
911
|
+
// ============================================================
|
|
481
912
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
points.minus.hadNetworkActivity = 10;
|
|
485
|
-
total += 10;
|
|
486
|
-
reasons.push('Network activity may be causing deferred state update');
|
|
487
|
-
}
|
|
913
|
+
// PHASE 3: Export sensor validation functions for testing
|
|
914
|
+
export { hasNetworkData, hasConsoleData, hasUiData };
|
|
488
915
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
}
|
|
916
|
+
// Detect error feedback (legacy helper)
|
|
917
|
+
function _detectErrorFeedback(uiSignals) {
|
|
918
|
+
const before = uiSignals?.before || {};
|
|
919
|
+
const after = uiSignals?.after || {};
|
|
920
|
+
return after.hasErrorSignal && !before.hasErrorSignal;
|
|
921
|
+
}
|
|
496
922
|
|
|
497
|
-
|
|
498
|
-
|
|
923
|
+
// Detect loading feedback (legacy helper)
|
|
924
|
+
function _detectLoadingFeedback(uiSignals) {
|
|
925
|
+
const after = uiSignals?.after || {};
|
|
926
|
+
return after.hasLoadingIndicator;
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
// Detect status feedback (legacy helper)
|
|
930
|
+
function _detectStatusFeedback(uiSignals) {
|
|
931
|
+
const after = uiSignals?.after || {};
|
|
932
|
+
return after.hasStatusSignal || after.hasLiveRegion || after.hasDialog;
|
|
933
|
+
}
|