@veraxhq/verax 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -88
- package/bin/verax.js +11 -452
- package/package.json +14 -36
- package/src/cli/commands/default.js +523 -0
- package/src/cli/commands/doctor.js +165 -0
- package/src/cli/commands/inspect.js +109 -0
- package/src/cli/commands/run.js +402 -0
- package/src/cli/entry.js +196 -0
- package/src/cli/util/atomic-write.js +37 -0
- package/src/cli/util/detection-engine.js +296 -0
- package/src/cli/util/env-url.js +33 -0
- package/src/cli/util/errors.js +44 -0
- package/src/cli/util/events.js +34 -0
- package/src/cli/util/expectation-extractor.js +378 -0
- package/src/cli/util/findings-writer.js +31 -0
- package/src/cli/util/idgen.js +87 -0
- package/src/cli/util/learn-writer.js +39 -0
- package/src/cli/util/observation-engine.js +366 -0
- package/src/cli/util/observe-writer.js +25 -0
- package/src/cli/util/paths.js +29 -0
- package/src/cli/util/project-discovery.js +277 -0
- package/src/cli/util/project-writer.js +26 -0
- package/src/cli/util/redact.js +128 -0
- package/src/cli/util/run-id.js +30 -0
- package/src/cli/util/summary-writer.js +32 -0
- package/src/verax/cli/ci-summary.js +35 -0
- package/src/verax/cli/context-explanation.js +89 -0
- package/src/verax/cli/doctor.js +277 -0
- package/src/verax/cli/error-normalizer.js +154 -0
- package/src/verax/cli/explain-output.js +105 -0
- package/src/verax/cli/finding-explainer.js +130 -0
- package/src/verax/cli/init.js +237 -0
- package/src/verax/cli/run-overview.js +163 -0
- package/src/verax/cli/url-safety.js +101 -0
- package/src/verax/cli/wizard.js +98 -0
- package/src/verax/cli/zero-findings-explainer.js +57 -0
- package/src/verax/cli/zero-interaction-explainer.js +127 -0
- package/src/verax/core/action-classifier.js +86 -0
- package/src/verax/core/budget-engine.js +218 -0
- package/src/verax/core/canonical-outcomes.js +157 -0
- package/src/verax/core/decision-snapshot.js +335 -0
- package/src/verax/core/determinism-model.js +403 -0
- package/src/verax/core/incremental-store.js +237 -0
- package/src/verax/core/invariants.js +356 -0
- package/src/verax/core/promise-model.js +230 -0
- package/src/verax/core/replay-validator.js +350 -0
- package/src/verax/core/replay.js +222 -0
- package/src/verax/core/run-id.js +175 -0
- package/src/verax/core/run-manifest.js +99 -0
- package/src/verax/core/silence-impact.js +369 -0
- package/src/verax/core/silence-model.js +521 -0
- package/src/verax/detect/comparison.js +2 -34
- package/src/verax/detect/confidence-engine.js +764 -329
- package/src/verax/detect/detection-engine.js +293 -0
- package/src/verax/detect/evidence-index.js +177 -0
- package/src/verax/detect/expectation-model.js +194 -172
- package/src/verax/detect/explanation-helpers.js +187 -0
- package/src/verax/detect/finding-detector.js +450 -0
- package/src/verax/detect/findings-writer.js +44 -8
- package/src/verax/detect/flow-detector.js +366 -0
- package/src/verax/detect/index.js +172 -286
- package/src/verax/detect/interactive-findings.js +613 -0
- package/src/verax/detect/signal-mapper.js +308 -0
- package/src/verax/detect/verdict-engine.js +563 -0
- package/src/verax/evidence-index-writer.js +61 -0
- package/src/verax/index.js +90 -14
- package/src/verax/intel/effect-detector.js +368 -0
- package/src/verax/intel/handler-mapper.js +249 -0
- package/src/verax/intel/index.js +281 -0
- package/src/verax/intel/route-extractor.js +280 -0
- package/src/verax/intel/ts-program.js +256 -0
- package/src/verax/intel/vue-navigation-extractor.js +579 -0
- package/src/verax/intel/vue-router-extractor.js +323 -0
- package/src/verax/learn/action-contract-extractor.js +335 -101
- package/src/verax/learn/ast-contract-extractor.js +95 -5
- package/src/verax/learn/flow-extractor.js +172 -0
- package/src/verax/learn/manifest-writer.js +97 -47
- package/src/verax/learn/project-detector.js +40 -0
- package/src/verax/learn/route-extractor.js +27 -96
- package/src/verax/learn/state-extractor.js +212 -0
- package/src/verax/learn/static-extractor-navigation.js +114 -0
- package/src/verax/learn/static-extractor-validation.js +88 -0
- package/src/verax/learn/static-extractor.js +112 -4
- package/src/verax/learn/truth-assessor.js +24 -21
- package/src/verax/observe/aria-sensor.js +211 -0
- package/src/verax/observe/browser.js +10 -5
- package/src/verax/observe/console-sensor.js +1 -17
- package/src/verax/observe/domain-boundary.js +10 -1
- package/src/verax/observe/expectation-executor.js +512 -0
- package/src/verax/observe/flow-matcher.js +143 -0
- package/src/verax/observe/focus-sensor.js +196 -0
- package/src/verax/observe/human-driver.js +643 -275
- package/src/verax/observe/index.js +908 -27
- package/src/verax/observe/index.js.backup +1 -0
- package/src/verax/observe/interaction-discovery.js +365 -14
- package/src/verax/observe/interaction-runner.js +563 -198
- package/src/verax/observe/loading-sensor.js +139 -0
- package/src/verax/observe/navigation-sensor.js +255 -0
- package/src/verax/observe/network-sensor.js +55 -7
- package/src/verax/observe/observed-expectation-deriver.js +186 -0
- package/src/verax/observe/observed-expectation.js +305 -0
- package/src/verax/observe/page-frontier.js +234 -0
- package/src/verax/observe/settle.js +37 -17
- package/src/verax/observe/state-sensor.js +389 -0
- package/src/verax/observe/timing-sensor.js +228 -0
- package/src/verax/observe/traces-writer.js +61 -20
- package/src/verax/observe/ui-signal-sensor.js +136 -17
- package/src/verax/scan-summary-writer.js +77 -15
- package/src/verax/shared/artifact-manager.js +110 -8
- package/src/verax/shared/budget-profiles.js +136 -0
- package/src/verax/shared/ci-detection.js +39 -0
- package/src/verax/shared/config-loader.js +170 -0
- package/src/verax/shared/dynamic-route-utils.js +218 -0
- package/src/verax/shared/expectation-coverage.js +44 -0
- package/src/verax/shared/expectation-prover.js +81 -0
- package/src/verax/shared/expectation-tracker.js +201 -0
- package/src/verax/shared/expectations-writer.js +60 -0
- package/src/verax/shared/first-run.js +44 -0
- package/src/verax/shared/progress-reporter.js +171 -0
- package/src/verax/shared/retry-policy.js +14 -1
- package/src/verax/shared/root-artifacts.js +49 -0
- package/src/verax/shared/scan-budget.js +86 -0
- package/src/verax/shared/url-normalizer.js +162 -0
- package/src/verax/shared/zip-artifacts.js +65 -0
- package/src/verax/validate/context-validator.js +244 -0
- package/src/verax/validate/context-validator.js.bak +0 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL normalization for frontier deduplication and canonical form
|
|
3
|
+
* Prevents infinite frontier growth from utm_* params and other tracking/session identifiers
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Query-parameter name prefixes that mark a parameter as tracking/session noise.
 * isTrackingParam() compares these against the lowercased parameter name with
 * startsWith(), so 'utm_' covers every utm_* parameter.
 */
const TRACKING_PARAM_PREFIXES = [
  'utm_', 'gclid', 'fbclid', 'msclkid',
  'click_id', 'session', 'sid', 'tracking',
  'ref', 'source', 'campaign', 'medium'
];
|
|
23
|
+
|
|
24
|
+
/**
 * Decide whether a query parameter should be dropped as tracking/session noise.
 * The comparison is case-insensitive and prefix-based: the name matches when it
 * starts with any entry of TRACKING_PARAM_PREFIXES.
 * @param {string} name - Parameter name
 * @returns {boolean} True when the name starts with a tracking prefix
 */
function isTrackingParam(name) {
  const candidate = name.toLowerCase();
  for (const prefix of TRACKING_PARAM_PREFIXES) {
    if (candidate.startsWith(prefix.toLowerCase())) {
      return true;
    }
  }
  return false;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Normalize a URL to a canonical form for deduplication:
 *   1. Lowercase the protocol and host
 *   2. Remove the hash fragment
 *   3. Drop tracking/session query parameters
 *   4. Sort the remaining query parameters by name, using a locale-independent
 *      code-unit comparison; repeated names keep their original relative order
 *   5. Re-serialize the query through URLSearchParams so percent-encoding of
 *      the query string is consistent
 *
 * @param {string} url - URL to normalize
 * @returns {string} Normalized URL, or the input unchanged when it cannot be parsed
 */
export function normalizeUrl(url) {
  try {
    const parsed = new URL(url);

    // Lowercase protocol and host (a no-op whenever the parser already did so)
    parsed.protocol = parsed.protocol.toLowerCase();
    parsed.hostname = parsed.hostname.toLowerCase();

    // Fragments never identify a distinct page for deduplication purposes
    parsed.hash = '';

    // Compare names by code unit rather than localeCompare(): the canonical
    // form must not depend on the locale of the machine running the scan.
    const keptEntries = Array.from(parsed.searchParams.entries())
      .filter(([name]) => !isTrackingParam(name))
      .sort(([nameA], [nameB]) => {
        if (nameA < nameB) return -1;
        if (nameA > nameB) return 1;
        return 0;
      });

    // An empty query also removes the '?' from the serialized URL
    parsed.search = new URLSearchParams(keptEntries).toString();

    return parsed.toString();
  } catch (err) {
    // If URL parsing fails, return original
    return url;
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Canonicalize a URL. This is normalizeUrl() under a name that reads better
 * in frontier deduplication code.
 * @param {string} url - URL to canonicalize
 * @returns {string} Canonical URL
 */
export function canonicalizeUrl(url) {
  const canonical = normalizeUrl(url);
  return canonical;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Drop tracking parameters from a URL.
 * Preserves the hash and the order of all non-tracking parameters.
 *
 * @param {string} url - URL to clean
 * @returns {string} URL with tracking params removed, or the input unchanged
 *   when it cannot be parsed
 */
export function dropTrackingParams(url) {
  try {
    const parsed = new URL(url);

    // Build a filtered copy instead of deleting during iteration: the keys()
    // iterator is live, so delete() inside the loop shifts the remaining
    // entries and the parameter following each removed one is skipped.
    const keptEntries = Array.from(parsed.searchParams.entries())
      .filter(([name]) => !isTrackingParam(name));

    parsed.search = new URLSearchParams(keptEntries).toString();
    return parsed.toString();
  } catch (err) {
    return url;
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Compare two URLs by their canonical form.
 * @param {string} url1 - First URL
 * @param {string} url2 - Second URL
 * @returns {boolean} True when both normalize to the same string
 */
export function areUrlsEquivalent(url1, url2) {
  const first = normalizeUrl(url1);
  const second = normalizeUrl(url2);
  return first === second;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Extract the normalized domain of a URL.
 * Only protocol and hostname are used; port, path, query and hash are omitted.
 * @param {string} url - URL
 * @returns {string} Domain (protocol://hostname), or '' when the URL cannot be parsed
 */
export function getDomain(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (err) {
    return '';
  }

  const { protocol, hostname } = parsed;
  return [protocol, hostname].join('//');
}
|
|
139
|
+
|
|
140
|
+
/**
 * Count the tracking parameters present in a URL (diagnostic helper).
 * Every occurrence counts, including repeated names.
 * @param {string} url - URL
 * @returns {number} Count of tracking parameters, or 0 when the URL cannot be parsed
 */
export function countTrackingParams(url) {
  try {
    const { search } = new URL(url);
    const names = Array.from(new URLSearchParams(search).keys());
    return names.reduce(
      (total, name) => (isTrackingParam(name) ? total + 1 : total),
      0
    );
  } catch (err) {
    return 0;
  }
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wave 5 — Artifact Packaging
|
|
3
|
+
*
|
|
4
|
+
* Creates a zip file containing scan artifacts.
|
|
5
|
+
* Cross-platform using archiver library.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { createWriteStream } from 'fs';
|
|
9
|
+
import { dirname, resolve, basename } from 'path';
|
|
10
|
+
import { mkdirSync } from 'fs';
|
|
11
|
+
|
|
12
|
+
// Dynamic import for archiver (dev dependency); the factory is cached in
// module state so the import only happens on first use.
let archiver;

/**
 * Load the archiver factory on demand.
 * @returns {Promise<Function>} Default export of the 'archiver' module
 */
async function getArchiver() {
  if (archiver) {
    return archiver;
  }

  const archiverModule = await import('archiver');
  archiver = archiverModule.default;
  return archiver;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Create a zip file containing artifacts from a run directory.
 * Entries are stored under a top-level folder named after the run directory.
 *
 * @param {string} runDir - Directory containing artifacts (e.g., .verax/runs/<runId>)
 * @param {string} outputPath - Full path where zip should be created (optional, defaults to runDir/artifacts.zip)
 * @returns {Promise<string>} Path to created zip file; rejects when the archive
 *   or the output stream reports an error
 */
export async function createArtifactsZip(runDir, outputPath = null) {
  const Archiver = await getArchiver();

  // Resolve the destination once instead of reassigning the parameter.
  // NOTE(review): the default destination lives inside runDir, so the archive
  // being written may be picked up by the directory walk — confirm and
  // consider excluding it or writing outside runDir.
  const zipPath = outputPath || resolve(runDir, 'artifacts.zip');

  return new Promise((resolvePromise, reject) => {
    // Ensure parent directory exists (a synchronous throw here rejects the promise)
    mkdirSync(dirname(zipPath), { recursive: true });

    const output = createWriteStream(zipPath);
    const archive = Archiver('zip', {
      zlib: { level: 9 } // Maximum compression
    });

    // Without this handler a failing write stream (permissions, disk full, ...)
    // raises an unhandled 'error' event and the promise never settles.
    output.on('error', (err) => {
      archive.abort();
      reject(err);
    });

    archive.on('error', (err) => {
      reject(err);
    });

    // 'close' fires once the file descriptor is closed, i.e. the zip is fully written
    output.on('close', () => {
      resolvePromise(zipPath);
    });

    // Pipe archive data to file
    archive.pipe(output);

    // Add the directory contents recursively under <basename(runDir)>/
    archive.directory(runDir, basename(runDir), { date: new Date() });

    // finalize() may return a promise depending on the archiver version;
    // route a rejection into this promise instead of leaving it floating.
    Promise.resolve(archive.finalize()).catch(reject);
  });
}
|
|
65
|
+
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wave 1 — Context Validator
|
|
3
|
+
*
|
|
4
|
+
* Validates that the target URL matches the project being analyzed.
|
|
5
|
+
* Checks if extracted routes exist on the live site by:
|
|
6
|
+
* 1. Fetching the homepage and parsing internal links
|
|
7
|
+
* 2. Checking if any extracted route paths match internal links
|
|
8
|
+
* 3. For SPAs, also checking if routes return 200 (SPA fallback)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { chromium } from 'playwright';
|
|
12
|
+
import { parse } from 'node-html-parser';
|
|
13
|
+
|
|
14
|
+
// Navigation timeout (ms) applied to every page.goto() during the context check
const CONTEXT_CHECK_TIMEOUT_MS = 8000;

// Upper bound on how many manifest routes are normalized and compared
const MAX_ROUTES_TO_CHECK = 20;

// Upper bound on how many <a href> elements of the homepage are inspected
const MAX_LINKS_TO_PARSE = 100;
|
|
17
|
+
|
|
18
|
+
/**
 * Normalize a route or link path so manifest routes and live links can be
 * compared as plain strings.
 *
 * - Query string and hash are stripped
 * - A leading '/' is added when missing
 * - A trailing 'index.html' segment collapses to its directory at any depth
 *   ('/index.html' -> '/', '/docs/index.html' -> '/docs')
 * - Any other '.html' suffix is removed ('/about.html' -> '/about')
 * - One trailing slash is removed, except for the root path
 *
 * @param {string} path - Path, possibly carrying a query string and/or hash
 * @returns {string} Normalized path; '/' for empty input
 */
function normalizePathForContext(path) {
  if (!path) return '/';

  let normalized = path.split('#')[0].split('?')[0];
  if (!normalized.startsWith('/')) {
    normalized = `/${normalized}`;
  }

  // Normalize common static site patterns (case-insensitive on the suffix)
  const lowered = normalized.toLowerCase();
  if (lowered.endsWith('/index.html')) {
    // Keep the directory's trailing slash; it is trimmed below unless this is root
    normalized = normalized.slice(0, -'index.html'.length);
  } else if (lowered.endsWith('.html')) {
    normalized = normalized.slice(0, -'.html'.length) || '/';
  }

  if (normalized !== '/' && normalized.endsWith('/')) {
    normalized = normalized.slice(0, -1) || '/';
  }

  return normalized || '/';
}
|
|
41
|
+
|
|
42
|
+
/**
 * Validate context by checking if project routes match the live site.
 *
 * Returns early, without launching a browser, when the manifest has no public
 * routes, when baseUrl uses the file:// protocol (reported as valid), or when
 * baseUrl cannot be parsed. Otherwise it loads baseUrl in headless Chromium,
 * collects same-origin link paths, probes up to five routes directly for
 * 'react_spa' / 'nextjs_*' projects, and requires at least half of the checked
 * routes (minimum one) to match a link or be reachable.
 *
 * @param {Object} manifest - Manifest with publicRoutes and projectType
 * @param {string} baseUrl - Target URL to validate against
 * @param {boolean} forced - Whether --force flag was used
 * @returns {Promise<Object>} Context check result (ran, forced, verdict, match
 *   counts, sampleMatched, reason; plus error when the check itself failed)
 */
export async function validateContext(manifest, baseUrl, forced = false) {
  const publicRoutes = manifest.publicRoutes || [];
  const projectType = manifest.projectType || 'unknown';
  const invalidVerdict = forced ? 'INVALID_CONTEXT_FORCED' : 'INVALID_CONTEXT';

  // If no routes extracted, context validation doesn't apply
  if (publicRoutes.length === 0) {
    return {
      ran: false,
      forced,
      verdict: null,
      matchedRoutesCount: 0,
      matchedLinksCount: 0,
      sampleMatched: [],
      reason: 'no_routes_extracted'
    };
  }

  // For file:// URLs (local development/testing), assume context is valid
  // since local file system paths can't be validated reliably
  if (baseUrl.startsWith('file://')) {
    return {
      ran: true,
      forced,
      verdict: 'VALID_CONTEXT',
      matchedRoutesCount: publicRoutes.length,
      matchedLinksCount: publicRoutes.length,
      sampleMatched: publicRoutes.slice(0, 5),
      reason: 'file_protocol_skip_validation'
    };
  }

  let baseOrigin;
  let basePathCandidate;
  try {
    const urlObj = new URL(baseUrl);
    baseOrigin = urlObj.origin;
    // Treat the requested URL path as a known internal link so a site whose
    // homepage has no links can still match when that path is in the manifest
    basePathCandidate = normalizePathForContext(urlObj.pathname);
  } catch (error) {
    return {
      ran: false,
      forced,
      verdict: null,
      matchedRoutesCount: 0,
      matchedLinksCount: 0,
      sampleMatched: [],
      reason: 'invalid_url'
    };
  }

  // Normalize route paths for comparison
  const normalizedRoutes = publicRoutes
    .slice(0, MAX_ROUTES_TO_CHECK)
    .map((route) => normalizePathForContext(route));

  const browser = await chromium.launch({ headless: true });

  try {
    // Context and page are created inside the try so that a failure here still
    // reaches the finally block and the launched browser is closed.
    const context = await browser.newContext({
      viewport: { width: 1280, height: 720 }
    });
    const page = await context.newPage();

    // Fetch homepage and parse internal links
    await page.goto(baseUrl, {
      waitUntil: 'domcontentloaded',
      timeout: CONTEXT_CHECK_TIMEOUT_MS
    });
    await page.waitForTimeout(500); // Allow SPA to render

    const root = parse(await page.content());
    const links = root.querySelectorAll('a[href]');

    const internalLinks = new Set();
    if (basePathCandidate) {
      internalLinks.add(basePathCandidate);
    }
    for (const link of links.slice(0, MAX_LINKS_TO_PARSE)) {
      const href = link.getAttribute('href');
      if (!href) continue;

      try {
        // Resolve relative URLs against the target and keep same-origin ones
        const resolvedUrl = new URL(href, baseUrl);
        if (resolvedUrl.origin === baseOrigin) {
          internalLinks.add(normalizePathForContext(resolvedUrl.pathname));
        }
      } catch (e) {
        // href could not be resolved as a URL; fall back to treating
        // path-like values as internal paths
        if (href.startsWith('/') || (!href.startsWith('http') && !href.startsWith('#'))) {
          const path = href.split('#')[0].split('?')[0];
          internalLinks.add(normalizePathForContext(path));
        }
      }
    }

    // Check route reachability for SPAs (may return 200 due to fallback)
    const reachableRoutes = new Set();
    if (projectType === 'react_spa' || projectType.startsWith('nextjs_')) {
      for (const routePath of normalizedRoutes.slice(0, 5)) {
        const candidates = new Set([routePath]);
        if (routePath === '/') {
          candidates.add('/index.html');
        } else if (!routePath.endsWith('.html')) {
          candidates.add(`${routePath}.html`);
        }

        for (const candidate of candidates) {
          try {
            const routeResponse = await page.goto(baseOrigin + candidate, {
              waitUntil: 'domcontentloaded',
              timeout: CONTEXT_CHECK_TIMEOUT_MS
            });
            if (routeResponse && routeResponse.status() >= 200 && routeResponse.status() < 300) {
              reachableRoutes.add(routePath);
              break;
            }
          } catch (e) {
            // Best effort: a failed probe just means this candidate does not count
          }
        }
      }
    }

    // Find intersection: routes that match internal links or are reachable
    const matchedRoutes = new Set();
    const matchedLinks = new Set();
    const sampleMatched = [];

    for (const route of normalizedRoutes) {
      const isLinked = internalLinks.has(route);
      if (!isLinked && !reachableRoutes.has(route)) continue;

      matchedRoutes.add(route);
      if (isLinked) {
        matchedLinks.add(route);
      }
      if (sampleMatched.length < 5) {
        sampleMatched.push(route);
      }
    }

    const matchedRoutesCount = matchedRoutes.size;
    const matchedLinksCount = matchedLinks.size;
    const totalRoutes = normalizedRoutes.length;

    // Require at least half of the checked routes (minimum one) to match
    const requiredMatches = totalRoutes > 0 ? Math.max(1, Math.ceil(totalRoutes / 2)) : 0;
    const insufficient = matchedRoutesCount < requiredMatches;

    return {
      ran: true,
      forced,
      verdict: totalRoutes > 0 && insufficient ? invalidVerdict : 'VALID_CONTEXT',
      matchedRoutesCount,
      matchedLinksCount,
      totalRoutesChecked: totalRoutes,
      sampleMatched,
      internalLinksFound: internalLinks.size,
      reason: insufficient ? 'insufficient_route_match' : 'routes_matched'
    };
  } catch (error) {
    // Context check failed - can't determine validity
    return {
      ran: true,
      forced,
      verdict: invalidVerdict,
      matchedRoutesCount: 0,
      matchedLinksCount: 0,
      sampleMatched: [],
      reason: 'context_check_failed',
      error: error.message
    };
  } finally {
    await browser.close();
  }
}
|
|
File without changes
|