@odavl/guardian 0.1.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/LICENSE +21 -0
- package/README.md +141 -0
- package/bin/guardian.js +690 -0
- package/flows/example-login-flow.json +36 -0
- package/flows/example-signup-flow.json +44 -0
- package/guardian-contract-v1.md +149 -0
- package/guardian.config.json +54 -0
- package/guardian.policy.json +12 -0
- package/guardian.profile.docs.yaml +18 -0
- package/guardian.profile.ecommerce.yaml +17 -0
- package/guardian.profile.marketing.yaml +18 -0
- package/guardian.profile.saas.yaml +21 -0
- package/package.json +69 -0
- package/policies/enterprise.json +12 -0
- package/policies/saas.json +12 -0
- package/policies/startup.json +12 -0
- package/src/guardian/attempt-engine.js +454 -0
- package/src/guardian/attempt-registry.js +227 -0
- package/src/guardian/attempt-reporter.js +507 -0
- package/src/guardian/attempt.js +227 -0
- package/src/guardian/auto-attempt-builder.js +283 -0
- package/src/guardian/baseline-reporter.js +143 -0
- package/src/guardian/baseline-storage.js +285 -0
- package/src/guardian/baseline.js +492 -0
- package/src/guardian/behavioral-signals.js +261 -0
- package/src/guardian/breakage-intelligence.js +223 -0
- package/src/guardian/browser.js +92 -0
- package/src/guardian/cli-summary.js +141 -0
- package/src/guardian/crawler.js +142 -0
- package/src/guardian/discovery-engine.js +661 -0
- package/src/guardian/enhanced-html-reporter.js +305 -0
- package/src/guardian/failure-taxonomy.js +169 -0
- package/src/guardian/flow-executor.js +374 -0
- package/src/guardian/flow-registry.js +67 -0
- package/src/guardian/html-reporter.js +414 -0
- package/src/guardian/index.js +218 -0
- package/src/guardian/init-command.js +139 -0
- package/src/guardian/junit-reporter.js +264 -0
- package/src/guardian/market-criticality.js +335 -0
- package/src/guardian/market-reporter.js +305 -0
- package/src/guardian/network-trace.js +178 -0
- package/src/guardian/policy.js +357 -0
- package/src/guardian/preset-loader.js +148 -0
- package/src/guardian/reality.js +547 -0
- package/src/guardian/reporter.js +181 -0
- package/src/guardian/root-cause-analysis.js +171 -0
- package/src/guardian/safety.js +248 -0
- package/src/guardian/scan-presets.js +60 -0
- package/src/guardian/screenshot.js +152 -0
- package/src/guardian/sitemap.js +225 -0
- package/src/guardian/snapshot-schema.js +266 -0
- package/src/guardian/snapshot.js +327 -0
- package/src/guardian/validators.js +323 -0
- package/src/guardian/visual-diff.js +247 -0
- package/src/guardian/webhook.js +206 -0
|
@@ -0,0 +1,661 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Discovery Engine (Phase 4)
|
|
3
|
+
*
|
|
4
|
+
* Auto-discovers real user interactions deterministically.
|
|
5
|
+
* - Crawls pages up to limit
|
|
6
|
+
* - Extracts candidate interactions: links, buttons, forms
|
|
7
|
+
* - Applies safety model: DENY risky, ALLOW safe
|
|
8
|
+
* - Executes safe interactions in safe exploration mode
|
|
9
|
+
* - Captures outcomes: success, friction, failure
|
|
10
|
+
*
|
|
11
|
+
* NO AI. Pure deterministic heuristics.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const url = require('url');
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* @typedef {Object} DiscoveryConfig
|
|
18
|
+
* @property {string} baseUrl - base URL to start from
|
|
19
|
+
* @property {number} [maxPages=25] - max pages to visit
|
|
20
|
+
* @property {number} [maxInteractionsPerPage=10] - max interactions per page
|
|
21
|
+
* @property {boolean} [executeInteractions=false] - whether to actually execute interactions
|
|
22
|
+
* @property {number} [timeout=20000] - ms timeout per interaction
|
|
23
|
+
* @property {Array<string>} [startUrls] - URLs to start crawling from (defaults to [baseUrl]; replaces the default when given)
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* @typedef {Object} Interaction
|
|
28
|
+
* @property {string} interactionId - ID of the form "<type>-<index>", unique within its page only (e.g., "click-3")
|
|
29
|
+
* @property {string} pageUrl - URL where interaction was found
|
|
30
|
+
* @property {string} type - 'NAVIGATE' (link), 'CLICK' (button), 'FORM_FILL' (form)
|
|
31
|
+
* @property {string} interactionClass - 'NAVIGATION', 'ACTION', 'SUBMISSION', 'TOGGLE' (Phase 2)
|
|
32
|
+
* @property {string} selector - CSS/XPath selector to find element
|
|
33
|
+
* @property {string} [selectorStrategy] - 'css' or 'xpath'
|
|
34
|
+
* @property {string} [text] - visible text of element
|
|
35
|
+
* @property {string} [ariaLabel] - aria-label if present
|
|
36
|
+
* @property {string} [href] - for NAVIGATE type
|
|
37
|
+
* @property {boolean} [isRisky] - true if matches deny patterns
|
|
38
|
+
* @property {string} [riskReason] - why it's risky
|
|
39
|
+
* @property {string} [targetUrl] - where link goes (for NAVIGATE)
|
|
40
|
+
* @property {boolean} [isFormSafe] - true if data-guardian-safe="true" or known safe form
|
|
41
|
+
* @property {Array<string>} [formFields] - field types in form (email, text, password)
|
|
42
|
+
* @property {number} [confidenceScore] - 0-100, higher = more confident it's safe/useful (Phase 2)
|
|
43
|
+
*/
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* @typedef {Object} InteractionResult
|
|
47
|
+
* @property {string} interactionId - ID of interaction executed
|
|
48
|
+
* @property {string} pageUrl - URL where it was found
|
|
49
|
+
* @property {string} type - NAVIGATE/CLICK/FORM_FILL
|
|
50
|
+
* @property {string} selector - selector used
|
|
51
|
+
* @property {string} outcome - 'SUCCESS', 'FAILURE', 'FRICTION'
|
|
52
|
+
* @property {string} [notes] - details (e.g., target URL, error message)
|
|
53
|
+
* @property {number} [durationMs] - execution time
|
|
54
|
+
* @property {string} [errorMessage] - if FAILURE
|
|
55
|
+
* @property {string} [evidencePath] - path to screenshot if captured
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* @typedef {Object} DiscoveryResult
|
|
60
|
+
* @property {string[]} pagesVisited - URLs of all pages visited
|
|
61
|
+
* @property {number} pagesVisitedCount - total count
|
|
62
|
+
* @property {number} interactionsDiscovered - total count of candidates
|
|
63
|
+
* @property {number} interactionsExecuted - total count executed
|
|
64
|
+
* @property {Object<string, number>} interactionsByType - counts per type: { NAVIGATE: n, CLICK: n, FORM_FILL: n }
|
|
65
|
+
* @property {{risky: number, safe: number}} interactionsByRisk - counts per risk verdict: { risky: n, safe: n }
|
|
66
|
+
* @property {Interaction[]} interactions - all discovered interactions (Phase 2)
|
|
67
|
+
* @property {InteractionResult[]} results - detailed outcomes (failures + notable successes)
|
|
68
|
+
* @property {string} [summary] - human-readable summary
|
|
69
|
+
*/
|
|
70
|
+
|
|
71
|
+
// ============================================================================
|
|
72
|
+
// SAFETY MODEL (NON-NEGOTIABLE)
|
|
73
|
+
// ============================================================================
|
|
74
|
+
|
|
75
|
+
// Lower-case keywords; isRiskyText() flags any button text or aria-label
// that contains one of them as a substring.
const RISKY_TEXT_PATTERNS = [
  // destructive or session-ending actions
  'delete',
  'remove',
  'logout',
  'log out',
  'sign out',
  'signout',
  'unsubscribe',
  'cancel',
  'cancel order',
  // payments and orders
  'payment',
  'buy',
  'order',
  'confirm purchase',
  'purchase',
  'checkout',
  'pay now',
  'place order',
  // account termination
  'close account',
  'deactivate'
];
|
|
81
|
+
|
|
82
|
+
// Path fragments that mark a link as risky (session-ending, destructive,
// payment or admin routes). Tested against the raw href by isRiskyHref().
//
// The patterns are case-insensitive on purpose: many servers route paths
// without regard to case (e.g. "/Account/Logout"), and a deny list that only
// recognises the lower-case spelling would let those links through.
// None of the patterns use the `g`/`y` flags, so `.test()` stays stateless.
const RISKY_HREF_PATTERNS = [
  /\/logout\b/i, /\/log-out\b/i, /\/signout\b/i, /\/sign-out\b/i,
  /\/delete\b/i, /\/remove\b/i, /\/unsubscribe\b/i, /\/cancel\b/i,
  /\/checkout\b/i, /\/pay\b/i, /\/payment\b/i, /\/purchase\b/i,
  /\/admin\b/i, /\/admin\//i, /\/settings\/danger\b/i,
  /\/account\/close\b/i, /\/deactivate\b/i
];
|
|
89
|
+
|
|
90
|
+
// Form identifiers treated as safe to fill. assessInteractionRisk() accepts a
// form when its formId contains any of these strings.
const KNOWN_SAFE_FORMS = [
  'newsletter',
  'newsletter_signup',
  'contact',
  'contact_form',
  'search',
  'subscribe',
  'login',
  'signup',
  'register'
];
|
|
94
|
+
|
|
95
|
+
/**
 * Tell whether a piece of element text (or an aria-label) mentions a risky action.
 *
 * The text is lower-cased and trimmed, then searched for every entry of
 * RISKY_TEXT_PATTERNS as a plain substring.
 *
 * @param {string} [text] - visible text or label; falsy values are never risky
 * @returns {boolean} true when at least one risky keyword occurs in the text
 */
function isRiskyText(text) {
  if (!text) {
    return false;
  }

  const normalized = text.toLowerCase().trim();
  for (const keyword of RISKY_TEXT_PATTERNS) {
    if (normalized.includes(keyword)) {
      return true;
    }
  }
  return false;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Tell whether a link target matches one of the risky URL patterns.
 *
 * @param {string} [href] - href value to inspect; falsy values are never risky
 * @returns {boolean} true when any regex in RISKY_HREF_PATTERNS matches the href
 */
function isRiskyHref(href) {
  if (!href) {
    return false;
  }

  for (const riskyPattern of RISKY_HREF_PATTERNS) {
    if (riskyPattern.test(href)) {
      return true;
    }
  }
  return false;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Decide whether a discovered interaction may be executed automatically.
 *
 * Rules per interaction type:
 * - NAVIGATE:  risky only when the href matches a risky URL pattern.
 * - CLICK:     risky when the text or the aria-label contains a risky keyword.
 * - FORM_FILL: safe when flagged `isFormSafe` or when `formId` contains a
 *              known-safe name; every other form is risky.
 * - anything else is reported as not risky.
 *
 * @param {Interaction} interaction - candidate to assess
 * @param {string} [baseUrl] - accepted for interface compatibility; not used
 * @returns {{isRisky: boolean, reason: string}} verdict plus a short explanation
 */
function assessInteractionRisk(interaction, baseUrl) {
  const { text = '', ariaLabel = '', href = '', type } = interaction;
  const notRisky = { isRisky: false, reason: '' };

  switch (type) {
    case 'NAVIGATE': {
      // Links are allowed unless the target itself looks dangerous.
      return isRiskyHref(href)
        ? { isRisky: true, reason: `Href matches risk pattern: ${href}` }
        : notRisky;
    }

    case 'CLICK': {
      const labelLooksRisky = isRiskyText(text) || isRiskyText(ariaLabel);
      return labelLooksRisky
        ? { isRisky: true, reason: `Button text/label risky: "${text || ariaLabel}"` }
        : notRisky;
    }

    case 'FORM_FILL': {
      // An explicit opt-in on the form always wins.
      if (interaction.isFormSafe) {
        return { isRisky: false, reason: 'Form marked data-guardian-safe="true"' };
      }

      const formId = interaction.formId || '';
      const matchesSafeName = KNOWN_SAFE_FORMS.some(safeName => formId.includes(safeName));
      if (matchesSafeName) {
        return { isRisky: false, reason: `Form matches known safe pattern: ${formId}` };
      }

      // Unknown forms are never filled.
      return { isRisky: true, reason: 'Unknown form - not marked safe' };
    }

    default:
      return notRisky;
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Phase 2: map an interaction onto one of four coarse classes.
 *
 * Checks run in this order, first match wins:
 * 1. TOGGLE     - text or aria-label mentions a language/theme switch
 * 2. NAVIGATION - any NAVIGATE interaction (links)
 * 3. SUBMISSION - any FORM_FILL interaction, or a CLICK whose label suggests
 *                 submitting (submit / send / post / save)
 * 4. ACTION     - every remaining CLICK and any unknown type
 *
 * Keywords are matched as case-insensitive substrings of "<text> <ariaLabel>".
 *
 * @param {Interaction} interaction - candidate to classify
 * @returns {string} 'TOGGLE', 'NAVIGATION', 'SUBMISSION' or 'ACTION'
 */
function classifyInteraction(interaction) {
  const { type, text = '', ariaLabel = '' } = interaction;
  const label = `${text} ${ariaLabel}`.toLowerCase();
  const mentionsAny = words => words.some(word => label.includes(word));

  const toggleWords = ['language', 'lang', 'theme', 'dark mode', 'light mode'];
  if (mentionsAny(toggleWords)) {
    return 'TOGGLE';
  }

  switch (type) {
    case 'NAVIGATE':
      return 'NAVIGATION';

    case 'FORM_FILL':
      return 'SUBMISSION';

    case 'CLICK': {
      // Buttons are plain actions unless their label reads like a submit button.
      const submitWords = ['submit', 'send', 'post', 'save'];
      return mentionsAny(submitWords) ? 'SUBMISSION' : 'ACTION';
    }

    default:
      return 'ACTION';
  }
}
|
|
195
|
+
|
|
196
|
+
/**
 * Phase 2: score (0-100) how confident we are that an interaction is safe and
 * worth auto-testing.
 *
 * Risky interactions always score 0. Everything else starts at 50 and earns:
 *   +20  text longer than 2 characters
 *   +10  aria-label longer than 2 characters
 *   +10  NAVIGATE type
 *   +20  form explicitly marked safe
 * The total is capped at 100.
 *
 * @param {Interaction} interaction - interaction with `isRisky` already set
 * @returns {number} score between 0 and 100
 */
function calculateConfidenceScore(interaction) {
  if (interaction.isRisky) {
    return 0;
  }

  const BASE_SCORE = 50;
  const { text, ariaLabel, type, isFormSafe } = interaction;

  // [condition, points] pairs; every satisfied condition adds its points.
  const bonuses = [
    [text && text.length > 2, 20],
    [ariaLabel && ariaLabel.length > 2, 10],
    [type === 'NAVIGATE', 10],
    [isFormSafe, 20]
  ];

  const total = bonuses.reduce(
    (sum, [applies, points]) => (applies ? sum + points : sum),
    BASE_SCORE
  );

  return Math.min(total, 100);
}
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
// ============================================================================
|
|
232
|
+
// DISCOVERY ENGINE
|
|
233
|
+
// ============================================================================
|
|
234
|
+
|
|
235
|
+
/**
 * Breadth-first crawler that collects candidate interactions (links, buttons,
 * forms) from same-origin pages, assesses each one against the safety model
 * and optionally executes the safe ones.
 *
 * Safety invariants enforced here:
 * - links assessed as risky are never queued for crawling;
 * - every executed selector is a unique DOM path, so the element that gets
 *   clicked/filled is the element whose text was risk-assessed;
 * - clicks and form fills run on the page the interaction was found on.
 */
class DiscoveryEngine {
  /**
   * @param {DiscoveryConfig} [config] - crawl limits and start URLs. An
   *   optional `browser` property is stored as-is; this class does not use it.
   */
  constructor(config = {}) {
    this.baseUrl = config.baseUrl;
    this.maxPages = config.maxPages || 25;
    this.maxInteractionsPerPage = config.maxInteractionsPerPage || 10;
    this.executeInteractions = config.executeInteractions || false;
    this.timeout = config.timeout || 20000;
    this.startUrls = config.startUrls || [this.baseUrl];
    this.browser = config.browser;
    this.page = null;

    this.visited = new Set();
    this.queue = [];
    this.interactions = [];
    this.results = [];
  }

  /**
   * Resolve a URL against baseUrl and strip its fragment.
   *
   * @param {string} urlStr - absolute or relative URL
   * @returns {string|null} normalized absolute URL, or null when unparseable
   */
  normalizeUrl(urlStr) {
    try {
      const u = new URL(urlStr, this.baseUrl);
      u.hash = ''; // Fragments never change which page is loaded
      return u.toString();
    } catch {
      return null;
    }
  }

  /**
   * Check whether a URL shares the origin of baseUrl.
   *
   * @param {string} urlStr - absolute or relative URL
   * @returns {boolean} false for other origins and for unparseable input
   */
  isSameOrigin(urlStr) {
    try {
      const u = new URL(urlStr, this.baseUrl);
      const base = new URL(this.baseUrl);
      return u.origin === base.origin;
    } catch {
      return false;
    }
  }

  /**
   * Collect visible, same-origin links from the loaded page.
   *
   * @param {string} pageUrl - URL of the loaded page; relative hrefs are
   *   resolved against it (falling back to baseUrl when it is empty)
   * @param {Object} page - browser page exposing `evaluate`
   * @returns {Promise<Array<Object>>} NAVIGATE candidates (empty on any error)
   */
  async extractLinks(pageUrl, page) {
    const candidates = [];
    try {
      const links = await page.evaluate(() =>
        Array.from(document.querySelectorAll('a[href]')).map((a, idx) => ({
          href: a.getAttribute('href'),
          // Non-HTML anchors (e.g. SVG <a>) have no innerText
          text: (a.innerText || a.textContent || '').trim().substring(0, 100),
          ariaLabel: a.getAttribute('aria-label') || '',
          visible: a.offsetHeight > 0 && a.offsetWidth > 0,
          idx
        }))
      );

      for (const link of links) {
        const { href } = link;
        if (!href || !link.visible) continue;

        // Skip non-page schemes
        if (href.startsWith('mailto:') || href.startsWith('tel:') || href.startsWith('javascript:')) {
          continue;
        }

        // A relative href is relative to the page it appears on, not to the site root
        let absolute;
        try {
          absolute = new URL(href, pageUrl || this.baseUrl).toString();
        } catch {
          continue;
        }

        if (!this.isSameOrigin(absolute)) continue;

        const normalized = this.normalizeUrl(absolute);
        if (!normalized) continue;

        candidates.push({
          type: 'NAVIGATE',
          // Escape quotes/backslashes so the attribute selector stays valid
          selector: `a[href="${href.replace(/["\\]/g, '\\$&')}"]`,
          selectorStrategy: 'css',
          text: link.text,
          ariaLabel: link.ariaLabel,
          href,
          targetUrl: normalized
        });
      }
    } catch (e) {
      // Best effort: a page that cannot be evaluated contributes no links
    }

    return candidates;
  }

  /**
   * Collect enabled, visible buttons (<button>, [role="button"],
   * input[type="submit"]) from the loaded page.
   *
   * @param {string} pageUrl - URL of the loaded page (not used)
   * @param {Object} page - browser page exposing `evaluate`
   * @returns {Promise<Array<Object>>} CLICK candidates (empty on any error)
   */
  async extractButtons(pageUrl, page) {
    const candidates = [];
    try {
      const buttons = await page.evaluate(() => {
        // This callback runs inside the browser and must be self-contained.

        // Build a selector that identifies exactly one element: nth-of-type
        // counts same-tag siblings, so it is applied at every ancestor level.
        const uniquePath = (el) => {
          const parts = [];
          for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
            if (!node.parentElement) {
              parts.unshift(node.localName);
              break;
            }
            let position = 1;
            for (let sib = node.previousElementSibling; sib; sib = sib.previousElementSibling) {
              if (sib.localName === node.localName) position++;
            }
            parts.unshift(`${node.localName}:nth-of-type(${position})`);
          }
          return parts.join(' > ');
        };

        const visibleText = (el) => (el.innerText || el.textContent || '').trim().substring(0, 100);
        const found = [];

        document.querySelectorAll('button').forEach((btn, idx) => {
          found.push({
            type: 'button',
            selector: uniquePath(btn),
            text: visibleText(btn),
            ariaLabel: btn.getAttribute('aria-label') || '',
            disabled: btn.disabled,
            visible: btn.offsetHeight > 0 && btn.offsetWidth > 0,
            idx
          });
        });

        document.querySelectorAll('[role="button"]').forEach((btn, idx) => {
          found.push({
            type: 'role-button',
            selector: uniquePath(btn),
            text: visibleText(btn),
            ariaLabel: btn.getAttribute('aria-label') || '',
            disabled: btn.hasAttribute('disabled') || btn.getAttribute('aria-disabled') === 'true',
            visible: btn.offsetHeight > 0 && btn.offsetWidth > 0,
            idx
          });
        });

        document.querySelectorAll('input[type="submit"]').forEach((btn, idx) => {
          found.push({
            type: 'submit',
            selector: uniquePath(btn),
            text: btn.value || btn.getAttribute('aria-label') || 'Submit',
            ariaLabel: btn.getAttribute('aria-label') || '',
            disabled: btn.disabled,
            visible: btn.offsetHeight > 0 && btn.offsetWidth > 0,
            idx
          });
        });

        return found;
      });

      for (const btn of buttons) {
        if (btn.disabled || !btn.visible) continue;

        candidates.push({
          type: 'CLICK',
          selector: btn.selector,
          selectorStrategy: 'css',
          text: btn.text,
          ariaLabel: btn.ariaLabel
        });
      }
    } catch (e) {
      // Best effort: a page that cannot be evaluated contributes no buttons
    }

    return candidates;
  }

  /**
   * Collect visible forms that contain at least one <input>.
   *
   * @param {string} pageUrl - URL of the loaded page (not used)
   * @param {Object} page - browser page exposing `evaluate`
   * @returns {Promise<Array<Object>>} FORM_FILL candidates (empty on any error)
   */
  async extractForms(pageUrl, page) {
    const candidates = [];
    try {
      const forms = await page.evaluate(() => {
        // This callback runs inside the browser and must be self-contained.
        const uniquePath = (el) => {
          const parts = [];
          for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
            if (!node.parentElement) {
              parts.unshift(node.localName);
              break;
            }
            let position = 1;
            for (let sib = node.previousElementSibling; sib; sib = sib.previousElementSibling) {
              if (sib.localName === node.localName) position++;
            }
            parts.unshift(`${node.localName}:nth-of-type(${position})`);
          }
          return parts.join(' > ');
        };

        return Array.from(document.querySelectorAll('form')).map((form, fidx) => {
          const inputs = Array.from(form.querySelectorAll('input'));
          const fieldTypes = new Set();
          inputs.forEach(input => {
            const type = input.type.toLowerCase();
            if (['email', 'text', 'password', 'tel', 'url'].includes(type)) {
              fieldTypes.add(type);
            }
          });

          return {
            // getAttribute instead of form.id / form.name: a child input named
            // "id" or "name" would shadow those properties on the form element
            domId: form.getAttribute('id') || '',
            name: form.getAttribute('name') || '',
            path: uniquePath(form),
            fieldTypes: Array.from(fieldTypes),
            visible: form.offsetHeight > 0 && form.offsetWidth > 0,
            hasGuardianSafe: form.getAttribute('data-guardian-safe') === 'true',
            inputCount: inputs.length,
            idx: fidx
          };
        });
      });

      for (const form of forms) {
        if (!form.visible || form.inputCount === 0) continue;

        // Human-readable identifier: real id, else name, else positional fallback
        const label = form.domId || form.name || `form-${form.idx}`;
        // "form#id" is only used for a real id that is a plain CSS identifier;
        // everything else gets the unique DOM path
        const hasSimpleId = /^[A-Za-z_][\w-]*$/.test(form.domId);

        candidates.push({
          type: 'FORM_FILL',
          selector: hasSimpleId ? `form#${form.domId}` : form.path,
          selectorStrategy: 'css',
          formId: label,
          formFields: form.fieldTypes,
          isFormSafe: form.hasGuardianSafe,
          text: `Form: ${label}`,
          ariaLabel: ''
        });
      }
    } catch (e) {
      // Best effort: a page that cannot be evaluated contributes no forms
    }

    return candidates;
  }

  /**
   * Load a page, extract and assess its interactions, and queue the safe
   * links it contains for crawling.
   *
   * The URL is marked visited before loading, so a page that fails to load
   * still counts towards maxPages and is not retried.
   *
   * @param {string} pageUrl - normalized URL to visit
   * @returns {Promise<Interaction[]>} assessed interactions, at most
   *   maxInteractionsPerPage; empty when skipped or when loading failed
   */
  async visitPage(pageUrl) {
    if (this.visited.size >= this.maxPages) return [];
    if (this.visited.has(pageUrl)) return [];

    this.visited.add(pageUrl);

    try {
      await this.page.goto(pageUrl, { waitUntil: 'networkidle', timeout: this.timeout });

      const links = await this.extractLinks(pageUrl, this.page);
      const buttons = await this.extractButtons(pageUrl, this.page);
      const forms = await this.extractForms(pageUrl, this.page);

      // Limit per page (links first, then buttons, then forms)
      const limited = [...links, ...buttons, ...forms].slice(0, this.maxInteractionsPerPage);

      const interactions = limited.map((cand, idx) => {
        const riskAssess = assessInteractionRisk(cand, this.baseUrl);
        const interactionClass = classifyInteraction(cand);
        const confidenceScore = calculateConfidenceScore({
          ...cand,
          isRisky: riskAssess.isRisky
        });

        return {
          interactionId: `${cand.type.toLowerCase()}-${idx}`,
          pageUrl,
          ...cand,
          isRisky: riskAssess.isRisky,
          riskReason: riskAssess.reason,
          interactionClass,
          confidenceScore
        };
      });

      // Queue new NAVIGATE targets. Risky links are recorded but never
      // crawled: loading e.g. a logout or delete URL is itself the risky act.
      for (const inter of interactions) {
        if (inter.type !== 'NAVIGATE' || inter.isRisky) continue;
        if (inter.targetUrl && !this.visited.has(inter.targetUrl)) {
          this.queue.push(inter.targetUrl);
        }
      }

      return interactions;
    } catch (e) {
      // Page failed to load
      return [];
    }
  }

  /**
   * Make sure the page an interaction was discovered on is the one loaded,
   * navigating to it when the current URL differs (fragments ignored).
   *
   * @param {Interaction} interaction - interaction about to be executed
   * @param {Object} page - browser page exposing `url` and `goto`
   * @returns {Promise<void>}
   */
  async returnToSourcePage(interaction, page) {
    if (!interaction.pageUrl) return;
    if (this.normalizeUrl(page.url()) === this.normalizeUrl(interaction.pageUrl)) return;
    await page.goto(interaction.pageUrl, { waitUntil: 'networkidle', timeout: this.timeout });
  }

  /**
   * Execute a single interaction and report the outcome. Never throws: any
   * error is converted into a FAILURE result.
   *
   * @param {Interaction} interaction - interaction to run
   * @param {Object} page - browser page to run it on
   * @returns {Promise<InteractionResult>} outcome with duration in ms
   */
  async executeInteraction(interaction, page) {
    const startMs = Date.now();
    const base = {
      interactionId: interaction.interactionId,
      pageUrl: interaction.pageUrl,
      type: interaction.type,
      selector: interaction.selector
    };

    try {
      if (interaction.type === 'NAVIGATE') {
        // Same wait condition as the crawl in visitPage()
        await page.goto(interaction.targetUrl, { waitUntil: 'networkidle', timeout: this.timeout });
        return {
          ...base,
          outcome: 'SUCCESS',
          notes: `Navigated to ${interaction.targetUrl}`,
          durationMs: Date.now() - startMs,
          targetUrl: interaction.targetUrl
        };
      }

      if (interaction.type === 'CLICK') {
        await this.returnToSourcePage(interaction, page);
        const prevUrl = page.url();
        // Start waiting before clicking so a fast navigation is not missed;
        // no navigation within 2s is fine (modal, accordion, ...)
        await Promise.all([
          page.waitForNavigation({ timeout: 2000 }).catch(() => {}),
          page.click(interaction.selector)
        ]);
        const newUrl = page.url();
        return {
          ...base,
          outcome: 'SUCCESS',
          notes: newUrl !== prevUrl ? `Navigated to ${newUrl}` : 'Click executed',
          durationMs: Date.now() - startMs,
          targetUrl: newUrl
        };
      }

      if (interaction.type === 'FORM_FILL') {
        await this.returnToSourcePage(interaction, page);
        // Fill email/text/password inputs with test data; null = form missing
        const filled = await page.evaluate((selector) => {
          const form = document.querySelector(selector);
          if (!form) return null;
          const inputs = form.querySelectorAll('input[type="email"], input[type="text"], input[type="password"]');
          let filledCount = 0;
          inputs.forEach((input, idx) => {
            if (input.type === 'email') {
              input.value = `test${idx}@example.com`;
              filledCount++;
            } else if (input.type === 'password') {
              input.value = 'TestPass123!';
              filledCount++;
            } else if (input.type === 'text') {
              input.value = `Test input ${idx}`;
              filledCount++;
            }
          });
          return filledCount;
        }, interaction.selector);

        if (filled === null) {
          return {
            ...base,
            outcome: 'FAILURE',
            errorMessage: `Form not found: ${interaction.selector}`,
            durationMs: Date.now() - startMs
          };
        }

        return {
          ...base,
          outcome: 'SUCCESS',
          notes: `Filled ${filled} form fields`,
          durationMs: Date.now() - startMs
        };
      }

      return {
        ...base,
        outcome: 'FAILURE',
        errorMessage: 'Unknown interaction type',
        durationMs: Date.now() - startMs
      };
    } catch (e) {
      return {
        ...base,
        outcome: 'FAILURE',
        // Non-Error throwables have no message
        errorMessage: String((e && e.message) || e).substring(0, 200),
        durationMs: Date.now() - startMs
      };
    }
  }

  /**
   * Main discovery crawl: breadth-first over startUrls until the queue is
   * empty or maxPages is reached, then (when executeInteractions is set)
   * execute up to 20 of the non-risky interactions.
   *
   * @param {Object} page - browser page used for the whole run
   * @returns {Promise<DiscoveryResult>} aggregated result
   */
  async discover(page) {
    this.page = page;
    this.queue = [...this.startUrls];

    // BFS crawl
    while (this.queue.length > 0 && this.visited.size < this.maxPages) {
      const urlStr = this.queue.shift();
      const normalized = this.normalizeUrl(urlStr);
      if (!normalized) continue;

      const pageInteractions = await this.visitPage(normalized);
      this.interactions.push(...pageInteractions);
    }

    // Execute safe interactions if enabled
    if (this.executeInteractions && this.page) {
      const safeInteractions = this.interactions.filter(i => !i.isRisky);
      for (const inter of safeInteractions.slice(0, 20)) { // Limit executions
        const result = await this.executeInteraction(inter, this.page);
        this.results.push(result);
      }
    }

    return this.generateResult();
  }

  /**
   * Summarize the crawl. `results` holds at most 10 failures followed by at
   * most 5 successes; the counters cover everything that was executed.
   *
   * @returns {DiscoveryResult} aggregated counts, interactions and summary
   */
  generateResult() {
    const pagesVisited = Array.from(this.visited);
    const byType = { NAVIGATE: 0, CLICK: 0, FORM_FILL: 0 };
    const byRisk = { risky: 0, safe: 0 };

    this.interactions.forEach(i => {
      byType[i.type] = (byType[i.type] || 0) + 1;
      if (i.isRisky) byRisk.risky++;
      else byRisk.safe++;
    });

    // Failures and notable successes
    const failures = this.results.filter(r => r.outcome === 'FAILURE').slice(0, 10);
    const successes = this.results.filter(r => r.outcome === 'SUCCESS').slice(0, 5);
    const topResults = [...failures, ...successes];

    return {
      pagesVisited,
      pagesVisitedCount: pagesVisited.length,
      interactionsDiscovered: this.interactions.length,
      interactionsExecuted: this.results.length,
      interactionsByType: byType,
      interactionsByRisk: byRisk,
      interactions: this.interactions,
      results: topResults,
      summary: `Visited ${pagesVisited.length} pages, discovered ${this.interactions.length} interactions ` +
        `(${byRisk.safe} safe, ${byRisk.risky} risky), executed ${this.results.length}`
    };
  }
}
|
|
660
|
+
|
|
661
|
+
// Exports the engine class together with the three standalone heuristic functions.
module.exports = { DiscoveryEngine, assessInteractionRisk, classifyInteraction, calculateConfidenceScore };
|