outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
|
@@ -0,0 +1,702 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic Normalizer
|
|
3
|
+
*
|
|
4
|
+
* Converts raw DOM to Intent-Tagged JSON, stripping 90% of non-functional code.
|
|
5
|
+
* This is Layer 1 of OmniBridge: DOM → Intent
|
|
6
|
+
*
|
|
7
|
+
* Requirements: 1.1, 1.2, 1.3, 1.5, 1.6
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import * as cheerio from 'cheerio';
|
|
11
|
+
import type {
|
|
12
|
+
IntentDocument,
|
|
13
|
+
IntentElement,
|
|
14
|
+
IntentId,
|
|
15
|
+
IntentForm,
|
|
16
|
+
IntentFormField,
|
|
17
|
+
IntentNavigation,
|
|
18
|
+
} from './types.js';
|
|
19
|
+
|
|
20
|
+
/**
 * CSS selectors for elements that are removed outright during noise stripping
 * (scripts, styling, embedded frames, tracking hooks and ad containers).
 *
 * Ad-class matching is anchored: a class attribute is treated as an ad
 * container only when it starts with `ad-`, contains a space-separated token
 * starting with `ad-`, or contains the `-ad-` infix. A bare `[class*="ad-"]`
 * substring test would also match functional classes such as `upload-btn`,
 * `download-button`, `lead-form` or `head-nav` and delete those elements.
 * Tokens separated by tabs/newlines instead of spaces are not covered by the
 * ` ad-` variant.
 */
const NOISE_SELECTORS: readonly string[] = [
  // Non-content markup
  'script',
  'style',
  'noscript',
  'iframe',
  'svg',
  'link[rel="stylesheet"]',
  'meta',
  'head > *:not(title)',

  // Tracking / analytics hooks
  '[data-tracking]',
  '[data-analytics]',
  '[data-gtm]',
  '.tracking',
  '.analytics',

  // Advertising containers
  '#google_ads_iframe',
  '[id*="google_ads"]',
  '[class^="ad-"]',
  '[class*=" ad-"]',
  '[class*="-ad-"]',
  '[class*="advertisement"]',
  '[data-ad]',
  'ins.adsbygoogle',
];
|
|
44
|
+
|
|
45
|
+
/**
 * ARIA role to Intent category entries, grouped by resulting category.
 */
const ARIA_ROLE_ENTRIES: Record<string, IntentId['category']> = {
  // Actions
  button: 'ACTION',

  // Navigation (link) and inputs
  link: 'NAV',
  textbox: 'INPUT',
  searchbox: 'INPUT',
  checkbox: 'INPUT',
  radio: 'INPUT',
  combobox: 'INPUT',
  listbox: 'INPUT',
  slider: 'INPUT',
  spinbutton: 'INPUT',
  switch: 'INPUT',

  // Navigation containers and items
  navigation: 'NAV',
  menu: 'NAV',
  menuitem: 'NAV',
  tab: 'NAV',
  tablist: 'NAV',

  // Display / structural roles
  heading: 'DISPLAY',
  img: 'DISPLAY',
  figure: 'DISPLAY',
  table: 'DISPLAY',
  grid: 'DISPLAY',
  list: 'DISPLAY',
  listitem: 'DISPLAY',
  article: 'DISPLAY',
  region: 'DISPLAY',
  main: 'DISPLAY',
  banner: 'DISPLAY',
  contentinfo: 'DISPLAY',
  complementary: 'DISPLAY',

  // Form landmarks
  form: 'INPUT',
  search: 'INPUT',

  // Feedback and overlays
  alert: 'DISPLAY',
  alertdialog: 'ACTION',
  dialog: 'ACTION',
  progressbar: 'DISPLAY',
  status: 'DISPLAY',
  tooltip: 'DISPLAY',
};

/**
 * ARIA role to Intent category mapping.
 *
 * The table is indexed with `role` attribute values taken from arbitrary HTML,
 * so it is built on a null prototype: keys such as `constructor`, `toString`
 * or `__proto__` resolve to `undefined` instead of an inherited
 * `Object.prototype` member that a truthiness check would accept as a role.
 */
const ARIA_TO_CATEGORY: Record<string, IntentId['category']> = Object.assign(
  Object.create(null),
  ARIA_ROLE_ENTRIES
);
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
/**
 * Tag name to Intent category entries (keys are lowercase tag names).
 */
const TAG_CATEGORY_ENTRIES: Record<string, IntentId['category']> = {
  // Interactive elements
  button: 'ACTION',
  a: 'NAV',
  input: 'INPUT',
  textarea: 'INPUT',
  select: 'INPUT',
  form: 'INPUT',
  nav: 'NAV',

  // Sectioning / landmarks
  header: 'DISPLAY',
  footer: 'DISPLAY',
  main: 'DISPLAY',
  article: 'DISPLAY',
  section: 'DISPLAY',
  aside: 'DISPLAY',

  // Headings
  h1: 'DISPLAY',
  h2: 'DISPLAY',
  h3: 'DISPLAY',
  h4: 'DISPLAY',
  h5: 'DISPLAY',
  h6: 'DISPLAY',

  // Text and generic containers
  p: 'DISPLAY',
  span: 'DISPLAY',
  div: 'DISPLAY',
  img: 'DISPLAY',
  table: 'DISPLAY',
  ul: 'DISPLAY',
  ol: 'DISPLAY',
  li: 'DISPLAY',
  label: 'DISPLAY',
};

/**
 * Tag to Intent category mapping (fallback when no ARIA role applies).
 *
 * Built on a null prototype because it is indexed with tag names coming from
 * parsed HTML: an unknown element such as `<constructor>` must resolve to
 * `undefined` rather than to an inherited `Object.prototype` member.
 */
const TAG_TO_CATEGORY: Record<string, IntentId['category']> = Object.assign(
  Object.create(null),
  TAG_CATEGORY_ENTRIES
);
|
|
122
|
+
|
|
123
|
+
/**
 * Class-name fragments that mark a CSS class as semantically meaningful.
 * Classes containing none of these fragments are dropped by `stripNoise`.
 * `breadcrumb` is kept so the `.breadcrumb` selectors used during navigation
 * extraction can still match after noise stripping.
 */
const SEMANTIC_CLASS_MARKERS: readonly string[] = [
  'btn',
  'button',
  'nav',
  'menu',
  'form',
  'input',
  'search',
  'submit',
  'login',
  'signup',
  'cart',
  'checkout',
  'breadcrumb',
];

/** Name fragments that exempt a `data-*` attribute from removal. */
const KEPT_DATA_ATTR_MARKERS: readonly string[] = ['action', 'target', 'value', 'id', 'name'];

/** One ordered keyword rule used by purpose inference. */
interface PurposeRule {
  /** Purpose label returned when the rule matches. */
  purpose: string;
  /** Substrings searched in the combined, lower-cased signal text. */
  keywords: readonly string[];
  /** When present, a keyword hit is ignored if the signals contain this substring. */
  excludes?: string;
  /** When present, an exact match on the element's `type` attribute also triggers the rule. */
  inputType?: string;
}

/**
 * Purpose rules, evaluated top to bottom; the first matching rule wins, so the
 * order is significant (e.g. LOGIN is tested before SUBMIT).
 */
const PURPOSE_RULES: readonly PurposeRule[] = [
  { purpose: 'LOGIN', keywords: ['login', 'sign in', 'signin'] },
  { purpose: 'SIGNUP', keywords: ['signup', 'sign up', 'register'] },
  { purpose: 'SEARCH', keywords: ['search'] },
  { purpose: 'SUBMIT', keywords: ['submit', 'send'] },
  { purpose: 'CANCEL', keywords: ['cancel', 'close'] },
  { purpose: 'DELETE', keywords: ['delete', 'remove'] },
  { purpose: 'EDIT', keywords: ['edit', 'modify'] },
  { purpose: 'SAVE', keywords: ['save'] },
  { purpose: 'CART', keywords: ['cart', 'basket'] },
  { purpose: 'PURCHASE', keywords: ['checkout', 'purchase', 'buy'] },
  { purpose: 'PAYMENT', keywords: ['pay', 'payment'] },
  { purpose: 'EMAIL_INPUT', keywords: ['email'], inputType: 'email' },
  { purpose: 'PASSWORD_INPUT', keywords: ['password'], inputType: 'password' },
  { purpose: 'PHONE_INPUT', keywords: ['phone'], inputType: 'tel' },
  { purpose: 'NAME_INPUT', keywords: ['name'], excludes: 'username' },
  { purpose: 'USERNAME_INPUT', keywords: ['username'] },
  { purpose: 'ADDRESS_INPUT', keywords: ['address'] },
  { purpose: 'NEXT', keywords: ['next', 'continue'] },
  { purpose: 'BACK', keywords: ['back', 'previous'] },
  { purpose: 'NAVIGATION', keywords: ['menu', 'navigation'] },
  { purpose: 'HOME', keywords: ['home'] },
  { purpose: 'PROFILE', keywords: ['profile', 'account'] },
  { purpose: 'SETTINGS', keywords: ['settings', 'preferences'] },
  { purpose: 'HELP', keywords: ['help', 'support'] },
  { purpose: 'DOWNLOAD', keywords: ['download'] },
  { purpose: 'UPLOAD', keywords: ['upload'] },
];

/**
 * Read `table[key]` only when `key` is an own property, so inherited
 * `Object.prototype` members (`constructor`, `toString`, ...) never match.
 */
function lookupOwn<T>(table: Record<string, T>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Semantic Normalizer.
 *
 * Turns raw HTML into an Intent Document: noise is stripped, then interactive
 * and structural elements, forms and navigation links are extracted and tagged
 * with `<CATEGORY>_ID:<PURPOSE>` intent identifiers.
 */
export class SemanticNormalizer {
  /**
   * Strip noise from HTML (CSS, scripts, tracking, metadata).
   *
   * @param html - Raw HTML markup.
   * @returns The serialized document after removing noise elements, inline
   *   styles, non-semantic classes, most `data-*` attributes, comments inside
   *   elements, and empty `div`/`span`/`p` elements.
   */
  stripNoise(html: string): string {
    const $ = cheerio.load(html);

    // Drop whole elements that never carry intent.
    for (const selector of NOISE_SELECTORS) {
      $(selector).remove();
    }

    // Inline styling is presentation only.
    $('[style]').removeAttr('style');

    // Keep only class tokens that look semantic; remove the attribute otherwise.
    $('[class]').each((_, el) => {
      const $el = $(el);
      const keptTokens = ($el.attr('class') || '')
        .split(/\s+/)
        .filter((token) => SEMANTIC_CLASS_MARKERS.some((marker) => token.includes(marker)));

      if (keptTokens.length > 0) {
        $el.attr('class', keptTokens.join(' '));
      } else {
        $el.removeAttr('class');
      }
    });

    // Remove data-* attributes unless their name contains a kept fragment.
    $('*').each((_, el) => {
      const $el = $(el);
      for (const attr of Object.keys($el.attr() || {})) {
        const removable =
          attr.startsWith('data-') && !KEPT_DATA_ATTR_MARKERS.some((marker) => attr.includes(marker));
        if (removable) {
          $el.removeAttr(attr);
        }
      }
    });

    // Remove comment nodes that are children of any element.
    $('*')
      .contents()
      .filter((_, node) => node.type === 'comment')
      .remove();

    // Remove empty generic containers (single pass).
    $('div:empty, span:empty, p:empty').remove();

    return $.html();
  }

  /**
   * Assign an Intent ID to an element based on its semantic meaning.
   * Prioritizes ARIA roles when present (Requirement 1.5).
   *
   * @param tagName - Element tag name (any case).
   * @param ariaRole - Value of the `role` attribute, if any. It is treated as a
   *   space-separated token list and the first token found in the ARIA table is
   *   used.
   * @param attributes - Attribute values used as purpose signals
   *   (`id`, `name`, `type`, `placeholder`, `aria-label` are read).
   * @param textContent - Visible text or label of the element.
   * @param contextHints - Extra text from surrounding elements.
   * @returns Category, purpose and confidence. Confidence is 0.95 for a
   *   recognised ARIA role; otherwise 0.7 plus bonuses for id/name, non-empty
   *   text and context hints, capped at 1.
   */
  assignIntentId(
    tagName: string,
    ariaRole: string | undefined,
    attributes: Record<string, string>,
    textContent: string,
    contextHints: string[]
  ): IntentId {
    // Priority 1: ARIA role (Requirement 1.5)
    const ariaMatch = this.matchAriaRole(ariaRole);
    if (ariaMatch) {
      return {
        category: ariaMatch.category,
        purpose: this.inferPurpose(ariaMatch.role, attributes, textContent, contextHints),
        confidence: 0.95, // High confidence when ARIA role is present
      };
    }

    // Priority 2: Tag-based inference
    const category = lookupOwn(TAG_TO_CATEGORY, tagName.toLowerCase()) ?? 'DISPLAY';
    const purpose = this.inferPurpose(tagName, attributes, textContent, contextHints);

    // Confidence grows with the number of available signals.
    let confidence = 0.7;
    if (attributes.id || attributes.name) confidence += 0.1;
    if (textContent.trim().length > 0) confidence += 0.1;
    if (contextHints.length > 0) confidence += 0.05;

    return {
      category,
      purpose: purpose.toUpperCase().replace(/\s+/g, '_'),
      confidence: Math.min(confidence, 1),
    };
  }

  /**
   * Find the first token of a `role` attribute value that is a key of the ARIA
   * table. Only own keys count, so values like `constructor` are rejected.
   */
  private matchAriaRole(
    ariaRole: string | undefined
  ): { role: string; category: IntentId['category'] } | undefined {
    if (!ariaRole) {
      return undefined;
    }
    for (const token of ariaRole.trim().split(/\s+/)) {
      const category = lookupOwn(ARIA_TO_CATEGORY, token);
      if (category) {
        return { role: token, category };
      }
    }
    return undefined;
  }

  /**
   * Infer the purpose of an element from available signals.
   *
   * Text, `id`, `name`, `type`, `placeholder`, `aria-label` and the context
   * hints are joined into one lower-cased string and tested against the
   * ordered purpose rules using substring matching. When no rule matches, the
   * upper-cased role or tag name is returned.
   */
  private inferPurpose(
    roleOrTag: string,
    attributes: Record<string, string>,
    textContent: string,
    contextHints: string[]
  ): string {
    const type = (attributes.type || '').toLowerCase();

    const signals = [
      textContent.trim(),
      attributes.id || '',
      attributes.name || '',
      type,
      attributes.placeholder || '',
      attributes['aria-label'] || '',
      ...contextHints,
    ]
      .join(' ')
      .toLowerCase();

    for (const rule of PURPOSE_RULES) {
      const keywordHit = rule.keywords.some((keyword) => signals.includes(keyword));
      const vetoed = rule.excludes !== undefined && signals.includes(rule.excludes);
      const typeHit = rule.inputType !== undefined && type === rule.inputType;
      if ((keywordHit && !vetoed) || typeHit) {
        return rule.purpose;
      }
    }

    // Fallback to role/tag based purpose
    return roleOrTag.toUpperCase();
  }

  /**
   * Concatenated, trimmed text of every `<label>` whose `for` attribute equals
   * `id`. The comparison is done on attribute values rather than by building a
   * selector, so ids containing quotes, dots or colons cannot break the query.
   */
  private findLabelText($: cheerio.CheerioAPI, id: string): string {
    return $('label')
      .filter((_, labelEl) => $(labelEl).attr('for') === id)
      .text()
      .trim();
  }

  /**
   * Concatenated, trimmed text of every element whose `id` attribute equals
   * `id`, compared by value (no selector interpolation).
   */
  private findTextById($: cheerio.CheerioAPI, id: string): string {
    return $('[id]')
      .filter((_, candidate) => $(candidate).attr('id') === id)
      .text()
      .trim();
  }

  /**
   * Extract context hints from surrounding elements.
   *
   * Collects, in order: the parent's own text (child elements excluded, capped
   * at 100 characters), the text of an immediately preceding `<label>`
   * sibling, the text of labels pointing at the element's id, and the text of
   * each element referenced by `aria-describedby` (a space-separated id list).
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private extractContextHints($: cheerio.CheerioAPI, el: any): string[] {
    const hints: string[] = [];
    const $el = $(el);

    // Parent's direct text only: clone, drop child elements, read what is left.
    const parentText = $el.parent().clone().children().remove().end().text().trim();
    if (parentText) {
      hints.push(parentText.slice(0, 100)); // Limit length
    }

    // Label placed directly before the element.
    const siblingLabel = $el.prev('label').text().trim();
    if (siblingLabel) {
      hints.push(siblingLabel);
    }

    // Label associated through its 'for' attribute.
    const id = $el.attr('id');
    if (id) {
      const labelText = this.findLabelText($, id);
      if (labelText) {
        hints.push(labelText);
      }
    }

    // aria-describedby holds one or more ids; resolve each one separately.
    const describedBy = $el.attr('aria-describedby');
    if (describedBy) {
      for (const refId of describedBy.trim().split(/\s+/)) {
        if (!refId) continue;
        const description = this.findTextById($, refId);
        if (description) {
          hints.push(description);
        }
      }
    }

    return hints.filter((hint) => hint.length > 0);
  }

  /**
   * Extract forms from the document (Requirement 1.6).
   *
   * For every `<form>`, each `input`, `textarea` and `select` descendant is
   * described with its name, type, label, required flag, validation rules and
   * placeholder. The field type defaults to `textarea` for textareas and
   * `text` for everything else without a `type` attribute.
   */
  private extractForms($: cheerio.CheerioAPI): IntentForm[] {
    const forms: IntentForm[] = [];
    const validationAttrs = ['pattern', 'minlength', 'maxlength', 'min', 'max'];

    $('form').each((_, formEl) => {
      const $form = $(formEl);
      const fields: IntentFormField[] = [];

      $form.find('input, textarea, select').each((_, fieldEl) => {
        const $field = $(fieldEl);
        const tagName = fieldEl.tagName.toLowerCase();
        const type = $field.attr('type') || (tagName === 'textarea' ? 'textarea' : 'text');
        const name = $field.attr('name') || '';
        const id = $field.attr('id') || '';
        const placeholder = $field.attr('placeholder') || '';
        const required =
          $field.attr('required') !== undefined || $field.attr('aria-required') === 'true';

        // Label resolution: label[for] -> wrapping <label> -> placeholder/name/type.
        const label =
          (id ? this.findLabelText($, id) : '') ||
          $field.closest('label').text().trim() ||
          placeholder ||
          name ||
          type;

        // Validation rules are emitted as "<attribute>:<value>" in a fixed order.
        const validationRules: string[] = [];
        for (const attr of validationAttrs) {
          const value = $field.attr(attr);
          if (value) {
            validationRules.push(`${attr}:${value}`);
          }
        }

        const intentId = this.assignIntentId(
          tagName,
          $field.attr('role'),
          {
            id,
            name,
            type,
            placeholder,
            'aria-label': $field.attr('aria-label') || '',
          },
          label,
          []
        );

        fields.push({
          intentId: `${intentId.category}_ID:${intentId.purpose}`,
          name,
          type,
          label,
          required,
          validationRules: validationRules.length > 0 ? validationRules : undefined,
          placeholder: placeholder || undefined,
        });
      });

      // Only explicit type="submit" controls are recognised as submit buttons.
      const hasSubmit = $form.find('button[type="submit"], input[type="submit"]').length > 0;
      const submitButtonId = hasSubmit ? 'ACTION_ID:SUBMIT' : undefined;

      const formIntentId = this.assignIntentId(
        'form',
        $form.attr('role'),
        {
          id: $form.attr('id') || '',
          name: $form.attr('name') || '',
          action: $form.attr('action') || '',
        },
        '',
        []
      );

      forms.push({
        intentId: `${formIntentId.category}_ID:${formIntentId.purpose}`,
        action: $form.attr('action') || undefined,
        // NOTE(review): the cast is unchecked; a method other than GET/POST is
        // passed through upper-cased, and a missing method falls back to 'POST'.
        method: ($form.attr('method')?.toUpperCase() as 'GET' | 'POST') || 'POST',
        fields,
        submitButtonId,
      });
    });

    return forms;
  }

  /**
   * Extract navigation structure from the document.
   *
   * Primary links are anchors inside `nav`, `header` or `[role="navigation"]`
   * with a non-empty label and an href that is neither a fragment nor a
   * `javascript:` URL. Breadcrumbs are anchors inside breadcrumb containers,
   * followed by any `aria-current` item that was not already captured as (or
   * through) an anchor.
   */
  private extractNavigation($: cheerio.CheerioAPI): IntentNavigation {
    const primaryLinks: IntentNavigation['primaryLinks'] = [];
    const breadcrumbs: IntentNavigation['breadcrumbs'] = [];

    $('nav a, header a, [role="navigation"] a').each((_, el) => {
      const $el = $(el);
      const href = $el.attr('href');
      const label = $el.text().trim();
      if (!href || !label) {
        return;
      }

      // Compare on a normalised copy so " JavaScript:..." is rejected too.
      const target = href.trim().toLowerCase();
      if (target.startsWith('#') || target.startsWith('javascript:')) {
        return;
      }

      const intentId = this.assignIntentId(
        'a',
        $el.attr('role'),
        { href, id: $el.attr('id') || '' },
        label,
        []
      );

      primaryLinks.push({
        intentId: `${intentId.category}_ID:${intentId.purpose}`,
        label,
        href,
      });
    });

    $('[aria-label="breadcrumb"] a, .breadcrumb a, nav[aria-label*="breadcrumb"] a').each((_, el) => {
      const $el = $(el);
      breadcrumbs.push({
        label: $el.text().trim(),
        href: $el.attr('href'),
      });
    });

    // Add the current page when it is plain text; an anchor (or a wrapper
    // around one) was already added by the pass above.
    $('[aria-label="breadcrumb"] [aria-current], .breadcrumb [aria-current]').each((_, el) => {
      const $current = $(el);
      if ($current.is('a') || $current.find('a').length > 0) {
        return;
      }
      breadcrumbs.push({
        label: $current.text().trim(),
      });
    });

    return {
      primaryLinks,
      breadcrumbs: breadcrumbs.length > 0 ? breadcrumbs : undefined,
    };
  }

  /**
   * Extract semantic elements from the document.
   *
   * Hidden elements (`hidden` or `aria-hidden="true"`) are skipped, as are
   * form controls located inside a `<form>` (those are reported by form
   * extraction).
   *
   * NOTE(review): de-duplication uses the element id, or otherwise
   * `<tag>-<first 50 chars of text>`, so distinct elements without an id that
   * share tag and text (e.g. several unlabeled inputs) collapse into one entry.
   */
  private extractElements($: cheerio.CheerioAPI): IntentElement[] {
    const elements: IntentElement[] = [];
    const seenKeys = new Set<string>();

    // Selectors for interactive/semantic elements
    const selector = [
      'button',
      'a[href]',
      'input',
      'textarea',
      'select',
      '[role]',
      '[onclick]',
      '[data-action]',
      'h1, h2, h3, h4, h5, h6',
      'label',
      'nav',
      'main',
      'article',
      'section[aria-label]',
      'aside',
      'header',
      'footer',
    ].join(', ');

    const formControlTags = ['input', 'textarea', 'select'];
    const signalAttrs = ['id', 'name', 'type', 'href', 'placeholder', 'aria-label', 'title', 'value'];

    $(selector).each((_, el) => {
      // Type guard for element nodes
      if (!('tagName' in el)) {
        return;
      }
      const $el = $(el);
      const tagName = (el.tagName as string).toLowerCase();

      if ($el.attr('hidden') !== undefined || $el.attr('aria-hidden') === 'true') {
        return;
      }

      // Form controls inside a form are handled by extractForms.
      if (formControlTags.includes(tagName) && $el.closest('form').length > 0) {
        return;
      }

      const id = $el.attr('id') || '';
      const uniqueKey = id || `${tagName}-${$el.text().trim().slice(0, 50)}`;
      if (seenKeys.has(uniqueKey)) {
        return;
      }
      seenKeys.add(uniqueKey);

      const ariaRole = $el.attr('role');
      const textContent = $el.text().trim().slice(0, 200); // Limit text length
      const contextHints = this.extractContextHints($, el);

      // Copy only non-empty signal attributes.
      const attributes: Record<string, string> = {};
      for (const attr of signalAttrs) {
        const value = $el.attr(attr);
        if (value) {
          attributes[attr] = value;
        }
      }

      const intentId = this.assignIntentId(tagName, ariaRole, attributes, textContent, contextHints);

      // Map the intent category onto the element role.
      let role: IntentElement['role'];
      switch (intentId.category) {
        case 'ACTION':
          role = 'action';
          break;
        case 'INPUT':
          role = 'input';
          break;
        case 'NAV':
          role = 'navigation';
          break;
        default:
          role = 'display';
      }

      elements.push({
        intentId: `${intentId.category}_ID:${intentId.purpose}`,
        role,
        label: textContent || attributes['aria-label'] || attributes.placeholder || tagName,
        ariaRole,
        contextHints,
        tagName,
      });
    });

    return elements;
  }

  /**
   * Count tokens in a string (approximate).
   *
   * Uses a character-based estimate of roughly four characters per token,
   * rounded up; an empty string yields 0.
   */
  countTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Normalize HTML to an Intent Document.
   * Main entry point for the Semantic Normalizer.
   *
   * Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6
   *
   * @param html - Raw HTML markup.
   * @param sourceUrl - URL recorded on the resulting document (defaults to '').
   * @returns The Intent Document, with `buildTimeMs` and `tokenReduction`
   *   rounded to two decimals. `tokenReduction` is the percentage difference
   *   between the estimated tokens of the input HTML and of the serialized
   *   document; it is 0 for empty input and can be negative when the document
   *   is larger than the input.
   */
  normalize(html: string, sourceUrl: string = ''): IntentDocument {
    const startTime = performance.now();
    const originalTokens = this.countTokens(html);

    // Strip noise (Requirement 1.2), then parse the cleaned markup.
    const $ = cheerio.load(this.stripNoise(html));

    const document: IntentDocument = {
      elements: this.extractElements($), // Requirements 1.1, 1.3, 1.5
      forms: this.extractForms($), // Requirement 1.6
      navigation: this.extractNavigation($),
      buildTimeMs: 0, // Set below
      tokenReduction: 0, // Set below
      sourceUrl,
      createdAt: Date.now(),
    };

    // Metrics are measured on the document serialized with placeholder metrics.
    const resultTokens = this.countTokens(JSON.stringify(document));
    const buildTimeMs = performance.now() - startTime;
    const tokenReduction =
      originalTokens > 0 ? ((originalTokens - resultTokens) / originalTokens) * 100 : 0;

    document.buildTimeMs = Math.round(buildTimeMs * 100) / 100;
    document.tokenReduction = Math.round(tokenReduction * 100) / 100;

    return document;
  }
}
|
|
696
|
+
|
|
697
|
+
/**
 * Factory for the Semantic Normalizer.
 *
 * @returns A freshly constructed `SemanticNormalizer`; every call yields a new
 *   instance.
 */
export function createSemanticNormalizer(): SemanticNormalizer {
  const normalizer = new SemanticNormalizer();
  return normalizer;
}
|