outcome-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +261 -0
  2. package/package.json +95 -0
  3. package/src/agents/README.md +139 -0
  4. package/src/agents/adapters/anthropic.adapter.ts +166 -0
  5. package/src/agents/adapters/dalle.adapter.ts +145 -0
  6. package/src/agents/adapters/gemini.adapter.ts +134 -0
  7. package/src/agents/adapters/imagen.adapter.ts +106 -0
  8. package/src/agents/adapters/nano-banana.adapter.ts +129 -0
  9. package/src/agents/adapters/openai.adapter.ts +165 -0
  10. package/src/agents/adapters/veo.adapter.ts +130 -0
  11. package/src/agents/agent.schema.property.test.ts +379 -0
  12. package/src/agents/agent.schema.test.ts +148 -0
  13. package/src/agents/agent.schema.ts +263 -0
  14. package/src/agents/index.ts +60 -0
  15. package/src/agents/registered-agent.schema.ts +356 -0
  16. package/src/agents/registry.ts +97 -0
  17. package/src/agents/tournament-configs.property.test.ts +266 -0
  18. package/src/cli/README.md +145 -0
  19. package/src/cli/commands/define.ts +79 -0
  20. package/src/cli/commands/list.ts +46 -0
  21. package/src/cli/commands/logs.ts +83 -0
  22. package/src/cli/commands/run.ts +416 -0
  23. package/src/cli/commands/verify.ts +110 -0
  24. package/src/cli/index.ts +81 -0
  25. package/src/config/README.md +128 -0
  26. package/src/config/env.ts +262 -0
  27. package/src/config/index.ts +19 -0
  28. package/src/eval/README.md +318 -0
  29. package/src/eval/ai-judge.test.ts +435 -0
  30. package/src/eval/ai-judge.ts +368 -0
  31. package/src/eval/code-validators.ts +414 -0
  32. package/src/eval/evaluateOutcome.property.test.ts +1174 -0
  33. package/src/eval/evaluateOutcome.ts +591 -0
  34. package/src/eval/immigration-validators.ts +122 -0
  35. package/src/eval/index.ts +90 -0
  36. package/src/eval/judge-cache.ts +402 -0
  37. package/src/eval/tournament-validators.property.test.ts +439 -0
  38. package/src/eval/validators.property.test.ts +1118 -0
  39. package/src/eval/validators.ts +1199 -0
  40. package/src/eval/weighted-scorer.ts +285 -0
  41. package/src/index.ts +17 -0
  42. package/src/league/README.md +188 -0
  43. package/src/league/health-check.ts +353 -0
  44. package/src/league/index.ts +93 -0
  45. package/src/league/killAgent.ts +151 -0
  46. package/src/league/league.test.ts +1151 -0
  47. package/src/league/runLeague.ts +843 -0
  48. package/src/league/scoreAgent.ts +175 -0
  49. package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
  50. package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
  51. package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
  52. package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
  53. package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
  54. package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
  55. package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
  56. package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
  57. package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
  58. package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
  59. package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
  60. package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
  61. package/src/modules/omnibridge/api/.gitkeep +1 -0
  62. package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
  63. package/src/modules/omnibridge/auth/.gitkeep +1 -0
  64. package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
  65. package/src/modules/omnibridge/auth/session-vault.ts +577 -0
  66. package/src/modules/omnibridge/core/.gitkeep +1 -0
  67. package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
  68. package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
  69. package/src/modules/omnibridge/core/types.ts +610 -0
  70. package/src/modules/omnibridge/execution/.gitkeep +1 -0
  71. package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
  72. package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
  73. package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
  74. package/src/modules/omnibridge/index.ts +212 -0
  75. package/src/modules/omnibridge/omnibridge.ts +510 -0
  76. package/src/modules/omnibridge/verification/.gitkeep +1 -0
  77. package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
  78. package/src/outcomes/README.md +75 -0
  79. package/src/outcomes/acquire-pilot-customer.ts +297 -0
  80. package/src/outcomes/code-delivery-outcomes.ts +89 -0
  81. package/src/outcomes/code-outcomes.ts +256 -0
  82. package/src/outcomes/code_review_battle.test.ts +135 -0
  83. package/src/outcomes/code_review_battle.ts +135 -0
  84. package/src/outcomes/cold_email_battle.ts +97 -0
  85. package/src/outcomes/content_creation_battle.ts +160 -0
  86. package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
  87. package/src/outcomes/index.ts +107 -0
  88. package/src/outcomes/lead_gen_battle.test.ts +113 -0
  89. package/src/outcomes/lead_gen_battle.ts +99 -0
  90. package/src/outcomes/outcome.schema.property.test.ts +229 -0
  91. package/src/outcomes/outcome.schema.ts +187 -0
  92. package/src/outcomes/qualified_sales_interest.ts +118 -0
  93. package/src/outcomes/swarm_planner.property.test.ts +370 -0
  94. package/src/outcomes/swarm_planner.ts +96 -0
  95. package/src/outcomes/web_extraction.ts +234 -0
  96. package/src/runtime/README.md +220 -0
  97. package/src/runtime/agentRunner.test.ts +341 -0
  98. package/src/runtime/agentRunner.ts +746 -0
  99. package/src/runtime/claudeAdapter.ts +232 -0
  100. package/src/runtime/costTracker.ts +123 -0
  101. package/src/runtime/index.ts +34 -0
  102. package/src/runtime/modelAdapter.property.test.ts +305 -0
  103. package/src/runtime/modelAdapter.ts +144 -0
  104. package/src/runtime/openaiAdapter.ts +235 -0
  105. package/src/utils/README.md +122 -0
  106. package/src/utils/command-runner.ts +134 -0
  107. package/src/utils/cost-guard.ts +379 -0
  108. package/src/utils/errors.test.ts +290 -0
  109. package/src/utils/errors.ts +442 -0
  110. package/src/utils/index.ts +37 -0
  111. package/src/utils/logger.test.ts +361 -0
  112. package/src/utils/logger.ts +419 -0
  113. package/src/utils/output-parsers.ts +216 -0
@@ -0,0 +1,702 @@
1
+ /**
2
+ * Semantic Normalizer
3
+ *
4
+ * Converts raw DOM to Intent-Tagged JSON, stripping 90% of non-functional code.
5
+ * This is Layer 1 of OmniBridge: DOM → Intent
6
+ *
7
+ * Requirements: 1.1, 1.2, 1.3, 1.5, 1.6
8
+ */
9
+
10
+ import * as cheerio from 'cheerio';
11
+ import type {
12
+ IntentDocument,
13
+ IntentElement,
14
+ IntentId,
15
+ IntentForm,
16
+ IntentFormField,
17
+ IntentNavigation,
18
+ } from './types.js';
19
+
/**
 * CSS selectors for elements that are removed outright during noise
 * reduction (scripts, styling, tracking hooks, ad containers).
 *
 * Ad containers are matched on class tokens that START with `ad-`
 * (`[class^="ad-"]` for the first token, `[class*=" ad-"]` for later ones).
 * A bare substring match on `ad-` would also hit unrelated classes such as
 * `download-btn`, `upload-form`, `read-only` or `head-nav` and delete
 * functional elements.
 */
const NOISE_SELECTORS: readonly string[] = [
  // Executable / presentational / embedded content
  'script',
  'style',
  'noscript',
  'iframe',
  'svg',
  'link[rel="stylesheet"]',
  'meta',
  'head > *:not(title)',
  // Tracking and analytics hooks
  '[data-tracking]',
  '[data-analytics]',
  '[data-gtm]',
  '.tracking',
  '.analytics',
  // Advertising containers
  '#google_ads_iframe',
  '[id*="google_ads"]',
  '[class^="ad-"]',
  '[class*=" ad-"]',
  '[class*="advertisement"]',
  '[data-ad]',
  'ins.adsbygoogle',
];
44
+
/**
 * ARIA role to Intent category mapping.
 *
 * The table is built on a null-prototype object so that looking up an
 * arbitrary role string taken from markup (e.g. `constructor`, `toString`)
 * yields `undefined` instead of an inherited `Object.prototype` member.
 */
const ARIA_TO_CATEGORY: Record<string, IntentId['category']> = (() => {
  const entries: Record<string, IntentId['category']> = {
    // Widgets that trigger something
    button: 'ACTION',
    link: 'NAV',
    // Data-entry widgets
    textbox: 'INPUT',
    searchbox: 'INPUT',
    checkbox: 'INPUT',
    radio: 'INPUT',
    combobox: 'INPUT',
    listbox: 'INPUT',
    slider: 'INPUT',
    spinbutton: 'INPUT',
    switch: 'INPUT',
    // Navigation structures
    navigation: 'NAV',
    menu: 'NAV',
    menuitem: 'NAV',
    tab: 'NAV',
    tablist: 'NAV',
    // Content / landmark roles
    heading: 'DISPLAY',
    img: 'DISPLAY',
    figure: 'DISPLAY',
    table: 'DISPLAY',
    grid: 'DISPLAY',
    list: 'DISPLAY',
    listitem: 'DISPLAY',
    article: 'DISPLAY',
    region: 'DISPLAY',
    main: 'DISPLAY',
    banner: 'DISPLAY',
    contentinfo: 'DISPLAY',
    complementary: 'DISPLAY',
    form: 'INPUT',
    search: 'INPUT',
    // Live regions and dialogs
    alert: 'DISPLAY',
    alertdialog: 'ACTION',
    dialog: 'ACTION',
    progressbar: 'DISPLAY',
    status: 'DISPLAY',
    tooltip: 'DISPLAY',
  };
  return Object.assign(Object.create(null), entries);
})();
87
+
88
+
/**
 * Tag to Intent category mapping (fallback when no ARIA role).
 *
 * Keys are lowercase tag names. The table is built on a null-prototype
 * object so that an unexpected tag name coming from markup (e.g.
 * `constructor`) resolves to `undefined` rather than to an inherited
 * `Object.prototype` member.
 */
const TAG_TO_CATEGORY: Record<string, IntentId['category']> = (() => {
  const entries: Record<string, IntentId['category']> = {
    // Interactive controls
    button: 'ACTION',
    a: 'NAV',
    input: 'INPUT',
    textarea: 'INPUT',
    select: 'INPUT',
    form: 'INPUT',
    nav: 'NAV',
    // Sectioning content
    header: 'DISPLAY',
    footer: 'DISPLAY',
    main: 'DISPLAY',
    article: 'DISPLAY',
    section: 'DISPLAY',
    aside: 'DISPLAY',
    // Headings
    h1: 'DISPLAY',
    h2: 'DISPLAY',
    h3: 'DISPLAY',
    h4: 'DISPLAY',
    h5: 'DISPLAY',
    h6: 'DISPLAY',
    // Generic text / media / list content
    p: 'DISPLAY',
    span: 'DISPLAY',
    div: 'DISPLAY',
    img: 'DISPLAY',
    table: 'DISPLAY',
    ul: 'DISPLAY',
    ol: 'DISPLAY',
    li: 'DISPLAY',
    label: 'DISPLAY',
  };
  return Object.assign(Object.create(null), entries);
})();
122
+
/**
 * Semantic Normalizer class
 *
 * Converts an HTML string into an IntentDocument: noise is stripped first,
 * then semantic elements, forms and navigation links are extracted and each
 * is tagged with an intent id of the form `<CATEGORY>_ID:<PURPOSE>`.
 */
export class SemanticNormalizer {
  /**
   * Substrings that mark a class token as semantically meaningful; any
   * other class token is dropped by stripNoise.
   */
  private static readonly SEMANTIC_CLASS_HINTS: readonly string[] = [
    'btn',
    'button',
    'nav',
    'menu',
    'form',
    'input',
    'search',
    'submit',
    'login',
    'signup',
    'cart',
    'checkout',
    // extractNavigation selects on `.breadcrumb`, so the class must survive stripping.
    'breadcrumb',
  ];

  /** `data-*` attributes whose name contains one of these substrings are kept. */
  private static readonly KEPT_DATA_ATTR_HINTS: readonly string[] = [
    'action',
    'target',
    'value',
    'id',
    'name',
  ];

  /**
   * Ordered purpose rules: the FIRST rule that matches wins, so order matters.
   * A rule matches when any keyword is a substring of the combined signals,
   * or when `inputType` equals the element's `type` attribute; a rule with
   * `unless` is skipped when that substring is also present.
   */
  private static readonly PURPOSE_RULES: ReadonlyArray<{
    purpose: string;
    keywords: readonly string[];
    inputType?: string;
    unless?: string;
  }> = [
    { purpose: 'LOGIN', keywords: ['login', 'sign in', 'signin'] },
    { purpose: 'SIGNUP', keywords: ['signup', 'sign up', 'register'] },
    { purpose: 'SEARCH', keywords: ['search'] },
    { purpose: 'SUBMIT', keywords: ['submit', 'send'] },
    { purpose: 'CANCEL', keywords: ['cancel', 'close'] },
    { purpose: 'DELETE', keywords: ['delete', 'remove'] },
    { purpose: 'EDIT', keywords: ['edit', 'modify'] },
    { purpose: 'SAVE', keywords: ['save'] },
    { purpose: 'CART', keywords: ['cart', 'basket'] },
    { purpose: 'PURCHASE', keywords: ['checkout', 'purchase', 'buy'] },
    { purpose: 'PAYMENT', keywords: ['pay', 'payment'] },
    { purpose: 'EMAIL_INPUT', keywords: ['email'], inputType: 'email' },
    { purpose: 'PASSWORD_INPUT', keywords: ['password'], inputType: 'password' },
    { purpose: 'PHONE_INPUT', keywords: ['phone'], inputType: 'tel' },
    { purpose: 'NAME_INPUT', keywords: ['name'], unless: 'username' },
    { purpose: 'USERNAME_INPUT', keywords: ['username'] },
    { purpose: 'ADDRESS_INPUT', keywords: ['address'] },
    { purpose: 'NEXT', keywords: ['next', 'continue'] },
    { purpose: 'BACK', keywords: ['back', 'previous'] },
    { purpose: 'NAVIGATION', keywords: ['menu', 'navigation'] },
    { purpose: 'HOME', keywords: ['home'] },
    { purpose: 'PROFILE', keywords: ['profile', 'account'] },
    { purpose: 'SETTINGS', keywords: ['settings', 'preferences'] },
    { purpose: 'HELP', keywords: ['help', 'support'] },
    { purpose: 'DOWNLOAD', keywords: ['download'] },
    { purpose: 'UPLOAD', keywords: ['upload'] },
  ];

  /**
   * Strip noise from HTML (CSS, scripts, tracking, metadata).
   *
   * @param html - Raw HTML markup.
   * @returns The serialized markup after every NOISE_SELECTORS match, inline
   *   style, non-semantic class token, non-semantic `data-*` attribute,
   *   comment and empty `div`/`span`/`p` has been removed.
   */
  stripNoise(html: string): string {
    const $ = cheerio.load(html);

    // Remove all noise elements
    for (const selector of NOISE_SELECTORS) {
      $(selector).remove();
    }

    // Remove inline styles
    $('[style]').removeAttr('style');

    // Keep only class tokens that look semantic; drop the attribute when none remain
    $('[class]').each((_, el) => {
      const $el = $(el);
      const keptTokens = ($el.attr('class') || '')
        .split(/\s+/)
        .filter((token) =>
          SemanticNormalizer.SEMANTIC_CLASS_HINTS.some((hint) => token.includes(hint))
        );

      if (keptTokens.length > 0) {
        $el.attr('class', keptTokens.join(' '));
      } else {
        $el.removeAttr('class');
      }
    });

    // Remove data attributes except semantic ones
    $('*').each((_, el) => {
      const $el = $(el);
      for (const attr of Object.keys($el.attr() || {})) {
        const looksSemantic = SemanticNormalizer.KEPT_DATA_ATTR_HINTS.some((hint) =>
          attr.includes(hint)
        );
        if (attr.startsWith('data-') && !looksSemantic) {
          $el.removeAttr(attr);
        }
      }
    });

    // Remove comments: document-level ones first (they are not children of
    // any element, so `$('*')` never reaches them), then those inside elements.
    $.root()
      .contents()
      .filter(function () {
        return this.type === 'comment';
      })
      .remove();
    $('*')
      .contents()
      .filter(function () {
        return this.type === 'comment';
      })
      .remove();

    // Remove empty elements (except semantic ones)
    $('div:empty, span:empty, p:empty').remove();

    return $.html();
  }

  /**
   * Assign an Intent ID to an element based on its semantic meaning.
   * Prioritizes ARIA roles when present (Requirement 1.5).
   *
   * @param tagName - Element tag name (any case).
   * @param ariaRole - Raw `role` attribute value, if any. It is lowercased and
   *   split on whitespace; the first token found in ARIA_TO_CATEGORY wins.
   * @param attributes - Element attributes used as purpose signals.
   * @param textContent - Visible text of the element.
   * @param contextHints - Extra text signals from surrounding markup.
   * @returns Category, purpose (uppercase, whitespace collapsed to `_`) and a
   *   confidence of 0.95 for a role match, otherwise 0.70–0.95 depending on
   *   which signals are present.
   */
  assignIntentId(
    tagName: string,
    ariaRole: string | undefined,
    attributes: Record<string, string>,
    textContent: string,
    contextHints: string[]
  ): IntentId {
    const toPurposeToken = (raw: string): string => raw.toUpperCase().replace(/\s+/g, '_');

    // Priority 1: ARIA role (Requirement 1.5)
    const roleTokens = (ariaRole ?? '')
      .toLowerCase()
      .split(/\s+/)
      .filter((token) => token.length > 0);
    for (const role of roleTokens) {
      const roleCategory = this.lookupCategory(ARIA_TO_CATEGORY, role);
      if (roleCategory !== undefined) {
        return {
          category: roleCategory,
          purpose: toPurposeToken(this.inferPurpose(role, attributes, textContent, contextHints)),
          confidence: 0.95, // High confidence when ARIA role is present
        };
      }
    }

    // Priority 2: Tag-based inference
    const category = this.lookupCategory(TAG_TO_CATEGORY, tagName.toLowerCase()) ?? 'DISPLAY';
    const purpose = this.inferPurpose(tagName, attributes, textContent, contextHints);

    // Confidence is accumulated in whole hundredths so the result carries no
    // floating-point noise (0.7 + 0.1 + 0.1 + 0.05 is not exactly 0.95).
    let hundredths = 70; // Base confidence
    if (attributes.id || attributes.name) hundredths += 10;
    if (textContent.trim().length > 0) hundredths += 10;
    if (contextHints.length > 0) hundredths += 5;

    return {
      category,
      purpose: toPurposeToken(purpose),
      confidence: Math.min(hundredths, 100) / 100,
    };
  }

  /**
   * Look a key up in a category table using own properties only, so keys such
   * as `constructor` never resolve to inherited Object.prototype members.
   */
  private lookupCategory(
    table: Record<string, IntentId['category']>,
    key: string
  ): IntentId['category'] | undefined {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
  }

  /**
   * Infer the purpose of an element from available signals.
   *
   * The text content, `id`, `name`, `type`, `placeholder`, `aria-label` and
   * the context hints are lowercased and joined into one string, which is
   * then tested against PURPOSE_RULES in order (substring matching).
   *
   * @returns The purpose of the first matching rule, or `roleOrTag`
   *   uppercased when no rule matches.
   */
  private inferPurpose(
    roleOrTag: string,
    attributes: Record<string, string>,
    textContent: string,
    contextHints: string[]
  ): string {
    const lower = (value: string | undefined): string => (value || '').toLowerCase();
    const type = lower(attributes.type);

    // Combine all signals
    const signals = [
      textContent.toLowerCase().trim(),
      lower(attributes.id),
      lower(attributes.name),
      type,
      lower(attributes.placeholder),
      lower(attributes['aria-label']),
      ...contextHints,
    ]
      .join(' ')
      .toLowerCase();

    const matched = SemanticNormalizer.PURPOSE_RULES.find(({ keywords, inputType, unless }) => {
      const hit =
        keywords.some((keyword) => signals.includes(keyword)) ||
        (inputType !== undefined && type === inputType);
      const vetoed = unless !== undefined && signals.includes(unless);
      return hit && !vetoed;
    });

    // Fallback to role/tag based purpose
    return matched ? matched.purpose : roleOrTag.toUpperCase();
  }

  /**
   * Concatenated, trimmed text of every `<label>` whose `for` attribute
   * equals `id`. The comparison is done in code rather than by building a
   * selector, so ids containing quotes or other selector syntax are safe.
   */
  private findLabelTextFor($: cheerio.CheerioAPI, id: string): string {
    return $('label[for]')
      .filter((_, labelEl) => $(labelEl).attr('for') === id)
      .text()
      .trim();
  }

  /**
   * Extract context hints from surrounding elements.
   *
   * Collects, in order: the parent's own text (excluding child elements,
   * capped at 100 characters), the text of an immediately preceding `<label>`
   * sibling, the text of labels pointing at the element's id, and the text of
   * each element referenced by `aria-describedby` (a whitespace-separated
   * list of ids).
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private extractContextHints($: cheerio.CheerioAPI, el: any): string[] {
    const $el = $(el);
    const hints: string[] = [];

    // Parent text without the text of the parent's child elements
    const parentOwnText = $el.parent().clone().children().remove().end().text().trim();
    if (parentOwnText) {
      hints.push(parentOwnText.slice(0, 100)); // Limit length
    }

    // Label placed directly before the element
    const precedingLabel = $el.prev('label').text().trim();
    if (precedingLabel) {
      hints.push(precedingLabel);
    }

    // Label associated through its `for` attribute
    const id = $el.attr('id');
    if (id) {
      const labelText = this.findLabelTextFor($, id);
      if (labelText) {
        hints.push(labelText);
      }
    }

    // aria-describedby is an id reference LIST; resolve each id by attribute
    // comparison so unusual ids cannot produce an invalid or wrong selector.
    const describedBy = $el.attr('aria-describedby');
    if (describedBy) {
      const referencedIds = describedBy.split(/\s+/).filter((token) => token.length > 0);
      for (const refId of referencedIds) {
        const descText = $('[id]')
          .filter((_, node) => $(node).attr('id') === refId)
          .text()
          .trim();
        if (descText) {
          hints.push(descText);
        }
      }
    }

    return hints;
  }

  /**
   * Extract forms from the document (Requirement 1.6).
   *
   * Every `input`, `textarea` and `select` inside a form becomes a field with
   * its label, required flag and validation rules. The form method is 'POST'
   * only when the `method` attribute says so (case-insensitive); a missing or
   * any other value is reported as 'GET', matching the HTML default.
   */
  private extractForms($: cheerio.CheerioAPI): IntentForm[] {
    const forms: IntentForm[] = [];
    const validationAttrs = ['pattern', 'minlength', 'maxlength', 'min', 'max'];

    $('form').each((_, formEl) => {
      const $form = $(formEl);
      const fields: IntentFormField[] = [];

      // Extract all form fields
      $form.find('input, textarea, select').each((_, fieldEl) => {
        const $field = $(fieldEl);
        const tagName = fieldEl.tagName.toLowerCase();
        const type = $field.attr('type') || (tagName === 'textarea' ? 'textarea' : 'text');
        const name = $field.attr('name') || '';
        const id = $field.attr('id') || '';
        const placeholder = $field.attr('placeholder') || '';
        const required =
          $field.attr('required') !== undefined || $field.attr('aria-required') === 'true';

        // Label precedence: <label for>, wrapping <label>, placeholder, name, type
        const label =
          (id ? this.findLabelTextFor($, id) : '') ||
          $field.closest('label').text().trim() ||
          placeholder ||
          name ||
          type;

        // Extract validation rules as "<attribute>:<value>" strings
        const validationRules = validationAttrs.flatMap((attr) => {
          const value = $field.attr(attr);
          return value ? [`${attr}:${value}`] : [];
        });

        const intentId = this.assignIntentId(
          tagName,
          $field.attr('role'),
          {
            id,
            name,
            type,
            placeholder,
            'aria-label': $field.attr('aria-label') || '',
          },
          label,
          []
        );

        fields.push({
          intentId: `${intentId.category}_ID:${intentId.purpose}`,
          name,
          type,
          label,
          required,
          validationRules: validationRules.length > 0 ? validationRules : undefined,
          placeholder: placeholder || undefined,
        });
      });

      // Find submit button; a <button> without a type attribute also submits
      const $submitBtn = $form
        .find('button[type="submit"], button:not([type]), input[type="submit"]')
        .first();
      const submitButtonId = $submitBtn.length > 0 ? 'ACTION_ID:SUBMIT' : undefined;

      const formIntentId = this.assignIntentId(
        'form',
        $form.attr('role'),
        {
          id: $form.attr('id') || '',
          name: $form.attr('name') || '',
          action: $form.attr('action') || '',
        },
        '',
        []
      );

      const declaredMethod = ($form.attr('method') ?? '').trim().toUpperCase();
      const method: 'GET' | 'POST' = declaredMethod === 'POST' ? 'POST' : 'GET';

      forms.push({
        intentId: `${formIntentId.category}_ID:${formIntentId.purpose}`,
        action: $form.attr('action') || undefined,
        method,
        fields,
        submitButtonId,
      });
    });

    return forms;
  }

  /**
   * True when an href points somewhere a navigation link can go: not blank,
   * not a same-page fragment, and not a `javascript:` pseudo-URL (checked
   * case-insensitively after trimming).
   */
  private isNavigableHref(href: string): boolean {
    const target = href.trim().toLowerCase();
    return target !== '' && !target.startsWith('#') && !target.startsWith('javascript:');
  }

  /**
   * Extract navigation structure from the document.
   *
   * Primary links are anchors inside `nav`, `header` or `[role="navigation"]`
   * that have a label and a navigable href. Breadcrumb trails are elements
   * carrying the `breadcrumb` class or an `aria-label` that contains
   * "breadcrumb" in any letter case; their anchors are listed first, followed
   * by non-anchor `[aria-current]` items (the current page).
   */
  private extractNavigation($: cheerio.CheerioAPI): IntentNavigation {
    const primaryLinks: IntentNavigation['primaryLinks'] = [];
    const breadcrumbs: IntentNavigation['breadcrumbs'] = [];

    // Extract primary navigation links
    $('nav a, header a, [role="navigation"] a').each((_, el) => {
      const $link = $(el);
      const href = $link.attr('href');
      const label = $link.text().trim();
      if (!href || !label || !this.isNavigableHref(href)) {
        return;
      }

      const intentId = this.assignIntentId(
        'a',
        $link.attr('role'),
        { href, id: $link.attr('id') || '' },
        label,
        []
      );

      primaryLinks.push({
        intentId: `${intentId.category}_ID:${intentId.purpose}`,
        label,
        href,
      });
    });

    // Locate breadcrumb containers
    const $trails = $('[aria-label], .breadcrumb').filter((_, node) => {
      const $node = $(node);
      return (
        $node.hasClass('breadcrumb') ||
        ($node.attr('aria-label') ?? '').toLowerCase().includes('breadcrumb')
      );
    });

    // Guards against listing a node twice when trail containers are nested
    const seenNodes = new Set<unknown>();

    // Extract breadcrumbs
    $trails.find('a').each((_, el) => {
      if (seenNodes.has(el)) {
        return;
      }
      seenNodes.add(el);
      const $crumb = $(el);
      breadcrumbs.push({
        label: $crumb.text().trim(),
        href: $crumb.attr('href'),
      });
    });

    // Add current page to breadcrumbs if present. Anchors are skipped (they
    // were listed above) and so is an explicit aria-current="false".
    $trails.find('[aria-current]').each((_, el) => {
      const $current = $(el);
      if (seenNodes.has(el) || $current.is('a') || $current.attr('aria-current') === 'false') {
        return;
      }
      seenNodes.add(el);
      breadcrumbs.push({
        label: $current.text().trim(),
      });
    });

    return {
      primaryLinks,
      breadcrumbs: breadcrumbs.length > 0 ? breadcrumbs : undefined,
    };
  }

  /**
   * Extract semantic elements from the document.
   *
   * Skips elements flagged `hidden` / `aria-hidden="true"`, form controls that
   * live inside a `<form>` (extractForms covers those), and repeats of an
   * element already emitted. Two elements count as the same when they share
   * an id, or — without an id — the same tag, leading text and identifying
   * attributes, so distinct id-less inputs or same-text links with different
   * targets are all kept.
   */
  private extractElements($: cheerio.CheerioAPI): IntentElement[] {
    const elements: IntentElement[] = [];
    const emittedKeys = new Set<string>();

    // Selectors for interactive/semantic elements
    const selectors = [
      'button',
      'a[href]',
      'input',
      'textarea',
      'select',
      '[role]',
      '[onclick]',
      '[data-action]',
      'h1, h2, h3, h4, h5, h6',
      'label',
      'nav',
      'main',
      'article',
      'section[aria-label]',
      'aside',
      'header',
      'footer',
    ];
    const capturedAttrs = ['id', 'name', 'type', 'href', 'placeholder', 'aria-label', 'title', 'value'];
    const formControlTags = ['input', 'textarea', 'select'];

    $(selectors.join(', ')).each((_, el) => {
      // Type guard for element nodes
      if (!('tagName' in el)) {
        return;
      }
      const $el = $(el);
      const tagName = (el.tagName as string).toLowerCase();

      // Skip hidden elements
      if ($el.attr('hidden') !== undefined || $el.attr('aria-hidden') === 'true') {
        return;
      }

      // Skip elements inside forms (handled separately)
      if (formControlTags.includes(tagName) && $el.closest('form').length > 0) {
        return;
      }

      const attributes: Record<string, string> = {};
      for (const attr of capturedAttrs) {
        const val = $el.attr(attr);
        if (val) {
          attributes[attr] = val;
        }
      }

      const fullText = $el.text().trim();
      const id = attributes.id ?? '';
      // The `id:` / `el:` prefixes keep an id from colliding with a composed key.
      const uniqueKey = id
        ? `id:${id}`
        : [
            'el',
            tagName,
            fullText.slice(0, 50),
            attributes.name ?? '',
            attributes.type ?? '',
            attributes.href ?? '',
            attributes['aria-label'] ?? '',
            attributes.placeholder ?? '',
            attributes.value ?? '',
          ].join('\u0000');

      // Skip duplicates
      if (emittedKeys.has(uniqueKey)) {
        return;
      }
      emittedKeys.add(uniqueKey);

      const ariaRole = $el.attr('role');
      const textContent = fullText.slice(0, 200); // Limit text length
      const contextHints = this.extractContextHints($, el);

      const intentId = this.assignIntentId(tagName, ariaRole, attributes, textContent, contextHints);

      // Determine role
      let role: IntentElement['role'];
      switch (intentId.category) {
        case 'ACTION':
          role = 'action';
          break;
        case 'INPUT':
          role = 'input';
          break;
        case 'NAV':
          role = 'navigation';
          break;
        default:
          role = 'display';
      }

      elements.push({
        intentId: `${intentId.category}_ID:${intentId.purpose}`,
        role,
        label: textContent || attributes['aria-label'] || attributes.placeholder || tagName,
        ariaRole,
        contextHints,
        tagName,
      });
    });

    return elements;
  }

  /**
   * Count tokens in a string (approximate).
   * Uses a character-based approximation: about 4 characters per token,
   * rounded up.
   */
  countTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Normalize HTML to Intent Document.
   * Main entry point for the Semantic Normalizer.
   *
   * Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 1.6
   *
   * @param html - Raw HTML markup.
   * @param sourceUrl - URL the markup came from; stored as-is on the result.
   * @returns The Intent Document. `buildTimeMs` and `tokenReduction` are
   *   rounded to two decimals; `tokenReduction` is a percentage relative to
   *   the input and is negative when the JSON result is larger than the input.
   */
  normalize(html: string, sourceUrl: string = ''): IntentDocument {
    const startedAt = performance.now();

    // Count original tokens
    const originalTokens = this.countTokens(html);

    // Strip noise (Requirement 1.2), then parse the cleaned markup
    const $ = cheerio.load(this.stripNoise(html));

    // Build the Intent Document; both metrics are filled in below
    const intentDocument: IntentDocument = {
      elements: this.extractElements($), // Requirements 1.1, 1.3, 1.5
      forms: this.extractForms($), // Requirement 1.6
      navigation: this.extractNavigation($),
      buildTimeMs: 0,
      tokenReduction: 0,
      sourceUrl,
      createdAt: Date.now(),
    };

    // Calculate metrics (measured while the metric fields still hold 0)
    const resultTokens = this.countTokens(JSON.stringify(intentDocument));
    const elapsedMs = performance.now() - startedAt;

    // Calculate token reduction percentage
    const tokenReduction =
      originalTokens > 0 ? ((originalTokens - resultTokens) / originalTokens) * 100 : 0;

    intentDocument.buildTimeMs = Math.round(elapsedMs * 100) / 100;
    intentDocument.tokenReduction = Math.round(tokenReduction * 100) / 100;

    return intentDocument;
  }
}
696
+
/**
 * Factory helper: constructs a fresh SemanticNormalizer and hands it back.
 *
 * @returns A newly created SemanticNormalizer instance.
 */
export function createSemanticNormalizer(): SemanticNormalizer {
  const normalizer = new SemanticNormalizer();
  return normalizer;
}