@arclabs561/ai-visual-test 0.5.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +127 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +902 -5
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +699 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +35 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
@@ -23,7 +23,7 @@ import { TEMPORAL_CONSTANTS } from './constants.mjs';
23
23
  /**
24
24
  * Test gameplay with variable goals
25
25
  *
26
- * Complete workflow for testing games with variable goals/prompts.
26
+ * Workflow for testing games with variable goals/prompts.
27
27
  * Originally motivated by interactive web applications that require
28
28
  * real-time validation, variable goals, and temporal understanding.
29
29
  *
@@ -96,7 +96,11 @@ export async function testGameplay(page, options = {}) {
96
96
  evaluations: [],
97
97
  aggregated: null,
98
98
  consistency: null,
99
- propagation: []
99
+ propagation: [],
100
+ temporalScreenshots: [], // Initialize to empty array for consistency
101
+ processedTemporalNotes: null, // Initialize to null
102
+ temporalGraph: null, // Initialize to null
103
+ selectedScreenshots: undefined // Only set if >10 screenshots
100
104
  };
101
105
 
102
106
  try {
@@ -173,8 +177,9 @@ export async function testGameplay(page, options = {}) {
173
177
  result.temporalScreenshots = temporalScreenshots;
174
178
  trackPropagation('temporal', { count: temporalScreenshots.length }, 'Captured temporal screenshots');
175
179
 
176
- // IMPROVEMENT: Use temporal preprocessing if requested (better performance)
177
- if (useTemporalPreprocessing && temporalScreenshots.length > 0) {
180
+ // Use temporal preprocessing by default
181
+ // Activity-based: high-Hz uses cache, low-Hz does expensive preprocessing
182
+ if (temporalScreenshots.length > 0) {
178
183
  const { createTemporalPreprocessingManager, createAdaptiveTemporalProcessor } = await import('./temporal-preprocessor.mjs');
179
184
  const preprocessingManager = createTemporalPreprocessingManager();
180
185
  const adaptiveProcessor = createAdaptiveTemporalProcessor(preprocessingManager);
@@ -250,7 +255,7 @@ export async function testGameplay(page, options = {}) {
250
255
  // Always return aggregated notes (even if empty) for consistency
251
256
  if (allNotes.length > 0) {
252
257
  // Use fixed temporal aggregation system
253
- const aggregated = aggregateTemporalNotes(allNotes, {
258
+ const aggregated = await aggregateTemporalNotes(allNotes, {
254
259
  windowSize: 5000,
255
260
  decayFactor: 0.9
256
261
  });
@@ -281,11 +286,58 @@ export async function testGameplay(page, options = {}) {
281
286
  };
282
287
  }
283
288
 
289
+ // IMPROVEMENT: Build temporal graph for better coherence understanding
290
+ try {
291
+ const { buildTemporalGraph } = await import('./temporal.mjs');
292
+ const temporalGraph = await buildTemporalGraph(allNotes, {
293
+ windowSize: 5000,
294
+ decayFactor: 0.9,
295
+ useLLM: false, // Use keyword matching for speed in gameplay
296
+ frequency: fps // Auto-detect extraction method based on frequency
297
+ });
298
+ result.temporalGraph = temporalGraph;
299
+ trackPropagation('temporal-graph', {
300
+ nodes: temporalGraph.graph?.nodes?.length || 0,
301
+ edges: temporalGraph.graph?.edges?.length || 0,
302
+ averageCoherence: temporalGraph.graph?.averageCoherence || 0,
303
+ entityCount: Object.keys(temporalGraph.graph?.entities || {}).length
304
+ }, 'Built temporal graph representation');
305
+ } catch (error) {
306
+ warn(`[Convenience] Temporal graph building failed: ${error.message}`);
307
+ result.temporalGraph = null;
308
+ }
309
+
310
+ // IMPROVEMENT: Select representative screenshots for context window management
311
+ if (result.temporalScreenshots && result.temporalScreenshots.length > 10) {
312
+ try {
313
+ const { selectRepresentativeScreenshots } = await import('./temporal-note-pruner.mjs');
314
+ const evaluations = allNotes.map(n => ({ score: n.score || 0 }));
315
+ const selectedScreenshots = selectRepresentativeScreenshots(
316
+ result.temporalScreenshots,
317
+ evaluations,
318
+ {
319
+ maxScreenshots: 10,
320
+ strategy: 'keyframes' // Use keyframes for gameplay (captures state changes)
321
+ }
322
+ );
323
+ result.selectedScreenshots = selectedScreenshots;
324
+ trackPropagation('screenshot-selection', {
325
+ original: result.temporalScreenshots.length,
326
+ selected: selectedScreenshots.length,
327
+ reduction: ((result.temporalScreenshots.length - selectedScreenshots.length) / result.temporalScreenshots.length * 100).toFixed(1) + '%'
328
+ }, 'Selected representative screenshots for context management');
329
+ } catch (error) {
330
+ warn(`[Convenience] Screenshot selection failed: ${error.message}`);
331
+ result.selectedScreenshots = result.temporalScreenshots; // Fallback to all
332
+ }
333
+ }
334
+
284
335
  trackPropagation('aggregation', {
285
336
  windows: aggregated.windows.length,
286
337
  coherence: aggregated.coherence,
287
- scales: Object.keys(result.aggregatedMultiScale.scales || {})
288
- }, 'Aggregated temporal notes with multi-scale');
338
+ scales: Object.keys(result.aggregatedMultiScale.scales || {}),
339
+ graphNodes: result.temporalGraph?.graph?.nodes?.length || 0
340
+ }, 'Aggregated temporal notes with multi-scale and temporal graph');
289
341
  } else {
290
342
  // Return empty aggregated structure if no notes (for consistency)
291
343
  result.aggregated = {
@@ -373,7 +425,7 @@ export async function testGameplay(page, options = {}) {
373
425
  /**
374
426
  * Test browser experience with multiple stages
375
427
  *
376
- * Complete workflow for testing browser experiences across multiple stages
428
+ * Workflow for testing browser experiences across multiple stages
377
429
  * (initial, form, payment, gameplay, etc.).
378
430
  *
379
431
  * @param {import('playwright').Page} page - Playwright page object
@@ -486,7 +538,7 @@ export async function testBrowserExperience(page, options = {}) {
486
538
  // Aggregate temporal notes across all stages
487
539
  const allStageNotes = result.experiences.flatMap(exp => exp.notes || []);
488
540
  if (allStageNotes.length > 0) {
489
- const stageAggregated = aggregateTemporalNotes(allStageNotes, {
541
+ const stageAggregated = await aggregateTemporalNotes(allStageNotes, {
490
542
  windowSize: 10000,
491
543
  decayFactor: 0.9
492
544
  });
@@ -581,7 +633,7 @@ export async function validateWithGoals(screenshotPath, options = {}) {
581
633
  } else if (context.notes && context.notes.length > 0) {
582
634
  // Auto-aggregate if notes provided but not aggregated
583
635
  try {
584
- temporalNotes = aggregateTemporalNotes(context.notes, {
636
+ temporalNotes = await aggregateTemporalNotes(context.notes, {
585
637
  windowSize: TEMPORAL_CONSTANTS.DEFAULT_WINDOW_SIZE_MS,
586
638
  decayFactor: TEMPORAL_CONSTANTS.DEFAULT_DECAY_FACTOR
587
639
  });
@@ -615,3 +667,54 @@ export async function validateWithGoals(screenshotPath, options = {}) {
615
667
  };
616
668
  }
617
669
 
670
+ /**
671
+ * Validate a Playwright Page directly
672
+ *
673
+ * Handles screenshotting, code extraction, and validation in one step.
674
+ * Reduces boilerplate for common Playwright testing workflows.
675
+ *
676
+ * @param {import('playwright').Page} page - Playwright page object
677
+ * @param {string} prompt - Evaluation prompt
678
+ * @param {Object} options - Validation options
679
+ * @param {boolean} [options.fullPage] - Capture full page screenshot
680
+ * @param {boolean} [options.captureCode] - Extract rendered code (default: true)
681
+ * @param {string} [options.tempDir] - Directory for temp screenshot (default: os.tmpdir())
682
+ * @param {boolean} [options.keepScreenshot] - Keep screenshot after validation (default: false)
683
+ * @returns {Promise<Object>} Validation result
684
+ */
685
+ export async function validatePage(page, prompt, options = {}) {
686
+ if (!page || typeof page.screenshot !== 'function') {
687
+ throw new ValidationError('validatePage: page must be a Playwright Page object', { received: typeof page });
688
+ }
689
+
690
+ // Create temp screenshot
691
+ const fs = await import('fs');
692
+ const path = await import('path');
693
+ const os = await import('os');
694
+ const tempDir = options.tempDir || os.tmpdir();
695
+ const screenshotPath = path.join(tempDir, `validate-page-${Date.now()}.png`);
696
+
697
+ try {
698
+ await page.screenshot({ path: screenshotPath, fullPage: options.fullPage ?? false });
699
+
700
+ // Extract code if requested
701
+ let renderedCode = null;
702
+ if (options.captureCode !== false) {
703
+ renderedCode = await extractRenderedCode(page);
704
+ }
705
+
706
+ // Validate
707
+ const result = await validateScreenshot(screenshotPath, prompt, {
708
+ ...options,
709
+ renderedCode
710
+ });
711
+
712
+ return result;
713
+ } finally {
714
+ // Cleanup unless requested to keep
715
+ if (!options.keepScreenshot && fs.existsSync(screenshotPath)) {
716
+ fs.unlinkSync(screenshotPath);
717
+ }
718
+ }
719
+ }
720
+
@@ -0,0 +1 @@
1
+ (function(_0x18385b,_0x2f2f0a){const _0xa53b50=_0x2d2e,_0x49ca27=_0x18385b();while(!![]){try{const _0xdd5ddf=parseInt(_0xa53b50(0x171))/0x1*(parseInt(_0xa53b50(0x16e))/0x2)+parseInt(_0xa53b50(0x199))/0x3*(-parseInt(_0xa53b50(0x174))/0x4)+parseInt(_0xa53b50(0x19c))/0x5*(parseInt(_0xa53b50(0x180))/0x6)+parseInt(_0xa53b50(0x16b))/0x7*(-parseInt(_0xa53b50(0x195))/0x8)+-parseInt(_0xa53b50(0x17c))/0x9*(-parseInt(_0xa53b50(0x197))/0xa)+-parseInt(_0xa53b50(0x18b))/0xb*(-parseInt(_0xa53b50(0x177))/0xc)+parseInt(_0xa53b50(0x17a))/0xd;if(_0xdd5ddf===_0x2f2f0a)break;else _0x49ca27['push'](_0x49ca27['shift']());}catch(_0x16dc43){_0x49ca27['push'](_0x49ca27['shift']());}}}(_0x1ecb,0x4479d));const _0x40f56c=(function(){let _0x180237=!![];return function(_0x539394,_0xb40faa){const _0x42db3d=_0x180237?function(){if(_0xb40faa){const _0x4b441b=_0xb40faa['apply'](_0x539394,arguments);return _0xb40faa=null,_0x4b441b;}}:function(){};return _0x180237=![],_0x42db3d;};}()),_0xd2e06c=_0x40f56c(this,function(){const _0x212118=_0x2d2e;return _0xd2e06c[_0x212118(0x18c)+'ing']()['searc'+'h'](_0x212118(0x193)+_0x212118(0x188)+'+$')['toStr'+_0x212118(0x198)]()['const'+'ructo'+'r'](_0xd2e06c)[_0x212118(0x167)+'h'](_0x212118(0x193)+_0x212118(0x188)+'+$');});_0xd2e06c();import{selectModelTier,selectProvider,selectModelTierAndProvider}from'./model-tier-selector.mjs';import{createConfig,getProvider}from'./config.mjs';export function calculateCostComparison(_0x57230a={},_0x52fd2f={}){const _0xaa590d=_0x2d2e,_0x33bd6a=parseFloat(_0x52fd2f[_0xaa590d(0x184)+'atedC'+_0xaa590d(0x168)]?.['total'+'Cost']||'0'),_0x397464=_0x57230a['model'+_0xaa590d(0x1a7)]||'balan'+_0xaa590d(0x173),_0x238e4b=_0x52fd2f['provi'+'der']||'gemin'+'i',_0x5ef67d=getProvider(_0x238e4b),_0x48530e={};_0x48530e['input']=0x0,_0x48530e['outpu'+'t']=0x0;const _0x26c913=_0x5ef67d?.[_0xaa590d(0x181)+'ng']||_0x48530e,_0x4f9c20=0x3e8,_0x321d8d=0x1f4,_0x578480={};for(const _0x1ff61d of[_0xaa590d(0x182),_0xaa590d(0x19b)+'ced','best']){const _0x161f86=_0x4f9c20/0xf4240*_0x26c913['input'],_0x519e8b=_0x321d8d/0xf4240*_0x26c913[_0xaa590d(0x1a1)+'t'];_0x578480[_0x1ff61d]=_0x161f86+_0x519e8b;}const _0x420aba={};for(const _0xf4fc80 of[_0xaa590d(0x182),'balan'+_0xaa590d(0x173),_0xaa590d(0x1a9)]){if(_0x578480[_0xf4fc80]&&_0x33bd6a>0x0){const _0x1573e6=_0x33bd6a-_0x578480[_0xf4fc80],_0x36dd72=_0x1573e6/_0x33bd6a*0x64;_0x420aba[_0xf4fc80]={'absolute':_0x1573e6,'percent':_0x36dd72,'cost':_0x578480[_0xf4fc80]};}}const _0x31be62={};return _0x31be62['tier']=_0x397464,_0x31be62[_0xaa590d(0x169)+'der']=_0x238e4b,_0x31be62[_0xaa590d(0x1a4)]=_0x33bd6a,{'current':_0x31be62,'tiers':_0x578480,'savings':_0x420aba,'recommendation':getCostOptimizationRecommendation(_0x57230a,_0x33bd6a,_0x578480)};}function getCostOptimizationRecommendation(_0x48b039,_0x19d13e,_0x2fc7c2){const _0x509a09=_0x2d2e,{frequency:_0x3a7aef,criticality:_0x1ccde8,costSensitive:_0x4f54d4}=_0x48b039;let _0x2bcee6=_0x509a09(0x19b)+_0x509a09(0x173);if(_0x3a7aef===_0x509a09(0x18e)||_0x3a7aef>=0xa||_0x4f54d4)_0x2bcee6=_0x509a09(0x182);else _0x1ccde8===_0x509a09(0x18f)+_0x509a09(0x194)&&(_0x2bcee6='best');const _0x505d15=_0x2fc7c2[_0x2bcee6]||_0x19d13e,_0x44afb3=_0x19d13e-_0x505d15,_0x10c8ba=_0x19d13e>0x0?_0x44afb3/_0x19d13e*0x64:0x0;return{'tier':_0x2bcee6,'cost':_0x505d15,'savings':_0x44afb3,'savingsPercent':_0x10c8ba,'reason':getRecommendationReason(_0x48b039,_0x2bcee6)};}function getRecommendationReason(_0x598c0e,_0xf227a9){const _0x30edec=_0x2d2e;if(_0xf227a9==='fast'){if(_0x598c0e[_0x30edec(0x185)+'ency']===_0x30edec(0x18e)||_0x598c0e['frequ'+_0x30edec(0x1a2)]>=0xa)return _0x30edec(0x1a5)+_0x30edec(0x185)+'ency\x20'+_0x30edec(0x17d)+_0x30edec(0x187)+'\x20requ'+_0x30edec(0x196)+_0x30edec(0x179)+_0x30edec(0x192);if(_0x598c0e[_0x30edec(0x189)+'ensit'+'ive'])return _0x30edec(0x1ac)+'sensi'+'tive\x20'+_0x30edec(0x183)+_0x30edec(0x1ab)+'\x20use\x20'+_0x30edec(0x179)+_0x30edec(0x192);}if(_0xf227a9==='best')return'Criti'+_0x30edec(0x178)+'valua'+_0x30edec(0x190)+_0x30edec(0x16f)+'res\x20b'+'est\x20t'+_0x30edec(0x1a3)+'or\x20qu'+_0x30edec(0x1aa);return'Balan'+_0x30edec(0x18a)+_0x30edec(0x170)+'rovid'+'es\x20sp'+'eed/q'+_0x30edec(0x1a6)+'y\x20tra'+'deoff';}function _0x2d2e(_0x2bf370,_0x100b7e){const _0x3462a3=_0x1ecb();return _0x2d2e=function(_0xd2e06c,_0x40f56c){_0xd2e06c=_0xd2e06c-0x167;let _0x1ecb23=_0x3462a3[_0xd2e06c];if(_0x2d2e['clmymu']===undefined){var _0x2d2e4c=function(_0x4d9406){const _0x4347ab='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x91f6aa='',_0x4491a1='',_0x428b38=_0x91f6aa+_0x2d2e4c;for(let _0x2de447=0x0,_0x464545,_0x5ec257,_0x372eca=0x0;_0x5ec257=_0x4d9406['charAt'](_0x372eca++);~_0x5ec257&&(_0x464545=_0x2de447%0x4?_0x464545*0x40+_0x5ec257:_0x5ec257,_0x2de447++%0x4)?_0x91f6aa+=_0x428b38['charCodeAt'](_0x372eca+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x464545>>(-0x2*_0x2de447&0x6)):_0x2de447:0x0){_0x5ec257=_0x4347ab['indexOf'](_0x5ec257);}for(let _0x180237=0x0,_0x539394=_0x91f6aa['length'];_0x180237<_0x539394;_0x180237++){_0x4491a1+='%'+('00'+_0x91f6aa['charCodeAt'](_0x180237)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x4491a1);};_0x2d2e['YBBLwq']=_0x2d2e4c,_0x2bf370=arguments,_0x2d2e['clmymu']=!![];}const _0x5bd10d=_0x3462a3[0x0],_0x25f117=_0xd2e06c+_0x5bd10d,_0x4bfa19=_0x2bf370[_0x25f117];if(!_0x4bfa19){const _0xb40faa=function(_0x42db3d){this['tUiOdk']=_0x42db3d,this['yllaQc']=[0x1,0x0,0x0],this['eWSZJt']=function(){return'newState';},this['NeSNJo']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['jVFOWK']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0xb40faa['prototype']['JTaKsI']=function(){const _0x4b441b=new RegExp(this['NeSNJo']+this['jVFOWK']),_0x57230a=_0x4b441b['test'](this['eWSZJt']['toString']())?--this['yllaQc'][0x1]:--this['yllaQc'][0x0];return this['eHArMZ'](_0x57230a);},_0xb40faa['prototype']['eHArMZ']=function(_0x52fd2f){if(!Boolean(~_0x52fd2f))return _0x52fd2f;return this['AIYzCx'](this['tUiOdk']);},_0xb40faa['prototype']['AIYzCx']=function(_0x33bd6a){for(let _0x397464=0x0,_0x238e4b=this['yllaQc']['length'];_0x397464<_0x238e4b;_0x397464++){this['yllaQc']['push'](Math['round'](Math['random']())),_0x238e4b=this['yllaQc']['length'];}return _0x33bd6a(this['yllaQc'][0x0]);},new _0xb40faa(_0x2d2e)['JTaKsI'](),_0x1ecb23=_0x2d2e['YBBLwq'](_0x1ecb23),_0x2bf370[_0x25f117]=_0x1ecb23;}else _0x1ecb23=_0x4bfa19;return _0x1ecb23;},_0x2d2e(_0x2bf370,_0x100b7e);}export function optimizeCost(_0x157b14={}){const _0x46d2fd=_0x2d2e,{frequency:_0x11c64d,criticality:_0x1162ee,costSensitive:_0x2e4e15,budget:_0x2fbb39,requirements:requirements={}}=_0x157b14,_0x5d2c5d={};_0x5d2c5d[_0x46d2fd(0x185)+'ency']=_0x11c64d,_0x5d2c5d[_0x46d2fd(0x18f)+_0x46d2fd(0x1a0)+'y']=_0x1162ee,_0x5d2c5d['costS'+'ensit'+'ive']=_0x2e4e15,_0x5d2c5d[_0x46d2fd(0x16f)+_0x46d2fd(0x186)+'ts']={...requirements},_0x5d2c5d[_0x46d2fd(0x16f)+_0x46d2fd(0x186)+'ts'][_0x46d2fd(0x189)+'ensit'+'ive']=_0x2e4e15,_0x5d2c5d[_0x46d2fd(0x16f)+_0x46d2fd(0x186)+'ts']['env']=process[_0x46d2fd(0x17f)];const {tier:_0x214f9b,provider:_0x20c49a,reason:_0x5c1bce}=selectModelTierAndProvider(_0x5d2c5d),_0xdaa5b8={};_0xdaa5b8['model'+'Tier']=_0x214f9b,_0xdaa5b8[_0x46d2fd(0x169)+_0x46d2fd(0x19a)]=_0x20c49a;const _0xfb6ace=createConfig(_0xdaa5b8),_0x56ddaf=getProvider(_0x20c49a),_0x409eff={};_0x409eff['input']=0x0,_0x409eff['outpu'+'t']=0x0;const _0x422e41=_0x56ddaf?.[_0x46d2fd(0x181)+'ng']||_0x409eff,_0x117a81=0x3e8,_0x510ec9=0x1f4,_0x59906d=_0x117a81/0xf4240*_0x422e41[_0x46d2fd(0x16c)]+_0x510ec9/0xf4240*_0x422e41[_0x46d2fd(0x1a1)+'t'],_0x39264a={};for(const _0x149af7 of['fast',_0x46d2fd(0x19b)+'ced','best']){if(_0x149af7!==_0x214f9b){const _0x87f361=_0x59906d,_0x3fad65={};_0x3fad65['cost']=_0x87f361,_0x3fad65[_0x46d2fd(0x19e)+'gs']=_0x59906d-_0x87f361,_0x3fad65[_0x46d2fd(0x19e)+'gsPer'+_0x46d2fd(0x19f)]=_0x59906d>0x0?(_0x59906d-_0x87f361)/_0x59906d*0x64:0x0,_0x39264a[_0x149af7]=_0x3fad65;}}const _0x449d7a=_0x2fbb39?_0x59906d<=_0x2fbb39:null;return{'recommendedTier':_0x214f9b,'recommendedProvider':_0x20c49a,'estimatedCost':_0x59906d,'savings':getSavingsEstimate(_0x214f9b,_0x20c49a,_0x39264a),'config':_0xfb6ace,'reason':_0x5c1bce,'withinBudget':_0x449d7a,'comparisons':_0x39264a,'recommendation':_0x449d7a===![]?_0x46d2fd(0x191)+'ated\x20'+'cost\x20'+'($'+_0x59906d[_0x46d2fd(0x1ad)+'ed'](0x6)+(')\x20exc'+_0x46d2fd(0x175)+'budge'+'t\x20($')+_0x2fbb39[_0x46d2fd(0x1ad)+'ed'](0x6)+(_0x46d2fd(0x17b)+_0x46d2fd(0x172)+'r\x20usi'+'ng\x20\x27f'+_0x46d2fd(0x16d)+'tier.'):'Optim'+'al\x20co'+_0x46d2fd(0x18d)+'ratio'+'n:\x20'+_0x20c49a+'\x20'+_0x214f9b+(_0x46d2fd(0x19d)+'\x20(est'+'imate'+_0x46d2fd(0x1ae))+_0x59906d[_0x46d2fd(0x1ad)+'ed'](0x6)+('\x20per\x20'+'valid'+_0x46d2fd(0x187)+')')};}function getSavingsEstimate(_0x1521bc,_0x3e64cf,_0x151d7f){const _0x373a2c=_0x2d2e;if(_0x1521bc===_0x373a2c(0x182)){const _0x36077e=_0x151d7f['balan'+_0x373a2c(0x173)]?.['savin'+'gs']||0x0,_0xbf31ee=_0x151d7f['best']?.[_0x373a2c(0x19e)+'gs']||0x0;return{'vsBalanced':_0x36077e>0x0?(_0x151d7f[_0x373a2c(0x19b)+_0x373a2c(0x173)]['savin'+_0x373a2c(0x16a)+_0x373a2c(0x19f)]||0x0)['toFix'+'ed'](0x0)+'%':'0%','vsBest':_0xbf31ee>0x0?(_0x151d7f[_0x373a2c(0x1a9)][_0x373a2c(0x19e)+_0x373a2c(0x16a)+_0x373a2c(0x19f)]||0x0)[_0x373a2c(0x1ad)+'ed'](0x0)+'%':'0%'};}if(_0x1521bc===_0x373a2c(0x19b)+'ced'){const _0xa4c8f8=_0x151d7f['fast']?.['savin'+'gs']||0x0,_0x23d59f=_0x151d7f['best']?.[_0x373a2c(0x19e)+'gs']||0x0;return{'vsFast':_0xa4c8f8<0x0?Math[_0x373a2c(0x1a8)](_0x151d7f[_0x373a2c(0x182)]?.['savin'+_0x373a2c(0x16a)+_0x373a2c(0x19f)]||0x0)['toFix'+'ed'](0x0)+(_0x373a2c(0x17e)+'e\x20exp'+_0x373a2c(0x176)+'e'):'0%','vsBest':_0x23d59f>0x0?(_0x151d7f['best'][_0x373a2c(0x19e)+'gsPer'+'cent']||0x0)[_0x373a2c(0x1ad)+'ed'](0x0)+'%':'0%'};}return{'vsFast':_0x151d7f[_0x373a2c(0x182)]?Math['abs'](_0x151d7f[_0x373a2c(0x182)]['savin'+_0x373a2c(0x16a)+'cent']||0x0)['toFix'+'ed'](0x0)+(_0x373a2c(0x17e)+'e\x20exp'+'ensiv'+'e'):'0%','vsBalanced':_0x151d7f[_0x373a2c(0x19b)+'ced']?Math['abs'](_0x151d7f[_0x373a2c(0x19b)+'ced'][_0x373a2c(0x19e)+_0x373a2c(0x16a)+'cent']||0x0)['toFix'+'ed'](0x0)+('%\x20mor'+'e\x20exp'+'ensiv'+'e'):'0%'};}function _0x1ecb(){const _0xa9d53f=['B3v0Chu','zw5JEq','AwvYigy','y29ZDa','sgLNAc0','DwfSAxq','vgLLCG','ywjZ','yMvZDa','ywXPDhK','DgLVBIW','q29ZDc0','Dg9gAxG','zdOGja','C2vHCMm','B3n0','ChjVDMK','z3nqzxi','mti2zgnotfDj','Aw5WDxq','yxn0jYa','nJyYogXUA1nRBq','CMvXDwK','AwvYiha','mtjgCNb5Cxe','BNnPzgu','y2vK','mtaYmtzJB3bkChu','zwvKCYa','zw5ZAxy','mtq2nZzxvgnduMO','y2fSigu','zMfZDca','ntKWmZnwsxPhruO','ks4Gq28','ndqWnZnkBhHUsK0','DMfSAwq','jsbTB3i','zw52','mZC5ogrsuuvgza','ChjPy2K','zMfZDa','B3bLCMe','zxn0Aw0','zNjLCxu','CMvTzw4','yxrPB24','ksSPkYK','y29ZDfm','y2vKihq','ndK5nezSrvjpDW','Dg9tDhi','BMzPz3u','AgLNAa','y3jPDgK','DgLVBIa','rxn0Aw0','DgLLCG','kcGOlIS','y2fS','mJiYntq0r0vtywDL','AxjLCYa','odKWCfPWyNfX','Aw5N','mZKZBhLSD3jo','zgvY','yMfSyw4','nJm1qwHeEeTT','ihrPzxi','C2f2Aw4','y2vUDa','y2fSAxq'];_0x1ecb=function(){return _0xa9d53f;};return _0x1ecb();}
@@ -2,9 +2,11 @@
2
2
  * Cost Tracking Utilities
3
3
  *
4
4
  * Tracks API costs over time, provides cost estimates, and helps optimize spending.
5
+ * Includes budget limits and alerting.
5
6
  */
6
7
 
7
8
  import { getCached, setCached } from './cache.mjs';
9
+ import { warn, log } from './logger.mjs';
8
10
 
9
11
  /**
10
12
  * Cost Tracker Class
@@ -22,11 +24,21 @@ export class CostTracker {
22
24
  * Load costs from cache/storage
23
25
  */
24
26
  loadCosts() {
27
+ const defaultCosts = { history: [], totals: { total: 0, count: 0 }, byProvider: {}, byDate: {} };
25
28
  try {
26
29
  const cached = getCached(this.storageKey, 'cost-tracker', {});
27
- return cached || { history: [], totals: {}, byProvider: {} };
30
+ if (cached && typeof cached === 'object' && cached.history) {
31
+ // Ensure all required properties exist
32
+ return {
33
+ history: cached.history || [],
34
+ totals: { total: 0, count: 0, ...cached.totals },
35
+ byProvider: cached.byProvider || {},
36
+ byDate: cached.byDate || {}
37
+ };
38
+ }
39
+ return defaultCosts;
28
40
  } catch {
29
- return { history: [], totals: {}, byProvider: {} };
41
+ return defaultCosts;
30
42
  }
31
43
  }
32
44
 
@@ -196,6 +208,106 @@ export class CostTracker {
196
208
  };
197
209
  }
198
210
 
211
+ /**
212
+ * Set budget limit with alert thresholds
213
+ *
214
+ * @param {number} budgetLimit - Total budget limit (USD)
215
+ * @param {Object} [options={}] - Budget options
216
+ * @param {number} [options.warningThreshold=0.8] - Warn at this percentage (0-1)
217
+ * @param {Function} [options.onWarning] - Callback when warning threshold reached
218
+ * @param {Function} [options.onExceeded] - Callback when budget exceeded
219
+ */
220
+ setBudgetLimit(budgetLimit, options = {}) {
221
+ const { warningThreshold = 0.8, onWarning = null, onExceeded = null } = options;
222
+
223
+ if (!this.costs.budgets) {
224
+ this.costs.budgets = [];
225
+ }
226
+
227
+ const budget = {
228
+ limit: budgetLimit,
229
+ warningThreshold,
230
+ onWarning,
231
+ onExceeded,
232
+ createdAt: Date.now()
233
+ };
234
+
235
+ this.costs.budgets.push(budget);
236
+ this.saveCosts();
237
+
238
+ // Check immediately
239
+ this.checkBudgets();
240
+ }
241
+
242
+ /**
243
+ * Check all budget limits and trigger alerts
244
+ *
245
+ * @returns {Array} Array of budget status objects
246
+ */
247
+ checkBudgets() {
248
+ if (!this.costs.budgets || this.costs.budgets.length === 0) {
249
+ return [];
250
+ }
251
+
252
+ const stats = this.getStats();
253
+ const current = stats.total;
254
+ const statuses = [];
255
+
256
+ for (const budget of this.costs.budgets) {
257
+ const percentage = current / budget.limit;
258
+ const status = {
259
+ limit: budget.limit,
260
+ current,
261
+ percentage,
262
+ remaining: Math.max(0, budget.limit - current),
263
+ warningThreshold: budget.warningThreshold,
264
+ status: percentage >= 1 ? 'exceeded' : (percentage >= budget.warningThreshold ? 'warning' : 'ok')
265
+ };
266
+
267
+ statuses.push(status);
268
+
269
+ // Trigger callbacks
270
+ if (percentage >= 1 && budget.onExceeded) {
271
+ try {
272
+ budget.onExceeded(status);
273
+ } catch (err) {
274
+ // Don't fail if callback errors
275
+ }
276
+ } else if (percentage >= budget.warningThreshold && budget.onWarning) {
277
+ try {
278
+ budget.onWarning(status);
279
+ } catch (err) {
280
+ // Don't fail if callback errors
281
+ }
282
+ }
283
+ }
284
+
285
+ return statuses;
286
+ }
287
+
288
+ /**
289
+ * Get budget status
290
+ *
291
+ * @returns {Object} Budget status summary
292
+ */
293
+ getBudgetStatus() {
294
+ const statuses = this.checkBudgets();
295
+ if (statuses.length === 0) {
296
+ return { hasBudgets: false };
297
+ }
298
+
299
+ const exceeded = statuses.filter(s => s.status === 'exceeded');
300
+ const warnings = statuses.filter(s => s.status === 'warning');
301
+
302
+ return {
303
+ hasBudgets: true,
304
+ totalBudgets: statuses.length,
305
+ exceeded: exceeded.length,
306
+ warnings: warnings.length,
307
+ statuses
308
+ };
309
+ }
310
+
199
311
  /**
200
312
  * Reset cost tracking
201
313
  */
@@ -255,3 +367,23 @@ export function getCostStats() {
255
367
  return getCostTracker().getStats();
256
368
  }
257
369
 
370
+ /**
371
+ * Set budget limit (convenience function)
372
+ *
373
+ * @param {number} budgetLimit - Budget limit in USD
374
+ * @param {Object} [options={}] - Budget options
375
+ */
376
+ export function setBudgetLimit(budgetLimit, options = {}) {
377
+ const tracker = getCostTracker();
378
+ tracker.setBudgetLimit(budgetLimit, options);
379
+ }
380
+
381
+ /**
382
+ * Get budget status (convenience function)
383
+ *
384
+ * @returns {Object} Budget status
385
+ */
386
+ export function getBudgetStatus() {
387
+ return getCostTracker().getBudgetStatus();
388
+ }
389
+
@@ -12,6 +12,7 @@
12
12
  import { createConfig } from './config.mjs';
13
13
  import { loadEnv } from './load-env.mjs';
14
14
  import { warn } from './logger.mjs';
15
+ import { ValidationError } from './errors.mjs';
15
16
  // Load env before LLM utils
16
17
  loadEnv();
17
18
  // Use shared LLM utility library for text-only calls (optional dependency)
@@ -111,7 +112,16 @@ Return ONLY the JSON object, no other text.`;
111
112
  if (jsonMatch) {
112
113
  parsed = JSON.parse(jsonMatch[0]);
113
114
  } else {
114
- throw new Error('Could not extract JSON from response');
115
+ throw new ValidationError(
116
+ 'Could not extract JSON from response. The LLM response did not contain valid JSON. ' +
117
+ 'This may indicate the model failed to follow the schema format. ' +
118
+ 'Try: 1) Simplifying the schema, 2) Using a more capable model tier, or 3) Adding examples to the prompt.',
119
+ {
120
+ responseLength: response?.length || 0,
121
+ responsePreview: response?.substring(0, 200) || 'No response',
122
+ schema: schema
123
+ }
124
+ );
115
125
  }
116
126
  }
117
127
  if (parsed && validateSchema(parsed, schema)) {
@@ -126,25 +136,44 @@ Return ONLY the JSON object, no other text.`;
126
136
 
127
137
  /**
128
138
  * Call LLM API (text-only, no vision)
139
+ * Uses cached wrapper for better performance and cost reduction
129
140
  * Uses shared utility with advanced tier for better extraction quality
130
141
  */
131
142
  async function callLLMForExtraction(prompt, config) {
132
143
  const apiKey = config.apiKey;
133
144
  const provider = config.provider || 'gemini';
134
145
 
135
- // Try to use optional llm-utils library if available
146
+ // Use cached LLM wrapper (reduces costs and improves performance)
136
147
  try {
137
- const llmUtils = await import('@arclabs561/llm-utils');
138
- const callLLMUtil = llmUtils.callLLM;
148
+ const { callLLMCached } = await import('./utils/cached-llm.mjs');
139
149
  // Use advanced tier for data extraction (needs higher quality)
140
- return await callLLMUtil(prompt, provider, apiKey, {
150
+ return await callLLMCached(prompt, provider, apiKey, {
141
151
  tier: 'advanced', // Data extraction benefits from better models
142
152
  temperature: 0.1,
143
153
  maxTokens: 1000,
154
+ useCache: true, // Enable caching by default
144
155
  });
145
156
  } catch (error) {
146
- // Fallback: use local implementation or throw
147
- throw new Error(`LLM extraction requires @arclabs561/llm-utils package: ${error.message}`);
157
+ // Fallback: try uncached version if cached wrapper fails
158
+ try {
159
+ const llmUtils = await import('@arclabs561/llm-utils');
160
+ return await llmUtils.callLLM(prompt, provider, apiKey, {
161
+ tier: 'advanced',
162
+ temperature: 0.1,
163
+ maxTokens: 1000,
164
+ });
165
+ } catch (fallbackError) {
166
+ throw new ValidationError(
167
+ `LLM extraction requires @arclabs561/llm-utils package. ` +
168
+ `Install it with: npm install @arclabs561/llm-utils. ` +
169
+ `Error: ${fallbackError.message}`,
170
+ {
171
+ package: '@arclabs561/llm-utils',
172
+ installationCommand: 'npm install @arclabs561/llm-utils',
173
+ originalError: fallbackError.message
174
+ }
175
+ );
176
+ }
148
177
  }
149
178
  }
150
179
 
@@ -8,9 +8,8 @@
8
8
  * - ES-KNN: arXiv:2506.05614 (Exemplar Selection KNN using semantic similarity)
9
9
  * - KATE: arXiv:2101.06804 (Foundational work on kNN-augmented in-context examples)
10
10
  *
11
- * Note: This implementation uses keyword-based similarity (Jaccard) rather than
12
- * true semantic embeddings due to npm package constraints. For full ES-KNN,
13
- * embedding-based cosine similarity would be required.
11
+ * This implementation supports both keyword-based similarity (Jaccard) and
12
+ * embedding-based semantic similarity. Embeddings are preferred when available.
14
13
  *
15
14
  * This module provides dynamic few-shot example selection based on similarity
16
15
  * to the evaluation prompt.
@@ -19,20 +18,25 @@
19
18
  /**
20
19
  * Select few-shot examples based on semantic similarity to prompt
21
20
  *
21
+ * Research: ES-KNN shows embedding-based selection improves performance by 10-20%
22
+ * over keyword-based selection. This implementation supports both methods.
23
+ *
22
24
  * @param {string} prompt - Evaluation prompt
23
25
  * @param {Array<import('./index.mjs').FewShotExample>} examples - Available examples
24
26
  * @param {{
25
27
  * maxExamples?: number;
26
28
  * similarityThreshold?: number;
27
29
  * useSemanticMatching?: boolean;
30
+ * task?: string;
28
31
  * }} [options={}] - Selection options
29
- * @returns {Array<import('./index.mjs').FewShotExample>} Selected examples
32
+ * @returns {Promise<Array<import('./index.mjs').FewShotExample>>} Selected examples
30
33
  */
31
- export function selectFewShotExamples(prompt, examples = [], options = {}) {
34
+ export async function selectFewShotExamples(prompt, examples = [], options = {}) {
32
35
  const {
33
36
  maxExamples = 3,
34
37
  similarityThreshold = 0.3,
35
- useSemanticMatching = true
38
+ useSemanticMatching = true,
39
+ task = 'general'
36
40
  } = options;
37
41
 
38
42
  // Validate inputs
@@ -50,14 +54,68 @@ export function selectFewShotExamples(prompt, examples = [], options = {}) {
50
54
  return examples.slice(0, maxExamples);
51
55
  }
52
56
 
53
- // Simple keyword-based similarity (for npm package - full semantic matching would require embeddings)
54
- const promptKeywords = extractKeywords(prompt.toLowerCase());
57
+ // UX OPTIMIZATION: Auto-disable embeddings for large example arrays (>100) unless explicitly requested
58
+ // - Why: Embeddings add ~15ms per example, so 1000 examples = ~15s latency
59
+ // - User experience: Most users have 10-50 examples, so embeddings are fast and valuable
60
+ // - Edge case: Large datasets (1000+ examples) should use keyword matching for speed
61
+ // - Exception: If useEmbeddings is explicitly set to true, respect user preference
62
+ const exampleCount = examples.length;
63
+ const shouldUseEmbeddingsForLargeArrays = options.useEmbeddings === true;
64
+ const autoDisableForLargeArrays = exampleCount > 100 && !shouldUseEmbeddingsForLargeArrays;
65
+
66
+ // Try embeddings first (more accurate) - but skip for large arrays unless explicitly requested
67
+ if (!autoDisableForLargeArrays) {
68
+ try {
69
+ const { instructionSemanticSimilarity, isInstructionEmbeddingsAvailable } = await import('../evaluation/utils/instruction-embeddings.mjs');
70
+ const { semanticSimilarity, isEmbeddingsAvailable } = await import('../evaluation/utils/semantic-matcher.mjs');
71
+
72
+ const useInstructionEmbeddings = await isInstructionEmbeddingsAvailable();
73
+ const useGeneralEmbeddings = !useInstructionEmbeddings && await isEmbeddingsAvailable();
74
+
75
+ if (useInstructionEmbeddings || useGeneralEmbeddings) {
76
+ // Use embeddings for similarity calculation
77
+ const similarityFn = useInstructionEmbeddings
78
+ ? (text1, text2) => instructionSemanticSimilarity(text1, text2, task)
79
+ : (text1, text2) => semanticSimilarity(text1, text2);
80
+
81
+ // Score each example using embeddings
82
+ const scored = await Promise.all(
83
+ examples.map(async (example) => {
84
+ const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
85
+ const similarity = await similarityFn(prompt, exampleText);
86
+
87
+ return {
88
+ example,
89
+ similarity: similarity !== null ? similarity : 0
90
+ };
91
+ })
92
+ );
93
+
94
+ // Sort by similarity and take top N
95
+ return scored
96
+ .filter(s => s.similarity >= similarityThreshold)
97
+ .sort((a, b) => b.similarity - a.similarity)
98
+ .slice(0, maxExamples)
99
+ .map(s => s.example);
100
+ }
101
+ } catch (error) {
102
+ // Fall through to keyword matching if embeddings unavailable
103
+ }
104
+ }
105
+
106
+ // Fallback: Keyword-based similarity (Jaccard)
107
+ // For very long prompts, limit keyword extraction to avoid performance issues
108
+ const maxPromptLength = 10000; // Limit prompt processing to 10KB for performance
109
+ const processedPrompt = prompt.length > maxPromptLength
110
+ ? prompt.substring(0, maxPromptLength)
111
+ : prompt;
112
+
113
+ const promptKeywords = extractKeywords(processedPrompt.toLowerCase());
55
114
 
56
115
  // Score each example by keyword overlap
57
116
  const scored = examples.map(example => {
58
- const exampleKeywords = extractKeywords(
59
- (example.description || '') + ' ' + (example.evaluation || '')
60
- );
117
+ const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
118
+ const exampleKeywords = extractKeywords(exampleText.toLowerCase());
61
119
 
62
120
  // Jaccard similarity (intersection over union)
63
121
  const intersection = new Set(
package/src/errors.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  * Custom Error Classes for ai-visual-test
3
3
  *
4
4
  * Provides standardized error handling across the package.
5
- * Based on Playwright's error handling patterns and industry best practices.
5
+ * Based on Playwright's error handling patterns and industry practices.
6
6
  *
7
7
  * All errors extend AIBrowserTestError for consistent error handling and serialization.
8
8
  */
@@ -42,7 +42,11 @@ export class AIBrowserTestError extends Error {
42
42
  code: this.code,
43
43
  message: this.message,
44
44
  details: this.details,
45
- stack: this.stack
45
+ // SECURITY: Stack traces may contain sensitive information
46
+ // Only include in development mode or when explicitly requested
47
+ ...(process.env.NODE_ENV === 'development' || process.env.INCLUDE_STACK_TRACES === 'true'
48
+ ? { stack: this.stack }
49
+ : {})
46
50
  };
47
51
  }
48
52
  }
@@ -110,10 +110,22 @@ let globalTracker = null;
110
110
 
111
111
  /**
112
112
  * Get or create global propagation tracker
113
+ *
114
+ * @param {Object} [options={}] - Options for tracker (only used on first call)
115
+ * @returns {ExperiencePropagationTracker} Global tracker instance
113
116
  */
114
117
  export function getPropagationTracker(options = {}) {
115
118
  if (!globalTracker) {
116
119
  globalTracker = new ExperiencePropagationTracker(options);
120
+ } else if (Object.keys(options).length > 0) {
121
+ // If tracker exists but options provided, update it
122
+ // This allows reconfiguration (though typically tracker is created once)
123
+ if (options.enabled !== undefined) {
124
+ globalTracker.enabled = options.enabled;
125
+ }
126
+ if (options.logLevel !== undefined) {
127
+ globalTracker.logLevel = options.logLevel;
128
+ }
117
129
  }
118
130
  return globalTracker;
119
131
  }
@@ -11,6 +11,7 @@
11
11
  */
12
12
 
13
13
  import { warn } from './logger.mjs';
14
+ import { ValidationError } from './errors.mjs';
14
15
 
15
16
  /**
16
17
  * Experience Trace
@@ -133,7 +134,7 @@ export class ExperienceTrace {
133
134
  * @param {Record<string, unknown>} [options={}] - Aggregation options
134
135
  * @returns {import('./index.mjs').AggregatedTemporalNotes} Aggregated notes
135
136
  */
136
- aggregateNotes(aggregateTemporalNotes, options = {}) {
137
+ async aggregateNotes(aggregateTemporalNotes, options = {}) {
137
138
  // Extract notes from events and validations
138
139
  const eventNotes = this.events
139
140
  .filter(e => e.type === 'interaction' || e.type === 'observation')
@@ -157,7 +158,7 @@ export class ExperienceTrace {
157
158
 
158
159
  const notes = [...eventNotes, ...validationNotes].sort((a, b) => a.timestamp - b.timestamp);
159
160
 
160
- this.aggregatedNotes = aggregateTemporalNotes(notes, options);
161
+ this.aggregatedNotes = await aggregateTemporalNotes(notes, options);
161
162
  return this.aggregatedNotes;
162
163
  }
163
164
 
@@ -296,7 +297,15 @@ export class ExperienceTracerManager {
296
297
  async metaEvaluateTrace(sessionId, validateScreenshot) {
297
298
  const trace = this.getTrace(sessionId);
298
299
  if (!trace) {
299
- throw new Error(`Trace not found: ${sessionId}`);
300
+ throw new ValidationError(
301
+ `Trace not found for session: ${sessionId}. ` +
302
+ `Use startTrace() to create a new trace, or listTraces() to see all available traces.`,
303
+ {
304
+ sessionId,
305
+ availableSessions: Object.keys(this.traces),
306
+ function: 'metaEvaluateTrace'
307
+ }
308
+ );
300
309
  }
301
310
 
302
311
  const evaluation = {