@arclabs561/ai-visual-test 0.5.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/CHANGELOG.md +102 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +862 -3
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +697 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +27 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
@@ -23,7 +23,7 @@ import { TEMPORAL_CONSTANTS } from './constants.mjs';
23
23
  /**
24
24
  * Test gameplay with variable goals
25
25
  *
26
- * Complete workflow for testing games with variable goals/prompts.
26
+ * Workflow for testing games with variable goals/prompts.
27
27
  * Originally motivated by interactive web applications that require
28
28
  * real-time validation, variable goals, and temporal understanding.
29
29
  *
@@ -96,7 +96,11 @@ export async function testGameplay(page, options = {}) {
96
96
  evaluations: [],
97
97
  aggregated: null,
98
98
  consistency: null,
99
- propagation: []
99
+ propagation: [],
100
+ temporalScreenshots: [], // Initialize to empty array for consistency
101
+ processedTemporalNotes: null, // Initialize to null
102
+ temporalGraph: null, // Initialize to null
103
+ selectedScreenshots: undefined // Only set if >10 screenshots
100
104
  };
101
105
 
102
106
  try {
@@ -173,8 +177,9 @@ export async function testGameplay(page, options = {}) {
173
177
  result.temporalScreenshots = temporalScreenshots;
174
178
  trackPropagation('temporal', { count: temporalScreenshots.length }, 'Captured temporal screenshots');
175
179
 
176
- // IMPROVEMENT: Use temporal preprocessing if requested (better performance)
177
- if (useTemporalPreprocessing && temporalScreenshots.length > 0) {
180
+ // Use temporal preprocessing by default
181
+ // Activity-based: high-Hz uses cache, low-Hz does expensive preprocessing
182
+ if (temporalScreenshots.length > 0) {
178
183
  const { createTemporalPreprocessingManager, createAdaptiveTemporalProcessor } = await import('./temporal-preprocessor.mjs');
179
184
  const preprocessingManager = createTemporalPreprocessingManager();
180
185
  const adaptiveProcessor = createAdaptiveTemporalProcessor(preprocessingManager);
@@ -250,7 +255,7 @@ export async function testGameplay(page, options = {}) {
250
255
  // Always return aggregated notes (even if empty) for consistency
251
256
  if (allNotes.length > 0) {
252
257
  // Use fixed temporal aggregation system
253
- const aggregated = aggregateTemporalNotes(allNotes, {
258
+ const aggregated = await aggregateTemporalNotes(allNotes, {
254
259
  windowSize: 5000,
255
260
  decayFactor: 0.9
256
261
  });
@@ -281,11 +286,58 @@ export async function testGameplay(page, options = {}) {
281
286
  };
282
287
  }
283
288
 
289
+ // IMPROVEMENT: Build temporal graph for better coherence understanding
290
+ try {
291
+ const { buildTemporalGraph } = await import('./temporal.mjs');
292
+ const temporalGraph = await buildTemporalGraph(allNotes, {
293
+ windowSize: 5000,
294
+ decayFactor: 0.9,
295
+ useLLM: false, // Use keyword matching for speed in gameplay
296
+ frequency: fps // Auto-detect extraction method based on frequency
297
+ });
298
+ result.temporalGraph = temporalGraph;
299
+ trackPropagation('temporal-graph', {
300
+ nodes: temporalGraph.graph?.nodes?.length || 0,
301
+ edges: temporalGraph.graph?.edges?.length || 0,
302
+ averageCoherence: temporalGraph.graph?.averageCoherence || 0,
303
+ entityCount: Object.keys(temporalGraph.graph?.entities || {}).length
304
+ }, 'Built temporal graph representation');
305
+ } catch (error) {
306
+ warn(`[Convenience] Temporal graph building failed: ${error.message}`);
307
+ result.temporalGraph = null;
308
+ }
309
+
310
+ // IMPROVEMENT: Select representative screenshots for context window management
311
+ if (result.temporalScreenshots && result.temporalScreenshots.length > 10) {
312
+ try {
313
+ const { selectRepresentativeScreenshots } = await import('./temporal-note-pruner.mjs');
314
+ const evaluations = allNotes.map(n => ({ score: n.score || 0 }));
315
+ const selectedScreenshots = selectRepresentativeScreenshots(
316
+ result.temporalScreenshots,
317
+ evaluations,
318
+ {
319
+ maxScreenshots: 10,
320
+ strategy: 'keyframes' // Use keyframes for gameplay (captures state changes)
321
+ }
322
+ );
323
+ result.selectedScreenshots = selectedScreenshots;
324
+ trackPropagation('screenshot-selection', {
325
+ original: result.temporalScreenshots.length,
326
+ selected: selectedScreenshots.length,
327
+ reduction: ((result.temporalScreenshots.length - selectedScreenshots.length) / result.temporalScreenshots.length * 100).toFixed(1) + '%'
328
+ }, 'Selected representative screenshots for context management');
329
+ } catch (error) {
330
+ warn(`[Convenience] Screenshot selection failed: ${error.message}`);
331
+ result.selectedScreenshots = result.temporalScreenshots; // Fallback to all
332
+ }
333
+ }
334
+
284
335
  trackPropagation('aggregation', {
285
336
  windows: aggregated.windows.length,
286
337
  coherence: aggregated.coherence,
287
- scales: Object.keys(result.aggregatedMultiScale.scales || {})
288
- }, 'Aggregated temporal notes with multi-scale');
338
+ scales: Object.keys(result.aggregatedMultiScale.scales || {}),
339
+ graphNodes: result.temporalGraph?.graph?.nodes?.length || 0
340
+ }, 'Aggregated temporal notes with multi-scale and temporal graph');
289
341
  } else {
290
342
  // Return empty aggregated structure if no notes (for consistency)
291
343
  result.aggregated = {
@@ -373,7 +425,7 @@ export async function testGameplay(page, options = {}) {
373
425
  /**
374
426
  * Test browser experience with multiple stages
375
427
  *
376
- * Complete workflow for testing browser experiences across multiple stages
428
+ * Workflow for testing browser experiences across multiple stages
377
429
  * (initial, form, payment, gameplay, etc.).
378
430
  *
379
431
  * @param {import('playwright').Page} page - Playwright page object
@@ -486,7 +538,7 @@ export async function testBrowserExperience(page, options = {}) {
486
538
  // Aggregate temporal notes across all stages
487
539
  const allStageNotes = result.experiences.flatMap(exp => exp.notes || []);
488
540
  if (allStageNotes.length > 0) {
489
- const stageAggregated = aggregateTemporalNotes(allStageNotes, {
541
+ const stageAggregated = await aggregateTemporalNotes(allStageNotes, {
490
542
  windowSize: 10000,
491
543
  decayFactor: 0.9
492
544
  });
@@ -581,7 +633,7 @@ export async function validateWithGoals(screenshotPath, options = {}) {
581
633
  } else if (context.notes && context.notes.length > 0) {
582
634
  // Auto-aggregate if notes provided but not aggregated
583
635
  try {
584
- temporalNotes = aggregateTemporalNotes(context.notes, {
636
+ temporalNotes = await aggregateTemporalNotes(context.notes, {
585
637
  windowSize: TEMPORAL_CONSTANTS.DEFAULT_WINDOW_SIZE_MS,
586
638
  decayFactor: TEMPORAL_CONSTANTS.DEFAULT_DECAY_FACTOR
587
639
  });
@@ -615,3 +667,54 @@ export async function validateWithGoals(screenshotPath, options = {}) {
615
667
  };
616
668
  }
617
669
 
670
+ /**
671
+ * Validate a Playwright Page directly
672
+ *
673
+ * Handles screenshotting, code extraction, and validation in one step.
674
+ * Reduces boilerplate for common Playwright testing workflows.
675
+ *
676
+ * @param {import('playwright').Page} page - Playwright page object
677
+ * @param {string} prompt - Evaluation prompt
678
+ * @param {Object} options - Validation options
679
+ * @param {boolean} [options.fullPage] - Capture full page screenshot
680
+ * @param {boolean} [options.captureCode] - Extract rendered code (default: true)
681
+ * @param {string} [options.tempDir] - Directory for temp screenshot (default: os.tmpdir())
682
+ * @param {boolean} [options.keepScreenshot] - Keep screenshot after validation (default: false)
683
+ * @returns {Promise<Object>} Validation result
684
+ */
685
+ export async function validatePage(page, prompt, options = {}) {
686
+ if (!page || typeof page.screenshot !== 'function') {
687
+ throw new ValidationError('validatePage: page must be a Playwright Page object', { received: typeof page });
688
+ }
689
+
690
+ // Create temp screenshot
691
+ const fs = await import('fs');
692
+ const path = await import('path');
693
+ const os = await import('os');
694
+ const tempDir = options.tempDir || os.tmpdir();
695
+ const screenshotPath = path.join(tempDir, `validate-page-${Date.now()}.png`);
696
+
697
+ try {
698
+ await page.screenshot({ path: screenshotPath, fullPage: options.fullPage ?? false });
699
+
700
+ // Extract code if requested
701
+ let renderedCode = null;
702
+ if (options.captureCode !== false) {
703
+ renderedCode = await extractRenderedCode(page);
704
+ }
705
+
706
+ // Validate
707
+ const result = await validateScreenshot(screenshotPath, prompt, {
708
+ ...options,
709
+ renderedCode
710
+ });
711
+
712
+ return result;
713
+ } finally {
714
+ // Cleanup unless requested to keep
715
+ if (!options.keepScreenshot && fs.existsSync(screenshotPath)) {
716
+ fs.unlinkSync(screenshotPath);
717
+ }
718
+ }
719
+ }
720
+
@@ -0,0 +1 @@
1
+ (function(_0x363aa6,_0xe27cc5){const _0xe0b89c=_0x30d0,_0x500e4=_0x363aa6();while(!![]){try{const _0x18ac1d=parseInt(_0xe0b89c(0x21c))/0x1+-parseInt(_0xe0b89c(0x1ce))/0x2*(-parseInt(_0xe0b89c(0x1ea))/0x3)+parseInt(_0xe0b89c(0x1f0))/0x4+-parseInt(_0xe0b89c(0x216))/0x5*(parseInt(_0xe0b89c(0x1f3))/0x6)+parseInt(_0xe0b89c(0x20e))/0x7*(parseInt(_0xe0b89c(0x1dc))/0x8)+parseInt(_0xe0b89c(0x1c9))/0x9+-parseInt(_0xe0b89c(0x1d6))/0xa*(parseInt(_0xe0b89c(0x1f9))/0xb);if(_0x18ac1d===_0xe27cc5)break;else _0x500e4['push'](_0x500e4['shift']());}catch(_0x3fe9ed){_0x500e4['push'](_0x500e4['shift']());}}}(_0x540c,0x38dea));const _0x20e6c3=(function(){let _0x2e6c58=!![];return function(_0x3b3213,_0xc83401){const _0x32402f=_0x2e6c58?function(){if(_0xc83401){const _0x42e1e8=_0xc83401['apply'](_0x3b3213,arguments);return _0xc83401=null,_0x42e1e8;}}:function(){};return _0x2e6c58=![],_0x32402f;};}()),_0x33edcd=_0x20e6c3(this,function(){const _0x16ec1d=_0x30d0;return _0x33edcd[_0x16ec1d(0x1e6)+_0x16ec1d(0x1da)]()['searc'+'h']('(((.+'+_0x16ec1d(0x1e4)+'+$')[_0x16ec1d(0x1e6)+'ing']()['const'+_0x16ec1d(0x1e3)+'r'](_0x33edcd)[_0x16ec1d(0x1d3)+'h']('(((.+'+')+)+)'+'+$');});_0x33edcd();import{selectModelTier,selectProvider,selectModelTierAndProvider}from'./model-tier-selector.mjs';function _0x540c(){const 
_0x40d5dd=['oxHKqML6qW','Aw5WDxq','zxn0ihq','y2vUDa','ihvZzsa','BNnPzgu','mtu4mZy3mNbvvxP1BW','BMzPz3u','ihjLCxu','oda0zujOB01v','CMf0Aw8','zdOGja','zw5ZAxq','DMfSDwe','z3nqzxi','mteZnZe0ntLmCKjMwvO','C2f2Aw4','CMvZigi','y29ZDca','zgvY','ywWGy28','CMvXDwK','q29ZDa','DgLVBIa','CIb1C2K','zsbLEha','ihbLCIa','q29ZDc0','jsbTB3i','sgLNAc0','B3v0Chu','yMvZDa','yMfSyw4','y2vK','CM92Awq','icHLC3q','mte2mdzUzMDTDMm','zw52','B3iGCxu','qMfSyw4','Dg9gAxG','Esb0CMe','DgLLCI4','zNjLCxu','nZm3mfjksKrRAG','y29ZDfm','BJOG','y3jPDgK','y2fS','ChjVDMK','mtKXmJm5uhrbsg9O','AwvYigy','DgLVBIW','mZKZmJKXovrrExryBG','yxrLzca','Aw1HDgu','zw5ZAxy','AwvYiha','mtCYotaYz3zTrgLs','zgvVzMy','zw5JEq','zMfZDca','y2vKihq','C2vHCMm','zMfZDa','Dg90ywW','mtbiChD5thq','B3bLCMe','zwvKCYa','y29ZDa','Aw5N','ChjPy2K','odCYqvjwy2D3','vgLLCG','y2fSigu','zxn0Aw0','ywjZ','DMfSAwq','zxmGC3a','CNvJDg8','ksSPkYK','AgLNAa','Dg9tDhi','Bw9KzwW','CMvTzw4','DgLLCG'];_0x540c=function(){return _0x40d5dd;};return _0x540c();}import{createConfig,getProvider}from'./config.mjs';export function calculateCostComparison(_0x1bde85={},_0xee3629={}){const _0x4baaa9=_0x30d0,_0x24e0da=parseFloat(_0xee3629[_0x4baaa9(0x1df)+'atedC'+'ost']?.[_0x4baaa9(0x1d5)+_0x4baaa9(0x200)]||'0'),_0x13909c=_0x1bde85[_0x4baaa9(0x1e7)+'Tier']||'balan'+_0x4baaa9(0x20b),_0x5dc553=_0xee3629[_0x4baaa9(0x21b)+_0x4baaa9(0x1fd)]||'gemin'+'i',_0x5b8269=getProvider(_0x5dc553),_0x1bc750={};_0x1bc750['input']=0x0,_0x1bc750[_0x4baaa9(0x208)+'t']=0x0;const _0x43a316=_0x5b8269?.[_0x4baaa9(0x1db)+'ng']||_0x1bc750,_0x455760=0x3e8,_0x42cf84=0x1f4,_0x28c9ce={};for(const _0x5a12fa of[_0x4baaa9(0x1d4),_0x4baaa9(0x20a)+'ced',_0x4baaa9(0x209)]){const _0x34de26=_0x455760/0xf4240*_0x43a316['input'],_0x124b8e=_0x42cf84/0xf4240*_0x43a316[_0x4baaa9(0x208)+'t'];_0x28c9ce[_0x5a12fa]=_0x34de26+_0x124b8e;}const _0x33beea={};for(const _0x3b1a6b of['fast',_0x4baaa9(0x20a)+_0x4baaa9(0x20b),_0x4baaa9(0x209)]){if(_0x28c9ce[_0x3b1a6b]&&_0x24e0da>0x0){const 
_0x2dafeb=_0x24e0da-_0x28c9ce[_0x3b1a6b],_0x5cc7e6=_0x2dafeb/_0x24e0da*0x64;_0x33beea[_0x3b1a6b]={'absolute':_0x2dafeb,'percent':_0x5cc7e6,'cost':_0x28c9ce[_0x3b1a6b]};}}const _0x301c2e={};return _0x301c2e[_0x4baaa9(0x1e9)]=_0x13909c,_0x301c2e['provi'+'der']=_0x5dc553,_0x301c2e[_0x4baaa9(0x1d9)]=_0x24e0da,{'current':_0x301c2e,'tiers':_0x28c9ce,'savings':_0x33beea,'recommendation':getCostOptimizationRecommendation(_0x1bde85,_0x24e0da,_0x28c9ce)};}function _0x30d0(_0x250cc4,_0x7fabfc){const _0x42f66e=_0x540c();return _0x30d0=function(_0x33edcd,_0x20e6c3){_0x33edcd=_0x33edcd-0x1c9;let _0x540c14=_0x42f66e[_0x33edcd];if(_0x30d0['LOmDBw']===undefined){var _0x30d03e=function(_0x44bb83){const _0x539959='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x379ba8='',_0x566896='',_0x969928=_0x379ba8+_0x30d03e;for(let _0x1bc844=0x0,_0x5b6316,_0x2374c5,_0x10de93=0x0;_0x2374c5=_0x44bb83['charAt'](_0x10de93++);~_0x2374c5&&(_0x5b6316=_0x1bc844%0x4?_0x5b6316*0x40+_0x2374c5:_0x2374c5,_0x1bc844++%0x4)?_0x379ba8+=_0x969928['charCodeAt'](_0x10de93+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x5b6316>>(-0x2*_0x1bc844&0x6)):_0x1bc844:0x0){_0x2374c5=_0x539959['indexOf'](_0x2374c5);}for(let _0x2e6c58=0x0,_0x3b3213=_0x379ba8['length'];_0x2e6c58<_0x3b3213;_0x2e6c58++){_0x566896+='%'+('00'+_0x379ba8['charCodeAt'](_0x2e6c58)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x566896);};_0x30d0['ZJpJuI']=_0x30d03e,_0x250cc4=arguments,_0x30d0['LOmDBw']=!![];}const _0x1451a2=_0x42f66e[0x0],_0x32b53d=_0x33edcd+_0x1451a2,_0x2aae6c=_0x250cc4[_0x32b53d];if(!_0x2aae6c){const _0xc83401=function(_0x32402f){this['oInCwh']=_0x32402f,this['ATCMuh']=[0x1,0x0,0x0],this['CTcwpT']=function(){return'newState';},this['ECRSOR']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['bhAIrq']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0xc83401['prototype']['MnFJHe']=function(){const _0x42e1e8=new 
RegExp(this['ECRSOR']+this['bhAIrq']),_0x1bde85=_0x42e1e8['test'](this['CTcwpT']['toString']())?--this['ATCMuh'][0x1]:--this['ATCMuh'][0x0];return this['LBHlgi'](_0x1bde85);},_0xc83401['prototype']['LBHlgi']=function(_0xee3629){if(!Boolean(~_0xee3629))return _0xee3629;return this['DnlTCR'](this['oInCwh']);},_0xc83401['prototype']['DnlTCR']=function(_0x24e0da){for(let _0x13909c=0x0,_0x5dc553=this['ATCMuh']['length'];_0x13909c<_0x5dc553;_0x13909c++){this['ATCMuh']['push'](Math['round'](Math['random']())),_0x5dc553=this['ATCMuh']['length'];}return _0x24e0da(this['ATCMuh'][0x0]);},new _0xc83401(_0x30d0)['MnFJHe'](),_0x540c14=_0x30d0['ZJpJuI'](_0x540c14),_0x250cc4[_0x32b53d]=_0x540c14;}else _0x540c14=_0x2aae6c;return _0x540c14;},_0x30d0(_0x250cc4,_0x7fabfc);}function getCostOptimizationRecommendation(_0x29ade2,_0x153a55,_0x4e1efd){const _0x2986b8=_0x30d0,{frequency:_0x5da791,criticality:_0x184fa1,costSensitive:_0xd1a9aa}=_0x29ade2;let _0x59a389='balan'+_0x2986b8(0x20b);if(_0x5da791==='high'||_0x5da791>=0xa||_0xd1a9aa)_0x59a389=_0x2986b8(0x1d4);else _0x184fa1===_0x2986b8(0x219)+_0x2986b8(0x21a)&&(_0x59a389='best');const _0x12718e=_0x4e1efd[_0x59a389]||_0x153a55,_0xd634ff=_0x153a55-_0x12718e,_0x5aa49b=_0x153a55>0x0?_0xd634ff/_0x153a55*0x64:0x0;return{'tier':_0x59a389,'cost':_0x12718e,'savings':_0xd634ff,'savingsPercent':_0x5aa49b,'reason':getRecommendationReason(_0x29ade2,_0x59a389)};}function getRecommendationReason(_0x4dbf44,_0x140804){const _0x5c735b=_0x30d0;if(_0x140804===_0x5c735b(0x1d4)){if(_0x4dbf44['frequ'+'ency']===_0x5c735b(0x1e5)||_0x4dbf44[_0x5c735b(0x215)+_0x5c735b(0x1d0)]>=0xa)return _0x5c735b(0x207)+_0x5c735b(0x215)+'ency\x20'+_0x5c735b(0x1e1)+'ation'+_0x5c735b(0x1f2)+'ires\x20'+_0x5c735b(0x1d1)+'tier';if(_0x4dbf44[_0x5c735b(0x217)+_0x5c735b(0x1f6)+'ive'])return 
_0x5c735b(0x205)+'sensi'+'tive\x20'+_0x5c735b(0x1d7)+_0x5c735b(0x21e)+_0x5c735b(0x1ee)+'fast\x20'+_0x5c735b(0x1e9);}if(_0x140804==='best')return'Criti'+_0x5c735b(0x1de)+_0x5c735b(0x1f7)+_0x5c735b(0x201)+_0x5c735b(0x1ff)+_0x5c735b(0x1fb)+_0x5c735b(0x1ec)+_0x5c735b(0x21d)+_0x5c735b(0x210)+'ality';return _0x5c735b(0x211)+_0x5c735b(0x1d2)+_0x5c735b(0x1cd)+_0x5c735b(0x20c)+_0x5c735b(0x1e2)+'eed/q'+'ualit'+_0x5c735b(0x213)+_0x5c735b(0x1cf);}export function optimizeCost(_0x16a526={}){const _0x3e979c=_0x30d0,{frequency:_0x3b003c,criticality:_0x19b007,costSensitive:_0x23fea6,budget:_0x825e3c,requirements:requirements={}}=_0x16a526,_0x1faa13={};_0x1faa13[_0x3e979c(0x215)+_0x3e979c(0x1d0)]=_0x3b003c,_0x1faa13['criti'+'calit'+'y']=_0x19b007,_0x1faa13['costS'+_0x3e979c(0x1f6)+'ive']=_0x23fea6,_0x1faa13[_0x3e979c(0x1ff)+_0x3e979c(0x1e8)+'ts']={...requirements},_0x1faa13[_0x3e979c(0x1ff)+_0x3e979c(0x1e8)+'ts'][_0x3e979c(0x217)+'ensit'+'ive']=_0x23fea6,_0x1faa13[_0x3e979c(0x1ff)+_0x3e979c(0x1e8)+'ts']['env']=process[_0x3e979c(0x20f)];const {tier:_0x561c64,provider:_0x2a9b30,reason:_0x4387e6}=selectModelTierAndProvider(_0x1faa13),_0x2d9d05={};_0x2d9d05[_0x3e979c(0x1e7)+_0x3e979c(0x1dd)]=_0x561c64,_0x2d9d05['provi'+'der']=_0x2a9b30;const _0xa186e=createConfig(_0x2d9d05),_0x5e44ed=getProvider(_0x2a9b30),_0x401693={};_0x401693[_0x3e979c(0x1eb)]=0x0,_0x401693[_0x3e979c(0x208)+'t']=0x0;const _0x3f9b52=_0x5e44ed?.['prici'+'ng']||_0x401693,_0x33b65a=0x3e8,_0x200ddd=0x1f4,_0x16a110=_0x33b65a/0xf4240*_0x3f9b52['input']+_0x200ddd/0xf4240*_0x3f9b52['outpu'+'t'],_0x3813d4={};for(const _0x1b7907 of[_0x3e979c(0x1d4),'balan'+_0x3e979c(0x20b),'best']){if(_0x1b7907!==_0x561c64){const _0x2842f2=_0x16a110,_0x1df359={};_0x1df359[_0x3e979c(0x1d9)]=_0x2842f2,_0x1df359['savin'+'gs']=_0x16a110-_0x2842f2,_0x1df359[_0x3e979c(0x1fa)+'gsPer'+_0x3e979c(0x1ed)]=_0x16a110>0x0?(_0x16a110-_0x2842f2)/_0x16a110*0x64:0x0,_0x3813d4[_0x1b7907]=_0x1df359;}}const 
_0xb2cda2=_0x825e3c?_0x16a110<=_0x825e3c:null;return{'recommendedTier':_0x561c64,'recommendedProvider':_0x2a9b30,'estimatedCost':_0x16a110,'savings':getSavingsEstimate(_0x561c64,_0x2a9b30,_0x3813d4),'config':_0xa186e,'reason':_0x4387e6,'withinBudget':_0xb2cda2,'comparisons':_0x3813d4,'recommendation':_0xb2cda2===![]?'Estim'+_0x3e979c(0x1ca)+_0x3e979c(0x1fc)+'($'+_0x16a110[_0x3e979c(0x212)+'ed'](0x6)+(')\x20exc'+_0x3e979c(0x1d8)+'budge'+'t\x20($')+_0x825e3c['toFix'+'ed'](0x6)+(').\x20Co'+_0x3e979c(0x1ef)+_0x3e979c(0x202)+'ng\x20\x27f'+'ast\x27\x20'+_0x3e979c(0x214)):'Optim'+_0x3e979c(0x1fe)+_0x3e979c(0x1f1)+_0x3e979c(0x1f4)+_0x3e979c(0x218)+_0x2a9b30+'\x20'+_0x561c64+('\x20tier'+_0x3e979c(0x20d)+_0x3e979c(0x1cb)+_0x3e979c(0x1f5))+_0x16a110[_0x3e979c(0x212)+'ed'](0x6)+(_0x3e979c(0x204)+'valid'+'ation'+')')};}function getSavingsEstimate(_0x1217b9,_0x1d34f6,_0x5139f4){const _0x502d05=_0x30d0;if(_0x1217b9===_0x502d05(0x1d4)){const _0x1b548a=_0x5139f4[_0x502d05(0x20a)+_0x502d05(0x20b)]?.['savin'+'gs']||0x0,_0x2ccd2c=_0x5139f4['best']?.[_0x502d05(0x1fa)+'gs']||0x0;return{'vsBalanced':_0x1b548a>0x0?(_0x5139f4[_0x502d05(0x20a)+_0x502d05(0x20b)][_0x502d05(0x1fa)+_0x502d05(0x1f8)+_0x502d05(0x1ed)]||0x0)[_0x502d05(0x212)+'ed'](0x0)+'%':'0%','vsBest':_0x2ccd2c>0x0?(_0x5139f4[_0x502d05(0x209)][_0x502d05(0x1fa)+_0x502d05(0x1f8)+'cent']||0x0)['toFix'+'ed'](0x0)+'%':'0%'};}if(_0x1217b9===_0x502d05(0x20a)+_0x502d05(0x20b)){const 
_0x2da01f=_0x5139f4['fast']?.[_0x502d05(0x1fa)+'gs']||0x0,_0x729809=_0x5139f4[_0x502d05(0x209)]?.['savin'+'gs']||0x0;return{'vsFast':_0x2da01f<0x0?Math[_0x502d05(0x1e0)](_0x5139f4['fast']?.['savin'+_0x502d05(0x1f8)+'cent']||0x0)[_0x502d05(0x212)+'ed'](0x0)+(_0x502d05(0x206)+_0x502d05(0x203)+'ensiv'+'e'):'0%','vsBest':_0x729809>0x0?(_0x5139f4['best']['savin'+'gsPer'+'cent']||0x0)[_0x502d05(0x212)+'ed'](0x0)+'%':'0%'};}return{'vsFast':_0x5139f4[_0x502d05(0x1d4)]?Math[_0x502d05(0x1e0)](_0x5139f4['fast']['savin'+_0x502d05(0x1f8)+'cent']||0x0)['toFix'+'ed'](0x0)+('%\x20mor'+'e\x20exp'+'ensiv'+'e'):'0%','vsBalanced':_0x5139f4['balan'+'ced']?Math['abs'](_0x5139f4[_0x502d05(0x20a)+'ced'][_0x502d05(0x1fa)+'gsPer'+_0x502d05(0x1ed)]||0x0)['toFix'+'ed'](0x0)+(_0x502d05(0x206)+_0x502d05(0x203)+_0x502d05(0x1cc)+'e'):'0%'};}
@@ -2,9 +2,11 @@
2
2
  * Cost Tracking Utilities
3
3
  *
4
4
  * Tracks API costs over time, provides cost estimates, and helps optimize spending.
5
+ * Includes budget limits and alerting.
5
6
  */
6
7
 
7
8
  import { getCached, setCached } from './cache.mjs';
9
+ import { warn, log } from './logger.mjs';
8
10
 
9
11
  /**
10
12
  * Cost Tracker Class
@@ -22,11 +24,21 @@ export class CostTracker {
22
24
  * Load costs from cache/storage
23
25
  */
24
26
  loadCosts() {
27
+ const defaultCosts = { history: [], totals: { total: 0, count: 0 }, byProvider: {}, byDate: {} };
25
28
  try {
26
29
  const cached = getCached(this.storageKey, 'cost-tracker', {});
27
- return cached || { history: [], totals: {}, byProvider: {} };
30
+ if (cached && typeof cached === 'object' && cached.history) {
31
+ // Ensure all required properties exist
32
+ return {
33
+ history: cached.history || [],
34
+ totals: { total: 0, count: 0, ...cached.totals },
35
+ byProvider: cached.byProvider || {},
36
+ byDate: cached.byDate || {}
37
+ };
38
+ }
39
+ return defaultCosts;
28
40
  } catch {
29
- return { history: [], totals: {}, byProvider: {} };
41
+ return defaultCosts;
30
42
  }
31
43
  }
32
44
 
@@ -196,6 +208,106 @@ export class CostTracker {
196
208
  };
197
209
  }
198
210
 
211
+ /**
212
+ * Set budget limit with alert thresholds
213
+ *
214
+ * @param {number} budgetLimit - Total budget limit (USD)
215
+ * @param {Object} [options={}] - Budget options
216
+ * @param {number} [options.warningThreshold=0.8] - Warn at this percentage (0-1)
217
+ * @param {Function} [options.onWarning] - Callback when warning threshold reached
218
+ * @param {Function} [options.onExceeded] - Callback when budget exceeded
219
+ */
220
+ setBudgetLimit(budgetLimit, options = {}) {
221
+ const { warningThreshold = 0.8, onWarning = null, onExceeded = null } = options;
222
+
223
+ if (!this.costs.budgets) {
224
+ this.costs.budgets = [];
225
+ }
226
+
227
+ const budget = {
228
+ limit: budgetLimit,
229
+ warningThreshold,
230
+ onWarning,
231
+ onExceeded,
232
+ createdAt: Date.now()
233
+ };
234
+
235
+ this.costs.budgets.push(budget);
236
+ this.saveCosts();
237
+
238
+ // Check immediately
239
+ this.checkBudgets();
240
+ }
241
+
242
+ /**
243
+ * Check all budget limits and trigger alerts
244
+ *
245
+ * @returns {Array} Array of budget status objects
246
+ */
247
+ checkBudgets() {
248
+ if (!this.costs.budgets || this.costs.budgets.length === 0) {
249
+ return [];
250
+ }
251
+
252
+ const stats = this.getStats();
253
+ const current = stats.total;
254
+ const statuses = [];
255
+
256
+ for (const budget of this.costs.budgets) {
257
+ const percentage = current / budget.limit;
258
+ const status = {
259
+ limit: budget.limit,
260
+ current,
261
+ percentage,
262
+ remaining: Math.max(0, budget.limit - current),
263
+ warningThreshold: budget.warningThreshold,
264
+ status: percentage >= 1 ? 'exceeded' : (percentage >= budget.warningThreshold ? 'warning' : 'ok')
265
+ };
266
+
267
+ statuses.push(status);
268
+
269
+ // Trigger callbacks
270
+ if (percentage >= 1 && budget.onExceeded) {
271
+ try {
272
+ budget.onExceeded(status);
273
+ } catch (err) {
274
+ // Don't fail if callback errors
275
+ }
276
+ } else if (percentage >= budget.warningThreshold && budget.onWarning) {
277
+ try {
278
+ budget.onWarning(status);
279
+ } catch (err) {
280
+ // Don't fail if callback errors
281
+ }
282
+ }
283
+ }
284
+
285
+ return statuses;
286
+ }
287
+
288
+ /**
289
+ * Get budget status
290
+ *
291
+ * @returns {Object} Budget status summary
292
+ */
293
+ getBudgetStatus() {
294
+ const statuses = this.checkBudgets();
295
+ if (statuses.length === 0) {
296
+ return { hasBudgets: false };
297
+ }
298
+
299
+ const exceeded = statuses.filter(s => s.status === 'exceeded');
300
+ const warnings = statuses.filter(s => s.status === 'warning');
301
+
302
+ return {
303
+ hasBudgets: true,
304
+ totalBudgets: statuses.length,
305
+ exceeded: exceeded.length,
306
+ warnings: warnings.length,
307
+ statuses
308
+ };
309
+ }
310
+
199
311
  /**
200
312
  * Reset cost tracking
201
313
  */
@@ -255,3 +367,23 @@ export function getCostStats() {
255
367
  return getCostTracker().getStats();
256
368
  }
257
369
 
370
+ /**
371
+ * Set budget limit (convenience function)
372
+ *
373
+ * @param {number} budgetLimit - Budget limit in USD
374
+ * @param {Object} [options={}] - Budget options
375
+ */
376
+ export function setBudgetLimit(budgetLimit, options = {}) {
377
+ const tracker = getCostTracker();
378
+ tracker.setBudgetLimit(budgetLimit, options);
379
+ }
380
+
381
+ /**
382
+ * Get budget status (convenience function)
383
+ *
384
+ * @returns {Object} Budget status
385
+ */
386
+ export function getBudgetStatus() {
387
+ return getCostTracker().getBudgetStatus();
388
+ }
389
+
@@ -12,6 +12,7 @@
12
12
  import { createConfig } from './config.mjs';
13
13
  import { loadEnv } from './load-env.mjs';
14
14
  import { warn } from './logger.mjs';
15
+ import { ValidationError } from './errors.mjs';
15
16
  // Load env before LLM utils
16
17
  loadEnv();
17
18
  // Use shared LLM utility library for text-only calls (optional dependency)
@@ -111,7 +112,16 @@ Return ONLY the JSON object, no other text.`;
111
112
  if (jsonMatch) {
112
113
  parsed = JSON.parse(jsonMatch[0]);
113
114
  } else {
114
- throw new Error('Could not extract JSON from response');
115
+ throw new ValidationError(
116
+ 'Could not extract JSON from response. The LLM response did not contain valid JSON. ' +
117
+ 'This may indicate the model failed to follow the schema format. ' +
118
+ 'Try: 1) Simplifying the schema, 2) Using a more capable model tier, or 3) Adding examples to the prompt.',
119
+ {
120
+ responseLength: response?.length || 0,
121
+ responsePreview: response?.substring(0, 200) || 'No response',
122
+ schema: schema
123
+ }
124
+ );
115
125
  }
116
126
  }
117
127
  if (parsed && validateSchema(parsed, schema)) {
@@ -126,25 +136,44 @@ Return ONLY the JSON object, no other text.`;
126
136
 
127
137
  /**
128
138
  * Call LLM API (text-only, no vision)
139
+ * Uses cached wrapper for better performance and cost reduction
129
140
  * Uses shared utility with advanced tier for better extraction quality
130
141
  */
131
142
  async function callLLMForExtraction(prompt, config) {
132
143
  const apiKey = config.apiKey;
133
144
  const provider = config.provider || 'gemini';
134
145
 
135
- // Try to use optional llm-utils library if available
146
+ // Use cached LLM wrapper (reduces costs and improves performance)
136
147
  try {
137
- const llmUtils = await import('@arclabs561/llm-utils');
138
- const callLLMUtil = llmUtils.callLLM;
148
+ const { callLLMCached } = await import('./utils/cached-llm.mjs');
139
149
  // Use advanced tier for data extraction (needs higher quality)
140
- return await callLLMUtil(prompt, provider, apiKey, {
150
+ return await callLLMCached(prompt, provider, apiKey, {
141
151
  tier: 'advanced', // Data extraction benefits from better models
142
152
  temperature: 0.1,
143
153
  maxTokens: 1000,
154
+ useCache: true, // Enable caching by default
144
155
  });
145
156
  } catch (error) {
146
- // Fallback: use local implementation or throw
147
- throw new Error(`LLM extraction requires @arclabs561/llm-utils package: ${error.message}`);
157
+ // Fallback: try uncached version if cached wrapper fails
158
+ try {
159
+ const llmUtils = await import('@arclabs561/llm-utils');
160
+ return await llmUtils.callLLM(prompt, provider, apiKey, {
161
+ tier: 'advanced',
162
+ temperature: 0.1,
163
+ maxTokens: 1000,
164
+ });
165
+ } catch (fallbackError) {
166
+ throw new ValidationError(
167
+ `LLM extraction requires @arclabs561/llm-utils package. ` +
168
+ `Install it with: npm install @arclabs561/llm-utils. ` +
169
+ `Error: ${fallbackError.message}`,
170
+ {
171
+ package: '@arclabs561/llm-utils',
172
+ installationCommand: 'npm install @arclabs561/llm-utils',
173
+ originalError: fallbackError.message
174
+ }
175
+ );
176
+ }
148
177
  }
149
178
  }
150
179
 
@@ -8,9 +8,8 @@
8
8
  * - ES-KNN: arXiv:2506.05614 (Exemplar Selection KNN using semantic similarity)
9
9
  * - KATE: arXiv:2101.06804 (Foundational work on kNN-augmented in-context examples)
10
10
  *
11
- * Note: This implementation uses keyword-based similarity (Jaccard) rather than
12
- * true semantic embeddings due to npm package constraints. For full ES-KNN,
13
- * embedding-based cosine similarity would be required.
11
+ * This implementation supports both keyword-based similarity (Jaccard) and
12
+ * embedding-based semantic similarity. Embeddings are preferred when available, but are skipped by default for more than 100 examples unless `useEmbeddings: true` is passed.
14
13
  *
15
14
  * This module provides dynamic few-shot example selection based on similarity
16
15
  * to the evaluation prompt.
@@ -19,20 +18,25 @@
19
18
  /**
20
19
  * Select few-shot examples based on semantic similarity to prompt
21
20
  *
21
+ * Research: ES-KNN shows embedding-based selection improves performance by 10-20%
22
+ * over keyword-based selection. This implementation supports both methods.
23
+ *
22
24
  * @param {string} prompt - Evaluation prompt
23
25
  * @param {Array<import('./index.mjs').FewShotExample>} examples - Available examples
24
26
  * @param {{
25
27
  * maxExamples?: number;
26
28
  * similarityThreshold?: number;
27
29
  * useSemanticMatching?: boolean;
30
+ * task?: string;
28
31
  * }} [options={}] - Selection options
29
- * @returns {Array<import('./index.mjs').FewShotExample>} Selected examples
32
+ * @returns {Promise<Array<import('./index.mjs').FewShotExample>>} Selected examples
30
33
  */
31
- export function selectFewShotExamples(prompt, examples = [], options = {}) {
34
+ export async function selectFewShotExamples(prompt, examples = [], options = {}) {
32
35
  const {
33
36
  maxExamples = 3,
34
37
  similarityThreshold = 0.3,
35
- useSemanticMatching = true
38
+ useSemanticMatching = true,
39
+ task = 'general'
36
40
  } = options;
37
41
 
38
42
  // Validate inputs
@@ -50,14 +54,68 @@ export function selectFewShotExamples(prompt, examples = [], options = {}) {
50
54
  return examples.slice(0, maxExamples);
51
55
  }
52
56
 
53
- // Simple keyword-based similarity (for npm package - full semantic matching would require embeddings)
54
- const promptKeywords = extractKeywords(prompt.toLowerCase());
57
+ // UX OPTIMIZATION: Auto-disable embeddings for large example arrays (>100) unless explicitly requested
58
+ // - Why: Embeddings add ~15ms per example, so 1000 examples = ~15s latency
59
+ // - User experience: Most users have 10-50 examples, so embeddings are fast and valuable
60
+ // - Edge case: Large datasets (1000+ examples) should use keyword matching for speed
61
+ // - Exception: If useEmbeddings is explicitly set to true, respect user preference
62
+ const exampleCount = examples.length;
63
+ const shouldUseEmbeddingsForLargeArrays = options.useEmbeddings === true;
64
+ const autoDisableForLargeArrays = exampleCount > 100 && !shouldUseEmbeddingsForLargeArrays;
65
+
66
+ // Try embeddings first (more accurate) - but skip for large arrays unless explicitly requested
67
+ if (!autoDisableForLargeArrays) {
68
+ try {
69
+ const { instructionSemanticSimilarity, isInstructionEmbeddingsAvailable } = await import('../evaluation/utils/instruction-embeddings.mjs');
70
+ const { semanticSimilarity, isEmbeddingsAvailable } = await import('../evaluation/utils/semantic-matcher.mjs');
71
+
72
+ const useInstructionEmbeddings = await isInstructionEmbeddingsAvailable();
73
+ const useGeneralEmbeddings = !useInstructionEmbeddings && await isEmbeddingsAvailable();
74
+
75
+ if (useInstructionEmbeddings || useGeneralEmbeddings) {
76
+ // Use embeddings for similarity calculation
77
+ const similarityFn = useInstructionEmbeddings
78
+ ? (text1, text2) => instructionSemanticSimilarity(text1, text2, task)
79
+ : (text1, text2) => semanticSimilarity(text1, text2);
80
+
81
+ // Score each example using embeddings
82
+ const scored = await Promise.all(
83
+ examples.map(async (example) => {
84
+ const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
85
+ const similarity = await similarityFn(prompt, exampleText);
86
+
87
+ return {
88
+ example,
89
+ similarity: similarity !== null ? similarity : 0
90
+ };
91
+ })
92
+ );
93
+
94
+ // Sort by similarity and take top N
95
+ return scored
96
+ .filter(s => s.similarity >= similarityThreshold)
97
+ .sort((a, b) => b.similarity - a.similarity)
98
+ .slice(0, maxExamples)
99
+ .map(s => s.example);
100
+ }
101
+ } catch (error) {
102
+ // Fall through to keyword matching if embeddings unavailable
103
+ }
104
+ }
105
+
106
+ // Fallback: Keyword-based similarity (Jaccard)
107
+ // For very long prompts, limit keyword extraction to avoid performance issues
108
+ const maxPromptLength = 10000; // Limit prompt processing to 10KB for performance
109
+ const processedPrompt = prompt.length > maxPromptLength
110
+ ? prompt.substring(0, maxPromptLength)
111
+ : prompt;
112
+
113
+ const promptKeywords = extractKeywords(processedPrompt.toLowerCase());
55
114
 
56
115
  // Score each example by keyword overlap
57
116
  const scored = examples.map(example => {
58
- const exampleKeywords = extractKeywords(
59
- (example.description || '') + ' ' + (example.evaluation || '')
60
- );
117
+ const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
118
+ const exampleKeywords = extractKeywords(exampleText.toLowerCase());
61
119
 
62
120
  // Jaccard similarity (intersection over union)
63
121
  const intersection = new Set(
package/src/errors.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  * Custom Error Classes for ai-visual-test
3
3
  *
4
4
  * Provides standardized error handling across the package.
5
- * Based on Playwright's error handling patterns and industry best practices.
5
+ * Based on Playwright's error handling patterns and industry practices.
6
6
  *
7
7
  * All errors extend AIBrowserTestError for consistent error handling and serialization.
8
8
  */
@@ -42,7 +42,11 @@ export class AIBrowserTestError extends Error {
42
42
  code: this.code,
43
43
  message: this.message,
44
44
  details: this.details,
45
- stack: this.stack
45
+ // SECURITY: Stack traces may contain sensitive information
46
+ // Only include in development mode or when explicitly requested
47
+ ...(process.env.NODE_ENV === 'development' || process.env.INCLUDE_STACK_TRACES === 'true'
48
+ ? { stack: this.stack }
49
+ : {})
46
50
  };
47
51
  }
48
52
  }
@@ -110,10 +110,22 @@ let globalTracker = null;
110
110
 
111
111
  /**
112
112
  * Get or create global propagation tracker
113
+ *
114
+ * @param {Object} [options={}] - Options for tracker (passed to the constructor on first call; on later calls only `enabled` and `logLevel` are applied to the existing tracker)
115
+ * @returns {ExperiencePropagationTracker} Global tracker instance
113
116
  */
114
117
  export function getPropagationTracker(options = {}) {
115
118
  if (!globalTracker) {
116
119
  globalTracker = new ExperiencePropagationTracker(options);
120
+ } else if (Object.keys(options).length > 0) {
121
+ // If tracker exists but options provided, update it
122
+ // This allows reconfiguration (though typically tracker is created once)
123
+ if (options.enabled !== undefined) {
124
+ globalTracker.enabled = options.enabled;
125
+ }
126
+ if (options.logLevel !== undefined) {
127
+ globalTracker.logLevel = options.logLevel;
128
+ }
117
129
  }
118
130
  return globalTracker;
119
131
  }
@@ -11,6 +11,7 @@
11
11
  */
12
12
 
13
13
  import { warn } from './logger.mjs';
14
+ import { ValidationError } from './errors.mjs';
14
15
 
15
16
  /**
16
17
  * Experience Trace
@@ -133,7 +134,7 @@ export class ExperienceTrace {
133
134
  * @param {Record<string, unknown>} [options={}] - Aggregation options
134
135
  * @returns {Promise<import('./index.mjs').AggregatedTemporalNotes>} Aggregated notes
135
136
  */
136
- aggregateNotes(aggregateTemporalNotes, options = {}) {
137
+ async aggregateNotes(aggregateTemporalNotes, options = {}) {
137
138
  // Extract notes from events and validations
138
139
  const eventNotes = this.events
139
140
  .filter(e => e.type === 'interaction' || e.type === 'observation')
@@ -157,7 +158,7 @@ export class ExperienceTrace {
157
158
 
158
159
  const notes = [...eventNotes, ...validationNotes].sort((a, b) => a.timestamp - b.timestamp);
159
160
 
160
- this.aggregatedNotes = aggregateTemporalNotes(notes, options);
161
+ this.aggregatedNotes = await aggregateTemporalNotes(notes, options);
161
162
  return this.aggregatedNotes;
162
163
  }
163
164
 
@@ -296,7 +297,15 @@ export class ExperienceTracerManager {
296
297
  async metaEvaluateTrace(sessionId, validateScreenshot) {
297
298
  const trace = this.getTrace(sessionId);
298
299
  if (!trace) {
299
- throw new Error(`Trace not found: ${sessionId}`);
300
+ throw new ValidationError(
301
+ `Trace not found for session: ${sessionId}. ` +
302
+ `Use startTrace() to create a new trace, or listTraces() to see all available traces.`,
303
+ {
304
+ sessionId,
305
+ availableSessions: Object.keys(this.traces),
306
+ function: 'metaEvaluateTrace'
307
+ }
308
+ );
300
309
  }
301
310
 
302
311
  const evaluation = {