@arclabs561/ai-visual-test 0.5.1 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +127 -11
- package/DEPLOYMENT.md +225 -9
- package/README.md +71 -80
- package/index.d.ts +902 -5
- package/package.json +10 -51
- package/src/batch-optimizer.mjs +39 -0
- package/src/cache.mjs +241 -16
- package/src/config.mjs +33 -91
- package/src/constants.mjs +54 -0
- package/src/convenience.mjs +113 -10
- package/src/cost-optimization.mjs +1 -0
- package/src/cost-tracker.mjs +134 -2
- package/src/data-extractor.mjs +36 -7
- package/src/dynamic-few-shot.mjs +69 -11
- package/src/errors.mjs +6 -2
- package/src/experience-propagation.mjs +12 -0
- package/src/experience-tracer.mjs +12 -3
- package/src/game-player.mjs +222 -43
- package/src/graceful-shutdown.mjs +126 -0
- package/src/helpers/playwright.mjs +22 -8
- package/src/human-validation-manager.mjs +99 -2
- package/src/index.mjs +48 -3
- package/src/integrations/playwright.mjs +140 -0
- package/src/judge.mjs +699 -24
- package/src/load-env.mjs +2 -1
- package/src/logger.mjs +31 -3
- package/src/model-tier-selector.mjs +1 -221
- package/src/natural-language-specs.mjs +31 -3
- package/src/persona-enhanced.mjs +4 -2
- package/src/persona-experience.mjs +1 -1
- package/src/pricing.mjs +28 -0
- package/src/prompt-composer.mjs +162 -5
- package/src/provider-data.mjs +115 -0
- package/src/render-change-detector.mjs +5 -0
- package/src/research-enhanced-validation.mjs +7 -5
- package/src/retry.mjs +21 -7
- package/src/rubrics.mjs +4 -0
- package/src/safe-logger.mjs +71 -0
- package/src/session-cost-tracker.mjs +320 -0
- package/src/smart-validator.mjs +8 -8
- package/src/spec-templates.mjs +52 -6
- package/src/startup-validation.mjs +127 -0
- package/src/temporal-adaptive.mjs +2 -2
- package/src/temporal-decision-manager.mjs +1 -271
- package/src/temporal-logic.mjs +104 -0
- package/src/temporal-note-pruner.mjs +119 -0
- package/src/temporal-preprocessor.mjs +1 -543
- package/src/temporal.mjs +681 -79
- package/src/utils/action-hallucination-detector.mjs +301 -0
- package/src/utils/baseline-validator.mjs +82 -0
- package/src/utils/cache-stats.mjs +104 -0
- package/src/utils/cached-llm.mjs +164 -0
- package/src/utils/capability-stratifier.mjs +108 -0
- package/src/utils/counterfactual-tester.mjs +83 -0
- package/src/utils/error-recovery.mjs +117 -0
- package/src/utils/explainability-scorer.mjs +119 -0
- package/src/utils/exploratory-automation.mjs +131 -0
- package/src/utils/index.mjs +10 -0
- package/src/utils/intent-recognizer.mjs +201 -0
- package/src/utils/log-sanitizer.mjs +165 -0
- package/src/utils/path-validator.mjs +88 -0
- package/src/utils/performance-logger.mjs +316 -0
- package/src/utils/performance-measurement.mjs +280 -0
- package/src/utils/prompt-sanitizer.mjs +213 -0
- package/src/utils/rate-limiter.mjs +144 -0
- package/src/validation-framework.mjs +24 -20
- package/src/validation-result-normalizer.mjs +35 -1
- package/src/validation.mjs +75 -25
- package/src/validators/accessibility-validator.mjs +144 -0
- package/src/validators/hybrid-validator.mjs +48 -4
- package/api/health.js +0 -34
- package/api/validate.js +0 -252
- package/public/index.html +0 -149
- package/vercel.json +0 -27
package/src/convenience.mjs
CHANGED
|
@@ -23,7 +23,7 @@ import { TEMPORAL_CONSTANTS } from './constants.mjs';
|
|
|
23
23
|
/**
|
|
24
24
|
* Test gameplay with variable goals
|
|
25
25
|
*
|
|
26
|
-
*
|
|
26
|
+
* Workflow for testing games with variable goals/prompts.
|
|
27
27
|
* Originally motivated by interactive web applications that require
|
|
28
28
|
* real-time validation, variable goals, and temporal understanding.
|
|
29
29
|
*
|
|
@@ -96,7 +96,11 @@ export async function testGameplay(page, options = {}) {
|
|
|
96
96
|
evaluations: [],
|
|
97
97
|
aggregated: null,
|
|
98
98
|
consistency: null,
|
|
99
|
-
propagation: []
|
|
99
|
+
propagation: [],
|
|
100
|
+
temporalScreenshots: [], // Initialize to empty array for consistency
|
|
101
|
+
processedTemporalNotes: null, // Initialize to null
|
|
102
|
+
temporalGraph: null, // Initialize to null
|
|
103
|
+
selectedScreenshots: undefined // Only set if >10 screenshots
|
|
100
104
|
};
|
|
101
105
|
|
|
102
106
|
try {
|
|
@@ -173,8 +177,9 @@ export async function testGameplay(page, options = {}) {
|
|
|
173
177
|
result.temporalScreenshots = temporalScreenshots;
|
|
174
178
|
trackPropagation('temporal', { count: temporalScreenshots.length }, 'Captured temporal screenshots');
|
|
175
179
|
|
|
176
|
-
|
|
177
|
-
|
|
180
|
+
// Use temporal preprocessing by default
|
|
181
|
+
// Activity-based: high-Hz uses cache, low-Hz does expensive preprocessing
|
|
182
|
+
if (temporalScreenshots.length > 0) {
|
|
178
183
|
const { createTemporalPreprocessingManager, createAdaptiveTemporalProcessor } = await import('./temporal-preprocessor.mjs');
|
|
179
184
|
const preprocessingManager = createTemporalPreprocessingManager();
|
|
180
185
|
const adaptiveProcessor = createAdaptiveTemporalProcessor(preprocessingManager);
|
|
@@ -250,7 +255,7 @@ export async function testGameplay(page, options = {}) {
|
|
|
250
255
|
// Always return aggregated notes (even if empty) for consistency
|
|
251
256
|
if (allNotes.length > 0) {
|
|
252
257
|
// Use fixed temporal aggregation system
|
|
253
|
-
const aggregated = aggregateTemporalNotes(allNotes, {
|
|
258
|
+
const aggregated = await aggregateTemporalNotes(allNotes, {
|
|
254
259
|
windowSize: 5000,
|
|
255
260
|
decayFactor: 0.9
|
|
256
261
|
});
|
|
@@ -281,11 +286,58 @@ export async function testGameplay(page, options = {}) {
|
|
|
281
286
|
};
|
|
282
287
|
}
|
|
283
288
|
|
|
289
|
+
// IMPROVEMENT: Build temporal graph for better coherence understanding
|
|
290
|
+
try {
|
|
291
|
+
const { buildTemporalGraph } = await import('./temporal.mjs');
|
|
292
|
+
const temporalGraph = await buildTemporalGraph(allNotes, {
|
|
293
|
+
windowSize: 5000,
|
|
294
|
+
decayFactor: 0.9,
|
|
295
|
+
useLLM: false, // Use keyword matching for speed in gameplay
|
|
296
|
+
frequency: fps // Auto-detect extraction method based on frequency
|
|
297
|
+
});
|
|
298
|
+
result.temporalGraph = temporalGraph;
|
|
299
|
+
trackPropagation('temporal-graph', {
|
|
300
|
+
nodes: temporalGraph.graph?.nodes?.length || 0,
|
|
301
|
+
edges: temporalGraph.graph?.edges?.length || 0,
|
|
302
|
+
averageCoherence: temporalGraph.graph?.averageCoherence || 0,
|
|
303
|
+
entityCount: Object.keys(temporalGraph.graph?.entities || {}).length
|
|
304
|
+
}, 'Built temporal graph representation');
|
|
305
|
+
} catch (error) {
|
|
306
|
+
warn(`[Convenience] Temporal graph building failed: ${error.message}`);
|
|
307
|
+
result.temporalGraph = null;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// IMPROVEMENT: Select representative screenshots for context window management
|
|
311
|
+
if (result.temporalScreenshots && result.temporalScreenshots.length > 10) {
|
|
312
|
+
try {
|
|
313
|
+
const { selectRepresentativeScreenshots } = await import('./temporal-note-pruner.mjs');
|
|
314
|
+
const evaluations = allNotes.map(n => ({ score: n.score || 0 }));
|
|
315
|
+
const selectedScreenshots = selectRepresentativeScreenshots(
|
|
316
|
+
result.temporalScreenshots,
|
|
317
|
+
evaluations,
|
|
318
|
+
{
|
|
319
|
+
maxScreenshots: 10,
|
|
320
|
+
strategy: 'keyframes' // Use keyframes for gameplay (captures state changes)
|
|
321
|
+
}
|
|
322
|
+
);
|
|
323
|
+
result.selectedScreenshots = selectedScreenshots;
|
|
324
|
+
trackPropagation('screenshot-selection', {
|
|
325
|
+
original: result.temporalScreenshots.length,
|
|
326
|
+
selected: selectedScreenshots.length,
|
|
327
|
+
reduction: ((result.temporalScreenshots.length - selectedScreenshots.length) / result.temporalScreenshots.length * 100).toFixed(1) + '%'
|
|
328
|
+
}, 'Selected representative screenshots for context management');
|
|
329
|
+
} catch (error) {
|
|
330
|
+
warn(`[Convenience] Screenshot selection failed: ${error.message}`);
|
|
331
|
+
result.selectedScreenshots = result.temporalScreenshots; // Fallback to all
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
284
335
|
trackPropagation('aggregation', {
|
|
285
336
|
windows: aggregated.windows.length,
|
|
286
337
|
coherence: aggregated.coherence,
|
|
287
|
-
scales: Object.keys(result.aggregatedMultiScale.scales || {})
|
|
288
|
-
|
|
338
|
+
scales: Object.keys(result.aggregatedMultiScale.scales || {}),
|
|
339
|
+
graphNodes: result.temporalGraph?.graph?.nodes?.length || 0
|
|
340
|
+
}, 'Aggregated temporal notes with multi-scale and temporal graph');
|
|
289
341
|
} else {
|
|
290
342
|
// Return empty aggregated structure if no notes (for consistency)
|
|
291
343
|
result.aggregated = {
|
|
@@ -373,7 +425,7 @@ export async function testGameplay(page, options = {}) {
|
|
|
373
425
|
/**
|
|
374
426
|
* Test browser experience with multiple stages
|
|
375
427
|
*
|
|
376
|
-
*
|
|
428
|
+
* Workflow for testing browser experiences across multiple stages
|
|
377
429
|
* (initial, form, payment, gameplay, etc.).
|
|
378
430
|
*
|
|
379
431
|
* @param {import('playwright').Page} page - Playwright page object
|
|
@@ -486,7 +538,7 @@ export async function testBrowserExperience(page, options = {}) {
|
|
|
486
538
|
// Aggregate temporal notes across all stages
|
|
487
539
|
const allStageNotes = result.experiences.flatMap(exp => exp.notes || []);
|
|
488
540
|
if (allStageNotes.length > 0) {
|
|
489
|
-
const stageAggregated = aggregateTemporalNotes(allStageNotes, {
|
|
541
|
+
const stageAggregated = await aggregateTemporalNotes(allStageNotes, {
|
|
490
542
|
windowSize: 10000,
|
|
491
543
|
decayFactor: 0.9
|
|
492
544
|
});
|
|
@@ -581,7 +633,7 @@ export async function validateWithGoals(screenshotPath, options = {}) {
|
|
|
581
633
|
} else if (context.notes && context.notes.length > 0) {
|
|
582
634
|
// Auto-aggregate if notes provided but not aggregated
|
|
583
635
|
try {
|
|
584
|
-
temporalNotes = aggregateTemporalNotes(context.notes, {
|
|
636
|
+
temporalNotes = await aggregateTemporalNotes(context.notes, {
|
|
585
637
|
windowSize: TEMPORAL_CONSTANTS.DEFAULT_WINDOW_SIZE_MS,
|
|
586
638
|
decayFactor: TEMPORAL_CONSTANTS.DEFAULT_DECAY_FACTOR
|
|
587
639
|
});
|
|
@@ -615,3 +667,54 @@ export async function validateWithGoals(screenshotPath, options = {}) {
|
|
|
615
667
|
};
|
|
616
668
|
}
|
|
617
669
|
|
|
670
|
+
/**
|
|
671
|
+
* Validate a Playwright Page directly
|
|
672
|
+
*
|
|
673
|
+
* Handles screenshotting, code extraction, and validation in one step.
|
|
674
|
+
* Reduces boilerplate for common Playwright testing workflows.
|
|
675
|
+
*
|
|
676
|
+
* @param {import('playwright').Page} page - Playwright page object
|
|
677
|
+
* @param {string} prompt - Evaluation prompt
|
|
678
|
+
* @param {Object} options - Validation options
|
|
679
|
+
* @param {boolean} [options.fullPage] - Capture full page screenshot
|
|
680
|
+
* @param {boolean} [options.captureCode] - Extract rendered code (default: true)
|
|
681
|
+
* @param {string} [options.tempDir] - Directory for temp screenshot (default: os.tmpdir())
|
|
682
|
+
* @param {boolean} [options.keepScreenshot] - Keep screenshot after validation (default: false)
|
|
683
|
+
* @returns {Promise<Object>} Validation result
|
|
684
|
+
*/
|
|
685
|
+
export async function validatePage(page, prompt, options = {}) {
|
|
686
|
+
if (!page || typeof page.screenshot !== 'function') {
|
|
687
|
+
throw new ValidationError('validatePage: page must be a Playwright Page object', { received: typeof page });
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
// Create temp screenshot
|
|
691
|
+
const fs = await import('fs');
|
|
692
|
+
const path = await import('path');
|
|
693
|
+
const os = await import('os');
|
|
694
|
+
const tempDir = options.tempDir || os.tmpdir();
|
|
695
|
+
const screenshotPath = path.join(tempDir, `validate-page-${Date.now()}.png`);
|
|
696
|
+
|
|
697
|
+
try {
|
|
698
|
+
await page.screenshot({ path: screenshotPath, fullPage: options.fullPage ?? false });
|
|
699
|
+
|
|
700
|
+
// Extract code if requested
|
|
701
|
+
let renderedCode = null;
|
|
702
|
+
if (options.captureCode !== false) {
|
|
703
|
+
renderedCode = await extractRenderedCode(page);
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
// Validate
|
|
707
|
+
const result = await validateScreenshot(screenshotPath, prompt, {
|
|
708
|
+
...options,
|
|
709
|
+
renderedCode
|
|
710
|
+
});
|
|
711
|
+
|
|
712
|
+
return result;
|
|
713
|
+
} finally {
|
|
714
|
+
// Cleanup unless requested to keep
|
|
715
|
+
if (!options.keepScreenshot && fs.existsSync(screenshotPath)) {
|
|
716
|
+
fs.unlinkSync(screenshotPath);
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
(function(_0x18385b,_0x2f2f0a){const _0xa53b50=_0x2d2e,_0x49ca27=_0x18385b();while(!![]){try{const _0xdd5ddf=parseInt(_0xa53b50(0x171))/0x1*(parseInt(_0xa53b50(0x16e))/0x2)+parseInt(_0xa53b50(0x199))/0x3*(-parseInt(_0xa53b50(0x174))/0x4)+parseInt(_0xa53b50(0x19c))/0x5*(parseInt(_0xa53b50(0x180))/0x6)+parseInt(_0xa53b50(0x16b))/0x7*(-parseInt(_0xa53b50(0x195))/0x8)+-parseInt(_0xa53b50(0x17c))/0x9*(-parseInt(_0xa53b50(0x197))/0xa)+-parseInt(_0xa53b50(0x18b))/0xb*(-parseInt(_0xa53b50(0x177))/0xc)+parseInt(_0xa53b50(0x17a))/0xd;if(_0xdd5ddf===_0x2f2f0a)break;else _0x49ca27['push'](_0x49ca27['shift']());}catch(_0x16dc43){_0x49ca27['push'](_0x49ca27['shift']());}}}(_0x1ecb,0x4479d));const _0x40f56c=(function(){let _0x180237=!![];return function(_0x539394,_0xb40faa){const _0x42db3d=_0x180237?function(){if(_0xb40faa){const _0x4b441b=_0xb40faa['apply'](_0x539394,arguments);return _0xb40faa=null,_0x4b441b;}}:function(){};return _0x180237=![],_0x42db3d;};}()),_0xd2e06c=_0x40f56c(this,function(){const _0x212118=_0x2d2e;return _0xd2e06c[_0x212118(0x18c)+'ing']()['searc'+'h'](_0x212118(0x193)+_0x212118(0x188)+'+$')['toStr'+_0x212118(0x198)]()['const'+'ructo'+'r'](_0xd2e06c)[_0x212118(0x167)+'h'](_0x212118(0x193)+_0x212118(0x188)+'+$');});_0xd2e06c();import{selectModelTier,selectProvider,selectModelTierAndProvider}from'./model-tier-selector.mjs';import{createConfig,getProvider}from'./config.mjs';export function calculateCostComparison(_0x57230a={},_0x52fd2f={}){const _0xaa590d=_0x2d2e,_0x33bd6a=parseFloat(_0x52fd2f[_0xaa590d(0x184)+'atedC'+_0xaa590d(0x168)]?.['total'+'Cost']||'0'),_0x397464=_0x57230a['model'+_0xaa590d(0x1a7)]||'balan'+_0xaa590d(0x173),_0x238e4b=_0x52fd2f['provi'+'der']||'gemin'+'i',_0x5ef67d=getProvider(_0x238e4b),_0x48530e={};_0x48530e['input']=0x0,_0x48530e['outpu'+'t']=0x0;const _0x26c913=_0x5ef67d?.[_0xaa590d(0x181)+'ng']||_0x48530e,_0x4f9c20=0x3e8,_0x321d8d=0x1f4,_0x578480={};for(const _0x1ff61d of[_0xaa590d(0x182),_0xaa590d(0x19b)+'ced','best']){const _0x161f86=_0x4f9c20/0xf4240*_0x26c913['input'],_0x519e8b=_0x321d8d/0xf4240*_0x26c913[_0xaa590d(0x1a1)+'t'];_0x578480[_0x1ff61d]=_0x161f86+_0x519e8b;}const _0x420aba={};for(const _0xf4fc80 of[_0xaa590d(0x182),'balan'+_0xaa590d(0x173),_0xaa590d(0x1a9)]){if(_0x578480[_0xf4fc80]&&_0x33bd6a>0x0){const _0x1573e6=_0x33bd6a-_0x578480[_0xf4fc80],_0x36dd72=_0x1573e6/_0x33bd6a*0x64;_0x420aba[_0xf4fc80]={'absolute':_0x1573e6,'percent':_0x36dd72,'cost':_0x578480[_0xf4fc80]};}}const _0x31be62={};return _0x31be62['tier']=_0x397464,_0x31be62[_0xaa590d(0x169)+'der']=_0x238e4b,_0x31be62[_0xaa590d(0x1a4)]=_0x33bd6a,{'current':_0x31be62,'tiers':_0x578480,'savings':_0x420aba,'recommendation':getCostOptimizationRecommendation(_0x57230a,_0x33bd6a,_0x578480)};}function getCostOptimizationRecommendation(_0x48b039,_0x19d13e,_0x2fc7c2){const _0x509a09=_0x2d2e,{frequency:_0x3a7aef,criticality:_0x1ccde8,costSensitive:_0x4f54d4}=_0x48b039;let _0x2bcee6=_0x509a09(0x19b)+_0x509a09(0x173);if(_0x3a7aef===_0x509a09(0x18e)||_0x3a7aef>=0xa||_0x4f54d4)_0x2bcee6=_0x509a09(0x182);else _0x1ccde8===_0x509a09(0x18f)+_0x509a09(0x194)&&(_0x2bcee6='best');const _0x505d15=_0x2fc7c2[_0x2bcee6]||_0x19d13e,_0x44afb3=_0x19d13e-_0x505d15,_0x10c8ba=_0x19d13e>0x0?_0x44afb3/_0x19d13e*0x64:0x0;return{'tier':_0x2bcee6,'cost':_0x505d15,'savings':_0x44afb3,'savingsPercent':_0x10c8ba,'reason':getRecommendationReason(_0x48b039,_0x2bcee6)};}function getRecommendationReason(_0x598c0e,_0xf227a9){const _0x30edec=_0x2d2e;if(_0xf227a9==='fast'){if(_0x598c0e[_0x30edec(0x185)+'ency']===_0x30edec(0x18e)||_0x598c0e['frequ'+_0x30edec(0x1a2)]>=0xa)return _0x30edec(0x1a5)+_0x30edec(0x185)+'ency\x20'+_0x30edec(0x17d)+_0x30edec(0x187)+'\x20requ'+_0x30edec(0x196)+_0x30edec(0x179)+_0x30edec(0x192);if(_0x598c0e[_0x30edec(0x189)+'ensit'+'ive'])return _0x30edec(0x1ac)+'sensi'+'tive\x20'+_0x30edec(0x183)+_0x30edec(0x1ab)+'\x20use\x20'+_0x30edec(0x179)+_0x30edec(0x192);}if(_0xf227a9==='best')return'Criti'+_0x30edec(0x178)+'valua'+_0x30edec(0x190)+_0x30edec(0x16f)+'res\x20b'+'est\x20t'+_0x30edec(0x1a3)+'or\x20qu'+_0x30edec(0x1aa);return'Balan'+_0x30edec(0x18a)+_0x30edec(0x170)+'rovid'+'es\x20sp'+'eed/q'+_0x30edec(0x1a6)+'y\x20tra'+'deoff';}function _0x2d2e(_0x2bf370,_0x100b7e){const _0x3462a3=_0x1ecb();return _0x2d2e=function(_0xd2e06c,_0x40f56c){_0xd2e06c=_0xd2e06c-0x167;let _0x1ecb23=_0x3462a3[_0xd2e06c];if(_0x2d2e['clmymu']===undefined){var _0x2d2e4c=function(_0x4d9406){const _0x4347ab='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x91f6aa='',_0x4491a1='',_0x428b38=_0x91f6aa+_0x2d2e4c;for(let _0x2de447=0x0,_0x464545,_0x5ec257,_0x372eca=0x0;_0x5ec257=_0x4d9406['charAt'](_0x372eca++);~_0x5ec257&&(_0x464545=_0x2de447%0x4?_0x464545*0x40+_0x5ec257:_0x5ec257,_0x2de447++%0x4)?_0x91f6aa+=_0x428b38['charCodeAt'](_0x372eca+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x464545>>(-0x2*_0x2de447&0x6)):_0x2de447:0x0){_0x5ec257=_0x4347ab['indexOf'](_0x5ec257);}for(let _0x180237=0x0,_0x539394=_0x91f6aa['length'];_0x180237<_0x539394;_0x180237++){_0x4491a1+='%'+('00'+_0x91f6aa['charCodeAt'](_0x180237)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x4491a1);};_0x2d2e['YBBLwq']=_0x2d2e4c,_0x2bf370=arguments,_0x2d2e['clmymu']=!![];}const _0x5bd10d=_0x3462a3[0x0],_0x25f117=_0xd2e06c+_0x5bd10d,_0x4bfa19=_0x2bf370[_0x25f117];if(!_0x4bfa19){const _0xb40faa=function(_0x42db3d){this['tUiOdk']=_0x42db3d,this['yllaQc']=[0x1,0x0,0x0],this['eWSZJt']=function(){return'newState';},this['NeSNJo']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['jVFOWK']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0xb40faa['prototype']['JTaKsI']=function(){const _0x4b441b=new RegExp(this['NeSNJo']+this['jVFOWK']),_0x57230a=_0x4b441b['test'](this['eWSZJt']['toString']())?--this['yllaQc'][0x1]:--this['yllaQc'][0x0];return this['eHArMZ'](_0x57230a);},_0xb40faa['prototype']['eHArMZ']=function(_0x52fd2f){if(!Boolean(~_0x52fd2f))return _0x52fd2f;return this['AIYzCx'](this['tUiOdk']);},_0xb40faa['prototype']['AIYzCx']=function(_0x33bd6a){for(let _0x397464=0x0,_0x238e4b=this['yllaQc']['length'];_0x397464<_0x238e4b;_0x397464++){this['yllaQc']['push'](Math['round'](Math['random']())),_0x238e4b=this['yllaQc']['length'];}return _0x33bd6a(this['yllaQc'][0x0]);},new _0xb40faa(_0x2d2e)['JTaKsI'](),_0x1ecb23=_0x2d2e['YBBLwq'](_0x1ecb23),_0x2bf370[_0x25f117]=_0x1ecb23;}else _0x1ecb23=_0x4bfa19;return _0x1ecb23;},_0x2d2e(_0x2bf370,_0x100b7e);}export function optimizeCost(_0x157b14={}){const _0x46d2fd=_0x2d2e,{frequency:_0x11c64d,criticality:_0x1162ee,costSensitive:_0x2e4e15,budget:_0x2fbb39,requirements:requirements={}}=_0x157b14,_0x5d2c5d={};_0x5d2c5d[_0x46d2fd(0x185)+'ency']=_0x11c64d,_0x5d2c5d[_0x46d2fd(0x18f)+_0x46d2fd(0x1a0)+'y']=_0x1162ee,_0x5d2c5d['costS'+'ensit'+'ive']=_0x2e4e15,_0x5d2c5d[_0x46d2fd(0x16f)+_0x46d2fd(0x186)+'ts']={...requirements},_0x5d2c5d[_0x46d2fd(0x16f)+_0x46d2fd(0x186)+'ts'][_0x46d2fd(0x189)+'ensit'+'ive']=_0x2e4e15,_0x5d2c5d[_0x46d2fd(0x16f)+_0x46d2fd(0x186)+'ts']['env']=process[_0x46d2fd(0x17f)];const {tier:_0x214f9b,provider:_0x20c49a,reason:_0x5c1bce}=selectModelTierAndProvider(_0x5d2c5d),_0xdaa5b8={};_0xdaa5b8['model'+'Tier']=_0x214f9b,_0xdaa5b8[_0x46d2fd(0x169)+_0x46d2fd(0x19a)]=_0x20c49a;const _0xfb6ace=createConfig(_0xdaa5b8),_0x56ddaf=getProvider(_0x20c49a),_0x409eff={};_0x409eff['input']=0x0,_0x409eff['outpu'+'t']=0x0;const _0x422e41=_0x56ddaf?.[_0x46d2fd(0x181)+'ng']||_0x409eff,_0x117a81=0x3e8,_0x510ec9=0x1f4,_0x59906d=_0x117a81/0xf4240*_0x422e41[_0x46d2fd(0x16c)]+_0x510ec9/0xf4240*_0x422e41[_0x46d2fd(0x1a1)+'t'],_0x39264a={};for(const _0x149af7 of['fast',_0x46d2fd(0x19b)+'ced','best']){if(_0x149af7!==_0x214f9b){const _0x87f361=_0x59906d,_0x3fad65={};_0x3fad65['cost']=_0x87f361,_0x3fad65[_0x46d2fd(0x19e)+'gs']=_0x59906d-_0x87f361,_0x3fad65[_0x46d2fd(0x19e)+'gsPer'+_0x46d2fd(0x19f)]=_0x59906d>0x0?(_0x59906d-_0x87f361)/_0x59906d*0x64:0x0,_0x39264a[_0x149af7]=_0x3fad65;}}const _0x449d7a=_0x2fbb39?_0x59906d<=_0x2fbb39:null;return{'recommendedTier':_0x214f9b,'recommendedProvider':_0x20c49a,'estimatedCost':_0x59906d,'savings':getSavingsEstimate(_0x214f9b,_0x20c49a,_0x39264a),'config':_0xfb6ace,'reason':_0x5c1bce,'withinBudget':_0x449d7a,'comparisons':_0x39264a,'recommendation':_0x449d7a===![]?_0x46d2fd(0x191)+'ated\x20'+'cost\x20'+'($'+_0x59906d[_0x46d2fd(0x1ad)+'ed'](0x6)+(')\x20exc'+_0x46d2fd(0x175)+'budge'+'t\x20($')+_0x2fbb39[_0x46d2fd(0x1ad)+'ed'](0x6)+(_0x46d2fd(0x17b)+_0x46d2fd(0x172)+'r\x20usi'+'ng\x20\x27f'+_0x46d2fd(0x16d)+'tier.'):'Optim'+'al\x20co'+_0x46d2fd(0x18d)+'ratio'+'n:\x20'+_0x20c49a+'\x20'+_0x214f9b+(_0x46d2fd(0x19d)+'\x20(est'+'imate'+_0x46d2fd(0x1ae))+_0x59906d[_0x46d2fd(0x1ad)+'ed'](0x6)+('\x20per\x20'+'valid'+_0x46d2fd(0x187)+')')};}function getSavingsEstimate(_0x1521bc,_0x3e64cf,_0x151d7f){const _0x373a2c=_0x2d2e;if(_0x1521bc===_0x373a2c(0x182)){const _0x36077e=_0x151d7f['balan'+_0x373a2c(0x173)]?.['savin'+'gs']||0x0,_0xbf31ee=_0x151d7f['best']?.[_0x373a2c(0x19e)+'gs']||0x0;return{'vsBalanced':_0x36077e>0x0?(_0x151d7f[_0x373a2c(0x19b)+_0x373a2c(0x173)]['savin'+_0x373a2c(0x16a)+_0x373a2c(0x19f)]||0x0)['toFix'+'ed'](0x0)+'%':'0%','vsBest':_0xbf31ee>0x0?(_0x151d7f[_0x373a2c(0x1a9)][_0x373a2c(0x19e)+_0x373a2c(0x16a)+_0x373a2c(0x19f)]||0x0)[_0x373a2c(0x1ad)+'ed'](0x0)+'%':'0%'};}if(_0x1521bc===_0x373a2c(0x19b)+'ced'){const _0xa4c8f8=_0x151d7f['fast']?.['savin'+'gs']||0x0,_0x23d59f=_0x151d7f['best']?.[_0x373a2c(0x19e)+'gs']||0x0;return{'vsFast':_0xa4c8f8<0x0?Math[_0x373a2c(0x1a8)](_0x151d7f[_0x373a2c(0x182)]?.['savin'+_0x373a2c(0x16a)+_0x373a2c(0x19f)]||0x0)['toFix'+'ed'](0x0)+(_0x373a2c(0x17e)+'e\x20exp'+_0x373a2c(0x176)+'e'):'0%','vsBest':_0x23d59f>0x0?(_0x151d7f['best'][_0x373a2c(0x19e)+'gsPer'+'cent']||0x0)[_0x373a2c(0x1ad)+'ed'](0x0)+'%':'0%'};}return{'vsFast':_0x151d7f[_0x373a2c(0x182)]?Math['abs'](_0x151d7f[_0x373a2c(0x182)]['savin'+_0x373a2c(0x16a)+'cent']||0x0)['toFix'+'ed'](0x0)+(_0x373a2c(0x17e)+'e\x20exp'+'ensiv'+'e'):'0%','vsBalanced':_0x151d7f[_0x373a2c(0x19b)+'ced']?Math['abs'](_0x151d7f[_0x373a2c(0x19b)+'ced'][_0x373a2c(0x19e)+_0x373a2c(0x16a)+'cent']||0x0)['toFix'+'ed'](0x0)+('%\x20mor'+'e\x20exp'+'ensiv'+'e'):'0%'};}function _0x1ecb(){const _0xa9d53f=['B3v0Chu','zw5JEq','AwvYigy','y29ZDa','sgLNAc0','DwfSAxq','vgLLCG','ywjZ','yMvZDa','ywXPDhK','DgLVBIW','q29ZDc0','Dg9gAxG','zdOGja','C2vHCMm','B3n0','ChjVDMK','z3nqzxi','mti2zgnotfDj','Aw5WDxq','yxn0jYa','nJyYogXUA1nRBq','CMvXDwK','AwvYiha','mtjgCNb5Cxe','BNnPzgu','y2vK','mtaYmtzJB3bkChu','zwvKCYa','zw5ZAxy','mtq2nZzxvgnduMO','y2fSigu','zMfZDca','ntKWmZnwsxPhruO','ks4Gq28','ndqWnZnkBhHUsK0','DMfSAwq','jsbTB3i','zw52','mZC5ogrsuuvgza','ChjPy2K','zMfZDa','B3bLCMe','zxn0Aw0','zNjLCxu','CMvTzw4','yxrPB24','ksSPkYK','y29ZDfm','y2vKihq','ndK5nezSrvjpDW','Dg9tDhi','BMzPz3u','AgLNAa','y3jPDgK','DgLVBIa','rxn0Aw0','DgLLCG','kcGOlIS','y2fS','mJiYntq0r0vtywDL','AxjLCYa','odKWCfPWyNfX','Aw5N','mZKZBhLSD3jo','zgvY','yMfSyw4','nJm1qwHeEeTT','ihrPzxi','C2f2Aw4','y2vUDa','y2fSAxq'];_0x1ecb=function(){return _0xa9d53f;};return _0x1ecb();}
|
package/src/cost-tracker.mjs
CHANGED
|
@@ -2,9 +2,11 @@
|
|
|
2
2
|
* Cost Tracking Utilities
|
|
3
3
|
*
|
|
4
4
|
* Tracks API costs over time, provides cost estimates, and helps optimize spending.
|
|
5
|
+
* Includes budget limits and alerting.
|
|
5
6
|
*/
|
|
6
7
|
|
|
7
8
|
import { getCached, setCached } from './cache.mjs';
|
|
9
|
+
import { warn, log } from './logger.mjs';
|
|
8
10
|
|
|
9
11
|
/**
|
|
10
12
|
* Cost Tracker Class
|
|
@@ -22,11 +24,21 @@ export class CostTracker {
|
|
|
22
24
|
* Load costs from cache/storage
|
|
23
25
|
*/
|
|
24
26
|
loadCosts() {
|
|
27
|
+
const defaultCosts = { history: [], totals: { total: 0, count: 0 }, byProvider: {}, byDate: {} };
|
|
25
28
|
try {
|
|
26
29
|
const cached = getCached(this.storageKey, 'cost-tracker', {});
|
|
27
|
-
|
|
30
|
+
if (cached && typeof cached === 'object' && cached.history) {
|
|
31
|
+
// Ensure all required properties exist
|
|
32
|
+
return {
|
|
33
|
+
history: cached.history || [],
|
|
34
|
+
totals: { total: 0, count: 0, ...cached.totals },
|
|
35
|
+
byProvider: cached.byProvider || {},
|
|
36
|
+
byDate: cached.byDate || {}
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
return defaultCosts;
|
|
28
40
|
} catch {
|
|
29
|
-
return
|
|
41
|
+
return defaultCosts;
|
|
30
42
|
}
|
|
31
43
|
}
|
|
32
44
|
|
|
@@ -196,6 +208,106 @@ export class CostTracker {
|
|
|
196
208
|
};
|
|
197
209
|
}
|
|
198
210
|
|
|
211
|
+
/**
|
|
212
|
+
* Set budget limit with alert thresholds
|
|
213
|
+
*
|
|
214
|
+
* @param {number} budgetLimit - Total budget limit (USD)
|
|
215
|
+
* @param {Object} [options={}] - Budget options
|
|
216
|
+
* @param {number} [options.warningThreshold=0.8] - Warn at this percentage (0-1)
|
|
217
|
+
* @param {Function} [options.onWarning] - Callback when warning threshold reached
|
|
218
|
+
* @param {Function} [options.onExceeded] - Callback when budget exceeded
|
|
219
|
+
*/
|
|
220
|
+
setBudgetLimit(budgetLimit, options = {}) {
|
|
221
|
+
const { warningThreshold = 0.8, onWarning = null, onExceeded = null } = options;
|
|
222
|
+
|
|
223
|
+
if (!this.costs.budgets) {
|
|
224
|
+
this.costs.budgets = [];
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
const budget = {
|
|
228
|
+
limit: budgetLimit,
|
|
229
|
+
warningThreshold,
|
|
230
|
+
onWarning,
|
|
231
|
+
onExceeded,
|
|
232
|
+
createdAt: Date.now()
|
|
233
|
+
};
|
|
234
|
+
|
|
235
|
+
this.costs.budgets.push(budget);
|
|
236
|
+
this.saveCosts();
|
|
237
|
+
|
|
238
|
+
// Check immediately
|
|
239
|
+
this.checkBudgets();
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Check all budget limits and trigger alerts
|
|
244
|
+
*
|
|
245
|
+
* @returns {Array} Array of budget status objects
|
|
246
|
+
*/
|
|
247
|
+
checkBudgets() {
|
|
248
|
+
if (!this.costs.budgets || this.costs.budgets.length === 0) {
|
|
249
|
+
return [];
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
const stats = this.getStats();
|
|
253
|
+
const current = stats.total;
|
|
254
|
+
const statuses = [];
|
|
255
|
+
|
|
256
|
+
for (const budget of this.costs.budgets) {
|
|
257
|
+
const percentage = current / budget.limit;
|
|
258
|
+
const status = {
|
|
259
|
+
limit: budget.limit,
|
|
260
|
+
current,
|
|
261
|
+
percentage,
|
|
262
|
+
remaining: Math.max(0, budget.limit - current),
|
|
263
|
+
warningThreshold: budget.warningThreshold,
|
|
264
|
+
status: percentage >= 1 ? 'exceeded' : (percentage >= budget.warningThreshold ? 'warning' : 'ok')
|
|
265
|
+
};
|
|
266
|
+
|
|
267
|
+
statuses.push(status);
|
|
268
|
+
|
|
269
|
+
// Trigger callbacks
|
|
270
|
+
if (percentage >= 1 && budget.onExceeded) {
|
|
271
|
+
try {
|
|
272
|
+
budget.onExceeded(status);
|
|
273
|
+
} catch (err) {
|
|
274
|
+
// Don't fail if callback errors
|
|
275
|
+
}
|
|
276
|
+
} else if (percentage >= budget.warningThreshold && budget.onWarning) {
|
|
277
|
+
try {
|
|
278
|
+
budget.onWarning(status);
|
|
279
|
+
} catch (err) {
|
|
280
|
+
// Don't fail if callback errors
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return statuses;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Get budget status
|
|
290
|
+
*
|
|
291
|
+
* @returns {Object} Budget status summary
|
|
292
|
+
*/
|
|
293
|
+
getBudgetStatus() {
|
|
294
|
+
const statuses = this.checkBudgets();
|
|
295
|
+
if (statuses.length === 0) {
|
|
296
|
+
return { hasBudgets: false };
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
const exceeded = statuses.filter(s => s.status === 'exceeded');
|
|
300
|
+
const warnings = statuses.filter(s => s.status === 'warning');
|
|
301
|
+
|
|
302
|
+
return {
|
|
303
|
+
hasBudgets: true,
|
|
304
|
+
totalBudgets: statuses.length,
|
|
305
|
+
exceeded: exceeded.length,
|
|
306
|
+
warnings: warnings.length,
|
|
307
|
+
statuses
|
|
308
|
+
};
|
|
309
|
+
}
|
|
310
|
+
|
|
199
311
|
/**
|
|
200
312
|
* Reset cost tracking
|
|
201
313
|
*/
|
|
@@ -255,3 +367,23 @@ export function getCostStats() {
|
|
|
255
367
|
return getCostTracker().getStats();
|
|
256
368
|
}
|
|
257
369
|
|
|
370
|
+
/**
|
|
371
|
+
* Set budget limit (convenience function)
|
|
372
|
+
*
|
|
373
|
+
* @param {number} budgetLimit - Budget limit in USD
|
|
374
|
+
* @param {Object} [options={}] - Budget options
|
|
375
|
+
*/
|
|
376
|
+
export function setBudgetLimit(budgetLimit, options = {}) {
|
|
377
|
+
const tracker = getCostTracker();
|
|
378
|
+
tracker.setBudgetLimit(budgetLimit, options);
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Get budget status (convenience function)
|
|
383
|
+
*
|
|
384
|
+
* @returns {Object} Budget status
|
|
385
|
+
*/
|
|
386
|
+
export function getBudgetStatus() {
|
|
387
|
+
return getCostTracker().getBudgetStatus();
|
|
388
|
+
}
|
|
389
|
+
|
package/src/data-extractor.mjs
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
import { createConfig } from './config.mjs';
|
|
13
13
|
import { loadEnv } from './load-env.mjs';
|
|
14
14
|
import { warn } from './logger.mjs';
|
|
15
|
+
import { ValidationError } from './errors.mjs';
|
|
15
16
|
// Load env before LLM utils
|
|
16
17
|
loadEnv();
|
|
17
18
|
// Use shared LLM utility library for text-only calls (optional dependency)
|
|
@@ -111,7 +112,16 @@ Return ONLY the JSON object, no other text.`;
|
|
|
111
112
|
if (jsonMatch) {
|
|
112
113
|
parsed = JSON.parse(jsonMatch[0]);
|
|
113
114
|
} else {
|
|
114
|
-
throw new
|
|
115
|
+
throw new ValidationError(
|
|
116
|
+
'Could not extract JSON from response. The LLM response did not contain valid JSON. ' +
|
|
117
|
+
'This may indicate the model failed to follow the schema format. ' +
|
|
118
|
+
'Try: 1) Simplifying the schema, 2) Using a more capable model tier, or 3) Adding examples to the prompt.',
|
|
119
|
+
{
|
|
120
|
+
responseLength: response?.length || 0,
|
|
121
|
+
responsePreview: response?.substring(0, 200) || 'No response',
|
|
122
|
+
schema: schema
|
|
123
|
+
}
|
|
124
|
+
);
|
|
115
125
|
}
|
|
116
126
|
}
|
|
117
127
|
if (parsed && validateSchema(parsed, schema)) {
|
|
@@ -126,25 +136,44 @@ Return ONLY the JSON object, no other text.`;
|
|
|
126
136
|
|
|
127
137
|
/**
|
|
128
138
|
* Call LLM API (text-only, no vision)
|
|
139
|
+
* Uses cached wrapper for better performance and cost reduction
|
|
129
140
|
* Uses shared utility with advanced tier for better extraction quality
|
|
130
141
|
*/
|
|
131
142
|
async function callLLMForExtraction(prompt, config) {
|
|
132
143
|
const apiKey = config.apiKey;
|
|
133
144
|
const provider = config.provider || 'gemini';
|
|
134
145
|
|
|
135
|
-
//
|
|
146
|
+
// Use cached LLM wrapper (reduces costs and improves performance)
|
|
136
147
|
try {
|
|
137
|
-
const
|
|
138
|
-
const callLLMUtil = llmUtils.callLLM;
|
|
148
|
+
const { callLLMCached } = await import('./utils/cached-llm.mjs');
|
|
139
149
|
// Use advanced tier for data extraction (needs higher quality)
|
|
140
|
-
return await
|
|
150
|
+
return await callLLMCached(prompt, provider, apiKey, {
|
|
141
151
|
tier: 'advanced', // Data extraction benefits from better models
|
|
142
152
|
temperature: 0.1,
|
|
143
153
|
maxTokens: 1000,
|
|
154
|
+
useCache: true, // Enable caching by default
|
|
144
155
|
});
|
|
145
156
|
} catch (error) {
|
|
146
|
-
// Fallback:
|
|
147
|
-
|
|
157
|
+
// Fallback: try uncached version if cached wrapper fails
|
|
158
|
+
try {
|
|
159
|
+
const llmUtils = await import('@arclabs561/llm-utils');
|
|
160
|
+
return await llmUtils.callLLM(prompt, provider, apiKey, {
|
|
161
|
+
tier: 'advanced',
|
|
162
|
+
temperature: 0.1,
|
|
163
|
+
maxTokens: 1000,
|
|
164
|
+
});
|
|
165
|
+
} catch (fallbackError) {
|
|
166
|
+
throw new ValidationError(
|
|
167
|
+
`LLM extraction requires @arclabs561/llm-utils package. ` +
|
|
168
|
+
`Install it with: npm install @arclabs561/llm-utils. ` +
|
|
169
|
+
`Error: ${fallbackError.message}`,
|
|
170
|
+
{
|
|
171
|
+
package: '@arclabs561/llm-utils',
|
|
172
|
+
installationCommand: 'npm install @arclabs561/llm-utils',
|
|
173
|
+
originalError: fallbackError.message
|
|
174
|
+
}
|
|
175
|
+
);
|
|
176
|
+
}
|
|
148
177
|
}
|
|
149
178
|
}
|
|
150
179
|
|
package/src/dynamic-few-shot.mjs
CHANGED
|
@@ -8,9 +8,8 @@
|
|
|
8
8
|
* - ES-KNN: arXiv:2506.05614 (Exemplar Selection KNN using semantic similarity)
|
|
9
9
|
* - KATE: arXiv:2101.06804 (Foundational work on kNN-augmented in-context examples)
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* embedding-based cosine similarity would be required.
|
|
11
|
+
* This implementation supports both keyword-based similarity (Jaccard) and
|
|
12
|
+
* embedding-based semantic similarity. Embeddings are preferred when available.
|
|
14
13
|
*
|
|
15
14
|
* This module provides dynamic few-shot example selection based on similarity
|
|
16
15
|
* to the evaluation prompt.
|
|
@@ -19,20 +18,25 @@
|
|
|
19
18
|
/**
|
|
20
19
|
* Select few-shot examples based on semantic similarity to prompt
|
|
21
20
|
*
|
|
21
|
+
* Research: ES-KNN shows embedding-based selection improves performance by 10-20%
|
|
22
|
+
* over keyword-based selection. This implementation supports both methods.
|
|
23
|
+
*
|
|
22
24
|
* @param {string} prompt - Evaluation prompt
|
|
23
25
|
* @param {Array<import('./index.mjs').FewShotExample>} examples - Available examples
|
|
24
26
|
* @param {{
|
|
25
27
|
* maxExamples?: number;
|
|
26
28
|
* similarityThreshold?: number;
|
|
27
29
|
* useSemanticMatching?: boolean;
|
|
30
|
+
* task?: string;
|
|
28
31
|
* }} [options={}] - Selection options
|
|
29
|
-
* @returns {Array<import('./index.mjs').FewShotExample
|
|
32
|
+
* @returns {Promise<Array<import('./index.mjs').FewShotExample>>} Selected examples
|
|
30
33
|
*/
|
|
31
|
-
export function selectFewShotExamples(prompt, examples = [], options = {}) {
|
|
34
|
+
export async function selectFewShotExamples(prompt, examples = [], options = {}) {
|
|
32
35
|
const {
|
|
33
36
|
maxExamples = 3,
|
|
34
37
|
similarityThreshold = 0.3,
|
|
35
|
-
useSemanticMatching = true
|
|
38
|
+
useSemanticMatching = true,
|
|
39
|
+
task = 'general'
|
|
36
40
|
} = options;
|
|
37
41
|
|
|
38
42
|
// Validate inputs
|
|
@@ -50,14 +54,68 @@ export function selectFewShotExamples(prompt, examples = [], options = {}) {
|
|
|
50
54
|
return examples.slice(0, maxExamples);
|
|
51
55
|
}
|
|
52
56
|
|
|
53
|
-
//
|
|
54
|
-
|
|
57
|
+
// UX OPTIMIZATION: Auto-disable embeddings for large example arrays (>100) unless explicitly requested
|
|
58
|
+
// - Why: Embeddings add ~15ms per example, so 1000 examples = ~15s latency
|
|
59
|
+
// - User experience: Most users have 10-50 examples, so embeddings are fast and valuable
|
|
60
|
+
// - Edge case: Large datasets (1000+ examples) should use keyword matching for speed
|
|
61
|
+
// - Exception: If useEmbeddings is explicitly set to true, respect user preference
|
|
62
|
+
const exampleCount = examples.length;
|
|
63
|
+
const shouldUseEmbeddingsForLargeArrays = options.useEmbeddings === true;
|
|
64
|
+
const autoDisableForLargeArrays = exampleCount > 100 && !shouldUseEmbeddingsForLargeArrays;
|
|
65
|
+
|
|
66
|
+
// Try embeddings first (more accurate) - but skip for large arrays unless explicitly requested
|
|
67
|
+
if (!autoDisableForLargeArrays) {
|
|
68
|
+
try {
|
|
69
|
+
const { instructionSemanticSimilarity, isInstructionEmbeddingsAvailable } = await import('../evaluation/utils/instruction-embeddings.mjs');
|
|
70
|
+
const { semanticSimilarity, isEmbeddingsAvailable } = await import('../evaluation/utils/semantic-matcher.mjs');
|
|
71
|
+
|
|
72
|
+
const useInstructionEmbeddings = await isInstructionEmbeddingsAvailable();
|
|
73
|
+
const useGeneralEmbeddings = !useInstructionEmbeddings && await isEmbeddingsAvailable();
|
|
74
|
+
|
|
75
|
+
if (useInstructionEmbeddings || useGeneralEmbeddings) {
|
|
76
|
+
// Use embeddings for similarity calculation
|
|
77
|
+
const similarityFn = useInstructionEmbeddings
|
|
78
|
+
? (text1, text2) => instructionSemanticSimilarity(text1, text2, task)
|
|
79
|
+
: (text1, text2) => semanticSimilarity(text1, text2);
|
|
80
|
+
|
|
81
|
+
// Score each example using embeddings
|
|
82
|
+
const scored = await Promise.all(
|
|
83
|
+
examples.map(async (example) => {
|
|
84
|
+
const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
|
|
85
|
+
const similarity = await similarityFn(prompt, exampleText);
|
|
86
|
+
|
|
87
|
+
return {
|
|
88
|
+
example,
|
|
89
|
+
similarity: similarity !== null ? similarity : 0
|
|
90
|
+
};
|
|
91
|
+
})
|
|
92
|
+
);
|
|
93
|
+
|
|
94
|
+
// Sort by similarity and take top N
|
|
95
|
+
return scored
|
|
96
|
+
.filter(s => s.similarity >= similarityThreshold)
|
|
97
|
+
.sort((a, b) => b.similarity - a.similarity)
|
|
98
|
+
.slice(0, maxExamples)
|
|
99
|
+
.map(s => s.example);
|
|
100
|
+
}
|
|
101
|
+
} catch (error) {
|
|
102
|
+
// Fall through to keyword matching if embeddings unavailable
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Fallback: Keyword-based similarity (Jaccard)
|
|
107
|
+
// For very long prompts, limit keyword extraction to avoid performance issues
|
|
108
|
+
const maxPromptLength = 10000; // Limit prompt processing to 10KB for performance
|
|
109
|
+
const processedPrompt = prompt.length > maxPromptLength
|
|
110
|
+
? prompt.substring(0, maxPromptLength)
|
|
111
|
+
: prompt;
|
|
112
|
+
|
|
113
|
+
const promptKeywords = extractKeywords(processedPrompt.toLowerCase());
|
|
55
114
|
|
|
56
115
|
// Score each example by keyword overlap
|
|
57
116
|
const scored = examples.map(example => {
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
);
|
|
117
|
+
const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
|
|
118
|
+
const exampleKeywords = extractKeywords(exampleText.toLowerCase());
|
|
61
119
|
|
|
62
120
|
// Jaccard similarity (intersection over union)
|
|
63
121
|
const intersection = new Set(
|
package/src/errors.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Custom Error Classes for ai-visual-test
|
|
3
3
|
*
|
|
4
4
|
* Provides standardized error handling across the package.
|
|
5
|
-
* Based on Playwright's error handling patterns and industry
|
|
5
|
+
* Based on Playwright's error handling patterns and industry practices.
|
|
6
6
|
*
|
|
7
7
|
* All errors extend AIBrowserTestError for consistent error handling and serialization.
|
|
8
8
|
*/
|
|
@@ -42,7 +42,11 @@ export class AIBrowserTestError extends Error {
|
|
|
42
42
|
code: this.code,
|
|
43
43
|
message: this.message,
|
|
44
44
|
details: this.details,
|
|
45
|
-
|
|
45
|
+
// SECURITY: Stack traces may contain sensitive information
|
|
46
|
+
// Only include in development mode or when explicitly requested
|
|
47
|
+
...(process.env.NODE_ENV === 'development' || process.env.INCLUDE_STACK_TRACES === 'true'
|
|
48
|
+
? { stack: this.stack }
|
|
49
|
+
: {})
|
|
46
50
|
};
|
|
47
51
|
}
|
|
48
52
|
}
|
|
@@ -110,10 +110,22 @@ let globalTracker = null;
|
|
|
110
110
|
|
|
111
111
|
/**
|
|
112
112
|
* Get or create global propagation tracker
|
|
113
|
+
*
|
|
114
|
+
* @param {Object} [options={}] - Options for tracker (only used on first call)
|
|
115
|
+
* @returns {ExperiencePropagationTracker} Global tracker instance
|
|
113
116
|
*/
|
|
114
117
|
export function getPropagationTracker(options = {}) {
|
|
115
118
|
if (!globalTracker) {
|
|
116
119
|
globalTracker = new ExperiencePropagationTracker(options);
|
|
120
|
+
} else if (Object.keys(options).length > 0) {
|
|
121
|
+
// If tracker exists but options provided, update it
|
|
122
|
+
// This allows reconfiguration (though typically tracker is created once)
|
|
123
|
+
if (options.enabled !== undefined) {
|
|
124
|
+
globalTracker.enabled = options.enabled;
|
|
125
|
+
}
|
|
126
|
+
if (options.logLevel !== undefined) {
|
|
127
|
+
globalTracker.logLevel = options.logLevel;
|
|
128
|
+
}
|
|
117
129
|
}
|
|
118
130
|
return globalTracker;
|
|
119
131
|
}
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import { warn } from './logger.mjs';
|
|
14
|
+
import { ValidationError } from './errors.mjs';
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* Experience Trace
|
|
@@ -133,7 +134,7 @@ export class ExperienceTrace {
|
|
|
133
134
|
* @param {Record<string, unknown>} [options={}] - Aggregation options
|
|
134
135
|
* @returns {import('./index.mjs').AggregatedTemporalNotes} Aggregated notes
|
|
135
136
|
*/
|
|
136
|
-
aggregateNotes(aggregateTemporalNotes, options = {}) {
|
|
137
|
+
async aggregateNotes(aggregateTemporalNotes, options = {}) {
|
|
137
138
|
// Extract notes from events and validations
|
|
138
139
|
const eventNotes = this.events
|
|
139
140
|
.filter(e => e.type === 'interaction' || e.type === 'observation')
|
|
@@ -157,7 +158,7 @@ export class ExperienceTrace {
|
|
|
157
158
|
|
|
158
159
|
const notes = [...eventNotes, ...validationNotes].sort((a, b) => a.timestamp - b.timestamp);
|
|
159
160
|
|
|
160
|
-
this.aggregatedNotes = aggregateTemporalNotes(notes, options);
|
|
161
|
+
this.aggregatedNotes = await aggregateTemporalNotes(notes, options);
|
|
161
162
|
return this.aggregatedNotes;
|
|
162
163
|
}
|
|
163
164
|
|
|
@@ -296,7 +297,15 @@ export class ExperienceTracerManager {
|
|
|
296
297
|
async metaEvaluateTrace(sessionId, validateScreenshot) {
|
|
297
298
|
const trace = this.getTrace(sessionId);
|
|
298
299
|
if (!trace) {
|
|
299
|
-
throw new
|
|
300
|
+
throw new ValidationError(
|
|
301
|
+
`Trace not found for session: ${sessionId}. ` +
|
|
302
|
+
`Use startTrace() to create a new trace, or listTraces() to see all available traces.`,
|
|
303
|
+
{
|
|
304
|
+
sessionId,
|
|
305
|
+
availableSessions: Object.keys(this.traces),
|
|
306
|
+
function: 'metaEvaluateTrace'
|
|
307
|
+
}
|
|
308
|
+
);
|
|
300
309
|
}
|
|
301
310
|
|
|
302
311
|
const evaluation = {
|