ai-evaluate 2.1.6 → 2.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/evaluate.ts CHANGED
@@ -6,11 +6,103 @@
6
6
  *
7
7
  * Requires:
8
8
  * - LOADER binding (worker_loaders)
9
- * - TEST binding (ai-tests service)
9
+ * - TEST binding (ai-tests service) - optional, only needed for test assertions
10
10
  */
11
11
 
12
12
  import type { EvaluateOptions, EvaluateResult, WorkerLoader, SandboxEnv } from './types.js'
13
13
  import { generateWorkerCode } from './worker-template.js'
14
+ import { CAPNWEB_SOURCE } from './capnweb-bundle.js'
15
+
16
+ /**
17
+ * Compatibility date for dynamic workers (2026)
18
+ */
19
+ const COMPATIBILITY_DATE = '2026-01-01'
20
+
21
/**
 * Generate a minimal worker for simple script execution.
 *
 * The returned source is a self-contained ES module that:
 *  - imports every pre-fetched external module from `./__external_<i>__.js`,
 *  - records console.log/warn/error/info calls in a `logs` array,
 *  - exposes each import on `globalThis` (and the last one as `globalThis.pkg`),
 *  - inlines the user's module code at top level, and
 *  - runs the user's script inside the `fetch` handler, answering with a JSON
 *    body of `{ success, value | error, logs, duration }`.
 *
 * It does not require capnweb or the TEST binding.
 *
 * @param options.module - Module source inlined verbatim at the worker's top level
 * @param options.script - Statements run as the body of an async function; the
 *   value it `return`s becomes `value` in the response
 * @param options.imports - External module URLs; entry `i` must be supplied to
 *   the worker as the module `__external_<i>__.js`
 * @returns Source code of the generated worker module
 */
function generateSimpleWorkerCode(options: {
  module?: string
  script?: string
  imports?: string[]
}): string {
  const { module = '', script = '', imports = [] } = options

  // Build import statements for pre-fetched external modules.
  // Modules are fetched by the host worker and included in the worker definition.
  const importStatements = imports
    .map((_url, i) => `import * as __import${i}__ from './__external_${i}__.js';`)
    .join('\n')

  // Make imports available as globals
  const importGlobals = imports
    .map((url, i) => {
      // Extract the package name from the URL for variable naming. The match
      // stops at '@', '/', '?' and '#' so versions, sub-paths and query strings
      // never leak into the generated identifier.
      const rawName = url.match(/esm\.sh\/([^@/?#]+)/)?.[1]
      // The name is emitted as `globalThis.<name>`, so every character that is
      // not legal in an identifier (e.g. '-' or '.') is replaced with '_'.
      let pkgName = rawName ? rawName.replace(/[^A-Za-z0-9_$]/g, '_') : `pkg${i}`
      if (/^[0-9]/.test(pkgName)) {
        pkgName = `_${pkgName}`
      }
      const varName = pkgName === 'lodash' ? '_' : pkgName
      return `globalThis.${varName} = __import${i}__.default || __import${i}__;
globalThis.pkg = __import${i}__.default || __import${i}__;`
    })
    .join('\n')

  // Wrap script to capture return value (code is embedded at build time, no eval).
  // The script sits on its own lines so a trailing `// comment` in user code
  // cannot comment out the closing brace of the wrapper.
  const wrappedScript = script
    ? `const __executeScript__ = async () => {\n${script}\n};\nconst __result__ = await __executeScript__();`
    : 'const __result__ = undefined;'

  return `
// Simple Sandbox Worker
${importStatements}

const logs = [];

// Capture console output
const originalConsole = { ...console };
// Serialising a log argument must never throw into user code
// (JSON.stringify rejects circular structures and BigInt values).
const formatLogArg = (a) => {
  if (typeof a !== 'object') return String(a);
  try {
    return JSON.stringify(a);
  } catch (e) {
    return String(a);
  }
};
const captureConsole = (level) => (...args) => {
  logs.push({
    level,
    message: args.map(formatLogArg).join(' '),
    timestamp: Date.now()
  });
  originalConsole[level](...args);
};
console.log = captureConsole('log');
console.warn = captureConsole('warn');
console.error = captureConsole('error');
console.info = captureConsole('info');

// Make imports available globally
${importGlobals}

// User module code (if any)
${module}

export default {
  async fetch(request, env) {
    try {
      // Execute the script (embedded at generation time - no new Function())
      ${wrappedScript}

      return Response.json({
        success: true,
        value: __result__,
        logs,
        duration: 0
      });
    } catch (error) {
      // Scripts may throw null/undefined; guard before reading .message
      return Response.json({
        success: false,
        error: (error && error.message) || String(error),
        logs,
        duration: 0
      });
    }
  }
};
`
}
14
106
 
15
107
  /**
16
108
  * Evaluate code in a sandboxed worker
@@ -40,11 +132,19 @@ export async function evaluate(
40
132
  success: false,
41
133
  logs: [],
42
134
  error:
43
- 'Sandbox requires worker_loaders binding. Add to wrangler.jsonc: "worker_loaders": [{ "binding": "loader" }]. For Node.js, use: import { evaluate } from "ai-evaluate/node"',
135
+ 'Sandbox requires worker_loaders binding. Add to wrangler.toml: [[worker_loaders]] binding = "LOADER". For Node.js, use: import { evaluate } from "ai-evaluate/node"',
44
136
  duration: Date.now() - start,
45
137
  }
46
138
  }
47
139
 
140
+ // Use simple worker for basic script execution (no tests, no SDK)
141
+ const useSimpleWorker = !options.tests && !options.sdk
142
+
143
+ if (useSimpleWorker) {
144
+ return await evaluateSimple(options, loader, start)
145
+ }
146
+
147
+ // Use full worker template for tests and SDK features
48
148
  return await evaluateWithWorkerLoader(options, loader, env?.TEST, start)
49
149
  } catch (error) {
50
150
  return {
@@ -57,7 +157,110 @@ export async function evaluate(
57
157
  }
58
158
 
59
159
  /**
60
- * Evaluate using Cloudflare worker_loaders binding
160
+ * Pre-fetch external modules from URLs (e.g., esm.sh)
161
+ * Returns a map of module name to source code
162
+ *
163
+ * Handles esm.sh's redirect-style modules by following the internal import paths.
164
+ */
165
async function prefetchModules(imports: string[]): Promise<Record<string, string>> {
  // Downloads every URL in `imports` in parallel and returns the sources keyed
  // by generated file name: the module for imports[i] is stored under
  // `__external_<i>__.js`.
  //
  // Bare esm.sh package URLs are first requested as a single-file bundle
  // (`/<pkg>@<ver>/es2022/<name>.bundle.mjs`); if that request is not OK the
  // original URL is fetched instead.
  //
  // Throws an Error prefixed with "Failed to fetch import <url>: " as soon as
  // any import cannot be fetched; Promise.all rejects on the first failure.
  const modules: Record<string, string> = {}

  await Promise.all(
    imports.map(async (url, i) => {
      try {
        // For esm.sh URLs, try to get the bundled version directly
        let fetchUrl = url
        if (url.includes('esm.sh/') && !url.includes('.mjs') && !url.includes('.js')) {
          // Parse the esm.sh URL to construct the bundle path
          // e.g., https://esm.sh/lodash@4.17.21 -> https://esm.sh/lodash@4.17.21/es2022/lodash.bundle.mjs
          const urlObj = new URL(url)
          const pathParts = urlObj.pathname.slice(1).split('/')
          const first = pathParts[0] ?? ''
          const second = pathParts[1]
          // Scoped packages span two path segments ("@scope/name@1.0.0"); the
          // bundle file is named after the unscoped package name.
          const isScoped = first.startsWith('@') && second !== undefined && second !== ''
          const pkgSpec = isScoped ? `${first}/${second}` : first // e.g., "lodash@4.17.21"
          const pkgName = (isScoped ? second : first).split('@')[0]
          fetchUrl = `${urlObj.origin}/${pkgSpec}/es2022/${pkgName}.bundle.mjs`
        }

        let response = await fetch(fetchUrl, { redirect: 'follow' })
        // Fall back to the original URL only when a different (bundle) URL was
        // tried; re-requesting the very same URL would just repeat the failure.
        if (!response.ok && fetchUrl !== url) {
          response = await fetch(url, { redirect: 'follow' })
        }
        if (!response.ok) {
          throw new Error(`Failed to fetch ${url}: ${response.status}`)
        }

        // Use a simple module name that can be imported
        modules[`__external_${i}__.js`] = await response.text()
      } catch (error) {
        throw new Error(
          `Failed to fetch import ${url}: ${error instanceof Error ? error.message : String(error)}`
        )
      }
    })
  )

  return modules
}
208
+
209
/**
 * Run a script in a minimal sandbox worker, without capnweb or the TEST binding.
 *
 * Any `options.imports` are downloaded first; a download failure is reported
 * as an unsuccessful result rather than thrown. The generated worker is then
 * loaded through `loader`, invoked once via its `fetch` entrypoint, and its
 * JSON reply is returned with `duration` replaced by the time elapsed since
 * `start`.
 *
 * @param options - Evaluation options (module, script, imports, fetch)
 * @param loader - Worker loader used to instantiate the sandbox worker
 * @param start - Timestamp in ms (as from Date.now()) when evaluation began
 * @returns The worker's result with `duration` measured from `start`
 */
async function evaluateSimple(
  options: EvaluateOptions,
  loader: WorkerLoader,
  start: number
): Promise<EvaluateResult> {
  // Pre-fetch any external modules
  let prefetched: Record<string, string> = {}
  const hasImports = Boolean(options.imports && options.imports.length > 0)
  if (hasImports) {
    try {
      prefetched = await prefetchModules(options.imports!)
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error)
      return {
        success: false,
        logs: [],
        error: message,
        duration: Date.now() - start,
      }
    }
  }

  const source = generateSimpleWorkerCode({
    module: options.module,
    script: options.script,
    imports: options.imports,
  })

  const sandboxId = `sandbox-${Date.now()}-${Math.random().toString(36).slice(2)}`

  // The definition is built inside the callback passed to loader.get()
  const describeWorker = async () => {
    // Block network access only if fetch: null
    const globalOutbound = options.fetch === null ? null : undefined
    return {
      mainModule: 'worker.js',
      modules: {
        'worker.js': source,
        ...prefetched,
      },
      compatibilityDate: COMPATIBILITY_DATE,
      globalOutbound,
    }
  }
  const sandbox = loader.get(sandboxId, describeWorker)

  // Get the entrypoint and call fetch
  const reply = await sandbox.getEntrypoint().fetch(new Request('http://sandbox/execute'))
  const payload = (await reply.json()) as EvaluateResult

  // Host-side timing overrides the placeholder duration reported by the worker
  return {
    ...payload,
    duration: Date.now() - start,
  }
}
261
+
262
+ /**
263
+ * Evaluate using full worker template with capnweb and TEST binding
61
264
  */
62
265
  async function evaluateWithWorkerLoader(
63
266
  options: EvaluateOptions,
@@ -78,8 +281,10 @@ async function evaluateWithWorkerLoader(
78
281
  mainModule: 'worker.js',
79
282
  modules: {
80
283
  'worker.js': workerCode,
284
+ // Include capnweb as a module so the worker can import it
285
+ 'capnweb.js': CAPNWEB_SOURCE,
81
286
  },
82
- compatibilityDate: '2026-01-01',
287
+ compatibilityDate: COMPATIBILITY_DATE,
83
288
  // Block network access only if fetch: null
84
289
  globalOutbound: options.fetch === null ? null : undefined,
85
290
  bindings: {
@@ -2885,17 +2885,11 @@ function transformModuleCode(moduleCode: string): string {
2885
2885
  let code = moduleCode
2886
2886
 
2887
2887
  // Transform: export const foo = ... → const foo = ...; exports.foo = foo;
2888
- code = code.replace(
2889
- /export\s+(const|let|var)\s+(\w+)\s*=/g,
2890
- '$1 $2 = exports.$2 ='
2891
- )
2888
+ code = code.replace(/export\s+(const|let|var)\s+(\w+)\s*=/g, '$1 $2 = exports.$2 =')
2892
2889
 
2893
2890
  // Transform: export function foo(...) → function foo(...) exports.foo = foo;
2894
2891
  // Also handles async generators: export async function* foo
2895
- code = code.replace(
2896
- /export\s+(async\s+)?function(\*?)\s+(\w+)/g,
2897
- '$1function$2 $3'
2898
- )
2892
+ code = code.replace(/export\s+(async\s+)?function(\*?)\s+(\w+)/g, '$1function$2 $3')
2899
2893
  // Add exports for functions after their definition
2900
2894
  const funcNames = [...moduleCode.matchAll(/export\s+(?:async\s+)?function\*?\s+(\w+)/g)]
2901
2895
  for (const [, name] of funcNames) {
@@ -2931,7 +2925,10 @@ function wrapScriptForReturn(script: string): string {
2931
2925
  const isSingleLine = !withoutTrailingSemi.includes('\n')
2932
2926
 
2933
2927
  // Check if it looks like a single expression (no control flow, no declarations)
2934
- const startsWithKeyword = /^\s*(const|let|var|if|for|while|switch|try|class|function|async\s+function)\b/.test(withoutTrailingSemi)
2928
+ const startsWithKeyword =
2929
+ /^\s*(const|let|var|if|for|while|switch|try|class|function|async\s+function)\b/.test(
2930
+ withoutTrailingSemi
2931
+ )
2935
2932
 
2936
2933
  if (isSingleLine && !startsWithKeyword) {
2937
2934
  return `return ${withoutTrailingSemi}`
@@ -2944,7 +2941,10 @@ function wrapScriptForReturn(script: string): string {
2944
2941
  const lastLine = lastLineRaw.trim()
2945
2942
 
2946
2943
  // If last line is an expression (not a declaration, control flow, or throw)
2947
- if (lastLine && !/^\s*(const|let|var|if|for|while|switch|try|class|function|return|throw)\b/.test(lastLine)) {
2944
+ if (
2945
+ lastLine &&
2946
+ !/^\s*(const|let|var|if|for|while|switch|try|class|function|return|throw)\b/.test(lastLine)
2947
+ ) {
2948
2948
  lines[lines.length - 1] = `return ${lastLine.replace(/;?\s*$/, '')}`
2949
2949
  return lines.join('\n')
2950
2950
  }
@@ -2963,7 +2963,7 @@ export function generateWorkerCode(options: {
2963
2963
  imports?: string[] | undefined
2964
2964
  }): string {
2965
2965
  const { module: rawModule = '', tests = '', script: rawScript = '', sdk, imports = [] } = options
2966
- const sdkConfig = sdk === true ? {} : (sdk || null)
2966
+ const sdkConfig = sdk === true ? {} : sdk || null
2967
2967
  const module = rawModule ? transformModuleCode(rawModule) : ''
2968
2968
  const script = rawScript ? wrapScriptForReturn(rawScript) : ''
2969
2969
  const exportNames = getExportNames(rawModule)
@@ -2973,7 +2973,7 @@ export function generateWorkerCode(options: {
2973
2973
 
2974
2974
  return `
2975
2975
  // Sandbox Worker Entry Point
2976
- import { RpcTarget, newWorkersRpcResponse } from 'capnweb';
2976
+ import { RpcTarget, newWorkersRpcResponse } from 'capnweb.js';
2977
2977
  ${hoistedImports}
2978
2978
  const logs = [];
2979
2979
 
@@ -3003,20 +3003,28 @@ console.debug = captureConsole('debug');
3003
3003
  // Module exports object - exports become top-level variables
3004
3004
  const exports = {};
3005
3005
 
3006
- ${module ? `
3006
+ ${
3007
+ module
3008
+ ? `
3007
3009
  // Execute module code
3008
3010
  try {
3009
3011
  ${module}
3010
3012
  } catch (e) {
3011
3013
  console.error('Module error:', e.message);
3012
3014
  }
3013
- ` : '// No module code provided'}
3015
+ `
3016
+ : '// No module code provided'
3017
+ }
3014
3018
 
3015
3019
  // Expose all exports as top-level variables for tests and scripts
3016
3020
  // This allows: export const add = (a, b) => a + b; then later: add(1, 2)
3017
- ${rawModule ? `
3021
+ ${
3022
+ rawModule
3023
+ ? `
3018
3024
  const { ${exportNames} } = exports;
3019
- `.trim() : ''}
3025
+ `.trim()
3026
+ : ''
3027
+ }
3020
3028
 
3021
3029
  // ============================================================
3022
3030
  // RPC SERVER - Expose exports via capnweb
@@ -3156,17 +3164,23 @@ export default {
3156
3164
  // USER TEST CODE (embedded at generation time)
3157
3165
  // ============================================================
3158
3166
 
3159
- ${tests ? `
3167
+ ${
3168
+ tests
3169
+ ? `
3160
3170
  // Register tests
3161
3171
  try {
3162
3172
  ${tests}
3163
3173
  } catch (e) {
3164
3174
  console.error('Test registration error:', e.message);
3165
3175
  }
3166
- ` : '// No test code provided'}
3176
+ `
3177
+ : '// No test code provided'
3178
+ }
3167
3179
 
3168
3180
  // Execute user script
3169
- ${script ? `
3181
+ ${
3182
+ script
3183
+ ? `
3170
3184
  try {
3171
3185
  scriptResult = await (async () => {
3172
3186
  ${script}
@@ -3175,17 +3189,23 @@ ${script}
3175
3189
  console.error('Script error:', e.message);
3176
3190
  scriptError = e.message;
3177
3191
  }
3178
- ` : '// No script code provided'}
3192
+ `
3193
+ : '// No script code provided'
3194
+ }
3179
3195
 
3180
3196
  // Run tests if any were registered
3181
- ${tests ? `
3197
+ ${
3198
+ tests
3199
+ ? `
3182
3200
  try {
3183
3201
  testResults = await testService.run();
3184
3202
  } catch (e) {
3185
3203
  console.error('Test run error:', e.message);
3186
3204
  testResults = { total: 0, passed: 0, failed: 1, skipped: 0, tests: [], duration: 0, error: e.message };
3187
3205
  }
3188
- ` : ''}
3206
+ `
3207
+ : ''
3208
+ }
3189
3209
 
3190
3210
  const hasTests = ${tests ? 'true' : 'false'};
3191
3211
  const success = scriptError === null && (!hasTests || (testResults && testResults.failed === 0));
@@ -3217,8 +3237,15 @@ export function generateDevWorkerCode(options: {
3217
3237
  imports?: string[] | undefined
3218
3238
  fetch?: null | undefined
3219
3239
  }): string {
3220
- const { module: rawModule = '', tests = '', script: rawScript = '', sdk, imports = [], fetch: fetchOption } = options
3221
- const sdkConfig = sdk === true ? {} : (sdk || null)
3240
+ const {
3241
+ module: rawModule = '',
3242
+ tests = '',
3243
+ script: rawScript = '',
3244
+ sdk,
3245
+ imports = [],
3246
+ fetch: fetchOption,
3247
+ } = options
3248
+ const sdkConfig = sdk === true ? {} : sdk || null
3222
3249
  const module = rawModule ? transformModuleCode(rawModule) : ''
3223
3250
  const script = rawScript ? wrapScriptForReturn(rawScript) : ''
3224
3251
  const exportNames = getExportNames(rawModule)
@@ -3234,13 +3261,17 @@ const logs = [];
3234
3261
  const testResults = { total: 0, passed: 0, failed: 0, skipped: 0, tests: [], duration: 0 };
3235
3262
  const pendingTests = [];
3236
3263
 
3237
- ${blockFetch ? `
3264
+ ${
3265
+ blockFetch
3266
+ ? `
3238
3267
  // Block fetch when fetch: null is specified
3239
3268
  const __originalFetch__ = globalThis.fetch;
3240
3269
  globalThis.fetch = async (...args) => {
3241
3270
  throw new Error('Network access blocked: fetch is disabled in this sandbox');
3242
3271
  };
3243
- ` : ''}
3272
+ `
3273
+ : ''
3274
+ }
3244
3275
 
3245
3276
  ${sdkConfig ? generateShouldCode() : ''}
3246
3277
 
@@ -3548,32 +3579,44 @@ const expect = (actual) => {
3548
3579
  // Module exports object - exports become top-level variables
3549
3580
  const exports = {};
3550
3581
 
3551
- ${module ? `
3582
+ ${
3583
+ module
3584
+ ? `
3552
3585
  // Execute module code
3553
3586
  try {
3554
3587
  ${module}
3555
3588
  } catch (e) {
3556
3589
  console.error('Module error:', e.message);
3557
3590
  }
3558
- ` : '// No module code provided'}
3591
+ `
3592
+ : '// No module code provided'
3593
+ }
3559
3594
 
3560
3595
  // Expose all exports as top-level variables for tests and scripts
3561
3596
  // This allows: export const add = (a, b) => a + b; then later: add(1, 2)
3562
- ${rawModule ? `
3597
+ ${
3598
+ rawModule
3599
+ ? `
3563
3600
  const { ${exportNames} } = exports;
3564
- `.trim() : ''}
3601
+ `.trim()
3602
+ : ''
3603
+ }
3565
3604
 
3566
3605
  // ============================================================
3567
3606
  // USER TEST CODE (embedded at generation time)
3568
3607
  // ============================================================
3569
- ${tests ? `
3608
+ ${
3609
+ tests
3610
+ ? `
3570
3611
  // Register tests
3571
3612
  try {
3572
3613
  ${tests}
3573
3614
  } catch (e) {
3574
3615
  console.error('Test registration error:', e.message);
3575
3616
  }
3576
- ` : '// No test code provided'}
3617
+ `
3618
+ : '// No test code provided'
3619
+ }
3577
3620
 
3578
3621
  // ============================================================
3579
3622
  // SIMPLE RPC HANDLER (dev mode - no capnweb dependency)
@@ -3674,7 +3717,9 @@ export default {
3674
3717
  let scriptError = null;
3675
3718
 
3676
3719
  // Execute user script
3677
- ${script ? `
3720
+ ${
3721
+ script
3722
+ ? `
3678
3723
  try {
3679
3724
  scriptResult = await (async () => {
3680
3725
  ${script}
@@ -3683,7 +3728,9 @@ ${script}
3683
3728
  console.error('Script error:', e.message);
3684
3729
  scriptError = e.message;
3685
3730
  }
3686
- ` : '// No script code provided'}
3731
+ `
3732
+ : '// No script code provided'
3733
+ }
3687
3734
 
3688
3735
  // Run all pending tests
3689
3736
  const testStart = Date.now();