ai-functions 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +35 -0
- package/dist/ai.d.ts +1 -1
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +1 -1
- package/dist/ai.js.map +1 -1
- package/dist/function-registry.d.ts +60 -0
- package/dist/function-registry.d.ts.map +1 -1
- package/dist/function-registry.js +162 -23
- package/dist/function-registry.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/sandbox.d.ts +36 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +44 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/types.d.ts +20 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +5 -4
- package/src/ai.ts +2 -0
- package/src/function-registry.ts +229 -26
- package/src/index.ts +3 -0
- package/src/sandbox.ts +52 -0
- package/src/types.ts +22 -2
- package/test/fill-template.test.ts +89 -0
- package/test/sandbox-execution.test.ts +155 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for fillTemplate in function-registry.ts
|
|
3
|
+
*
|
|
4
|
+
* fillTemplate replaces {{key}} placeholders in a template string with values
|
|
5
|
+
* from an args record. Non-primitive values (objects/arrays) must serialize
|
|
6
|
+
* via JSON.stringify, not String(), to avoid "[object Object]" corruption.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect } from 'vitest'
|
|
10
|
+
import { fillTemplate } from '../src/function-registry.js'
|
|
11
|
+
|
|
12
|
+
/**
 * fillTemplate behaviour suite.
 *
 * Each case renders a template containing `{{key}}` placeholders against an
 * args record and pins the exact output string. Three groups are covered:
 * primitives, objects/arrays (expected to appear as JSON text), and edge
 * cases (null / missing keys / templates with nothing to replace).
 */
describe('fillTemplate', () => {
  describe('primitive values', () => {
    it('interpolates a string value', () => {
      const rendered = fillTemplate('Hello {{name}}!', { name: 'world' })
      expect(rendered).toBe('Hello world!')
    })

    it('interpolates a number value', () => {
      const rendered = fillTemplate('Count: {{n}}', { n: 42 })
      expect(rendered).toBe('Count: 42')
    })

    it('interpolates a boolean value', () => {
      const rendered = fillTemplate('Active: {{flag}}', { flag: true })
      expect(rendered).toBe('Active: true')
    })

    it('interpolates zero without stripping it', () => {
      // 0 is falsy, so this guards against a truthiness-based fallback.
      const rendered = fillTemplate('Value: {{v}}', { v: 0 })
      expect(rendered).toBe('Value: 0')
    })

    it('leaves placeholder empty when key is missing', () => {
      const rendered = fillTemplate('{{missing}} value', {})
      expect(rendered).toBe(' value')
    })

    it('replaces multiple distinct placeholders', () => {
      const rendered = fillTemplate('{{a}} + {{b}} = {{c}}', { a: 1, b: 2, c: 3 })
      expect(rendered).toBe('1 + 2 = 3')
    })
  })

  describe('object and array values — must NOT produce [object Object]', () => {
    it('serializes a plain object via JSON.stringify', () => {
      const rendered = fillTemplate('Data: {{obj}}', { obj: { foo: 'bar', n: 1 } })

      expect(rendered).not.toContain('[object Object]')
      expect(rendered).toBe('Data: {"foo":"bar","n":1}')
    })

    it('serializes an array via JSON.stringify', () => {
      const rendered = fillTemplate('Items: {{list}}', { list: ['a', 'b', 'c'] })

      expect(rendered).not.toContain('[object Object]')
      expect(rendered).toBe('Items: ["a","b","c"]')
    })

    it('serializes a nested object', () => {
      const nested = { subject: 'AI', stats: { count: 5, tags: ['fast', 'smart'] } }
      const rendered = fillTemplate('Payload: {{payload}}', { payload: nested })

      expect(rendered).not.toContain('[object Object]')

      // Strip the literal prefix; what remains must be parseable JSON.
      const json = rendered.replace('Payload: ', '')
      const decoded = JSON.parse(json)
      expect(decoded.subject).toBe('AI')
      expect(decoded.stats.count).toBe(5)
    })

    it('serializes an array of objects (upstream cascade step output pattern)', () => {
      const upstream = [{ id: 1, label: 'Subject' }, { id: 2, label: 'Problem' }]
      const rendered = fillTemplate('Steps: {{steps}}', { steps: upstream })

      expect(rendered).not.toContain('[object Object]')

      // Strip the literal prefix; what remains must be parseable JSON.
      const json = rendered.replace('Steps: ', '')
      const decoded = JSON.parse(json)
      expect(decoded).toHaveLength(2)
      expect(decoded[0].label).toBe('Subject')
    })
  })

  describe('edge cases', () => {
    it('keeps null as empty string (null coalesces to empty via ?? fallback)', () => {
      // The cast only gets a null past the args type; the expected output is ''.
      const rendered = fillTemplate('{{v}}', { v: null as unknown as string })
      expect(rendered).toBe('')
    })

    it('keeps undefined key as empty string', () => {
      const rendered = fillTemplate('{{v}}', {})
      expect(rendered).toBe('')
    })

    it('handles template with no placeholders', () => {
      const rendered = fillTemplate('no placeholders here', { x: 1 })
      expect(rendered).toBe('no placeholders here')
    })

    it('handles empty template', () => {
      const rendered = fillTemplate('', { x: 1 })
      expect(rendered).toBe('')
    })
  })
})
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification for routing ALL dynamic code execution through ai-evaluate's
|
|
3
|
+
* V8-isolate sandbox (Cloudflare Dynamic Workers, Miniflare fallback in Node).
|
|
4
|
+
*
|
|
5
|
+
* Two distinct paths are exercised:
|
|
6
|
+
*
|
|
7
|
+
* - Path A — `type: 'code'` is DETERMINISTIC. A `handler` is a direct call; an
|
|
8
|
+
* inline `code` body runs in the sandbox. NO model is ever consulted. We spy
|
|
9
|
+
* on the model entry points and assert zero calls, and assert identical
|
|
10
|
+
* output across repeated calls.
|
|
11
|
+
*
|
|
12
|
+
* - Path B — `generateAndRunCode` is the NON-deterministic generate → run →
|
|
13
|
+
* test → return capability. The model AUTHORS the code; we mock that author
|
|
14
|
+
* step, but the run + test + return plumbing executes against the REAL
|
|
15
|
+
* Miniflare sandbox (no live Worker, no model).
|
|
16
|
+
*
|
|
17
|
+
* What is mocked: ONLY the model-author step in Path B (`generateObject` from
|
|
18
|
+
* `./generate.js`). The sandbox itself (Miniflare) is real. Path A mocks
|
|
19
|
+
* nothing — it only spies to prove the model is never touched.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
|
|
23
|
+
|
|
24
|
+
// Spy targets for Path A: prove no model is consulted on the deterministic path.
|
|
25
|
+
import * as generateModule from '../src/generate.js'
|
|
26
|
+
|
|
27
|
+
/**
 * Path A suite: functions defined with `type: 'code'`.
 *
 * Every test defines a function (either with a `handler` or an inline `code`
 * body), calls it, and then asserts that neither `generateObject` nor
 * `generateText` from the generate module was invoked. The spies are created
 * without a mock implementation, so they only observe calls.
 *
 * The trailing `60000` is passed as describe's last argument; Vitest treats a
 * number in that position as a timeout in milliseconds.
 */
describe('Path A — type:code is deterministic (no model, no network)', () => {
  let generateObjectSpy: ReturnType<typeof vi.spyOn>
  let generateTextSpy: ReturnType<typeof vi.spyOn>

  /**
   * Asserts that neither spied model entry point was called.
   * Shared by every test so no test can check one spy and forget the other.
   */
  const expectNoModelCalls = (): void => {
    expect(generateObjectSpy).not.toHaveBeenCalled()
    expect(generateTextSpy).not.toHaveBeenCalled()
  }

  beforeEach(() => {
    generateObjectSpy = vi.spyOn(generateModule, 'generateObject')
    generateTextSpy = vi.spyOn(generateModule, 'generateText')
  })

  afterEach(() => {
    vi.restoreAllMocks()
  })

  it('handler functions are a direct call — deterministic, no model', async () => {
    const { defineFunction } = await import('../src/function-registry.js')
    const calculateTax = defineFunction<number, { amount: number; rate: number }>({
      type: 'code',
      name: 'calculateTax',
      args: { amount: 'Amount (number)', rate: 'Rate (number)' },
      handler: ({ amount, rate }) => amount * rate,
    })

    const a = await calculateTax.call({ amount: 100, rate: 0.2 })
    const b = await calculateTax.call({ amount: 100, rate: 0.2 })

    expect(a).toBe(20)
    expect(b).toBe(20) // identical across repeated calls
    expectNoModelCalls()
  })

  it('inline code bodies run in the sandbox — deterministic, no model', async () => {
    const { defineFunction } = await import('../src/function-registry.js')
    const sum = defineFunction<number, { items: number[] }>({
      type: 'code',
      name: 'sum',
      args: { items: ['Numbers'] },
      language: 'typescript',
      code: 'return args.items.reduce((a, b) => a + b, 0)',
    })

    const a = await sum.call({ items: [1, 2, 3, 4] })
    const b = await sum.call({ items: [1, 2, 3, 4] })

    expect(a).toBe(10)
    expect(b).toBe(10) // identical across repeated calls — fully deterministic
    expectNoModelCalls()
  })

  it('an inline code body that throws surfaces the sandbox error', async () => {
    const { defineFunction } = await import('../src/function-registry.js')
    // Use an explicit statement body (contains `return`) so the runtime throw
    // is reached rather than being mis-wrapped as a `return (expr)`.
    const boom = defineFunction<number, Record<string, never>>({
      type: 'code',
      name: 'boom',
      args: {},
      language: 'typescript',
      code: "if (true) { throw new Error('kaboom') }\nreturn 0",
    })

    await expect(boom.call({})).rejects.toThrow(/kaboom/)
    // The failure path must be model-free too: check BOTH entry points, as the
    // success-path tests above do.
    expectNoModelCalls()
  })
}, 60000)
|
|
93
|
+
|
|
94
|
+
// Swap the generate module for a shallow copy of its real exports. Nothing is
// stubbed by this call itself; Path B stubs only `generateObject` (the
// model-author step) later via vi.spyOn, and run + test still execute in real
// Miniflare. Vitest hoists vi.mock calls to the top of the file, so the copy is
// in effect for every suite here, not just the one below.
// NOTE(review): presumably the copy exists so vi.spyOn can patch the exports — confirm.
vi.mock('../src/generate.js', async (importOriginal) => ({
  ...(await importOriginal<typeof import('../src/generate.js')>()),
}))
|
|
99
|
+
|
|
100
|
+
/**
 * Path B suite: `generateAndRunCode`.
 *
 * In both tests `generateObject` is stubbed with vi.spyOn so it resolves to a
 * fixed `{ object: { code, tests? } }` payload instead of consulting a model.
 * The first test checks the returned value, source and test tallies; the
 * second checks that a throw inside the authored code rejects the call.
 */
describe('Path B — generateAndRunCode: generate → run → test → return', () => {
  beforeEach(() => {
    // Start every test from un-stubbed module exports.
    vi.restoreAllMocks()
  })

  it('runs MODEL-AUTHORED code in the real sandbox and returns the computed result', async () => {
    const generateApi = await import('../src/generate.js')

    // The "authored" artefacts the stubbed model hands back: a module exporting
    // calculateTax, plus a spec exercising it.
    const authoredModule = `export function calculateTax(args) {\n return args.amount * args.rate;\n}`
    const authoredTests = `describe('calculateTax', () => {\n it('multiplies amount by rate', () => {\n expect(calculateTax({ amount: 100, rate: 0.2 })).toBe(20);\n });\n});`

    // Only the author step is replaced; nothing else is stubbed in this test.
    const authorSpy = vi.spyOn(generateApi, 'generateObject').mockResolvedValue({
      object: { code: authoredModule, tests: authoredTests },
    } as Awaited<ReturnType<typeof generateApi.generateObject>>)

    const { generateAndRunCode } = await import('../src/function-registry.js')

    const input = { amount: 100, rate: 0.2 }
    const outcome = await generateAndRunCode<number, { amount: number; rate: number }>(
      {
        name: 'calculateTax',
        description: 'Calculate tax owed',
        args: { amount: '(number)', rate: '(number)' },
        returnType: '(number)',
      },
      input
    )

    // Exactly one model consultation: the author step.
    expect(authorSpy).toHaveBeenCalledTimes(1)

    // The value is the product of running the authored code, and the source
    // that produced it is echoed back alongside.
    expect(outcome.value).toBe(20)
    expect(outcome.code).toContain('function calculateTax')

    // The authored spec was executed as well, with no failures.
    const report = outcome.testResults
    expect(report).toBeDefined()
    expect(report!.failed).toBe(0)
    expect(report!.passed).toBeGreaterThanOrEqual(1)
  })

  it('surfaces a sandbox failure when authored code throws at runtime', async () => {
    const generateApi = await import('../src/generate.js')
    vi.spyOn(generateApi, 'generateObject').mockResolvedValue({
      object: { code: `export function bad(args) { throw new Error('runtime boom'); }` },
    } as Awaited<ReturnType<typeof generateApi.generateObject>>)

    const { generateAndRunCode } = await import('../src/function-registry.js')

    // No tests are requested here (includeTests: false); only the run step matters.
    const attempt = generateAndRunCode(
      { name: 'bad', args: { x: '(number)' }, includeTests: false },
      { x: 1 }
    )

    await expect(attempt).rejects.toThrow(/runtime boom/)
  })
}, 60000)
|