ai-functions 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -4
- package/CHANGELOG.md +68 -1
- package/README.md +397 -157
- package/dist/ai-promise.d.ts +50 -3
- package/dist/ai-promise.d.ts.map +1 -1
- package/dist/ai-promise.js +410 -51
- package/dist/ai-promise.js.map +1 -1
- package/dist/ai-schemas.d.ts +56 -0
- package/dist/ai-schemas.d.ts.map +1 -0
- package/dist/ai-schemas.js +53 -0
- package/dist/ai-schemas.js.map +1 -0
- package/dist/ai.d.ts +16 -242
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +54 -837
- package/dist/ai.js.map +1 -1
- package/dist/batch/anthropic.d.ts +6 -4
- package/dist/batch/anthropic.d.ts.map +1 -1
- package/dist/batch/anthropic.js +83 -145
- package/dist/batch/anthropic.js.map +1 -1
- package/dist/batch/bedrock.d.ts +8 -30
- package/dist/batch/bedrock.d.ts.map +1 -1
- package/dist/batch/bedrock.js +155 -338
- package/dist/batch/bedrock.js.map +1 -1
- package/dist/batch/cloudflare.d.ts +8 -20
- package/dist/batch/cloudflare.d.ts.map +1 -1
- package/dist/batch/cloudflare.js +68 -189
- package/dist/batch/cloudflare.js.map +1 -1
- package/dist/batch/google.d.ts +6 -20
- package/dist/batch/google.d.ts.map +1 -1
- package/dist/batch/google.js +70 -238
- package/dist/batch/google.js.map +1 -1
- package/dist/batch/index.d.ts +4 -1
- package/dist/batch/index.d.ts.map +1 -1
- package/dist/batch/index.js +4 -1
- package/dist/batch/index.js.map +1 -1
- package/dist/batch/memory.d.ts +1 -1
- package/dist/batch/memory.d.ts.map +1 -1
- package/dist/batch/memory.js +14 -10
- package/dist/batch/memory.js.map +1 -1
- package/dist/batch/openai.d.ts +11 -14
- package/dist/batch/openai.d.ts.map +1 -1
- package/dist/batch/openai.js +52 -156
- package/dist/batch/openai.js.map +1 -1
- package/dist/batch/provider.d.ts +111 -0
- package/dist/batch/provider.d.ts.map +1 -0
- package/dist/batch/provider.js +233 -0
- package/dist/batch/provider.js.map +1 -0
- package/dist/batch-map.d.ts.map +1 -1
- package/dist/batch-map.js +23 -17
- package/dist/batch-map.js.map +1 -1
- package/dist/batch-queue.d.ts +65 -0
- package/dist/batch-queue.d.ts.map +1 -1
- package/dist/batch-queue.js +169 -14
- package/dist/batch-queue.js.map +1 -1
- package/dist/budget.d.ts +272 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +513 -0
- package/dist/budget.js.map +1 -0
- package/dist/cache.d.ts +295 -0
- package/dist/cache.d.ts.map +1 -0
- package/dist/cache.js +433 -0
- package/dist/cache.js.map +1 -0
- package/dist/context.d.ts +42 -8
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +64 -62
- package/dist/context.js.map +1 -1
- package/dist/digital-objects-registry.d.ts +229 -0
- package/dist/digital-objects-registry.d.ts.map +1 -0
- package/dist/digital-objects-registry.js +617 -0
- package/dist/digital-objects-registry.js.map +1 -0
- package/dist/embeddings.d.ts +2 -2
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/errors.d.ts +22 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +35 -0
- package/dist/errors.js.map +1 -0
- package/dist/eval/runner.d.ts +10 -1
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/runner.js +41 -35
- package/dist/eval/runner.js.map +1 -1
- package/dist/eval-log/in-memory.d.ts +34 -0
- package/dist/eval-log/in-memory.d.ts.map +1 -0
- package/dist/eval-log/in-memory.js +84 -0
- package/dist/eval-log/in-memory.js.map +1 -0
- package/dist/eval-log/index.d.ts +29 -0
- package/dist/eval-log/index.d.ts.map +1 -0
- package/dist/eval-log/index.js +39 -0
- package/dist/eval-log/index.js.map +1 -0
- package/dist/eval-log/types.d.ts +101 -0
- package/dist/eval-log/types.d.ts.map +1 -0
- package/dist/eval-log/types.js +16 -0
- package/dist/eval-log/types.js.map +1 -0
- package/dist/function-registry.d.ts +116 -0
- package/dist/function-registry.d.ts.map +1 -0
- package/dist/function-registry.js +546 -0
- package/dist/function-registry.js.map +1 -0
- package/dist/generate.d.ts +9 -3
- package/dist/generate.d.ts.map +1 -1
- package/dist/generate.js +18 -22
- package/dist/generate.js.map +1 -1
- package/dist/index.d.ts +35 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +89 -42
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +118 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +187 -0
- package/dist/logger.js.map +1 -0
- package/dist/middleware/budget.d.ts +84 -0
- package/dist/middleware/budget.d.ts.map +1 -0
- package/dist/middleware/budget.js +110 -0
- package/dist/middleware/budget.js.map +1 -0
- package/dist/middleware/cache.d.ts +103 -0
- package/dist/middleware/cache.d.ts.map +1 -0
- package/dist/middleware/cache.js +228 -0
- package/dist/middleware/cache.js.map +1 -0
- package/dist/middleware/embed-cache.d.ts +99 -0
- package/dist/middleware/embed-cache.d.ts.map +1 -0
- package/dist/middleware/embed-cache.js +128 -0
- package/dist/middleware/embed-cache.js.map +1 -0
- package/dist/middleware/index.d.ts +11 -0
- package/dist/middleware/index.d.ts.map +1 -0
- package/dist/middleware/index.js +11 -0
- package/dist/middleware/index.js.map +1 -0
- package/dist/middleware/trace.d.ts +103 -0
- package/dist/middleware/trace.d.ts.map +1 -0
- package/dist/middleware/trace.js +176 -0
- package/dist/middleware/trace.js.map +1 -0
- package/dist/primitives.d.ts +120 -1
- package/dist/primitives.d.ts.map +1 -1
- package/dist/primitives.js +398 -26
- package/dist/primitives.js.map +1 -1
- package/dist/retry.d.ts +368 -0
- package/dist/retry.d.ts.map +1 -0
- package/dist/retry.js +646 -0
- package/dist/retry.js.map +1 -0
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +2 -10
- package/dist/schema.js.map +1 -1
- package/dist/telemetry.d.ts +128 -0
- package/dist/telemetry.d.ts.map +1 -0
- package/dist/telemetry.js +285 -0
- package/dist/telemetry.js.map +1 -0
- package/dist/template.d.ts.map +1 -1
- package/dist/template.js +6 -1
- package/dist/template.js.map +1 -1
- package/dist/tool-orchestration.d.ts +453 -0
- package/dist/tool-orchestration.d.ts.map +1 -0
- package/dist/tool-orchestration.js +763 -0
- package/dist/tool-orchestration.js.map +1 -0
- package/dist/type-guards.d.ts +28 -0
- package/dist/type-guards.d.ts.map +1 -0
- package/dist/type-guards.js +29 -0
- package/dist/type-guards.js.map +1 -0
- package/dist/types.d.ts +135 -17
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +36 -1
- package/dist/types.js.map +1 -1
- package/dist/wrap-for-v3.d.ts +80 -0
- package/dist/wrap-for-v3.d.ts.map +1 -0
- package/dist/wrap-for-v3.js +89 -0
- package/dist/wrap-for-v3.js.map +1 -0
- package/examples/00-quickstart.ts +232 -0
- package/examples/01-rag-chatbot.ts +212 -0
- package/examples/02-multi-agent-research.ts +290 -0
- package/examples/03-email-classification.ts +379 -0
- package/examples/04-content-moderation.ts +400 -0
- package/examples/05-document-extraction.ts +455 -0
- package/examples/06-streaming-chat-nextjs.ts +437 -0
- package/examples/07-cloudflare-worker.ts +483 -0
- package/examples/08-batch-processing.ts +491 -0
- package/examples/09-budget-constrained.ts +527 -0
- package/examples/10-tool-orchestration.ts +565 -0
- package/examples/11-retry-resilience.ts +403 -0
- package/examples/12-caching-strategies.ts +422 -0
- package/examples/README.md +145 -0
- package/package.json +10 -6
- package/src/ai-promise.ts +528 -99
- package/src/ai-schemas.ts +122 -0
- package/src/ai.ts +69 -1153
- package/src/batch/anthropic.ts +96 -161
- package/src/batch/bedrock.ts +203 -454
- package/src/batch/cloudflare.ts +99 -282
- package/src/batch/google.ts +91 -297
- package/src/batch/index.ts +4 -1
- package/src/batch/memory.ts +15 -10
- package/src/batch/openai.ts +65 -193
- package/src/batch/provider.ts +336 -0
- package/src/batch-map.ts +29 -24
- package/src/batch-queue.ts +200 -11
- package/src/budget.ts +740 -0
- package/src/cache.ts +681 -0
- package/src/context.ts +122 -76
- package/src/digital-objects-registry.ts +750 -0
- package/src/errors.ts +37 -0
- package/src/eval/runner.ts +63 -38
- package/src/eval-log/in-memory.ts +90 -0
- package/src/eval-log/index.ts +46 -0
- package/src/eval-log/types.ts +110 -0
- package/src/function-registry.ts +671 -0
- package/src/generate.ts +33 -33
- package/src/index.ts +325 -49
- package/src/logger.ts +232 -0
- package/src/middleware/budget.ts +171 -0
- package/src/middleware/cache.ts +299 -0
- package/src/middleware/embed-cache.ts +195 -0
- package/src/middleware/index.ts +23 -0
- package/src/middleware/trace.ts +248 -0
- package/src/primitives.ts +589 -62
- package/src/retry.ts +902 -0
- package/src/schema.ts +8 -17
- package/src/telemetry.ts +403 -0
- package/src/template.ts +8 -4
- package/src/tool-orchestration.ts +1173 -0
- package/src/type-guards.ts +31 -0
- package/src/types.ts +164 -25
- package/src/wrap-for-v3.ts +105 -0
- package/test/ai-promise.test.ts +1080 -0
- package/test/ai-proxy.test.ts +1 -1
- package/test/backward-compat.test.ts +147 -0
- package/test/batch-autosubmit-errors.test.ts +610 -0
- package/test/batch-blog-posts.test.ts +87 -129
- package/test/budget-tracking.test.ts +800 -0
- package/test/cache.test.ts +712 -0
- package/test/context-isolation.test.ts +687 -0
- package/test/core-functions.test.ts +183 -579
- package/test/decide.test.ts +154 -322
- package/test/define.test.ts +211 -8
- package/test/digital-objects-registry.test.ts +760 -0
- package/test/embedding-cache-middleware.test.ts +140 -0
- package/test/evals/deterministic.eval.test.ts +376 -0
- package/test/generate-core.test.ts +140 -229
- package/test/implicit-batch.test.ts +22 -65
- package/test/json-parse-error-handling.test.ts +463 -0
- package/test/retry-policy-integration.test.ts +117 -0
- package/test/retry.test.ts +1016 -0
- package/test/schema.test.ts +55 -19
- package/test/streaming.test.ts +316 -0
- package/test/template.test.ts +1164 -0
- package/test/tool-orchestration.test.ts +1040 -0
- package/test/wrap-for-v3.test.ts +612 -0
- package/vitest.config.js +6 -0
- package/vitest.config.ts +20 -0
- package/dist/rpc/auth.d.ts +0 -69
- package/dist/rpc/auth.d.ts.map +0 -1
- package/dist/rpc/auth.js +0 -136
- package/dist/rpc/auth.js.map +0 -1
- package/dist/rpc/client.d.ts +0 -62
- package/dist/rpc/client.d.ts.map +0 -1
- package/dist/rpc/client.js +0 -103
- package/dist/rpc/client.js.map +0 -1
- package/dist/rpc/deferred.d.ts +0 -60
- package/dist/rpc/deferred.d.ts.map +0 -1
- package/dist/rpc/deferred.js +0 -96
- package/dist/rpc/deferred.js.map +0 -1
- package/dist/rpc/index.d.ts +0 -22
- package/dist/rpc/index.d.ts.map +0 -1
- package/dist/rpc/index.js +0 -38
- package/dist/rpc/index.js.map +0 -1
- package/dist/rpc/local.d.ts +0 -42
- package/dist/rpc/local.d.ts.map +0 -1
- package/dist/rpc/local.js +0 -50
- package/dist/rpc/local.js.map +0 -1
- package/dist/rpc/server.d.ts +0 -165
- package/dist/rpc/server.d.ts.map +0 -1
- package/dist/rpc/server.js +0 -405
- package/dist/rpc/server.js.map +0 -1
- package/dist/rpc/session.d.ts +0 -32
- package/dist/rpc/session.d.ts.map +0 -1
- package/dist/rpc/session.js +0 -43
- package/dist/rpc/session.js.map +0 -1
- package/dist/rpc/transport.d.ts +0 -306
- package/dist/rpc/transport.d.ts.map +0 -1
- package/dist/rpc/transport.js +0 -731
- package/dist/rpc/transport.js.map +0 -1
- package/src/batch/anthropic.js +0 -256
- package/src/batch/bedrock.js +0 -584
- package/src/batch/cloudflare.js +0 -287
- package/src/batch/google.js +0 -359
- package/src/batch/index.js +0 -30
- package/src/batch/memory.js +0 -187
- package/src/batch/openai.js +0 -402
- package/src/eval/index.js +0 -7
- package/src/eval/models.js +0 -119
- package/src/eval/runner.js +0 -147
- package/test/schema.test.js +0 -96
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for embeddingCacheMiddleware — embedding-side analogue of
|
|
3
|
+
* cacheMiddleware for `wrapEmbeddingModel`.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
|
7
|
+
import { wrapEmbeddingModel } from 'ai'
|
|
8
|
+
import { MockEmbeddingModelV3 } from 'ai/test'
|
|
9
|
+
import { embeddingCacheMiddleware } from '../src/index.js'
|
|
10
|
+
|
|
11
|
+
/**
 * embeddingCacheMiddleware, exercised through `wrapEmbeddingModel`.
 *
 * Each case counts how many times the mocked upstream `doEmbed` runs.
 * The V3_EVAL_CACHE environment variable is set to '1' before every case and
 * put back to the value it had when the suite was collected afterwards.
 */
describe('embeddingCacheMiddleware', () => {
  // Captured once so afterEach can restore whatever the environment held before.
  const gateBeforeSuite = process.env['V3_EVAL_CACHE']

  beforeEach(() => {
    process.env['V3_EVAL_CACHE'] = '1'
  })

  afterEach(() => {
    if (gateBeforeSuite !== undefined) {
      process.env['V3_EVAL_CACHE'] = gateBeforeSuite
      return
    }
    // The variable did not exist originally, so remove it rather than assigning.
    delete process.env['V3_EVAL_CACHE']
  })

  it('returns cached embeddings on second call with same values', async () => {
    let upstreamCalls = 0
    const expectedVectors = [
      [0.1, 0.2, 0.3],
      [0.4, 0.5, 0.6],
    ]
    const model = new MockEmbeddingModelV3({
      modelId: 'test-embed',
      doEmbed: async () => {
        upstreamCalls += 1
        return {
          embeddings: [
            [0.1, 0.2, 0.3],
            [0.4, 0.5, 0.6],
          ],
        }
      },
    })
    const cached = wrapEmbeddingModel({
      model,
      middleware: embeddingCacheMiddleware({ enabled: true }),
    })

    const first = await cached.doEmbed({ values: ['a', 'b'] })
    expect(first.embeddings).toEqual(expectedVectors)
    expect(upstreamCalls).toBe(1)

    // Same values again: the counter must not move.
    const second = await cached.doEmbed({ values: ['a', 'b'] })
    expect(second.embeddings).toEqual(expectedVectors)
    expect(upstreamCalls).toBe(1)
  })

  it('treats different value batches as separate keys', async () => {
    let upstreamCalls = 0
    const model = new MockEmbeddingModelV3({
      modelId: 'test-embed',
      doEmbed: async ({ values }) => {
        upstreamCalls += 1
        const embeddings = values.map((_, index) => [index, index + 1])
        return { embeddings }
      },
    })
    const cached = wrapEmbeddingModel({
      model,
      middleware: embeddingCacheMiddleware({ enabled: true }),
    })

    await cached.doEmbed({ values: ['a'] })
    await cached.doEmbed({ values: ['b'] })

    expect(upstreamCalls).toBe(2)
  })

  it('falls through to upstream when env gate is unset', async () => {
    // Undo the beforeEach assignment for this case only.
    delete process.env['V3_EVAL_CACHE']

    let upstreamCalls = 0
    const model = new MockEmbeddingModelV3({
      modelId: 'test-embed',
      doEmbed: async () => {
        upstreamCalls += 1
        return { embeddings: [[1, 2, 3]] }
      },
    })
    const cached = wrapEmbeddingModel({
      model,
      middleware: embeddingCacheMiddleware(),
    })

    await cached.doEmbed({ values: ['x'] })
    await cached.doEmbed({ values: ['x'] })

    // Both identical calls reach upstream when the gate is off.
    expect(upstreamCalls).toBe(2)
  })

  it('respects explicit enabled: false override', async () => {
    let upstreamCalls = 0
    const model = new MockEmbeddingModelV3({
      modelId: 'test-embed',
      doEmbed: async () => {
        upstreamCalls += 1
        return { embeddings: [[1, 2, 3]] }
      },
    })
    const cached = wrapEmbeddingModel({
      model,
      middleware: embeddingCacheMiddleware({ enabled: false }),
    })

    await cached.doEmbed({ values: ['x'] })
    await cached.doEmbed({ values: ['x'] })

    expect(upstreamCalls).toBe(2)
  })

  it('evicts entries past TTL and re-fetches', async () => {
    let upstreamCalls = 0
    const model = new MockEmbeddingModelV3({
      modelId: 'test-embed',
      doEmbed: async () => {
        upstreamCalls += 1
        return { embeddings: [[upstreamCalls]] }
      },
    })
    // A TTL of -1 ms means every stored entry already counts as expired when read.
    const cached = wrapEmbeddingModel({
      model,
      middleware: embeddingCacheMiddleware({ enabled: true, ttlMs: -1 }),
    })

    await cached.doEmbed({ values: ['x'] })
    await cached.doEmbed({ values: ['x'] })

    // The second call had to go upstream again.
    expect(upstreamCalls).toBe(2)
  })
})
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic AI Eval Suite
|
|
3
|
+
*
|
|
4
|
+
* These tests are designed to be truly deterministic by following these principles:
|
|
5
|
+
*
|
|
6
|
+
* 1. **AI Gateway Caching**: All requests go through Cloudflare AI Gateway with
|
|
7
|
+
* caching enabled. Same prompt = same cached result = deterministic.
|
|
8
|
+
*
|
|
9
|
+
* 2. **Self-Validating Pattern**: Generate content, then verify each item matches
|
|
10
|
+
* the criteria using `is()`. This validates the AI output against itself.
|
|
11
|
+
* ```ts
|
|
12
|
+
* const colors = await list`5 colors`
|
|
13
|
+
* for (const color of colors) {
|
|
14
|
+
* expect(await is`${color} a color`).toBe(true)
|
|
15
|
+
* }
|
|
16
|
+
* ```
|
|
17
|
+
*
|
|
18
|
+
* 3. **Exact Count Validation**: When asking for N items, expect exactly N items.
|
|
19
|
+
* No "greater than 3" or "between 5 and 10" - if we ask for 5, we get 5.
|
|
20
|
+
*
|
|
21
|
+
* 4. **Objectively Deterministic Questions**: Use questions with unambiguous answers:
|
|
22
|
+
* - `is`red a color`` → always true
|
|
23
|
+
* - `is`JavaScript a programming language`` → always true
|
|
24
|
+
* - `is`banana a programming language`` → always false
|
|
25
|
+
*
|
|
26
|
+
* Run with: pnpm test -- test/evals/deterministic.eval.test.ts
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import { describe, it, expect, beforeAll } from 'vitest'
|
|
30
|
+
import { list, is, extract } from '../../src/primitives.js'
|
|
31
|
+
|
|
32
|
+
// Skip if no API access
|
|
33
|
+
const hasAPI = Boolean(
  process.env.AI_GATEWAY_URL || process.env.ANTHROPIC_API_KEY || process.env.OPENAI_API_KEY,
)

// Timeout handed to each AI-backed test case (some cases multiply it)
const AI_TIMEOUT = 30 * 1000

// Model name passed to every call; the TEST_MODEL environment variable overrides
// the 'haiku' default. A fast, cheap model is enough here (caching makes the
// model choice less important).
const TEST_MODEL = process.env.TEST_MODEL || 'haiku'
|
|
44
|
+
|
|
45
|
+
describe.skipIf(!hasAPI)('Deterministic AI Evals', () => {
|
|
46
|
+
// One-time banner: names the model in use and reports whether AI_GATEWAY_URL is set.
beforeAll(() => {
  const gatewayNotice = process.env.AI_GATEWAY_URL
    ? '✓ AI Gateway caching enabled'
    : '⚠ AI Gateway not configured - results may vary'

  console.log(`\nRunning deterministic evals with model: ${TEST_MODEL}`)
  console.log(gatewayNotice)
  console.log('')
})
|
|
55
|
+
|
|
56
|
+
// ==========================================================================
|
|
57
|
+
// is() - Objectively Deterministic Boolean Classification
|
|
58
|
+
// ==========================================================================
|
|
59
|
+
/**
 * is() against fixed question sets.
 *
 * Every group feeds a hard-coded table into `it.each`, asks `is()` one question
 * per row with TEST_MODEL, and expects a specific boolean back.
 */
describe('is() - objectively deterministic questions', () => {
  describe('colors', () => {
    const realColors = [
      'red',
      'blue',
      'green',
      'yellow',
      'purple',
      'orange',
      'pink',
      'black',
      'white',
    ]
    // Abstract concepts that invite a metaphorical reading (e.g., "happiness" = golden) are left out
    const nonColors = ['democracy', 'algorithm', 'photosynthesis', 'recursion', 'multiplication']

    it.each(realColors)('correctly identifies "%s" as a color', async (candidate) => {
      const answer = await is(`${candidate} a color`, { model: TEST_MODEL })
      expect(answer).toBe(true)
    }, AI_TIMEOUT)

    it.each(nonColors)('correctly identifies "%s" as NOT a color', async (candidate) => {
      const answer = await is(`${candidate} a color`, { model: TEST_MODEL })
      expect(answer).toBe(false)
    }, AI_TIMEOUT)
  })

  describe('programming languages', () => {
    const realLanguages = [
      'JavaScript',
      'Python',
      'TypeScript',
      'Rust',
      'Go',
      'Java',
      'C++',
      'Ruby',
    ]
    const nonLanguages = ['banana', 'elephant', 'democracy', 'sunset', 'happiness']

    it.each(realLanguages)('correctly identifies "%s" as a programming language', async (candidate) => {
      const answer = await is(`${candidate} a programming language`, { model: TEST_MODEL })
      expect(answer).toBe(true)
    }, AI_TIMEOUT)

    it.each(nonLanguages)('correctly identifies "%s" as NOT a programming language', async (candidate) => {
      const answer = await is(`${candidate} a programming language`, { model: TEST_MODEL })
      expect(answer).toBe(false)
    }, AI_TIMEOUT)
  })

  describe('numbers', () => {
    const evens = [2, 4, 6, 8, 10, 100, 1000]
    const odds = [1, 3, 5, 7, 9, 11, 99]

    it.each(evens)('correctly identifies %d as even', async (value) => {
      const answer = await is(`${value} an even number`, { model: TEST_MODEL })
      expect(answer).toBe(true)
    }, AI_TIMEOUT)

    it.each(odds)('correctly identifies %d as odd', async (value) => {
      const answer = await is(`${value} an odd number`, { model: TEST_MODEL })
      expect(answer).toBe(true)
    }, AI_TIMEOUT)
  })

  describe('countries and capitals', () => {
    // Rows are [city, country]
    const matchingPairs = [
      ['Paris', 'France'],
      ['Tokyo', 'Japan'],
      ['London', 'United Kingdom'],
      ['Berlin', 'Germany'],
      ['Rome', 'Italy'],
    ]
    const mismatchedPairs = [
      ['Paris', 'Germany'],
      ['Tokyo', 'China'],
      ['London', 'France'],
    ]

    it.each(matchingPairs)('correctly identifies %s as capital of %s', async (city, country) => {
      const answer = await is(`${city} the capital of ${country}`, { model: TEST_MODEL })
      expect(answer).toBe(true)
    }, AI_TIMEOUT)

    it.each(mismatchedPairs)('correctly identifies %s is NOT capital of %s', async (city, country) => {
      const answer = await is(`${city} the capital of ${country}`, { model: TEST_MODEL })
      expect(answer).toBe(false)
    }, AI_TIMEOUT)
  })

  describe('email validation', () => {
    const wellFormed = ['test@example.com', 'user.name@domain.org', 'hello@company.co.uk']
    const malformed = ['not-an-email', 'missing@', '@nodomain.com', 'spaces in@email.com']

    it.each(wellFormed)('correctly identifies "%s" as valid email format', async (address) => {
      const answer = await is(`"${address}" a valid email address format`, { model: TEST_MODEL })
      expect(answer).toBe(true)
    }, AI_TIMEOUT)

    it.each(malformed)('correctly identifies "%s" as invalid email format', async (address) => {
      const answer = await is(`"${address}" a valid email address format`, { model: TEST_MODEL })
      expect(answer).toBe(false)
    }, AI_TIMEOUT)
  })
})
|
|
157
|
+
|
|
158
|
+
// ==========================================================================
|
|
159
|
+
// list() - Self-Validating Pattern with Exact Counts
|
|
160
|
+
// ==========================================================================
|
|
161
|
+
/**
 * list() with exact counts, checked against is().
 *
 * Each case requests N items, asserts that exactly N came back, then asks
 * `is()` about every item in turn (one awaited call at a time) and expects
 * `true` for each.
 */
describe('list() - self-validating with exact counts', () => {
  describe('colors', () => {
    it('generates exactly 5 colors and validates each', async () => {
      const generated = await list('exactly 5 distinct colors', { model: TEST_MODEL })

      // The count must match the request exactly
      expect(generated).toHaveLength(5)

      // Then every entry is put back to the model as a yes/no question
      for (const entry of generated) {
        const verdict = await is(`${entry} a color`, { model: TEST_MODEL })
        expect(verdict).toBe(true)
      }
    }, AI_TIMEOUT * 2)

    it('generates exactly 10 colors and validates each', async () => {
      const generated = await list('exactly 10 distinct colors', { model: TEST_MODEL })

      expect(generated).toHaveLength(10)

      for (const entry of generated) {
        const verdict = await is(`${entry} a color`, { model: TEST_MODEL })
        expect(verdict).toBe(true)
      }
    }, AI_TIMEOUT * 3)
  })

  describe('programming languages', () => {
    it('generates exactly 5 programming languages and validates each', async () => {
      const generated = await list('exactly 5 programming languages', { model: TEST_MODEL })

      expect(generated).toHaveLength(5)

      for (const entry of generated) {
        const verdict = await is(`${entry} a programming language`, { model: TEST_MODEL })
        expect(verdict).toBe(true)
      }
    }, AI_TIMEOUT * 2)
  })

  describe('countries', () => {
    it('generates exactly 7 countries and validates each', async () => {
      const generated = await list('exactly 7 countries in the world', { model: TEST_MODEL })

      expect(generated).toHaveLength(7)

      for (const entry of generated) {
        const verdict = await is(`${entry} a country`, { model: TEST_MODEL })
        expect(verdict).toBe(true)
      }
    }, AI_TIMEOUT * 2)
  })

  describe('fruits', () => {
    it('generates exactly 6 fruits and validates each', async () => {
      const generated = await list('exactly 6 fruits', { model: TEST_MODEL })

      expect(generated).toHaveLength(6)

      for (const entry of generated) {
        const verdict = await is(`${entry} a fruit`, { model: TEST_MODEL })
        expect(verdict).toBe(true)
      }
    }, AI_TIMEOUT * 2)
  })

  describe('animals', () => {
    it('generates exactly 8 animals and validates each', async () => {
      const generated = await list('exactly 8 animals', { model: TEST_MODEL })

      expect(generated).toHaveLength(8)

      for (const entry of generated) {
        const verdict = await is(`${entry} an animal`, { model: TEST_MODEL })
        expect(verdict).toBe(true)
      }
    }, AI_TIMEOUT * 2)
  })
})
|
|
240
|
+
|
|
241
|
+
// ==========================================================================
|
|
242
|
+
// list() - Constrained Lists (verifiable subcategories)
|
|
243
|
+
// ==========================================================================
|
|
244
|
+
/**
 * list() for categories whose full membership is known in advance.
 *
 * Each case requests the whole category, checks the exact count, checks every
 * item against a hard-coded list, and checks that no item is repeated. The
 * repeat check matters: count plus membership alone would also accept a list
 * such as seven copies of "Monday".
 */
describe('list() - constrained categories', () => {
  it('generates primary colors only', async () => {
    const colors = await list('exactly 3 primary colors (red, blue, yellow)', { model: TEST_MODEL })

    expect(colors).toHaveLength(3)

    // Primary colors are a known finite set
    const primaryColors = ['red', 'blue', 'yellow']
    const normalized = colors.map((color) => color.toLowerCase())
    for (const color of normalized) {
      expect(primaryColors).toContain(color)
    }
    // No duplicates: together with the checks above this means all three are present
    expect(new Set(normalized).size).toBe(normalized.length)
  }, AI_TIMEOUT)

  it('generates days of the week', async () => {
    const days = await list('exactly 7 days of the week', { model: TEST_MODEL })

    expect(days).toHaveLength(7)

    const validDays = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    const normalized = days.map((day) => day.toLowerCase())
    for (const day of normalized) {
      expect(validDays).toContain(day)
    }
    // No duplicates: every weekday appears exactly once
    expect(new Set(normalized).size).toBe(normalized.length)
  }, AI_TIMEOUT)

  it('generates months of the year', async () => {
    const months = await list('exactly 12 months of the year', { model: TEST_MODEL })

    expect(months).toHaveLength(12)

    const validMonths = [
      'january', 'february', 'march', 'april', 'may', 'june',
      'july', 'august', 'september', 'october', 'november', 'december'
    ]
    const normalized = months.map((month) => month.toLowerCase())
    for (const month of normalized) {
      expect(validMonths).toContain(month)
    }
    // No duplicates: every month appears exactly once
    expect(new Set(normalized).size).toBe(normalized.length)
  }, AI_TIMEOUT)

  it('generates continents', async () => {
    const continents = await list('exactly 7 continents', { model: TEST_MODEL })

    expect(continents).toHaveLength(7)

    // Use known finite set validation (handles Oceania/Australia naming variations)
    const validContinents = [
      'africa', 'antarctica', 'asia', 'australia', 'oceania',
      'europe', 'north america', 'south america'
    ]
    const normalized = continents.map((continent) => continent.toLowerCase().trim())
    for (const name of normalized) {
      // The item must contain a known name (so "Australia/Oceania" is fine).
      // The reverse direction is deliberately not accepted: it let an empty
      // string or a fragment such as "a" or "america" pass as a continent.
      const isValid = validContinents.some((known) => name.includes(known))
      expect(isValid).toBe(true)
    }
    // No duplicates among the returned names
    expect(new Set(normalized).size).toBe(normalized.length)
  }, AI_TIMEOUT)
})
|
|
299
|
+
|
|
300
|
+
// ==========================================================================
|
|
301
|
+
// extract() - Deterministic Extraction from Known Text
|
|
302
|
+
// ==========================================================================
|
|
303
|
+
/**
 * extract() over fixed sentences.
 *
 * The input text is hard-coded in each case, so the values that must appear in
 * the result are known up front.
 */
describe('extract() - deterministic from known text', () => {
  it('extracts email addresses from text', async () => {
    const sentence = 'Contact support@example.com or sales@company.org for help'

    const found = await extract(`email addresses from "${sentence}"`, { model: TEST_MODEL })

    expect(found).toHaveLength(2)
    expect(found).toContain('support@example.com')
    expect(found).toContain('sales@company.org')
  }, AI_TIMEOUT)

  it('extracts numbers from text', async () => {
    const sentence = 'The product costs $50 and we sold 100 units'

    const found = await extract(`numbers from "${sentence}"`, { model: TEST_MODEL })

    expect(found.length).toBeGreaterThanOrEqual(2)

    // Items may come back as strings or numbers, so compare their string forms
    const asText = found.map((item) => String(item))
    const mentionsFifty = asText.some((item) => item.includes('50'))
    const mentionsHundred = asText.some((item) => item.includes('100'))
    expect(mentionsFifty).toBe(true)
    expect(mentionsHundred).toBe(true)
  }, AI_TIMEOUT)

  it('extracts names from text', async () => {
    const sentence = 'John Smith and Jane Doe attended the meeting'

    const found = await extract(`person names from "${sentence}"`, { model: TEST_MODEL })

    expect(found.length).toBeGreaterThanOrEqual(2)

    // Case-insensitive substring match on the given names
    const lowered = found.map((item) => String(item).toLowerCase())
    const mentionsJohn = lowered.some((item) => item.includes('john'))
    const mentionsJane = lowered.some((item) => item.includes('jane'))
    expect(mentionsJohn).toBe(true)
    expect(mentionsJane).toBe(true)
  }, AI_TIMEOUT)
})
|
|
334
|
+
|
|
335
|
+
// ==========================================================================
|
|
336
|
+
// Chained Self-Validation (Advanced Pattern)
|
|
337
|
+
// ==========================================================================
|
|
338
|
+
/**
 * list() output fed back into is().
 *
 * Each case generates five items for a subject, then asks `is()` whether every
 * item belongs to that subject, one awaited call at a time.
 */
describe('chained self-validation', () => {
  it('blog post titles about a topic validate as being about that topic', async () => {
    const topic = 'artificial intelligence'

    const generated = await list(`exactly 5 blog post titles about ${topic}`, { model: TEST_MODEL })
    expect(generated).toHaveLength(5)

    // Every title is checked against the same topic it was generated for
    for (const entry of generated) {
      const verdict = await is(`"${entry}" a blog post title about ${topic}`, { model: TEST_MODEL })
      expect(verdict).toBe(true)
    }
  }, AI_TIMEOUT * 2)

  it('company names in an industry validate as being in that industry', async () => {
    const industry = 'technology'

    const generated = await list(`exactly 5 well-known ${industry} companies`, { model: TEST_MODEL })
    expect(generated).toHaveLength(5)

    for (const entry of generated) {
      const verdict = await is(`${entry} a ${industry} company`, { model: TEST_MODEL })
      expect(verdict).toBe(true)
    }
  }, AI_TIMEOUT * 2)

  it('cities in a country validate as being in that country', async () => {
    const country = 'Japan'

    const generated = await list(`exactly 5 cities in ${country}`, { model: TEST_MODEL })
    expect(generated).toHaveLength(5)

    for (const entry of generated) {
      const verdict = await is(`${entry} a city in ${country}`, { model: TEST_MODEL })
      expect(verdict).toBe(true)
    }
  }, AI_TIMEOUT * 2)
})
|
|
376
|
+
})
|