ai-functions 0.2.19 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/.turbo/turbo-build.log +5 -0
  2. package/.turbo/turbo-test.log +105 -0
  3. package/README.md +232 -37
  4. package/TODO.md +138 -0
  5. package/dist/ai-promise.d.ts +219 -0
  6. package/dist/ai-promise.d.ts.map +1 -0
  7. package/dist/ai-promise.js +610 -0
  8. package/dist/ai-promise.js.map +1 -0
  9. package/dist/ai.d.ts +285 -0
  10. package/dist/ai.d.ts.map +1 -0
  11. package/dist/ai.js +842 -0
  12. package/dist/ai.js.map +1 -0
  13. package/dist/batch/anthropic.d.ts +23 -0
  14. package/dist/batch/anthropic.d.ts.map +1 -0
  15. package/dist/batch/anthropic.js +257 -0
  16. package/dist/batch/anthropic.js.map +1 -0
  17. package/dist/batch/bedrock.d.ts +64 -0
  18. package/dist/batch/bedrock.d.ts.map +1 -0
  19. package/dist/batch/bedrock.js +586 -0
  20. package/dist/batch/bedrock.js.map +1 -0
  21. package/dist/batch/cloudflare.d.ts +37 -0
  22. package/dist/batch/cloudflare.d.ts.map +1 -0
  23. package/dist/batch/cloudflare.js +289 -0
  24. package/dist/batch/cloudflare.js.map +1 -0
  25. package/dist/batch/google.d.ts +41 -0
  26. package/dist/batch/google.d.ts.map +1 -0
  27. package/dist/batch/google.js +360 -0
  28. package/dist/batch/google.js.map +1 -0
  29. package/dist/batch/index.d.ts +31 -0
  30. package/dist/batch/index.d.ts.map +1 -0
  31. package/dist/batch/index.js +31 -0
  32. package/dist/batch/index.js.map +1 -0
  33. package/dist/batch/memory.d.ts +44 -0
  34. package/dist/batch/memory.d.ts.map +1 -0
  35. package/dist/batch/memory.js +188 -0
  36. package/dist/batch/memory.js.map +1 -0
  37. package/dist/batch/openai.d.ts +37 -0
  38. package/dist/batch/openai.d.ts.map +1 -0
  39. package/dist/batch/openai.js +403 -0
  40. package/dist/batch/openai.js.map +1 -0
  41. package/dist/batch-map.d.ts +125 -0
  42. package/dist/batch-map.d.ts.map +1 -0
  43. package/dist/batch-map.js +406 -0
  44. package/dist/batch-map.js.map +1 -0
  45. package/dist/batch-queue.d.ts +273 -0
  46. package/dist/batch-queue.d.ts.map +1 -0
  47. package/dist/batch-queue.js +271 -0
  48. package/dist/batch-queue.js.map +1 -0
  49. package/dist/context.d.ts +133 -0
  50. package/dist/context.d.ts.map +1 -0
  51. package/dist/context.js +267 -0
  52. package/dist/context.js.map +1 -0
  53. package/dist/embeddings.d.ts +123 -0
  54. package/dist/embeddings.d.ts.map +1 -0
  55. package/dist/embeddings.js +170 -0
  56. package/dist/embeddings.js.map +1 -0
  57. package/dist/eval/index.d.ts +8 -0
  58. package/dist/eval/index.d.ts.map +1 -0
  59. package/dist/eval/index.js +8 -0
  60. package/dist/eval/index.js.map +1 -0
  61. package/dist/eval/models.d.ts +66 -0
  62. package/dist/eval/models.d.ts.map +1 -0
  63. package/dist/eval/models.js +120 -0
  64. package/dist/eval/models.js.map +1 -0
  65. package/dist/eval/runner.d.ts +64 -0
  66. package/dist/eval/runner.d.ts.map +1 -0
  67. package/dist/eval/runner.js +148 -0
  68. package/dist/eval/runner.js.map +1 -0
  69. package/dist/generate.d.ts +168 -0
  70. package/dist/generate.d.ts.map +1 -0
  71. package/dist/generate.js +174 -0
  72. package/dist/generate.js.map +1 -0
  73. package/dist/index.d.ts +30 -0
  74. package/dist/index.d.ts.map +1 -0
  75. package/dist/index.js +54 -0
  76. package/dist/index.js.map +1 -0
  77. package/dist/primitives.d.ts +292 -0
  78. package/dist/primitives.d.ts.map +1 -0
  79. package/dist/primitives.js +471 -0
  80. package/dist/primitives.js.map +1 -0
  81. package/dist/providers/cloudflare.d.ts +9 -0
  82. package/dist/providers/cloudflare.d.ts.map +1 -0
  83. package/dist/providers/cloudflare.js +9 -0
  84. package/dist/providers/cloudflare.js.map +1 -0
  85. package/dist/providers/index.d.ts +9 -0
  86. package/dist/providers/index.d.ts.map +1 -0
  87. package/dist/providers/index.js +9 -0
  88. package/dist/providers/index.js.map +1 -0
  89. package/dist/schema.d.ts +54 -0
  90. package/dist/schema.d.ts.map +1 -0
  91. package/dist/schema.js +109 -0
  92. package/dist/schema.js.map +1 -0
  93. package/dist/template.d.ts +73 -0
  94. package/dist/template.d.ts.map +1 -0
  95. package/dist/template.js +129 -0
  96. package/dist/template.js.map +1 -0
  97. package/dist/types.d.ts +481 -0
  98. package/dist/types.d.ts.map +1 -0
  99. package/dist/types.js +5 -0
  100. package/dist/types.js.map +1 -0
  101. package/evalite.config.ts +19 -0
  102. package/evals/README.md +212 -0
  103. package/evals/classification.eval.ts +108 -0
  104. package/evals/marketing.eval.ts +370 -0
  105. package/evals/math.eval.ts +94 -0
  106. package/evals/run-evals.ts +166 -0
  107. package/evals/structured-output.eval.ts +143 -0
  108. package/evals/writing.eval.ts +117 -0
  109. package/examples/batch-blog-posts.ts +160 -0
  110. package/package.json +59 -43
  111. package/src/ai-promise.ts +784 -0
  112. package/src/ai.ts +1183 -0
  113. package/src/batch/anthropic.ts +375 -0
  114. package/src/batch/bedrock.ts +801 -0
  115. package/src/batch/cloudflare.ts +421 -0
  116. package/src/batch/google.ts +491 -0
  117. package/src/batch/index.ts +31 -0
  118. package/src/batch/memory.ts +253 -0
  119. package/src/batch/openai.ts +557 -0
  120. package/src/batch-map.ts +534 -0
  121. package/src/batch-queue.ts +493 -0
  122. package/src/context.ts +332 -0
  123. package/src/embeddings.ts +244 -0
  124. package/src/eval/index.ts +8 -0
  125. package/src/eval/models.ts +158 -0
  126. package/src/eval/runner.ts +217 -0
  127. package/src/generate.ts +245 -0
  128. package/src/index.ts +154 -0
  129. package/src/primitives.ts +612 -0
  130. package/src/providers/cloudflare.ts +15 -0
  131. package/src/providers/index.ts +14 -0
  132. package/src/schema.ts +147 -0
  133. package/src/template.ts +209 -0
  134. package/src/types.ts +540 -0
  135. package/test/README.md +105 -0
  136. package/test/ai-proxy.test.ts +192 -0
  137. package/test/async-iterators.test.ts +327 -0
  138. package/test/batch-background.test.ts +482 -0
  139. package/test/batch-blog-posts.test.ts +387 -0
  140. package/test/blog-generation.test.ts +510 -0
  141. package/test/browse-read.test.ts +611 -0
  142. package/test/core-functions.test.ts +694 -0
  143. package/test/decide.test.ts +393 -0
  144. package/test/define.test.ts +274 -0
  145. package/test/e2e-bedrock-manual.ts +163 -0
  146. package/test/e2e-bedrock.test.ts +191 -0
  147. package/test/e2e-flex-gateway.ts +157 -0
  148. package/test/e2e-flex-manual.ts +183 -0
  149. package/test/e2e-flex.test.ts +209 -0
  150. package/test/e2e-google-manual.ts +178 -0
  151. package/test/e2e-google.test.ts +216 -0
  152. package/test/embeddings.test.ts +284 -0
  153. package/test/evals/define-function.eval.test.ts +379 -0
  154. package/test/evals/primitives.eval.test.ts +384 -0
  155. package/test/function-types.test.ts +492 -0
  156. package/test/generate-core.test.ts +319 -0
  157. package/test/generate.test.ts +163 -0
  158. package/test/implicit-batch.test.ts +422 -0
  159. package/test/schema.test.ts +109 -0
  160. package/test/tagged-templates.test.ts +302 -0
  161. package/tsconfig.json +8 -6
  162. package/vitest.config.ts +42 -0
  163. package/LICENSE +0 -21
  164. package/db/cache.ts +0 -6
  165. package/db/mongo.ts +0 -75
  166. package/dist/mjs/db/cache.d.ts +0 -1
  167. package/dist/mjs/db/cache.js +0 -5
  168. package/dist/mjs/db/mongo.d.ts +0 -31
  169. package/dist/mjs/db/mongo.js +0 -48
  170. package/dist/mjs/examples/data.d.ts +0 -1105
  171. package/dist/mjs/examples/data.js +0 -1105
  172. package/dist/mjs/functions/ai.d.ts +0 -20
  173. package/dist/mjs/functions/ai.js +0 -83
  174. package/dist/mjs/functions/ai.test.d.ts +0 -1
  175. package/dist/mjs/functions/ai.test.js +0 -29
  176. package/dist/mjs/functions/gpt.d.ts +0 -4
  177. package/dist/mjs/functions/gpt.js +0 -10
  178. package/dist/mjs/functions/list.d.ts +0 -7
  179. package/dist/mjs/functions/list.js +0 -72
  180. package/dist/mjs/index.d.ts +0 -3
  181. package/dist/mjs/index.js +0 -3
  182. package/dist/mjs/queue/kafka.d.ts +0 -0
  183. package/dist/mjs/queue/kafka.js +0 -1
  184. package/dist/mjs/queue/memory.d.ts +0 -0
  185. package/dist/mjs/queue/memory.js +0 -1
  186. package/dist/mjs/queue/mongo.d.ts +0 -30
  187. package/dist/mjs/queue/mongo.js +0 -42
  188. package/dist/mjs/streams/kafka.d.ts +0 -0
  189. package/dist/mjs/streams/kafka.js +0 -1
  190. package/dist/mjs/streams/memory.d.ts +0 -0
  191. package/dist/mjs/streams/memory.js +0 -1
  192. package/dist/mjs/streams/mongo.d.ts +0 -0
  193. package/dist/mjs/streams/mongo.js +0 -1
  194. package/dist/mjs/streams/types.d.ts +0 -0
  195. package/dist/mjs/streams/types.js +0 -1
  196. package/dist/mjs/types.d.ts +0 -11
  197. package/dist/mjs/types.js +0 -1
  198. package/dist/mjs/utils/completion.d.ts +0 -9
  199. package/dist/mjs/utils/completion.js +0 -20
  200. package/dist/mjs/utils/schema.d.ts +0 -10
  201. package/dist/mjs/utils/schema.js +0 -72
  202. package/dist/mjs/utils/schema.test.d.ts +0 -1
  203. package/dist/mjs/utils/schema.test.js +0 -60
  204. package/dist/mjs/utils/state.d.ts +0 -1
  205. package/dist/mjs/utils/state.js +0 -19
  206. package/examples/data.ts +0 -1105
  207. package/fixup +0 -11
  208. package/functions/ai.test.ts +0 -41
  209. package/functions/ai.ts +0 -115
  210. package/functions/gpt.ts +0 -12
  211. package/functions/list.ts +0 -84
  212. package/index.ts +0 -3
  213. package/queue/kafka.ts +0 -0
  214. package/queue/memory.ts +0 -0
  215. package/queue/mongo.ts +0 -88
  216. package/streams/kafka.ts +0 -0
  217. package/streams/memory.ts +0 -0
  218. package/streams/mongo.ts +0 -0
  219. package/streams/types.ts +0 -0
  220. package/tsconfig-backup.json +0 -105
  221. package/tsconfig-base.json +0 -26
  222. package/tsconfig-cjs.json +0 -8
  223. package/types.ts +0 -12
  224. package/utils/completion.ts +0 -28
  225. package/utils/schema.test.ts +0 -69
  226. package/utils/schema.ts +0 -74
  227. package/utils/state.ts +0 -23
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Math Eval
3
+ *
4
+ * Tests model mathematical reasoning from simple arithmetic
5
+ * to word problems.
6
+ */
7
+
8
+ import { evalite } from 'evalite'
9
+ import { generateObject } from '../src/generate.js'
10
+ import { schema } from '../src/schema.js'
11
+ import { createModelVariants, type EvalModel } from '../src/eval/models.js'
12
+
13
+ // Math test cases
14
+ const TEST_CASES = [
15
+ // Arithmetic
16
+ { problem: 'What is 15 + 27?', expected: 42, difficulty: 'easy' },
17
+ { problem: 'What is 144 / 12?', expected: 12, difficulty: 'easy' },
18
+ { problem: 'What is 7 * 8?', expected: 56, difficulty: 'easy' },
19
+
20
+ // Word problems
21
+ { problem: 'A store sells 45 apples at $2 each. What is the total revenue?', expected: 90, difficulty: 'medium' },
22
+ { problem: 'A train travels 240 miles in 4 hours. What is the average speed in mph?', expected: 60, difficulty: 'medium' },
23
+
24
+ // Multi-step
25
+ { problem: 'A company has 120 employees. 40% work in engineering, and 25% of engineers are senior. How many senior engineers?', expected: 12, difficulty: 'hard' },
26
+ ]
27
+
28
+ const modelVariants = createModelVariants({ tiers: ['fast'] })
29
+
30
+ evalite.each(modelVariants)('Math', {
31
+ data: TEST_CASES.map(tc => ({ input: tc, expected: tc.expected })),
32
+
33
+ task: async (input, variant) => {
34
+ const model = variant as EvalModel
35
+ const startTime = Date.now()
36
+
37
+ const { object, usage } = await generateObject({
38
+ model: model.id,
39
+ schema: schema({
40
+ answer: 'The numeric answer (number)',
41
+ reasoning: 'Step by step reasoning',
42
+ }),
43
+ prompt: `Solve this math problem:\n\n${input.problem}`,
44
+ })
45
+
46
+ const latencyMs = Date.now() - startTime
47
+
48
+ return {
49
+ answer: object.answer,
50
+ reasoning: object.reasoning,
51
+ expected: input.expected,
52
+ problem: input.problem,
53
+ difficulty: input.difficulty,
54
+ modelId: model.id,
55
+ modelName: model.name,
56
+ latencyMs,
57
+ usage,
58
+ }
59
+ },
60
+
61
+ scorers: [
62
+ // Exact answer
63
+ {
64
+ name: 'Correct Answer',
65
+ description: 'Whether the numeric answer is correct',
66
+ scorer: ({ output, expected }) => {
67
+ const answer = output.answer as number
68
+ const exp = expected as number
69
+ // Allow small floating point tolerance
70
+ return { score: Math.abs(answer - exp) < 0.01 ? 1 : 0 }
71
+ },
72
+ },
73
+
74
+ // Shows reasoning
75
+ {
76
+ name: 'Shows Work',
77
+ description: 'Whether model explains reasoning',
78
+ scorer: ({ output }) => {
79
+ const reasoning = output.reasoning as string
80
+ if (!reasoning || reasoning.length < 20) return { score: 0.2 }
81
+ if (reasoning.length > 50) return { score: 1 }
82
+ return { score: 0.6 }
83
+ },
84
+ },
85
+ ],
86
+
87
+ columns: ({ output, expected }) => [
88
+ { label: 'Model', value: output.modelName },
89
+ { label: 'Difficulty', value: output.difficulty },
90
+ { label: 'Expected', value: expected },
91
+ { label: 'Got', value: output.answer },
92
+ { label: 'Correct', value: Math.abs((output.answer as number) - (expected as number)) < 0.01 ? 'Yes' : 'No' },
93
+ ],
94
+ })
@@ -0,0 +1,166 @@
1
+ #!/usr/bin/env npx tsx
2
+ /**
3
+ * Run AI Functions Eval Suite
4
+ *
5
+ * Usage:
6
+ * npx tsx evals/run-evals.ts [--fast] [--all]
7
+ *
8
+ * Options:
9
+ * --fast Only run fast-tier models (default)
10
+ * --all Run all models
11
+ * --math Run only math eval
12
+ * --class Run only classification eval
13
+ */
14
+
15
+ import { runEval, generateObject, generateText, schema } from '../src/eval/runner.js'
16
+ import type { EvalModel, ModelTier } from '../src/eval/models.js'
17
+
18
+ // Parse CLI args
19
+ const args = process.argv.slice(2)
20
+ const runAll = args.includes('--all')
21
+ const runMath = args.includes('--math')
22
+ const runClass = args.includes('--class')
23
+ const runSingle = runMath || runClass
24
+
25
+ const tiers: ModelTier[] = runAll ? ['best', 'fast', 'cheap'] : ['fast']
26
+
27
+ console.log('╔════════════════════════════════════════════════════════════════╗')
28
+ console.log('║ AI Functions Eval Suite ║')
29
+ console.log('╚════════════════════════════════════════════════════════════════╝')
30
+ console.log('')
31
+ console.log(`Tiers: ${tiers.join(', ')}`)
32
+
33
+ // Math eval
34
+ async function runMathEval() {
35
+ const cases = [
36
+ { name: 'Simple addition', input: { problem: 'What is 15 + 27?' }, expected: 42 },
37
+ { name: 'Division', input: { problem: 'What is 144 / 12?' }, expected: 12 },
38
+ { name: 'Multiplication', input: { problem: 'What is 7 * 8?' }, expected: 56 },
39
+ { name: 'Word problem', input: { problem: 'A store sells 45 apples at $2 each. What is the total revenue?' }, expected: 90 },
40
+ { name: 'Multi-step', input: { problem: 'A company has 120 employees. 40% work in engineering, and 25% of engineers are senior. How many senior engineers?' }, expected: 12 },
41
+ ]
42
+
43
+ return runEval({
44
+ name: 'Math',
45
+ cases,
46
+ tiers,
47
+ task: async (input, model) => {
48
+ const { object } = await generateObject({
49
+ model: model.id,
50
+ schema: schema({
51
+ answer: 'The numeric answer (number)',
52
+ reasoning: 'Step by step reasoning',
53
+ }),
54
+ prompt: `Solve this math problem:\n\n${input.problem}`,
55
+ })
56
+ return object
57
+ },
58
+ scorers: [
59
+ {
60
+ name: 'Correct Answer',
61
+ description: 'Whether the numeric answer is correct',
62
+ scorer: ({ output, expected }) => {
63
+ const answer = (output as { answer: number }).answer
64
+ const exp = expected as number
65
+ return Math.abs(answer - exp) < 0.01 ? 1 : 0
66
+ },
67
+ },
68
+ {
69
+ name: 'Shows Work',
70
+ description: 'Whether model explains reasoning',
71
+ scorer: ({ output }) => {
72
+ const reasoning = (output as { reasoning: string }).reasoning
73
+ if (!reasoning || reasoning.length < 20) return 0.2
74
+ if (reasoning.length > 50) return 1
75
+ return 0.6
76
+ },
77
+ },
78
+ ],
79
+ })
80
+ }
81
+
82
+ // Classification eval
83
+ async function runClassificationEval() {
84
+ const cases = [
85
+ { name: 'Positive sentiment', input: { text: 'This product exceeded my expectations!', options: ['positive', 'negative', 'neutral'] }, expected: 'positive' },
86
+ { name: 'Negative sentiment', input: { text: 'The delivery was late and packaging damaged.', options: ['positive', 'negative', 'neutral'] }, expected: 'negative' },
87
+ { name: 'Neutral sentiment', input: { text: 'The product arrived as described.', options: ['positive', 'negative', 'neutral'] }, expected: 'neutral' },
88
+ { name: 'Account ticket', input: { text: 'I need to reset my password', options: ['account', 'billing', 'technical', 'shipping'] }, expected: 'account' },
89
+ { name: 'Billing ticket', input: { text: 'When will my refund be processed?', options: ['account', 'billing', 'technical', 'shipping'] }, expected: 'billing' },
90
+ { name: 'Technical ticket', input: { text: 'The app crashes when uploading images', options: ['account', 'billing', 'technical', 'shipping'] }, expected: 'technical' },
91
+ ]
92
+
93
+ return runEval({
94
+ name: 'Classification',
95
+ cases,
96
+ tiers,
97
+ task: async (input, model) => {
98
+ const enumStr = input.options.join(' | ')
99
+ const { object } = await generateObject({
100
+ model: model.id,
101
+ schema: schema({
102
+ category: enumStr,
103
+ confidence: 'Confidence 0-1 (number)',
104
+ }),
105
+ prompt: `Classify this text into one of: ${input.options.join(', ')}\n\nText: "${input.text}"`,
106
+ })
107
+ return object
108
+ },
109
+ scorers: [
110
+ {
111
+ name: 'Accuracy',
112
+ description: 'Whether classification is correct',
113
+ scorer: ({ output, expected }) => {
114
+ const predicted = (output as { category: string }).category
115
+ return predicted === expected ? 1 : 0
116
+ },
117
+ },
118
+ {
119
+ name: 'Valid Category',
120
+ description: 'Whether output is a valid option',
121
+ scorer: ({ input, output }) => {
122
+ const predicted = (output as { category: string }).category
123
+ const options = (input as { options: string[] }).options
124
+ return options.includes(predicted) ? 1 : 0
125
+ },
126
+ },
127
+ ],
128
+ })
129
+ }
130
+
131
+ // Run evals
132
+ async function main() {
133
+ const results = []
134
+
135
+ if (!runSingle || runMath) {
136
+ results.push(await runMathEval())
137
+ }
138
+
139
+ if (!runSingle || runClass) {
140
+ results.push(await runClassificationEval())
141
+ }
142
+
143
+ // Overall summary
144
+ console.log('')
145
+ console.log('╔════════════════════════════════════════════════════════════════╗')
146
+ console.log('║ Summary ║')
147
+ console.log('╚════════════════════════════════════════════════════════════════╝')
148
+
149
+ let totalScore = 0
150
+ let totalCost = 0
151
+ let totalTime = 0
152
+
153
+ for (const result of results) {
154
+ console.log(`\n${result.name}: ${(result.avgScore * 100).toFixed(1)}%`)
155
+ totalScore += result.avgScore
156
+ totalCost += result.totalCost
157
+ totalTime += result.totalTime
158
+ }
159
+
160
+ console.log('')
161
+ console.log(`Overall: ${((totalScore / results.length) * 100).toFixed(1)}%`)
162
+ console.log(`Total Cost: $${totalCost.toFixed(4)}`)
163
+ console.log(`Total Time: ${(totalTime / 1000).toFixed(1)}s`)
164
+ }
165
+
166
+ main().catch(console.error)
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Structured Output Eval
3
+ *
4
+ * Tests model ability to generate valid structured JSON output
5
+ * matching specified schemas across all providers.
6
+ */
7
+
8
+ import { evalite } from 'evalite'
9
+ import { generateObject } from '../src/generate.js'
10
+ import { schema } from '../src/schema.js'
11
+ import { createModelVariants, getModelPricing, type EvalModel } from '../src/eval/models.js'
12
+
13
+ // Test cases for structured output
14
+ const TEST_CASES = [
15
+ {
16
+ name: 'Simple object',
17
+ prompt: 'Generate a greeting in French',
18
+ schema: {
19
+ greeting: 'A friendly greeting',
20
+ language: 'The language of the greeting',
21
+ },
22
+ expectedTypes: { greeting: 'string', language: 'string' },
23
+ },
24
+ {
25
+ name: 'With numbers',
26
+ prompt: 'Generate info about Tokyo',
27
+ schema: {
28
+ name: 'City name',
29
+ population: 'Population in millions (number)',
30
+ area: 'Area in square kilometers (number)',
31
+ },
32
+ expectedTypes: { name: 'string', population: 'number', area: 'number' },
33
+ },
34
+ {
35
+ name: 'With arrays',
36
+ prompt: 'Generate a simple pasta recipe',
37
+ schema: {
38
+ title: 'Recipe title',
39
+ ingredients: ['List of ingredients'],
40
+ steps: ['Cooking steps'],
41
+ },
42
+ expectedTypes: { title: 'string', ingredients: 'array', steps: 'array' },
43
+ },
44
+ {
45
+ name: 'With enum',
46
+ prompt: 'Analyze sentiment: "I love this product!"',
47
+ schema: {
48
+ sentiment: 'positive | negative | neutral',
49
+ confidence: 'Confidence score 0-1 (number)',
50
+ },
51
+ expectedTypes: { sentiment: 'string', confidence: 'number' },
52
+ },
53
+ {
54
+ name: 'Nested object',
55
+ prompt: 'Generate a fictional person living in Japan',
56
+ schema: {
57
+ person: { name: 'Full name', age: 'Age (number)' },
58
+ address: { city: 'City name', country: 'Country name' },
59
+ },
60
+ expectedTypes: { person: 'object', address: 'object' },
61
+ },
62
+ ]
63
+
64
+ // Test across models - start with fast tier for quick iteration
65
+ const modelVariants = createModelVariants({ tiers: ['fast'] })
66
+
67
+ evalite.each(modelVariants)('Structured Output', {
68
+ data: TEST_CASES.map(tc => ({ input: tc })),
69
+
70
+ task: async (input, variant) => {
71
+ const model = variant as EvalModel
72
+ const startTime = Date.now()
73
+
74
+ const { object, usage } = await generateObject({
75
+ model: model.id,
76
+ schema: schema(input.schema),
77
+ prompt: input.prompt,
78
+ })
79
+
80
+ const latencyMs = Date.now() - startTime
81
+
82
+ // Calculate cost from language-models pricing
83
+ const pricing = getModelPricing(model.id)
84
+ const cost = pricing
85
+ ? ((usage?.promptTokens ?? 0) * pricing.prompt + (usage?.completionTokens ?? 0) * pricing.completion) / 1_000_000
86
+ : 0
87
+
88
+ return {
89
+ object,
90
+ expectedTypes: input.expectedTypes,
91
+ testName: input.name,
92
+ modelId: model.id,
93
+ modelName: model.name,
94
+ provider: model.provider,
95
+ latencyMs,
96
+ cost,
97
+ usage,
98
+ }
99
+ },
100
+
101
+ scorers: [
102
+ // Type accuracy
103
+ {
104
+ name: 'Type Accuracy',
105
+ description: 'Whether output fields have correct types',
106
+ scorer: ({ output }) => {
107
+ const obj = output.object as Record<string, unknown>
108
+ const expected = output.expectedTypes as Record<string, string>
109
+ const fields = Object.keys(expected)
110
+
111
+ let correct = 0
112
+ for (const field of fields) {
113
+ const val = obj[field]
114
+ const expectedType = expected[field]
115
+ const actualType = Array.isArray(val) ? 'array' : typeof val
116
+
117
+ if (actualType === expectedType) correct++
118
+ }
119
+
120
+ return { score: correct / fields.length }
121
+ },
122
+ },
123
+
124
+ // Latency
125
+ {
126
+ name: 'Latency',
127
+ description: 'Response time (target < 3s)',
128
+ scorer: ({ output }) => {
129
+ const ms = output.latencyMs as number
130
+ if (ms < 2000) return { score: 1 }
131
+ if (ms > 10000) return { score: 0 }
132
+ return { score: 1 - (ms - 2000) / 8000 }
133
+ },
134
+ },
135
+ ],
136
+
137
+ columns: ({ output, scores }) => [
138
+ { label: 'Model', value: output.modelName },
139
+ { label: 'Test', value: output.testName },
140
+ { label: 'Latency', value: `${output.latencyMs}ms` },
141
+ { label: 'Cost', value: `$${(output.cost as number).toFixed(6)}` },
142
+ ],
143
+ })
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Writing Quality Eval (LLM-as-Judge)
3
+ *
4
+ * Tests model writing capabilities using LLM-as-judge scoring.
5
+ * Uses a strong model (sonnet) to judge output quality.
6
+ */
7
+
8
+ import { evalite } from 'evalite'
9
+ import { generateText, generateObject } from '../src/generate.js'
10
+ import { schema } from '../src/schema.js'
11
+ import { createModelVariants, type EvalModel } from '../src/eval/models.js'
12
+
13
+ // Use sonnet as the judge model
14
+ const JUDGE_MODEL = 'sonnet'
15
+
16
+ // Writing test cases
17
+ const TEST_CASES = [
18
+ {
19
+ name: 'Professional email',
20
+ prompt: 'Write a professional email declining a meeting invitation politely.',
21
+ criteria: ['Polite tone', 'Clear explanation', 'Proper email format'],
22
+ },
23
+ {
24
+ name: 'Product description',
25
+ prompt: 'Write a product description for wireless earbuds targeting tech-savvy consumers.',
26
+ criteria: ['Highlights features', 'Compelling language', 'Clear value proposition'],
27
+ },
28
+ {
29
+ name: 'Explanation',
30
+ prompt: 'Explain how photosynthesis works in simple terms for a high school student.',
31
+ criteria: ['Accurate content', 'Clear language', 'Logical flow'],
32
+ },
33
+ ]
34
+
35
+ const modelVariants = createModelVariants({ tiers: ['fast'] })
36
+
37
+ evalite.each(modelVariants)('Writing Quality', {
38
+ data: TEST_CASES.map(tc => ({ input: tc })),
39
+
40
+ task: async (input, variant) => {
41
+ const model = variant as EvalModel
42
+ const startTime = Date.now()
43
+
44
+ // Generate the writing
45
+ const { text, usage } = await generateText({
46
+ model: model.id,
47
+ prompt: input.prompt,
48
+ })
49
+
50
+ const latencyMs = Date.now() - startTime
51
+
52
+ return {
53
+ text,
54
+ testName: input.name,
55
+ criteria: input.criteria,
56
+ modelId: model.id,
57
+ modelName: model.name,
58
+ provider: model.provider,
59
+ latencyMs,
60
+ usage,
61
+ }
62
+ },
63
+
64
+ scorers: [
65
+ // LLM-as-judge for quality
66
+ {
67
+ name: 'Writing Quality',
68
+ description: 'LLM judge evaluation of writing quality',
69
+ scorer: async ({ input, output }) => {
70
+ const { object } = await generateObject({
71
+ model: JUDGE_MODEL,
72
+ schema: schema({
73
+ clarity: 'How clear is the writing? (number 0-1)',
74
+ engagement: 'How engaging is the content? (number 0-1)',
75
+ accuracy: 'How well does it meet the criteria? (number 0-1)',
76
+ reasoning: 'Brief explanation',
77
+ }),
78
+ prompt: `Evaluate this writing on a scale of 0-1.
79
+
80
+ Criteria: ${(input.criteria as string[]).join(', ')}
81
+
82
+ Writing:
83
+ """
84
+ ${output.text}
85
+ """`,
86
+ })
87
+
88
+ const avg = ((object.clarity as number) + (object.engagement as number) + (object.accuracy as number)) / 3
89
+ return {
90
+ score: avg,
91
+ metadata: object,
92
+ }
93
+ },
94
+ },
95
+
96
+ // Word count check
97
+ {
98
+ name: 'Appropriate Length',
99
+ description: 'Whether output has reasonable length',
100
+ scorer: ({ output }) => {
101
+ const words = (output.text as string).split(/\s+/).length
102
+ if (words < 20) return { score: 0.3, metadata: { words } }
103
+ if (words > 500) return { score: 0.7, metadata: { words } }
104
+ return { score: 1, metadata: { words } }
105
+ },
106
+ },
107
+ ],
108
+
109
+ columns: ({ output }) => [
110
+ { label: 'Model', value: output.modelName },
111
+ { label: 'Test', value: output.testName },
112
+ { label: 'Words', value: (output.text as string).split(/\s+/).length },
113
+ { label: 'Latency', value: `${output.latencyMs}ms` },
114
+ ],
115
+
116
+ trialCount: 2, // Run twice for variance
117
+ })
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Batch Blog Post Generation Example
3
+ *
4
+ * This example demonstrates the new IMPLICIT batch processing:
5
+ *
6
+ * ```ts
7
+ * // Configure once (or use environment variables)
8
+ * configure({ provider: 'openai', model: 'gpt-4o', batchMode: 'auto' })
9
+ *
10
+ * // Use naturally - batching is automatic!
11
+ * const titles = await list`10 blog post titles about startups`
12
+ * const posts = titles.map(title => write`blog post: # ${title}`)
13
+ * console.log(await posts) // Batched automatically!
14
+ * ```
15
+ *
16
+ * Environment variables:
17
+ * - AI_PROVIDER: openai | anthropic | cloudflare | bedrock
18
+ * - AI_MODEL: model name (e.g., gpt-4o, claude-sonnet-4-20250514)
19
+ * - AI_BATCH_MODE: auto | immediate | deferred
20
+ * - AI_BATCH_THRESHOLD: minimum items for auto batch (default: 5)
21
+ *
22
+ * @example
23
+ * ```bash
24
+ * # Using environment variables
25
+ * AI_PROVIDER=openai AI_MODEL=gpt-4o npx tsx examples/batch-blog-posts.ts
26
+ *
27
+ * # Or with API keys
28
+ * OPENAI_API_KEY=sk-... npx tsx examples/batch-blog-posts.ts
29
+ * ```
30
+ */
31
+
32
+ import {
33
+ list,
34
+ write,
35
+ configure,
36
+ withContext,
37
+ type BatchProvider,
38
+ } from '../src/index.js'
39
+
40
+ // Import the batch adapter for your provider
41
+ // import '../src/batch/openai.js'
42
+ // import '../src/batch/anthropic.js'
43
+ // import '../src/batch/cloudflare.js'
44
+ // import '../src/batch/bedrock.js'
45
+
46
+ // For testing, use the memory adapter
47
+ import '../src/batch/memory.js'
48
+
49
+ async function main() {
50
+ console.log('\n🚀 Implicit Batch Blog Post Generation\n')
51
+
52
+ // ============================================================================
53
+ // Option 1: Global Configuration (recommended)
54
+ // ============================================================================
55
+
56
+ configure({
57
+ provider: 'openai',
58
+ model: 'gpt-4o',
59
+ batchMode: 'auto', // 'auto' | 'immediate' | 'deferred'
60
+ batchThreshold: 5, // Use batch API when >= 5 items
61
+ })
62
+
63
+ console.log('📝 Step 1: Generate titles (executes immediately)...')
64
+ const titles = await list`10 blog post titles about building startups in 2026`
65
+
66
+ console.log(`\nGenerated ${(titles as any).length || 10} titles`)
67
+
68
+ // ============================================================================
69
+ // Option 2: The Clean API (what you asked for!)
70
+ // ============================================================================
71
+
72
+ console.log('\n⚡ Step 2: Map titles to blog posts (automatic batching)...')
73
+ console.log(' Code: titles.map(title => write`blog post: # ${title}`)')
74
+
75
+ // This is the API you wanted!
76
+ // - No explicit batch creation
77
+ // - No provider/model in the code
78
+ // - Automatic batch detection based on context
79
+ const posts = (titles as string[]).map(title =>
80
+ write`Write a comprehensive blog post for startup founders:
81
+
82
+ # ${title}
83
+
84
+ Include:
85
+ - Attention-grabbing introduction
86
+ - 3-5 key sections with actionable insights
87
+ - Real-world examples
88
+ - Compelling conclusion with call-to-action`
89
+ )
90
+
91
+ console.log(` Created ${posts.length} deferred operations`)
92
+
93
+ // When you await, it resolves via batch API if beneficial
94
+ console.log('\n⏳ Step 3: Await results (batched automatically)...')
95
+ // Note: Each item is an AIPromise, we'd await them all
96
+ // const results = await Promise.all(posts)
97
+
98
+ console.log('\n✅ Done!')
99
+
100
+ // ============================================================================
101
+ // Option 3: Scoped Context (for different providers in same code)
102
+ // ============================================================================
103
+
104
+ console.log('\n🔄 Bonus: Using withContext for scoped configuration...')
105
+
106
+ await withContext(
107
+ { provider: 'anthropic', model: 'claude-sonnet-4-20250514', batchMode: 'deferred' },
108
+ async () => {
109
+ console.log(' Inside context: Using Anthropic with deferred batching')
110
+ // All operations here use Anthropic
111
+ // const summaries = titles.map(title => write`summarize: ${title}`)
112
+ }
113
+ )
114
+
115
+ console.log(' Outside context: Back to OpenAI')
116
+ }
117
+
118
+ // ============================================================================
119
+ // Summary of the API
120
+ // ============================================================================
121
+
122
+ /*
123
+ The new API is clean and implicit:
124
+
125
+ 1. Configure once (globally or via environment):
126
+ ```ts
127
+ configure({ provider: 'openai', model: 'gpt-4o', batchMode: 'auto' })
128
+ ```
129
+
130
+ 2. Use naturally:
131
+ ```ts
132
+ const titles = await list`10 blog post titles`
133
+ const posts = titles.map(title => write`blog post: # ${title}`)
134
+ ```
135
+
136
+ 3. Batching happens automatically when:
137
+ - batchMode is 'auto' and items >= batchThreshold
138
+ - batchMode is 'deferred' (always batch)
139
+
140
+ 4. No batching when:
141
+ - batchMode is 'immediate'
142
+ - batchMode is 'auto' and items < batchThreshold
143
+
144
+ 5. Provider batch APIs supported:
145
+ - OpenAI: 50% discount, 24hr turnaround
146
+ - Anthropic: 50% discount, 24hr turnaround
147
+ - Cloudflare: Via AI Gateway
148
+ - AWS Bedrock: Native batch inference
149
+ */
150
+
151
+ // Run the example
152
+ main()
153
+ .then(() => {
154
+ console.log('\n✨ Example complete!\n')
155
+ process.exit(0)
156
+ })
157
+ .catch((error) => {
158
+ console.error('\n❌ Error:', error.message)
159
+ process.exit(1)
160
+ })