ai-functions 2.1.3 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +90 -1
  3. package/README.md +38 -0
  4. package/dist/ai-promise.d.ts +3 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +135 -64
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +51 -858
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts.map +1 -1
  56. package/dist/budget.js +27 -14
  57. package/dist/budget.js.map +1 -1
  58. package/dist/cache.d.ts +23 -0
  59. package/dist/cache.d.ts.map +1 -1
  60. package/dist/cache.js +36 -15
  61. package/dist/cache.js.map +1 -1
  62. package/dist/context.d.ts +26 -8
  63. package/dist/context.d.ts.map +1 -1
  64. package/dist/context.js +64 -62
  65. package/dist/context.js.map +1 -1
  66. package/dist/digital-objects-registry.d.ts +229 -0
  67. package/dist/digital-objects-registry.d.ts.map +1 -0
  68. package/dist/digital-objects-registry.js +617 -0
  69. package/dist/digital-objects-registry.js.map +1 -0
  70. package/dist/embeddings.d.ts +2 -2
  71. package/dist/embeddings.d.ts.map +1 -1
  72. package/dist/errors.d.ts +22 -0
  73. package/dist/errors.d.ts.map +1 -0
  74. package/dist/errors.js +35 -0
  75. package/dist/errors.js.map +1 -0
  76. package/dist/eval/runner.d.ts +8 -0
  77. package/dist/eval/runner.d.ts.map +1 -1
  78. package/dist/eval/runner.js +41 -35
  79. package/dist/eval/runner.js.map +1 -1
  80. package/dist/eval-log/in-memory.d.ts +34 -0
  81. package/dist/eval-log/in-memory.d.ts.map +1 -0
  82. package/dist/eval-log/in-memory.js +84 -0
  83. package/dist/eval-log/in-memory.js.map +1 -0
  84. package/dist/eval-log/index.d.ts +29 -0
  85. package/dist/eval-log/index.d.ts.map +1 -0
  86. package/dist/eval-log/index.js +39 -0
  87. package/dist/eval-log/index.js.map +1 -0
  88. package/dist/eval-log/types.d.ts +101 -0
  89. package/dist/eval-log/types.d.ts.map +1 -0
  90. package/dist/eval-log/types.js +16 -0
  91. package/dist/eval-log/types.js.map +1 -0
  92. package/dist/function-registry.d.ts +176 -0
  93. package/dist/function-registry.d.ts.map +1 -0
  94. package/dist/function-registry.js +685 -0
  95. package/dist/function-registry.js.map +1 -0
  96. package/dist/generate.d.ts +9 -3
  97. package/dist/generate.d.ts.map +1 -1
  98. package/dist/generate.js +18 -18
  99. package/dist/generate.js.map +1 -1
  100. package/dist/index.d.ts +18 -11
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js +35 -18
  103. package/dist/index.js.map +1 -1
  104. package/dist/logger.d.ts +118 -0
  105. package/dist/logger.d.ts.map +1 -0
  106. package/dist/logger.js +187 -0
  107. package/dist/logger.js.map +1 -0
  108. package/dist/middleware/budget.d.ts +84 -0
  109. package/dist/middleware/budget.d.ts.map +1 -0
  110. package/dist/middleware/budget.js +110 -0
  111. package/dist/middleware/budget.js.map +1 -0
  112. package/dist/middleware/cache.d.ts +103 -0
  113. package/dist/middleware/cache.d.ts.map +1 -0
  114. package/dist/middleware/cache.js +228 -0
  115. package/dist/middleware/cache.js.map +1 -0
  116. package/dist/middleware/embed-cache.d.ts +99 -0
  117. package/dist/middleware/embed-cache.d.ts.map +1 -0
  118. package/dist/middleware/embed-cache.js +128 -0
  119. package/dist/middleware/embed-cache.js.map +1 -0
  120. package/dist/middleware/index.d.ts +11 -0
  121. package/dist/middleware/index.d.ts.map +1 -0
  122. package/dist/middleware/index.js +11 -0
  123. package/dist/middleware/index.js.map +1 -0
  124. package/dist/middleware/trace.d.ts +103 -0
  125. package/dist/middleware/trace.d.ts.map +1 -0
  126. package/dist/middleware/trace.js +176 -0
  127. package/dist/middleware/trace.js.map +1 -0
  128. package/dist/primitives.d.ts +120 -1
  129. package/dist/primitives.d.ts.map +1 -1
  130. package/dist/primitives.js +398 -26
  131. package/dist/primitives.js.map +1 -1
  132. package/dist/retry.d.ts +66 -1
  133. package/dist/retry.d.ts.map +1 -1
  134. package/dist/retry.js +115 -8
  135. package/dist/retry.js.map +1 -1
  136. package/dist/sandbox.d.ts +36 -0
  137. package/dist/sandbox.d.ts.map +1 -0
  138. package/dist/sandbox.js +44 -0
  139. package/dist/sandbox.js.map +1 -0
  140. package/dist/schema.js +2 -2
  141. package/dist/schema.js.map +1 -1
  142. package/dist/telemetry.d.ts +128 -0
  143. package/dist/telemetry.d.ts.map +1 -0
  144. package/dist/telemetry.js +285 -0
  145. package/dist/telemetry.js.map +1 -0
  146. package/dist/template.d.ts.map +1 -1
  147. package/dist/template.js +6 -1
  148. package/dist/template.js.map +1 -1
  149. package/dist/tool-orchestration.d.ts +66 -4
  150. package/dist/tool-orchestration.d.ts.map +1 -1
  151. package/dist/tool-orchestration.js +123 -23
  152. package/dist/tool-orchestration.js.map +1 -1
  153. package/dist/type-guards.d.ts +28 -0
  154. package/dist/type-guards.d.ts.map +1 -0
  155. package/dist/type-guards.js +29 -0
  156. package/dist/type-guards.js.map +1 -0
  157. package/dist/types.d.ts +155 -19
  158. package/dist/types.d.ts.map +1 -1
  159. package/dist/types.js +36 -1
  160. package/dist/types.js.map +1 -1
  161. package/dist/wrap-for-v3.d.ts +80 -0
  162. package/dist/wrap-for-v3.d.ts.map +1 -0
  163. package/dist/wrap-for-v3.js +89 -0
  164. package/dist/wrap-for-v3.js.map +1 -0
  165. package/examples/00-quickstart.ts +232 -0
  166. package/examples/01-rag-chatbot.ts +212 -0
  167. package/examples/02-multi-agent-research.ts +290 -0
  168. package/examples/03-email-classification.ts +379 -0
  169. package/examples/04-content-moderation.ts +400 -0
  170. package/examples/05-document-extraction.ts +455 -0
  171. package/examples/06-streaming-chat-nextjs.ts +437 -0
  172. package/examples/07-cloudflare-worker.ts +483 -0
  173. package/examples/08-batch-processing.ts +491 -0
  174. package/examples/09-budget-constrained.ts +527 -0
  175. package/examples/10-tool-orchestration.ts +565 -0
  176. package/examples/11-retry-resilience.ts +403 -0
  177. package/examples/12-caching-strategies.ts +422 -0
  178. package/examples/README.md +145 -0
  179. package/package.json +29 -25
  180. package/src/ai-promise.ts +226 -140
  181. package/src/ai-schemas.ts +122 -0
  182. package/src/ai.ts +71 -1176
  183. package/src/batch/anthropic.ts +96 -161
  184. package/src/batch/bedrock.ts +203 -454
  185. package/src/batch/cloudflare.ts +99 -282
  186. package/src/batch/google.ts +91 -297
  187. package/src/batch/index.ts +4 -1
  188. package/src/batch/memory.ts +15 -10
  189. package/src/batch/openai.ts +65 -193
  190. package/src/batch/provider.ts +336 -0
  191. package/src/batch-map.ts +29 -24
  192. package/src/batch-queue.ts +200 -11
  193. package/src/budget.ts +31 -18
  194. package/src/cache.ts +45 -17
  195. package/src/context.ts +106 -77
  196. package/src/digital-objects-registry.ts +750 -0
  197. package/src/errors.ts +37 -0
  198. package/src/eval/runner.ts +60 -36
  199. package/src/eval-log/in-memory.ts +90 -0
  200. package/src/eval-log/index.ts +46 -0
  201. package/src/eval-log/types.ts +110 -0
  202. package/src/function-registry.ts +874 -0
  203. package/src/generate.ts +33 -28
  204. package/src/index.ts +122 -21
  205. package/src/logger.ts +232 -0
  206. package/src/middleware/budget.ts +171 -0
  207. package/src/middleware/cache.ts +299 -0
  208. package/src/middleware/embed-cache.ts +195 -0
  209. package/src/middleware/index.ts +23 -0
  210. package/src/middleware/trace.ts +248 -0
  211. package/src/primitives.ts +589 -62
  212. package/src/retry.ts +144 -18
  213. package/src/sandbox.ts +52 -0
  214. package/src/schema.ts +8 -8
  215. package/src/telemetry.ts +403 -0
  216. package/src/template.ts +8 -4
  217. package/src/tool-orchestration.ts +213 -48
  218. package/src/type-guards.ts +31 -0
  219. package/src/types.ts +186 -27
  220. package/src/wrap-for-v3.ts +105 -0
  221. package/test/ai-promise.test.ts +1080 -0
  222. package/test/ai-proxy.test.ts +1 -1
  223. package/test/batch-autosubmit-errors.test.ts +49 -37
  224. package/test/batch-blog-posts.test.ts +87 -129
  225. package/test/core-functions.test.ts +183 -579
  226. package/test/decide.test.ts +154 -322
  227. package/test/define.test.ts +211 -8
  228. package/test/digital-objects-registry.test.ts +760 -0
  229. package/test/embedding-cache-middleware.test.ts +140 -0
  230. package/test/fill-template.test.ts +89 -0
  231. package/test/generate-core.test.ts +140 -229
  232. package/test/implicit-batch.test.ts +22 -65
  233. package/test/retry-policy-integration.test.ts +117 -0
  234. package/test/sandbox-execution.test.ts +155 -0
  235. package/test/schema.test.ts +55 -19
  236. package/test/template.test.ts +1164 -0
  237. package/test/tool-orchestration.test.ts +270 -0
  238. package/test/wrap-for-v3.test.ts +612 -0
  239. package/vitest.config.js +6 -0
  240. package/vitest.config.ts +20 -0
  241. package/LICENSE +0 -21
  242. package/dist/rpc/auth.d.ts +0 -69
  243. package/dist/rpc/auth.d.ts.map +0 -1
  244. package/dist/rpc/auth.js +0 -136
  245. package/dist/rpc/auth.js.map +0 -1
  246. package/dist/rpc/client.d.ts +0 -62
  247. package/dist/rpc/client.d.ts.map +0 -1
  248. package/dist/rpc/client.js +0 -103
  249. package/dist/rpc/client.js.map +0 -1
  250. package/dist/rpc/deferred.d.ts +0 -60
  251. package/dist/rpc/deferred.d.ts.map +0 -1
  252. package/dist/rpc/deferred.js +0 -96
  253. package/dist/rpc/deferred.js.map +0 -1
  254. package/dist/rpc/index.d.ts +0 -22
  255. package/dist/rpc/index.d.ts.map +0 -1
  256. package/dist/rpc/index.js +0 -38
  257. package/dist/rpc/index.js.map +0 -1
  258. package/dist/rpc/local.d.ts +0 -42
  259. package/dist/rpc/local.d.ts.map +0 -1
  260. package/dist/rpc/local.js +0 -50
  261. package/dist/rpc/local.js.map +0 -1
  262. package/dist/rpc/server.d.ts +0 -165
  263. package/dist/rpc/server.d.ts.map +0 -1
  264. package/dist/rpc/server.js +0 -405
  265. package/dist/rpc/server.js.map +0 -1
  266. package/dist/rpc/session.d.ts +0 -32
  267. package/dist/rpc/session.d.ts.map +0 -1
  268. package/dist/rpc/session.js +0 -43
  269. package/dist/rpc/session.js.map +0 -1
  270. package/dist/rpc/transport.d.ts +0 -306
  271. package/dist/rpc/transport.d.ts.map +0 -1
  272. package/dist/rpc/transport.js +0 -731
  273. package/dist/rpc/transport.js.map +0 -1
  274. package/src/batch/anthropic.js +0 -256
  275. package/src/batch/bedrock.js +0 -584
  276. package/src/batch/cloudflare.js +0 -287
  277. package/src/batch/google.js +0 -359
  278. package/src/batch/index.js +0 -30
  279. package/src/batch/memory.js +0 -187
  280. package/src/batch/openai.js +0 -402
  281. package/src/eval/index.js +0 -7
  282. package/src/eval/models.js +0 -119
  283. package/src/eval/runner.js +0 -147
  284. package/test/schema.test.js +0 -96
@@ -1,119 +0,0 @@
1
- /**
2
- * Model Registry for AI Functions Eval Suite
3
- *
4
- * Simple model list for running evals across providers.
5
- * Uses ai-providers/language-models for resolution and pricing.
6
- *
7
- * @packageDocumentation
8
- */
9
- import { resolve, get } from 'language-models';
10
- // ============================================================================
11
- // Models to evaluate - using aliases from language-models
12
- // ============================================================================
13
- /**
14
- * Core models to test - one per tier per major provider
15
- * These resolve via ai-providers to OpenRouter or direct SDKs
16
- *
17
- * Updated: December 2025
18
- *
19
- * Note: Some models use OpenRouter format (provider/model) to avoid
20
- * resolution issues with provider_model_id mismatches.
21
- */
22
- export const EVAL_MODELS = [
23
- // Anthropic Claude 4.5 - via AWS Bedrock (uses AWS credits with bearer token auth)
24
- // All Claude models should be 4.5 - older versions are deprecated
25
- { id: 'bedrock:us.anthropic.claude-opus-4-5-20251101-v1:0', name: 'Claude Opus 4.5', provider: 'anthropic', tier: 'best', notes: 'Bedrock' },
26
- { id: 'bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0', name: 'Claude Sonnet 4.5', provider: 'anthropic', tier: 'fast', notes: 'Bedrock' },
27
- { id: 'bedrock:us.anthropic.claude-haiku-4-5-20251001-v1:0', name: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'cheap', notes: 'Bedrock' },
28
- // OpenAI - GPT-5.1 variants + GPT-oss (open source)
29
- { id: 'openai/o3', name: 'o3', provider: 'openai', tier: 'best' },
30
- { id: 'openai/gpt-5.1', name: 'GPT-5.1', provider: 'openai', tier: 'best' },
31
- { id: 'openai/gpt-5-mini', name: 'GPT-5 Mini', provider: 'openai', tier: 'fast' },
32
- { id: 'openai/gpt-5-nano', name: 'GPT-5 Nano', provider: 'openai', tier: 'cheap' },
33
- // GPT-oss 120B removed - times out frequently
34
- { id: 'openai/gpt-oss-20b', name: 'GPT-oss 20B', provider: 'openai', tier: 'fast', notes: 'Open source' },
35
- // Google - Gemini 3 (November 2025)
36
- { id: 'google/gemini-3-pro-preview', name: 'Gemini 3 Pro', provider: 'google', tier: 'best', notes: '1M context, #1 LMArena' },
37
- // Gemini 2.5 Pro removed - times out frequently
38
- { id: 'flash', name: 'Gemini 2.5 Flash', provider: 'google', tier: 'fast' },
39
- // Meta (via OpenRouter)
40
- { id: 'meta-llama/llama-4-maverick', name: 'Llama 4 Maverick', provider: 'meta-llama', tier: 'best' },
41
- { id: 'meta-llama/llama-3.3-70b-instruct', name: 'Llama 3.3 70B', provider: 'meta-llama', tier: 'fast' },
42
- // DeepSeek - V3.2 (December 2025)
43
- { id: 'deepseek/deepseek-v3.2', name: 'DeepSeek V3.2', provider: 'deepseek', tier: 'best', notes: 'GPT-5 class reasoning' },
44
- // DeepSeek V3.2 Speciale removed - no tool use support on OpenRouter
45
- { id: 'deepseek/deepseek-chat', name: 'DeepSeek Chat', provider: 'deepseek', tier: 'fast' },
46
- // Mistral - Mistral 3 family (December 2025)
47
- { id: 'mistralai/mistral-large-2512', name: 'Mistral Large 3', provider: 'mistralai', tier: 'best', notes: '675B MoE, 41B active' },
48
- { id: 'mistralai/mistral-medium-3.1', name: 'Mistral Medium 3.1', provider: 'mistralai', tier: 'fast' },
49
- // Ministral 3 14B removed - often fails structured output
50
- // Qwen - Qwen3 family (2025)
51
- { id: 'qwen/qwen3-coder', name: 'Qwen3 Coder 480B', provider: 'qwen', tier: 'best', notes: 'Agentic coding' },
52
- { id: 'qwen/qwen3-30b-a3b', name: 'Qwen3 30B', provider: 'qwen', tier: 'fast', notes: 'MoE 30B/3B active' },
53
- { id: 'qwen/qwen3-next-80b-a3b-instruct', name: 'Qwen3 Next 80B', provider: 'qwen', tier: 'best', notes: 'Ultra-long context' },
54
- // xAI - Grok 4 family (December 2025)
55
- { id: 'x-ai/grok-4', name: 'Grok 4', provider: 'x-ai', tier: 'best', notes: '256K context, reasoning' },
56
- { id: 'x-ai/grok-4.1-fast', name: 'Grok 4.1 Fast', provider: 'x-ai', tier: 'fast', notes: '2M context, agentic' },
57
- { id: 'x-ai/grok-4-fast', name: 'Grok 4 Fast', provider: 'x-ai', tier: 'fast', notes: '2M context' },
58
- ];
59
- /**
60
- * Get models by tier
61
- */
62
- export function getModelsByTier(tier) {
63
- return EVAL_MODELS.filter(m => m.tier === tier);
64
- }
65
- /**
66
- * Get models by provider
67
- */
68
- export function getModelsByProvider(provider) {
69
- return EVAL_MODELS.filter(m => m.provider === provider);
70
- }
71
- /**
72
- * Get model info from language-models package (includes pricing)
73
- */
74
- export function getModelInfo(id) {
75
- const resolved = resolve(id);
76
- return get(resolved);
77
- }
78
- /**
79
- * Get pricing for a model (from OpenRouter data)
80
- */
81
- export function getModelPricing(id) {
82
- const info = getModelInfo(id);
83
- if (!info?.pricing)
84
- return undefined;
85
- return {
86
- prompt: parseFloat(info.pricing.prompt) * 1_000_000, // Convert to per-million
87
- completion: parseFloat(info.pricing.completion) * 1_000_000,
88
- };
89
- }
90
- /**
91
- * Create evalite variants for model testing
92
- */
93
- export function createModelVariants(opts) {
94
- let models = EVAL_MODELS;
95
- if (opts?.tiers) {
96
- models = models.filter(m => opts.tiers.includes(m.tier));
97
- }
98
- if (opts?.providers) {
99
- models = models.filter(m => opts.providers.includes(m.provider));
100
- }
101
- return models.map(model => ({
102
- name: `${model.provider}/${model.name}`,
103
- input: model,
104
- }));
105
- }
106
- /**
107
- * Get a representative model from each provider for a given tier
108
- */
109
- export function getRepresentativeModels(tier) {
110
- const seen = new Set();
111
- const result = [];
112
- for (const model of EVAL_MODELS) {
113
- if (model.tier === tier && !seen.has(model.provider)) {
114
- seen.add(model.provider);
115
- result.push(model);
116
- }
117
- }
118
- return result;
119
- }
@@ -1,147 +0,0 @@
1
- /**
2
- * Simple eval runner for AI Functions
3
- *
4
- * Runs evals across multiple models and collects results.
5
- * Does not depend on evalite - uses our own infrastructure.
6
- */
7
- import { generateObject, generateText } from '../generate.js';
8
- import { schema } from '../schema.js';
9
- import { createModelVariants, getModelPricing } from './models.js';
10
- /**
11
- * Run an eval suite across models
12
- */
13
- export async function runEval(options) {
14
- const { name, cases, task, scorers, concurrency = 3 } = options;
15
- // Get models to test
16
- const models = options.models ?? createModelVariants({
17
- tiers: options.tiers,
18
- providers: options.providers,
19
- }).map(v => v.input);
20
- const results = [];
21
- const startTime = Date.now();
22
- console.log(`\n🧪 Running eval: ${name}`);
23
- console.log(` Models: ${models.map(m => m.name).join(', ')}`);
24
- console.log(` Cases: ${cases.length}`);
25
- console.log('');
26
- // Run all model/case combinations
27
- const jobs = [];
28
- for (const model of models) {
29
- for (const evalCase of cases) {
30
- jobs.push({ model, case: evalCase });
31
- }
32
- }
33
- // Process in batches with concurrency limit
34
- for (let i = 0; i < jobs.length; i += concurrency) {
35
- const batch = jobs.slice(i, i + concurrency);
36
- const batchResults = await Promise.all(batch.map(async (job) => {
37
- const caseStart = Date.now();
38
- try {
39
- // Run the task
40
- const output = await task(job.case.input, job.model);
41
- const latencyMs = Date.now() - caseStart;
42
- // Run scorers
43
- const scores = [];
44
- for (const s of scorers) {
45
- try {
46
- const score = await s.scorer({
47
- input: job.case.input,
48
- output,
49
- expected: job.case.expected,
50
- });
51
- scores.push({
52
- name: s.name,
53
- score: Math.max(0, Math.min(1, score)),
54
- description: s.description,
55
- });
56
- }
57
- catch (err) {
58
- scores.push({
59
- name: s.name,
60
- score: 0,
61
- description: s.description,
62
- metadata: { error: String(err) },
63
- });
64
- }
65
- }
66
- // Calculate cost
67
- const pricing = getModelPricing(job.model.id);
68
- // Estimate tokens - rough approximation
69
- const estimatedPromptTokens = 100;
70
- const estimatedCompletionTokens = 200;
71
- const cost = pricing
72
- ? (estimatedPromptTokens * pricing.prompt + estimatedCompletionTokens * pricing.completion) / 1_000_000
73
- : 0;
74
- const avgScore = scores.length > 0
75
- ? scores.reduce((sum, s) => sum + s.score, 0) / scores.length
76
- : 0;
77
- const symbol = avgScore >= 0.8 ? '✓' : avgScore >= 0.5 ? '~' : '✗';
78
- console.log(` ${symbol} ${job.model.name} | ${job.case.name} | ${(avgScore * 100).toFixed(0)}% | ${latencyMs}ms`);
79
- return {
80
- model: job.model,
81
- case: job.case,
82
- output,
83
- scores,
84
- latencyMs,
85
- cost,
86
- };
87
- }
88
- catch (err) {
89
- console.log(` ✗ ${job.model.name} | ${job.case.name} | ERROR: ${err}`);
90
- return {
91
- model: job.model,
92
- case: job.case,
93
- output: null,
94
- scores: scorers.map(s => ({ name: s.name, score: 0 })),
95
- latencyMs: Date.now() - caseStart,
96
- cost: 0,
97
- error: String(err),
98
- };
99
- }
100
- }));
101
- results.push(...batchResults);
102
- }
103
- // Calculate summary
104
- const totalTime = Date.now() - startTime;
105
- const totalCost = results.reduce((sum, r) => sum + r.cost, 0);
106
- const allScores = results.flatMap(r => r.scores.map(s => s.score));
107
- const avgScore = allScores.length > 0
108
- ? allScores.reduce((a, b) => a + b, 0) / allScores.length
109
- : 0;
110
- // Group by model
111
- const byModel = {};
112
- for (const result of results) {
113
- const modelKey = result.model.id;
114
- if (!byModel[modelKey]) {
115
- byModel[modelKey] = { avgScore: 0, count: 0 };
116
- }
117
- const resultAvg = result.scores.reduce((sum, s) => sum + s.score, 0) / result.scores.length;
118
- byModel[modelKey].avgScore += resultAvg;
119
- byModel[modelKey].count++;
120
- }
121
- for (const key of Object.keys(byModel)) {
122
- const entry = byModel[key];
123
- if (entry) {
124
- entry.avgScore /= entry.count;
125
- }
126
- }
127
- console.log('');
128
- console.log(`📊 Results:`);
129
- console.log(` Overall: ${(avgScore * 100).toFixed(1)}%`);
130
- console.log(` Time: ${(totalTime / 1000).toFixed(1)}s`);
131
- console.log(` Cost: $${totalCost.toFixed(4)}`);
132
- console.log('');
133
- console.log(' By Model:');
134
- for (const [modelId, stats] of Object.entries(byModel)) {
135
- console.log(` - ${modelId}: ${(stats.avgScore * 100).toFixed(1)}%`);
136
- }
137
- return {
138
- name,
139
- results,
140
- avgScore,
141
- byModel,
142
- totalCost,
143
- totalTime,
144
- };
145
- }
146
- // Re-export helpers
147
- export { generateObject, generateText, schema };
@@ -1,96 +0,0 @@
1
- /**
2
- * Tests for schema conversion
3
- *
4
- * These are pure unit tests - no AI calls needed.
5
- */
6
- import { describe, it, expect } from 'vitest';
7
- import { schema } from '../src/index.js';
8
- import { z } from 'zod';
9
- describe('schema', () => {
10
- describe('string types', () => {
11
- it('converts simple string description to z.string()', () => {
12
- const result = schema('User name');
13
- expect(result._def.typeName).toBe('ZodString');
14
- expect(result._def.description).toBe('User name');
15
- });
16
- it('converts (number) hint to z.number()', () => {
17
- const result = schema('User age (number)');
18
- expect(result._def.typeName).toBe('ZodNumber');
19
- expect(result._def.description).toBe('User age');
20
- });
21
- it('converts (boolean) hint to z.boolean()', () => {
22
- const result = schema('Is active (boolean)');
23
- expect(result._def.typeName).toBe('ZodBoolean');
24
- expect(result._def.description).toBe('Is active');
25
- });
26
- it('converts (integer) hint to z.number().int()', () => {
27
- const result = schema('Item count (integer)');
28
- expect(result._def.typeName).toBe('ZodNumber');
29
- expect(result._def.checks?.some((c) => c.kind === 'int')).toBe(true);
30
- });
31
- it('converts (date) hint to z.string().datetime()', () => {
32
- const result = schema('Created at (date)');
33
- expect(result._def.typeName).toBe('ZodString');
34
- expect(result._def.checks?.some((c) => c.kind === 'datetime')).toBe(true);
35
- });
36
- });
37
- describe('enum types', () => {
38
- it('converts pipe-separated values to z.enum()', () => {
39
- const result = schema('pending | done | cancelled');
40
- expect(result._def.typeName).toBe('ZodEnum');
41
- expect(result._def.values).toEqual(['pending', 'done', 'cancelled']);
42
- });
43
- it('handles spaces around pipe', () => {
44
- const result = schema('yes | no | maybe');
45
- expect(result._def.values).toEqual(['yes', 'no', 'maybe']);
46
- });
47
- });
48
- describe('array types', () => {
49
- it('converts [string] to z.array(z.string())', () => {
50
- const result = schema(['List of items']);
51
- expect(result._def.typeName).toBe('ZodArray');
52
- expect(result._def.type._def.typeName).toBe('ZodString');
53
- expect(result._def.description).toBe('List of items');
54
- });
55
- });
56
- describe('object types', () => {
57
- it('converts object to z.object()', () => {
58
- const result = schema({
59
- name: 'User name',
60
- age: 'Age (number)',
61
- });
62
- expect(result._def.typeName).toBe('ZodObject');
63
- });
64
- it('handles nested objects', () => {
65
- const result = schema({
66
- user: {
67
- name: 'Name',
68
- profile: {
69
- bio: 'Bio',
70
- },
71
- },
72
- });
73
- expect(result._def.typeName).toBe('ZodObject');
74
- });
75
- it('handles mixed types in object', () => {
76
- const result = schema({
77
- name: 'Name',
78
- count: 'Count (number)',
79
- active: 'Active (boolean)',
80
- status: 'pending | done',
81
- tags: ['Tags'],
82
- });
83
- expect(result._def.typeName).toBe('ZodObject');
84
- });
85
- });
86
- describe('zod passthrough', () => {
87
- it('passes through existing zod schemas', () => {
88
- const zodSchema = z.object({
89
- name: z.string(),
90
- age: z.number(),
91
- });
92
- const result = schema(zodSchema);
93
- expect(result).toBe(zodSchema);
94
- });
95
- });
96
- });