ai-functions 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/.turbo/turbo-build.log +1 -4
  2. package/CHANGELOG.md +68 -1
  3. package/README.md +397 -157
  4. package/dist/ai-promise.d.ts +50 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +410 -51
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +54 -837
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts +272 -0
  56. package/dist/budget.d.ts.map +1 -0
  57. package/dist/budget.js +513 -0
  58. package/dist/budget.js.map +1 -0
  59. package/dist/cache.d.ts +295 -0
  60. package/dist/cache.d.ts.map +1 -0
  61. package/dist/cache.js +433 -0
  62. package/dist/cache.js.map +1 -0
  63. package/dist/context.d.ts +42 -8
  64. package/dist/context.d.ts.map +1 -1
  65. package/dist/context.js +64 -62
  66. package/dist/context.js.map +1 -1
  67. package/dist/digital-objects-registry.d.ts +229 -0
  68. package/dist/digital-objects-registry.d.ts.map +1 -0
  69. package/dist/digital-objects-registry.js +617 -0
  70. package/dist/digital-objects-registry.js.map +1 -0
  71. package/dist/embeddings.d.ts +2 -2
  72. package/dist/embeddings.d.ts.map +1 -1
  73. package/dist/errors.d.ts +22 -0
  74. package/dist/errors.d.ts.map +1 -0
  75. package/dist/errors.js +35 -0
  76. package/dist/errors.js.map +1 -0
  77. package/dist/eval/runner.d.ts +10 -1
  78. package/dist/eval/runner.d.ts.map +1 -1
  79. package/dist/eval/runner.js +41 -35
  80. package/dist/eval/runner.js.map +1 -1
  81. package/dist/eval-log/in-memory.d.ts +34 -0
  82. package/dist/eval-log/in-memory.d.ts.map +1 -0
  83. package/dist/eval-log/in-memory.js +84 -0
  84. package/dist/eval-log/in-memory.js.map +1 -0
  85. package/dist/eval-log/index.d.ts +29 -0
  86. package/dist/eval-log/index.d.ts.map +1 -0
  87. package/dist/eval-log/index.js +39 -0
  88. package/dist/eval-log/index.js.map +1 -0
  89. package/dist/eval-log/types.d.ts +101 -0
  90. package/dist/eval-log/types.d.ts.map +1 -0
  91. package/dist/eval-log/types.js +16 -0
  92. package/dist/eval-log/types.js.map +1 -0
  93. package/dist/function-registry.d.ts +116 -0
  94. package/dist/function-registry.d.ts.map +1 -0
  95. package/dist/function-registry.js +546 -0
  96. package/dist/function-registry.js.map +1 -0
  97. package/dist/generate.d.ts +9 -3
  98. package/dist/generate.d.ts.map +1 -1
  99. package/dist/generate.js +18 -22
  100. package/dist/generate.js.map +1 -1
  101. package/dist/index.d.ts +35 -20
  102. package/dist/index.d.ts.map +1 -1
  103. package/dist/index.js +89 -42
  104. package/dist/index.js.map +1 -1
  105. package/dist/logger.d.ts +118 -0
  106. package/dist/logger.d.ts.map +1 -0
  107. package/dist/logger.js +187 -0
  108. package/dist/logger.js.map +1 -0
  109. package/dist/middleware/budget.d.ts +84 -0
  110. package/dist/middleware/budget.d.ts.map +1 -0
  111. package/dist/middleware/budget.js +110 -0
  112. package/dist/middleware/budget.js.map +1 -0
  113. package/dist/middleware/cache.d.ts +103 -0
  114. package/dist/middleware/cache.d.ts.map +1 -0
  115. package/dist/middleware/cache.js +228 -0
  116. package/dist/middleware/cache.js.map +1 -0
  117. package/dist/middleware/embed-cache.d.ts +99 -0
  118. package/dist/middleware/embed-cache.d.ts.map +1 -0
  119. package/dist/middleware/embed-cache.js +128 -0
  120. package/dist/middleware/embed-cache.js.map +1 -0
  121. package/dist/middleware/index.d.ts +11 -0
  122. package/dist/middleware/index.d.ts.map +1 -0
  123. package/dist/middleware/index.js +11 -0
  124. package/dist/middleware/index.js.map +1 -0
  125. package/dist/middleware/trace.d.ts +103 -0
  126. package/dist/middleware/trace.d.ts.map +1 -0
  127. package/dist/middleware/trace.js +176 -0
  128. package/dist/middleware/trace.js.map +1 -0
  129. package/dist/primitives.d.ts +120 -1
  130. package/dist/primitives.d.ts.map +1 -1
  131. package/dist/primitives.js +398 -26
  132. package/dist/primitives.js.map +1 -1
  133. package/dist/retry.d.ts +368 -0
  134. package/dist/retry.d.ts.map +1 -0
  135. package/dist/retry.js +646 -0
  136. package/dist/retry.js.map +1 -0
  137. package/dist/schema.d.ts.map +1 -1
  138. package/dist/schema.js +2 -10
  139. package/dist/schema.js.map +1 -1
  140. package/dist/telemetry.d.ts +128 -0
  141. package/dist/telemetry.d.ts.map +1 -0
  142. package/dist/telemetry.js +285 -0
  143. package/dist/telemetry.js.map +1 -0
  144. package/dist/template.d.ts.map +1 -1
  145. package/dist/template.js +6 -1
  146. package/dist/template.js.map +1 -1
  147. package/dist/tool-orchestration.d.ts +453 -0
  148. package/dist/tool-orchestration.d.ts.map +1 -0
  149. package/dist/tool-orchestration.js +763 -0
  150. package/dist/tool-orchestration.js.map +1 -0
  151. package/dist/type-guards.d.ts +28 -0
  152. package/dist/type-guards.d.ts.map +1 -0
  153. package/dist/type-guards.js +29 -0
  154. package/dist/type-guards.js.map +1 -0
  155. package/dist/types.d.ts +135 -17
  156. package/dist/types.d.ts.map +1 -1
  157. package/dist/types.js +36 -1
  158. package/dist/types.js.map +1 -1
  159. package/dist/wrap-for-v3.d.ts +80 -0
  160. package/dist/wrap-for-v3.d.ts.map +1 -0
  161. package/dist/wrap-for-v3.js +89 -0
  162. package/dist/wrap-for-v3.js.map +1 -0
  163. package/examples/00-quickstart.ts +232 -0
  164. package/examples/01-rag-chatbot.ts +212 -0
  165. package/examples/02-multi-agent-research.ts +290 -0
  166. package/examples/03-email-classification.ts +379 -0
  167. package/examples/04-content-moderation.ts +400 -0
  168. package/examples/05-document-extraction.ts +455 -0
  169. package/examples/06-streaming-chat-nextjs.ts +437 -0
  170. package/examples/07-cloudflare-worker.ts +483 -0
  171. package/examples/08-batch-processing.ts +491 -0
  172. package/examples/09-budget-constrained.ts +527 -0
  173. package/examples/10-tool-orchestration.ts +565 -0
  174. package/examples/11-retry-resilience.ts +403 -0
  175. package/examples/12-caching-strategies.ts +422 -0
  176. package/examples/README.md +145 -0
  177. package/package.json +10 -6
  178. package/src/ai-promise.ts +528 -99
  179. package/src/ai-schemas.ts +122 -0
  180. package/src/ai.ts +69 -1153
  181. package/src/batch/anthropic.ts +96 -161
  182. package/src/batch/bedrock.ts +203 -454
  183. package/src/batch/cloudflare.ts +99 -282
  184. package/src/batch/google.ts +91 -297
  185. package/src/batch/index.ts +4 -1
  186. package/src/batch/memory.ts +15 -10
  187. package/src/batch/openai.ts +65 -193
  188. package/src/batch/provider.ts +336 -0
  189. package/src/batch-map.ts +29 -24
  190. package/src/batch-queue.ts +200 -11
  191. package/src/budget.ts +740 -0
  192. package/src/cache.ts +681 -0
  193. package/src/context.ts +122 -76
  194. package/src/digital-objects-registry.ts +750 -0
  195. package/src/errors.ts +37 -0
  196. package/src/eval/runner.ts +63 -38
  197. package/src/eval-log/in-memory.ts +90 -0
  198. package/src/eval-log/index.ts +46 -0
  199. package/src/eval-log/types.ts +110 -0
  200. package/src/function-registry.ts +671 -0
  201. package/src/generate.ts +33 -33
  202. package/src/index.ts +325 -49
  203. package/src/logger.ts +232 -0
  204. package/src/middleware/budget.ts +171 -0
  205. package/src/middleware/cache.ts +299 -0
  206. package/src/middleware/embed-cache.ts +195 -0
  207. package/src/middleware/index.ts +23 -0
  208. package/src/middleware/trace.ts +248 -0
  209. package/src/primitives.ts +589 -62
  210. package/src/retry.ts +902 -0
  211. package/src/schema.ts +8 -17
  212. package/src/telemetry.ts +403 -0
  213. package/src/template.ts +8 -4
  214. package/src/tool-orchestration.ts +1173 -0
  215. package/src/type-guards.ts +31 -0
  216. package/src/types.ts +164 -25
  217. package/src/wrap-for-v3.ts +105 -0
  218. package/test/ai-promise.test.ts +1080 -0
  219. package/test/ai-proxy.test.ts +1 -1
  220. package/test/backward-compat.test.ts +147 -0
  221. package/test/batch-autosubmit-errors.test.ts +610 -0
  222. package/test/batch-blog-posts.test.ts +87 -129
  223. package/test/budget-tracking.test.ts +800 -0
  224. package/test/cache.test.ts +712 -0
  225. package/test/context-isolation.test.ts +687 -0
  226. package/test/core-functions.test.ts +183 -579
  227. package/test/decide.test.ts +154 -322
  228. package/test/define.test.ts +211 -8
  229. package/test/digital-objects-registry.test.ts +760 -0
  230. package/test/embedding-cache-middleware.test.ts +140 -0
  231. package/test/evals/deterministic.eval.test.ts +376 -0
  232. package/test/generate-core.test.ts +140 -229
  233. package/test/implicit-batch.test.ts +22 -65
  234. package/test/json-parse-error-handling.test.ts +463 -0
  235. package/test/retry-policy-integration.test.ts +117 -0
  236. package/test/retry.test.ts +1016 -0
  237. package/test/schema.test.ts +55 -19
  238. package/test/streaming.test.ts +316 -0
  239. package/test/template.test.ts +1164 -0
  240. package/test/tool-orchestration.test.ts +1040 -0
  241. package/test/wrap-for-v3.test.ts +612 -0
  242. package/vitest.config.js +6 -0
  243. package/vitest.config.ts +20 -0
  244. package/dist/rpc/auth.d.ts +0 -69
  245. package/dist/rpc/auth.d.ts.map +0 -1
  246. package/dist/rpc/auth.js +0 -136
  247. package/dist/rpc/auth.js.map +0 -1
  248. package/dist/rpc/client.d.ts +0 -62
  249. package/dist/rpc/client.d.ts.map +0 -1
  250. package/dist/rpc/client.js +0 -103
  251. package/dist/rpc/client.js.map +0 -1
  252. package/dist/rpc/deferred.d.ts +0 -60
  253. package/dist/rpc/deferred.d.ts.map +0 -1
  254. package/dist/rpc/deferred.js +0 -96
  255. package/dist/rpc/deferred.js.map +0 -1
  256. package/dist/rpc/index.d.ts +0 -22
  257. package/dist/rpc/index.d.ts.map +0 -1
  258. package/dist/rpc/index.js +0 -38
  259. package/dist/rpc/index.js.map +0 -1
  260. package/dist/rpc/local.d.ts +0 -42
  261. package/dist/rpc/local.d.ts.map +0 -1
  262. package/dist/rpc/local.js +0 -50
  263. package/dist/rpc/local.js.map +0 -1
  264. package/dist/rpc/server.d.ts +0 -165
  265. package/dist/rpc/server.d.ts.map +0 -1
  266. package/dist/rpc/server.js +0 -405
  267. package/dist/rpc/server.js.map +0 -1
  268. package/dist/rpc/session.d.ts +0 -32
  269. package/dist/rpc/session.d.ts.map +0 -1
  270. package/dist/rpc/session.js +0 -43
  271. package/dist/rpc/session.js.map +0 -1
  272. package/dist/rpc/transport.d.ts +0 -306
  273. package/dist/rpc/transport.d.ts.map +0 -1
  274. package/dist/rpc/transport.js +0 -731
  275. package/dist/rpc/transport.js.map +0 -1
  276. package/src/batch/anthropic.js +0 -256
  277. package/src/batch/bedrock.js +0 -584
  278. package/src/batch/cloudflare.js +0 -287
  279. package/src/batch/google.js +0 -359
  280. package/src/batch/index.js +0 -30
  281. package/src/batch/memory.js +0 -187
  282. package/src/batch/openai.js +0 -402
  283. package/src/eval/index.js +0 -7
  284. package/src/eval/models.js +0 -119
  285. package/src/eval/runner.js +0 -147
  286. package/test/schema.test.js +0 -96
@@ -1,119 +0,0 @@
1
- /**
2
- * Model Registry for AI Functions Eval Suite
3
- *
4
- * Simple model list for running evals across providers.
5
- * Uses ai-providers/language-models for resolution and pricing.
6
- *
7
- * @packageDocumentation
8
- */
9
- import { resolve, get } from 'language-models';
10
- // ============================================================================
11
- // Models to evaluate - using aliases from language-models
12
- // ============================================================================
13
- /**
14
- * Core models to test - one per tier per major provider
15
- * These resolve via ai-providers to OpenRouter or direct SDKs
16
- *
17
- * Updated: December 2025
18
- *
19
- * Note: Some models use OpenRouter format (provider/model) to avoid
20
- * resolution issues with provider_model_id mismatches.
21
- */
22
- export const EVAL_MODELS = [
23
- // Anthropic Claude 4.5 - via AWS Bedrock (uses AWS credits with bearer token auth)
24
- // All Claude models should be 4.5 - older versions are deprecated
25
- { id: 'bedrock:us.anthropic.claude-opus-4-5-20251101-v1:0', name: 'Claude Opus 4.5', provider: 'anthropic', tier: 'best', notes: 'Bedrock' },
26
- { id: 'bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0', name: 'Claude Sonnet 4.5', provider: 'anthropic', tier: 'fast', notes: 'Bedrock' },
27
- { id: 'bedrock:us.anthropic.claude-haiku-4-5-20251001-v1:0', name: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'cheap', notes: 'Bedrock' },
28
- // OpenAI - GPT-5.1 variants + GPT-oss (open source)
29
- { id: 'openai/o3', name: 'o3', provider: 'openai', tier: 'best' },
30
- { id: 'openai/gpt-5.1', name: 'GPT-5.1', provider: 'openai', tier: 'best' },
31
- { id: 'openai/gpt-5-mini', name: 'GPT-5 Mini', provider: 'openai', tier: 'fast' },
32
- { id: 'openai/gpt-5-nano', name: 'GPT-5 Nano', provider: 'openai', tier: 'cheap' },
33
- // GPT-oss 120B removed - times out frequently
34
- { id: 'openai/gpt-oss-20b', name: 'GPT-oss 20B', provider: 'openai', tier: 'fast', notes: 'Open source' },
35
- // Google - Gemini 3 (November 2025)
36
- { id: 'google/gemini-3-pro-preview', name: 'Gemini 3 Pro', provider: 'google', tier: 'best', notes: '1M context, #1 LMArena' },
37
- // Gemini 2.5 Pro removed - times out frequently
38
- { id: 'flash', name: 'Gemini 2.5 Flash', provider: 'google', tier: 'fast' },
39
- // Meta (via OpenRouter)
40
- { id: 'meta-llama/llama-4-maverick', name: 'Llama 4 Maverick', provider: 'meta-llama', tier: 'best' },
41
- { id: 'meta-llama/llama-3.3-70b-instruct', name: 'Llama 3.3 70B', provider: 'meta-llama', tier: 'fast' },
42
- // DeepSeek - V3.2 (December 2025)
43
- { id: 'deepseek/deepseek-v3.2', name: 'DeepSeek V3.2', provider: 'deepseek', tier: 'best', notes: 'GPT-5 class reasoning' },
44
- // DeepSeek V3.2 Speciale removed - no tool use support on OpenRouter
45
- { id: 'deepseek/deepseek-chat', name: 'DeepSeek Chat', provider: 'deepseek', tier: 'fast' },
46
- // Mistral - Mistral 3 family (December 2025)
47
- { id: 'mistralai/mistral-large-2512', name: 'Mistral Large 3', provider: 'mistralai', tier: 'best', notes: '675B MoE, 41B active' },
48
- { id: 'mistralai/mistral-medium-3.1', name: 'Mistral Medium 3.1', provider: 'mistralai', tier: 'fast' },
49
- // Ministral 3 14B removed - often fails structured output
50
- // Qwen - Qwen3 family (2025)
51
- { id: 'qwen/qwen3-coder', name: 'Qwen3 Coder 480B', provider: 'qwen', tier: 'best', notes: 'Agentic coding' },
52
- { id: 'qwen/qwen3-30b-a3b', name: 'Qwen3 30B', provider: 'qwen', tier: 'fast', notes: 'MoE 30B/3B active' },
53
- { id: 'qwen/qwen3-next-80b-a3b-instruct', name: 'Qwen3 Next 80B', provider: 'qwen', tier: 'best', notes: 'Ultra-long context' },
54
- // xAI - Grok 4 family (December 2025)
55
- { id: 'x-ai/grok-4', name: 'Grok 4', provider: 'x-ai', tier: 'best', notes: '256K context, reasoning' },
56
- { id: 'x-ai/grok-4.1-fast', name: 'Grok 4.1 Fast', provider: 'x-ai', tier: 'fast', notes: '2M context, agentic' },
57
- { id: 'x-ai/grok-4-fast', name: 'Grok 4 Fast', provider: 'x-ai', tier: 'fast', notes: '2M context' },
58
- ];
59
- /**
60
- * Get models by tier
61
- */
62
- export function getModelsByTier(tier) {
63
- return EVAL_MODELS.filter(m => m.tier === tier);
64
- }
65
- /**
66
- * Get models by provider
67
- */
68
- export function getModelsByProvider(provider) {
69
- return EVAL_MODELS.filter(m => m.provider === provider);
70
- }
71
- /**
72
- * Get model info from language-models package (includes pricing)
73
- */
74
- export function getModelInfo(id) {
75
- const resolved = resolve(id);
76
- return get(resolved);
77
- }
78
- /**
79
- * Get pricing for a model (from OpenRouter data)
80
- */
81
- export function getModelPricing(id) {
82
- const info = getModelInfo(id);
83
- if (!info?.pricing)
84
- return undefined;
85
- return {
86
- prompt: parseFloat(info.pricing.prompt) * 1_000_000, // Convert to per-million
87
- completion: parseFloat(info.pricing.completion) * 1_000_000,
88
- };
89
- }
90
- /**
91
- * Create evalite variants for model testing
92
- */
93
- export function createModelVariants(opts) {
94
- let models = EVAL_MODELS;
95
- if (opts?.tiers) {
96
- models = models.filter(m => opts.tiers.includes(m.tier));
97
- }
98
- if (opts?.providers) {
99
- models = models.filter(m => opts.providers.includes(m.provider));
100
- }
101
- return models.map(model => ({
102
- name: `${model.provider}/${model.name}`,
103
- input: model,
104
- }));
105
- }
106
- /**
107
- * Get a representative model from each provider for a given tier
108
- */
109
- export function getRepresentativeModels(tier) {
110
- const seen = new Set();
111
- const result = [];
112
- for (const model of EVAL_MODELS) {
113
- if (model.tier === tier && !seen.has(model.provider)) {
114
- seen.add(model.provider);
115
- result.push(model);
116
- }
117
- }
118
- return result;
119
- }
@@ -1,147 +0,0 @@
1
- /**
2
- * Simple eval runner for AI Functions
3
- *
4
- * Runs evals across multiple models and collects results.
5
- * Does not depend on evalite - uses our own infrastructure.
6
- */
7
- import { generateObject, generateText } from '../generate.js';
8
- import { schema } from '../schema.js';
9
- import { createModelVariants, getModelPricing } from './models.js';
10
- /**
11
- * Run an eval suite across models
12
- */
13
- export async function runEval(options) {
14
- const { name, cases, task, scorers, concurrency = 3 } = options;
15
- // Get models to test
16
- const models = options.models ?? createModelVariants({
17
- tiers: options.tiers,
18
- providers: options.providers,
19
- }).map(v => v.input);
20
- const results = [];
21
- const startTime = Date.now();
22
- console.log(`\n🧪 Running eval: ${name}`);
23
- console.log(` Models: ${models.map(m => m.name).join(', ')}`);
24
- console.log(` Cases: ${cases.length}`);
25
- console.log('');
26
- // Run all model/case combinations
27
- const jobs = [];
28
- for (const model of models) {
29
- for (const evalCase of cases) {
30
- jobs.push({ model, case: evalCase });
31
- }
32
- }
33
- // Process in batches with concurrency limit
34
- for (let i = 0; i < jobs.length; i += concurrency) {
35
- const batch = jobs.slice(i, i + concurrency);
36
- const batchResults = await Promise.all(batch.map(async (job) => {
37
- const caseStart = Date.now();
38
- try {
39
- // Run the task
40
- const output = await task(job.case.input, job.model);
41
- const latencyMs = Date.now() - caseStart;
42
- // Run scorers
43
- const scores = [];
44
- for (const s of scorers) {
45
- try {
46
- const score = await s.scorer({
47
- input: job.case.input,
48
- output,
49
- expected: job.case.expected,
50
- });
51
- scores.push({
52
- name: s.name,
53
- score: Math.max(0, Math.min(1, score)),
54
- description: s.description,
55
- });
56
- }
57
- catch (err) {
58
- scores.push({
59
- name: s.name,
60
- score: 0,
61
- description: s.description,
62
- metadata: { error: String(err) },
63
- });
64
- }
65
- }
66
- // Calculate cost
67
- const pricing = getModelPricing(job.model.id);
68
- // Estimate tokens - rough approximation
69
- const estimatedPromptTokens = 100;
70
- const estimatedCompletionTokens = 200;
71
- const cost = pricing
72
- ? (estimatedPromptTokens * pricing.prompt + estimatedCompletionTokens * pricing.completion) / 1_000_000
73
- : 0;
74
- const avgScore = scores.length > 0
75
- ? scores.reduce((sum, s) => sum + s.score, 0) / scores.length
76
- : 0;
77
- const symbol = avgScore >= 0.8 ? '✓' : avgScore >= 0.5 ? '~' : '✗';
78
- console.log(` ${symbol} ${job.model.name} | ${job.case.name} | ${(avgScore * 100).toFixed(0)}% | ${latencyMs}ms`);
79
- return {
80
- model: job.model,
81
- case: job.case,
82
- output,
83
- scores,
84
- latencyMs,
85
- cost,
86
- };
87
- }
88
- catch (err) {
89
- console.log(` ✗ ${job.model.name} | ${job.case.name} | ERROR: ${err}`);
90
- return {
91
- model: job.model,
92
- case: job.case,
93
- output: null,
94
- scores: scorers.map(s => ({ name: s.name, score: 0 })),
95
- latencyMs: Date.now() - caseStart,
96
- cost: 0,
97
- error: String(err),
98
- };
99
- }
100
- }));
101
- results.push(...batchResults);
102
- }
103
- // Calculate summary
104
- const totalTime = Date.now() - startTime;
105
- const totalCost = results.reduce((sum, r) => sum + r.cost, 0);
106
- const allScores = results.flatMap(r => r.scores.map(s => s.score));
107
- const avgScore = allScores.length > 0
108
- ? allScores.reduce((a, b) => a + b, 0) / allScores.length
109
- : 0;
110
- // Group by model
111
- const byModel = {};
112
- for (const result of results) {
113
- const modelKey = result.model.id;
114
- if (!byModel[modelKey]) {
115
- byModel[modelKey] = { avgScore: 0, count: 0 };
116
- }
117
- const resultAvg = result.scores.reduce((sum, s) => sum + s.score, 0) / result.scores.length;
118
- byModel[modelKey].avgScore += resultAvg;
119
- byModel[modelKey].count++;
120
- }
121
- for (const key of Object.keys(byModel)) {
122
- const entry = byModel[key];
123
- if (entry) {
124
- entry.avgScore /= entry.count;
125
- }
126
- }
127
- console.log('');
128
- console.log(`📊 Results:`);
129
- console.log(` Overall: ${(avgScore * 100).toFixed(1)}%`);
130
- console.log(` Time: ${(totalTime / 1000).toFixed(1)}s`);
131
- console.log(` Cost: $${totalCost.toFixed(4)}`);
132
- console.log('');
133
- console.log(' By Model:');
134
- for (const [modelId, stats] of Object.entries(byModel)) {
135
- console.log(` - ${modelId}: ${(stats.avgScore * 100).toFixed(1)}%`);
136
- }
137
- return {
138
- name,
139
- results,
140
- avgScore,
141
- byModel,
142
- totalCost,
143
- totalTime,
144
- };
145
- }
146
- // Re-export helpers
147
- export { generateObject, generateText, schema };
@@ -1,96 +0,0 @@
1
- /**
2
- * Tests for schema conversion
3
- *
4
- * These are pure unit tests - no AI calls needed.
5
- */
6
- import { describe, it, expect } from 'vitest';
7
- import { schema } from '../src/index.js';
8
- import { z } from 'zod';
9
- describe('schema', () => {
10
- describe('string types', () => {
11
- it('converts simple string description to z.string()', () => {
12
- const result = schema('User name');
13
- expect(result._def.typeName).toBe('ZodString');
14
- expect(result._def.description).toBe('User name');
15
- });
16
- it('converts (number) hint to z.number()', () => {
17
- const result = schema('User age (number)');
18
- expect(result._def.typeName).toBe('ZodNumber');
19
- expect(result._def.description).toBe('User age');
20
- });
21
- it('converts (boolean) hint to z.boolean()', () => {
22
- const result = schema('Is active (boolean)');
23
- expect(result._def.typeName).toBe('ZodBoolean');
24
- expect(result._def.description).toBe('Is active');
25
- });
26
- it('converts (integer) hint to z.number().int()', () => {
27
- const result = schema('Item count (integer)');
28
- expect(result._def.typeName).toBe('ZodNumber');
29
- expect(result._def.checks?.some((c) => c.kind === 'int')).toBe(true);
30
- });
31
- it('converts (date) hint to z.string().datetime()', () => {
32
- const result = schema('Created at (date)');
33
- expect(result._def.typeName).toBe('ZodString');
34
- expect(result._def.checks?.some((c) => c.kind === 'datetime')).toBe(true);
35
- });
36
- });
37
- describe('enum types', () => {
38
- it('converts pipe-separated values to z.enum()', () => {
39
- const result = schema('pending | done | cancelled');
40
- expect(result._def.typeName).toBe('ZodEnum');
41
- expect(result._def.values).toEqual(['pending', 'done', 'cancelled']);
42
- });
43
- it('handles spaces around pipe', () => {
44
- const result = schema('yes | no | maybe');
45
- expect(result._def.values).toEqual(['yes', 'no', 'maybe']);
46
- });
47
- });
48
- describe('array types', () => {
49
- it('converts [string] to z.array(z.string())', () => {
50
- const result = schema(['List of items']);
51
- expect(result._def.typeName).toBe('ZodArray');
52
- expect(result._def.type._def.typeName).toBe('ZodString');
53
- expect(result._def.description).toBe('List of items');
54
- });
55
- });
56
- describe('object types', () => {
57
- it('converts object to z.object()', () => {
58
- const result = schema({
59
- name: 'User name',
60
- age: 'Age (number)',
61
- });
62
- expect(result._def.typeName).toBe('ZodObject');
63
- });
64
- it('handles nested objects', () => {
65
- const result = schema({
66
- user: {
67
- name: 'Name',
68
- profile: {
69
- bio: 'Bio',
70
- },
71
- },
72
- });
73
- expect(result._def.typeName).toBe('ZodObject');
74
- });
75
- it('handles mixed types in object', () => {
76
- const result = schema({
77
- name: 'Name',
78
- count: 'Count (number)',
79
- active: 'Active (boolean)',
80
- status: 'pending | done',
81
- tags: ['Tags'],
82
- });
83
- expect(result._def.typeName).toBe('ZodObject');
84
- });
85
- });
86
- describe('zod passthrough', () => {
87
- it('passes through existing zod schemas', () => {
88
- const zodSchema = z.object({
89
- name: z.string(),
90
- age: z.number(),
91
- });
92
- const result = schema(zodSchema);
93
- expect(result).toBe(zodSchema);
94
- });
95
- });
96
- });