ai-functions 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (400) hide show
  1. package/.turbo/turbo-build.log +5 -0
  2. package/.turbo/turbo-test.log +105 -0
  3. package/README.md +190 -86
  4. package/TODO.md +138 -0
  5. package/dist/ai-promise.d.ts +219 -0
  6. package/dist/ai-promise.d.ts.map +1 -0
  7. package/dist/ai-promise.js +610 -0
  8. package/dist/ai-promise.js.map +1 -0
  9. package/dist/ai.d.ts +285 -0
  10. package/dist/ai.d.ts.map +1 -0
  11. package/dist/ai.js +842 -0
  12. package/dist/ai.js.map +1 -0
  13. package/dist/batch/anthropic.d.ts +23 -0
  14. package/dist/batch/anthropic.d.ts.map +1 -0
  15. package/dist/batch/anthropic.js +257 -0
  16. package/dist/batch/anthropic.js.map +1 -0
  17. package/dist/batch/bedrock.d.ts +64 -0
  18. package/dist/batch/bedrock.d.ts.map +1 -0
  19. package/dist/batch/bedrock.js +586 -0
  20. package/dist/batch/bedrock.js.map +1 -0
  21. package/dist/batch/cloudflare.d.ts +37 -0
  22. package/dist/batch/cloudflare.d.ts.map +1 -0
  23. package/dist/batch/cloudflare.js +289 -0
  24. package/dist/batch/cloudflare.js.map +1 -0
  25. package/dist/batch/google.d.ts +41 -0
  26. package/dist/batch/google.d.ts.map +1 -0
  27. package/dist/batch/google.js +360 -0
  28. package/dist/batch/google.js.map +1 -0
  29. package/dist/batch/index.d.ts +31 -0
  30. package/dist/batch/index.d.ts.map +1 -0
  31. package/dist/batch/index.js +31 -0
  32. package/dist/batch/index.js.map +1 -0
  33. package/dist/batch/memory.d.ts +44 -0
  34. package/dist/batch/memory.d.ts.map +1 -0
  35. package/dist/batch/memory.js +188 -0
  36. package/dist/batch/memory.js.map +1 -0
  37. package/dist/batch/openai.d.ts +37 -0
  38. package/dist/batch/openai.d.ts.map +1 -0
  39. package/dist/batch/openai.js +403 -0
  40. package/dist/batch/openai.js.map +1 -0
  41. package/dist/batch-map.d.ts +125 -0
  42. package/dist/batch-map.d.ts.map +1 -0
  43. package/dist/batch-map.js +406 -0
  44. package/dist/batch-map.js.map +1 -0
  45. package/dist/batch-queue.d.ts +273 -0
  46. package/dist/batch-queue.d.ts.map +1 -0
  47. package/dist/batch-queue.js +271 -0
  48. package/dist/batch-queue.js.map +1 -0
  49. package/dist/context.d.ts +133 -0
  50. package/dist/context.d.ts.map +1 -0
  51. package/dist/context.js +267 -0
  52. package/dist/context.js.map +1 -0
  53. package/dist/embeddings.d.ts +123 -0
  54. package/dist/embeddings.d.ts.map +1 -0
  55. package/dist/embeddings.js +170 -0
  56. package/dist/embeddings.js.map +1 -0
  57. package/dist/eval/index.d.ts +8 -0
  58. package/dist/eval/index.d.ts.map +1 -0
  59. package/dist/eval/index.js +8 -0
  60. package/dist/eval/index.js.map +1 -0
  61. package/dist/eval/models.d.ts +66 -0
  62. package/dist/eval/models.d.ts.map +1 -0
  63. package/dist/eval/models.js +120 -0
  64. package/dist/eval/models.js.map +1 -0
  65. package/dist/eval/runner.d.ts +64 -0
  66. package/dist/eval/runner.d.ts.map +1 -0
  67. package/dist/eval/runner.js +148 -0
  68. package/dist/eval/runner.js.map +1 -0
  69. package/dist/generate.d.ts +168 -0
  70. package/dist/generate.d.ts.map +1 -0
  71. package/dist/generate.js +174 -0
  72. package/dist/generate.js.map +1 -0
  73. package/dist/index.d.ts +29 -4
  74. package/dist/index.d.ts.map +1 -1
  75. package/dist/index.js +53 -52
  76. package/dist/index.js.map +1 -1
  77. package/dist/primitives.d.ts +292 -0
  78. package/dist/primitives.d.ts.map +1 -0
  79. package/dist/primitives.js +471 -0
  80. package/dist/primitives.js.map +1 -0
  81. package/dist/providers/cloudflare.d.ts +9 -0
  82. package/dist/providers/cloudflare.d.ts.map +1 -0
  83. package/dist/providers/cloudflare.js +9 -0
  84. package/dist/providers/cloudflare.js.map +1 -0
  85. package/dist/providers/index.d.ts +9 -0
  86. package/dist/providers/index.d.ts.map +1 -0
  87. package/dist/providers/index.js +9 -0
  88. package/dist/providers/index.js.map +1 -0
  89. package/dist/schema.d.ts +54 -0
  90. package/dist/schema.d.ts.map +1 -0
  91. package/dist/schema.js +109 -0
  92. package/dist/schema.js.map +1 -0
  93. package/dist/template.d.ts +73 -0
  94. package/dist/template.d.ts.map +1 -0
  95. package/dist/template.js +129 -0
  96. package/dist/template.js.map +1 -0
  97. package/dist/types.d.ts +474 -106
  98. package/dist/types.d.ts.map +1 -1
  99. package/dist/types.js +4 -8
  100. package/dist/types.js.map +1 -1
  101. package/evalite.config.ts +19 -0
  102. package/evals/README.md +212 -0
  103. package/evals/classification.eval.ts +108 -0
  104. package/evals/marketing.eval.ts +370 -0
  105. package/evals/math.eval.ts +94 -0
  106. package/evals/run-evals.ts +166 -0
  107. package/evals/structured-output.eval.ts +143 -0
  108. package/evals/writing.eval.ts +117 -0
  109. package/examples/batch-blog-posts.ts +160 -0
  110. package/package.json +57 -57
  111. package/src/ai-promise.ts +784 -0
  112. package/src/ai.ts +1183 -0
  113. package/src/batch/anthropic.ts +375 -0
  114. package/src/batch/bedrock.ts +801 -0
  115. package/src/batch/cloudflare.ts +421 -0
  116. package/src/batch/google.ts +491 -0
  117. package/src/batch/index.ts +31 -0
  118. package/src/batch/memory.ts +253 -0
  119. package/src/batch/openai.ts +557 -0
  120. package/src/batch-map.ts +534 -0
  121. package/src/batch-queue.ts +493 -0
  122. package/src/context.ts +332 -0
  123. package/src/embeddings.ts +244 -0
  124. package/src/eval/index.ts +8 -0
  125. package/src/eval/models.ts +158 -0
  126. package/src/eval/runner.ts +217 -0
  127. package/src/generate.ts +245 -0
  128. package/src/index.ts +154 -0
  129. package/src/primitives.ts +612 -0
  130. package/src/providers/cloudflare.ts +15 -0
  131. package/src/providers/index.ts +14 -0
  132. package/src/schema.ts +147 -0
  133. package/src/template.ts +209 -0
  134. package/src/types.ts +540 -0
  135. package/test/README.md +105 -0
  136. package/test/ai-proxy.test.ts +192 -0
  137. package/test/async-iterators.test.ts +327 -0
  138. package/test/batch-background.test.ts +482 -0
  139. package/test/batch-blog-posts.test.ts +387 -0
  140. package/test/blog-generation.test.ts +510 -0
  141. package/test/browse-read.test.ts +611 -0
  142. package/test/core-functions.test.ts +694 -0
  143. package/test/decide.test.ts +393 -0
  144. package/test/define.test.ts +274 -0
  145. package/test/e2e-bedrock-manual.ts +163 -0
  146. package/test/e2e-bedrock.test.ts +191 -0
  147. package/test/e2e-flex-gateway.ts +157 -0
  148. package/test/e2e-flex-manual.ts +183 -0
  149. package/test/e2e-flex.test.ts +209 -0
  150. package/test/e2e-google-manual.ts +178 -0
  151. package/test/e2e-google.test.ts +216 -0
  152. package/test/embeddings.test.ts +284 -0
  153. package/test/evals/define-function.eval.test.ts +379 -0
  154. package/test/evals/primitives.eval.test.ts +384 -0
  155. package/test/function-types.test.ts +492 -0
  156. package/test/generate-core.test.ts +319 -0
  157. package/test/generate.test.ts +163 -0
  158. package/test/implicit-batch.test.ts +422 -0
  159. package/test/schema.test.ts +109 -0
  160. package/test/tagged-templates.test.ts +302 -0
  161. package/tsconfig.json +10 -0
  162. package/vitest.config.ts +42 -0
  163. package/LICENSE +0 -21
  164. package/bin/cli.js +0 -5
  165. package/dist/cli/index.d.ts +0 -10
  166. package/dist/cli/index.d.ts.map +0 -1
  167. package/dist/cli/index.js +0 -38
  168. package/dist/cli/index.js.map +0 -1
  169. package/dist/cli/index.test.d.ts +0 -2
  170. package/dist/cli/index.test.d.ts.map +0 -1
  171. package/dist/cli/index.test.js +0 -35
  172. package/dist/cli/index.test.js.map +0 -1
  173. package/dist/constants/models.d.ts +0 -10
  174. package/dist/constants/models.d.ts.map +0 -1
  175. package/dist/constants/models.js +0 -12
  176. package/dist/constants/models.js.map +0 -1
  177. package/dist/converters/index.d.ts +0 -3
  178. package/dist/converters/index.d.ts.map +0 -1
  179. package/dist/converters/index.js +0 -3
  180. package/dist/converters/index.js.map +0 -1
  181. package/dist/converters/model.d.ts +0 -4
  182. package/dist/converters/model.d.ts.map +0 -1
  183. package/dist/converters/model.js +0 -19
  184. package/dist/converters/model.js.map +0 -1
  185. package/dist/converters/schema.d.ts +0 -4
  186. package/dist/converters/schema.d.ts.map +0 -1
  187. package/dist/converters/schema.js +0 -25
  188. package/dist/converters/schema.js.map +0 -1
  189. package/dist/core/responses.d.ts +0 -5
  190. package/dist/core/responses.d.ts.map +0 -1
  191. package/dist/core/responses.js +0 -16
  192. package/dist/core/responses.js.map +0 -1
  193. package/dist/core/responses.test.d.ts +0 -2
  194. package/dist/core/responses.test.d.ts.map +0 -1
  195. package/dist/core/responses.test.js +0 -31
  196. package/dist/core/responses.test.js.map +0 -1
  197. package/dist/errors.d.ts +0 -6
  198. package/dist/errors.d.ts.map +0 -1
  199. package/dist/errors.js +0 -9
  200. package/dist/errors.js.map +0 -1
  201. package/dist/examples/streaming.test.d.ts +0 -2
  202. package/dist/examples/streaming.test.d.ts.map +0 -1
  203. package/dist/examples/streaming.test.js +0 -176
  204. package/dist/examples/streaming.test.js.map +0 -1
  205. package/dist/factory/__tests__/index.test.d.ts +0 -2
  206. package/dist/factory/__tests__/index.test.d.ts.map +0 -1
  207. package/dist/factory/__tests__/index.test.js +0 -430
  208. package/dist/factory/__tests__/index.test.js.map +0 -1
  209. package/dist/factory/__tests__/list.test.d.ts +0 -2
  210. package/dist/factory/__tests__/list.test.d.ts.map +0 -1
  211. package/dist/factory/__tests__/list.test.js +0 -92
  212. package/dist/factory/__tests__/list.test.js.map +0 -1
  213. package/dist/factory/index.d.ts +0 -20
  214. package/dist/factory/index.d.ts.map +0 -1
  215. package/dist/factory/index.js +0 -287
  216. package/dist/factory/index.js.map +0 -1
  217. package/dist/factory/index.test.d.ts +0 -2
  218. package/dist/factory/index.test.d.ts.map +0 -1
  219. package/dist/factory/index.test.js +0 -287
  220. package/dist/factory/index.test.js.map +0 -1
  221. package/dist/factory/list.d.ts +0 -3
  222. package/dist/factory/list.d.ts.map +0 -1
  223. package/dist/factory/list.js +0 -221
  224. package/dist/factory/list.js.map +0 -1
  225. package/dist/factory/list.test.d.ts +0 -2
  226. package/dist/factory/list.test.d.ts.map +0 -1
  227. package/dist/factory/list.test.js +0 -84
  228. package/dist/factory/list.test.js.map +0 -1
  229. package/dist/generate/index.d.ts +0 -5
  230. package/dist/generate/index.d.ts.map +0 -1
  231. package/dist/generate/index.js +0 -17
  232. package/dist/generate/index.js.map +0 -1
  233. package/dist/index.test.d.ts +0 -2
  234. package/dist/index.test.d.ts.map +0 -1
  235. package/dist/index.test.js +0 -59
  236. package/dist/index.test.js.map +0 -1
  237. package/dist/list/await.d.ts +0 -3
  238. package/dist/list/await.d.ts.map +0 -1
  239. package/dist/list/await.js +0 -28
  240. package/dist/list/await.js.map +0 -1
  241. package/dist/list/constants.d.ts +0 -4
  242. package/dist/list/constants.d.ts.map +0 -1
  243. package/dist/list/constants.js +0 -5
  244. package/dist/list/constants.js.map +0 -1
  245. package/dist/list/create-function.d.ts +0 -3
  246. package/dist/list/create-function.d.ts.map +0 -1
  247. package/dist/list/create-function.js +0 -11
  248. package/dist/list/create-function.js.map +0 -1
  249. package/dist/list/index.d.ts +0 -4
  250. package/dist/list/index.d.ts.map +0 -1
  251. package/dist/list/index.js +0 -5
  252. package/dist/list/index.js.map +0 -1
  253. package/dist/list/prompt.d.ts +0 -3
  254. package/dist/list/prompt.d.ts.map +0 -1
  255. package/dist/list/prompt.js +0 -6
  256. package/dist/list/prompt.js.map +0 -1
  257. package/dist/list/schemas.d.ts +0 -4
  258. package/dist/list/schemas.d.ts.map +0 -1
  259. package/dist/list/schemas.js +0 -8
  260. package/dist/list/schemas.js.map +0 -1
  261. package/dist/list/stream.d.ts +0 -3
  262. package/dist/list/stream.d.ts.map +0 -1
  263. package/dist/list/stream.js +0 -33
  264. package/dist/list/stream.js.map +0 -1
  265. package/dist/list/types.d.ts +0 -11
  266. package/dist/list/types.d.ts.map +0 -1
  267. package/dist/list/types.js +0 -2
  268. package/dist/list/types.js.map +0 -1
  269. package/dist/list/validation.d.ts +0 -3
  270. package/dist/list/validation.d.ts.map +0 -1
  271. package/dist/list/validation.js +0 -12
  272. package/dist/list/validation.js.map +0 -1
  273. package/dist/providers/config.d.ts +0 -4
  274. package/dist/providers/config.d.ts.map +0 -1
  275. package/dist/providers/config.js +0 -21
  276. package/dist/providers/config.js.map +0 -1
  277. package/dist/providers/config.test.d.ts +0 -2
  278. package/dist/providers/config.test.d.ts.map +0 -1
  279. package/dist/providers/config.test.js +0 -37
  280. package/dist/providers/config.test.js.map +0 -1
  281. package/dist/proxy/constants.d.ts +0 -4
  282. package/dist/proxy/constants.d.ts.map +0 -1
  283. package/dist/proxy/constants.js +0 -5
  284. package/dist/proxy/constants.js.map +0 -1
  285. package/dist/proxy/create-function.d.ts +0 -4
  286. package/dist/proxy/create-function.d.ts.map +0 -1
  287. package/dist/proxy/create-function.js +0 -24
  288. package/dist/proxy/create-function.js.map +0 -1
  289. package/dist/proxy/create-proxy.d.ts +0 -2
  290. package/dist/proxy/create-proxy.d.ts.map +0 -1
  291. package/dist/proxy/create-proxy.js +0 -11
  292. package/dist/proxy/create-proxy.js.map +0 -1
  293. package/dist/proxy/function-generator.d.ts +0 -9
  294. package/dist/proxy/function-generator.d.ts.map +0 -1
  295. package/dist/proxy/function-generator.js +0 -29
  296. package/dist/proxy/function-generator.js.map +0 -1
  297. package/dist/proxy/index.d.ts +0 -4
  298. package/dist/proxy/index.d.ts.map +0 -1
  299. package/dist/proxy/index.js +0 -4
  300. package/dist/proxy/index.js.map +0 -1
  301. package/dist/proxy/prompt.d.ts +0 -2
  302. package/dist/proxy/prompt.d.ts.map +0 -1
  303. package/dist/proxy/prompt.js +0 -6
  304. package/dist/proxy/prompt.js.map +0 -1
  305. package/dist/proxy/types.d.ts +0 -7
  306. package/dist/proxy/types.d.ts.map +0 -1
  307. package/dist/proxy/types.js +0 -2
  308. package/dist/proxy/types.js.map +0 -1
  309. package/dist/queue/manager.d.ts +0 -5
  310. package/dist/queue/manager.d.ts.map +0 -1
  311. package/dist/queue/manager.js +0 -37
  312. package/dist/queue/manager.js.map +0 -1
  313. package/dist/queue/manager.test.d.ts +0 -2
  314. package/dist/queue/manager.test.d.ts.map +0 -1
  315. package/dist/queue/manager.test.js +0 -52
  316. package/dist/queue/manager.test.js.map +0 -1
  317. package/dist/schema-converter.d.ts +0 -4
  318. package/dist/schema-converter.d.ts.map +0 -1
  319. package/dist/schema-converter.js +0 -30
  320. package/dist/schema-converter.js.map +0 -1
  321. package/dist/stream/index.d.ts +0 -7
  322. package/dist/stream/index.d.ts.map +0 -1
  323. package/dist/stream/index.js +0 -23
  324. package/dist/stream/index.js.map +0 -1
  325. package/dist/streaming/utils.d.ts +0 -4
  326. package/dist/streaming/utils.d.ts.map +0 -1
  327. package/dist/streaming/utils.js +0 -131
  328. package/dist/streaming/utils.js.map +0 -1
  329. package/dist/streaming/utils.test.d.ts +0 -2
  330. package/dist/streaming/utils.test.d.ts.map +0 -1
  331. package/dist/streaming/utils.test.js +0 -84
  332. package/dist/streaming/utils.test.js.map +0 -1
  333. package/dist/templates/result.d.ts +0 -7
  334. package/dist/templates/result.d.ts.map +0 -1
  335. package/dist/templates/result.js +0 -40
  336. package/dist/templates/result.js.map +0 -1
  337. package/dist/templates/result.test.d.ts +0 -2
  338. package/dist/templates/result.test.d.ts.map +0 -1
  339. package/dist/templates/result.test.js +0 -75
  340. package/dist/templates/result.test.js.map +0 -1
  341. package/dist/test/setup.d.ts +0 -2
  342. package/dist/test/setup.d.ts.map +0 -1
  343. package/dist/test/setup.js +0 -21
  344. package/dist/test/setup.js.map +0 -1
  345. package/dist/test-types.d.ts +0 -13
  346. package/dist/test-types.d.ts.map +0 -1
  347. package/dist/test-types.js +0 -55
  348. package/dist/test-types.js.map +0 -1
  349. package/dist/types/index.d.ts +0 -4
  350. package/dist/types/index.d.ts.map +0 -1
  351. package/dist/types/index.js +0 -4
  352. package/dist/types/index.js.map +0 -1
  353. package/dist/types/list.d.ts +0 -10
  354. package/dist/types/list.d.ts.map +0 -1
  355. package/dist/types/list.js +0 -2
  356. package/dist/types/list.js.map +0 -1
  357. package/dist/types/model.d.ts +0 -7
  358. package/dist/types/model.d.ts.map +0 -1
  359. package/dist/types/model.js +0 -2
  360. package/dist/types/model.js.map +0 -1
  361. package/dist/types/options.d.ts +0 -25
  362. package/dist/types/options.d.ts.map +0 -1
  363. package/dist/types/options.js +0 -2
  364. package/dist/types/options.js.map +0 -1
  365. package/dist/types/schema.d.ts +0 -5
  366. package/dist/types/schema.d.ts.map +0 -1
  367. package/dist/types/schema.js +0 -2
  368. package/dist/types/schema.js.map +0 -1
  369. package/dist/utils/__tests__/request-handler.test.d.ts +0 -2
  370. package/dist/utils/__tests__/request-handler.test.d.ts.map +0 -1
  371. package/dist/utils/__tests__/request-handler.test.js +0 -134
  372. package/dist/utils/__tests__/request-handler.test.js.map +0 -1
  373. package/dist/utils/__tests__/schema.test.d.ts +0 -2
  374. package/dist/utils/__tests__/schema.test.d.ts.map +0 -1
  375. package/dist/utils/__tests__/schema.test.js +0 -49
  376. package/dist/utils/__tests__/schema.test.js.map +0 -1
  377. package/dist/utils/__tests__/stream-progress.test.d.ts +0 -2
  378. package/dist/utils/__tests__/stream-progress.test.d.ts.map +0 -1
  379. package/dist/utils/__tests__/stream-progress.test.js +0 -85
  380. package/dist/utils/__tests__/stream-progress.test.js.map +0 -1
  381. package/dist/utils/index.d.ts +0 -2
  382. package/dist/utils/index.d.ts.map +0 -1
  383. package/dist/utils/index.js +0 -2
  384. package/dist/utils/index.js.map +0 -1
  385. package/dist/utils/request-handler.d.ts +0 -17
  386. package/dist/utils/request-handler.d.ts.map +0 -1
  387. package/dist/utils/request-handler.js +0 -105
  388. package/dist/utils/request-handler.js.map +0 -1
  389. package/dist/utils/schema.d.ts +0 -11
  390. package/dist/utils/schema.d.ts.map +0 -1
  391. package/dist/utils/schema.js +0 -51
  392. package/dist/utils/schema.js.map +0 -1
  393. package/dist/utils/stream-progress.d.ts +0 -17
  394. package/dist/utils/stream-progress.d.ts.map +0 -1
  395. package/dist/utils/stream-progress.js +0 -86
  396. package/dist/utils/stream-progress.js.map +0 -1
  397. package/dist/utils/validation.d.ts +0 -3
  398. package/dist/utils/validation.d.ts.map +0 -1
  399. package/dist/utils/validation.js +0 -30
  400. package/dist/utils/validation.js.map +0 -1
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Model Registry for AI Functions Eval Suite
3
+ *
4
+ * Simple model list for running evals across providers.
5
+ * Uses ai-providers/language-models for resolution and pricing.
6
+ *
7
+ * @packageDocumentation
8
+ */
9
+
10
+ import { resolve, get, list, type ModelInfo } from 'language-models'
11
+
12
/** Capability bucket a model is filed under in the eval registry. */
export type ModelTier = 'best' | 'fast' | 'cheap'

/**
 * One entry of the eval model registry.
 */
export interface EvalModel {
  /** Model alias or fully qualified model ID */
  id: string
  /** Display name for humans */
  name: string
  /** Slug of the provider the model belongs to */
  provider: string
  /** Tier the model is grouped under */
  tier: ModelTier
  /** Free-form remarks (optional) */
  notes?: string
}
26
+
27
+ // ============================================================================
28
+ // Models to evaluate - using aliases from language-models
29
+ // ============================================================================
30
+
31
/**
 * Registry of models exercised by the eval suite, grouped by provider.
 * A provider may contribute more than one model to the same tier.
 * Entries resolve via ai-providers to OpenRouter or direct SDKs.
 *
 * Updated: December 2025
 *
 * Note: several ids are written in OpenRouter form (provider/model) to
 * sidestep resolution issues caused by provider_model_id mismatches.
 */
export const EVAL_MODELS: EvalModel[] = [
  // --- Anthropic: Claude 4.5 through AWS Bedrock (AWS credits, bearer token auth) ---
  // Only 4.5-generation Claude models belong here; older versions are deprecated
  { id: 'bedrock:us.anthropic.claude-opus-4-5-20251101-v1:0', name: 'Claude Opus 4.5', provider: 'anthropic', tier: 'best', notes: 'Bedrock' },
  { id: 'bedrock:us.anthropic.claude-sonnet-4-5-20250929-v1:0', name: 'Claude Sonnet 4.5', provider: 'anthropic', tier: 'fast', notes: 'Bedrock' },
  { id: 'bedrock:us.anthropic.claude-haiku-4-5-20251001-v1:0', name: 'Claude Haiku 4.5', provider: 'anthropic', tier: 'cheap', notes: 'Bedrock' },

  // --- OpenAI: o3, GPT-5.1 / GPT-5 variants and GPT-oss (open source) ---
  { id: 'openai/o3', name: 'o3', provider: 'openai', tier: 'best' },
  { id: 'openai/gpt-5.1', name: 'GPT-5.1', provider: 'openai', tier: 'best' },
  { id: 'openai/gpt-5-mini', name: 'GPT-5 Mini', provider: 'openai', tier: 'fast' },
  { id: 'openai/gpt-5-nano', name: 'GPT-5 Nano', provider: 'openai', tier: 'cheap' },
  // (GPT-oss 120B was dropped: it times out frequently)
  { id: 'openai/gpt-oss-20b', name: 'GPT-oss 20B', provider: 'openai', tier: 'fast', notes: 'Open source' },

  // --- Google: Gemini 3 (November 2025) plus the Flash alias ---
  { id: 'google/gemini-3-pro-preview', name: 'Gemini 3 Pro', provider: 'google', tier: 'best', notes: '1M context, #1 LMArena' },
  // (Gemini 2.5 Pro was dropped: it times out frequently)
  { id: 'flash', name: 'Gemini 2.5 Flash', provider: 'google', tier: 'fast' },

  // --- Meta, served through OpenRouter ---
  { id: 'meta-llama/llama-4-maverick', name: 'Llama 4 Maverick', provider: 'meta-llama', tier: 'best' },
  { id: 'meta-llama/llama-3.3-70b-instruct', name: 'Llama 3.3 70B', provider: 'meta-llama', tier: 'fast' },

  // --- DeepSeek: V3.2 (December 2025) ---
  { id: 'deepseek/deepseek-v3.2', name: 'DeepSeek V3.2', provider: 'deepseek', tier: 'best', notes: 'GPT-5 class reasoning' },
  // (DeepSeek V3.2 Speciale was dropped: no tool use support on OpenRouter)
  { id: 'deepseek/deepseek-chat', name: 'DeepSeek Chat', provider: 'deepseek', tier: 'fast' },

  // --- Mistral: Mistral 3 family (December 2025) ---
  { id: 'mistralai/mistral-large-2512', name: 'Mistral Large 3', provider: 'mistralai', tier: 'best', notes: '675B MoE, 41B active' },
  { id: 'mistralai/mistral-medium-3.1', name: 'Mistral Medium 3.1', provider: 'mistralai', tier: 'fast' },
  // (Ministral 3 14B was dropped: it often fails structured output)

  // --- Qwen: Qwen3 family (2025) ---
  { id: 'qwen/qwen3-coder', name: 'Qwen3 Coder 480B', provider: 'qwen', tier: 'best', notes: 'Agentic coding' },
  { id: 'qwen/qwen3-30b-a3b', name: 'Qwen3 30B', provider: 'qwen', tier: 'fast', notes: 'MoE 30B/3B active' },
  { id: 'qwen/qwen3-next-80b-a3b-instruct', name: 'Qwen3 Next 80B', provider: 'qwen', tier: 'best', notes: 'Ultra-long context' },

  // --- xAI: Grok 4 family (December 2025) ---
  { id: 'x-ai/grok-4', name: 'Grok 4', provider: 'x-ai', tier: 'best', notes: '256K context, reasoning' },
  { id: 'x-ai/grok-4.1-fast', name: 'Grok 4.1 Fast', provider: 'x-ai', tier: 'fast', notes: '2M context, agentic' },
  { id: 'x-ai/grok-4-fast', name: 'Grok 4 Fast', provider: 'x-ai', tier: 'fast', notes: '2M context' },
]
84
+
85
/**
 * Select every registered eval model that belongs to the given tier.
 *
 * @param tier - Tier to match exactly
 * @returns Matching models in registry order (empty when none match)
 */
export function getModelsByTier(tier: ModelTier): EvalModel[] {
  const matches: EvalModel[] = []
  for (const candidate of EVAL_MODELS) {
    if (candidate.tier === tier) {
      matches.push(candidate)
    }
  }
  return matches
}
91
+
92
/**
 * Select every registered eval model published by the given provider.
 *
 * @param provider - Provider slug, compared with strict equality
 * @returns Matching models in registry order (empty when none match)
 */
export function getModelsByProvider(provider: string): EvalModel[] {
  const matches: EvalModel[] = []
  for (const candidate of EVAL_MODELS) {
    if (candidate.provider === provider) {
      matches.push(candidate)
    }
  }
  return matches
}
98
+
99
/**
 * Look up a model's metadata (including pricing) in the language-models package.
 *
 * The id is first passed through `resolve`, and the resolved value is then
 * handed to `get`.
 *
 * @param id - Alias or full model ID
 * @returns Whatever `get` yields for the resolved id
 */
export function getModelInfo(id: string): ModelInfo | undefined {
  return get(resolve(id))
}
106
+
107
/**
 * Get pricing for a model (from OpenRouter data), scaled to per-million tokens.
 *
 * The registry stores prices as strings; they are parsed and multiplied by
 * 1,000,000 here.
 *
 * @param id - Alias or full model ID
 * @returns Prompt and completion prices per million tokens, or `undefined`
 *   when the model has no pricing data or either price is not a parseable number
 */
export function getModelPricing(id: string): { prompt: number; completion: number } | undefined {
  const pricing = getModelInfo(id)?.pricing
  if (!pricing) return undefined

  const perMillion = 1_000_000
  const prompt = parseFloat(pricing.prompt) * perMillion
  const completion = parseFloat(pricing.completion) * perMillion

  // parseFloat yields NaN for missing or malformed values; reporting "no pricing"
  // keeps NaN from leaking into cost totals computed by callers
  if (Number.isNaN(prompt) || Number.isNaN(completion)) return undefined

  return { prompt, completion }
}
119
+
120
/**
 * Build evalite-style variants (`{ name, input }`) from the model registry.
 *
 * @param opts - Optional filters. When `tiers` is given, only models whose tier
 *   is listed are kept; when `providers` is given, only models whose provider is
 *   listed are kept. Both filters apply together. An empty array matches nothing.
 * @returns One variant per surviving model, named `<provider>/<display name>`,
 *   in registry order
 */
export function createModelVariants(opts?: {
  tiers?: ModelTier[]
  providers?: string[]
}): Array<{ name: string; input: EvalModel }> {
  const allowedTiers = opts?.tiers
  const allowedProviders = opts?.providers

  const variants: Array<{ name: string; input: EvalModel }> = []
  for (const model of EVAL_MODELS) {
    if (allowedTiers && !allowedTiers.includes(model.tier)) continue
    if (allowedProviders && !allowedProviders.includes(model.provider)) continue

    variants.push({
      name: `${model.provider}/${model.name}`,
      input: model,
    })
  }
  return variants
}
142
+
143
/**
 * Pick one model per provider for the given tier.
 *
 * The first registry entry of that tier wins for each provider; later entries
 * from the same provider are ignored.
 *
 * @param tier - Tier to draw representatives from
 * @returns At most one model per provider, in registry order
 */
export function getRepresentativeModels(tier: ModelTier): EvalModel[] {
  const claimedProviders = new Set<string>()

  return EVAL_MODELS.filter(candidate => {
    if (candidate.tier !== tier || claimedProviders.has(candidate.provider)) {
      return false
    }
    claimedProviders.add(candidate.provider)
    return true
  })
}
@@ -0,0 +1,217 @@
1
+ /**
2
+ * Simple eval runner for AI Functions
3
+ *
4
+ * Runs evals across multiple models and collects results.
5
+ * Does not depend on evalite - uses our own infrastructure.
6
+ */
7
+
8
+ import { generateObject, generateText } from '../generate.js'
9
+ import { schema } from '../schema.js'
10
+ import { createModelVariants, getModelPricing, type EvalModel, type ModelTier } from './models.js'
11
+
12
/**
 * A single test case of an eval: a labelled input with an optional
 * expected value for scorers to compare against.
 */
export interface EvalCase<TInput = unknown, TExpected = unknown> {
  /** Label for the case (shown in progress logs) */
  name: string
  /** Value handed to the task under test */
  input: TInput
  /** Reference value passed through to scorers, if any */
  expected?: TExpected
}
17
+
18
/**
 * Outcome of one scorer for one eval result.
 */
export interface EvalScore {
  /** Name of the scorer that produced this score */
  name: string
  /** Numeric score */
  score: number
  /** Optional human-readable description of the scorer */
  description?: string
  /** Optional extra data attached to the score */
  metadata?: unknown
}
24
+
25
/**
 * Record of running one eval case against one model.
 */
export interface EvalResult<TOutput = unknown> {
  /** Model the case was run against */
  model: EvalModel
  /** The case that was run */
  case: EvalCase
  /** What the task produced */
  output: TOutput
  /** One score per scorer */
  scores: EvalScore[]
  /** Task duration in milliseconds */
  latencyMs: number
  /** Cost attributed to this run */
  cost: number
  /** Stringified error, present when the run failed */
  error?: string
}
34
+
35
/**
 * Aggregate report for one eval suite.
 */
export interface EvalSummary {
  /** Name of the eval suite */
  name: string
  /** Every individual model/case result */
  results: EvalResult[]
  /** Average score across the suite */
  avgScore: number
  /** Per-model aggregate: average score and number of results */
  byModel: Record<string, { avgScore: number; count: number }>
  /** Sum of the costs of all results */
  totalCost: number
  /** Total time taken by the suite */
  totalTime: number
}
43
+
44
/**
 * Configuration for an eval run.
 */
export interface RunEvalOptions<TInput, TOutput, TExpected> {
  /** Name of the eval suite */
  name: string
  /** Cases to run */
  cases: EvalCase<TInput, TExpected>[]
  /** Function under test: receives a case input and the model to use */
  task: (input: TInput, model: EvalModel) => Promise<TOutput>
  /** Scorers applied to each task output; each may be sync or async */
  scorers: Array<{
    name: string
    description?: string
    scorer: (args: { input: TInput; output: TOutput; expected?: TExpected }) => number | Promise<number>
  }>
  /** Explicit list of models to test */
  models?: EvalModel[]
  /** Tier filter for model selection */
  tiers?: ModelTier[]
  /** Provider filter for model selection */
  providers?: string[]
  /** Number of model/case jobs to run at once */
  concurrency?: number
}
58
+
59
/**
 * Mean of `values`, or 0 when the list is empty (guards against a 0/0 NaN).
 */
function meanOrZero(values: readonly number[]): number {
  if (values.length === 0) return 0
  return values.reduce((sum, value) => sum + value, 0) / values.length
}

/**
 * Run an eval suite across models
 *
 * Every model is paired with every case; the resulting jobs run in consecutive
 * batches of `concurrency` (default 3), each batch awaited as a whole before the
 * next one starts. Progress and a final report are written to the console.
 *
 * @param options - Suite definition. `options.models` takes precedence; when it
 *   is absent, models are selected via `createModelVariants` using
 *   `options.tiers` / `options.providers`.
 * @returns Summary containing every individual result, the overall average
 *   score, per-model averages keyed by model id, the summed (estimated) cost and
 *   the wall-clock duration in milliseconds.
 *
 * Failures never reject the returned promise: a task that throws produces a
 * result with `error` set and a zero score for every scorer, and a scorer that
 * throws (or returns NaN) is recorded as a zero score with the error in
 * `metadata`.
 */
export async function runEval<TInput, TOutput, TExpected>(
  options: RunEvalOptions<TInput, TOutput, TExpected>
): Promise<EvalSummary> {
  const { name, cases, task, scorers } = options

  // The batching loop advances by `concurrency`; a step of 0 or less would
  // never terminate and NaN would silently skip every job, so clamp to >= 1.
  const requestedConcurrency = options.concurrency ?? 3
  const concurrency = requestedConcurrency >= 1 ? Math.floor(requestedConcurrency) : 1

  // Get models to test
  const models = options.models ?? createModelVariants({
    tiers: options.tiers,
    providers: options.providers,
  }).map(v => v.input)

  const results: EvalResult<TOutput>[] = []
  const startTime = Date.now()

  console.log(`\n🧪 Running eval: ${name}`)
  console.log(`   Models: ${models.map(m => m.name).join(', ')}`)
  console.log(`   Cases: ${cases.length}`)
  console.log('')

  // Run all model/case combinations
  const jobs: Array<{ model: EvalModel; case: EvalCase<TInput, TExpected> }> = []
  for (const model of models) {
    for (const evalCase of cases) {
      jobs.push({ model, case: evalCase })
    }
  }

  // Process in batches with concurrency limit
  for (let i = 0; i < jobs.length; i += concurrency) {
    const batch = jobs.slice(i, i + concurrency)

    const batchResults = await Promise.all(
      batch.map(async (job) => {
        const caseStart = Date.now()

        try {
          // Run the task
          const output = await task(job.case.input, job.model)
          const latencyMs = Date.now() - caseStart

          // Run scorers; one failing scorer must not discard the others
          const scores: EvalScore[] = []
          for (const s of scorers) {
            try {
              const score = await s.scorer({
                input: job.case.input,
                output,
                expected: job.case.expected,
              })
              // NaN survives Math.max/Math.min and would poison every average
              if (Number.isNaN(score)) {
                throw new Error(`Scorer "${s.name}" returned NaN`)
              }
              scores.push({
                name: s.name,
                score: Math.max(0, Math.min(1, score)),
                description: s.description,
              })
            } catch (err) {
              scores.push({
                name: s.name,
                score: 0,
                description: s.description,
                metadata: { error: String(err) },
              })
            }
          }

          // Calculate cost
          const pricing = getModelPricing(job.model.id)
          // Estimate tokens - rough approximation (fixed guess, not measured usage)
          const estimatedPromptTokens = 100
          const estimatedCompletionTokens = 200
          // Pricing is per million tokens, hence the division
          const cost = pricing
            ? (estimatedPromptTokens * pricing.prompt + estimatedCompletionTokens * pricing.completion) / 1_000_000
            : 0

          const avgScore = meanOrZero(scores.map(s => s.score))

          const symbol = avgScore >= 0.8 ? '✓' : avgScore >= 0.5 ? '~' : '✗'
          console.log(`   ${symbol} ${job.model.name} | ${job.case.name} | ${(avgScore * 100).toFixed(0)}% | ${latencyMs}ms`)

          return {
            model: job.model,
            case: job.case,
            output,
            scores,
            latencyMs,
            cost,
          }
        } catch (err) {
          console.log(`   ✗ ${job.model.name} | ${job.case.name} | ERROR: ${err}`)

          return {
            model: job.model,
            case: job.case,
            // No output exists for a failed task; callers should check `error`
            output: null as unknown as TOutput,
            scores: scorers.map(s => ({ name: s.name, score: 0, description: s.description })),
            latencyMs: Date.now() - caseStart,
            cost: 0,
            error: String(err),
          }
        }
      })
    )

    results.push(...batchResults)
  }

  // Calculate summary
  const totalTime = Date.now() - startTime
  const totalCost = results.reduce((sum, r) => sum + r.cost, 0)
  const avgScore = meanOrZero(results.flatMap(r => r.scores.map(s => s.score)))

  // Group by model: accumulate per-result averages, then divide by the count
  const byModel: Record<string, { avgScore: number; count: number }> = {}
  for (const result of results) {
    const modelKey = result.model.id
    let stats = byModel[modelKey]
    if (!stats) {
      stats = { avgScore: 0, count: 0 }
      byModel[modelKey] = stats
    }
    // Guarded mean: with no scorers this is 0 rather than NaN
    stats.avgScore += meanOrZero(result.scores.map(s => s.score))
    stats.count++
  }
  for (const stats of Object.values(byModel)) {
    stats.avgScore /= stats.count
  }

  console.log('')
  console.log(`📊 Results:`)
  console.log(`   Overall: ${(avgScore * 100).toFixed(1)}%`)
  console.log(`   Time: ${(totalTime / 1000).toFixed(1)}s`)
  console.log(`   Cost: $${totalCost.toFixed(4)}`)
  console.log('')
  console.log('   By Model:')
  for (const [modelId, stats] of Object.entries(byModel)) {
    console.log(`   - ${modelId}: ${(stats.avgScore * 100).toFixed(1)}%`)
  }

  return {
    name,
    results,
    avgScore,
    byModel,
    totalCost,
    totalTime,
  }
}
215
+
216
+ // Re-export helpers
217
+ export { generateObject, generateText, schema }
@@ -0,0 +1,245 @@
1
+ /**
2
+ * AI Generation functions with automatic model resolution and routing
3
+ *
4
+ * Wraps AI SDK generateObject and generateText with smart model routing:
5
+ * - Simple aliases: 'opus', 'sonnet', 'gpt-4o'
6
+ * - Full IDs: 'anthropic/claude-sonnet-4.5'
7
+ * - Auto-routes to native SDKs for openai/anthropic/google
8
+ *
9
+ * @packageDocumentation
10
+ */
11
+
12
+ import {
13
+ generateObject as sdkGenerateObject,
14
+ generateText as sdkGenerateText,
15
+ streamObject as sdkStreamObject,
16
+ streamText as sdkStreamText,
17
+ type GenerateObjectResult,
18
+ type GenerateTextResult,
19
+ type StreamObjectResult,
20
+ type StreamTextResult,
21
+ type LanguageModel
22
+ } from 'ai'
23
+ import { schema as convertSchema, type SimpleSchema } from './schema.js'
24
+ import type { ZodTypeAny } from 'zod'
25
+
26
/** A model reference: a string to be resolved by `resolveModel`, or an existing `LanguageModel`. */
type ModelArg = string | LanguageModel
/** A schema supplied either as a Zod type or in the simplified schema syntax. */
type SchemaArg = ZodTypeAny | SimpleSchema

/**
 * Options shared by the object and text wrappers in this module.
 *
 * Both option shapes previously repeated these members verbatim; keeping them
 * in one place prevents the two from drifting apart. Apart from `model`, which
 * is resolved first, the wrappers spread these values into the AI SDK call.
 */
interface BaseGenerationOptions {
  model: ModelArg
  prompt?: string
  messages?: Array<{ role: 'user' | 'assistant' | 'system'; content: string }>
  system?: string
  maxTokens?: number
  temperature?: number
  topP?: number
  topK?: number
  presencePenalty?: number
  frequencyPenalty?: number
  seed?: number
  maxRetries?: number
  abortSignal?: AbortSignal
  headers?: Record<string, string>
  experimental_telemetry?: { isEnabled?: boolean; functionId?: string; metadata?: Record<string, string> }
}

/**
 * Options accepted by `generateObject` and `streamObject`.
 *
 * @typeParam T - Type of the value supplied as `schema`.
 */
interface GenerateObjectOptions<T> extends BaseGenerationOptions {
  schema: T
  mode?: 'auto' | 'json' | 'tool'
}

/**
 * Options accepted by `generateText` and `streamText`.
 */
interface GenerateTextOptions extends BaseGenerationOptions {
  tools?: Record<string, unknown>
  toolChoice?: 'auto' | 'none' | 'required' | { type: 'tool'; toolName: string }
  maxSteps?: number
}
69
+
70
/**
 * Turn a model argument into a `LanguageModel`.
 *
 * Anything that is not a string is handed back untouched. A string is passed
 * to the `model` function exported by `ai-providers`, which is loaded through
 * a dynamic import only on that path.
 *
 * @param modelArg - Model string or an existing `LanguageModel`.
 * @returns The resolved `LanguageModel`.
 */
async function resolveModel(modelArg: ModelArg): Promise<LanguageModel> {
  if (typeof modelArg === 'string') {
    // Deferred import: only string arguments need the provider registry.
    const { model: lookupModel } = await import('ai-providers')
    return lookupModel(modelArg)
  }

  // Caller supplied a ready-made instance.
  return modelArg
}
84
+
85
/**
 * Check whether a value looks like a Zod schema.
 *
 * A value qualifies when it is a non-null object that has a `_def` property
 * and a callable `parse`. Requiring `parse` to be a function (rather than
 * merely present) keeps a simplified schema whose fields happen to be named
 * `_def` and `parse` from being mistaken for a Zod schema.
 *
 * @param value - Any value.
 * @returns `true` when `value` has the shape described above.
 */
function isZodSchema(value: unknown): value is ZodTypeAny {
  if (typeof value !== 'object' || value === null) {
    return false
  }
  return '_def' in value && typeof (value as { parse?: unknown }).parse === 'function'
}
94
+
95
/**
 * Produce a Zod schema from either accepted schema form.
 *
 * Values recognised by `isZodSchema` are returned unchanged; everything else
 * is treated as a simplified schema and run through `convertSchema`.
 *
 * @param schemaArg - Zod schema or simplified schema.
 * @returns A Zod schema.
 */
function resolveSchema(schemaArg: SchemaArg): ZodTypeAny {
  return isZodSchema(schemaArg)
    ? schemaArg
    : convertSchema(schemaArg as SimpleSchema)
}
104
+
105
/**
 * Generate a structured object from a prompt.
 *
 * The `model` option is resolved with `resolveModel` and the `schema` option
 * with `resolveSchema`, so both model strings and simplified schemas are
 * accepted. All other options are forwarded to the AI SDK's `generateObject`
 * together with `output: 'object'`.
 *
 * The result is typed as `GenerateObjectResult<T>`, where `T` is the type of
 * the value passed as `schema`.
 *
 * @example
 * ```ts
 * import { generateObject } from 'ai-functions'
 *
 * // Simplified schema syntax
 * const { object } = await generateObject({
 *   model: 'sonnet',
 *   schema: {
 *     recipe: {
 *       name: 'What is the recipe name?',
 *       type: 'food | drink | dessert',
 *       ingredients: ['List all ingredients'],
 *       steps: ['List all cooking steps'],
 *     },
 *   },
 *   prompt: 'Generate a lasagna recipe.',
 * })
 *
 * // A Zod schema works as well
 * import { z } from 'zod'
 * const { object } = await generateObject({
 *   model: 'sonnet',
 *   schema: z.object({
 *     name: z.string(),
 *     ingredients: z.array(z.string()),
 *   }),
 *   prompt: 'Generate a lasagna recipe.',
 * })
 * ```
 *
 * @param options - Generation options; `model` and `schema` are resolved before the call.
 * @returns The promise produced by the AI SDK's `generateObject`.
 */
export async function generateObject<T>(
  options: GenerateObjectOptions<T>
): Promise<GenerateObjectResult<T>> {
  const resolvedModel = await resolveModel(options.model)
  const zodSchema = resolveSchema(options.schema as SchemaArg)

  // Resolved values come after the spread so they replace the caller's raw
  // `model` / `schema`. The `any` cast absorbs AI SDK v4 API variance.
  const callArgs = {
    ...options,
    model: resolvedModel,
    schema: zodSchema,
    output: 'object'
  } as any

  return sdkGenerateObject(callArgs) as Promise<GenerateObjectResult<T>>
}
154
+
155
/**
 * Generate text from a prompt.
 *
 * The `model` option is resolved with `resolveModel`; every other option is
 * forwarded unchanged to the AI SDK's `generateText`.
 *
 * @example
 * ```ts
 * import { generateText } from 'ai-functions'
 *
 * const { text } = await generateText({
 *   model: 'opus', // model alias
 *   prompt: 'Write a haiku about programming.',
 * })
 *
 * // With tools
 * const { text, toolResults } = await generateText({
 *   model: 'gpt-4o',
 *   prompt: 'What is the weather in San Francisco?',
 *   tools: { ... },
 *   maxSteps: 5,
 * })
 * ```
 *
 * @param options - Generation options; `model` is resolved before the call.
 * @returns The result of the AI SDK's `generateText`.
 */
export async function generateText(
  options: GenerateTextOptions
): Promise<Awaited<ReturnType<typeof sdkGenerateText>>> {
  const resolvedModel = await resolveModel(options.model)

  // The resolved model replaces the caller's raw `model` value.
  const callArgs = {
    ...options,
    model: resolvedModel
  } as Parameters<typeof sdkGenerateText>[0]

  return sdkGenerateText(callArgs)
}
187
+
188
/**
 * Stream a structured object from a prompt.
 *
 * The `model` option is resolved with `resolveModel` and the `schema` option
 * with `resolveSchema`; the remaining options are forwarded to the AI SDK's
 * `streamObject` together with `output: 'object'`.
 *
 * Model resolution is asynchronous, so this wrapper returns a Promise. Await
 * it before destructuring: reading `partialObjectStream` straight off the
 * un-awaited Promise yields `undefined`.
 *
 * @example
 * ```ts
 * import { streamObject } from 'ai-functions'
 *
 * const { partialObjectStream } = await streamObject({
 *   model: 'sonnet',
 *   schema: { story: 'Write a creative story' },
 *   prompt: 'Write a short story.',
 * })
 *
 * for await (const partial of partialObjectStream) {
 *   console.log(partial.story)
 * }
 * ```
 *
 * @param options - Streaming options; `model` and `schema` are resolved before the call.
 * @returns A Promise for the stream result returned by the AI SDK's `streamObject`.
 */
export async function streamObject<T>(
  options: GenerateObjectOptions<T>
): Promise<StreamObjectResult<T, T, never>> {
  const model = await resolveModel(options.model)
  const schema = resolveSchema(options.schema as SchemaArg)
  // Resolved values come after the spread so they replace the caller's raw
  // `model` / `schema`. The `any` cast absorbs AI SDK API variance.
  return sdkStreamObject({
    ...options,
    model,
    schema,
    output: 'object'
  } as any) as unknown as StreamObjectResult<T, T, never>
}
219
+
220
/**
 * Stream text from a prompt.
 *
 * The `model` option is resolved with `resolveModel`; every other option is
 * forwarded unchanged to the AI SDK's `streamText`.
 *
 * Model resolution is asynchronous, so this wrapper returns a Promise. Await
 * it before destructuring: reading `textStream` straight off the un-awaited
 * Promise yields `undefined`.
 *
 * @example
 * ```ts
 * import { streamText } from 'ai-functions'
 *
 * const { textStream } = await streamText({
 *   model: 'gemini',
 *   prompt: 'Explain quantum computing.',
 * })
 *
 * for await (const chunk of textStream) {
 *   process.stdout.write(chunk)
 * }
 * ```
 *
 * @param options - Streaming options; `model` is resolved before the call.
 * @returns A Promise for the stream result returned by the AI SDK's `streamText`.
 */
export async function streamText(
  options: GenerateTextOptions
): Promise<ReturnType<typeof sdkStreamText>> {
  const model = await resolveModel(options.model)
  // The resolved model replaces the caller's raw `model` value.
  return sdkStreamText({
    ...options,
    model
  } as Parameters<typeof sdkStreamText>[0])
}