ai-functions 2.1.3 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (277) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +55 -1
  3. package/README.md +38 -0
  4. package/dist/ai-promise.d.ts +3 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +135 -64
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +51 -858
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts.map +1 -1
  56. package/dist/budget.js +27 -14
  57. package/dist/budget.js.map +1 -1
  58. package/dist/cache.d.ts +23 -0
  59. package/dist/cache.d.ts.map +1 -1
  60. package/dist/cache.js +36 -15
  61. package/dist/cache.js.map +1 -1
  62. package/dist/context.d.ts +26 -8
  63. package/dist/context.d.ts.map +1 -1
  64. package/dist/context.js +64 -62
  65. package/dist/context.js.map +1 -1
  66. package/dist/digital-objects-registry.d.ts +229 -0
  67. package/dist/digital-objects-registry.d.ts.map +1 -0
  68. package/dist/digital-objects-registry.js +617 -0
  69. package/dist/digital-objects-registry.js.map +1 -0
  70. package/dist/embeddings.d.ts +2 -2
  71. package/dist/embeddings.d.ts.map +1 -1
  72. package/dist/errors.d.ts +22 -0
  73. package/dist/errors.d.ts.map +1 -0
  74. package/dist/errors.js +35 -0
  75. package/dist/errors.js.map +1 -0
  76. package/dist/eval/runner.d.ts +8 -0
  77. package/dist/eval/runner.d.ts.map +1 -1
  78. package/dist/eval/runner.js +41 -35
  79. package/dist/eval/runner.js.map +1 -1
  80. package/dist/eval-log/in-memory.d.ts +34 -0
  81. package/dist/eval-log/in-memory.d.ts.map +1 -0
  82. package/dist/eval-log/in-memory.js +84 -0
  83. package/dist/eval-log/in-memory.js.map +1 -0
  84. package/dist/eval-log/index.d.ts +29 -0
  85. package/dist/eval-log/index.d.ts.map +1 -0
  86. package/dist/eval-log/index.js +39 -0
  87. package/dist/eval-log/index.js.map +1 -0
  88. package/dist/eval-log/types.d.ts +101 -0
  89. package/dist/eval-log/types.d.ts.map +1 -0
  90. package/dist/eval-log/types.js +16 -0
  91. package/dist/eval-log/types.js.map +1 -0
  92. package/dist/function-registry.d.ts +116 -0
  93. package/dist/function-registry.d.ts.map +1 -0
  94. package/dist/function-registry.js +546 -0
  95. package/dist/function-registry.js.map +1 -0
  96. package/dist/generate.d.ts +9 -3
  97. package/dist/generate.d.ts.map +1 -1
  98. package/dist/generate.js +18 -18
  99. package/dist/generate.js.map +1 -1
  100. package/dist/index.d.ts +18 -11
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js +35 -18
  103. package/dist/index.js.map +1 -1
  104. package/dist/logger.d.ts +118 -0
  105. package/dist/logger.d.ts.map +1 -0
  106. package/dist/logger.js +187 -0
  107. package/dist/logger.js.map +1 -0
  108. package/dist/middleware/budget.d.ts +84 -0
  109. package/dist/middleware/budget.d.ts.map +1 -0
  110. package/dist/middleware/budget.js +110 -0
  111. package/dist/middleware/budget.js.map +1 -0
  112. package/dist/middleware/cache.d.ts +103 -0
  113. package/dist/middleware/cache.d.ts.map +1 -0
  114. package/dist/middleware/cache.js +228 -0
  115. package/dist/middleware/cache.js.map +1 -0
  116. package/dist/middleware/embed-cache.d.ts +99 -0
  117. package/dist/middleware/embed-cache.d.ts.map +1 -0
  118. package/dist/middleware/embed-cache.js +128 -0
  119. package/dist/middleware/embed-cache.js.map +1 -0
  120. package/dist/middleware/index.d.ts +11 -0
  121. package/dist/middleware/index.d.ts.map +1 -0
  122. package/dist/middleware/index.js +11 -0
  123. package/dist/middleware/index.js.map +1 -0
  124. package/dist/middleware/trace.d.ts +103 -0
  125. package/dist/middleware/trace.d.ts.map +1 -0
  126. package/dist/middleware/trace.js +176 -0
  127. package/dist/middleware/trace.js.map +1 -0
  128. package/dist/primitives.d.ts +120 -1
  129. package/dist/primitives.d.ts.map +1 -1
  130. package/dist/primitives.js +398 -26
  131. package/dist/primitives.js.map +1 -1
  132. package/dist/retry.d.ts +66 -1
  133. package/dist/retry.d.ts.map +1 -1
  134. package/dist/retry.js +115 -8
  135. package/dist/retry.js.map +1 -1
  136. package/dist/schema.js +2 -2
  137. package/dist/schema.js.map +1 -1
  138. package/dist/telemetry.d.ts +128 -0
  139. package/dist/telemetry.d.ts.map +1 -0
  140. package/dist/telemetry.js +285 -0
  141. package/dist/telemetry.js.map +1 -0
  142. package/dist/template.d.ts.map +1 -1
  143. package/dist/template.js +6 -1
  144. package/dist/template.js.map +1 -1
  145. package/dist/tool-orchestration.d.ts +66 -4
  146. package/dist/tool-orchestration.d.ts.map +1 -1
  147. package/dist/tool-orchestration.js +123 -23
  148. package/dist/tool-orchestration.js.map +1 -1
  149. package/dist/type-guards.d.ts +28 -0
  150. package/dist/type-guards.d.ts.map +1 -0
  151. package/dist/type-guards.js +29 -0
  152. package/dist/type-guards.js.map +1 -0
  153. package/dist/types.d.ts +135 -17
  154. package/dist/types.d.ts.map +1 -1
  155. package/dist/types.js +36 -1
  156. package/dist/types.js.map +1 -1
  157. package/dist/wrap-for-v3.d.ts +80 -0
  158. package/dist/wrap-for-v3.d.ts.map +1 -0
  159. package/dist/wrap-for-v3.js +89 -0
  160. package/dist/wrap-for-v3.js.map +1 -0
  161. package/examples/00-quickstart.ts +232 -0
  162. package/examples/01-rag-chatbot.ts +212 -0
  163. package/examples/02-multi-agent-research.ts +290 -0
  164. package/examples/03-email-classification.ts +379 -0
  165. package/examples/04-content-moderation.ts +400 -0
  166. package/examples/05-document-extraction.ts +455 -0
  167. package/examples/06-streaming-chat-nextjs.ts +437 -0
  168. package/examples/07-cloudflare-worker.ts +483 -0
  169. package/examples/08-batch-processing.ts +491 -0
  170. package/examples/09-budget-constrained.ts +527 -0
  171. package/examples/10-tool-orchestration.ts +565 -0
  172. package/examples/11-retry-resilience.ts +403 -0
  173. package/examples/12-caching-strategies.ts +422 -0
  174. package/examples/README.md +145 -0
  175. package/package.json +28 -25
  176. package/src/ai-promise.ts +226 -140
  177. package/src/ai-schemas.ts +122 -0
  178. package/src/ai.ts +69 -1176
  179. package/src/batch/anthropic.ts +96 -161
  180. package/src/batch/bedrock.ts +203 -454
  181. package/src/batch/cloudflare.ts +99 -282
  182. package/src/batch/google.ts +91 -297
  183. package/src/batch/index.ts +4 -1
  184. package/src/batch/memory.ts +15 -10
  185. package/src/batch/openai.ts +65 -193
  186. package/src/batch/provider.ts +336 -0
  187. package/src/batch-map.ts +29 -24
  188. package/src/batch-queue.ts +200 -11
  189. package/src/budget.ts +31 -18
  190. package/src/cache.ts +45 -17
  191. package/src/context.ts +106 -77
  192. package/src/digital-objects-registry.ts +750 -0
  193. package/src/errors.ts +37 -0
  194. package/src/eval/runner.ts +60 -36
  195. package/src/eval-log/in-memory.ts +90 -0
  196. package/src/eval-log/index.ts +46 -0
  197. package/src/eval-log/types.ts +110 -0
  198. package/src/function-registry.ts +671 -0
  199. package/src/generate.ts +33 -28
  200. package/src/index.ts +119 -21
  201. package/src/logger.ts +232 -0
  202. package/src/middleware/budget.ts +171 -0
  203. package/src/middleware/cache.ts +299 -0
  204. package/src/middleware/embed-cache.ts +195 -0
  205. package/src/middleware/index.ts +23 -0
  206. package/src/middleware/trace.ts +248 -0
  207. package/src/primitives.ts +589 -62
  208. package/src/retry.ts +144 -18
  209. package/src/schema.ts +8 -8
  210. package/src/telemetry.ts +403 -0
  211. package/src/template.ts +8 -4
  212. package/src/tool-orchestration.ts +213 -48
  213. package/src/type-guards.ts +31 -0
  214. package/src/types.ts +164 -25
  215. package/src/wrap-for-v3.ts +105 -0
  216. package/test/ai-promise.test.ts +1080 -0
  217. package/test/ai-proxy.test.ts +1 -1
  218. package/test/batch-autosubmit-errors.test.ts +49 -37
  219. package/test/batch-blog-posts.test.ts +87 -129
  220. package/test/core-functions.test.ts +183 -579
  221. package/test/decide.test.ts +154 -322
  222. package/test/define.test.ts +211 -8
  223. package/test/digital-objects-registry.test.ts +760 -0
  224. package/test/embedding-cache-middleware.test.ts +140 -0
  225. package/test/generate-core.test.ts +140 -229
  226. package/test/implicit-batch.test.ts +22 -65
  227. package/test/retry-policy-integration.test.ts +117 -0
  228. package/test/schema.test.ts +55 -19
  229. package/test/template.test.ts +1164 -0
  230. package/test/tool-orchestration.test.ts +270 -0
  231. package/test/wrap-for-v3.test.ts +612 -0
  232. package/vitest.config.js +6 -0
  233. package/vitest.config.ts +20 -0
  234. package/LICENSE +0 -21
  235. package/dist/rpc/auth.d.ts +0 -69
  236. package/dist/rpc/auth.d.ts.map +0 -1
  237. package/dist/rpc/auth.js +0 -136
  238. package/dist/rpc/auth.js.map +0 -1
  239. package/dist/rpc/client.d.ts +0 -62
  240. package/dist/rpc/client.d.ts.map +0 -1
  241. package/dist/rpc/client.js +0 -103
  242. package/dist/rpc/client.js.map +0 -1
  243. package/dist/rpc/deferred.d.ts +0 -60
  244. package/dist/rpc/deferred.d.ts.map +0 -1
  245. package/dist/rpc/deferred.js +0 -96
  246. package/dist/rpc/deferred.js.map +0 -1
  247. package/dist/rpc/index.d.ts +0 -22
  248. package/dist/rpc/index.d.ts.map +0 -1
  249. package/dist/rpc/index.js +0 -38
  250. package/dist/rpc/index.js.map +0 -1
  251. package/dist/rpc/local.d.ts +0 -42
  252. package/dist/rpc/local.d.ts.map +0 -1
  253. package/dist/rpc/local.js +0 -50
  254. package/dist/rpc/local.js.map +0 -1
  255. package/dist/rpc/server.d.ts +0 -165
  256. package/dist/rpc/server.d.ts.map +0 -1
  257. package/dist/rpc/server.js +0 -405
  258. package/dist/rpc/server.js.map +0 -1
  259. package/dist/rpc/session.d.ts +0 -32
  260. package/dist/rpc/session.d.ts.map +0 -1
  261. package/dist/rpc/session.js +0 -43
  262. package/dist/rpc/session.js.map +0 -1
  263. package/dist/rpc/transport.d.ts +0 -306
  264. package/dist/rpc/transport.d.ts.map +0 -1
  265. package/dist/rpc/transport.js +0 -731
  266. package/dist/rpc/transport.js.map +0 -1
  267. package/src/batch/anthropic.js +0 -256
  268. package/src/batch/bedrock.js +0 -584
  269. package/src/batch/cloudflare.js +0 -287
  270. package/src/batch/google.js +0 -359
  271. package/src/batch/index.js +0 -30
  272. package/src/batch/memory.js +0 -187
  273. package/src/batch/openai.js +0 -402
  274. package/src/eval/index.js +0 -7
  275. package/src/eval/models.js +0 -119
  276. package/src/eval/runner.js +0 -147
  277. package/test/schema.test.js +0 -96
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Error classes for AI primitives
3
+ */
4
+ /**
5
+ * Error thrown when a function is not yet implemented.
6
+ *
7
+ * This is used to clearly indicate at runtime that a function exists
8
+ * in the API but does not have a working implementation yet.
9
+ *
10
+ * @example
11
+ * ```ts
12
+ * throw new NotImplementedError('human', 'Human-in-the-loop functions require channel integrations')
13
+ * ```
14
+ */
15
+ export declare class NotImplementedError extends Error {
16
+ /** The name of the function that is not implemented */
17
+ readonly functionName: string;
18
+ /** Additional details about why it's not implemented or what's needed */
19
+ readonly details?: string;
20
+ constructor(functionName: string, details?: string);
21
+ }
22
+ //# sourceMappingURL=errors.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;;;;;;;;GAUG;AACH,qBAAa,mBAAoB,SAAQ,KAAK;IAC5C,uDAAuD;IACvD,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAA;IAE7B,yEAAyE;IACzE,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,CAAA;gBAEb,YAAY,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM;CAcnD"}
package/dist/errors.js ADDED
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Error classes for AI primitives
3
+ */
4
+ /**
5
+ * Error thrown when a function is not yet implemented.
6
+ *
7
+ * This is used to clearly indicate at runtime that a function exists
8
+ * in the API but does not have a working implementation yet.
9
+ *
10
+ * @example
11
+ * ```ts
12
+ * throw new NotImplementedError('human', 'Human-in-the-loop functions require channel integrations')
13
+ * ```
14
+ */
15
+ export class NotImplementedError extends Error {
16
+ /** The name of the function that is not implemented */
17
+ functionName;
18
+ /** Additional details about why it's not implemented or what's needed */
19
+ details;
20
+ constructor(functionName, details) {
21
+ const message = details
22
+ ? `Function '${functionName}' is not implemented: ${details}`
23
+ : `Function '${functionName}' is not implemented`;
24
+ super(message);
25
+ this.name = 'NotImplementedError';
26
+ this.functionName = functionName;
27
+ if (details !== undefined)
28
+ this.details = details;
29
+ // Maintain proper stack trace for where the error was thrown (V8 engines)
30
+ if (Error.captureStackTrace) {
31
+ Error.captureStackTrace(this, NotImplementedError);
32
+ }
33
+ }
34
+ }
35
+ //# sourceMappingURL=errors.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"errors.js","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;;;;;;;;GAUG;AACH,MAAM,OAAO,mBAAoB,SAAQ,KAAK;IAC5C,uDAAuD;IAC9C,YAAY,CAAQ;IAE7B,yEAAyE;IAChE,OAAO,CAAS;IAEzB,YAAY,YAAoB,EAAE,OAAgB;QAChD,MAAM,OAAO,GAAG,OAAO;YACrB,CAAC,CAAC,aAAa,YAAY,yBAAyB,OAAO,EAAE;YAC7D,CAAC,CAAC,aAAa,YAAY,sBAAsB,CAAA;QACnD,KAAK,CAAC,OAAO,CAAC,CAAA;QACd,IAAI,CAAC,IAAI,GAAG,qBAAqB,CAAA;QACjC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAA;QAChC,IAAI,OAAO,KAAK,SAAS;YAAE,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QAEjD,0EAA0E;QAC1E,IAAI,KAAK,CAAC,iBAAiB,EAAE,CAAC;YAC5B,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAA;QACpD,CAAC;IACH,CAAC;CACF"}
@@ -7,6 +7,10 @@
7
7
  import { generateObject, generateText } from '../generate.js';
8
8
  import { schema } from '../schema.js';
9
9
  import { type EvalModel, type ModelTier } from './models.js';
10
+ /**
11
+ * Output function type for eval progress reporting
12
+ */
13
+ export type EvalOutputFn = (message: string) => void;
10
14
  export interface EvalCase<TInput = unknown, TExpected = unknown> {
11
15
  name: string;
12
16
  input: TInput;
@@ -56,6 +60,10 @@ export interface RunEvalOptions<TInput, TOutput, TExpected> {
56
60
  tiers?: ModelTier[];
57
61
  providers?: string[];
58
62
  concurrency?: number;
63
+ /** Custom output function for progress reporting (defaults to logger.info) */
64
+ output?: EvalOutputFn;
65
+ /** Whether to suppress progress output (defaults to false) */
66
+ quiet?: boolean;
59
67
  }
60
68
  /**
61
69
  * Run an eval suite across models
@@ -1 +1 @@
1
- {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAA;AACrC,OAAO,EAAwC,KAAK,SAAS,EAAE,KAAK,SAAS,EAAE,MAAM,aAAa,CAAA;AAElG,MAAM,WAAW,QAAQ,CAAC,MAAM,GAAG,OAAO,EAAE,SAAS,GAAG,OAAO;IAC7D,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,SAAS,CAAA;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,UAAU,CAAC,OAAO,GAAG,OAAO;IAC3C,KAAK,EAAE,SAAS,CAAA;IAChB,IAAI,EAAE,QAAQ,CAAA;IACd,mEAAmE;IACnE,MAAM,EAAE,OAAO,GAAG,IAAI,CAAA;IACtB,MAAM,EAAE,SAAS,EAAE,CAAA;IACnB,SAAS,EAAE,MAAM,CAAA;IACjB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,UAAU,EAAE,CAAA;IACrB,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC5D,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS;IACxD,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,CAAA;IACpC,IAAI,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC,OAAO,CAAC,CAAA;IAC3D,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAA;QACZ,WAAW,CAAC,EAAE,MAAM,CAAA;QACpB,MAAM,EAAE,CAAC,IAAI,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,OAAO,CAAC;YAAC,QAAQ,CAAC,EAAE,SAAS,CAAA;SAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;KACrG,CAAC,CAAA;IACF,MAAM,CAAC,EAAE,SAAS,EAAE,CAAA;IACpB,KAAK,CAAC,EAAE,SAAS,EAAE,CAAA;IACnB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAA;IACpB,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS,EACtD,OAAO,EAAE,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS,CAAC,GAClD,OAAO,CAAC,WAAW,CAAC,CAsJtB;AAGD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,CAAA"}
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAA;AACrC,OAAO,EAAwC,KAAK,SAAS,EAAE,KAAK,SAAS,EAAE,MAAM,aAAa,CAAA;AAGlG;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAA;AAOpD,MAAM,WAAW,QAAQ,CAAC,MAAM,GAAG,OAAO,EAAE,SAAS,GAAG,OAAO;IAC7D,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,QAAQ,CAAC,EAAE,SAAS,CAAA;CACrB;AAED,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAA;CACnB;AAED,MAAM,WAAW,UAAU,CAAC,OAAO,GAAG,OAAO;IAC3C,KAAK,EAAE,SAAS,CAAA;IAChB,IAAI,EAAE,QAAQ,CAAA;IACd,mEAAmE;IACnE,MAAM,EAAE,OAAO,GAAG,IAAI,CAAA;IACtB,MAAM,EAAE,SAAS,EAAE,CAAA;IACnB,SAAS,EAAE,MAAM,CAAA;IACjB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,UAAU,EAAE,CAAA;IACrB,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IAC5D,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS;IACxD,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,CAAC,EAAE,CAAA;IACpC,IAAI,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC,OAAO,CAAC,CAAA;IAC3D,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,MAAM,CAAA;QACZ,WAAW,CAAC,EAAE,MAAM,CAAA;QACpB,MAAM,EAAE,CAAC,IAAI,EAAE;YACb,KAAK,EAAE,MAAM,CAAA;YACb,MAAM,EAAE,OAAO,CAAA;YACf,QAAQ,CAAC,EAAE,SAAS,CAAA;SACrB,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAA;KAC/B,CAAC,CAAA;IACF,MAAM,CAAC,EAAE,SAAS,EAAE,CAAA;IACpB,KAAK,CAAC,EAAE,SAAS,EAAE,CAAA;IACnB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAA;IACpB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,8EAA8E;IAC9E,MAAM,CAAC,EAAE,YAAY,CAAA;IACrB,8DAA8D;IAC9D,KAAK,CAAC,EAAE,OAAO,CAAA;CAChB;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS,EACtD,OAAO,EAAE,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,SAAS,CAAC,GAClD,OAAO,CAAC,WAAW,CA
AC,CA2JtB;AAGD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,CAAA"}
@@ -7,22 +7,30 @@
7
7
  import { generateObject, generateText } from '../generate.js';
8
8
  import { schema } from '../schema.js';
9
9
  import { createModelVariants, getModelPricing } from './models.js';
10
+ import { getLogger } from '../logger.js';
11
+ /**
12
+ * Default output function uses logger.info
13
+ */
14
+ const defaultOutput = (message) => getLogger().info(message);
10
15
  /**
11
16
  * Run an eval suite across models
12
17
  */
13
18
  export async function runEval(options) {
14
- const { name, cases, task, scorers, concurrency = 3 } = options;
19
+ const { name, cases, task, scorers, concurrency = 3, quiet = false } = options;
20
+ const log = quiet ? () => { } : options.output ?? defaultOutput;
15
21
  // Get models to test
16
- const models = options.models ?? createModelVariants({
17
- tiers: options.tiers,
18
- providers: options.providers,
19
- }).map(v => v.input);
22
+ const variantOptions = {};
23
+ if (options.tiers !== undefined)
24
+ variantOptions.tiers = options.tiers;
25
+ if (options.providers !== undefined)
26
+ variantOptions.providers = options.providers;
27
+ const models = options.models ?? createModelVariants(variantOptions).map((v) => v.input);
20
28
  const results = [];
21
29
  const startTime = Date.now();
22
- console.log(`\n🧪 Running eval: ${name}`);
23
- console.log(` Models: ${models.map(m => m.name).join(', ')}`);
24
- console.log(` Cases: ${cases.length}`);
25
- console.log('');
30
+ log(`\nRunning eval: ${name}`);
31
+ log(` Models: ${models.map((m) => m.name).join(', ')}`);
32
+ log(` Cases: ${cases.length}`);
33
+ log('');
26
34
  // Run all model/case combinations
27
35
  const jobs = [];
28
36
  for (const model of models) {
@@ -37,7 +45,7 @@ export async function runEval(options) {
37
45
  const caseStart = Date.now();
38
46
  try {
39
47
  // Run the task
40
- const output = await task(job.case.input, job.model);
48
+ const taskOutput = await task(job.case.input, job.model);
41
49
  const latencyMs = Date.now() - caseStart;
42
50
  // Run scorers
43
51
  const scores = [];
@@ -45,20 +53,20 @@ export async function runEval(options) {
45
53
  try {
46
54
  const score = await s.scorer({
47
55
  input: job.case.input,
48
- output,
49
- expected: job.case.expected,
56
+ output: taskOutput,
57
+ ...(job.case.expected !== undefined && { expected: job.case.expected }),
50
58
  });
51
59
  scores.push({
52
60
  name: s.name,
53
61
  score: Math.max(0, Math.min(1, score)),
54
- description: s.description,
62
+ ...(s.description && { description: s.description }),
55
63
  });
56
64
  }
57
65
  catch (err) {
58
66
  scores.push({
59
67
  name: s.name,
60
68
  score: 0,
61
- description: s.description,
69
+ ...(s.description && { description: s.description }),
62
70
  metadata: { error: String(err) },
63
71
  });
64
72
  }
@@ -69,29 +77,29 @@ export async function runEval(options) {
69
77
  const estimatedPromptTokens = 100;
70
78
  const estimatedCompletionTokens = 200;
71
79
  const cost = pricing
72
- ? (estimatedPromptTokens * pricing.prompt + estimatedCompletionTokens * pricing.completion) / 1_000_000
73
- : 0;
74
- const avgScore = scores.length > 0
75
- ? scores.reduce((sum, s) => sum + s.score, 0) / scores.length
80
+ ? (estimatedPromptTokens * pricing.prompt +
81
+ estimatedCompletionTokens * pricing.completion) /
82
+ 1_000_000
76
83
  : 0;
77
- const symbol = avgScore >= 0.8 ? '✓' : avgScore >= 0.5 ? '~' : '✗';
78
- console.log(` ${symbol} ${job.model.name} | ${job.case.name} | ${(avgScore * 100).toFixed(0)}% | ${latencyMs}ms`);
84
+ const avgScore = scores.length > 0 ? scores.reduce((sum, s) => sum + s.score, 0) / scores.length : 0;
85
+ const symbol = avgScore >= 0.8 ? 'PASS' : avgScore >= 0.5 ? 'WARN' : 'FAIL';
86
+ log(` ${symbol} ${job.model.name} | ${job.case.name} | ${(avgScore * 100).toFixed(0)}% | ${latencyMs}ms`);
79
87
  return {
80
88
  model: job.model,
81
89
  case: job.case,
82
- output,
90
+ output: taskOutput,
83
91
  scores,
84
92
  latencyMs,
85
93
  cost,
86
94
  };
87
95
  }
88
96
  catch (err) {
89
- console.log(` ${job.model.name} | ${job.case.name} | ERROR: ${err}`);
97
+ log(` FAIL ${job.model.name} | ${job.case.name} | ERROR: ${err}`);
90
98
  return {
91
99
  model: job.model,
92
100
  case: job.case,
93
101
  output: null,
94
- scores: scorers.map(s => ({ name: s.name, score: 0 })),
102
+ scores: scorers.map((s) => ({ name: s.name, score: 0 })),
95
103
  latencyMs: Date.now() - caseStart,
96
104
  cost: 0,
97
105
  error: String(err),
@@ -103,10 +111,8 @@ export async function runEval(options) {
103
111
  // Calculate summary
104
112
  const totalTime = Date.now() - startTime;
105
113
  const totalCost = results.reduce((sum, r) => sum + r.cost, 0);
106
- const allScores = results.flatMap(r => r.scores.map(s => s.score));
107
- const avgScore = allScores.length > 0
108
- ? allScores.reduce((a, b) => a + b, 0) / allScores.length
109
- : 0;
114
+ const allScores = results.flatMap((r) => r.scores.map((s) => s.score));
115
+ const avgScore = allScores.length > 0 ? allScores.reduce((a, b) => a + b, 0) / allScores.length : 0;
110
116
  // Group by model
111
117
  const byModel = {};
112
118
  for (const result of results) {
@@ -124,15 +130,15 @@ export async function runEval(options) {
124
130
  entry.avgScore /= entry.count;
125
131
  }
126
132
  }
127
- console.log('');
128
- console.log(`📊 Results:`);
129
- console.log(` Overall: ${(avgScore * 100).toFixed(1)}%`);
130
- console.log(` Time: ${(totalTime / 1000).toFixed(1)}s`);
131
- console.log(` Cost: $${totalCost.toFixed(4)}`);
132
- console.log('');
133
- console.log(' By Model:');
133
+ log('');
134
+ log(`Results:`);
135
+ log(` Overall: ${(avgScore * 100).toFixed(1)}%`);
136
+ log(` Time: ${(totalTime / 1000).toFixed(1)}s`);
137
+ log(` Cost: $${totalCost.toFixed(4)}`);
138
+ log('');
139
+ log(' By Model:');
134
140
  for (const [modelId, stats] of Object.entries(byModel)) {
135
- console.log(` - ${modelId}: ${(stats.avgScore * 100).toFixed(1)}%`);
141
+ log(` - ${modelId}: ${(stats.avgScore * 100).toFixed(1)}%`);
136
142
  }
137
143
  return {
138
144
  name,
@@ -1 +1 @@
1
- {"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAA;AACrC,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAkC,MAAM,aAAa,CAAA;AAkDlG;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,OAAmD;IAEnD,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,WAAW,GAAG,CAAC,EAAE,GAAG,OAAO,CAAA;IAE/D,qBAAqB;IACrB,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,mBAAmB,CAAC;QACnD,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,SAAS,EAAE,OAAO,CAAC,SAAS;KAC7B,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAA;IAEpB,MAAM,OAAO,GAA0B,EAAE,CAAA;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;IAE5B,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,EAAE,CAAC,CAAA;IACzC,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/D,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,EAAE,CAAC,CAAA;IACxC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;IAEf,kCAAkC;IAClC,MAAM,IAAI,GAAmE,EAAE,CAAA;IAC/E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;YAC7B,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;QACtC,CAAC;IACH,CAAC;IAED,4CAA4C;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAA;QAE5C,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;YACtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;YAE5B,IAAI,CAAC;gBACH,eAAe;gBACf,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,KAAK,CAAC,CAAA;gBACpD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;gBAExC,cAAc;gBACd,MAAM,MAAM,GAAgB,EAAE,CAAA;gBAC9B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;oBACxB,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,MAAM,CAAC;4BAC3B,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK;4BACrB,MAAM;4BACN,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,QAAQ;yBAC5B,CAAC,CAAA;wBACF,MAAM,CAAC,IAAI,CAAC;4BACV,IAAI,EAAE,CAAC,
CAAC,IAAI;4BACZ,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;4BACtC,WAAW,EAAE,CAAC,CAAC,WAAW;yBAC3B,CAAC,CAAA;oBACJ,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,MAAM,CAAC,IAAI,CAAC;4BACV,IAAI,EAAE,CAAC,CAAC,IAAI;4BACZ,KAAK,EAAE,CAAC;4BACR,WAAW,EAAE,CAAC,CAAC,WAAW;4BAC1B,QAAQ,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE;yBACjC,CAAC,CAAA;oBACJ,CAAC;gBACH,CAAC;gBAED,iBAAiB;gBACjB,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;gBAC7C,wCAAwC;gBACxC,MAAM,qBAAqB,GAAG,GAAG,CAAA;gBACjC,MAAM,yBAAyB,GAAG,GAAG,CAAA;gBACrC,MAAM,IAAI,GAAG,OAAO;oBAClB,CAAC,CAAC,CAAC,qBAAqB,GAAG,OAAO,CAAC,MAAM,GAAG,yBAAyB,GAAG,OAAO,CAAC,UAAU,CAAC,GAAG,SAAS;oBACvG,CAAC,CAAC,CAAC,CAAA;gBAEL,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC;oBAChC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM;oBAC7D,CAAC,CAAC,CAAC,CAAA;gBAEL,MAAM,MAAM,GAAG,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;gBAClE,OAAO,CAAC,GAAG,CAAC,MAAM,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,SAAS,IAAI,CAAC,CAAA;gBAEnH,OAAO;oBACL,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,MAAM;oBACN,MAAM;oBACN,SAAS;oBACT,IAAI;iBACL,CAAA;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,aAAa,GAAG,EAAE,CAAC,CAAA;gBAExE,OAAO;oBACL,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,MAAM,EAAE,IAAI;oBACZ,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;oBACtD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBACjC,IAAI,EAAE,CAAC;oBACP,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC;iBACnB,CAAA;YACH,CAAC;QACH,CAAC,CAAC,CACH,CAAA;QAED,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAA;IAC/B,CAAC;IAED,oBAAoB;IACpB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;IACxC,MAA
M,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAA;IAClE,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC;QACnC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM;QACzD,CAAC,CAAC,CAAC,CAAA;IAEL,iBAAiB;IACjB,MAAM,OAAO,GAAwD,EAAE,CAAA;IACvE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,EAAE,CAAA;QAChC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,QAAQ,CAAC,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAA;QAC/C,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAA;QAC3F,OAAO,CAAC,QAAQ,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAA;QACvC,OAAO,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAA;IAC3B,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QAC1B,IAAI,KAAK,EAAE,CAAC;YACV,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAA;QAC/B,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;IACf,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAA;IAC1B,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IAC1D,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACzD,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;IAChD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;IACf,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAA;IAC3B,KAAK,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACvD,OAAO,CAAC,GAAG,CAAC,QAAQ,OAAO,KAAK,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACvE,CAAC;IAED,OAAO;QACL,IAAI;QACJ,OAAO;QACP,QAAQ;QACR,OAAO;QACP,SAAS;QACT,SAAS;KACV,CAAA;AACH,CAAC;AAED,oBAAoB;AACpB,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,CAAA"}
1
+ {"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/eval/runner.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAC7D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAA;AACrC,OAAO,EAAE,mBAAmB,EAAE,eAAe,EAAkC,MAAM,aAAa,CAAA;AAClG,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAA;AAOxC;;GAEG;AACH,MAAM,aAAa,GAAiB,CAAC,OAAe,EAAE,EAAE,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;AA0DlF;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,OAAmD;IAEnD,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,WAAW,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,EAAE,GAAG,OAAO,CAAA;IAC9E,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,IAAI,aAAa,CAAA;IAE9D,qBAAqB;IACrB,MAAM,cAAc,GAAkD,EAAE,CAAA;IACxE,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS;QAAE,cAAc,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAA;IACrE,IAAI,OAAO,CAAC,SAAS,KAAK,SAAS;QAAE,cAAc,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAA;IACjF,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,mBAAmB,CAAC,cAAc,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAA;IAExF,MAAM,OAAO,GAA0B,EAAE,CAAA;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;IAE5B,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAA;IAC9B,GAAG,CAAC,cAAc,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACzD,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,EAAE,CAAC,CAAA;IAChC,GAAG,CAAC,EAAE,CAAC,CAAA;IAEP,kCAAkC;IAClC,MAAM,IAAI,GAAmE,EAAE,CAAA;IAC/E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;YAC7B,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;QACtC,CAAC;IACH,CAAC;IAED,4CAA4C;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QAClD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAA;QAE5C,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE;YACtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;YAE5B,IAAI,CAAC;gBACH,eAAe;gBACf,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,KAAK,CAAC,CAAA
;gBACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;gBAExC,cAAc;gBACd,MAAM,MAAM,GAAgB,EAAE,CAAA;gBAC9B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;oBACxB,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,MAAM,CAAC;4BAC3B,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK;4BACrB,MAAM,EAAE,UAAU;4BAClB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,KAAK,SAAS,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;yBACxE,CAAC,CAAA;wBACF,MAAM,CAAC,IAAI,CAAC;4BACV,IAAI,EAAE,CAAC,CAAC,IAAI;4BACZ,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;4BACtC,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;yBACrD,CAAC,CAAA;oBACJ,CAAC;oBAAC,OAAO,GAAG,EAAE,CAAC;wBACb,MAAM,CAAC,IAAI,CAAC;4BACV,IAAI,EAAE,CAAC,CAAC,IAAI;4BACZ,KAAK,EAAE,CAAC;4BACR,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;4BACpD,QAAQ,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE;yBACjC,CAAC,CAAA;oBACJ,CAAC;gBACH,CAAC;gBAED,iBAAiB;gBACjB,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;gBAC7C,wCAAwC;gBACxC,MAAM,qBAAqB,GAAG,GAAG,CAAA;gBACjC,MAAM,yBAAyB,GAAG,GAAG,CAAA;gBACrC,MAAM,IAAI,GAAG,OAAO;oBAClB,CAAC,CAAC,CAAC,qBAAqB,GAAG,OAAO,CAAC,MAAM;wBACrC,yBAAyB,GAAG,OAAO,CAAC,UAAU,CAAC;wBACjD,SAAS;oBACX,CAAC,CAAC,CAAC,CAAA;gBAEL,MAAM,QAAQ,GACZ,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;gBAErF,MAAM,MAAM,GAAG,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAA;gBAC3E,GAAG,CACD,MAAM,MAAM,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAC7E,CAAC,CACF,OAAO,SAAS,IAAI,CACtB,CAAA;gBAED,OAAO;oBACL,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,MAAM,EAAE,UAAU;oBAClB,MAAM;oBACN,SAAS;oBACT,IAAI;iBACL,CAAA;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,WAAW,GAAG,CAAC,KAAK,CAAC,IAAI,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,aAAa,GAA
G,EAAE,CAAC,CAAA;gBAEnE,OAAO;oBACL,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,IAAI,EAAE,GAAG,CAAC,IAAI;oBACd,MAAM,EAAE,IAAI;oBACZ,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;oBACxD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;oBACjC,IAAI,EAAE,CAAC;oBACP,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC;iBACnB,CAAA;YACH,CAAC;QACH,CAAC,CAAC,CACH,CAAA;QAED,OAAO,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAA;IAC/B,CAAC;IAED,oBAAoB;IACpB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;IACxC,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IAC7D,MAAM,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAA;IACtE,MAAM,QAAQ,GACZ,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAEpF,iBAAiB;IACjB,MAAM,OAAO,GAAwD,EAAE,CAAA;IACvE,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,EAAE,CAAA;QAChC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,QAAQ,CAAC,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAA;QAC/C,CAAC;QACD,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAA;QAC3F,OAAO,CAAC,QAAQ,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAA;QACvC,OAAO,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAA;IAC3B,CAAC;IACD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QAC1B,IAAI,KAAK,EAAE,CAAC;YACV,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,KAAK,CAAA;QAC/B,CAAC;IACH,CAAC;IAED,GAAG,CAAC,EAAE,CAAC,CAAA;IACP,GAAG,CAAC,UAAU,CAAC,CAAA;IACf,GAAG,CAAC,eAAe,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IAClD,GAAG,CAAC,YAAY,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACjD,GAAG,CAAC,aAAa,SAAS,CAAC,OAAO,CAAC
,CAAC,CAAC,EAAE,CAAC,CAAA;IACxC,GAAG,CAAC,EAAE,CAAC,CAAA;IACP,GAAG,CAAC,cAAc,CAAC,CAAA;IACnB,KAAK,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACvD,GAAG,CAAC,QAAQ,OAAO,KAAK,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IAC/D,CAAC;IAED,OAAO;QACL,IAAI;QACJ,OAAO;QACP,QAAQ;QACR,OAAO;QACP,SAAS;QACT,SAAS;KACV,CAAA;AACH,CAAC;AAED,oBAAoB;AACpB,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,CAAA"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * InMemoryEvalLogStore — Map-backed default implementation of
3
+ * {@link EvalLogStore}.
4
+ *
5
+ * Matches Evalite v1's default backend: process-local Map keyed on `$id`,
6
+ * insertion-ordered for "most recent first" listing without sorting. Suitable
7
+ * for single-process tests, evals, and the cascade walker's in-flight log;
8
+ * not suitable for cross-process or multi-worker setups (use a disk/SQLite
9
+ * backend for those — same contract).
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+ import type { EvalLogEntry, EvalLogListOptions, EvalLogStore } from './types.js';
14
+ /**
15
+ * In-memory implementation of {@link EvalLogStore}.
16
+ */
17
+ export declare class InMemoryEvalLogStore implements EvalLogStore {
18
+ /**
19
+ * Map keyed on `$id`. Insertion order on a JS Map is preserved, so we
20
+ * walk it in reverse for "most recent first" listing.
21
+ */
22
+ private readonly entries;
23
+ record(entry: Omit<EvalLogEntry, '$id' | 'createdAt'> & Partial<Pick<EvalLogEntry, '$id' | 'createdAt'>>): Promise<EvalLogEntry>;
24
+ get(id: string): Promise<EvalLogEntry | undefined>;
25
+ list(options?: EvalLogListOptions): Promise<EvalLogEntry[]>;
26
+ delete(id: string): Promise<boolean>;
27
+ /**
28
+ * Convenience for tests: drop every entry. Not on the public
29
+ * {@link EvalLogStore} interface because the disk/SQLite backends may not
30
+ * want to expose a one-shot wipe.
31
+ */
32
+ clear(): void;
33
+ }
34
+ //# sourceMappingURL=in-memory.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"in-memory.d.ts","sourceRoot":"","sources":["../../src/eval-log/in-memory.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAEhF;;GAEG;AACH,qBAAa,oBAAqB,YAAW,YAAY;IACvD;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAuC;IAEzD,MAAM,CACV,KAAK,EAAE,IAAI,CAAC,YAAY,EAAE,KAAK,GAAG,WAAW,CAAC,GAC5C,OAAO,CAAC,IAAI,CAAC,YAAY,EAAE,KAAK,GAAG,WAAW,CAAC,CAAC,GACjD,OAAO,CAAC,YAAY,CAAC;IAmBlB,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,SAAS,CAAC;IAIlD,IAAI,CAAC,OAAO,GAAE,kBAAuB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAyB/D,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAI1C;;;;OAIG;IACH,KAAK,IAAI,IAAI;CAGd"}
@@ -0,0 +1,84 @@
1
+ /**
2
+ * InMemoryEvalLogStore — Map-backed default implementation of
3
+ * {@link EvalLogStore}.
4
+ *
5
+ * Matches Evalite v1's default backend: process-local Map keyed on `$id`,
6
+ * insertion-ordered for "most recent first" listing without sorting. Suitable
7
+ * for single-process tests, evals, and the cascade walker's in-flight log;
8
+ * not suitable for cross-process or multi-worker setups (use a disk/SQLite
9
+ * backend for those — same contract).
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+ import { randomUUID } from 'crypto';
14
+ /**
15
+ * In-memory implementation of {@link EvalLogStore}.
16
+ */
17
/**
 * In-memory implementation of {@link EvalLogStore}.
 *
 * Entries live in a process-local Map keyed on `$id`; listing order is
 * derived from the Map's insertion order (newest insertion first), not from
 * the `createdAt` field.
 */
export class InMemoryEvalLogStore {
    /**
     * Map keyed on `$id`. Insertion order on a JS Map is preserved, so we
     * walk it in reverse for "most recent first" listing.
     */
    entries = new Map();

    /**
     * Persist an entry and return the stored record.
     *
     * @param {object} entry - Entry payload. `$id` defaults to a fresh UUID and
     *   `createdAt` to `Date.now()` when omitted; `traceId` and `tags` are only
     *   copied onto the stored record when they are defined.
     * @returns {Promise<object>} The record as stored.
     */
    async record(entry) {
        const $id = entry.$id ?? randomUUID();
        const createdAt = entry.createdAt ?? Date.now();
        const stored = {
            $id,
            createdAt,
            model: entry.model,
            prompt: entry.prompt,
            response: entry.response,
            usage: entry.usage,
            costUsd: entry.costUsd,
            durationMs: entry.durationMs,
            ...(entry.traceId !== undefined ? { traceId: entry.traceId } : {}),
            ...(entry.tags !== undefined ? { tags: entry.tags } : {}),
        };
        // Map.set() on an existing key keeps that key's original position, which
        // would leave a re-recorded entry in its old slot. Delete first so the
        // latest write is always the most recent entry in list().
        this.entries.delete($id);
        this.entries.set($id, stored);
        return stored;
    }

    /**
     * Look up an entry by `$id`.
     *
     * @param {string} id - The `$id` to look up.
     * @returns {Promise<object | undefined>} The entry, or `undefined` when absent.
     */
    async get(id) {
        return this.entries.get(id);
    }

    /**
     * List entries, most recently inserted first. All filters are AND-combined.
     *
     * @param {object} [options]
     * @param {string} [options.traceId] - Keep only entries with this trace ID.
     * @param {string} [options.model] - Keep only entries for this model.
     * @param {Record<string, string>} [options.tags] - Keep only entries whose
     *   tags contain every key/value pair given here.
     * @param {number} [options.limit] - Maximum number of entries to return;
     *   zero or a negative value yields an empty list.
     * @returns {Promise<object[]>} The matching entries.
     */
    async list(options = {}) {
        const { traceId, model, tags, limit } = options;
        // The cap is otherwise only checked after a push, so a limit of 0 (or
        // less) would still leak one entry. Short-circuit it here.
        if (limit !== undefined && limit <= 0) {
            return [];
        }
        const requiredTags = tags === undefined ? [] : Object.entries(tags);
        const out = [];
        // Map iterates in insertion order; reverse a snapshot for newest-first.
        const newestFirst = Array.from(this.entries.values()).reverse();
        for (const entry of newestFirst) {
            if (traceId !== undefined && entry.traceId !== traceId) {
                continue;
            }
            if (model !== undefined && entry.model !== model) {
                continue;
            }
            // Superset match: every requested tag must be present with the same value.
            if (!requiredTags.every(([key, value]) => entry.tags?.[key] === value)) {
                continue;
            }
            out.push(entry);
            if (limit !== undefined && out.length >= limit) {
                break;
            }
        }
        return out;
    }

    /**
     * Remove an entry by `$id`.
     *
     * @param {string} id - The `$id` to remove.
     * @returns {Promise<boolean>} `true` when an entry was actually removed.
     */
    async delete(id) {
        return this.entries.delete(id);
    }

    /**
     * Convenience for tests: drop every entry. Not on the public
     * {@link EvalLogStore} interface because the disk/SQLite backends may not
     * want to expose a one-shot wipe.
     */
    clear() {
        this.entries.clear();
    }
}
84
+ //# sourceMappingURL=in-memory.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"in-memory.js","sourceRoot":"","sources":["../../src/eval-log/in-memory.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAA;AAGnC;;GAEG;AACH,MAAM,OAAO,oBAAoB;IAC/B;;;OAGG;IACc,OAAO,GAA8B,IAAI,GAAG,EAAE,CAAA;IAE/D,KAAK,CAAC,MAAM,CACV,KACkD;QAElD,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,IAAI,UAAU,EAAE,CAAA;QACrC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,IAAI,CAAC,GAAG,EAAE,CAAA;QAC/C,MAAM,MAAM,GAAiB;YAC3B,GAAG;YACH,SAAS;YACT,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,GAAG,CAAC,KAAK,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAClE,GAAG,CAAC,KAAK,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC1D,CAAA;QACD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,CAAA;QAC7B,OAAO,MAAM,CAAA;IACf,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,EAAU;QAClB,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;IAC7B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,UAA8B,EAAE;QACzC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,CAAA;QAC/C,MAAM,GAAG,GAAmB,EAAE,CAAA;QAC9B,oEAAoE;QACpE,4DAA4D;QAC5D,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE,CAAA;QACvD,KAAK,MAAM,KAAK,IAAI,GAAG,EAAE,CAAC;YACxB,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,CAAC,OAAO,KAAK,OAAO;gBAAE,SAAQ;YAChE,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,KAAK,KAAK,KAAK;gBAAE,SAAQ;YAC1D,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACvB,IAAI,UAAU,GAAG,IAAI,CAAA;gBACrB,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBAClC,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;wBAChC,UAAU,GAAG,KAAK,CAAA;wBAClB,MAAK;oBACP,CAAC;gBACH,CAAC;gBACD,IAAI,CAAC,UAAU;oBAAE,SAAQ;YAC3B,CAAC;YACD,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACf,IAAI,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,MAAM,IAAI,KAAK;gBAAE,MAAK;QACvD,CAAC;QACD,OAAO,GAAG,CAAA;IACZ,CAAC;IAED,KAAK,CAAC,MAA
M,CAAC,EAAU;QACrB,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAA;IAChC,CAAC;IAED;;;;OAIG;IACH,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAA;IACtB,CAAC;CACF"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * EvalLogStore — pluggable persistence primitive for trace/eval entries.
3
+ *
4
+ * Exports the {@link EvalLogStore} contract, the
5
+ * {@link InMemoryEvalLogStore} default implementation, and a global
6
+ * accessor pair (`getEvalLogStore` / `configureEvalLogStore`) mirroring the
7
+ * marketplace persistence pattern from round 9.
8
+ *
9
+ * @packageDocumentation
10
+ */
11
+ import type { EvalLogStore } from './types.js';
12
+ export type { EvalLogEntry, EvalLogListOptions, EvalLogStore } from './types.js';
13
+ export { InMemoryEvalLogStore } from './in-memory.js';
14
+ /**
15
+ * Get the global {@link EvalLogStore}. Lazily constructs an
16
+ * {@link InMemoryEvalLogStore} on first call when no store has been
17
+ * configured.
18
+ *
19
+ * Matches the round-9 marketplace persistence accessor: callers that don't
20
+ * care about isolation read the global; callers that do (tests, multi-tenant
21
+ * apps) install their own via {@link configureEvalLogStore}.
22
+ */
23
+ export declare function getEvalLogStore(): EvalLogStore;
24
+ /**
25
+ * Install a global {@link EvalLogStore}. Pass `null` to reset to the lazy
26
+ * in-memory default (useful in test teardown).
27
+ */
28
+ export declare function configureEvalLogStore(store: EvalLogStore | null): void;
29
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/eval-log/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,YAAY,EAAE,YAAY,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAChF,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AAQrD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,IAAI,YAAY,CAK9C;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,YAAY,GAAG,IAAI,GAAG,IAAI,CAEtE"}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * EvalLogStore — pluggable persistence primitive for trace/eval entries.
3
+ *
4
+ * Exports the {@link EvalLogStore} contract, the
5
+ * {@link InMemoryEvalLogStore} default implementation, and a global
6
+ * accessor pair (`getEvalLogStore` / `configureEvalLogStore`) mirroring the
7
+ * marketplace persistence pattern from round 9.
8
+ *
9
+ * @packageDocumentation
10
+ */
11
+ import { InMemoryEvalLogStore } from './in-memory.js';
12
+ export { InMemoryEvalLogStore } from './in-memory.js';
13
+ // ============================================================================
14
+ // Global accessor (lazy default + override)
15
+ // ============================================================================
16
+ let _store = null;
17
/**
 * Return the global {@link EvalLogStore}.
 *
 * When no store has been configured yet, an {@link InMemoryEvalLogStore} is
 * created on this first call, remembered, and returned on subsequent calls.
 *
 * Matches the round-9 marketplace persistence accessor: callers that don't
 * care about isolation read the global; callers that do (tests, multi-tenant
 * apps) install their own via {@link configureEvalLogStore}.
 *
 * @returns The currently installed store.
 */
export function getEvalLogStore() {
    // Fast path: a store is already installed (configured or lazily created).
    if (_store !== null) {
        return _store;
    }
    _store = new InMemoryEvalLogStore();
    return _store;
}
32
/**
 * Install a global {@link EvalLogStore}. Pass `null` to reset to the lazy
 * in-memory default (useful in test teardown).
 *
 * `undefined` is treated the same as `null`: the accessor only recreates the
 * default when the slot is exactly `null`, so storing `undefined` would make
 * `getEvalLogStore()` hand back `undefined` from then on.
 *
 * @param store - The store to install, or `null`/`undefined` to reset.
 */
export function configureEvalLogStore(store) {
    _store = store ?? null;
}
39
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval-log/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AAIrD,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AAErD,+EAA+E;AAC/E,4CAA4C;AAC5C,+EAA+E;AAE/E,IAAI,MAAM,GAAwB,IAAI,CAAA;AAEtC;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe;IAC7B,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QACpB,MAAM,GAAG,IAAI,oBAAoB,EAAE,CAAA;IACrC,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAA0B;IAC9D,MAAM,GAAG,KAAK,CAAA;AAChB,CAAC"}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * EvalLogStore — pluggable persistence primitive for trace/eval entries.
3
+ *
4
+ * Forward-looking primitive matching Evalite v1's EvalLogStore pattern:
5
+ * the in-memory default ships today; the disk/SQLite/durable backends can
6
+ * land later without breaking the trace middleware contract.
7
+ *
8
+ * Used downstream by `traceMiddleware` (in `../middleware/trace.ts`) as the
9
+ * sink for per-call prompt+response+usage records. The cascade-walker in
10
+ * services-as-software will consume `list()` / `get()` to populate the
11
+ * InvocationEvent stream once round 16+ adds the `'persona-trace'` variant.
12
+ *
13
+ * @packageDocumentation
14
+ */
15
+ /**
16
+ * A single entry in the eval log — one LLM call with its full payload.
17
+ *
18
+ * Shape mirrors what `traceMiddleware` emits, with optional `tags` for
19
+ * caller-supplied dimensions (persona name, evaluator role, cascade depth).
20
+ */
21
+ export interface EvalLogEntry {
22
+ /** MDXLD identity — typically a UUID generated at insert time. */
23
+ $id: string;
24
+ /**
25
+ * Optional caller-supplied trace correlation ID. When the cascade walker
26
+ * spans multiple LLM calls under one user request, all entries share the
27
+ * same `traceId` so `list({ traceId })` rolls them up.
28
+ */
29
+ traceId?: string;
30
+ /** Model identifier (e.g. `'anthropic/claude-sonnet-4.5'` or `'sonnet'`). */
31
+ model: string;
32
+ /**
33
+ * Stringified prompt as submitted to the model. We don't store the
34
+ * structured `LanguageModelV3Prompt` shape because (a) it's bulky and (b)
35
+ * downstream consumers (replay, fixture diff) only need the text payload.
36
+ */
37
+ prompt: string;
38
+ /** The model's text response. Tool calls/files are not stored here. */
39
+ response: string;
40
+ /** Token usage as reported by the AI SDK. */
41
+ usage: {
42
+ inputTokens: number;
43
+ outputTokens: number;
44
+ };
45
+ /** Computed USD cost (caller-supplied via the `pricing` overlay). */
46
+ costUsd: number;
47
+ /** Wall-clock duration of the underlying `doGenerate` / `doStream` call. */
48
+ durationMs: number;
49
+ /** Caller-supplied dimensions (persona, evaluator role, cascade step). */
50
+ tags?: Record<string, string>;
51
+ /** Insert timestamp (epoch ms). */
52
+ createdAt: number;
53
+ }
54
+ /**
55
+ * Options accepted by `EvalLogStore.list`. All fields are AND-combined.
56
+ */
57
+ export interface EvalLogListOptions {
58
+ /** Filter to entries with this trace correlation ID. */
59
+ traceId?: string;
60
+ /** Filter to entries for a specific model. */
61
+ model?: string;
62
+ /**
63
+ * Filter to entries whose `tags` are a *superset* of the supplied object.
64
+ * (E.g. `{ persona: 'cfo' }` matches entries tagged
65
+ * `{ persona: 'cfo', step: '3' }` but not entries tagged
66
+ * `{ persona: 'cto' }`.)
67
+ */
68
+ tags?: Record<string, string>;
69
+ /** Maximum number of entries to return (most recent first). */
70
+ limit?: number;
71
+ }
72
+ /**
73
+ * Pluggable persistence interface for eval log entries.
74
+ *
75
+ * Modeled after the Evalite v1 EvalLogStore contract: in-memory default,
76
+ * disk JSON / SQLite / durable backends supplied via
77
+ * `configureEvalLogStore`.
78
+ *
79
+ * All methods are async to keep the contract uniform across backends — the
80
+ * in-memory implementation resolves synchronously under the hood.
81
+ */
82
+ export interface EvalLogStore {
83
+ /**
84
+ * Persist a new entry. Returns the stored entry (with `$id` and
85
+ * `createdAt` filled in if the caller omitted them).
86
+ */
87
+ record(entry: Omit<EvalLogEntry, '$id' | 'createdAt'> & Partial<Pick<EvalLogEntry, '$id' | 'createdAt'>>): Promise<EvalLogEntry>;
88
+ /**
89
+ * Read an entry by `$id`. Returns `undefined` when not found.
90
+ */
91
+ get(id: string): Promise<EvalLogEntry | undefined>;
92
+ /**
93
+ * List entries matching the supplied filter. Returns most recent first.
94
+ */
95
+ list(options?: EvalLogListOptions): Promise<EvalLogEntry[]>;
96
+ /**
97
+ * Delete an entry. Returns `true` if an entry was actually removed.
98
+ */
99
+ delete(id: string): Promise<boolean>;
100
+ }
101
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/eval-log/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAMH;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC3B,kEAAkE;IAClE,GAAG,EAAE,MAAM,CAAA;IACX;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,6EAA6E;IAC7E,KAAK,EAAE,MAAM,CAAA;IACb;;;;OAIG;IACH,MAAM,EAAE,MAAM,CAAA;IACd,uEAAuE;IACvE,QAAQ,EAAE,MAAM,CAAA;IAChB,6CAA6C;IAC7C,KAAK,EAAE;QACL,WAAW,EAAE,MAAM,CAAA;QACnB,YAAY,EAAE,MAAM,CAAA;KACrB,CAAA;IACD,qEAAqE;IACrE,OAAO,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,UAAU,EAAE,MAAM,CAAA;IAClB,0EAA0E;IAC1E,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC7B,mCAAmC;IACnC,SAAS,EAAE,MAAM,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,wDAAwD;IACxD,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,8CAA8C;IAC9C,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;;;;OAKG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC7B,+DAA+D;IAC/D,KAAK,CAAC,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,YAAY;IAC3B;;;OAGG;IACH,MAAM,CACJ,KAAK,EAAE,IAAI,CAAC,YAAY,EAAE,KAAK,GAAG,WAAW,CAAC,GAC5C,OAAO,CAAC,IAAI,CAAC,YAAY,EAAE,KAAK,GAAG,WAAW,CAAC,CAAC,GACjD,OAAO,CAAC,YAAY,CAAC,CAAA;IACxB;;OAEG;IACH,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,GAAG,SAAS,CAAC,CAAA;IAClD;;OAEG;IACH,IAAI,CAAC,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAA;IAC3D;;OAEG;IACH,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAA;CACrC"}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * EvalLogStore — pluggable persistence primitive for trace/eval entries.
3
+ *
4
+ * Forward-looking primitive matching Evalite v1's EvalLogStore pattern:
5
+ * the in-memory default ships today; the disk/SQLite/durable backends can
6
+ * land later without breaking the trace middleware contract.
7
+ *
8
+ * Used downstream by `traceMiddleware` (in `../middleware/trace.ts`) as the
9
+ * sink for per-call prompt+response+usage records. The cascade-walker in
10
+ * services-as-software will consume `list()` / `get()` to populate the
11
+ * InvocationEvent stream once round 16+ adds the `'persona-trace'` variant.
12
+ *
13
+ * @packageDocumentation
14
+ */
15
+ export {};
16
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/eval-log/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG"}