@rudderjs/ai 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/README.md +482 -4
  2. package/boost/guidelines.md +60 -0
  3. package/boost/skills/ai-agents/SKILL.md +7 -0
  4. package/boost/skills/ai-tools/SKILL.md +7 -0
  5. package/dist/agent.d.ts +35 -1
  6. package/dist/agent.d.ts.map +1 -1
  7. package/dist/agent.js +118 -16
  8. package/dist/agent.js.map +1 -1
  9. package/dist/budget/pricing.d.ts +124 -0
  10. package/dist/budget/pricing.d.ts.map +1 -0
  11. package/dist/budget/pricing.js +175 -0
  12. package/dist/budget/pricing.js.map +1 -0
  13. package/dist/budget/storage.d.ts +104 -0
  14. package/dist/budget/storage.d.ts.map +1 -0
  15. package/dist/budget/storage.js +0 -0
  16. package/dist/budget/storage.js.map +1 -0
  17. package/dist/budget/with-budget.d.ts +119 -0
  18. package/dist/budget/with-budget.d.ts.map +1 -0
  19. package/dist/budget/with-budget.js +175 -0
  20. package/dist/budget/with-budget.js.map +1 -0
  21. package/dist/budget-orm/index.d.ts +96 -0
  22. package/dist/budget-orm/index.d.ts.map +1 -0
  23. package/dist/budget-orm/index.js +177 -0
  24. package/dist/budget-orm/index.js.map +1 -0
  25. package/dist/commands/ai-eval.d.ts +93 -0
  26. package/dist/commands/ai-eval.d.ts.map +1 -0
  27. package/dist/commands/ai-eval.js +378 -0
  28. package/dist/commands/ai-eval.js.map +1 -0
  29. package/dist/computer-use/actions.d.ts +214 -0
  30. package/dist/computer-use/actions.d.ts.map +1 -0
  31. package/dist/computer-use/actions.js +48 -0
  32. package/dist/computer-use/actions.js.map +1 -0
  33. package/dist/computer-use/errors.d.ts +57 -0
  34. package/dist/computer-use/errors.d.ts.map +1 -0
  35. package/dist/computer-use/errors.js +76 -0
  36. package/dist/computer-use/errors.js.map +1 -0
  37. package/dist/computer-use/index.d.ts +53 -0
  38. package/dist/computer-use/index.d.ts.map +1 -0
  39. package/dist/computer-use/index.js +51 -0
  40. package/dist/computer-use/index.js.map +1 -0
  41. package/dist/computer-use/playwright.d.ts +76 -0
  42. package/dist/computer-use/playwright.d.ts.map +1 -0
  43. package/dist/computer-use/playwright.js +270 -0
  44. package/dist/computer-use/playwright.js.map +1 -0
  45. package/dist/computer-use/tool.d.ts +154 -0
  46. package/dist/computer-use/tool.d.ts.map +1 -0
  47. package/dist/computer-use/tool.js +210 -0
  48. package/dist/computer-use/tool.js.map +1 -0
  49. package/dist/eval/fixtures.d.ts +65 -0
  50. package/dist/eval/fixtures.d.ts.map +1 -0
  51. package/dist/eval/fixtures.js +110 -0
  52. package/dist/eval/fixtures.js.map +1 -0
  53. package/dist/eval/html-reporter.d.ts +25 -0
  54. package/dist/eval/html-reporter.d.ts.map +1 -0
  55. package/dist/eval/html-reporter.js +209 -0
  56. package/dist/eval/html-reporter.js.map +1 -0
  57. package/dist/eval/index.d.ts +271 -0
  58. package/dist/eval/index.d.ts.map +1 -0
  59. package/dist/eval/index.js +510 -0
  60. package/dist/eval/index.js.map +1 -0
  61. package/dist/eval/json-reporter.d.ts +43 -0
  62. package/dist/eval/json-reporter.d.ts.map +1 -0
  63. package/dist/eval/json-reporter.js +40 -0
  64. package/dist/eval/json-reporter.js.map +1 -0
  65. package/dist/fake.d.ts +36 -1
  66. package/dist/fake.d.ts.map +1 -1
  67. package/dist/fake.js +49 -2
  68. package/dist/fake.js.map +1 -1
  69. package/dist/file-search.d.ts +168 -0
  70. package/dist/file-search.d.ts.map +1 -0
  71. package/dist/file-search.js +158 -0
  72. package/dist/file-search.js.map +1 -0
  73. package/dist/index.d.ts +22 -2
  74. package/dist/index.d.ts.map +1 -1
  75. package/dist/index.js +17 -1
  76. package/dist/index.js.map +1 -1
  77. package/dist/mcp/client-tools.d.ts +39 -0
  78. package/dist/mcp/client-tools.d.ts.map +1 -0
  79. package/dist/mcp/client-tools.js +147 -0
  80. package/dist/mcp/client-tools.js.map +1 -0
  81. package/dist/mcp/index.d.ts +16 -0
  82. package/dist/mcp/index.d.ts.map +1 -0
  83. package/dist/mcp/index.js +15 -0
  84. package/dist/mcp/index.js.map +1 -0
  85. package/dist/mcp/server-from-agent.d.ts +24 -0
  86. package/dist/mcp/server-from-agent.d.ts.map +1 -0
  87. package/dist/mcp/server-from-agent.js +113 -0
  88. package/dist/mcp/server-from-agent.js.map +1 -0
  89. package/dist/mcp/types.d.ts +64 -0
  90. package/dist/mcp/types.d.ts.map +1 -0
  91. package/dist/mcp/types.js +6 -0
  92. package/dist/mcp/types.js.map +1 -0
  93. package/dist/memory-embedding/index.d.ts +121 -0
  94. package/dist/memory-embedding/index.d.ts.map +1 -0
  95. package/dist/memory-embedding/index.js +229 -0
  96. package/dist/memory-embedding/index.js.map +1 -0
  97. package/dist/memory-extract.d.ts +60 -0
  98. package/dist/memory-extract.d.ts.map +1 -0
  99. package/dist/memory-extract.js +163 -0
  100. package/dist/memory-extract.js.map +1 -0
  101. package/dist/memory-inject.d.ts +39 -0
  102. package/dist/memory-inject.d.ts.map +1 -0
  103. package/dist/memory-inject.js +135 -0
  104. package/dist/memory-inject.js.map +1 -0
  105. package/dist/memory-orm/index.d.ts +118 -0
  106. package/dist/memory-orm/index.d.ts.map +1 -0
  107. package/dist/memory-orm/index.js +187 -0
  108. package/dist/memory-orm/index.js.map +1 -0
  109. package/dist/memory.d.ts +55 -0
  110. package/dist/memory.d.ts.map +1 -0
  111. package/dist/memory.js +132 -0
  112. package/dist/memory.js.map +1 -0
  113. package/dist/observers.d.ts +22 -0
  114. package/dist/observers.d.ts.map +1 -1
  115. package/dist/observers.js.map +1 -1
  116. package/dist/provider-tools.d.ts +15 -1
  117. package/dist/provider-tools.d.ts.map +1 -1
  118. package/dist/provider-tools.js +21 -1
  119. package/dist/provider-tools.js.map +1 -1
  120. package/dist/providers/anthropic.d.ts.map +1 -1
  121. package/dist/providers/anthropic.js +61 -6
  122. package/dist/providers/anthropic.js.map +1 -1
  123. package/dist/providers/elevenlabs.d.ts +98 -0
  124. package/dist/providers/elevenlabs.d.ts.map +1 -0
  125. package/dist/providers/elevenlabs.js +229 -0
  126. package/dist/providers/elevenlabs.js.map +1 -0
  127. package/dist/providers/google.d.ts +83 -1
  128. package/dist/providers/google.d.ts.map +1 -1
  129. package/dist/providers/google.js +491 -8
  130. package/dist/providers/google.js.map +1 -1
  131. package/dist/providers/openai.d.ts +3 -1
  132. package/dist/providers/openai.d.ts.map +1 -1
  133. package/dist/providers/openai.js +209 -5
  134. package/dist/providers/openai.js.map +1 -1
  135. package/dist/providers/voyage.d.ts +91 -0
  136. package/dist/providers/voyage.d.ts.map +1 -0
  137. package/dist/providers/voyage.js +166 -0
  138. package/dist/providers/voyage.js.map +1 -0
  139. package/dist/queue-job.d.ts +69 -4
  140. package/dist/queue-job.d.ts.map +1 -1
  141. package/dist/queue-job.js +114 -11
  142. package/dist/queue-job.js.map +1 -1
  143. package/dist/registry.d.ts +3 -1
  144. package/dist/registry.d.ts.map +1 -1
  145. package/dist/registry.js +10 -0
  146. package/dist/registry.js.map +1 -1
  147. package/dist/server/provider.d.ts.map +1 -1
  148. package/dist/server/provider.js +23 -1
  149. package/dist/server/provider.js.map +1 -1
  150. package/dist/similarity-search.d.ts +163 -0
  151. package/dist/similarity-search.d.ts.map +1 -0
  152. package/dist/similarity-search.js +147 -0
  153. package/dist/similarity-search.js.map +1 -0
  154. package/dist/tool.d.ts.map +1 -1
  155. package/dist/tool.js +13 -4
  156. package/dist/tool.js.map +1 -1
  157. package/dist/types.d.ts +246 -0
  158. package/dist/types.d.ts.map +1 -1
  159. package/dist/vector-stores/index.d.ts +96 -0
  160. package/dist/vector-stores/index.d.ts.map +1 -0
  161. package/dist/vector-stores/index.js +153 -0
  162. package/dist/vector-stores/index.js.map +1 -0
  163. package/package.json +41 -3
@@ -0,0 +1,154 @@
1
+ /**
2
+ * `computerUseTool({ page })` — the agent-tool factory for #A7 Phase 2.
3
+ *
4
+ * Wraps the phase-1 {@link executeComputerAction} executor as a tool the
5
+ * agent loop can invoke. The tool is tagged so the Anthropic provider
6
+ * adapter substitutes the standard function-call schema with Anthropic's
7
+ * native `computer_20250124` tool block at the API level — Claude is
8
+ * fine-tuned on that exact tool, so quality is dramatically better than
9
+ * a generic function-call wrapper.
10
+ *
11
+ * # Anthropic-only in v1
12
+ *
13
+ * Pass `model` to fail loud at agent-construction time when the agent's
14
+ * model isn't Anthropic-family — see {@link ComputerUseProviderError}.
15
+ * Without `model`, validation is deferred (the Anthropic adapter is the
16
+ * only one that recognizes the provider hint, so non-Anthropic models
17
+ * silently see a no-arg generic tool — degraded but not catastrophic).
18
+ *
19
+ * # Wiring
20
+ *
21
+ * ```ts
22
+ * import { Agent } from '@rudderjs/ai'
23
+ * import { computerUseTool } from '@rudderjs/ai/computer-use'
24
+ * import { chromium } from 'playwright'
25
+ *
26
+ * const browser = await chromium.launch()
27
+ * const page = await browser.newPage()
28
+ * await page.setViewportSize({ width: 1280, height: 800 })
29
+ *
30
+ * class BrowserAgent extends Agent {
31
+ * model() { return 'anthropic/claude-opus-4-7' }
32
+ *
33
+ * tools() {
34
+ * return [
35
+ * computerUseTool({
36
+ * page,
37
+ * viewport: { width: 1280, height: 800 },
38
+ * model: this.model(), // upfront provider check
39
+ * }),
40
+ * ]
41
+ * }
42
+ * }
43
+ * ```
44
+ *
45
+ * # State
46
+ *
47
+ * Each `computerUseTool({...})` call captures a fresh
48
+ * {@link ComputerExecutorState} in its closure. Passing the same tool
49
+ * instance through multiple agent runs SHARES cursor state across them
50
+ * — usually fine, but call the factory inside `tools()` (which Agent
51
+ * runs per request) for clean per-run state.
52
+ *
53
+ * The same closure carries the action counter for {@link maxActions}.
54
+ *
55
+ * # Image results
56
+ *
57
+ * `screenshot` actions return PNG bytes. The tool's execute base64-
58
+ * encodes them and returns a `ContentPart[]` array with one image
59
+ * block — the Anthropic adapter's `toAnthropicMessages` handles array
60
+ * tool-message content directly (a generic enhancement, not
61
+ * computer-use-specific). Other providers see a JSON-stringified
62
+ * fallback; when `model` is passed they never get here because the tool
63
+ * throws at construction when bound to a non-Anthropic model.
64
+ */
65
+ import type { ContentPart, Tool, ToolCallContext, ToolDefinitionOptions, ToolDefinitionSchema } from '../types.js';
66
+ import { ComputerUseLimitError, ComputerUseProviderError, isAnthropicLikeModel } from './errors.js';
67
+ import { type ComputerAction, type ComputerExecutorState, type PageLike } from './actions.js';
68
+ /**
69
+ * Symbol-tagged marker identifying a computer-use tool. Looked up via
70
+ * `Symbol.for(...)` so cross-bundle / cross-realm checks succeed even
71
+ * when `@rudderjs/ai` is loaded twice (rare, but possible in monorepo +
72
+ * linked setups). Mirrors the `HANDOFF_MARKER` pattern.
73
+ */
74
+ export declare const COMPUTER_USE_MARKER: unique symbol;
75
+ /**
76
+ * The fixed tool name. Anthropic's native `computer_20250124` tool
77
+ * expects calls to land on a tool literally named `computer` — the
78
+ * model is trained on that name. Apps don't override it.
79
+ */
80
+ export declare const COMPUTER_USE_TOOL_NAME = "computer";
81
+ /** Options for {@link computerUseTool}. */
82
+ export interface ComputerUseToolOptions {
83
+ /**
84
+ * Playwright `Page` (or any object structurally matching {@link PageLike}).
85
+ * Caller owns the lifecycle — launch, set viewport, navigate, close.
86
+ */
87
+ page: PageLike;
88
+ /**
89
+ * Display dimensions reported to the model in the native
90
+ * `computer_20250124` block. Defaults to `1280×800` (Anthropic's
91
+ * recommended training-distribution size). Must match what
92
+ * `page.setViewportSize(...)` was called with — Claude grounds clicks
93
+ * in this coordinate space.
94
+ */
95
+ viewport?: {
96
+ width: number;
97
+ height: number;
98
+ };
99
+ /**
100
+ * Optional agent model id. When provided, the factory fails loud at
101
+ * construction time if the model isn't Anthropic-family — see
102
+ * {@link ComputerUseProviderError}. Pass `this.model()` from inside
103
+ * `Agent.tools()` to get the check.
104
+ */
105
+ model?: string;
106
+ /**
107
+ * Per-action approval gate. `true` (default) routes every action
108
+ * through the framework's approval middleware before execution.
109
+ * `false` opts out entirely. Function form decides per-action — useful
110
+ * for letting cheap actions (`screenshot`, `mouse_move`) run
111
+ * unattended while gating destructive ones.
112
+ *
113
+ * Wired via {@link ToolDefinitionOptions.needsApproval} — same
114
+ * channel the rest of `@rudderjs/ai`'s approval-resume machinery uses.
115
+ */
116
+ needsApproval?: boolean | ((action: ComputerAction) => boolean);
117
+ /**
118
+ * Maximum number of actions per tool instance (per agent run when built inside `tools()`) before
119
+ * {@link ComputerUseLimitError} is thrown. Default `50`. Bounds
120
+ * runaway loops where the model keeps trying the same broken UI step.
121
+ */
122
+ maxActions?: number;
123
+ /**
124
+ * Override the per-run cursor-tracking state. Rarely needed — the
125
+ * factory creates a fresh state by default. Provide one if you want
126
+ * to seed the cursor (e.g. resuming a paused session).
127
+ */
128
+ state?: ComputerExecutorState;
129
+ }
130
+ /**
131
+ * The tool returned by {@link computerUseTool}. Implements the
132
+ * {@link Tool} interface with `execute` (so the agent loop runs it
133
+ * directly), and carries the {@link COMPUTER_USE_MARKER} so adapters
134
+ * and observers can detect it without coupling to a class.
135
+ */
136
+ export interface ComputerUseTool extends Tool<ComputerAction, ContentPart[] | string> {
137
+ readonly [COMPUTER_USE_MARKER]: true;
138
+ readonly definition: ToolDefinitionOptions;
139
+ execute(input: ComputerAction, ctx?: ToolCallContext): Promise<ContentPart[] | string>;
140
+ toSchema(): ToolDefinitionSchema;
141
+ }
142
+ /**
143
+ * Build the agent tool. See module JSDoc for usage.
144
+ */
145
+ export declare function computerUseTool(opts: ComputerUseToolOptions): ComputerUseTool;
146
+ /**
147
+ * Structural typeguard. Mirrors {@link isHandoffTool} — handoff /
148
+ * computer-use tools are plain objects tagged with their respective
149
+ * `Symbol.for(...)` markers, so the loop and adapters can detect them
150
+ * without coupling to a class hierarchy.
151
+ */
152
+ export declare function isComputerUseTool(t: unknown): t is ComputerUseTool;
153
+ export { ComputerUseLimitError, ComputerUseProviderError, isAnthropicLikeModel };
154
+ //# sourceMappingURL=tool.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tool.d.ts","sourceRoot":"","sources":["../../src/computer-use/tool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,eAAe,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAA;AAElH,OAAO,EACL,qBAAqB,EACrB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,aAAa,CAAA;AACpB,OAAO,EAEL,KAAK,cAAc,EAEnB,KAAK,qBAAqB,EAC1B,KAAK,QAAQ,EACd,MAAM,cAAc,CAAA;AAGrB;;;;;GAKG;AACH,eAAO,MAAM,mBAAmB,EAAE,OAAO,MAA+C,CAAA;AAExF;;;;GAIG;AACH,eAAO,MAAM,sBAAsB,aAAa,CAAA;AAMhD,2CAA2C;AAC3C,MAAM,WAAW,sBAAsB;IACrC;;;OAGG;IACH,IAAI,EAAE,QAAQ,CAAA;IACd;;;;;;OAMG;IACH,QAAQ,CAAC,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAA;IAC5C;;;;;OAKG;IACH,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;;;;;;;;OASG;IACH,aAAa,CAAC,EAAE,OAAO,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,KAAK,OAAO,CAAC,CAAA;IAC/D;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB;;;;OAIG;IACH,KAAK,CAAC,EAAE,qBAAqB,CAAA;CAC9B;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAgB,SAAQ,IAAI,CAAC,cAAc,EAAE,WAAW,EAAE,GAAG,MAAM,CAAC;IACnF,QAAQ,CAAC,CAAC,mBAAmB,CAAC,EAAE,IAAI,CAAA;IACpC,QAAQ,CAAC,UAAU,EAAE,qBAAqB,CAAA;IAC1C,OAAO,CAAC,KAAK,EAAE,cAAc,EAAE,GAAG,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,WAAW,EAAE,GAAG,MAAM,CAAC,CAAA;IACtF,QAAQ,IAAI,oBAAoB,CAAA;CACjC;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,sBAAsB,GAAG,eAAe,CAsE7E;AAiDD;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,OAAO,GAAG,CAAC,IAAI,eAAe,CAIlE;AAID,OAAO,EAAE,qBAAqB,EAAE,wBAAwB,EAAE,oBAAoB,EAAE,CAAA"}
@@ -0,0 +1,210 @@
1
+ /**
2
+ * `computerUseTool({ page })` — the agent-tool factory for #A7 Phase 2.
3
+ *
4
+ * Wraps the phase-1 {@link executeComputerAction} executor as a tool the
5
+ * agent loop can invoke. The tool is tagged so the Anthropic provider
6
+ * adapter substitutes the standard function-call schema with Anthropic's
7
+ * native `computer_20250124` tool block at the API level — Claude is
8
+ * fine-tuned on that exact tool, so quality is dramatically better than
9
+ * a generic function-call wrapper.
10
+ *
11
+ * # Anthropic-only in v1
12
+ *
13
+ * Pass `model` to fail loud at agent-construction time when the agent's
14
+ * model isn't Anthropic-family — see {@link ComputerUseProviderError}.
15
+ * Without `model`, validation is deferred (the Anthropic adapter is the
16
+ * only one that recognizes the provider hint, so non-Anthropic models
17
+ * silently see a no-arg generic tool — degraded but not catastrophic).
18
+ *
19
+ * # Wiring
20
+ *
21
+ * ```ts
22
+ * import { Agent } from '@rudderjs/ai'
23
+ * import { computerUseTool } from '@rudderjs/ai/computer-use'
24
+ * import { chromium } from 'playwright'
25
+ *
26
+ * const browser = await chromium.launch()
27
+ * const page = await browser.newPage()
28
+ * await page.setViewportSize({ width: 1280, height: 800 })
29
+ *
30
+ * class BrowserAgent extends Agent {
31
+ * model() { return 'anthropic/claude-opus-4-7' }
32
+ *
33
+ * tools() {
34
+ * return [
35
+ * computerUseTool({
36
+ * page,
37
+ * viewport: { width: 1280, height: 800 },
38
+ * model: this.model(), // upfront provider check
39
+ * }),
40
+ * ]
41
+ * }
42
+ * }
43
+ * ```
44
+ *
45
+ * # State
46
+ *
47
+ * Each `computerUseTool({...})` call captures a fresh
48
+ * {@link ComputerExecutorState} in its closure. Passing the same tool
49
+ * instance through multiple agent runs SHARES cursor state across them
50
+ * — usually fine, but call the factory inside `tools()` (which Agent
51
+ * runs per request) for clean per-run state.
52
+ *
53
+ * The same closure carries the action counter for {@link maxActions}.
54
+ *
55
+ * # Image results
56
+ *
57
+ * `screenshot` actions return PNG bytes. The tool's execute base64-
58
+ * encodes them and returns a `ContentPart[]` array with one image
59
+ * block — the Anthropic adapter's `toAnthropicMessages` handles array
60
+ * tool-message content directly (a generic enhancement, not
61
+ * computer-use-specific). Other providers see a JSON-stringified
62
+ * fallback; when `model` is passed they never get here because the tool
63
+ * throws at construction when bound to a non-Anthropic model.
64
+ */
65
+ import { z } from 'zod';
66
+ import { ComputerUseLimitError, ComputerUseProviderError, isAnthropicLikeModel, } from './errors.js';
67
+ import { makeExecutorState, } from './actions.js';
68
+ import { executeComputerAction } from './playwright.js';
69
+ /**
70
+ * Symbol-tagged marker identifying a computer-use tool. Looked up via
71
+ * `Symbol.for(...)` so cross-bundle / cross-realm checks succeed even
72
+ * when `@rudderjs/ai` is loaded twice (rare, but possible in monorepo +
73
+ * linked setups). Mirrors the `HANDOFF_MARKER` pattern.
74
+ */
75
+ export const COMPUTER_USE_MARKER = Symbol.for('rudderjs.ai.computer-use');
76
+ /**
77
+ * The fixed tool name. Anthropic's native `computer_20250124` tool
78
+ * expects calls to land on a tool literally named `computer` — the
79
+ * model is trained on that name. Apps don't override it.
80
+ */
81
+ export const COMPUTER_USE_TOOL_NAME = 'computer';
82
+ const DEFAULT_VIEWPORT = { width: 1280, height: 800 };
83
+ const DEFAULT_MAX_ACTIONS = 50;
84
+ const DEFAULT_NEEDS_APPROVAL = true;
85
+ /**
86
+ * Build the agent tool. See module JSDoc for usage.
87
+ */
88
+ export function computerUseTool(opts) {
89
+ // Upfront provider check — fail loud at agent construction.
90
+ if (opts.model !== undefined && !isAnthropicLikeModel(opts.model)) {
91
+ throw new ComputerUseProviderError(opts.model);
92
+ }
93
+ const viewport = opts.viewport ?? DEFAULT_VIEWPORT;
94
+ const maxActions = opts.maxActions ?? DEFAULT_MAX_ACTIONS;
95
+ const needsApproval = opts.needsApproval ?? DEFAULT_NEEDS_APPROVAL;
96
+ const state = opts.state ?? makeExecutorState();
97
+ const page = opts.page;
98
+ // Per-tool-instance counter. Closure-private so multiple tools (rare)
99
+ // don't collide.
100
+ const counter = { value: 0 };
101
+ // Build the needs-approval shape the framework's tool runner reads.
102
+ // ToolDefinitionOptions.needsApproval is `boolean | (input) => boolean | Promise<boolean>`.
103
+ const needsApprovalForDefinition = typeof needsApproval === 'function'
104
+ ? (input) => needsApproval(input)
105
+ : needsApproval;
106
+ const definition = {
107
+ name: COMPUTER_USE_TOOL_NAME,
108
+ description: 'Take screenshots, click, type, and otherwise drive a desktop / browser. ' +
109
+ 'Use to interact with on-screen UI you cannot reach via plain HTTP.',
110
+ // Anthropic's native tool block carries an implicit schema (the model
111
+ // is trained on it). The standard `parameters` we emit is irrelevant
112
+ // for Anthropic — the providerHint substitution drops it. `z.any()`
113
+ // is the conservative default for any non-Anthropic serialization
114
+ // that still tries to read the schema.
115
+ inputSchema: z.any(),
116
+ needsApproval: needsApprovalForDefinition,
117
+ // Carried through by `toolToSchema` → `toAnthropicTools` recognizes
118
+ // `providerHint.type === 'computer-use'` and emits the native
119
+ // `computer_20250124` block instead of a generic function-call shape.
120
+ providerHint: {
121
+ type: 'computer-use',
122
+ tool: 'computer_20250124',
123
+ display_width_px: viewport.width,
124
+ display_height_px: viewport.height,
125
+ },
126
+ };
127
+ const tool = {
128
+ [COMPUTER_USE_MARKER]: true,
129
+ definition,
130
+ async execute(input) {
131
+ counter.value++;
132
+ if (counter.value > maxActions) {
133
+ throw new ComputerUseLimitError(maxActions);
134
+ }
135
+ const result = await executeComputerAction(page, input, state);
136
+ return formatActionResult(result);
137
+ },
138
+ toSchema() {
139
+ return {
140
+ name: COMPUTER_USE_TOOL_NAME,
141
+ description: definition.description,
142
+ // Open object schema with no declared properties — see comment in `definition.inputSchema` above.
143
+ parameters: { type: 'object', properties: {}, additionalProperties: true },
144
+ providerHint: definition.providerHint,
145
+ };
146
+ },
147
+ };
148
+ return tool;
149
+ }
150
+ /**
151
+ * Convert a {@link ComputerActionResult} into the message-content shape
152
+ * the agent loop stores on the tool message and passes to the provider.
153
+ *
154
+ * - `image` → `ContentPart[]` with one image block. The Anthropic
155
+ * adapter's tool-message handler emits this as Anthropic's `content:
156
+ * [{ type: 'image', source: { ... } }]` shape.
157
+ * - `text` → plain string (current adapter path: `content: <string>`).
158
+ * - `error` → throw. The agent loop's error path wraps the throw into a
159
+ * tool-result with `is_error: true` and the error message — exactly
160
+ * the Anthropic semantics we want for "the action failed; let the
161
+ * model retry."
162
+ */
163
+ function formatActionResult(result) {
164
+ if (result.type === 'image') {
165
+ const data = bytesToBase64(result.data);
166
+ return [
167
+ {
168
+ type: 'image',
169
+ mimeType: result.media_type,
170
+ data,
171
+ },
172
+ ];
173
+ }
174
+ if (result.type === 'text') {
175
+ return result.text;
176
+ }
177
+ // result.type === 'error' — throw so the agent loop wraps as is_error.
178
+ throw new Error(result.text);
179
+ }
180
+ /**
181
+ * Encode raw bytes as a base64 string. Uses `Buffer` when available
182
+ * (Node) and falls back to a synchronous browser-safe path otherwise.
183
+ * Computer-use only runs in Node (Playwright requires it), but the
184
+ * fallback keeps the module importable from runtime-agnostic tests.
185
+ */
186
+ function bytesToBase64(data) {
187
+ if (typeof Buffer !== 'undefined') {
188
+ return Buffer.from(data).toString('base64');
189
+ }
190
+ // Browser fallback — slow but correct.
191
+ let binary = '';
192
+ for (let i = 0; i < data.length; i++)
193
+ binary += String.fromCharCode(data[i]);
194
+ return btoa(binary);
195
+ }
196
+ /**
197
+ * Structural typeguard. Mirrors {@link isHandoffTool} — handoff /
198
+ * computer-use tools are plain objects tagged with their respective
199
+ * `Symbol.for(...)` markers, so the loop and adapters can detect them
200
+ * without coupling to a class hierarchy.
201
+ */
202
+ export function isComputerUseTool(t) {
203
+ if (t === null || typeof t !== 'object')
204
+ return false;
205
+ const marker = t[COMPUTER_USE_MARKER];
206
+ return marker === true;
207
+ }
208
+ // ─── Re-export error classes + helper ─────────────────────
209
+ export { ComputerUseLimitError, ComputerUseProviderError, isAnthropicLikeModel };
210
+ //# sourceMappingURL=tool.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tool.js","sourceRoot":"","sources":["../../src/computer-use/tool.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AAIvB,OAAO,EACL,qBAAqB,EACrB,wBAAwB,EACxB,oBAAoB,GACrB,MAAM,aAAa,CAAA;AACpB,OAAO,EACL,iBAAiB,GAKlB,MAAM,cAAc,CAAA;AACrB,OAAO,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAA;AAEvD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAkB,MAAM,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAA;AAExF;;;;GAIG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG,UAAU,CAAA;AAEhD,MAAM,gBAAgB,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAW,CAAA;AAC9D,MAAM,mBAAmB,GAAG,EAAE,CAAA;AAC9B,MAAM,sBAAsB,GAAoD,IAAI,CAAA;AA8DpF;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAA4B;IAC1D,4DAA4D;IAC5D,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,wBAAwB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAChD,CAAC;IAED,MAAM,QAAQ,GAAS,IAAI,CAAC,QAAQ,IAAS,gBAAgB,CAAA;IAC7D,MAAM,UAAU,GAAO,IAAI,CAAC,UAAU,IAAO,mBAAmB,CAAA;IAChE,MAAM,aAAa,GAAI,IAAI,CAAC,aAAa,IAAI,sBAAsB,CAAA;IACnE,MAAM,KAAK,GAAY,IAAI,CAAC,KAAK,IAAY,iBAAiB,EAAE,CAAA;IAChE,MAAM,IAAI,GAAa,IAAI,CAAC,IAAI,CAAA;IAEhC,sEAAsE;IACtE,iBAAiB;IACjB,MAAM,OAAO,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,CAAA;IAE5B,oEAAoE;IACpE,4FAA4F;IAC5F,MAAM,0BAA0B,GAC9B,OAAO,aAAa,KAAK,UAAU;QACjC,CAAC,CAAC,CAAC,KAAK,EAAE,EAAE,CAAE,aAAgD,CAAC,KAAK,CAAC;QACrE,CAAC,CAAC,aAAa,CAAA;IAEnB,MAAM,UAAU,GAA0B;QACxC,IAAI,EAAS,sBAAsB;QACnC,WAAW,EACT,0EAA0E;YAC1E,oEAAoE;QACtE,sEAAsE;QACtE,qEAAqE;QACrE,oEAAoE;QACpE,kEAAkE;QAClE,uCAAuC;QACvC,WAAW,EAAI,CAAC,CAAC,GAAG,EAAE;QACtB,aAAa,EAAE,0BAAoE;QACnF,oEAAoE;QACpE,8DAA8D;QAC9D,sEAAsE;QACtE,YAAY,EAAE;YACZ,IAAI,EAAe,cAAc;YACjC,IAAI,EAAe,mBAAmB;YACtC,gBAAgB,EAAG,QAAQ,CAAC,KAAK;YACjC,iBAAiB,EAAE,QAAQ,CAAC,MAAM;SACnC;KACF,CAAA;IAED,MAAM,IAAI,GAAoB;QAC5B,CAAC,mBAAmB,CAAC,EAAE,IAAI;QAC3B,UAAU;QACV,KAAK,CAAC,OAAO,CAAC,KAAqB;YACjC,OAAO,CAAC,KAAK,EAAE,CAAA;YACf,IAAI,OAAO,CAAC,KAAK,GAAG,UAAU,EAAE,CAAC;gBAC/B,MAAM,IAAI,qBAAqB,CAAC,UAAU,CAAC,CAAA;YAC7C,CAAC;YAED,MAAM,MAAM,GAA
G,MAAM,qBAAqB,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,CAAA;YAC9D,OAAO,kBAAkB,CAAC,MAAM,CAAC,CAAA;QACnC,CAAC;QACD,QAAQ;YACN,OAAO;gBACL,IAAI,EAAS,sBAAsB;gBACnC,WAAW,EAAE,UAAU,CAAC,WAAW;gBACnC,gEAAgE;gBAChE,UAAU,EAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,EAAE,EAAE,oBAAoB,EAAE,IAAI,EAAE;gBAC3E,YAAY,EAAE,UAAU,CAAC,YAAa;aACvC,CAAA;QACH,CAAC;KACF,CAAA;IAED,OAAO,IAAI,CAAA;AACb,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,kBAAkB,CAAC,MAA4B;IACtD,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvC,OAAO;YACL;gBACE,IAAI,EAAM,OAAO;gBACjB,QAAQ,EAAE,MAAM,CAAC,UAAU;gBAC3B,IAAI;aACL;SACF,CAAA;IACH,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC3B,OAAO,MAAM,CAAC,IAAI,CAAA;IACpB,CAAC;IACD,uEAAuE;IACvE,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;AAC9B,CAAC;AAED;;;;;GAKG;AACH,SAAS,aAAa,CAAC,IAAgB;IACrC,IAAI,OAAO,MAAM,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;IAC7C,CAAC;IACD,uCAAuC;IACvC,IAAI,MAAM,GAAG,EAAE,CAAA;IACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAW,CAAC,CAAA;IACtF,OAAO,IAAI,CAAC,MAAM,CAAC,CAAA;AACrB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,CAAU;IAC1C,IAAI,CAAC,KAAK,IAAI,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAA;IACrD,MAAM,MAAM,GAAI,CAAsC,CAAC,mBAAmB,CAAC,CAAA;IAC3E,OAAO,MAAM,KAAK,IAAI,CAAA;AACxB,CAAC;AAED,6DAA6D;AAE7D,OAAO,EAAE,qBAAqB,EAAE,wBAAwB,EAAE,oBAAoB,EAAE,CAAA"}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Fixture I/O for `pnpm rudder ai:eval --record` / `--replay` (#A5
3
+ * Phase 4). Each case writes one JSON file under
4
+ * `evals/__fixtures__/<suite>/<case>.json` carrying the assistant
5
+ * turns from a real provider run, normalized into the
6
+ * {@link AiFakeStep} shape so `--replay` can re-feed them via
7
+ * `AiFake.respondWithSequence` for zero-API regression tests.
8
+ *
9
+ * The fixture format is versioned. Bumping `version` forces a
10
+ * re-record on stale fixtures rather than silently mis-replaying.
11
+ */
12
+ import type { AgentResponse } from '../types.js';
13
+ import type { AiFakeStep } from '../fake.js';
14
+ /** Fixture format. Bump `version` when the shape changes incompatibly. */
15
+ export interface EvalFixture {
16
+ version: 1;
17
+ suite: string;
18
+ case: string;
19
+ input: string;
20
+ recordedAt: string;
21
+ steps: AiFakeStep[];
22
+ }
23
+ /**
24
+ * Convert an `AgentResponse` into the assistant-turn `AiFakeStep[]`
25
+ * sequence that `AiFake.respondWithSequence` expects.
26
+ *
27
+ * - Drops user/tool turns — those are framework-generated during a
28
+ * replayed run, not provider output.
29
+ * - Multi-modal assistant content collapses to its concatenated text
30
+ * parts (the fake's transport is text-only; image/document parts
31
+ * wouldn't replay meaningfully).
32
+ * - `toolCalls` carry through verbatim so multi-step tool loops
33
+ * replay deterministically.
34
+ */
35
+ export declare function stepsFromResponse(response: AgentResponse): AiFakeStep[];
36
+ /**
37
+ * Default fixtures directory: `<cwd>/evals/__fixtures__`. Override
38
+ * via the CLI handler's options for tests / non-standard layouts.
39
+ */
40
+ export declare function defaultFixturesDir(cwd: string): string;
41
+ /**
42
+ * Filesystem-safe slug for `<suite>/<case>` segments. Letters,
43
+ * digits, dot, dash, underscore pass through; everything else
44
+ * collapses to `-`. Multiple consecutive `-` collapse to one.
45
+ *
46
+ * Pure function; tested directly so suite/case rename diffs stay
47
+ * predictable across editors.
48
+ */
49
+ export declare function slugify(s: string): string;
50
+ export declare function fixturePath(dir: string, suite: string, caseName: string): string;
51
+ /**
52
+ * Read a fixture file. Returns `null` when the fixture is missing
53
+ * (replay falls back to running normally with a clear stderr line).
54
+ *
55
+ * Throws on parse / version errors — corruption is not a passing
56
+ * case and silently ignoring it would mask real regressions.
57
+ */
58
+ export declare function readFixture(dir: string, suite: string, caseName: string): Promise<EvalFixture | null>;
59
+ /**
60
+ * Write a fixture, creating intermediate directories as needed.
61
+ * Pretty-printed (2-space) so PR diffs remain readable when the
62
+ * model output evolves.
63
+ */
64
+ export declare function writeFixture(dir: string, suite: string, caseName: string, payload: Omit<EvalFixture, 'version' | 'suite' | 'case' | 'recordedAt'>): Promise<string>;
65
+ //# sourceMappingURL=fixtures.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fixtures.d.ts","sourceRoot":"","sources":["../../src/eval/fixtures.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAe,MAAM,aAAa,CAAA;AAC7D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAE5C,0EAA0E;AAC1E,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAK,CAAC,CAAA;IACb,KAAK,EAAO,MAAM,CAAA;IAClB,IAAI,EAAQ,MAAM,CAAA;IAClB,KAAK,EAAO,MAAM,CAAA;IAClB,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAO,UAAU,EAAE,CAAA;CACzB;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,aAAa,GAAG,UAAU,EAAE,CAWvE;AASD;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAEtD;AAED;;;;;;;GAOG;AACH,wBAAgB,OAAO,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAEzC;AAED,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAEhF;AAID;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAO,MAAM,EAChB,KAAK,EAAK,MAAM,EAChB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAiB7B;AAED;;;;GAIG;AACH,wBAAsB,YAAY,CAChC,GAAG,EAAO,MAAM,EAChB,KAAK,EAAK,MAAM,EAChB,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAG,IAAI,CAAC,WAAW,EAAE,SAAS,GAAG,OAAO,GAAG,MAAM,GAAG,YAAY,CAAC,GACvE,OAAO,CAAC,MAAM,CAAC,CAYjB"}
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Fixture I/O for `pnpm rudder ai:eval --record` / `--replay` (#A5
3
+ * Phase 4). Each case writes one JSON file under
4
+ * `evals/__fixtures__/<suite>/<case>.json` carrying the assistant
5
+ * turns from a real provider run, normalized into the
6
+ * {@link AiFakeStep} shape so `--replay` can re-feed them via
7
+ * `AiFake.respondWithSequence` for zero-API regression tests.
8
+ *
9
+ * The fixture format is versioned. Bumping `version` forces a
10
+ * re-record on stale fixtures rather than silently mis-replaying.
11
+ */
12
+ import { mkdir, readFile, writeFile } from 'node:fs/promises';
13
+ import path from 'node:path';
14
/**
 * Build the replay sequence for `AiFake.respondWithSequence` from a
 * recorded `AgentResponse`.
 *
 * - Only steps whose message role is `'assistant'` are kept; every other
 *   role is skipped.
 * - Each kept step contributes its text (flattened by `contentToText`)
 *   and its `finishReason`.
 * - `toolCalls` is attached only when the step has at least one call; the
 *   array is passed through as-is, not copied.
 *
 * @param response Agent response whose `steps` are converted.
 * @returns One replay step per assistant step, in the original order.
 */
export function stepsFromResponse(response) {
    const replay = [];
    for (const step of response.steps) {
        if (step.message.role !== 'assistant') {
            continue;
        }
        const entry = {
            text: contentToText(step.message.content),
            finishReason: step.finishReason,
        };
        // Omit the key entirely (rather than storing an empty array) when
        // the step made no tool calls.
        if (step.toolCalls.length > 0) {
            entry.toolCalls = step.toolCalls;
        }
        replay.push(entry);
    }
    return replay;
}
39
/**
 * Flatten message content to plain text.
 *
 * A string is returned unchanged. For an array of parts, the `text` of
 * every part whose `type` is `'text'` is concatenated with no separator;
 * parts of any other type are dropped.
 *
 * @param content Either a string or an array of content parts.
 * @returns The concatenated text.
 */
function contentToText(content) {
    if (typeof content === 'string') {
        return content;
    }
    const pieces = [];
    for (const part of content) {
        if (part.type === 'text') {
            pieces.push(part.text);
        }
    }
    return pieces.join('');
}
44
+ // ─── Fixture path conventions ─────────────────────────────
45
/**
 * Resolve the conventional fixtures location, `<cwd>/evals/__fixtures__`.
 *
 * @param cwd Base directory the fixtures folder is placed under.
 * @returns `cwd` joined with `evals` and `__fixtures__`.
 */
export function defaultFixturesDir(cwd) {
    const segments = [cwd, 'evals', '__fixtures__'];
    return path.join(...segments);
}
52
/**
 * Filesystem-safe slug for `<suite>/<case>` path segments.
 *
 * Letters, digits, dot, dash and underscore pass through; every run of
 * other characters becomes a single `-`. Consecutive dashes (including
 * ones already present in the input) collapse to one, and leading /
 * trailing dashes are trimmed.
 *
 * Returns `_` when nothing usable is left, and also when the result
 * consists solely of dots: `.` and `..` are directory references, so
 * letting them through would let a suite or case name resolve outside
 * the fixtures directory.
 *
 * Pure function.
 *
 * @param s Raw suite or case name.
 * @returns A non-empty slug usable as a single path segment.
 */
export function slugify(s) {
    const slug = s
        .replace(/[^A-Za-z0-9._-]+/g, '-')
        // A literal `-` next to a replaced run would otherwise leave `--`.
        .replace(/-{2,}/g, '-')
        .replace(/^-+|-+$/g, '');
    if (slug === '' || /^\.+$/.test(slug)) {
        return '_';
    }
    return slug;
}
63
/**
 * Compute the fixture file path for a suite/case pair:
 * `<dir>/<slug(suite)>/<slug(caseName)>.json`.
 *
 * @param dir Fixtures root directory.
 * @param suite Suite name; slugified into the sub-directory name.
 * @param caseName Case name; slugified into the file's base name.
 * @returns The joined path to the fixture's JSON file.
 */
export function fixturePath(dir, suite, caseName) {
    const suiteDir = slugify(suite);
    const fileName = `${slugify(caseName)}.json`;
    return path.join(dir, suiteDir, fileName);
}
66
+ // ─── Read / write ─────────────────────────────────────────
67
/**
 * Read and parse the fixture file for a suite/case pair.
 *
 * @param dir Fixtures root directory.
 * @param suite Suite name (slugified by `fixturePath`).
 * @param caseName Case name (slugified by `fixturePath`).
 * @returns The parsed fixture, or `null` when the file does not exist.
 * @throws Any read error other than `ENOENT`, the `SyntaxError` raised by
 *   `JSON.parse` on malformed content, or an `Error` when the parsed
 *   value's `version` is not `1` — corruption is not a passing case and
 *   silently ignoring it would mask real regressions.
 */
export async function readFixture(dir, suite, caseName) {
    const file = fixturePath(dir, suite, caseName);
    let raw;
    try {
        raw = await readFile(file, 'utf8');
    }
    catch (err) {
        // A missing file is reported as `null`; everything else
        // (permissions, I/O failure, ...) propagates to the caller.
        if (err.code === 'ENOENT')
            return null;
        throw err;
    }
    const parsed = JSON.parse(raw);
    // `null` is valid JSON; reading `.version` off it would raise an opaque
    // TypeError instead of the descriptive version error below.
    const version = parsed == null ? undefined : parsed.version;
    if (version !== 1) {
        throw new Error(`[RudderJS AI] Fixture ${file} is version ${String(version)}; expected 1. ` +
            `Re-record with \`pnpm rudder ai:eval --record\`.`);
    }
    return parsed;
}
92
/**
 * Persist a fixture for a suite/case pair as pretty-printed JSON
 * (2-space indent, trailing newline), creating missing parent
 * directories first.
 *
 * The stored object starts with `version: 1`, `suite`, `case` and a
 * `recordedAt` ISO timestamp taken at write time, followed by the
 * properties of `payload`.
 *
 * @param dir Fixtures root directory.
 * @param suite Suite name, stored verbatim and slugified for the path.
 * @param caseName Case name, stored verbatim and slugified for the path.
 * @param payload Remaining fixture fields, spread into the stored object.
 * @returns The path of the file that was written.
 */
export async function writeFixture(dir, suite, caseName, payload) {
    const target = fixturePath(dir, suite, caseName);
    const parentDir = path.dirname(target);
    await mkdir(parentDir, { recursive: true });
    const record = {
        version: 1,
        suite,
        case: caseName,
        recordedAt: new Date().toISOString(),
        ...payload,
    };
    const serialized = JSON.stringify(record, null, 2);
    await writeFile(target, `${serialized}\n`);
    return target;
}
110
+ //# sourceMappingURL=fixtures.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fixtures.js","sourceRoot":"","sources":["../../src/eval/fixtures.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AAC7D,OAAO,IAAI,MAAM,WAAW,CAAA;AAc5B;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAuB;IACvD,OAAO,QAAQ,CAAC,KAAK;SAClB,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,KAAK,WAAW,CAAC;SACjD,GAAG,CAAC,IAAI,CAAC,EAAE;QACV,MAAM,GAAG,GAAe;YACtB,IAAI,EAAU,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC;YACjD,YAAY,EAAE,IAAI,CAAC,YAAY;SAChC,CAAA;QACD,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC;YAAE,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAA;QAC7D,OAAO,GAAG,CAAA;IACZ,CAAC,CAAC,CAAA;AACN,CAAC;AAED,SAAS,aAAa,CAAC,OAA+B;IACpD,IAAI,OAAO,OAAO,KAAK,QAAQ;QAAE,OAAO,OAAO,CAAA;IAC/C,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;AACzE,CAAC;AAED,6DAA6D;AAE7D;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,cAAc,CAAC,CAAA;AAChD,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,OAAO,CAAC,CAAS;IAC/B,OAAO,CAAC,CAAC,OAAO,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,IAAI,GAAG,CAAA;AAC3E,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW,EAAE,KAAa,EAAE,QAAgB;IACtE,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,KAAK,CAAC,EAAE,GAAG,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAA;AACpE,CAAC;AAED,6DAA6D;AAE7D;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,GAAgB,EAChB,KAAgB,EAChB,QAAgB;IAEhB,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;IAC9C,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACpC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAK,GAA6B,CAAC,IAAI,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAA;QACjE,MAAM,GAAG,CAAA;IACX,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAgB,CAAA;IAC7C,IAAI,MAAM,CAAC,OAAO,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CACb,yBAAyB,IAAI,eAAe,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,gBAAgB;YAClF,kDAAkD,CACnD,CAAA;IACH,CAAC;IACD,OAAO,MAAM,CAAA;
AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,GAAgB,EAChB,KAAgB,EAChB,QAAgB,EAChB,OAAwE;IAExE,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;IAC9C,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACpD,MAAM,OAAO,GAAgB;QAC3B,OAAO,EAAK,CAAC;QACb,KAAK;QACL,IAAI,EAAQ,QAAQ;QACpB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,GAAG,OAAO;KACX,CAAA;IACD,MAAM,SAAS,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAA;IAC9D,OAAO,IAAI,CAAA;AACb,CAAC"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Self-contained HTML reporter for `pnpm rudder ai:eval --html`
3
+ * (#A5 Phase 5). Renders one or more {@link SuiteReport}s into a
4
+ * single HTML string with inline styles and minimal vanilla JS for
5
+ * row expand/collapse — no framework deps, no external assets, safe
6
+ * to paste into a PR comment or open offline.
7
+ *
8
+ * Defensive HTML-escape on every piece of user content (suite name,
9
+ * case name, input, response, metadata). Long responses get a
10
+ * `<pre>` block with `white-space: pre-wrap` so output stays
11
+ * scannable without a horizontal scroll.
12
+ */
13
+ import type { SuiteReport } from './index.js';
14
+ export interface HtmlReportOptions {
15
+ /** Document `<title>`. Defaults to `"Eval Report"`. */
16
+ title?: string;
17
+ /** ISO timestamp shown in the header. Defaults to `new Date().toISOString()`. */
18
+ generatedAt?: string;
19
+ }
20
+ /**
21
+ * Render an array of {@link SuiteReport}s as a single self-contained
22
+ * HTML document.
23
+ */
24
+ export declare function reportHtml(reports: SuiteReport[], opts?: HtmlReportOptions): string;
25
+ //# sourceMappingURL=html-reporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-reporter.d.ts","sourceRoot":"","sources":["../../src/eval/html-reporter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAgB,MAAM,YAAY,CAAA;AAE3D,MAAM,WAAW,iBAAiB;IAChC,uDAAuD;IACvD,KAAK,CAAC,EAAQ,MAAM,CAAA;IACpB,iFAAiF;IACjF,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,IAAI,GAAE,iBAAsB,GAAG,MAAM,CA6CvF"}