@orq-ai/evaluatorq 1.2.0-rc.2 → 1.2.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +50 -0
  2. package/dist/index.d.ts +1 -0
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +2 -0
  5. package/dist/lib/integrations/ai-sdk/convert.d.ts +22 -0
  6. package/dist/lib/integrations/ai-sdk/convert.d.ts.map +1 -0
  7. package/dist/lib/integrations/ai-sdk/convert.js +230 -0
  8. package/dist/lib/integrations/ai-sdk/index.d.ts +5 -0
  9. package/dist/lib/integrations/ai-sdk/index.d.ts.map +1 -0
  10. package/dist/lib/integrations/ai-sdk/index.js +4 -0
  11. package/dist/lib/integrations/ai-sdk/types.d.ts +56 -0
  12. package/dist/lib/integrations/ai-sdk/types.d.ts.map +1 -0
  13. package/dist/lib/integrations/ai-sdk/types.js +1 -0
  14. package/dist/lib/integrations/ai-sdk/wrap-agent.d.ts +58 -0
  15. package/dist/lib/integrations/ai-sdk/wrap-agent.d.ts.map +1 -0
  16. package/dist/lib/integrations/ai-sdk/wrap-agent.js +68 -0
  17. package/dist/lib/integrations/common/index.d.ts +5 -0
  18. package/dist/lib/integrations/common/index.d.ts.map +1 -0
  19. package/dist/lib/integrations/common/index.js +4 -0
  20. package/dist/lib/integrations/common/utils.d.ts +37 -0
  21. package/dist/lib/integrations/common/utils.d.ts.map +1 -0
  22. package/dist/lib/integrations/common/utils.js +73 -0
  23. package/dist/lib/integrations/langchain/convert.d.ts +22 -0
  24. package/dist/lib/integrations/langchain/convert.d.ts.map +1 -0
  25. package/dist/lib/integrations/langchain/convert.js +353 -0
  26. package/dist/lib/integrations/langchain/index.d.ts +27 -0
  27. package/dist/lib/integrations/langchain/index.d.ts.map +1 -0
  28. package/dist/lib/integrations/langchain/index.js +25 -0
  29. package/dist/lib/integrations/langchain/types.d.ts +132 -0
  30. package/dist/lib/integrations/langchain/types.d.ts.map +1 -0
  31. package/dist/lib/integrations/langchain/types.js +4 -0
  32. package/dist/lib/integrations/langchain/wrap-agent.d.ts +70 -0
  33. package/dist/lib/integrations/langchain/wrap-agent.d.ts.map +1 -0
  34. package/dist/lib/integrations/langchain/wrap-agent.js +179 -0
  35. package/dist/lib/integrations/openresponses/index.d.ts +236 -0
  36. package/dist/lib/integrations/openresponses/index.d.ts.map +1 -0
  37. package/dist/lib/integrations/openresponses/index.js +31 -0
  38. package/dist/lib/types.d.ts +2 -1
  39. package/dist/lib/types.d.ts.map +1 -1
  40. package/dist/tsconfig.lib.tsbuildinfo +1 -1
  41. package/package.json +37 -4
package/README.md CHANGED
@@ -10,6 +10,7 @@ An evaluation framework library that provides a flexible way to run parallel eva
10
10
  - **Orq Platform Integration**: Seamlessly fetch and evaluate datasets from Orq AI (optional)
11
11
  - **OpenTelemetry Tracing**: Built-in observability with automatic span creation for jobs and evaluators
12
12
  - **Pass/Fail Tracking**: Evaluators can return pass/fail status for CI/CD integration
13
+ - **Integrations**: LangChain, LangGraph, and Vercel AI SDK agent integration
13
14
 
14
15
  ## 📥 Installation
15
16
 
@@ -35,6 +36,18 @@ For OpenTelemetry tracing (optional):
35
36
  npm install @opentelemetry/api @opentelemetry/sdk-node @opentelemetry/sdk-trace-base @opentelemetry/exporter-trace-otlp-http @opentelemetry/resources @opentelemetry/semantic-conventions
36
37
  ```
37
38
 
39
+ For LangChain/LangGraph integration:
40
+
41
+ ```bash
42
+ npm install langchain @langchain/core @langchain/langgraph
43
+ ```
44
+
45
+ For Vercel AI SDK integration:
46
+
47
+ ```bash
48
+ npm install ai
49
+ ```
50
+
38
51
  ## 🚀 Quick Start
39
52
 
40
53
  ### Basic Usage
@@ -221,6 +234,38 @@ const conversationJob = job("assistant", async (data) => {
221
234
 
222
235
  The `invoke()` function returns the text content directly, while `deployment()` returns an object with both `content` and `raw` response for more control.
223
236
 
237
+ ## 🔗 LangChain Integration
238
+
239
+ Evaluatorq provides integration with LangChain and LangGraph agents, converting their outputs to the OpenResponses format for standardized evaluation.
240
+
241
+ The LangChain integration allows you to:
242
+ - Wrap LangChain agents created with `createAgent()` for use in evaluatorq jobs
243
+ - Wrap LangGraph compiled graphs for stateful agent evaluation
244
+ - Automatically convert agent outputs to OpenResponses format
245
+ - Evaluate agent behavior using standard evaluatorq evaluators
246
+
247
+ ### Examples
248
+
249
+ Complete examples are available in the examples folder:
250
+
251
+ - **LangChain Agent**: [`examples/src/lib/integrations/langchain-agent-eval.ts`](../../examples/src/lib/integrations/langchain-agent-eval.ts)
252
+ - **LangGraph Agent**: [`examples/src/lib/integrations/langgraph-agent-eval.ts`](../../examples/src/lib/integrations/langgraph-agent-eval.ts)
253
+
254
+ ## 🤖 Vercel AI SDK Integration
255
+
256
+ Evaluatorq integrates with the Vercel AI SDK, allowing you to wrap AI SDK agents and evaluate them using the standard evaluatorq framework.
257
+
258
+ The Vercel AI SDK integration allows you to:
259
+ - Wrap Vercel AI SDK `ToolLoopAgent` instances for use in evaluatorq jobs
260
+ - Automatically convert agent outputs to OpenResponses format
261
+ - Evaluate agent behavior using standard evaluatorq evaluators
262
+
263
+ ### Examples
264
+
265
+ Complete examples are available in the examples folder:
266
+
267
+ - **Vercel AI SDK Agent**: [`examples/src/lib/integrations/vercel_ai_sdk_integration_example.ts`](../../examples/src/lib/integrations/vercel_ai_sdk_integration_example.ts)
268
+
224
269
  ## 🔧 Configuration
225
270
 
226
271
  ### Environment Variables
@@ -412,6 +457,11 @@ type Scorer = (
412
457
  EvaluationResult<string | number | boolean | EvaluationResultCell>
413
458
  >;
414
459
 
460
+ // Integration wrappers
461
+ import { wrapLangChainAgent, wrapLangGraphAgent } from "@orq-ai/evaluatorq/langchain";
462
+ import { wrapAISdkAgent } from "@orq-ai/evaluatorq/ai-sdk";
463
+ import type { ResponseResource } from "@orq-ai/evaluatorq/openresponses";
464
+
415
465
  // Deployment helper types
416
466
  interface DeploymentOptions {
417
467
  inputs?: Record<string, unknown>;
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export { type DeploymentOptions, type DeploymentResponse, deployment, invoke, } from "./lib/deployment-helper.js";
2
2
  export * from "./lib/evaluatorq.js";
3
+ export * from "./lib/integrations/openresponses/index.js";
3
4
  export { job } from "./lib/job-helper.js";
4
5
  export { sendResultsToOrqEffect } from "./lib/send-results.js";
5
6
  export { displayResultsTableEffect } from "./lib/table-display.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,UAAU,EACV,MAAM,GACP,MAAM,4BAA4B,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,OAAO,EAAE,GAAG,EAAE,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAEnE,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,EACf,KAAK,cAAc,GACpB,MAAM,wBAAwB,CAAC;AAChC,cAAc,gBAAgB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,UAAU,EACV,MAAM,GACP,MAAM,4BAA4B,CAAC;AACpC,cAAc,qBAAqB,CAAC;AAEpC,cAAc,2CAA2C,CAAC;AAC1D,OAAO,EAAE,GAAG,EAAE,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAEnE,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,EACf,KAAK,cAAc,GACpB,MAAM,wBAAwB,CAAC;AAChC,cAAc,gBAAgB,CAAC"}
package/dist/index.js CHANGED
@@ -1,5 +1,7 @@
1
1
  export { deployment, invoke, } from "./lib/deployment-helper.js";
2
2
  export * from "./lib/evaluatorq.js";
3
+ // OpenResponses types
4
+ export * from "./lib/integrations/openresponses/index.js";
3
5
  export { job } from "./lib/job-helper.js";
4
6
  export { sendResultsToOrqEffect } from "./lib/send-results.js";
5
7
  export { displayResultsTableEffect } from "./lib/table-display.js";
@@ -0,0 +1,22 @@
1
+ import type { Agent, ToolSet } from "ai";
2
+ import type { ResponseResource } from "../openresponses/index.js";
3
+ /**
4
+ * Builds the input array for OpenResponses format.
5
+ *
6
+ * Collects function_call and function_call_output items from the tool calls and tool results recorded in each step.
7
+ * The input should contain:
8
+ * - User message
9
+ * - All function calls made during the conversation
10
+ * - All function call outputs returned
11
+ */
12
+ export declare function buildInputFromSteps<TOOLS extends ToolSet>(result: Awaited<ReturnType<Agent<never, TOOLS, never>["generate"]>>, prompt: string | undefined): unknown[];
13
+ /**
14
+ * Converts Vercel AI SDK agent result to OpenResponses format.
15
+ */
16
+ export declare function convertToOpenResponses<TOOLS extends ToolSet>(result: Awaited<ReturnType<Agent<never, TOOLS, never>["generate"]>>, agent: Agent<never, TOOLS, never>, prompt?: string): ResponseResource;
17
+ /**
18
+ * Manually builds an OpenResponses resource from the agent's steps.
19
+ * convertToOpenResponses currently always delegates to this function; `_prompt` is accepted but unused.
20
+ */
21
+ export declare function buildOpenResponsesFromSteps<TOOLS extends ToolSet>(result: Awaited<ReturnType<Agent<never, TOOLS, never>["generate"]>>, agent: Agent<never, TOOLS, never>, _prompt?: string): ResponseResource;
22
+ //# sourceMappingURL=convert.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"convert.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/convert.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAc,OAAO,EAAE,MAAM,IAAI,CAAC;AAOrD,OAAO,KAAK,EAMV,gBAAgB,EAEjB,MAAM,2BAA2B,CAAC;AASnC;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,SAAS,OAAO,EACvD,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EACnE,MAAM,EAAE,MAAM,GAAG,SAAS,GACzB,OAAO,EAAE,CA6EX;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,SAAS,OAAO,EAC1D,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EACnE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EACjC,MAAM,CAAC,EAAE,MAAM,GACd,gBAAgB,CAElB;AAwCD;;;GAGG;AACH,wBAAgB,2BAA2B,CAAC,KAAK,SAAS,OAAO,EAC/D,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EACnE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,GACf,gBAAgB,CA4JlB"}
@@ -0,0 +1,230 @@
1
+ import { generateItemId, getResponseStatus, serializeArgs, } from "../common/index.js";
2
+ /**
3
+ * Builds the input array for OpenResponses format.
4
+ *
5
+ * Collects function_call and function_call_output items from the tool calls and tool results recorded in each step.
6
+ * The input should contain:
7
+ * - User message
8
+ * - All function calls made during the conversation
9
+ * - All function call outputs returned
10
+ */
11
+ export function buildInputFromSteps(result, prompt) {
12
+ const input = [];
13
+ // Add the initial user message
14
+ if (prompt) {
15
+ input.push({
16
+ type: "message",
17
+ id: generateItemId("msg"),
18
+ status: "completed",
19
+ role: "user",
20
+ content: [
21
+ {
22
+ type: "input_text",
23
+ text: prompt,
24
+ },
25
+ ],
26
+ });
27
+ }
28
+ // Collect all function calls and outputs from steps
29
+ for (const step of result.steps) {
30
+ const stepData = step;
31
+ // Primary: Extract from content array (ToolLoopAgent format)
32
+ if (stepData.content && stepData.content.length > 0) {
33
+ for (const item of stepData.content) {
34
+ if (item.type === "tool-call" && item.toolCallId && item.toolName) {
35
+ input.push({
36
+ type: "function_call",
37
+ id: generateItemId("fc"),
38
+ call_id: item.toolCallId,
39
+ name: item.toolName,
40
+ arguments: serializeArgs(item.input),
41
+ status: "completed",
42
+ });
43
+ }
44
+ if (item.type === "tool-result" && item.toolCallId) {
45
+ input.push({
46
+ type: "function_call_output",
47
+ id: generateItemId("fco"),
48
+ call_id: item.toolCallId,
49
+ output: serializeArgs(item.output),
50
+ status: "completed",
51
+ });
52
+ }
53
+ }
54
+ }
55
+ // Fallback: Extract from toolCalls/toolResults arrays (alternative AI SDK format)
56
+ else {
57
+ if (stepData.toolCalls && stepData.toolCalls.length > 0) {
58
+ for (const toolCall of stepData.toolCalls) {
59
+ input.push({
60
+ type: "function_call",
61
+ id: generateItemId("fc"),
62
+ call_id: toolCall.toolCallId,
63
+ name: toolCall.toolName,
64
+ arguments: serializeArgs(toolCall.input),
65
+ status: "completed",
66
+ });
67
+ }
68
+ }
69
+ if (stepData.toolResults && stepData.toolResults.length > 0) {
70
+ for (const toolResult of stepData.toolResults) {
71
+ input.push({
72
+ type: "function_call_output",
73
+ id: generateItemId("fco"),
74
+ call_id: toolResult.toolCallId,
75
+ output: serializeArgs(toolResult.output),
76
+ status: "completed",
77
+ });
78
+ }
79
+ }
80
+ }
81
+ }
82
+ return input;
83
+ }
84
+ /**
85
+ * Converts Vercel AI SDK agent result to OpenResponses format.
86
+ */
87
+ export function convertToOpenResponses(result, agent, prompt) {
88
+ return buildOpenResponsesFromSteps(result, agent, prompt);
89
+ }
90
+ /**
91
+ * Manually builds an OpenResponses resource from the agent's steps.
92
+ * convertToOpenResponses currently always delegates to this function; `_prompt` is accepted but unused.
93
+ */
94
+ export function buildOpenResponsesFromSteps(result, agent, _prompt) {
95
+ const now = Math.floor(Date.now() / 1000);
96
+ // Extract configuration from first step's request body
97
+ const firstStep = result.steps[0];
98
+ const lastStep = result.steps[result.steps.length - 1];
99
+ const requestBody = firstStep?.request?.body;
100
+ // Extract timestamps from response (handle both Date and string formats)
101
+ const getTimestamp = (ts) => {
102
+ if (!ts)
103
+ return now;
104
+ return Math.floor((ts instanceof Date ? ts : new Date(ts)).getTime() / 1000);
105
+ };
106
+ const createdAt = getTimestamp(firstStep?.response?.timestamp);
107
+ const completedAt = getTimestamp(lastStep?.response?.timestamp);
108
+ // Extract service tier from provider metadata
109
+ const serviceTier = lastStep?.providerMetadata?.openai?.serviceTier ?? "default";
110
+ // Convert tools from agent configuration
111
+ const tools = [];
112
+ if (agent.tools) {
113
+ for (const [toolName, toolDef] of Object.entries(agent.tools)) {
114
+ const toolConfig = toolDef;
115
+ tools.push({
116
+ type: "function",
117
+ name: toolName,
118
+ description: toolConfig.description ?? null,
119
+ parameters: toolConfig.parameters ?? null,
120
+ strict: null,
121
+ });
122
+ }
123
+ }
124
+ // Build output items from steps
125
+ const output = [];
126
+ let callIdCounter = 0;
127
+ for (const step of result.steps) {
128
+ const stepData = step;
129
+ // Add function calls from this step
130
+ if (stepData.toolCalls && stepData.toolCalls.length > 0) {
131
+ for (const toolCall of stepData.toolCalls) {
132
+ const functionCall = {
133
+ type: "function_call",
134
+ id: generateItemId("fc"),
135
+ call_id: toolCall.toolCallId || `call_${callIdCounter++}`,
136
+ name: toolCall.toolName,
137
+ arguments: serializeArgs(toolCall.input),
138
+ status: "completed",
139
+ };
140
+ output.push(functionCall);
141
+ }
142
+ }
143
+ // Add function call outputs from this step
144
+ if (stepData.toolResults && stepData.toolResults.length > 0) {
145
+ for (const toolResult of stepData.toolResults) {
146
+ const functionCallOutput = {
147
+ type: "function_call_output",
148
+ id: generateItemId("fco"),
149
+ call_id: toolResult.toolCallId,
150
+ output: serializeArgs(toolResult.output),
151
+ status: "completed",
152
+ };
153
+ output.push(functionCallOutput);
154
+ }
155
+ }
156
+ }
157
+ // Add final message with text response
158
+ if (result.text) {
159
+ const message = {
160
+ type: "message",
161
+ id: generateItemId("msg"),
162
+ status: "completed",
163
+ role: "assistant",
164
+ content: [
165
+ {
166
+ type: "output_text",
167
+ text: result.text,
168
+ annotations: [],
169
+ logprobs: [],
170
+ },
171
+ ],
172
+ };
173
+ output.push(message);
174
+ }
175
+ // Convert usage
176
+ const usage = result.totalUsage
177
+ ? {
178
+ input_tokens: result.totalUsage.inputTokens ?? 0,
179
+ input_tokens_details: {
180
+ cached_tokens: result.totalUsage
181
+ .cachedInputTokens ?? 0,
182
+ },
183
+ output_tokens: result.totalUsage.outputTokens ?? 0,
184
+ output_tokens_details: {
185
+ reasoning_tokens: result.totalUsage
186
+ .reasoningTokens ?? 0,
187
+ },
188
+ total_tokens: result.totalUsage.totalTokens ?? 0,
189
+ }
190
+ : null;
191
+ const status = getResponseStatus(result.finishReason);
192
+ return {
193
+ id: result.response.id || generateItemId("resp"),
194
+ object: "response",
195
+ created_at: createdAt,
196
+ completed_at: status === "completed" ? completedAt : null,
197
+ status,
198
+ incomplete_details: status === "incomplete" ? { reason: result.finishReason } : null,
199
+ model: result.response.modelId,
200
+ previous_response_id: null,
201
+ instructions: null,
202
+ output,
203
+ error: status === "failed" ? { message: "Agent execution failed" } : null,
204
+ tools,
205
+ tool_choice: requestBody?.tool_choice ?? "auto",
206
+ truncation: "disabled",
207
+ parallel_tool_calls: true,
208
+ text: {
209
+ format: {
210
+ type: "text",
211
+ },
212
+ },
213
+ top_p: requestBody?.top_p ?? 1,
214
+ presence_penalty: requestBody?.presence_penalty ?? 0,
215
+ frequency_penalty: requestBody?.frequency_penalty ?? 0,
216
+ top_logprobs: 0,
217
+ temperature: requestBody?.temperature ?? 1,
218
+ reasoning: null,
219
+ user: null,
220
+ usage,
221
+ max_output_tokens: requestBody?.max_output_tokens ?? null,
222
+ max_tool_calls: null,
223
+ store: false,
224
+ background: false,
225
+ service_tier: serviceTier,
226
+ metadata: { framework: "vercel-ai-sdk" },
227
+ safety_identifier: null,
228
+ prompt_cache_key: null,
229
+ };
230
+ }
@@ -0,0 +1,5 @@
1
+ export { generateItemId } from "../common/index.js";
2
+ export { buildInputFromSteps, buildOpenResponsesFromSteps, convertToOpenResponses, } from "./convert.js";
3
+ export type { AgentJobOptions, StepData } from "./types.js";
4
+ export { wrapAISdkAgent } from "./wrap-agent.js";
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EACL,mBAAmB,EACnB,2BAA2B,EAC3B,sBAAsB,GACvB,MAAM,cAAc,CAAC;AACtB,YAAY,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,4 @@
1
+ // Re-export common utilities for backwards compatibility
2
+ export { generateItemId } from "../common/index.js";
3
+ export { buildInputFromSteps, buildOpenResponsesFromSteps, convertToOpenResponses, } from "./convert.js";
4
+ export { wrapAISdkAgent } from "./wrap-agent.js";
@@ -0,0 +1,56 @@
1
+ import type { StepResult } from "ai";
2
+ import type { FunctionTool, ResponseResource } from "../openresponses/index.js";
3
+ export type { StepResult };
4
+ /**
5
+ * Type definition for step data extracted from AI SDK results.
6
+ * This standalone interface (it does not formally extend the AI SDK's StepResult type) describes step fields
7
+ * that may be present in certain AI SDK versions or providers.
8
+ */
9
+ export interface StepData {
10
+ content?: Array<{
11
+ type: string;
12
+ toolCallId?: string;
13
+ toolName?: string;
14
+ input?: unknown;
15
+ output?: unknown;
16
+ }>;
17
+ request?: {
18
+ body?: {
19
+ input?: unknown[];
20
+ tools?: FunctionTool[];
21
+ };
22
+ };
23
+ response?: {
24
+ body?: ResponseResource;
25
+ messages?: Array<{
26
+ role: string;
27
+ content: Array<{
28
+ type: string;
29
+ toolCallId?: string;
30
+ toolName?: string;
31
+ input?: unknown;
32
+ text?: string;
33
+ providerOptions?: {
34
+ openai?: {
35
+ itemId?: string;
36
+ };
37
+ };
38
+ }>;
39
+ }>;
40
+ };
41
+ providerMetadata?: {
42
+ openai?: {
43
+ itemId?: string;
44
+ };
45
+ };
46
+ }
47
+ /**
48
+ * Options for creating an evaluatorq Job from an AI SDK Agent.
49
+ */
50
+ export interface AgentJobOptions {
51
+ /** The name of the job (defaults to agent.id or "agent") */
52
+ name?: string;
53
+ /** The key in data.inputs to use as the prompt (defaults to "prompt") */
54
+ promptKey?: string;
55
+ }
56
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAErC,OAAO,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAGhF,YAAY,EAAE,UAAU,EAAE,CAAC;AAE3B;;;;GAIG;AACH,MAAM,WAAW,QAAQ;IAEvB,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,MAAM,CAAC,EAAE,OAAO,CAAC;KAClB,CAAC,CAAC;IAEH,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE;YACL,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;YAClB,KAAK,CAAC,EAAE,YAAY,EAAE,CAAC;SACxB,CAAC;KACH,CAAC;IACF,QAAQ,CAAC,EAAE;QACT,IAAI,CAAC,EAAE,gBAAgB,CAAC;QACxB,QAAQ,CAAC,EAAE,KAAK,CAAC;YACf,IAAI,EAAE,MAAM,CAAC;YACb,OAAO,EAAE,KAAK,CAAC;gBACb,IAAI,EAAE,MAAM,CAAC;gBACb,UAAU,CAAC,EAAE,MAAM,CAAC;gBACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;gBAClB,KAAK,CAAC,EAAE,OAAO,CAAC;gBAChB,IAAI,CAAC,EAAE,MAAM,CAAC;gBACd,eAAe,CAAC,EAAE;oBAChB,MAAM,CAAC,EAAE;wBACP,MAAM,CAAC,EAAE,MAAM,CAAC;qBACjB,CAAC;iBACH,CAAC;aACH,CAAC,CAAC;SACJ,CAAC,CAAC;KACJ,CAAC;IACF,gBAAgB,CAAC,EAAE;QACjB,MAAM,CAAC,EAAE;YACP,MAAM,CAAC,EAAE,MAAM,CAAC;SACjB,CAAC;KACH,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,4DAA4D;IAC5D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yEAAyE;IACzE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,58 @@
1
+ import type { Agent, ToolSet } from "ai";
2
+ import type { Job } from "../../types.js";
3
+ import type { AgentJobOptions } from "./types.js";
4
+ /**
5
+ * Creates an evaluatorq Job from any AI SDK Agent.
6
+ *
7
+ * Supports:
8
+ * - `Agent` (base interface)
9
+ * - `ToolLoopAgent`
10
+ * - `Experimental_Agent` (deprecated alias for ToolLoopAgent)
11
+ *
12
+ * The job will:
13
+ * - Execute the agent with the prompt from data.inputs
14
+ * - Convert the result to OpenResponses format (industry standard)
15
+ * - Return the OpenResponses resource for backend integration
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * import { wrapAISdkAgent } from "@orq-ai/evaluatorq/ai-sdk";
20
+ * import { ToolLoopAgent, tool } from "ai";
21
+ *
22
+ * const weatherAgent = new ToolLoopAgent({
23
+ * model: openai("gpt-4o"),
24
+ * tools: {
25
+ * weather: tool({
26
+ * description: "Get the weather in a location",
27
+ * inputSchema: z.object({ location: z.string() }),
28
+ * execute: async ({ location }) => ({ location, temperature: 72 }),
29
+ * }),
30
+ * },
31
+ * });
32
+ *
33
+ * await evaluatorq("weather-agent-eval", {
34
+ * data: [
35
+ * { inputs: { prompt: "What is the weather in SF?" } },
36
+ * ],
37
+ * jobs: [wrapAISdkAgent(weatherAgent)],
38
+ * evaluators: [
39
+ * {
40
+ * name: "response-quality",
41
+ * scorer: async ({ output }) => {
42
+ * const result = output as unknown as ResponseResource;
43
+ * // Access the final message text
44
+ * const lastMessage = result.output.find(
45
+ * (item) => item.type === "message"
46
+ * );
47
+ * return {
48
+ * value: lastMessage ? 1 : 0,
49
+ * explanation: "Agent produced a response",
50
+ * };
51
+ * },
52
+ * },
53
+ * ],
54
+ * });
55
+ * ```
56
+ */
57
+ export declare function wrapAISdkAgent<TOOLS extends ToolSet>(agent: Agent<never, TOOLS, never>, options?: AgentJobOptions): Job;
58
+ //# sourceMappingURL=wrap-agent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wrap-agent.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/wrap-agent.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAEzC,OAAO,KAAK,EAAa,GAAG,EAAU,MAAM,gBAAgB,CAAC;AAG7D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAElD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoDG;AACH,wBAAgB,cAAc,CAAC,KAAK,SAAS,OAAO,EAClD,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EACjC,OAAO,GAAE,eAAoB,GAC5B,GAAG,CAeL"}
@@ -0,0 +1,68 @@
1
+ import { extractPromptFromData } from "../common/index.js";
2
+ import { convertToOpenResponses } from "./convert.js";
3
+ /**
4
+ * Creates an evaluatorq Job from any AI SDK Agent.
5
+ *
6
+ * Supports:
7
+ * - `Agent` (base interface)
8
+ * - `ToolLoopAgent`
9
+ * - `Experimental_Agent` (deprecated alias for ToolLoopAgent)
10
+ *
11
+ * The job will:
12
+ * - Execute the agent with the prompt from data.inputs
13
+ * - Convert the result to OpenResponses format (industry standard)
14
+ * - Return the OpenResponses resource for backend integration
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * import { wrapAISdkAgent } from "@orq-ai/evaluatorq/ai-sdk";
19
+ * import { ToolLoopAgent, tool } from "ai";
20
+ *
21
+ * const weatherAgent = new ToolLoopAgent({
22
+ * model: openai("gpt-4o"),
23
+ * tools: {
24
+ * weather: tool({
25
+ * description: "Get the weather in a location",
26
+ * inputSchema: z.object({ location: z.string() }),
27
+ * execute: async ({ location }) => ({ location, temperature: 72 }),
28
+ * }),
29
+ * },
30
+ * });
31
+ *
32
+ * await evaluatorq("weather-agent-eval", {
33
+ * data: [
34
+ * { inputs: { prompt: "What is the weather in SF?" } },
35
+ * ],
36
+ * jobs: [wrapAISdkAgent(weatherAgent)],
37
+ * evaluators: [
38
+ * {
39
+ * name: "response-quality",
40
+ * scorer: async ({ output }) => {
41
+ * const result = output as unknown as ResponseResource;
42
+ * // Access the final message text
43
+ * const lastMessage = result.output.find(
44
+ * (item) => item.type === "message"
45
+ * );
46
+ * return {
47
+ * value: lastMessage ? 1 : 0,
48
+ * explanation: "Agent produced a response",
49
+ * };
50
+ * },
51
+ * },
52
+ * ],
53
+ * });
54
+ * ```
55
+ */
56
+ export function wrapAISdkAgent(agent, options = {}) {
57
+ const { name = agent.id ?? "agent", promptKey = "prompt" } = options;
58
+ return async (data, _row) => {
59
+ const prompt = extractPromptFromData(data, promptKey);
60
+ const result = await agent.generate({ prompt });
61
+ // Convert to OpenResponses format
62
+ const openResponsesOutput = convertToOpenResponses(result, agent, prompt);
63
+ return {
64
+ name,
65
+ output: openResponsesOutput,
66
+ };
67
+ };
68
+ }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Common utilities shared across agent integrations.
3
+ */
4
+ export { extractPromptFromData, generateItemId, getResponseStatus, serializeArgs, } from "./utils.js";
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/common/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EACL,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,aAAa,GACd,MAAM,YAAY,CAAC"}
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Common utilities shared across agent integrations.
3
+ */
4
+ export { extractPromptFromData, generateItemId, getResponseStatus, serializeArgs, } from "./utils.js";
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Shared utilities for agent integrations.
3
+ */
4
+ import type { DataPoint } from "../../types.js";
5
+ import type { ResponseResource } from "../openresponses/index.js";
6
+ /**
7
+ * Generates a unique ID for OpenResponses items.
8
+ *
9
+ * @param prefix - The prefix for the ID (e.g., "fc" for function_call, "msg" for message)
10
+ * @returns A unique string ID with the given prefix
11
+ */
12
+ export declare function generateItemId(prefix: string): string;
13
+ /**
14
+ * Serializes tool arguments to a JSON string.
15
+ *
16
+ * @param args - The arguments to serialize (can be any type)
17
+ * @returns A JSON string representation of the arguments
18
+ */
19
+ export declare function serializeArgs(args: unknown): string;
20
+ /**
21
+ * Maps a finish reason to an OpenResponses status.
22
+ * Handles common finish reasons from various providers.
23
+ *
24
+ * @param finishReason - The finish reason from the LLM response
25
+ * @returns The corresponding OpenResponses status
26
+ */
27
+ export declare function getResponseStatus(finishReason: string | undefined): ResponseResource["status"];
28
+ /**
29
+ * Extracts and validates a prompt string from a DataPoint.
30
+ *
31
+ * @param data - The data point containing inputs
32
+ * @param promptKey - The key to look up in data.inputs
33
+ * @returns The prompt string
34
+ * @throws Error if the prompt is not a string
35
+ */
36
+ export declare function extractPromptFromData(data: DataPoint, promptKey: string): string;
37
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/common/utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAElE;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,CAiBnD;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAC/B,YAAY,EAAE,MAAM,GAAG,SAAS,GAC/B,gBAAgB,CAAC,QAAQ,CAAC,CAa5B;AAED;;;;;;;GAOG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,SAAS,EACf,SAAS,EAAE,MAAM,GAChB,MAAM,CAQR"}