@orq-ai/evaluatorq 1.2.0-rc.2 → 1.2.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/lib/integrations/ai-sdk/convert.d.ts +22 -0
- package/dist/lib/integrations/ai-sdk/convert.d.ts.map +1 -0
- package/dist/lib/integrations/ai-sdk/convert.js +230 -0
- package/dist/lib/integrations/ai-sdk/index.d.ts +5 -0
- package/dist/lib/integrations/ai-sdk/index.d.ts.map +1 -0
- package/dist/lib/integrations/ai-sdk/index.js +4 -0
- package/dist/lib/integrations/ai-sdk/types.d.ts +56 -0
- package/dist/lib/integrations/ai-sdk/types.d.ts.map +1 -0
- package/dist/lib/integrations/ai-sdk/types.js +1 -0
- package/dist/lib/integrations/ai-sdk/wrap-agent.d.ts +58 -0
- package/dist/lib/integrations/ai-sdk/wrap-agent.d.ts.map +1 -0
- package/dist/lib/integrations/ai-sdk/wrap-agent.js +68 -0
- package/dist/lib/integrations/common/index.d.ts +5 -0
- package/dist/lib/integrations/common/index.d.ts.map +1 -0
- package/dist/lib/integrations/common/index.js +4 -0
- package/dist/lib/integrations/common/utils.d.ts +37 -0
- package/dist/lib/integrations/common/utils.d.ts.map +1 -0
- package/dist/lib/integrations/common/utils.js +73 -0
- package/dist/lib/integrations/langchain/convert.d.ts +22 -0
- package/dist/lib/integrations/langchain/convert.d.ts.map +1 -0
- package/dist/lib/integrations/langchain/convert.js +353 -0
- package/dist/lib/integrations/langchain/index.d.ts +27 -0
- package/dist/lib/integrations/langchain/index.d.ts.map +1 -0
- package/dist/lib/integrations/langchain/index.js +25 -0
- package/dist/lib/integrations/langchain/types.d.ts +132 -0
- package/dist/lib/integrations/langchain/types.d.ts.map +1 -0
- package/dist/lib/integrations/langchain/types.js +4 -0
- package/dist/lib/integrations/langchain/wrap-agent.d.ts +70 -0
- package/dist/lib/integrations/langchain/wrap-agent.d.ts.map +1 -0
- package/dist/lib/integrations/langchain/wrap-agent.js +179 -0
- package/dist/lib/integrations/openresponses/index.d.ts +236 -0
- package/dist/lib/integrations/openresponses/index.d.ts.map +1 -0
- package/dist/lib/integrations/openresponses/index.js +31 -0
- package/dist/lib/types.d.ts +2 -1
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/tsconfig.lib.tsbuildinfo +1 -1
- package/package.json +37 -4
package/README.md
CHANGED
|
@@ -10,6 +10,7 @@ An evaluation framework library that provides a flexible way to run parallel eva
|
|
|
10
10
|
- **Orq Platform Integration**: Seamlessly fetch and evaluate datasets from Orq AI (optional)
|
|
11
11
|
- **OpenTelemetry Tracing**: Built-in observability with automatic span creation for jobs and evaluators
|
|
12
12
|
- **Pass/Fail Tracking**: Evaluators can return pass/fail status for CI/CD integration
|
|
13
|
+
- **Integrations**: LangChain, LangGraph, and Vercel AI SDK agent integration
|
|
13
14
|
|
|
14
15
|
## 📥 Installation
|
|
15
16
|
|
|
@@ -35,6 +36,18 @@ For OpenTelemetry tracing (optional):
|
|
|
35
36
|
npm install @opentelemetry/api @opentelemetry/sdk-node @opentelemetry/sdk-trace-base @opentelemetry/exporter-trace-otlp-http @opentelemetry/resources @opentelemetry/semantic-conventions
|
|
36
37
|
```
|
|
37
38
|
|
|
39
|
+
For LangChain/LangGraph integration:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
npm install langchain @langchain/core @langchain/langgraph
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
For Vercel AI SDK integration:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npm install ai
|
|
49
|
+
```
|
|
50
|
+
|
|
38
51
|
## 🚀 Quick Start
|
|
39
52
|
|
|
40
53
|
### Basic Usage
|
|
@@ -221,6 +234,38 @@ const conversationJob = job("assistant", async (data) => {
|
|
|
221
234
|
|
|
222
235
|
The `invoke()` function returns the text content directly, while `deployment()` returns an object with both `content` and `raw` response for more control.
|
|
223
236
|
|
|
237
|
+
## 🔗 LangChain Integration
|
|
238
|
+
|
|
239
|
+
Evaluatorq provides integration with LangChain and LangGraph agents, converting their outputs to the OpenResponses format for standardized evaluation.
|
|
240
|
+
|
|
241
|
+
The LangChain integration allows you to:
|
|
242
|
+
- Wrap LangChain agents created with `createAgent()` for use in evaluatorq jobs
|
|
243
|
+
- Wrap LangGraph compiled graphs for stateful agent evaluation
|
|
244
|
+
- Automatically convert agent outputs to OpenResponses format
|
|
245
|
+
- Evaluate agent behavior using standard evaluatorq evaluators
|
|
246
|
+
|
|
247
|
+
### Examples
|
|
248
|
+
|
|
249
|
+
Complete examples are available in the examples folder:
|
|
250
|
+
|
|
251
|
+
- **LangChain Agent**: [`examples/src/lib/integrations/langchain-agent-eval.ts`](../../examples/src/lib/integrations/langchain-agent-eval.ts)
|
|
252
|
+
- **LangGraph Agent**: [`examples/src/lib/integrations/langgraph-agent-eval.ts`](../../examples/src/lib/integrations/langgraph-agent-eval.ts)
|
|
253
|
+
|
|
254
|
+
## 🤖 Vercel AI SDK Integration
|
|
255
|
+
|
|
256
|
+
Evaluatorq integrates with the Vercel AI SDK, allowing you to wrap AI SDK agents and evaluate them using the standard evaluatorq framework.
|
|
257
|
+
|
|
258
|
+
The Vercel AI SDK integration allows you to:
|
|
259
|
+
- Wrap Vercel AI SDK `ToolLoopAgent` instances for use in evaluatorq jobs
|
|
260
|
+
- Automatically convert agent outputs to OpenResponses format
|
|
261
|
+
- Evaluate agent behavior using standard evaluatorq evaluators
|
|
262
|
+
|
|
263
|
+
### Examples
|
|
264
|
+
|
|
265
|
+
Complete examples are available in the examples folder:
|
|
266
|
+
|
|
267
|
+
- **Vercel AI SDK Agent**: [`examples/src/lib/integrations/vercel_ai_sdk_integration_example.ts`](../../examples/src/lib/integrations/vercel_ai_sdk_integration_example.ts)
|
|
268
|
+
|
|
224
269
|
## 🔧 Configuration
|
|
225
270
|
|
|
226
271
|
### Environment Variables
|
|
@@ -412,6 +457,11 @@ type Scorer = (
|
|
|
412
457
|
EvaluationResult<string | number | boolean | EvaluationResultCell>
|
|
413
458
|
>;
|
|
414
459
|
|
|
460
|
+
// Integration wrappers
|
|
461
|
+
import { wrapLangChainAgent, wrapLangGraphAgent } from "@orq-ai/evaluatorq/langchain";
|
|
462
|
+
import { wrapAISdkAgent } from "@orq-ai/evaluatorq/ai-sdk";
|
|
463
|
+
import type { ResponseResource } from "@orq-ai/evaluatorq/openresponses";
|
|
464
|
+
|
|
415
465
|
// Deployment helper types
|
|
416
466
|
interface DeploymentOptions {
|
|
417
467
|
inputs?: Record<string, unknown>;
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export { type DeploymentOptions, type DeploymentResponse, deployment, invoke, } from "./lib/deployment-helper.js";
|
|
2
2
|
export * from "./lib/evaluatorq.js";
|
|
3
|
+
export * from "./lib/integrations/openresponses/index.js";
|
|
3
4
|
export { job } from "./lib/job-helper.js";
|
|
4
5
|
export { sendResultsToOrqEffect } from "./lib/send-results.js";
|
|
5
6
|
export { displayResultsTableEffect } from "./lib/table-display.js";
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,UAAU,EACV,MAAM,GACP,MAAM,4BAA4B,CAAC;AACpC,cAAc,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,EACvB,UAAU,EACV,MAAM,GACP,MAAM,4BAA4B,CAAC;AACpC,cAAc,qBAAqB,CAAC;AAEpC,cAAc,2CAA2C,CAAC;AAC1D,OAAO,EAAE,GAAG,EAAE,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAC/D,OAAO,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAEnE,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,EACf,KAAK,cAAc,GACpB,MAAM,wBAAwB,CAAC;AAChC,cAAc,gBAAgB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
export { deployment, invoke, } from "./lib/deployment-helper.js";
|
|
2
2
|
export * from "./lib/evaluatorq.js";
|
|
3
|
+
// OpenResponses types
|
|
4
|
+
export * from "./lib/integrations/openresponses/index.js";
|
|
3
5
|
export { job } from "./lib/job-helper.js";
|
|
4
6
|
export { sendResultsToOrqEffect } from "./lib/send-results.js";
|
|
5
7
|
export { displayResultsTableEffect } from "./lib/table-display.js";
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { Agent, ToolSet } from "ai";
|
|
2
|
+
import type { ResponseResource } from "../openresponses/index.js";
|
|
3
|
+
/**
 * Assembles the OpenResponses `input` array for an AI SDK agent run.
 *
 * The returned array contains:
 * - a user message built from `prompt` (left out when `prompt` is empty or
 *   undefined)
 * - every function call recorded in the steps of `result`
 * - every function call output recorded in the steps of `result`
 *
 * @param result - The resolved value of the agent's `generate()` call.
 * @param prompt - The user prompt that started the run, if there was one.
 * @returns The collected input items.
 */
export declare function buildInputFromSteps<TOOLS extends ToolSet>(
    result: Awaited<ReturnType<Agent<never, TOOLS, never>["generate"]>>,
    prompt: string | undefined,
): unknown[];
|
|
13
|
+
/**
 * Translates the result of a Vercel AI SDK agent run into an OpenResponses
 * response resource.
 *
 * @param result - The resolved value of the agent's `generate()` call.
 * @param agent - The agent that produced `result`.
 * @param prompt - Optional user prompt that started the run.
 * @returns The run expressed as a `ResponseResource`.
 */
export declare function convertToOpenResponses<TOOLS extends ToolSet>(
    result: Awaited<ReturnType<Agent<never, TOOLS, never>["generate"]>>,
    agent: Agent<never, TOOLS, never>,
    prompt?: string,
): ResponseResource;
|
|
17
|
+
/**
 * Builds an OpenResponses response resource by hand from the steps of an
 * agent result, for use when the provider does not return a native
 * OpenResponses payload.
 *
 * @param result - The resolved value of the agent's `generate()` call.
 * @param agent - The agent that produced `result`.
 * @param _prompt - Optional user prompt; the underscore prefix marks it as
 *   unused by the implementation.
 * @returns The run expressed as a `ResponseResource`.
 */
export declare function buildOpenResponsesFromSteps<TOOLS extends ToolSet>(
    result: Awaited<ReturnType<Agent<never, TOOLS, never>["generate"]>>,
    agent: Agent<never, TOOLS, never>,
    _prompt?: string,
): ResponseResource;
|
|
22
|
+
//# sourceMappingURL=convert.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"convert.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/convert.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAc,OAAO,EAAE,MAAM,IAAI,CAAC;AAOrD,OAAO,KAAK,EAMV,gBAAgB,EAEjB,MAAM,2BAA2B,CAAC;AASnC;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,SAAS,OAAO,EACvD,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EACnE,MAAM,EAAE,MAAM,GAAG,SAAS,GACzB,OAAO,EAAE,CA6EX;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,KAAK,SAAS,OAAO,EAC1D,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EACnE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EACjC,MAAM,CAAC,EAAE,MAAM,GACd,gBAAgB,CAElB;AAwCD;;;GAGG;AACH,wBAAgB,2BAA2B,CAAC,KAAK,SAAS,OAAO,EAC/D,MAAM,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EACnE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,GACf,gBAAgB,CA4JlB"}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import { generateItemId, getResponseStatus, serializeArgs, } from "../common/index.js";
|
|
2
|
+
/**
 * Assembles the OpenResponses `input` array for an agent run.
 *
 * The array starts with a user message when `prompt` is non-empty. After
 * that, each step of `result.steps` contributes its tool activity:
 * - if the step has a non-empty `content` array, every `tool-call` part
 *   (with both a call id and a tool name) becomes a `function_call` item and
 *   every `tool-result` part (with a call id) becomes a
 *   `function_call_output` item, in content order;
 * - otherwise the step's `toolCalls` and then its `toolResults` arrays are
 *   converted the same way.
 *
 * @param result - Agent result; only `result.steps` is read.
 * @param prompt - The user prompt, or a falsy value to omit the user message.
 * @returns The list of input items.
 */
export function buildInputFromSteps(result, prompt) {
    // Item factories shared by both extraction paths.
    const toFunctionCall = (callId, toolName, args) => ({
        type: "function_call",
        id: generateItemId("fc"),
        call_id: callId,
        name: toolName,
        arguments: serializeArgs(args),
        status: "completed",
    });
    const toFunctionCallOutput = (callId, value) => ({
        type: "function_call_output",
        id: generateItemId("fco"),
        call_id: callId,
        output: serializeArgs(value),
        status: "completed",
    });
    const items = [];
    // The conversation opens with the user's message, when one was given.
    if (prompt) {
        const userMessage = {
            type: "message",
            id: generateItemId("msg"),
            status: "completed",
            role: "user",
            content: [
                {
                    type: "input_text",
                    text: prompt,
                },
            ],
        };
        items.push(userMessage);
    }
    for (const step of result.steps) {
        const parts = step.content;
        // Preferred source: the step's content parts (ToolLoopAgent format).
        if (parts && parts.length > 0) {
            for (const part of parts) {
                if (part.type === "tool-call" && part.toolCallId && part.toolName) {
                    items.push(toFunctionCall(part.toolCallId, part.toolName, part.input));
                }
                if (part.type === "tool-result" && part.toolCallId) {
                    items.push(toFunctionCallOutput(part.toolCallId, part.output));
                }
            }
            continue;
        }
        // Otherwise fall back to the separate toolCalls / toolResults arrays.
        const calls = step.toolCalls;
        if (calls && calls.length > 0) {
            for (const call of calls) {
                items.push(toFunctionCall(call.toolCallId, call.toolName, call.input));
            }
        }
        const results = step.toolResults;
        if (results && results.length > 0) {
            for (const toolResult of results) {
                items.push(toFunctionCallOutput(toolResult.toolCallId, toolResult.output));
            }
        }
    }
    return items;
}
|
|
84
|
+
/**
 * Translates a Vercel AI SDK agent result into the OpenResponses format.
 *
 * This is a thin entry point: all three arguments are forwarded unchanged
 * to `buildOpenResponsesFromSteps`, whose return value is handed back as is.
 *
 * @param result - The resolved value of the agent's `generate()` call.
 * @param agent - The agent that produced `result`.
 * @param prompt - Optional user prompt that started the run.
 * @returns The OpenResponses response resource for the run.
 */
export function convertToOpenResponses(result, agent, prompt) {
    const resource = buildOpenResponsesFromSteps(result, agent, prompt);
    return resource;
}
|
|
90
|
+
/**
 * Builds an OpenResponses response resource by hand from the steps of a
 * Vercel AI SDK agent result.
 *
 * What goes into the resource:
 * - `created_at` is taken from the first step's response timestamp and
 *   `completed_at` from the last step's (only when the status is
 *   "completed"). A missing or unparsable timestamp falls back to the
 *   current time.
 * - `tools` gets one `function` entry per key of `agent.tools`.
 * - `output` lists, step by step, the step's function calls followed by its
 *   function call outputs, and ends with an assistant message when
 *   `result.text` is non-empty.
 * - `usage` mirrors `result.totalUsage` (missing counters become 0) or is
 *   `null` when no usage is reported.
 * - Sampling settings (`tool_choice`, `top_p`, `temperature`, ...) are read
 *   from the first step's request body, with fixed fallbacks.
 *
 * @param result - The resolved value of the agent's `generate()` call.
 * @param agent - The agent that produced `result`; only `agent.tools` is read.
 * @param _prompt - Accepted for signature parity with
 *   `convertToOpenResponses`; not read by this function.
 * @returns The response resource, tagged with
 *   `metadata.framework = "vercel-ai-sdk"`.
 */
export function buildOpenResponsesFromSteps(result, agent, _prompt) {
    const now = Math.floor(Date.now() / 1000);
    // Request configuration is taken from the first step's request body.
    const firstStep = result.steps[0];
    const lastStep = result.steps[result.steps.length - 1];
    const requestBody = firstStep?.request?.body;
    // Converts a response timestamp (Date, or anything `new Date()` accepts)
    // to whole seconds since the epoch.
    const getTimestamp = (ts) => {
        if (!ts)
            return now;
        const millis = (ts instanceof Date ? ts : new Date(ts)).getTime();
        // An unparsable value produces an invalid Date whose getTime() is NaN;
        // fall back to the current time instead of emitting NaN.
        return Number.isNaN(millis) ? now : Math.floor(millis / 1000);
    };
    const createdAt = getTimestamp(firstStep?.response?.timestamp);
    const completedAt = getTimestamp(lastStep?.response?.timestamp);
    // Service tier comes from the last step's OpenAI provider metadata.
    const serviceTier = lastStep?.providerMetadata?.openai?.serviceTier ?? "default";
    // Describe each configured agent tool as an OpenResponses function tool.
    const tools = [];
    if (agent.tools) {
        for (const [toolName, toolDef] of Object.entries(agent.tools)) {
            const toolConfig = toolDef;
            tools.push({
                type: "function",
                name: toolName,
                description: toolConfig.description ?? null,
                // NOTE(review): this reads `parameters`, while the package's own
                // usage example declares tools with `inputSchema` — confirm which
                // field carries the schema for the supported AI SDK versions.
                parameters: toolConfig.parameters ?? null,
                strict: null,
            });
        }
    }
    // Build the output items step by step.
    const output = [];
    // Used only to synthesize a call_id for tool calls that arrive without one.
    let callIdCounter = 0;
    for (const step of result.steps) {
        const stepData = step;
        // Function calls issued in this step.
        if (stepData.toolCalls && stepData.toolCalls.length > 0) {
            for (const toolCall of stepData.toolCalls) {
                const functionCall = {
                    type: "function_call",
                    id: generateItemId("fc"),
                    call_id: toolCall.toolCallId || `call_${callIdCounter++}`,
                    name: toolCall.toolName,
                    arguments: serializeArgs(toolCall.input),
                    status: "completed",
                };
                output.push(functionCall);
            }
        }
        // Function call outputs returned in this step.
        if (stepData.toolResults && stepData.toolResults.length > 0) {
            for (const toolResult of stepData.toolResults) {
                const functionCallOutput = {
                    type: "function_call_output",
                    id: generateItemId("fco"),
                    call_id: toolResult.toolCallId,
                    output: serializeArgs(toolResult.output),
                    status: "completed",
                };
                output.push(functionCallOutput);
            }
        }
    }
    // The final assistant text, when there is any, closes the output.
    if (result.text) {
        const message = {
            type: "message",
            id: generateItemId("msg"),
            status: "completed",
            role: "assistant",
            content: [
                {
                    type: "output_text",
                    text: result.text,
                    annotations: [],
                    logprobs: [],
                },
            ],
        };
        output.push(message);
    }
    // Token usage; counters the SDK did not report are written as 0.
    const totals = result.totalUsage;
    const usage = totals
        ? {
            input_tokens: totals.inputTokens ?? 0,
            input_tokens_details: {
                cached_tokens: totals.cachedInputTokens ?? 0,
            },
            output_tokens: totals.outputTokens ?? 0,
            output_tokens_details: {
                reasoning_tokens: totals.reasoningTokens ?? 0,
            },
            total_tokens: totals.totalTokens ?? 0,
        }
        : null;
    const status = getResponseStatus(result.finishReason);
    return {
        id: result.response.id || generateItemId("resp"),
        object: "response",
        created_at: createdAt,
        completed_at: status === "completed" ? completedAt : null,
        status,
        incomplete_details: status === "incomplete" ? { reason: result.finishReason } : null,
        model: result.response.modelId,
        previous_response_id: null,
        instructions: null,
        output,
        error: status === "failed" ? { message: "Agent execution failed" } : null,
        tools,
        tool_choice: requestBody?.tool_choice ?? "auto",
        truncation: "disabled",
        parallel_tool_calls: true,
        text: {
            format: {
                type: "text",
            },
        },
        top_p: requestBody?.top_p ?? 1,
        presence_penalty: requestBody?.presence_penalty ?? 0,
        frequency_penalty: requestBody?.frequency_penalty ?? 0,
        top_logprobs: 0,
        temperature: requestBody?.temperature ?? 1,
        reasoning: null,
        user: null,
        usage,
        max_output_tokens: requestBody?.max_output_tokens ?? null,
        max_tool_calls: null,
        store: false,
        background: false,
        service_tier: serviceTier,
        metadata: { framework: "vercel-ai-sdk" },
        safety_identifier: null,
        prompt_cache_key: null,
    };
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { generateItemId } from "../common/index.js";
|
|
2
|
+
export { buildInputFromSteps, buildOpenResponsesFromSteps, convertToOpenResponses, } from "./convert.js";
|
|
3
|
+
export type { AgentJobOptions, StepData } from "./types.js";
|
|
4
|
+
export { wrapAISdkAgent } from "./wrap-agent.js";
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EACL,mBAAmB,EACnB,2BAA2B,EAC3B,sBAAsB,GACvB,MAAM,cAAc,CAAC;AACtB,YAAY,EAAE,eAAe,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAC5D,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { StepResult } from "ai";
|
|
2
|
+
import type { FunctionTool, ResponseResource } from "../openresponses/index.js";
|
|
3
|
+
export type { StepResult };
|
|
4
|
+
/**
 * Loosely-typed view of one step of an AI SDK agent result.
 *
 * Every member is optional: the shape lists fields that may be present
 * depending on the AI SDK version or provider. It is declared standalone
 * and carries no `extends` clause.
 */
export interface StepData {
    /** Content parts of the step, e.g. `tool-call` and `tool-result` parts. */
    content?: {
        type: string;
        toolCallId?: string;
        toolName?: string;
        input?: unknown;
        output?: unknown;
    }[];
    /** Request issued for the step. */
    request?: {
        body?: {
            input?: unknown[];
            tools?: FunctionTool[];
        };
    };
    /** Response received for the step. */
    response?: {
        body?: ResponseResource;
        messages?: {
            role: string;
            content: {
                type: string;
                toolCallId?: string;
                toolName?: string;
                input?: unknown;
                text?: string;
                providerOptions?: {
                    openai?: {
                        itemId?: string;
                    };
                };
            }[];
        }[];
    };
    /** Provider-specific metadata attached to the step. */
    providerMetadata?: {
        openai?: {
            itemId?: string;
        };
    };
}
|
|
47
|
+
/**
 * Settings accepted when turning an AI SDK Agent into an evaluatorq Job.
 */
export interface AgentJobOptions {
    /** Job name; when omitted, `agent.id` is used, or "agent" if that is unset. */
    name?: string;
    /** Key in `data.inputs` that holds the prompt; "prompt" when omitted. */
    promptKey?: string;
}
|
|
56
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAErC,OAAO,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAGhF,YAAY,EAAE,UAAU,EAAE,CAAC;AAE3B;;;;GAIG;AACH,MAAM,WAAW,QAAQ;IAEvB,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,KAAK,CAAC,EAAE,OAAO,CAAC;QAChB,MAAM,CAAC,EAAE,OAAO,CAAC;KAClB,CAAC,CAAC;IAEH,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE;YACL,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC;YAClB,KAAK,CAAC,EAAE,YAAY,EAAE,CAAC;SACxB,CAAC;KACH,CAAC;IACF,QAAQ,CAAC,EAAE;QACT,IAAI,CAAC,EAAE,gBAAgB,CAAC;QACxB,QAAQ,CAAC,EAAE,KAAK,CAAC;YACf,IAAI,EAAE,MAAM,CAAC;YACb,OAAO,EAAE,KAAK,CAAC;gBACb,IAAI,EAAE,MAAM,CAAC;gBACb,UAAU,CAAC,EAAE,MAAM,CAAC;gBACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;gBAClB,KAAK,CAAC,EAAE,OAAO,CAAC;gBAChB,IAAI,CAAC,EAAE,MAAM,CAAC;gBACd,eAAe,CAAC,EAAE;oBAChB,MAAM,CAAC,EAAE;wBACP,MAAM,CAAC,EAAE,MAAM,CAAC;qBACjB,CAAC;iBACH,CAAC;aACH,CAAC,CAAC;SACJ,CAAC,CAAC;KACJ,CAAC;IACF,gBAAgB,CAAC,EAAE;QACjB,MAAM,CAAC,EAAE;YACP,MAAM,CAAC,EAAE,MAAM,CAAC;SACjB,CAAC;KACH,CAAC;CACH;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,4DAA4D;IAC5D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,yEAAyE;IACzE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { Agent, ToolSet } from "ai";
|
|
2
|
+
import type { Job } from "../../types.js";
|
|
3
|
+
import type { AgentJobOptions } from "./types.js";
|
|
4
|
+
/**
 * Turns an AI SDK Agent into an evaluatorq Job.
 *
 * Accepted agents:
 * - `Agent` (base interface)
 * - `ToolLoopAgent`
 * - `Experimental_Agent` (deprecated alias for ToolLoopAgent)
 *
 * For each data point the job:
 * - runs the agent with the prompt found in `data.inputs`
 * - converts the agent result to the OpenResponses format
 * - returns that OpenResponses resource as the job output
 *
 * @param agent - The agent to run for each data point.
 * @param options - Optional job name and prompt key.
 * @returns A Job that can be listed in `evaluatorq(..., { jobs: [...] })`.
 *
 * @example
 * ```typescript
 * import { openai } from "@ai-sdk/openai";
 * import { evaluatorq } from "@orq-ai/evaluatorq";
 * import { wrapAISdkAgent } from "@orq-ai/evaluatorq/ai-sdk";
 * import type { ResponseResource } from "@orq-ai/evaluatorq/openresponses";
 * import { ToolLoopAgent, tool } from "ai";
 * import { z } from "zod";
 *
 * const weatherAgent = new ToolLoopAgent({
 *   model: openai("gpt-4o"),
 *   tools: {
 *     weather: tool({
 *       description: "Get the weather in a location",
 *       inputSchema: z.object({ location: z.string() }),
 *       execute: async ({ location }) => ({ location, temperature: 72 }),
 *     }),
 *   },
 * });
 *
 * await evaluatorq("weather-agent-eval", {
 *   data: [
 *     { inputs: { prompt: "What is the weather in SF?" } },
 *   ],
 *   jobs: [wrapAISdkAgent(weatherAgent)],
 *   evaluators: [
 *     {
 *       name: "response-quality",
 *       scorer: async ({ output }) => {
 *         const result = output as unknown as ResponseResource;
 *         // Look for a message item in the output
 *         const message = result.output.find(
 *           (item) => item.type === "message"
 *         );
 *         return {
 *           value: message ? 1 : 0,
 *           explanation: "Agent produced a response",
 *         };
 *       },
 *     },
 *   ],
 * });
 * ```
 */
export declare function wrapAISdkAgent<TOOLS extends ToolSet>(
    agent: Agent<never, TOOLS, never>,
    options?: AgentJobOptions,
): Job;
|
|
58
|
+
//# sourceMappingURL=wrap-agent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"wrap-agent.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/ai-sdk/wrap-agent.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAEzC,OAAO,KAAK,EAAa,GAAG,EAAU,MAAM,gBAAgB,CAAC;AAG7D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAElD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoDG;AACH,wBAAgB,cAAc,CAAC,KAAK,SAAS,OAAO,EAClD,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,EACjC,OAAO,GAAE,eAAoB,GAC5B,GAAG,CAeL"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { extractPromptFromData } from "../common/index.js";
|
|
2
|
+
import { convertToOpenResponses } from "./convert.js";
|
|
3
|
+
/**
 * Turns an AI SDK Agent into an evaluatorq Job.
 *
 * Accepted agents:
 * - `Agent` (base interface)
 * - `ToolLoopAgent`
 * - `Experimental_Agent` (deprecated alias for ToolLoopAgent)
 *
 * For each data point the returned job:
 * - extracts the prompt from the data point using `promptKey`
 * - calls `agent.generate({ prompt })`
 * - converts the result to the OpenResponses format
 * - resolves to `{ name, output }` with that resource as `output`
 *
 * @param agent - The agent to run for each data point.
 * @param options - Optional `name` (defaults to `agent.id`, then "agent")
 *   and `promptKey` (defaults to "prompt").
 * @returns An async job function taking `(data, row)`.
 *
 * @example
 * ```typescript
 * import { openai } from "@ai-sdk/openai";
 * import { evaluatorq } from "@orq-ai/evaluatorq";
 * import { wrapAISdkAgent } from "@orq-ai/evaluatorq/ai-sdk";
 * import type { ResponseResource } from "@orq-ai/evaluatorq/openresponses";
 * import { ToolLoopAgent, tool } from "ai";
 * import { z } from "zod";
 *
 * const weatherAgent = new ToolLoopAgent({
 *   model: openai("gpt-4o"),
 *   tools: {
 *     weather: tool({
 *       description: "Get the weather in a location",
 *       inputSchema: z.object({ location: z.string() }),
 *       execute: async ({ location }) => ({ location, temperature: 72 }),
 *     }),
 *   },
 * });
 *
 * await evaluatorq("weather-agent-eval", {
 *   data: [
 *     { inputs: { prompt: "What is the weather in SF?" } },
 *   ],
 *   jobs: [wrapAISdkAgent(weatherAgent)],
 *   evaluators: [
 *     {
 *       name: "response-quality",
 *       scorer: async ({ output }) => {
 *         const result = output as unknown as ResponseResource;
 *         // Look for a message item in the output
 *         const message = result.output.find(
 *           (item) => item.type === "message"
 *         );
 *         return {
 *           value: message ? 1 : 0,
 *           explanation: "Agent produced a response",
 *         };
 *       },
 *     },
 *   ],
 * });
 * ```
 */
export function wrapAISdkAgent(agent, options = {}) {
    // Resolve the options once; a default applies only when the option is undefined.
    const jobName = options.name !== undefined ? options.name : (agent.id ?? "agent");
    const inputKey = options.promptKey !== undefined ? options.promptKey : "prompt";
    return async (data, _row) => {
        const prompt = extractPromptFromData(data, inputKey);
        const generated = await agent.generate({ prompt });
        // Express the agent result in the OpenResponses format.
        return {
            name: jobName,
            output: convertToOpenResponses(generated, agent, prompt),
        };
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/common/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EACL,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,aAAa,GACd,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for agent integrations.
|
|
3
|
+
*/
|
|
4
|
+
import type { DataPoint } from "../../types.js";
|
|
5
|
+
import type { ResponseResource } from "../openresponses/index.js";
|
|
6
|
+
/**
 * Produces a unique identifier for an OpenResponses item.
 *
 * @param prefix - Prefix to put on the identifier, such as "fc" for a
 *   function_call item or "msg" for a message item.
 * @returns A unique string identifier carrying `prefix`.
 */
export declare function generateItemId(
    prefix: string,
): string;
|
|
13
|
+
/**
 * Turns tool arguments into a JSON string.
 *
 * @param args - The value to serialize; any type is accepted.
 * @returns The JSON string form of `args`.
 */
export declare function serializeArgs(
    args: unknown,
): string;
|
|
20
|
+
/**
 * Translates an LLM finish reason into an OpenResponses status, covering the
 * common finish reasons reported by various providers.
 *
 * @param finishReason - Finish reason taken from the LLM response, if any.
 * @returns The matching OpenResponses status.
 */
export declare function getResponseStatus(
    finishReason: string | undefined,
): ResponseResource["status"];
|
|
28
|
+
/**
 * Reads the prompt from a DataPoint and checks that it is a string.
 *
 * @param data - Data point whose `inputs` hold the prompt.
 * @param promptKey - Key under `data.inputs` where the prompt is stored.
 * @returns The prompt string.
 * @throws Error when the value found is not a string.
 */
export declare function extractPromptFromData(
    data: DataPoint,
    promptKey: string,
): string;
|
|
37
|
+
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/common/utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAElE;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAErD;AAED;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,OAAO,GAAG,MAAM,CAiBnD;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAC/B,YAAY,EAAE,MAAM,GAAG,SAAS,GAC/B,gBAAgB,CAAC,QAAQ,CAAC,CAa5B;AAED;;;;;;;GAOG;AACH,wBAAgB,qBAAqB,CACnC,IAAI,EAAE,SAAS,EACf,SAAS,EAAE,MAAM,GAChB,MAAM,CAQR"}
|