agentevals 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.extractLangGraphTrajectoryFromThread = exports._getLangGraphStateHistoryRecursive = exports.extractLangGraphTrajectoryFromSnapshots = void 0;
4
4
  const messages_1 = require("@langchain/core/messages");
5
- const openai_1 = require("@langchain/openai");
5
+ const utils_js_1 = require("../utils.cjs");
6
6
  const extractLangGraphTrajectoryFromSnapshots = (snapshots) => {
7
7
  const inputs = [];
8
8
  const trajectory = {
@@ -29,7 +29,7 @@ const extractLangGraphTrajectoryFromSnapshots = (snapshots) => {
29
29
  if ((0, messages_1.isBaseMessage)(lastMessage)) {
30
30
  // Just append the last message in the output to the results to reduce context size
31
31
  trajectory.results.push({
32
- messages: (0, openai_1._convertMessagesToOpenAIParams)([lastMessage]),
32
+ messages: [(0, utils_js_1._convertToOpenAIMessage)(lastMessage)],
33
33
  });
34
34
  }
35
35
  else {
@@ -49,9 +49,9 @@ const extractLangGraphTrajectoryFromSnapshots = (snapshots) => {
49
49
  }
50
50
  for (const task of snapshot.tasks) {
51
51
  if (task.interrupts?.length) {
52
- trajectory.steps.at(-1)?.push("__interrupt__");
52
+ trajectory.steps[trajectory.steps.length - 1]?.push("__interrupt__");
53
53
  }
54
- trajectory.steps.at(-1)?.push(`${subgraphPath}${task.name}`);
54
+ trajectory.steps[trajectory.steps.length - 1]?.push(`${subgraphPath}${task.name}`);
55
55
  }
56
56
  }
57
57
  if (isAccumulatingSteps) {
@@ -1,5 +1,5 @@
1
1
  import { isBaseMessage } from "@langchain/core/messages";
2
- import { _convertMessagesToOpenAIParams } from "@langchain/openai";
2
+ import { _convertToOpenAIMessage } from "../utils.js";
3
3
  export const extractLangGraphTrajectoryFromSnapshots = (snapshots) => {
4
4
  const inputs = [];
5
5
  const trajectory = {
@@ -26,7 +26,7 @@ export const extractLangGraphTrajectoryFromSnapshots = (snapshots) => {
26
26
  if (isBaseMessage(lastMessage)) {
27
27
  // Just append the last message in the output to the results to reduce context size
28
28
  trajectory.results.push({
29
- messages: _convertMessagesToOpenAIParams([lastMessage]),
29
+ messages: [_convertToOpenAIMessage(lastMessage)],
30
30
  });
31
31
  }
32
32
  else {
@@ -46,9 +46,9 @@ export const extractLangGraphTrajectoryFromSnapshots = (snapshots) => {
46
46
  }
47
47
  for (const task of snapshot.tasks) {
48
48
  if (task.interrupts?.length) {
49
- trajectory.steps.at(-1)?.push("__interrupt__");
49
+ trajectory.steps[trajectory.steps.length - 1]?.push("__interrupt__");
50
50
  }
51
- trajectory.steps.at(-1)?.push(`${subgraphPath}${task.name}`);
51
+ trajectory.steps[trajectory.steps.length - 1]?.push(`${subgraphPath}${task.name}`);
52
52
  }
53
53
  }
54
54
  if (isAccumulatingSteps) {
@@ -1,5 +1,14 @@
1
- import { BaseMessage } from "@langchain/core/messages";
1
+ import type { BaseMessage } from "@langchain/core/messages";
2
2
  import { ChatCompletionMessage, FlexibleChatCompletionMessage, EvaluatorResult, TrajectoryLLMAsJudgeParams } from "../types.js";
3
+ type TrajectoryEvaluatorFunction = (params: {
4
+ outputs: ChatCompletionMessage[] | FlexibleChatCompletionMessage[] | BaseMessage[] | {
5
+ messages: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[];
6
+ };
7
+ referenceOutputs?: ChatCompletionMessage[] | FlexibleChatCompletionMessage[] | BaseMessage[] | {
8
+ messages: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[];
9
+ };
10
+ [key: string]: unknown;
11
+ }) => Promise<EvaluatorResult>;
3
12
  export declare const TRAJECTORY_ACCURACY_PROMPT_WITH_REFERENCE = "You are an expert data labeler.\nYour task is to grade the accuracy of an AI agent's internal trajectory.\n\n<Rubric>\n An accurate trajectory:\n - Makes logical sense between steps\n - Shows clear progression\n - Is relatively efficient, though it does not need to be perfectly efficient\n - Is semantically equivalent to the provided reference trajectory\n</Rubric>\n\nBased on the following reference trajectory:\n\n<reference_trajectory>\n{reference_outputs}\n</reference_trajectory>\n\nGrade this actual trajectory:\n\n<trajectory>\n{outputs}\n</trajectory>\n";
4
13
  export declare const TRAJECTORY_ACCURACY_PROMPT = "You are an expert data labeler.\nYour task is to grade the accuracy of an AI agent's internal trajectory.\n\n<Rubric>\n An accurate trajectory:\n - Makes logical sense between steps\n - Shows clear progression\n - Is relatively efficient, though it does not need to be perfectly efficient\n</Rubric>\n\nFirst, try to understand the goal of the trajectory by looking at the input\n(if the input is not present try to infer it from the content of the first message),\nas well as the output of the final message. Once you understand the goal, grade the trajectory\nas it relates to achieving that goal.\n\nGrade the following trajectory:\n\n<trajectory>\n{outputs}\n</trajectory>";
5
14
  /**
@@ -23,12 +32,5 @@ export declare const TRAJECTORY_ACCURACY_PROMPT = "You are an expert data labele
23
32
  * @param options.fewShotExamples - Optional list of example evaluations to append to the prompt.
24
33
  * @returns A function that evaluates agent trajectories using the configured LLM judge.
25
34
  */
26
- export declare const createTrajectoryLLMAsJudge: ({ prompt, feedbackKey, model, system, judge, continuous, choices, useReasoning, fewShotExamples, }: TrajectoryLLMAsJudgeParams) => ({ inputs, outputs, referenceOutputs, ...extra }: {
27
- [key: string]: unknown;
28
- outputs: ChatCompletionMessage[] | FlexibleChatCompletionMessage[] | BaseMessage[] | {
29
- messages: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[];
30
- };
31
- referenceOutputs?: ChatCompletionMessage[] | BaseMessage[] | FlexibleChatCompletionMessage[] | {
32
- messages: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[];
33
- } | undefined;
34
- }) => Promise<EvaluatorResult>;
35
+ export declare const createTrajectoryLLMAsJudge: ({ prompt, feedbackKey, model, system, judge, continuous, choices, useReasoning, fewShotExamples, }: TrajectoryLLMAsJudgeParams) => TrajectoryEvaluatorFunction;
36
+ export {};
package/dist/utils.cjs CHANGED
@@ -1,13 +1,52 @@
1
1
  "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || function (mod) {
19
+ if (mod && mod.__esModule) return mod;
20
+ var result = {};
21
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
+ __setModuleDefault(result, mod);
23
+ return result;
24
+ };
2
25
  Object.defineProperty(exports, "__esModule", { value: true });
3
26
  exports._runEvaluator = exports.processScore = exports._normalizeToOpenAIMessagesList = exports._convertToChatCompletionMessage = exports._convertToOpenAIMessage = void 0;
4
27
  const messages_1 = require("@langchain/core/messages");
5
- const openai_1 = require("@langchain/openai");
28
+ const openAIImports = __importStar(require("@langchain/openai"));
6
29
  const utils_1 = require("openevals/utils");
30
+ const {
31
+ // @ts-expect-error Shim for older versions of @langchain/openai
32
+ _convertMessagesToOpenAIParams, convertMessagesToCompletionsMessageParams, } = openAIImports;
33
+ function _convertMessagesShim(message) {
34
+ if (typeof _convertMessagesToOpenAIParams === "function") {
35
+ return _convertMessagesToOpenAIParams([
36
+ message,
37
+ ])[0];
38
+ }
39
+ return convertMessagesToCompletionsMessageParams({
40
+ messages: [message],
41
+ })[0];
42
+ }
7
43
  const _convertToOpenAIMessage = (message) => {
8
44
  if ((0, messages_1.isBaseMessage)(message)) {
9
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
10
- return (0, openai_1._convertMessagesToOpenAIParams)([message])[0];
45
+ const converted = _convertMessagesShim(message);
46
+ if (message.id && !converted.id) {
47
+ converted.id = message.id;
48
+ }
49
+ return converted;
11
50
  }
12
51
  else {
13
52
  return message;
@@ -17,8 +56,7 @@ exports._convertToOpenAIMessage = _convertToOpenAIMessage;
17
56
  const _convertToChatCompletionMessage = (message) => {
18
57
  let converted;
19
58
  if ((0, messages_1.isBaseMessage)(message)) {
20
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
21
- converted = (0, openai_1._convertMessagesToOpenAIParams)([message])[0];
59
+ converted = _convertMessagesShim(message);
22
60
  }
23
61
  else {
24
62
  converted = message;
package/dist/utils.d.ts CHANGED
@@ -1,13 +1,15 @@
1
- import { BaseMessage } from "@langchain/core/messages";
1
+ import type { BaseMessage } from "@langchain/core/messages";
2
2
  import { EvaluationResultType } from "openevals/utils";
3
3
  import { ChatCompletionMessage, FlexibleChatCompletionMessage, MultiResultScorerReturnType, SingleResultScorerReturnType } from "./types.js";
4
+ type NormalizeToOpenAIMessagesListFunction = (messages?: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[] | {
5
+ messages: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[];
6
+ }) => ChatCompletionMessage[];
4
7
  export declare const _convertToOpenAIMessage: (message: BaseMessage | ChatCompletionMessage) => ChatCompletionMessage;
5
8
  export declare const _convertToChatCompletionMessage: (message: BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage) => ChatCompletionMessage;
6
- export declare const _normalizeToOpenAIMessagesList: (messages?: (FlexibleChatCompletionMessage | ChatCompletionMessage | BaseMessage)[] | {
7
- messages: (BaseMessage | ChatCompletionMessage | FlexibleChatCompletionMessage)[];
8
- } | undefined) => ChatCompletionMessage[];
9
+ export declare const _normalizeToOpenAIMessagesList: NormalizeToOpenAIMessagesListFunction;
9
10
  export declare const processScore: (_: string, value: boolean | number | {
10
11
  score: boolean | number;
11
12
  reasoning?: string;
12
13
  }) => readonly [number | boolean, string | undefined] | readonly [number | boolean];
13
14
  export declare const _runEvaluator: <T extends Record<string, unknown>, O extends SingleResultScorerReturnType | MultiResultScorerReturnType | Promise<SingleResultScorerReturnType | MultiResultScorerReturnType>>(runName: string, scorer: (params: T) => O, feedbackKey: string, extra?: T | undefined) => Promise<EvaluationResultType<O>>;
15
+ export {};
package/dist/utils.js CHANGED
@@ -1,10 +1,26 @@
1
1
  import { isBaseMessage } from "@langchain/core/messages";
2
- import { _convertMessagesToOpenAIParams } from "@langchain/openai";
2
+ import * as openAIImports from "@langchain/openai";
3
3
  import { _runEvaluator as baseRunEvaluator, } from "openevals/utils";
4
+ const {
5
+ // @ts-expect-error Shim for older versions of @langchain/openai
6
+ _convertMessagesToOpenAIParams, convertMessagesToCompletionsMessageParams, } = openAIImports;
7
+ function _convertMessagesShim(message) {
8
+ if (typeof _convertMessagesToOpenAIParams === "function") {
9
+ return _convertMessagesToOpenAIParams([
10
+ message,
11
+ ])[0];
12
+ }
13
+ return convertMessagesToCompletionsMessageParams({
14
+ messages: [message],
15
+ })[0];
16
+ }
4
17
  export const _convertToOpenAIMessage = (message) => {
5
18
  if (isBaseMessage(message)) {
6
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
7
- return _convertMessagesToOpenAIParams([message])[0];
19
+ const converted = _convertMessagesShim(message);
20
+ if (message.id && !converted.id) {
21
+ converted.id = message.id;
22
+ }
23
+ return converted;
8
24
  }
9
25
  else {
10
26
  return message;
@@ -13,8 +29,7 @@ export const _convertToOpenAIMessage = (message) => {
13
29
  export const _convertToChatCompletionMessage = (message) => {
14
30
  let converted;
15
31
  if (isBaseMessage(message)) {
16
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
17
- converted = _convertMessagesToOpenAIParams([message])[0];
32
+ converted = _convertMessagesShim(message);
18
33
  }
19
34
  else {
20
35
  converted = message;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentevals",
3
- "version": "0.0.6",
3
+ "version": "0.0.7",
4
4
  "packageManager": "yarn@3.5.1",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -15,17 +15,18 @@
15
15
  },
16
16
  "dependencies": {
17
17
  "@langchain/openai": ">=0.4.4",
18
- "langchain": ">=0.3.18",
19
- "langsmith": ">=0.3.11",
20
- "openevals": "^0.1.0"
18
+ "langchain": ">=1.2.28",
19
+ "langsmith": ">=0.4.6",
20
+ "openevals": "^0.1.4"
21
21
  },
22
22
  "peerDependencies": {
23
- "@langchain/core": ">=0.3.73",
23
+ "@langchain/core": ">=0.3.80",
24
24
  "@langchain/langgraph": ">=0.2.46"
25
25
  },
26
26
  "devDependencies": {
27
- "@langchain/core": "^0.3.73",
28
- "@langchain/langgraph": "^0.4.9",
27
+ "@langchain/core": "^1.1.29",
28
+ "@langchain/langgraph": "^1.2.0",
29
+ "@langchain/openai": "^1.2.11",
29
30
  "@langchain/scripts": "0.1.3",
30
31
  "@tsconfig/recommended": "^1.0.8",
31
32
  "@typescript-eslint/eslint-plugin": "^8.24.1",
@@ -39,12 +40,26 @@
39
40
  "eslint-plugin-jest": "^27.6.0",
40
41
  "eslint-plugin-no-instanceof": "^1.0.1",
41
42
  "eslint-plugin-prettier": "^4.2.1",
42
- "openai": "^4.85.1",
43
+ "openai": "^6.25.0",
43
44
  "prettier": "^3.5.1",
44
45
  "typescript": "~5.1.6",
45
46
  "vitest": "^3.0.5",
46
47
  "zod": "^4.1.5"
47
48
  },
49
+ "resolutions": {
50
+ "form-data": "^4.0.4",
51
+ "tar": "^7.5.8",
52
+ "axios": "^1.8.2",
53
+ "lodash": "^4.17.23",
54
+ "js-yaml": "^4.1.1",
55
+ "vite": "^6.4.1",
56
+ "esbuild": "^0.25.0",
57
+ "@langchain/scripts/glob": "^10.5.0",
58
+ "cacache/glob": "^10.5.0",
59
+ "dpdm/glob": "^10.5.0",
60
+ "node-gyp/glob": "^10.5.0",
61
+ "rimraf@5.0.10/glob": "^10.5.0"
62
+ },
48
63
  "files": [
49
64
  "dist/",
50
65
  "index.cjs",