@dvina/agents 0.3.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,643 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
// src/eval/index.ts
// Package entry point (bundled CJS). Declares the public API of the eval
// module and exposes it through esbuild's lazy-getter interop helpers, so
// each export resolves to the definition later in this file.
var eval_exports = {};
__export(eval_exports, {
  ai: () => ai,
  configureEvals: () => configureEvals,
  contains: () => contains,
  defineSuite: () => defineSuite,
  fromToolSpecs: () => fromToolSpecs,
  human: () => human,
  llmJudge: () => llmJudge,
  noTools: () => noTools,
  notContains: () => notContains,
  respondsInLanguage: () => respondsInLanguage,
  toolResult: () => toolResult,
  toolsCalled: () => toolsCalled
});
module.exports = __toCommonJS(eval_exports);
47
+
48
// src/eval/config.ts
// Module-level holder for the eval configuration supplied by the host project.
var _config = null;
/**
 * Stores the eval configuration (model settings, prompts, factories).
 * Call once from your vitest setupFiles before any suite runs.
 */
function configureEvals(config) {
  _config = config;
}
/**
 * Returns the active eval configuration.
 * @throws {Error} when configureEvals() has not been called yet.
 */
function getEvalConfig() {
  if (_config) {
    return _config;
  }
  throw new Error("Evals not configured. Call configureEvals() in your vitest setupFiles.");
}
59
+
60
+ // src/eval/suite.ts
61
+ var ls = __toESM(require("langsmith/vitest"));
62
+
63
+ // src/eval/target.ts
64
+ var import_tools = require("@langchain/core/tools");
65
+ var import_messages = require("@langchain/core/messages");
66
+ var import_zod = require("zod");
67
+
68
+ // src/runtime/langchain/model-resolver.ts
69
+ var import_openai = require("@langchain/openai");
70
// Resolves "provider:config:model" strings into LangChain chat-model instances.
var LangchainModelResolver = class {
  /** @param config provider credentials keyed by provider, then config name */
  constructor(config) {
    this.config = config;
  }
  /**
   * Resolve a model string of the form "provider:modelName" (implies the
   * "default" config) or "provider:configName:modelName".
   * @throws {Error} when the string matches neither format.
   */
  resolve(modelString, tags) {
    const segments = modelString.split(":");
    switch (segments.length) {
      case 2:
        return this.resolveByProvider(segments[0], "default", segments[1], tags);
      case 3:
        return this.resolveByProvider(segments[0], segments[1], segments[2], tags);
      default:
        throw new Error(
          'Model string must follow format "provider:modelName" (uses "default" config) or "provider:configName:modelName"'
        );
    }
  }
  // Dispatch to the provider-specific factory.
  resolveByProvider(provider, configName, modelName, tags) {
    if (provider === "openai") return this.resolveOpenAI(configName, modelName, tags);
    if (provider === "azure") return this.resolveAzure(configName, modelName, tags);
    throw new Error(`Unsupported model provider: ${provider}`);
  }
  // Build a ChatOpenAI client from the named openai config entry.
  resolveOpenAI(configName, modelName, tags) {
    const providerConfig = this.config.openai?.[configName];
    if (!providerConfig) {
      throw new Error(`Configuration "${configName}" for provider "openai" is missing`);
    }
    return new import_openai.ChatOpenAI({
      apiKey: providerConfig.apiKey,
      modelName,
      tags
    });
  }
  // Build an AzureChatOpenAI client from the named resource's model entry.
  resolveAzure(resourceName, modelName, tags) {
    const resource = this.config.azure?.[resourceName];
    if (!resource) {
      throw new Error(`Resource "${resourceName}" for provider "azure" is missing`);
    }
    const modelEntry = resource.models.find((m) => m.model === modelName);
    if (!modelEntry) {
      throw new Error(`Model "${modelName}" not found in Azure resource "${resourceName}"`);
    }
    return new import_openai.AzureChatOpenAI({
      model: modelEntry.model,
      azureOpenAIApiKey: resource.apiKey,
      azureOpenAIApiInstanceName: this.extractInstanceName(resource.endpoint),
      azureOpenAIApiDeploymentName: modelEntry.deploymentName,
      azureOpenAIApiVersion: modelEntry.apiVersion,
      tags
    });
  }
  // "https://myinstance.openai.azure.com" -> "myinstance"; falls back to the
  // raw string when the endpoint is not a parseable URL.
  extractInstanceName(endpoint) {
    let parsed;
    try {
      parsed = new URL(endpoint);
    } catch (e) {
      return endpoint;
    }
    return parsed.hostname.split(".")[0];
  }
};
136
+
137
+ // src/runtime/langchain/utils.ts
138
+ var import_langchain = require("langchain");
139
/**
 * Converts eval-format messages ({role, content, ...}) into LangChain
 * message instances. Tool-call ids are synthesized (`tc_1`, `tc_2`, ...)
 * per AI message that declares toolCalls, and each subsequent tool message
 * consumes the oldest pending id. Roles other than human/ai/tool are
 * silently skipped.
 * @throws {Error} when a tool message has no preceding AI message with
 *   declared tool calls.
 */
function convertToLangchainMessages(messages) {
  const converted = [];
  let nextId = 0;
  let pendingIds = [];
  for (const msg of messages) {
    switch (msg.role) {
      case "human": {
        // Image blocks are rewritten to LangChain's image_url shape.
        const content = msg.content.map((part) =>
          part.type === "image" ? { type: "image_url", image_url: { url: part.url } } : part
        );
        converted.push(new import_langchain.HumanMessage({ content }));
        break;
      }
      case "ai": {
        if (!msg.toolCalls || msg.toolCalls.length === 0) {
          converted.push(new import_langchain.AIMessage(msg.content));
          break;
        }
        pendingIds = msg.toolCalls.map(() => `tc_${++nextId}`);
        const tool_calls = msg.toolCalls.map((call, i) => ({
          id: pendingIds[i],
          name: call.name,
          args: call.input ? JSON.parse(call.input) : {}
        }));
        converted.push(new import_langchain.AIMessage({ content: msg.content, tool_calls }));
        break;
      }
      case "tool": {
        const id = pendingIds.shift();
        if (!id)
          throw new Error(`ToolMessage for "${msg.name}" without a preceding AiMessage with toolCalls`);
        converted.push(new import_langchain.ToolMessage({ content: msg.output, tool_call_id: id, name: msg.name }));
        break;
      }
    }
  }
  return converted;
}
186
+
187
// src/eval/target.ts
// Upper bound on model -> tool -> model round trips for one eval case.
var MAX_AGENT_LOOPS = 10;
/**
 * Builds the default model-based eval target: a function that replays the
 * case messages against a resolved chat model, serves tool calls from the
 * provided mock tools, and returns the full message trajectory once the
 * model stops calling tools (or the loop cap is reached).
 */
function createEvalTarget(modelConfig, modelString) {
  return async (inputs) => {
    // Explicit arguments win; otherwise fall back to the global eval config.
    const config = modelConfig && modelString ? { modelConfig, model: modelString } : getEvalConfig();
    if (!config.model) {
      throw new Error("model is required for model-based target. Add it to your configureEvals() call.");
    }
    const resolver = new LangchainModelResolver(config.modelConfig);
    const model = resolver.resolve(config.model);
    // Per-tool invocation counters, passed to function-valued mock responses.
    const toolCallCounts = {};
    const langchainTools = inputs.tools.map((mockTool) => {
      toolCallCounts[mockTool.name] = 0;
      return (0, import_tools.tool)(
        async (toolInput) => {
          toolCallCounts[mockTool.name]++;
          if (typeof mockTool.response === "function") {
            return mockTool.response(toolInput, toolCallCounts[mockTool.name]);
          }
          return mockTool.response;
        },
        {
          name: mockTool.name,
          description: mockTool.description,
          // Accept a ready-made ZodObject, or build one from a plain map:
          // string values become described z.string(), numbers z.number(),
          // anything else is left unconstrained (z.any()).
          schema: mockTool.schema instanceof import_zod.z.ZodObject ? mockTool.schema : import_zod.z.object(
            Object.fromEntries(
              Object.entries(mockTool.schema).map(([key, val]) => {
                if (typeof val === "string") return [key, import_zod.z.string().describe(val)];
                if (typeof val === "number") return [key, import_zod.z.number().describe(String(val))];
                return [key, import_zod.z.any()];
              })
            )
          )
        }
      );
    });
    const boundModel = langchainTools.length > 0 ? model.bindTools(langchainTools) : model;
    const messages = [];
    if (inputs.systemPrompt) {
      messages.push(new import_messages.SystemMessage(inputs.systemPrompt));
    }
    messages.push(...convertToLangchainMessages(inputs.messages));
    // Agent loop: invoke the model, execute requested mock tools, feed the
    // results back, and stop when the model answers without tool calls.
    let loopCount = 0;
    while (loopCount < MAX_AGENT_LOOPS) {
      loopCount++;
      const response = await boundModel.invoke(messages);
      messages.push(response);
      const aiMessage = response;
      if (!aiMessage.tool_calls || aiMessage.tool_calls.length === 0) {
        break;
      }
      for (const tc of aiMessage.tool_calls) {
        const mockTool = langchainTools.find((t) => t.name === tc.name);
        if (mockTool) {
          const result = await mockTool.invoke(tc.args);
          messages.push(
            new import_messages.ToolMessage({
              content: typeof result === "string" ? result : JSON.stringify(result),
              tool_call_id: tc.id,
              name: tc.name
            })
          );
        } else {
          // Surface unknown tool names back to the model instead of failing.
          messages.push(
            new import_messages.ToolMessage({
              content: `Tool "${tc.name}" not found`,
              tool_call_id: tc.id,
              name: tc.name
            })
          );
        }
      }
    }
    return { messages };
  };
}
263
/**
 * Rebuilds a LangChain message trajectory from an agent run result: the
 * converted input messages, followed by an AIMessage + ToolMessage pair
 * group for each run of tool_call blocks and an AIMessage per text block.
 *
 * Refactor: the flush logic for buffered tool calls appeared twice (inside
 * the text-block branch and after the loop); it is extracted into a single
 * local helper. Behavior is unchanged.
 */
function agentResultToMessages(inputMessages, result) {
  const messages = convertToLangchainMessages(inputMessages);
  let pendingToolCalls = [];
  // Emit one AIMessage carrying all buffered tool calls, then one
  // ToolMessage per call with its recorded output; then clear the buffer.
  const flushPendingToolCalls = () => {
    if (pendingToolCalls.length === 0) return;
    messages.push(
      new import_messages.AIMessage({
        content: "",
        tool_calls: pendingToolCalls.map((tc) => ({ id: tc.id, name: tc.name, args: tc.args }))
      })
    );
    for (const tc of pendingToolCalls) {
      messages.push(new import_messages.ToolMessage({ content: tc.output, tool_call_id: tc.id, name: tc.name }));
    }
    pendingToolCalls = [];
  };
  for (const block of result.content) {
    if (block.type === "tool_call") {
      const tc = block;
      pendingToolCalls.push({
        id: tc.toolCallId,
        name: tc.name,
        args: tc.input ? JSON.parse(tc.input) : {},
        output: tc.output
      });
    } else if (block.type === "text") {
      flushPendingToolCalls();
      // NOTE(review): text blocks are read via `.output` here — confirm the
      // agent result's text blocks expose `.output` rather than `.text`.
      messages.push(new import_messages.AIMessage(block.output));
    }
  }
  flushPendingToolCalls();
  return messages;
}
304
/**
 * Converts a {name: ToolDef} map into agent tool definitions whose mocked
 * exec() tracks per-tool call counts (passed to function-valued responses).
 * NOTE(review): unlike the schema conversion in createEvalTarget, plain
 * number values here fall through to z.any() instead of z.number() —
 * confirm whether that asymmetry is intentional.
 */
function toolDefsToDefinitions(defs) {
  const callCounts = {};
  const toDefinition = ([name, def]) => {
    callCounts[name] = 0;
    // Ready-made ZodObject schemas pass through; plain {key: description}
    // maps are converted (strings become described z.string()).
    const inputSchema = def.schema instanceof import_zod.z.ZodObject
      ? def.schema
      : import_zod.z.object(
          Object.fromEntries(
            Object.entries(def.schema ?? {}).map(([key, val]) =>
              typeof val === "string" ? [key, import_zod.z.string().describe(val)] : [key, import_zod.z.any()]
            )
          )
        );
    const exec = async (input) => {
      callCounts[name]++;
      if (typeof def.response === "function") {
        return def.response(input, callCounts[name]);
      }
      return typeof def.response === "string" ? def.response : JSON.stringify(def.response);
    };
    return {
      name,
      toolKit: "eval-mock",
      description: def.description,
      inputSchema,
      exec
    };
  };
  return Object.entries(defs).map(toDefinition);
}
333
/**
 * Runs a user-supplied agent factory as the eval target: builds mock tool
 * definitions for any extra tools, runs the agent on a fresh random thread
 * id, and converts the result back into a LangChain message trajectory.
 */
async function runAgentTarget(createTarget, evalMessages, extraToolDefs) {
  const hasExtraTools = Object.keys(extraToolDefs).length > 0;
  const extraTools = hasExtraTools ? toolDefsToDefinitions(extraToolDefs) : [];
  const agent = await createTarget(extraTools);
  const threadId = `eval_${Date.now()}_${Math.random().toString(36).slice(2)}`;
  const result = await agent.run({ threadId, messages: evalMessages });
  return { messages: agentResultToMessages(evalMessages, result) };
}
342
+
343
// src/eval/suite.ts
/** Creates a human eval message with a single text content block. */
function human(content) {
  return {
    role: "human",
    content: [{ type: "text", text: content }]
  };
}
/**
 * Creates an AI eval message; the optional toolCalls argument lists tool
 * names the AI called (added only when provided).
 */
function ai(content, toolCalls) {
  const message = { role: "ai", content };
  if (toolCalls) {
    message.toolCalls = toolCalls.map((name) => ({ name }));
  }
  return message;
}
/** Creates a tool-result eval message. */
function toolResult(name, output) {
  return { role: "tool", name, output };
}
353
/**
 * Builds a {name: ToolDef} map from tool specs, pairing each spec with a
 * canned response looked up by tool name (default "").
 */
function fromToolSpecs(specs, responses = {}) {
  const entries = specs.map((spec) => {
    const def = {
      description: spec.description,
      schema: spec.inputSchema,
      response: responses[spec.name] ?? ""
    };
    return [spec.name, def];
  });
  return Object.fromEntries(entries);
}
365
/**
 * Normalizes tool defs into the mock-tool shape used by the model target:
 * function and string responses pass through unchanged, anything else is
 * JSON-stringified; a missing schema becomes an empty object.
 */
function toMockTools(defs) {
  return Object.entries(defs).map(([name, def]) => {
    let response;
    if (typeof def.response === "function" || typeof def.response === "string") {
      response = def.response;
    } else {
      response = JSON.stringify(def.response);
    }
    return { name, description: def.description, schema: def.schema ?? {}, response };
  });
}
373
/**
 * Replaces non-serializable mock-tool fields with placeholders so the tool
 * list can be logged as eval inputs: Zod object schemas (detected via a
 * "shape" property) become "<ZodObject>" and function responses "<function>".
 */
function toSerializableTools(tools) {
  return tools.map((entry) => {
    const isZodSchema = entry.schema instanceof Object && "shape" in entry.schema;
    return {
      ...entry,
      schema: isZodSchema ? "<ZodObject>" : entry.schema,
      response: typeof entry.response === "function" ? "<function>" : entry.response
    };
  });
}
380
/**
 * Returns the text of the most recent human message, or "" when there is
 * no human message (or its content has no text block).
 */
function lastHumanContent(messages) {
  const lastHuman = [...messages].reverse().find((m) => m.role === "human");
  if (!lastHuman) return "";
  const textBlock = lastHuman.content.find((c) => c.type === "text");
  return textBlock ? textBlock.text : "";
}
390
/**
 * Resolves the eval target for model-based suites: a function target is
 * used as-is; a string target names the model; otherwise the globally
 * configured model is used.
 * @throws {Error} when no model is available from either source.
 */
function resolveModelTarget(config) {
  if (typeof config.target === "function") {
    return config.target;
  }
  const evalConfig = getEvalConfig();
  const targetIsString = typeof config.target === "string";
  if (!targetIsString && !evalConfig.model) {
    throw new Error("model is required for model-based target. Add it to your configureEvals() call.");
  }
  return createEvalTarget(evalConfig.modelConfig, targetIsString ? config.target : evalConfig.model);
}
399
/**
 * Picks the agent-target factory: a per-suite override wins; otherwise the
 * globally configured factory is used.
 */
function resolveCreateTarget(config) {
  if (config.createTarget != null) {
    return config.createTarget;
  }
  return getEvalConfig().createTarget;
}
402
/**
 * Registers an eval suite with LangSmith's vitest integration. Each case
 * becomes one ls.test(): expectation factories are resolved into evaluators
 * plus merged reference outputs, the target (custom agent or model-based)
 * is run on the case messages, outputs are logged, and every evaluator is
 * awaited against them.
 */
function defineSuite(name, config) {
  const suiteTools = config.tools ?? {};
  // A per-suite `target` takes precedence over the agent factory path.
  const createTarget = config.target ? void 0 : resolveCreateTarget(config);
  ls.describe(name, () => {
    for (const tc of config.cases) {
      // Unnamed cases are titled with the last human message's text.
      const testName = tc.name ?? lastHumanContent(tc.messages);
      const caseToolDefs = tc.tools ?? suiteTools;
      const tools = toMockTools(caseToolDefs);
      const ctx = { message: lastHumanContent(tc.messages) };
      // Expectations are factories: resolve each with the case context.
      const resolved = tc.expect.map((exp) => exp(ctx));
      const evaluators = resolved.map((r) => r.evaluator);
      // Later expectations overwrite earlier keys on conflict.
      const referenceOutputs = Object.assign({}, ...resolved.map((r) => r.referenceOutputs));
      ls.test(
        testName,
        {
          inputs: {
            messages: tc.messages,
            // Functions/Zod schemas are replaced with placeholders for logging.
            tools: toSerializableTools(tools)
          },
          referenceOutputs
        },
        async ({ referenceOutputs: refOut }) => {
          let output;
          if (createTarget) {
            output = await runAgentTarget(createTarget, tc.messages, caseToolDefs);
          } else {
            const target = resolveModelTarget(config);
            // System prompt precedence: case > suite > global config.
            const globalPrompt = getEvalConfig().systemPrompt;
            const systemPrompt = tc.systemPrompt ?? config.systemPrompt ?? globalPrompt;
            output = await target({
              messages: tc.messages,
              tools,
              ...systemPrompt ? { systemPrompt } : {}
            });
          }
          ls.logOutputs(output);
          for (const evaluator of evaluators) {
            await evaluator({ outputs: output, referenceOutputs: refOut ?? {} });
          }
        }
      );
    }
  });
}
446
+
447
+ // src/eval/expectations.ts
448
+ var ls2 = __toESM(require("langsmith/vitest"));
449
+ var import_agentevals = require("agentevals");
450
+
451
// src/eval/evaluators/language.ts
var import_messages2 = require("@langchain/core/messages");
/**
 * Creates an evaluator that checks whether the final AI response is written
 * in the expected language. A judge model detects the language of the last
 * AI message; the score is true when the detected ISO 639-1 code equals
 * referenceOutputs.expectedLanguage (case-insensitive). Skips (score true)
 * when no expected language is specified.
 */
function createLanguageEvaluator(modelConfig, model) {
  // The judge model is resolved once, when the evaluator is created.
  const resolver = new LangchainModelResolver(modelConfig);
  const judge = resolver.resolve(model);
  return async ({
    outputs,
    referenceOutputs
  }) => {
    const expectedLanguage = referenceOutputs?.expectedLanguage;
    if (!expectedLanguage) {
      return { key: "language_match", score: true, comment: "No expected language specified, skipping" };
    }
    const messages = outputs.messages || [];
    const lastAiMessage = [...messages].reverse().find((m) => m instanceof import_messages2.AIMessage);
    if (!lastAiMessage) {
      return { key: "language_match", score: false, comment: "No AI message found in trajectory" };
    }
    // Structured content is stringified so the judge always receives text.
    const responseText = typeof lastAiMessage.content === "string" ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);
    const detection = await judge.invoke([
      {
        role: "system",
        content: 'You are a language detection tool. Respond with ONLY the ISO 639-1 language code (e.g., "en", "tr", "de", "fr") of the text provided. Nothing else.'
      },
      {
        role: "user",
        content: responseText
      }
    ]);
    // Non-string judge output is treated as no detection ("").
    const detectedLanguage = (typeof detection.content === "string" ? detection.content : "").trim().toLowerCase();
    const matches = detectedLanguage === expectedLanguage.toLowerCase();
    return {
      key: "language_match",
      score: matches,
      comment: matches ? `Response language matches expected: ${expectedLanguage}` : `Expected "${expectedLanguage}" but detected "${detectedLanguage}"`
    };
  };
}
489
+
490
+ // src/eval/evaluators/response-content.ts
491
+ var import_messages3 = require("@langchain/core/messages");
492
/**
 * Creates an evaluator asserting the final AI response contains every
 * string in referenceOutputs.responseContains and none of the strings in
 * referenceOutputs.responseMustNotContain (all case-insensitive). Skips
 * (score true) when neither list is provided.
 */
function createResponseContentEvaluator() {
  return async ({
    outputs,
    referenceOutputs
  }) => {
    const mustContain = referenceOutputs?.responseContains || [];
    const mustNotContain = referenceOutputs?.responseMustNotContain || [];
    if (mustContain.length + mustNotContain.length === 0) {
      return { key: "response_content", score: true, comment: "No content assertions specified, skipping" };
    }
    const trajectory = outputs.messages || [];
    const lastAiMessage = [...trajectory].reverse().find((m) => m instanceof import_messages3.AIMessage);
    if (!lastAiMessage) {
      return { key: "response_content", score: false, comment: "No AI message found in trajectory" };
    }
    const raw = typeof lastAiMessage.content === "string" ? lastAiMessage.content : JSON.stringify(lastAiMessage.content);
    const responseText = raw.toLowerCase();
    // Missing-text failures are reported before forbidden-text failures.
    const failures = [
      ...mustContain.filter((s) => !responseText.includes(s.toLowerCase())).map((s) => `Missing expected text: "${s}"`),
      ...mustNotContain.filter((s) => responseText.includes(s.toLowerCase())).map((s) => `Contains forbidden text: "${s}"`)
    ];
    const passed = failures.length === 0;
    return {
      key: "response_content",
      score: passed,
      comment: passed ? "All content assertions passed" : failures.join("; ")
    };
  };
}
527
+
528
+ // src/eval/evaluators/no-tool-calls.ts
529
+ var import_messages4 = require("@langchain/core/messages");
530
/**
 * Creates an evaluator asserting the trajectory contains no tool calls.
 * Active only when referenceOutputs requests the restriction
 * (maxToolCalls === 0 or expectNoToolCalls === true); otherwise skips
 * with score true.
 */
function createNoToolCallsEvaluator() {
  return async ({
    outputs,
    referenceOutputs
  }) => {
    const restricted = referenceOutputs?.maxToolCalls === 0 || referenceOutputs?.expectNoToolCalls === true;
    if (!restricted) {
      return { key: "no_tool_calls", score: true, comment: "No tool call restriction specified, skipping" };
    }
    const trajectory = outputs.messages || [];
    const toolCalls = trajectory.filter((m) => m instanceof import_messages4.AIMessage).flatMap((m) => m.tool_calls || []);
    const passed = toolCalls.length === 0;
    const names = toolCalls.map((tc) => tc.name).join(", ");
    return {
      key: "no_tool_calls",
      score: passed,
      comment: passed ? "No tool calls made (as expected)" : `Agent made ${toolCalls.length} tool call(s): ${names}`
    };
  };
}
548
+
549
// src/eval/expectations.ts
/**
 * Wraps a trajectory evaluator so it only runs when referenceOutputs
 * carries a referenceTrajectory; otherwise it reports a passing skip under
 * `key`. When it runs, the nested trajectory becomes the wrapped
 * evaluator's referenceOutputs.
 */
function withTrajectoryGuard(evaluator, key) {
  return async ({ outputs, referenceOutputs }) => {
    const trajectory = referenceOutputs?.referenceTrajectory;
    if (!trajectory) {
      return { key, score: true, comment: "No referenceTrajectory specified, skipping" };
    }
    return evaluator({ outputs, referenceOutputs: trajectory });
  };
}
558
/**
 * Builds an OpenAI-style reference trajectory: the user message, then an
 * assistant tool-call + tool-result pair per expected tool name (ids tc1,
 * tc2, ...), closed by a final assistant answer. All contents except the
 * user message are "..." placeholders, since only the call sequence is
 * matched.
 */
function buildTrajectory(message, toolNames) {
  const steps = [{ role: "user", content: message }];
  toolNames.forEach((name, i) => {
    const id = `tc${i + 1}`;
    steps.push({
      role: "assistant",
      content: "",
      tool_calls: [{ function: { name, arguments: "{}" }, id, type: "function" }]
    });
    steps.push({ role: "tool", content: "...", tool_call_id: id });
  });
  steps.push({ role: "assistant", content: "..." });
  return steps;
}
574
/**
 * Expectation factory: asserts the agent called (at least) the given tools,
 * using agentevals trajectory superset matching with tool arguments
 * ignored. The reference trajectory is synthesized from the case message.
 */
function toolsCalled(tools) {
  return (ctx) => {
    const matcher = (0, import_agentevals.createTrajectoryMatchEvaluator)({ trajectoryMatchMode: "superset", toolArgsMatchMode: "ignore" });
    return {
      evaluator: ls2.wrapEvaluator(withTrajectoryGuard(matcher, "trajectory_match")),
      referenceOutputs: { referenceTrajectory: buildTrajectory(ctx.message, tools) }
    };
  };
}
585
/**
 * Expectation factory: scores the whole trajectory with an LLM judge
 * (agentevals TRAJECTORY_ACCURACY_PROMPT) using the configured
 * evaluatorModel. Carries no reference outputs of its own.
 */
function llmJudge() {
  return () => {
    const model = getEvalConfig().evaluatorModel;
    const judge = (0, import_agentevals.createTrajectoryLLMAsJudge)({ prompt: import_agentevals.TRAJECTORY_ACCURACY_PROMPT, model });
    return {
      evaluator: ls2.wrapEvaluator(withTrajectoryGuard(judge, "trajectory_llm_judge")),
      referenceOutputs: {}
    };
  };
}
600
/** Expectation factory: asserts the agent made no tool calls at all. */
function noTools() {
  return () => {
    return {
      evaluator: ls2.wrapEvaluator(createNoToolCallsEvaluator()),
      referenceOutputs: { expectNoToolCalls: true }
    };
  };
}
606
/**
 * Expectation factory: asserts the final AI response is written in the
 * given ISO 639-1 language code, as judged by the configured evaluatorModel.
 */
function respondsInLanguage(code) {
  return () => {
    const { modelConfig, evaluatorModel } = getEvalConfig();
    return {
      evaluator: ls2.wrapEvaluator(createLanguageEvaluator(modelConfig, evaluatorModel)),
      referenceOutputs: { expectedLanguage: code }
    };
  };
}
616
/** Expectation factory: the final AI response must contain every string. */
function contains(strings) {
  return () => {
    return {
      evaluator: ls2.wrapEvaluator(createResponseContentEvaluator()),
      referenceOutputs: { responseContains: strings }
    };
  };
}
622
/** Expectation factory: the final AI response must contain none of the strings. */
function notContains(strings) {
  return () => {
    return {
      evaluator: ls2.wrapEvaluator(createResponseContentEvaluator()),
      referenceOutputs: { responseMustNotContain: strings }
    };
  };
}
628
// Annotate the CommonJS export names for ESM import in node:
// (dead code by design: the `0 &&` guard means this never executes; esbuild
// emits the literal so Node's CJS named-export scanner can detect the list)
0 && (module.exports = {
  ai,
  configureEvals,
  contains,
  defineSuite,
  fromToolSpecs,
  human,
  llmJudge,
  noTools,
  notContains,
  respondsInLanguage,
  toolResult,
  toolsCalled
});
//# sourceMappingURL=index.js.map