@dvina/agents 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
// esbuild-generated CommonJS/ESM interop prelude (machine-generated; do not edit by hand).
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Shim for dynamic require() inside an ESM bundle: delegates to the real
// require when one exists (Node CJS-compat), proxies to it lazily when a
// Proxy is available, and otherwise throws at call time with the module id.
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
  get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
}) : x)(function(x) {
  if (typeof require !== "undefined") return require.apply(this, arguments);
  throw Error('Dynamic require of "' + x + '" is not supported');
});
// Wraps a CommonJS module body: evaluates it once on first call (the comma
// expression runs the factory, then yields the cached mod.exports).
var __commonJS = (cb, mod) => function __require2() {
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
};
// Copies own properties from `from` onto `to` as live getters, skipping
// `except` and keys already present on `to`; `desc` is reused as a scratch
// variable to preserve each property's enumerability.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Converts a CommonJS export object into an ES-module-shaped namespace object.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
32
+
33
+ // src/runtime/langchain/model-resolver.ts
34
+ import { AzureChatOpenAI, ChatOpenAI } from "@langchain/openai";
35
+ var LangchainModelResolver = class {
36
+ constructor(config) {
37
+ this.config = config;
38
+ }
39
+ resolve(modelString, tags) {
40
+ const parts = modelString.split(":");
41
+ if (parts.length === 2) {
42
+ const [provider, modelName] = parts;
43
+ return this.resolveByProvider(provider, "default", modelName, tags);
44
+ }
45
+ if (parts.length === 3) {
46
+ const [provider, configName, modelName] = parts;
47
+ return this.resolveByProvider(provider, configName, modelName, tags);
48
+ }
49
+ throw new Error(
50
+ 'Model string must follow format "provider:modelName" (uses "default" config) or "provider:configName:modelName"'
51
+ );
52
+ }
53
+ resolveByProvider(provider, configName, modelName, tags) {
54
+ switch (provider) {
55
+ case "openai":
56
+ return this.resolveOpenAI(configName, modelName, tags);
57
+ case "azure":
58
+ return this.resolveAzure(configName, modelName, tags);
59
+ default:
60
+ throw new Error(`Unsupported model provider: ${provider}`);
61
+ }
62
+ }
63
+ resolveOpenAI(configName, modelName, tags) {
64
+ const providerConfig = this.config.openai?.[configName];
65
+ if (!providerConfig) {
66
+ throw new Error(`Configuration "${configName}" for provider "openai" is missing`);
67
+ }
68
+ return new ChatOpenAI({
69
+ apiKey: providerConfig.apiKey,
70
+ modelName,
71
+ tags
72
+ });
73
+ }
74
+ resolveAzure(configName, deploymentName, tags) {
75
+ const providerConfig = this.config.azure?.[configName];
76
+ if (!providerConfig) {
77
+ throw new Error(`Configuration "${configName}" for provider "azure" is missing`);
78
+ }
79
+ return new AzureChatOpenAI({
80
+ model: providerConfig.model,
81
+ // shows (perhaps even uses) 3.5-turbo when not specifid
82
+ azureOpenAIApiKey: providerConfig.apiKey,
83
+ azureOpenAIApiInstanceName: this.extractInstanceName(providerConfig.endpoint),
84
+ azureOpenAIApiDeploymentName: deploymentName,
85
+ azureOpenAIApiVersion: providerConfig.apiVersion,
86
+ tags
87
+ });
88
+ }
89
+ extractInstanceName(endpoint) {
90
+ try {
91
+ const url = new URL(endpoint);
92
+ return url.hostname.split(".")[0];
93
+ } catch (e) {
94
+ return endpoint;
95
+ }
96
+ }
97
+ };
98
+
99
+ export {
100
+ __require,
101
+ __commonJS,
102
+ __toESM,
103
+ LangchainModelResolver
104
+ };
105
+ //# sourceMappingURL=chunk-LEEZCLZM.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/runtime/langchain/model-resolver.ts"],"sourcesContent":["import { BaseLanguageModel } from '@langchain/core/language_models/base';\nimport { AzureChatOpenAI, ChatOpenAI } from '@langchain/openai';\n\nexport type LangchainOpenAIConfig = {\n\tapiKey: string;\n};\n\nexport type LangchainAzureConfig = {\n\tmodel: string;\n\tapiKey: string;\n\tendpoint: string;\n\tapiVersion: string;\n};\n\nexport type LangchainModelConfig = {\n\topenai?: Record<string, LangchainOpenAIConfig>;\n\tazure?: Record<string, LangchainAzureConfig>;\n};\n\nexport class LangchainModelResolver {\n\tconstructor(private config: LangchainModelConfig) {}\n\n\tresolve(modelString: string, tags?: string[]): BaseLanguageModel {\n\t\tconst parts = modelString.split(':');\n\n\t\tif (parts.length === 2) {\n\t\t\tconst [provider, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, 'default', modelName, tags);\n\t\t}\n\n\t\tif (parts.length === 3) {\n\t\t\tconst [provider, configName, modelName] = parts;\n\t\t\treturn this.resolveByProvider(provider, configName, modelName, tags);\n\t\t}\n\n\t\tthrow new Error(\n\t\t\t'Model string must follow format \"provider:modelName\" (uses \"default\" config) or \"provider:configName:modelName\"',\n\t\t);\n\t}\n\n\tprivate resolveByProvider(\n\t\tprovider: string,\n\t\tconfigName: string,\n\t\tmodelName: string,\n\t\ttags?: string[],\n\t): BaseLanguageModel {\n\t\tswitch (provider) {\n\t\t\tcase 'openai':\n\t\t\t\treturn this.resolveOpenAI(configName, modelName, tags);\n\t\t\tcase 'azure':\n\t\t\t\treturn this.resolveAzure(configName, modelName, tags);\n\t\t\tdefault:\n\t\t\t\tthrow new Error(`Unsupported model provider: ${provider}`);\n\t\t}\n\t}\n\n\tprivate resolveOpenAI(configName: string, modelName: string, tags?: string[]): ChatOpenAI {\n\t\tconst providerConfig = this.config.openai?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"openai\" is 
missing`);\n\t\t}\n\n\t\treturn new ChatOpenAI({\n\t\t\tapiKey: providerConfig.apiKey,\n\t\t\tmodelName: modelName,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate resolveAzure(configName: string, deploymentName: string, tags?: string[]): AzureChatOpenAI {\n\t\tconst providerConfig = this.config.azure?.[configName];\n\t\tif (!providerConfig) {\n\t\t\tthrow new Error(`Configuration \"${configName}\" for provider \"azure\" is missing`);\n\t\t}\n\n\t\treturn new AzureChatOpenAI({\n\t\t\tmodel: providerConfig.model, // shows (perhaps even uses) 3.5-turbo when not specifid\n\t\t\tazureOpenAIApiKey: providerConfig.apiKey,\n\t\t\tazureOpenAIApiInstanceName: this.extractInstanceName(providerConfig.endpoint),\n\t\t\tazureOpenAIApiDeploymentName: deploymentName,\n\t\t\tazureOpenAIApiVersion: providerConfig.apiVersion,\n\t\t\ttags: tags,\n\t\t});\n\t}\n\n\tprivate extractInstanceName(endpoint: string): string {\n\t\ttry {\n\t\t\tconst url = new URL(endpoint);\n\t\t\treturn url.hostname.split('.')[0];\n\t\t} catch (e) {\n\t\t\treturn 
endpoint;\n\t\t}\n\t}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,SAAS,iBAAiB,kBAAkB;AAkBrC,IAAM,yBAAN,MAA6B;AAAA,EACnC,YAAoB,QAA8B;AAA9B;AAAA,EAA+B;AAAA,EAEnD,QAAQ,aAAqB,MAAoC;AAChE,UAAM,QAAQ,YAAY,MAAM,GAAG;AAEnC,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,SAAS,IAAI;AAC9B,aAAO,KAAK,kBAAkB,UAAU,WAAW,WAAW,IAAI;AAAA,IACnE;AAEA,QAAI,MAAM,WAAW,GAAG;AACvB,YAAM,CAAC,UAAU,YAAY,SAAS,IAAI;AAC1C,aAAO,KAAK,kBAAkB,UAAU,YAAY,WAAW,IAAI;AAAA,IACpE;AAEA,UAAM,IAAI;AAAA,MACT;AAAA,IACD;AAAA,EACD;AAAA,EAEQ,kBACP,UACA,YACA,WACA,MACoB;AACpB,YAAQ,UAAU;AAAA,MACjB,KAAK;AACJ,eAAO,KAAK,cAAc,YAAY,WAAW,IAAI;AAAA,MACtD,KAAK;AACJ,eAAO,KAAK,aAAa,YAAY,WAAW,IAAI;AAAA,MACrD;AACC,cAAM,IAAI,MAAM,+BAA+B,QAAQ,EAAE;AAAA,IAC3D;AAAA,EACD;AAAA,EAEQ,cAAc,YAAoB,WAAmB,MAA6B;AACzF,UAAM,iBAAiB,KAAK,OAAO,SAAS,UAAU;AACtD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,oCAAoC;AAAA,IACjF;AAEA,WAAO,IAAI,WAAW;AAAA,MACrB,QAAQ,eAAe;AAAA,MACvB;AAAA,MACA;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,aAAa,YAAoB,gBAAwB,MAAkC;AAClG,UAAM,iBAAiB,KAAK,OAAO,QAAQ,UAAU;AACrD,QAAI,CAAC,gBAAgB;AACpB,YAAM,IAAI,MAAM,kBAAkB,UAAU,mCAAmC;AAAA,IAChF;AAEA,WAAO,IAAI,gBAAgB;AAAA,MAC1B,OAAO,eAAe;AAAA;AAAA,MACtB,mBAAmB,eAAe;AAAA,MAClC,4BAA4B,KAAK,oBAAoB,eAAe,QAAQ;AAAA,MAC5E,8BAA8B;AAAA,MAC9B,uBAAuB,eAAe;AAAA,MACtC;AAAA,IACD,CAAC;AAAA,EACF;AAAA,EAEQ,oBAAoB,UAA0B;AACrD,QAAI;AACH,YAAM,MAAM,IAAI,IAAI,QAAQ;AAC5B,aAAO,IAAI,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACjC,SAAS,GAAG;AACX,aAAO;AAAA,IACR;AAAA,EACD;AACD;","names":[]}
@@ -0,0 +1,109 @@
1
+ import { L as LangchainModelConfig } from '../model-resolver-BRAaBV9n.mjs';
2
+ import { BaseMessage } from '@langchain/core/messages';
3
+
4
/** Global configuration consumed by `configureEvals`. */
interface EvalConfig {
    /** Provider credentials/settings passed to the Langchain model resolver. */
    modelConfig: LangchainModelConfig;
    /** Model string for the eval target (resolver format, e.g. "provider:modelName"). */
    model: string;
    /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). Defaults to `model`. */
    evaluatorModel?: string;
    /** System prompt prepended to every eval invocation. Can be overridden per-suite or per-case. */
    systemPrompt?: string;
}
/** Sets the global eval configuration used by subsequently defined suites. */
declare function configureEvals(config: EvalConfig): void;

/** Definition of a mocked tool exposed to the agent under test. */
interface MockToolDef {
    /** Tool name as presented to the model. */
    name: string;
    /** Tool description as presented to the model. */
    description: string;
    /** Input schema for the tool — presumably JSON-schema-shaped; confirm against the target adapter. */
    schema: Record<string, unknown>;
    /**
     * Canned response the mock tool returns.
     * Can be a static string, or a function that receives input and returns a response.
     * If a function is provided, it receives the full invocation count as a second arg
     * to support scenarios like "first call fails, second call succeeds".
     */
    response: string | ((input: Record<string, unknown>, callCount: number) => string);
}

/** An evaluator that scores actual outputs against reference outputs. */
type EvaluatorFn = (args: {
    outputs: Record<string, any>;
    referenceOutputs: Record<string, any>;
}) => Promise<any>;
/** An evaluator paired with the reference outputs it should be run against. */
interface ResolvedExpectation {
    evaluator: EvaluatorFn;
    referenceOutputs: Record<string, unknown>;
}
/** A factory that receives test context and returns an evaluator + its referenceOutputs. */
type Expectation = (ctx: {
    message: string;
}) => ResolvedExpectation;
/**
 * Expect the agent to call tools in order (superset trajectory match).
 * Empty `[]` means the agent should answer directly without calling any tools.
 */
declare function toolsCalled(tools: string[]): Expectation;
/**
 * Run an LLM-as-judge evaluator on the trajectory.
 * Requires `toolsCalled` in the same expect array.
 * Uses the globally configured evaluator model.
 */
declare function llmJudge(): Expectation;
/** Assert the agent made zero tool calls. */
declare function noTools(): Expectation;
/**
 * Assert the response is in the given language (ISO 639-1 code).
 * Uses the globally configured evaluator model for language detection.
 * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').
 */
declare function respondsInLanguage(code: string): Expectation;
/** Assert the response contains all given strings. */
declare function contains(strings: string[]): Expectation;
/** Assert the response does not contain any of the given strings. */
declare function notContains(strings: string[]): Expectation;

/** Builds a human (user) message for a test conversation. */
declare function human(content: string): {
    role: "human";
    content: string;
};
/** Builds an AI message, optionally recording the names of tools it called. */
declare function ai(content: string, toolCalls?: string[]): {
    toolCalls?: string[] | undefined;
    role: "ai";
    content: string;
};
/** Builds a tool-result message for a test conversation. */
declare function toolResult(content: string): {
    role: "tool";
    content: string;
};
/** Any message constructible via `human`, `ai`, or `toolResult`. */
type Message = ReturnType<typeof human> | ReturnType<typeof ai> | ReturnType<typeof toolResult>;
/** Tool definition used in suites/cases — presumably keyed by tool name in the containing record (cf. `MockToolDef.name`); confirm against `defineSuite`'s implementation. */
interface ToolDef {
    description: string;
    schema?: Record<string, string>;
    /** Auto-stringified if not a string or function. */
    response: unknown | ((input: Record<string, unknown>, callCount: number) => string);
}
/** A single eval case: a conversation plus the expectations to run on it. */
interface TestCase {
    /** Test name. Defaults to the last human message content if omitted. */
    name?: string;
    /** Conversation fed to the agent for this case. */
    messages: Message[];
    /** Per-case system prompt; overrides suite- and global-level prompts. */
    systemPrompt?: string;
    /** Override suite-level tools for this case. */
    tools?: Record<string, ToolDef>;
    /** Expectations evaluated against the agent's run. */
    expect: Expectation[];
}
/** Target invoked with the prepared inputs; resolves to the resulting message trace. */
type TargetFn = (inputs: {
    systemPrompt?: string;
    messages: Message[];
    tools: MockToolDef[];
}) => Promise<{
    messages: BaseMessage[];
}>;
/** Configuration for a named eval suite. */
interface SuiteConfig {
    /** Custom target function, or model string override. Auto-created from global config if omitted. */
    target?: TargetFn | string;
    /** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */
    systemPrompt?: string;
    /** Tools available to every case in the suite (overridable per-case). */
    tools: Record<string, ToolDef>;
    /** The test cases that make up this suite. */
    cases: TestCase[];
}
/** Defines an eval suite under the given name. */
declare function defineSuite(name: string, config: SuiteConfig): void;

export { type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };
@@ -0,0 +1,109 @@
1
+ import { L as LangchainModelConfig } from '../model-resolver-BRAaBV9n.js';
2
+ import { BaseMessage } from '@langchain/core/messages';
3
+
4
/** Global configuration consumed by `configureEvals`. */
interface EvalConfig {
    /** Provider credentials/settings passed to the Langchain model resolver. */
    modelConfig: LangchainModelConfig;
    /** Model string for the eval target (resolver format, e.g. "provider:modelName"). */
    model: string;
    /** Model for evaluators needing LLM calls (language detection, LLM-as-judge). Defaults to `model`. */
    evaluatorModel?: string;
    /** System prompt prepended to every eval invocation. Can be overridden per-suite or per-case. */
    systemPrompt?: string;
}
/** Sets the global eval configuration used by subsequently defined suites. */
declare function configureEvals(config: EvalConfig): void;

/** Definition of a mocked tool exposed to the agent under test. */
interface MockToolDef {
    /** Tool name as presented to the model. */
    name: string;
    /** Tool description as presented to the model. */
    description: string;
    /** Input schema for the tool — presumably JSON-schema-shaped; confirm against the target adapter. */
    schema: Record<string, unknown>;
    /**
     * Canned response the mock tool returns.
     * Can be a static string, or a function that receives input and returns a response.
     * If a function is provided, it receives the full invocation count as a second arg
     * to support scenarios like "first call fails, second call succeeds".
     */
    response: string | ((input: Record<string, unknown>, callCount: number) => string);
}

/** An evaluator that scores actual outputs against reference outputs. */
type EvaluatorFn = (args: {
    outputs: Record<string, any>;
    referenceOutputs: Record<string, any>;
}) => Promise<any>;
/** An evaluator paired with the reference outputs it should be run against. */
interface ResolvedExpectation {
    evaluator: EvaluatorFn;
    referenceOutputs: Record<string, unknown>;
}
/** A factory that receives test context and returns an evaluator + its referenceOutputs. */
type Expectation = (ctx: {
    message: string;
}) => ResolvedExpectation;
/**
 * Expect the agent to call tools in order (superset trajectory match).
 * Empty `[]` means the agent should answer directly without calling any tools.
 */
declare function toolsCalled(tools: string[]): Expectation;
/**
 * Run an LLM-as-judge evaluator on the trajectory.
 * Requires `toolsCalled` in the same expect array.
 * Uses the globally configured evaluator model.
 */
declare function llmJudge(): Expectation;
/** Assert the agent made zero tool calls. */
declare function noTools(): Expectation;
/**
 * Assert the response is in the given language (ISO 639-1 code).
 * Uses the globally configured evaluator model for language detection.
 * @param code - ISO 639-1 language code (e.g. 'en', 'tr', 'de').
 */
declare function respondsInLanguage(code: string): Expectation;
/** Assert the response contains all given strings. */
declare function contains(strings: string[]): Expectation;
/** Assert the response does not contain any of the given strings. */
declare function notContains(strings: string[]): Expectation;

/** Builds a human (user) message for a test conversation. */
declare function human(content: string): {
    role: "human";
    content: string;
};
/** Builds an AI message, optionally recording the names of tools it called. */
declare function ai(content: string, toolCalls?: string[]): {
    toolCalls?: string[] | undefined;
    role: "ai";
    content: string;
};
/** Builds a tool-result message for a test conversation. */
declare function toolResult(content: string): {
    role: "tool";
    content: string;
};
/** Any message constructible via `human`, `ai`, or `toolResult`. */
type Message = ReturnType<typeof human> | ReturnType<typeof ai> | ReturnType<typeof toolResult>;
/** Tool definition used in suites/cases — presumably keyed by tool name in the containing record (cf. `MockToolDef.name`); confirm against `defineSuite`'s implementation. */
interface ToolDef {
    description: string;
    schema?: Record<string, string>;
    /** Auto-stringified if not a string or function. */
    response: unknown | ((input: Record<string, unknown>, callCount: number) => string);
}
/** A single eval case: a conversation plus the expectations to run on it. */
interface TestCase {
    /** Test name. Defaults to the last human message content if omitted. */
    name?: string;
    /** Conversation fed to the agent for this case. */
    messages: Message[];
    /** Per-case system prompt; overrides suite- and global-level prompts. */
    systemPrompt?: string;
    /** Override suite-level tools for this case. */
    tools?: Record<string, ToolDef>;
    /** Expectations evaluated against the agent's run. */
    expect: Expectation[];
}
/** Target invoked with the prepared inputs; resolves to the resulting message trace. */
type TargetFn = (inputs: {
    systemPrompt?: string;
    messages: Message[];
    tools: MockToolDef[];
}) => Promise<{
    messages: BaseMessage[];
}>;
/** Configuration for a named eval suite. */
interface SuiteConfig {
    /** Custom target function, or model string override. Auto-created from global config if omitted. */
    target?: TargetFn | string;
    /** System prompt for all cases in this suite. Overrides the global prompt; can be overridden per-case. */
    systemPrompt?: string;
    /** Tools available to every case in the suite (overridable per-case). */
    tools: Record<string, ToolDef>;
    /** The test cases that make up this suite. */
    cases: TestCase[];
}
/** Defines an eval suite under the given name. */
declare function defineSuite(name: string, config: SuiteConfig): void;

export { type EvalConfig, type Expectation, type SuiteConfig, type TestCase, type ToolDef, ai, configureEvals, contains, defineSuite, human, llmJudge, noTools, notContains, respondsInLanguage, toolResult, toolsCalled };