npm - @fonoster/autopilot - Versions diffs - 0.17.4 → 0.18.0 - Mend

@fonoster/autopilot 0.17.4 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/dist/models/createChain.d.ts CHANGED Viewed

@@ -21,5 +21,7 @@ import { RunnableSequence } from "@langchain/core/runnables";
 import { KnowledgeBase } from "../knowledge";
 import { createChatHistory } from "./chatHistory";
 import { createPromptTemplate } from "./createPromptTemplate";
-declare function createChain(model: BaseChatModel, knowledgeBase: KnowledgeBase, promptTemplate: ReturnType<typeof createPromptTemplate>, chatHistory: ReturnType<typeof createChatHistory>): RunnableSequence<any, import("@langchain/core/messages").AIMessageChunk<import("@langchain/core/messages").MessageStructure>>;
+declare function createChain(model: BaseChatModel, knowledgeBase: KnowledgeBase, promptTemplate: ReturnType<typeof createPromptTemplate>, chatHistory: ReturnType<typeof createChatHistory>): RunnableSequence<{
+    text: string;
+}, import("@langchain/core/messages").AIMessageChunk<import("@langchain/core/messages").MessageStructure>>;
 export { createChain };

package/dist/models/evaluations/createEvaluateIntelligence.d.ts CHANGED Viewed

@@ -16,8 +16,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-import { GrpcErrorMessage, IntegrationConfig } from "@fonoster/common";
-declare function createEvaluateIntelligence(integrations: IntegrationConfig[]): (call: {
-    request: unknown;
-}, callback: (error?: GrpcErrorMessage, response?: unknown) => void) => Promise<void>;
+import { IntegrationConfig } from "@fonoster/common";
+import { EvaluateIntelligenceRequest } from "./types";
+type ServerStreamCall = {
+    request: EvaluateIntelligenceRequest;
+    write: (chunk: Record<string, unknown>) => void;
+    end: () => void;
+};
+declare function createEvaluateIntelligence(integrations: IntegrationConfig[]): (call: ServerStreamCall) => Promise<void>;
 export { createEvaluateIntelligence };

package/dist/models/evaluations/createEvaluateIntelligence.js CHANGED Viewed

@@ -24,10 +24,11 @@ const logger_1 = require("@fonoster/logger");
 const pb_util_1 = require("pb-util");
 const zod_1 = require("zod");
 const createEvalEffectiveConfig_1 = require("./createEvalEffectiveConfig");
-const evalTestCases_1 = require("./evalTestCases");
+const runEval_1 = require("./runEval");
+const stepReportToEventPayload_1 = require("./stepReportToEventPayload");
 const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
 function createEvaluateIntelligence(integrations) {
-    const evaluateIntelligence = async (call, callback) => {
+    const evaluateIntelligence = async (call) => {
         const { request } = call;
         const { intelligence } = request;
         const accessKeyId = (0, common_1.getAccessKeyIdFromCall)(call);
@@ -36,25 +37,38 @@ function createEvaluateIntelligence(integrations) {
             llmProductRef: intelligence.productRef,
             evalLlmProductRef: "llm.openai"
         });
-        const config = pb_util_1.struct.decode(intelligence.config);
-        const parsedIntelligence = zod_1.z
-            .object({
-            productRef: zod_1.z.string(),
-            config: common_1.assistantSchema
-        })
-            .parse({
-            productRef: intelligence.productRef,
-            config: config
-        });
-        const credentials = (0, common_1.findIntegrationsCredentials)(integrations, intelligence.productRef);
-        const evaluationApiKey = (0, common_1.findIntegrationsCredentials)(integrations, "llm.openai");
-        const effectiveConfig = (0, createEvalEffectiveConfig_1.createEvalEffectiveConfig)(parsedIntelligence.config, credentials, evaluationApiKey);
-        const results = await (0, evalTestCases_1.evalTestCases)({
-            intelligence: {
-                config: effectiveConfig
-            }
-        });
-        callback(null, { results });
+        try {
+            const config = pb_util_1.struct.decode(intelligence.config);
+            const parsedIntelligence = zod_1.z
+                .object({
+                productRef: zod_1.z.string(),
+                config: common_1.assistantSchema
+            })
+                .parse({
+                productRef: intelligence.productRef,
+                config
+            });
+            const credentials = (0, common_1.findIntegrationsCredentials)(integrations, intelligence.productRef);
+            const evaluationApiKey = (0, common_1.findIntegrationsCredentials)(integrations, "llm.openai");
+            const effectiveConfig = (0, createEvalEffectiveConfig_1.createEvalEffectiveConfig)(parsedIntelligence.config, credentials, evaluationApiKey);
+            await (0, runEval_1.runEval)({ intelligence: { config: effectiveConfig } }, {
+                onStepResult: (scenarioRef, stepReport) => {
+                    const payload = (0, stepReportToEventPayload_1.stepReportToEventPayload)(scenarioRef, stepReport);
+                    call.write(payload);
+                },
+                onScenarioComplete: (scenarioRef, overallPassed) => {
+                    const payload = (0, stepReportToEventPayload_1.scenarioSummaryToEventPayload)(scenarioRef, overallPassed);
+                    call.write(payload);
+                }
+            });
+        }
+        catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            call.write((0, stepReportToEventPayload_1.evalErrorToEventPayload)(message));
+        }
+        finally {
+            call.end();
+        }
     };
-    return (0, common_1.withErrorHandling)(evaluateIntelligence);
+    return evaluateIntelligence;
 }

package/dist/models/evaluations/evalTestCases.js CHANGED Viewed

@@ -12,7 +12,9 @@ async function evalTestCases(autopilotApplication) {
         say: async (_) => { }
     };
     const evaluationReports = [];
-    for (const scenario of testCases?.scenarios ?? []) {
+    const scenarios = (testCases?.scenarios ?? []);
+    for (let i = 0; i < scenarios.length; i++) {
+        const scenario = scenarios[i];
         const languageModel = (0, createLanguageModel_1.createLanguageModel)({
             voice,
             assistantConfig: autopilotApplication.intelligence.config,

package/dist/models/evaluations/evaluateScenario.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-import { ScenarioEvaluationReport } from "@fonoster/types";
+import { ScenarioEvaluationReport, StepEvaluationReport } from "@fonoster/types";
 import { ScenarioEvaluationRequest } from "./types";
-export declare function evaluateScenario(config: ScenarioEvaluationRequest): Promise<ScenarioEvaluationReport>;
+export type OnStepResultCallback = (scenarioRef: string, report: StepEvaluationReport) => void | Promise<void>;
+export declare function evaluateScenario(config: ScenarioEvaluationRequest, onStepResult?: OnStepResultCallback): Promise<ScenarioEvaluationReport>;

package/dist/models/evaluations/evaluateScenario.js CHANGED Viewed

@@ -2,10 +2,12 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.evaluateScenario = evaluateScenario;
 const evaluateStep_1 = require("./evaluateStep");
-async function evaluateScenario(config) {
+async function evaluateScenario(config, onStepResult) {
     const { scenario, languageModel, testTextSimilarity, assistantConfig } = config;
     const results = [];
-    for (const step of scenario.conversation) {
+    const steps = scenario.conversation;
+    for (let i = 0; i < steps.length; i++) {
+        const step = steps[i];
         const stepResult = await (0, evaluateStep_1.evaluateStep)({
             step,
             languageModel,
@@ -13,8 +15,10 @@ async function evaluateScenario(config) {
             assistantConfig
         });
         results.push(stepResult);
+        if (onStepResult)
+            await onStepResult(scenario.ref, stepResult);
     }
-    const overallPassed = results.every((step) => step.passed);
+    const overallPassed = results.every((s) => s.passed);
     return {
         scenarioRef: scenario.ref,
         overallPassed,

package/dist/models/evaluations/evaluateSingleTool.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { ToolEvaluationReport } from "@fonoster/types";
+import { EvalExpectedTool } from "./types";
+type ToolCallLike = {
+    name: string;
+    args?: Record<string, unknown>;
+};
+export declare function evaluateSingleTool(expected: EvalExpectedTool, actual: ToolCallLike): ToolEvaluationReport;
+export {};

package/dist/models/evaluations/evaluateSingleTool.js ADDED Viewed

@@ -0,0 +1,41 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.evaluateSingleTool = evaluateSingleTool;
+const isValidIso8601Date_1 = require("./isValidIso8601Date");
+function evaluateSingleTool(expected, actual) {
+    let passed = true;
+    let errorMessage = "";
+    if (actual.name !== expected.tool) {
+        passed = false;
+        errorMessage = `Expected tool "${expected.tool}" but got "${actual.name}".`;
+    }
+    const expectedParams = expected.parameters ?? {};
+    const actualParams = actual.args ?? {};
+    for (const key of Object.keys(expectedParams)) {
+        const expectedVal = expectedParams[key];
+        const expectedStr = typeof expectedVal === "string"
+            ? expectedVal.trim()
+            : String(expectedVal).trim();
+        if (expectedStr === "valid-date") {
+            if (!(0, isValidIso8601Date_1.isValidIso8601Date)(actualParams[key])) {
+                passed = false;
+                const paramMsg = `Expected parameter "${key}" to be a valid date, but got ${JSON.stringify(actualParams[key])}.`;
+                errorMessage = errorMessage ? `${errorMessage} ${paramMsg}` : paramMsg;
+            }
+            continue;
+        }
+        if (actualParams[key] !== expectedVal) {
+            passed = false;
+            const paramMsg = `Expected parameter "${key}" to have value ${JSON.stringify(expectedVal)}, but got ${JSON.stringify(actualParams[key])}.`;
+            errorMessage = errorMessage ? `${errorMessage} ${paramMsg}` : paramMsg;
+        }
+    }
+    return {
+        expectedTool: expected.tool,
+        actualTool: actual.name,
+        passed,
+        expectedParameters: expected.parameters,
+        actualParameters: actual.args,
+        errorMessage: errorMessage || undefined
+    };
+}

package/dist/models/evaluations/evaluateStep.js CHANGED Viewed

@@ -3,41 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.evaluateStep = evaluateStep;
 const evaluateTextResponse_1 = require("./evaluateTextResponse");
 const evaluateToolCalls_1 = require("./evaluateToolCalls");
+const extractAiResponse_1 = require("./extractAiResponse");
 async function evaluateStep({ step, languageModel, testTextSimilarity, assistantConfig }) {
     const stepResult = {
         humanInput: step.userInput,
         expectedResponse: step.expected.text.response,
-        aiResponse: "", // will be filled if invoke is successful
+        aiResponse: "",
         evaluationType: step.expected.text.type,
         passed: true
     };
     try {
         const response = await languageModel.invoke(step.userInput);
-        // Hangup and transfer are special cases
-        if (response.toolCalls && response.toolCalls.length > 0) {
-            const topTool = response.toolCalls[0];
-            if (topTool.name === "hangup") {
-                stepResult.aiResponse =
-                    assistantConfig.conversationSettings?.goodbyeMessage || "";
-            }
-            else if (topTool.name === "transfer") {
-                stepResult.aiResponse =
-                    assistantConfig.conversationSettings?.transferOptions?.message ?? "";
-            }
-            else {
-                stepResult.aiResponse = response.content ?? "";
-            }
-        }
-        else {
-            stepResult.aiResponse = response.content ?? "";
-        }
-        const textEvaluation = await (0, evaluateTextResponse_1.evaluateTextResponse)(step.expected.text, stepResult.aiResponse, testTextSimilarity);
+        stepResult.aiResponse = (0, extractAiResponse_1.extractAiResponse)(response, assistantConfig);
+        const textEvaluation = await (0, evaluateTextResponse_1.evaluateTextResponse)({
+            type: step.expected.text.type,
+            response: step.expected.text.response
+        }, stepResult.aiResponse, testTextSimilarity);
         if (!textEvaluation.passed) {
             stepResult.passed = false;
             stepResult.errorMessage = textEvaluation.errorMessage;
         }
         if (step.expected.tools && step.expected.tools.length > 0) {
-            const toolsEvaluation = (0, evaluateToolCalls_1.evaluateToolCalls)(step.expected.tools, response.toolCalls);
+            const toolCalls = response.toolCalls?.filter((tc) => tc?.name) ?? [];
+            const toolsEvaluation = (0, evaluateToolCalls_1.evaluateToolCalls)(step.expected.tools, toolCalls);
             stepResult.toolEvaluations = toolsEvaluation.evaluations;
             if (!toolsEvaluation.passed) {
                 stepResult.passed = false;

package/dist/models/evaluations/evaluateToolCalls.d.ts CHANGED Viewed

@@ -17,8 +17,14 @@
  * limitations under the License.
  */
 import { ToolEvaluationReport } from "@fonoster/types";
-export declare function evaluateToolCalls(expectedTools: any[], toolCalls: any[] | undefined): {
+import { EvalExpectedTool } from "./types";
+type ToolCallLike = {
+    name: string;
+    args?: Record<string, unknown>;
+};
+export declare function evaluateToolCalls(expectedTools: EvalExpectedTool[], toolCalls: ToolCallLike[] | undefined): {
     evaluations: ToolEvaluationReport[];
     passed: boolean;
     errorMessage?: string;
 };
+export {};

package/dist/models/evaluations/evaluateToolCalls.js CHANGED Viewed

@@ -1,15 +1,10 @@
 "use strict";
-var __importDefault = (this && this.__importDefault) || function (mod) {
-    return (mod && mod.__esModule) ? mod : { "default": mod };
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.evaluateToolCalls = evaluateToolCalls;
-const moment_1 = __importDefault(require("moment"));
+const evaluateSingleTool_1 = require("./evaluateSingleTool");
 function evaluateToolCalls(expectedTools, toolCalls) {
     const evaluations = [];
-    let overallPassed = true;
     if (!toolCalls || toolCalls.length !== expectedTools.length) {
-        overallPassed = false;
         evaluations.push({
             expectedTool: "",
             actualTool: "",
@@ -20,52 +15,14 @@ function evaluateToolCalls(expectedTools, toolCalls) {
         });
         return {
             evaluations,
-            passed: overallPassed,
-            errorMessage: `Tool invocation count mismatch.`
+            passed: false,
+            errorMessage: "Tool invocation count mismatch."
         };
     }
     for (let i = 0; i < expectedTools.length; i++) {
-        const expectedTool = expectedTools[i];
-        const actualCall = toolCalls[i];
-        let toolPassed = true;
-        let errorMessage = "";
-        if (actualCall.name !== expectedTool.tool) {
-            toolPassed = false;
-            errorMessage = `Expected tool "${expectedTool.tool}" but got "${actualCall.name}".`;
-        }
-        const expectedParams = expectedTool.parameters || {};
-        const actualParams = actualCall.args || {};
-        for (const key of Object.keys(expectedParams)) {
-            // Check for the special case of a valid-date
-            if (expectedParams[key].trim() === "valid-date") {
-                actualParams[key] = (0, moment_1.default)(actualParams[key], moment_1.default.ISO_8601, true);
-                if (!actualParams[key].isValid()) {
-                    toolPassed = false;
-                    const paramMsg = `Expected parameter "${key}" to be a valid date, but got ${JSON.stringify(actualParams[key])}.`;
-                    errorMessage = errorMessage
-                        ? errorMessage + " " + paramMsg
-                        : paramMsg;
-                }
-                continue;
-            }
-            if (actualParams[key] !== expectedParams[key]) {
-                toolPassed = false;
-                const paramMsg = `Expected parameter "${key}" to have value ${JSON.stringify(expectedParams[key])}, but got ${JSON.stringify(actualParams[key])}.`;
-                errorMessage = errorMessage ? errorMessage + " " + paramMsg : paramMsg;
-            }
-        }
-        if (!toolPassed) {
-            overallPassed = false;
-        }
-        evaluations.push({
-            expectedTool: expectedTool.tool,
-            actualTool: actualCall.name,
-            passed: toolPassed,
-            expectedParameters: expectedTool.parameters,
-            actualParameters: actualCall.args,
-            errorMessage: errorMessage || undefined
-        });
+        evaluations.push((0, evaluateSingleTool_1.evaluateSingleTool)(expectedTools[i], toolCalls[i]));
     }
+    const overallPassed = evaluations.every((e) => e.passed);
     return {
         evaluations,
         passed: overallPassed,

package/dist/models/evaluations/extractAiResponse.d.ts ADDED Viewed

@@ -0,0 +1,25 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { AssistantConfig } from "../../assistants";
+import { InvocationResult } from "../types";
+/**
+ * Maps language model invocation result to the text that should be compared
+ * in eval (hangup/transfer use config messages).
+ */
+export declare function extractAiResponse(response: InvocationResult, assistantConfig: AssistantConfig): string;

package/dist/models/evaluations/extractAiResponse.js ADDED Viewed

@@ -0,0 +1,19 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.extractAiResponse = extractAiResponse;
+/**
+ * Maps language model invocation result to the text that should be compared
+ * in eval (hangup/transfer use config messages).
+ */
+function extractAiResponse(response, assistantConfig) {
+    if (response.toolCalls && response.toolCalls.length > 0) {
+        const topTool = response.toolCalls[0];
+        if (topTool.name === "hangup") {
+            return assistantConfig.conversationSettings?.goodbyeMessage ?? "";
+        }
+        if (topTool.name === "transfer") {
+            return (assistantConfig.conversationSettings?.transferOptions?.message ?? "");
+        }
+    }
+    return response.content ?? "";
+}

package/dist/models/evaluations/index.d.ts CHANGED Viewed

@@ -16,6 +16,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+export * from "./createEvaluateIntelligence";
+export * from "./createTestTextSimilarity";
 export * from "./evalTestCases";
+export * from "./evaluateScenario";
+export * from "./evaluateSingleTool";
+export * from "./evaluateStep";
+export * from "./evaluateTextResponse";
+export * from "./evaluateToolCalls";
+export * from "./extractAiResponse";
+export * from "./isValidIso8601Date";
+export * from "./runEval";
 export * from "./types";
-export * from "./createEvaluateIntelligence";

package/dist/models/evaluations/index.js CHANGED Viewed

@@ -32,6 +32,15 @@ Object.defineProperty(exports, "__esModule", { value: true });
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+__exportStar(require("./createEvaluateIntelligence"), exports);
+__exportStar(require("./createTestTextSimilarity"), exports);
 __exportStar(require("./evalTestCases"), exports);
+__exportStar(require("./evaluateScenario"), exports);
+__exportStar(require("./evaluateSingleTool"), exports);
+__exportStar(require("./evaluateStep"), exports);
+__exportStar(require("./evaluateTextResponse"), exports);
+__exportStar(require("./evaluateToolCalls"), exports);
+__exportStar(require("./extractAiResponse"), exports);
+__exportStar(require("./isValidIso8601Date"), exports);
+__exportStar(require("./runEval"), exports);
 __exportStar(require("./types"), exports);
-__exportStar(require("./createEvaluateIntelligence"), exports);

package/dist/models/evaluations/isValidIso8601Date.d.ts ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+export declare function isValidIso8601Date(value: unknown): boolean;

package/dist/models/evaluations/isValidIso8601Date.js ADDED Viewed

@@ -0,0 +1,27 @@
+"use strict";
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.isValidIso8601Date = isValidIso8601Date;
+function isValidIso8601Date(value) {
+    if (typeof value !== "string")
+        return false;
+    const time = Date.parse(value);
+    return !Number.isNaN(time);
+}

package/dist/models/evaluations/runEval.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { AssistantConfig } from "../../assistants";
+import { RunEvalCallbacks } from "./types";
+export type RunEvalConfig = {
+    intelligence: {
+        config: AssistantConfig;
+    };
+};
+export declare function runEval(autopilotApplication: RunEvalConfig, callbacks: RunEvalCallbacks): Promise<void>;

package/dist/models/evaluations/runEval.js ADDED Viewed

@@ -0,0 +1,38 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.runEval = runEval;
+const createLanguageModel_1 = require("../createLanguageModel");
+const createTestTextSimilarity_1 = require("./createTestTextSimilarity");
+const evaluateScenario_1 = require("./evaluateScenario");
+const textSimilaryPrompt_1 = require("./textSimilaryPrompt");
+async function runEval(autopilotApplication, callbacks) {
+    const { testCases } = autopilotApplication.intelligence.config;
+    const assistantConfig = autopilotApplication.intelligence.config;
+    const voice = {
+        say: async (_) => { }
+    };
+    const scenarios = testCases?.scenarios ?? [];
+    for (const scenario of scenarios) {
+        const languageModel = (0, createLanguageModel_1.createLanguageModel)({
+            voice,
+            assistantConfig,
+            knowledgeBase: {
+                load: async () => { },
+                queryKnowledgeBase: async (query) => query
+            },
+            telephonyContext: scenario.telephonyContext
+        });
+        const testTextSimilarity = (0, createTestTextSimilarity_1.createTestTextSimilarity)({
+            provider: assistantConfig.testCases?.evalsLanguageModel?.provider,
+            model: assistantConfig.testCases?.evalsLanguageModel?.model ?? "",
+            apiKey: assistantConfig.testCases?.evalsLanguageModel?.apiKey
+        }, assistantConfig.testCases?.evalsSystemPrompt ?? textSimilaryPrompt_1.textSimilaryPrompt);
+        const report = await (0, evaluateScenario_1.evaluateScenario)({
+            assistantConfig,
+            scenario,
+            languageModel,
+            testTextSimilarity
+        }, (scenarioRef, stepReport) => callbacks.onStepResult(scenarioRef, stepReport));
+        await callbacks.onScenarioComplete(report.scenarioRef, report.overallPassed);
+    }
+}

package/dist/models/evaluations/stepReportToEventPayload.d.ts ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { StepEvaluationReport } from "@fonoster/types";
+/**
+ * Converts StepEvaluationReport to the wire payload shape expected by the gRPC stream
+ * (camelCase, evaluationType as number for proto).
+ */
+export declare function stepReportToEventPayload(scenarioRef: string, report: StepEvaluationReport): Record<string, unknown>;
+export declare function scenarioSummaryToEventPayload(scenarioRef: string, overallPassed: boolean): Record<string, unknown>;
+export declare function evalErrorToEventPayload(message: string): Record<string, unknown>;

package/dist/models/evaluations/stepReportToEventPayload.js ADDED Viewed

@@ -0,0 +1,73 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.stepReportToEventPayload = stepReportToEventPayload;
+exports.scenarioSummaryToEventPayload = scenarioSummaryToEventPayload;
+exports.evalErrorToEventPayload = evalErrorToEventPayload;
+/**
+ * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const types_1 = require("@fonoster/types");
+const EVALUATION_TYPE_TO_NUM = {
+    [types_1.ExpectedTextType.EXACT]: 0,
+    [types_1.ExpectedTextType.SIMILAR]: 1
+};
+function toolReportToPayload(t) {
+    return {
+        expectedTool: t.expectedTool,
+        actualTool: t.actualTool,
+        passed: t.passed,
+        expectedParameters: t.expectedParameters,
+        actualParameters: t.actualParameters,
+        errorMessage: t.errorMessage ?? ""
+    };
+}
+/**
+ * Converts StepEvaluationReport to the wire payload shape expected by the gRPC stream
+ * (camelCase, evaluationType as number for proto).
+ */
+function stepReportToEventPayload(scenarioRef, report) {
+    return {
+        stepResult: {
+            scenarioRef,
+            report: {
+                humanInput: report.humanInput,
+                expectedResponse: report.expectedResponse,
+                aiResponse: report.aiResponse,
+                evaluationType: EVALUATION_TYPE_TO_NUM[report.evaluationType] ?? 0,
+                passed: report.passed,
+                errorMessage: report.errorMessage ?? "",
+                toolEvaluations: (report.toolEvaluations ?? []).map(toolReportToPayload)
+            }
+        }
+    };
+}
+function scenarioSummaryToEventPayload(scenarioRef, overallPassed) {
+    return {
+        scenarioSummary: {
+            scenarioRef,
+            overallPassed
+        }
+    };
+}
+function evalErrorToEventPayload(message) {
+    return {
+        evalError: {
+            message
+        }
+    };
+}

package/dist/models/evaluations/types.d.ts CHANGED Viewed

@@ -16,8 +16,34 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+import { StepEvaluationReport } from "@fonoster/types";
 import { AssistantConfig } from "../../assistants";
 import { LanguageModel } from "../types";
+type EvalExpectedTool = {
+    tool: string;
+    parameters?: Record<string, unknown>;
+};
+type EvalConversationStep = {
+    userInput: string;
+    expected: {
+        text: {
+            type: "EXACT" | "SIMILAR";
+            response: string;
+        };
+        tools?: EvalExpectedTool[];
+    };
+};
+type EvalScenario = {
+    ref: string;
+    description: string;
+    telephonyContext: {
+        callDirection: string;
+        ingressNumber: string;
+        callerNumber: string;
+        metadata?: Record<string, string>;
+    };
+    conversation: EvalConversationStep[];
+};
 type EvaluateIntelligenceRequest = {
     intelligence: {
         productRef: string;
@@ -25,15 +51,19 @@ type EvaluateIntelligenceRequest = {
     };
 };
 type EvaluateStepParams = {
-    step: any;
+    step: EvalConversationStep;
     languageModel: LanguageModel;
     testTextSimilarity: (text1: string, text2: string) => Promise<boolean>;
     assistantConfig: AssistantConfig;
 };
 type ScenarioEvaluationRequest = {
     assistantConfig: AssistantConfig;
-    scenario: any;
+    scenario: EvalScenario;
     languageModel: LanguageModel;
     testTextSimilarity: (text1: string, text2: string) => Promise<boolean>;
 };
-export { EvaluateIntelligenceRequest, ScenarioEvaluationRequest, EvaluateStepParams };
+type RunEvalCallbacks = {
+    onStepResult: (scenarioRef: string, report: StepEvaluationReport) => void | Promise<void>;
+    onScenarioComplete: (scenarioRef: string, overallPassed: boolean) => void | Promise<void>;
+};
+export { EvalConversationStep, EvalExpectedTool, EvalScenario, EvaluateIntelligenceRequest, EvaluateStepParams, RunEvalCallbacks, ScenarioEvaluationRequest };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.17.4",
+  "version": "0.18.0",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -35,11 +35,11 @@
   "dependencies": {
     "@aws-sdk/client-s3": "^3.958.0",
     "@dmitryrechkin/json-schema-to-zod": "^1.0.1",
-    "@fonoster/common": "^0.17.4",
-    "@fonoster/logger": "^0.17.4",
-    "@fonoster/sdk": "^0.17.4",
-    "@fonoster/types": "^0.17.4",
-    "@fonoster/voice": "^0.17.4",
+    "@fonoster/common": "^0.18.0",
+    "@fonoster/logger": "^0.18.0",
+    "@fonoster/sdk": "^0.18.0",
+    "@fonoster/types": "^0.18.0",
+    "@fonoster/voice": "^0.18.0",
     "@langchain/anthropic": "^1.3.3",
     "@langchain/community": "^1.1.1",
     "@langchain/core": "^1.1.8",
@@ -59,5 +59,5 @@
     "xstate": "^5.17.3",
     "zod": "^3.25.76"
   },
-  "gitHead": "e0b7321a89bdc35b7081aeffefe7d68870c5c26f"
+  "gitHead": "051f172b266db965cf1d1366f563da995a29a93d"
 }