@fonoster/autopilot 0.17.4 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/models/createChain.d.ts +3 -1
- package/dist/models/evaluations/createEvaluateIntelligence.d.ts +8 -4
- package/dist/models/evaluations/createEvaluateIntelligence.js +36 -22
- package/dist/models/evaluations/evalTestCases.js +3 -1
- package/dist/models/evaluations/evaluateScenario.d.ts +3 -2
- package/dist/models/evaluations/evaluateScenario.js +7 -3
- package/dist/models/evaluations/evaluateSingleTool.d.ts +26 -0
- package/dist/models/evaluations/evaluateSingleTool.js +41 -0
- package/dist/models/evaluations/evaluateStep.js +9 -21
- package/dist/models/evaluations/evaluateToolCalls.d.ts +7 -1
- package/dist/models/evaluations/evaluateToolCalls.js +5 -48
- package/dist/models/evaluations/extractAiResponse.d.ts +25 -0
- package/dist/models/evaluations/extractAiResponse.js +19 -0
- package/dist/models/evaluations/index.d.ts +10 -1
- package/dist/models/evaluations/index.js +10 -1
- package/dist/models/evaluations/isValidIso8601Date.d.ts +19 -0
- package/dist/models/evaluations/isValidIso8601Date.js +27 -0
- package/dist/models/evaluations/runEval.d.ts +26 -0
- package/dist/models/evaluations/runEval.js +38 -0
- package/dist/models/evaluations/stepReportToEventPayload.d.ts +26 -0
- package/dist/models/evaluations/stepReportToEventPayload.js +73 -0
- package/dist/models/evaluations/types.d.ts +33 -3
- package/package.json +7 -7
|
@@ -21,5 +21,7 @@ import { RunnableSequence } from "@langchain/core/runnables";
|
|
|
21
21
|
import { KnowledgeBase } from "../knowledge";
|
|
22
22
|
import { createChatHistory } from "./chatHistory";
|
|
23
23
|
import { createPromptTemplate } from "./createPromptTemplate";
|
|
24
|
-
declare function createChain(model: BaseChatModel, knowledgeBase: KnowledgeBase, promptTemplate: ReturnType<typeof createPromptTemplate>, chatHistory: ReturnType<typeof createChatHistory>): RunnableSequence<
|
|
24
|
+
declare function createChain(model: BaseChatModel, knowledgeBase: KnowledgeBase, promptTemplate: ReturnType<typeof createPromptTemplate>, chatHistory: ReturnType<typeof createChatHistory>): RunnableSequence<{
|
|
25
|
+
text: string;
|
|
26
|
+
}, import("@langchain/core/messages").AIMessageChunk<import("@langchain/core/messages").MessageStructure>>;
|
|
25
27
|
export { createChain };
|
|
@@ -16,8 +16,12 @@
|
|
|
16
16
|
* See the License for the specific language governing permissions and
|
|
17
17
|
* limitations under the License.
|
|
18
18
|
*/
|
|
19
|
-
import {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
19
|
+
import { IntegrationConfig } from "@fonoster/common";
|
|
20
|
+
import { EvaluateIntelligenceRequest } from "./types";
|
|
21
|
+
type ServerStreamCall = {
|
|
22
|
+
request: EvaluateIntelligenceRequest;
|
|
23
|
+
write: (chunk: Record<string, unknown>) => void;
|
|
24
|
+
end: () => void;
|
|
25
|
+
};
|
|
26
|
+
declare function createEvaluateIntelligence(integrations: IntegrationConfig[]): (call: ServerStreamCall) => Promise<void>;
|
|
23
27
|
export { createEvaluateIntelligence };
|
|
@@ -24,10 +24,11 @@ const logger_1 = require("@fonoster/logger");
|
|
|
24
24
|
const pb_util_1 = require("pb-util");
|
|
25
25
|
const zod_1 = require("zod");
|
|
26
26
|
const createEvalEffectiveConfig_1 = require("./createEvalEffectiveConfig");
|
|
27
|
-
const
|
|
27
|
+
const runEval_1 = require("./runEval");
|
|
28
|
+
const stepReportToEventPayload_1 = require("./stepReportToEventPayload");
|
|
28
29
|
const logger = (0, logger_1.getLogger)({ service: "apiserver", filePath: __filename });
|
|
29
30
|
function createEvaluateIntelligence(integrations) {
|
|
30
|
-
const evaluateIntelligence = async (call
|
|
31
|
+
const evaluateIntelligence = async (call) => {
|
|
31
32
|
const { request } = call;
|
|
32
33
|
const { intelligence } = request;
|
|
33
34
|
const accessKeyId = (0, common_1.getAccessKeyIdFromCall)(call);
|
|
@@ -36,25 +37,38 @@ function createEvaluateIntelligence(integrations) {
|
|
|
36
37
|
llmProductRef: intelligence.productRef,
|
|
37
38
|
evalLlmProductRef: "llm.openai"
|
|
38
39
|
});
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
.
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
intelligence: {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
40
|
+
try {
|
|
41
|
+
const config = pb_util_1.struct.decode(intelligence.config);
|
|
42
|
+
const parsedIntelligence = zod_1.z
|
|
43
|
+
.object({
|
|
44
|
+
productRef: zod_1.z.string(),
|
|
45
|
+
config: common_1.assistantSchema
|
|
46
|
+
})
|
|
47
|
+
.parse({
|
|
48
|
+
productRef: intelligence.productRef,
|
|
49
|
+
config
|
|
50
|
+
});
|
|
51
|
+
const credentials = (0, common_1.findIntegrationsCredentials)(integrations, intelligence.productRef);
|
|
52
|
+
const evaluationApiKey = (0, common_1.findIntegrationsCredentials)(integrations, "llm.openai");
|
|
53
|
+
const effectiveConfig = (0, createEvalEffectiveConfig_1.createEvalEffectiveConfig)(parsedIntelligence.config, credentials, evaluationApiKey);
|
|
54
|
+
await (0, runEval_1.runEval)({ intelligence: { config: effectiveConfig } }, {
|
|
55
|
+
onStepResult: (scenarioRef, stepReport) => {
|
|
56
|
+
const payload = (0, stepReportToEventPayload_1.stepReportToEventPayload)(scenarioRef, stepReport);
|
|
57
|
+
call.write(payload);
|
|
58
|
+
},
|
|
59
|
+
onScenarioComplete: (scenarioRef, overallPassed) => {
|
|
60
|
+
const payload = (0, stepReportToEventPayload_1.scenarioSummaryToEventPayload)(scenarioRef, overallPassed);
|
|
61
|
+
call.write(payload);
|
|
62
|
+
}
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
catch (error) {
|
|
66
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
67
|
+
call.write((0, stepReportToEventPayload_1.evalErrorToEventPayload)(message));
|
|
68
|
+
}
|
|
69
|
+
finally {
|
|
70
|
+
call.end();
|
|
71
|
+
}
|
|
58
72
|
};
|
|
59
|
-
return
|
|
73
|
+
return evaluateIntelligence;
|
|
60
74
|
}
|
|
@@ -12,7 +12,9 @@ async function evalTestCases(autopilotApplication) {
|
|
|
12
12
|
say: async (_) => { }
|
|
13
13
|
};
|
|
14
14
|
const evaluationReports = [];
|
|
15
|
-
|
|
15
|
+
const scenarios = (testCases?.scenarios ?? []);
|
|
16
|
+
for (let i = 0; i < scenarios.length; i++) {
|
|
17
|
+
const scenario = scenarios[i];
|
|
16
18
|
const languageModel = (0, createLanguageModel_1.createLanguageModel)({
|
|
17
19
|
voice,
|
|
18
20
|
assistantConfig: autopilotApplication.intelligence.config,
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
* See the License for the specific language governing permissions and
|
|
17
17
|
* limitations under the License.
|
|
18
18
|
*/
|
|
19
|
-
import { ScenarioEvaluationReport } from "@fonoster/types";
|
|
19
|
+
import { ScenarioEvaluationReport, StepEvaluationReport } from "@fonoster/types";
|
|
20
20
|
import { ScenarioEvaluationRequest } from "./types";
|
|
21
|
-
export
|
|
21
|
+
export type OnStepResultCallback = (scenarioRef: string, report: StepEvaluationReport) => void | Promise<void>;
|
|
22
|
+
export declare function evaluateScenario(config: ScenarioEvaluationRequest, onStepResult?: OnStepResultCallback): Promise<ScenarioEvaluationReport>;
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.evaluateScenario = evaluateScenario;
|
|
4
4
|
const evaluateStep_1 = require("./evaluateStep");
|
|
5
|
-
async function evaluateScenario(config) {
|
|
5
|
+
async function evaluateScenario(config, onStepResult) {
|
|
6
6
|
const { scenario, languageModel, testTextSimilarity, assistantConfig } = config;
|
|
7
7
|
const results = [];
|
|
8
|
-
|
|
8
|
+
const steps = scenario.conversation;
|
|
9
|
+
for (let i = 0; i < steps.length; i++) {
|
|
10
|
+
const step = steps[i];
|
|
9
11
|
const stepResult = await (0, evaluateStep_1.evaluateStep)({
|
|
10
12
|
step,
|
|
11
13
|
languageModel,
|
|
@@ -13,8 +15,10 @@ async function evaluateScenario(config) {
|
|
|
13
15
|
assistantConfig
|
|
14
16
|
});
|
|
15
17
|
results.push(stepResult);
|
|
18
|
+
if (onStepResult)
|
|
19
|
+
await onStepResult(scenario.ref, stepResult);
|
|
16
20
|
}
|
|
17
|
-
const overallPassed = results.every((
|
|
21
|
+
const overallPassed = results.every((s) => s.passed);
|
|
18
22
|
return {
|
|
19
23
|
scenarioRef: scenario.ref,
|
|
20
24
|
overallPassed,
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
import { ToolEvaluationReport } from "@fonoster/types";
|
|
20
|
+
import { EvalExpectedTool } from "./types";
|
|
21
|
+
type ToolCallLike = {
|
|
22
|
+
name: string;
|
|
23
|
+
args?: Record<string, unknown>;
|
|
24
|
+
};
|
|
25
|
+
export declare function evaluateSingleTool(expected: EvalExpectedTool, actual: ToolCallLike): ToolEvaluationReport;
|
|
26
|
+
export {};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.evaluateSingleTool = evaluateSingleTool;
|
|
4
|
+
const isValidIso8601Date_1 = require("./isValidIso8601Date");
|
|
5
|
+
function evaluateSingleTool(expected, actual) {
|
|
6
|
+
let passed = true;
|
|
7
|
+
let errorMessage = "";
|
|
8
|
+
if (actual.name !== expected.tool) {
|
|
9
|
+
passed = false;
|
|
10
|
+
errorMessage = `Expected tool "${expected.tool}" but got "${actual.name}".`;
|
|
11
|
+
}
|
|
12
|
+
const expectedParams = expected.parameters ?? {};
|
|
13
|
+
const actualParams = actual.args ?? {};
|
|
14
|
+
for (const key of Object.keys(expectedParams)) {
|
|
15
|
+
const expectedVal = expectedParams[key];
|
|
16
|
+
const expectedStr = typeof expectedVal === "string"
|
|
17
|
+
? expectedVal.trim()
|
|
18
|
+
: String(expectedVal).trim();
|
|
19
|
+
if (expectedStr === "valid-date") {
|
|
20
|
+
if (!(0, isValidIso8601Date_1.isValidIso8601Date)(actualParams[key])) {
|
|
21
|
+
passed = false;
|
|
22
|
+
const paramMsg = `Expected parameter "${key}" to be a valid date, but got ${JSON.stringify(actualParams[key])}.`;
|
|
23
|
+
errorMessage = errorMessage ? `${errorMessage} ${paramMsg}` : paramMsg;
|
|
24
|
+
}
|
|
25
|
+
continue;
|
|
26
|
+
}
|
|
27
|
+
if (actualParams[key] !== expectedVal) {
|
|
28
|
+
passed = false;
|
|
29
|
+
const paramMsg = `Expected parameter "${key}" to have value ${JSON.stringify(expectedVal)}, but got ${JSON.stringify(actualParams[key])}.`;
|
|
30
|
+
errorMessage = errorMessage ? `${errorMessage} ${paramMsg}` : paramMsg;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
return {
|
|
34
|
+
expectedTool: expected.tool,
|
|
35
|
+
actualTool: actual.name,
|
|
36
|
+
passed,
|
|
37
|
+
expectedParameters: expected.parameters,
|
|
38
|
+
actualParameters: actual.args,
|
|
39
|
+
errorMessage: errorMessage || undefined
|
|
40
|
+
};
|
|
41
|
+
}
|
|
@@ -3,41 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.evaluateStep = evaluateStep;
|
|
4
4
|
const evaluateTextResponse_1 = require("./evaluateTextResponse");
|
|
5
5
|
const evaluateToolCalls_1 = require("./evaluateToolCalls");
|
|
6
|
+
const extractAiResponse_1 = require("./extractAiResponse");
|
|
6
7
|
async function evaluateStep({ step, languageModel, testTextSimilarity, assistantConfig }) {
|
|
7
8
|
const stepResult = {
|
|
8
9
|
humanInput: step.userInput,
|
|
9
10
|
expectedResponse: step.expected.text.response,
|
|
10
|
-
aiResponse: "",
|
|
11
|
+
aiResponse: "",
|
|
11
12
|
evaluationType: step.expected.text.type,
|
|
12
13
|
passed: true
|
|
13
14
|
};
|
|
14
15
|
try {
|
|
15
16
|
const response = await languageModel.invoke(step.userInput);
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
assistantConfig.conversationSettings?.goodbyeMessage || "";
|
|
22
|
-
}
|
|
23
|
-
else if (topTool.name === "transfer") {
|
|
24
|
-
stepResult.aiResponse =
|
|
25
|
-
assistantConfig.conversationSettings?.transferOptions?.message ?? "";
|
|
26
|
-
}
|
|
27
|
-
else {
|
|
28
|
-
stepResult.aiResponse = response.content ?? "";
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
else {
|
|
32
|
-
stepResult.aiResponse = response.content ?? "";
|
|
33
|
-
}
|
|
34
|
-
const textEvaluation = await (0, evaluateTextResponse_1.evaluateTextResponse)(step.expected.text, stepResult.aiResponse, testTextSimilarity);
|
|
17
|
+
stepResult.aiResponse = (0, extractAiResponse_1.extractAiResponse)(response, assistantConfig);
|
|
18
|
+
const textEvaluation = await (0, evaluateTextResponse_1.evaluateTextResponse)({
|
|
19
|
+
type: step.expected.text.type,
|
|
20
|
+
response: step.expected.text.response
|
|
21
|
+
}, stepResult.aiResponse, testTextSimilarity);
|
|
35
22
|
if (!textEvaluation.passed) {
|
|
36
23
|
stepResult.passed = false;
|
|
37
24
|
stepResult.errorMessage = textEvaluation.errorMessage;
|
|
38
25
|
}
|
|
39
26
|
if (step.expected.tools && step.expected.tools.length > 0) {
|
|
40
|
-
const
|
|
27
|
+
const toolCalls = response.toolCalls?.filter((tc) => tc?.name) ?? [];
|
|
28
|
+
const toolsEvaluation = (0, evaluateToolCalls_1.evaluateToolCalls)(step.expected.tools, toolCalls);
|
|
41
29
|
stepResult.toolEvaluations = toolsEvaluation.evaluations;
|
|
42
30
|
if (!toolsEvaluation.passed) {
|
|
43
31
|
stepResult.passed = false;
|
|
@@ -17,8 +17,14 @@
|
|
|
17
17
|
* limitations under the License.
|
|
18
18
|
*/
|
|
19
19
|
import { ToolEvaluationReport } from "@fonoster/types";
|
|
20
|
-
|
|
20
|
+
import { EvalExpectedTool } from "./types";
|
|
21
|
+
type ToolCallLike = {
|
|
22
|
+
name: string;
|
|
23
|
+
args?: Record<string, unknown>;
|
|
24
|
+
};
|
|
25
|
+
export declare function evaluateToolCalls(expectedTools: EvalExpectedTool[], toolCalls: ToolCallLike[] | undefined): {
|
|
21
26
|
evaluations: ToolEvaluationReport[];
|
|
22
27
|
passed: boolean;
|
|
23
28
|
errorMessage?: string;
|
|
24
29
|
};
|
|
30
|
+
export {};
|
|
@@ -1,15 +1,10 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.evaluateToolCalls = evaluateToolCalls;
|
|
7
|
-
const
|
|
4
|
+
const evaluateSingleTool_1 = require("./evaluateSingleTool");
|
|
8
5
|
function evaluateToolCalls(expectedTools, toolCalls) {
|
|
9
6
|
const evaluations = [];
|
|
10
|
-
let overallPassed = true;
|
|
11
7
|
if (!toolCalls || toolCalls.length !== expectedTools.length) {
|
|
12
|
-
overallPassed = false;
|
|
13
8
|
evaluations.push({
|
|
14
9
|
expectedTool: "",
|
|
15
10
|
actualTool: "",
|
|
@@ -20,52 +15,14 @@ function evaluateToolCalls(expectedTools, toolCalls) {
|
|
|
20
15
|
});
|
|
21
16
|
return {
|
|
22
17
|
evaluations,
|
|
23
|
-
passed:
|
|
24
|
-
errorMessage:
|
|
18
|
+
passed: false,
|
|
19
|
+
errorMessage: "Tool invocation count mismatch."
|
|
25
20
|
};
|
|
26
21
|
}
|
|
27
22
|
for (let i = 0; i < expectedTools.length; i++) {
|
|
28
|
-
|
|
29
|
-
const actualCall = toolCalls[i];
|
|
30
|
-
let toolPassed = true;
|
|
31
|
-
let errorMessage = "";
|
|
32
|
-
if (actualCall.name !== expectedTool.tool) {
|
|
33
|
-
toolPassed = false;
|
|
34
|
-
errorMessage = `Expected tool "${expectedTool.tool}" but got "${actualCall.name}".`;
|
|
35
|
-
}
|
|
36
|
-
const expectedParams = expectedTool.parameters || {};
|
|
37
|
-
const actualParams = actualCall.args || {};
|
|
38
|
-
for (const key of Object.keys(expectedParams)) {
|
|
39
|
-
// Check for the special case of a valid-date
|
|
40
|
-
if (expectedParams[key].trim() === "valid-date") {
|
|
41
|
-
actualParams[key] = (0, moment_1.default)(actualParams[key], moment_1.default.ISO_8601, true);
|
|
42
|
-
if (!actualParams[key].isValid()) {
|
|
43
|
-
toolPassed = false;
|
|
44
|
-
const paramMsg = `Expected parameter "${key}" to be a valid date, but got ${JSON.stringify(actualParams[key])}.`;
|
|
45
|
-
errorMessage = errorMessage
|
|
46
|
-
? errorMessage + " " + paramMsg
|
|
47
|
-
: paramMsg;
|
|
48
|
-
}
|
|
49
|
-
continue;
|
|
50
|
-
}
|
|
51
|
-
if (actualParams[key] !== expectedParams[key]) {
|
|
52
|
-
toolPassed = false;
|
|
53
|
-
const paramMsg = `Expected parameter "${key}" to have value ${JSON.stringify(expectedParams[key])}, but got ${JSON.stringify(actualParams[key])}.`;
|
|
54
|
-
errorMessage = errorMessage ? errorMessage + " " + paramMsg : paramMsg;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
if (!toolPassed) {
|
|
58
|
-
overallPassed = false;
|
|
59
|
-
}
|
|
60
|
-
evaluations.push({
|
|
61
|
-
expectedTool: expectedTool.tool,
|
|
62
|
-
actualTool: actualCall.name,
|
|
63
|
-
passed: toolPassed,
|
|
64
|
-
expectedParameters: expectedTool.parameters,
|
|
65
|
-
actualParameters: actualCall.args,
|
|
66
|
-
errorMessage: errorMessage || undefined
|
|
67
|
-
});
|
|
23
|
+
evaluations.push((0, evaluateSingleTool_1.evaluateSingleTool)(expectedTools[i], toolCalls[i]));
|
|
68
24
|
}
|
|
25
|
+
const overallPassed = evaluations.every((e) => e.passed);
|
|
69
26
|
return {
|
|
70
27
|
evaluations,
|
|
71
28
|
passed: overallPassed,
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
import { AssistantConfig } from "../../assistants";
|
|
20
|
+
import { InvocationResult } from "../types";
|
|
21
|
+
/**
|
|
22
|
+
* Maps language model invocation result to the text that should be compared
|
|
23
|
+
* in eval (hangup/transfer use config messages).
|
|
24
|
+
*/
|
|
25
|
+
export declare function extractAiResponse(response: InvocationResult, assistantConfig: AssistantConfig): string;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractAiResponse = extractAiResponse;
|
|
4
|
+
/**
|
|
5
|
+
* Maps language model invocation result to the text that should be compared
|
|
6
|
+
* in eval (hangup/transfer use config messages).
|
|
7
|
+
*/
|
|
8
|
+
function extractAiResponse(response, assistantConfig) {
|
|
9
|
+
if (response.toolCalls && response.toolCalls.length > 0) {
|
|
10
|
+
const topTool = response.toolCalls[0];
|
|
11
|
+
if (topTool.name === "hangup") {
|
|
12
|
+
return assistantConfig.conversationSettings?.goodbyeMessage ?? "";
|
|
13
|
+
}
|
|
14
|
+
if (topTool.name === "transfer") {
|
|
15
|
+
return (assistantConfig.conversationSettings?.transferOptions?.message ?? "");
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return response.content ?? "";
|
|
19
|
+
}
|
|
@@ -16,6 +16,15 @@
|
|
|
16
16
|
* See the License for the specific language governing permissions and
|
|
17
17
|
* limitations under the License.
|
|
18
18
|
*/
|
|
19
|
+
export * from "./createEvaluateIntelligence";
|
|
20
|
+
export * from "./createTestTextSimilarity";
|
|
19
21
|
export * from "./evalTestCases";
|
|
22
|
+
export * from "./evaluateScenario";
|
|
23
|
+
export * from "./evaluateSingleTool";
|
|
24
|
+
export * from "./evaluateStep";
|
|
25
|
+
export * from "./evaluateTextResponse";
|
|
26
|
+
export * from "./evaluateToolCalls";
|
|
27
|
+
export * from "./extractAiResponse";
|
|
28
|
+
export * from "./isValidIso8601Date";
|
|
29
|
+
export * from "./runEval";
|
|
20
30
|
export * from "./types";
|
|
21
|
-
export * from "./createEvaluateIntelligence";
|
|
@@ -32,6 +32,15 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
32
32
|
* See the License for the specific language governing permissions and
|
|
33
33
|
* limitations under the License.
|
|
34
34
|
*/
|
|
35
|
+
__exportStar(require("./createEvaluateIntelligence"), exports);
|
|
36
|
+
__exportStar(require("./createTestTextSimilarity"), exports);
|
|
35
37
|
__exportStar(require("./evalTestCases"), exports);
|
|
38
|
+
__exportStar(require("./evaluateScenario"), exports);
|
|
39
|
+
__exportStar(require("./evaluateSingleTool"), exports);
|
|
40
|
+
__exportStar(require("./evaluateStep"), exports);
|
|
41
|
+
__exportStar(require("./evaluateTextResponse"), exports);
|
|
42
|
+
__exportStar(require("./evaluateToolCalls"), exports);
|
|
43
|
+
__exportStar(require("./extractAiResponse"), exports);
|
|
44
|
+
__exportStar(require("./isValidIso8601Date"), exports);
|
|
45
|
+
__exportStar(require("./runEval"), exports);
|
|
36
46
|
__exportStar(require("./types"), exports);
|
|
37
|
-
__exportStar(require("./createEvaluateIntelligence"), exports);
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
export declare function isValidIso8601Date(value: unknown): boolean;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
4
|
+
* http://github.com/fonoster/fonoster
|
|
5
|
+
*
|
|
6
|
+
* This file is part of Fonoster
|
|
7
|
+
*
|
|
8
|
+
* Licensed under the MIT License (the "License");
|
|
9
|
+
* you may not use this file except in compliance with
|
|
10
|
+
* the License. You may obtain a copy of the License at
|
|
11
|
+
*
|
|
12
|
+
* https://opensource.org/licenses/MIT
|
|
13
|
+
*
|
|
14
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
* See the License for the specific language governing permissions and
|
|
18
|
+
* limitations under the License.
|
|
19
|
+
*/
|
|
20
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
+
exports.isValidIso8601Date = isValidIso8601Date;
|
|
22
|
+
function isValidIso8601Date(value) {
|
|
23
|
+
if (typeof value !== "string")
|
|
24
|
+
return false;
|
|
25
|
+
const time = Date.parse(value);
|
|
26
|
+
return !Number.isNaN(time);
|
|
27
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
import { AssistantConfig } from "../../assistants";
|
|
20
|
+
import { RunEvalCallbacks } from "./types";
|
|
21
|
+
export type RunEvalConfig = {
|
|
22
|
+
intelligence: {
|
|
23
|
+
config: AssistantConfig;
|
|
24
|
+
};
|
|
25
|
+
};
|
|
26
|
+
export declare function runEval(autopilotApplication: RunEvalConfig, callbacks: RunEvalCallbacks): Promise<void>;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runEval = runEval;
|
|
4
|
+
const createLanguageModel_1 = require("../createLanguageModel");
|
|
5
|
+
const createTestTextSimilarity_1 = require("./createTestTextSimilarity");
|
|
6
|
+
const evaluateScenario_1 = require("./evaluateScenario");
|
|
7
|
+
const textSimilaryPrompt_1 = require("./textSimilaryPrompt");
|
|
8
|
+
async function runEval(autopilotApplication, callbacks) {
|
|
9
|
+
const { testCases } = autopilotApplication.intelligence.config;
|
|
10
|
+
const assistantConfig = autopilotApplication.intelligence.config;
|
|
11
|
+
const voice = {
|
|
12
|
+
say: async (_) => { }
|
|
13
|
+
};
|
|
14
|
+
const scenarios = testCases?.scenarios ?? [];
|
|
15
|
+
for (const scenario of scenarios) {
|
|
16
|
+
const languageModel = (0, createLanguageModel_1.createLanguageModel)({
|
|
17
|
+
voice,
|
|
18
|
+
assistantConfig,
|
|
19
|
+
knowledgeBase: {
|
|
20
|
+
load: async () => { },
|
|
21
|
+
queryKnowledgeBase: async (query) => query
|
|
22
|
+
},
|
|
23
|
+
telephonyContext: scenario.telephonyContext
|
|
24
|
+
});
|
|
25
|
+
const testTextSimilarity = (0, createTestTextSimilarity_1.createTestTextSimilarity)({
|
|
26
|
+
provider: assistantConfig.testCases?.evalsLanguageModel?.provider,
|
|
27
|
+
model: assistantConfig.testCases?.evalsLanguageModel?.model ?? "",
|
|
28
|
+
apiKey: assistantConfig.testCases?.evalsLanguageModel?.apiKey
|
|
29
|
+
}, assistantConfig.testCases?.evalsSystemPrompt ?? textSimilaryPrompt_1.textSimilaryPrompt);
|
|
30
|
+
const report = await (0, evaluateScenario_1.evaluateScenario)({
|
|
31
|
+
assistantConfig,
|
|
32
|
+
scenario,
|
|
33
|
+
languageModel,
|
|
34
|
+
testTextSimilarity
|
|
35
|
+
}, (scenarioRef, stepReport) => callbacks.onStepResult(scenarioRef, stepReport));
|
|
36
|
+
await callbacks.onScenarioComplete(report.scenarioRef, report.overallPassed);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
3
|
+
* http://github.com/fonoster/fonoster
|
|
4
|
+
*
|
|
5
|
+
* This file is part of Fonoster
|
|
6
|
+
*
|
|
7
|
+
* Licensed under the MIT License (the "License");
|
|
8
|
+
* you may not use this file except in compliance with
|
|
9
|
+
* the License. You may obtain a copy of the License at
|
|
10
|
+
*
|
|
11
|
+
* https://opensource.org/licenses/MIT
|
|
12
|
+
*
|
|
13
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
* See the License for the specific language governing permissions and
|
|
17
|
+
* limitations under the License.
|
|
18
|
+
*/
|
|
19
|
+
import { StepEvaluationReport } from "@fonoster/types";
|
|
20
|
+
/**
|
|
21
|
+
* Converts StepEvaluationReport to the wire payload shape expected by the gRPC stream
|
|
22
|
+
* (camelCase, evaluationType as number for proto).
|
|
23
|
+
*/
|
|
24
|
+
export declare function stepReportToEventPayload(scenarioRef: string, report: StepEvaluationReport): Record<string, unknown>;
|
|
25
|
+
export declare function scenarioSummaryToEventPayload(scenarioRef: string, overallPassed: boolean): Record<string, unknown>;
|
|
26
|
+
export declare function evalErrorToEventPayload(message: string): Record<string, unknown>;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.stepReportToEventPayload = stepReportToEventPayload;
|
|
4
|
+
exports.scenarioSummaryToEventPayload = scenarioSummaryToEventPayload;
|
|
5
|
+
exports.evalErrorToEventPayload = evalErrorToEventPayload;
|
|
6
|
+
/**
|
|
7
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
8
|
+
* http://github.com/fonoster/fonoster
|
|
9
|
+
*
|
|
10
|
+
* This file is part of Fonoster
|
|
11
|
+
*
|
|
12
|
+
* Licensed under the MIT License (the "License");
|
|
13
|
+
* you may not use this file except in compliance with
|
|
14
|
+
* the License. You may obtain a copy of the License at
|
|
15
|
+
*
|
|
16
|
+
* https://opensource.org/licenses/MIT
|
|
17
|
+
*
|
|
18
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
* See the License for the specific language governing permissions and
|
|
22
|
+
* limitations under the License.
|
|
23
|
+
*/
|
|
24
|
+
const types_1 = require("@fonoster/types");
|
|
25
|
+
const EVALUATION_TYPE_TO_NUM = {
|
|
26
|
+
[types_1.ExpectedTextType.EXACT]: 0,
|
|
27
|
+
[types_1.ExpectedTextType.SIMILAR]: 1
|
|
28
|
+
};
|
|
29
|
+
function toolReportToPayload(t) {
|
|
30
|
+
return {
|
|
31
|
+
expectedTool: t.expectedTool,
|
|
32
|
+
actualTool: t.actualTool,
|
|
33
|
+
passed: t.passed,
|
|
34
|
+
expectedParameters: t.expectedParameters,
|
|
35
|
+
actualParameters: t.actualParameters,
|
|
36
|
+
errorMessage: t.errorMessage ?? ""
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Converts StepEvaluationReport to the wire payload shape expected by the gRPC stream
|
|
41
|
+
* (camelCase, evaluationType as number for proto).
|
|
42
|
+
*/
|
|
43
|
+
function stepReportToEventPayload(scenarioRef, report) {
|
|
44
|
+
return {
|
|
45
|
+
stepResult: {
|
|
46
|
+
scenarioRef,
|
|
47
|
+
report: {
|
|
48
|
+
humanInput: report.humanInput,
|
|
49
|
+
expectedResponse: report.expectedResponse,
|
|
50
|
+
aiResponse: report.aiResponse,
|
|
51
|
+
evaluationType: EVALUATION_TYPE_TO_NUM[report.evaluationType] ?? 0,
|
|
52
|
+
passed: report.passed,
|
|
53
|
+
errorMessage: report.errorMessage ?? "",
|
|
54
|
+
toolEvaluations: (report.toolEvaluations ?? []).map(toolReportToPayload)
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
function scenarioSummaryToEventPayload(scenarioRef, overallPassed) {
|
|
60
|
+
return {
|
|
61
|
+
scenarioSummary: {
|
|
62
|
+
scenarioRef,
|
|
63
|
+
overallPassed
|
|
64
|
+
}
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
function evalErrorToEventPayload(message) {
|
|
68
|
+
return {
|
|
69
|
+
evalError: {
|
|
70
|
+
message
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
}
|
|
@@ -16,8 +16,34 @@
|
|
|
16
16
|
* See the License for the specific language governing permissions and
|
|
17
17
|
* limitations under the License.
|
|
18
18
|
*/
|
|
19
|
+
import { StepEvaluationReport } from "@fonoster/types";
|
|
19
20
|
import { AssistantConfig } from "../../assistants";
|
|
20
21
|
import { LanguageModel } from "../types";
|
|
22
|
+
type EvalExpectedTool = {
|
|
23
|
+
tool: string;
|
|
24
|
+
parameters?: Record<string, unknown>;
|
|
25
|
+
};
|
|
26
|
+
type EvalConversationStep = {
|
|
27
|
+
userInput: string;
|
|
28
|
+
expected: {
|
|
29
|
+
text: {
|
|
30
|
+
type: "EXACT" | "SIMILAR";
|
|
31
|
+
response: string;
|
|
32
|
+
};
|
|
33
|
+
tools?: EvalExpectedTool[];
|
|
34
|
+
};
|
|
35
|
+
};
|
|
36
|
+
type EvalScenario = {
|
|
37
|
+
ref: string;
|
|
38
|
+
description: string;
|
|
39
|
+
telephonyContext: {
|
|
40
|
+
callDirection: string;
|
|
41
|
+
ingressNumber: string;
|
|
42
|
+
callerNumber: string;
|
|
43
|
+
metadata?: Record<string, string>;
|
|
44
|
+
};
|
|
45
|
+
conversation: EvalConversationStep[];
|
|
46
|
+
};
|
|
21
47
|
type EvaluateIntelligenceRequest = {
|
|
22
48
|
intelligence: {
|
|
23
49
|
productRef: string;
|
|
@@ -25,15 +51,19 @@ type EvaluateIntelligenceRequest = {
|
|
|
25
51
|
};
|
|
26
52
|
};
|
|
27
53
|
type EvaluateStepParams = {
|
|
28
|
-
step:
|
|
54
|
+
step: EvalConversationStep;
|
|
29
55
|
languageModel: LanguageModel;
|
|
30
56
|
testTextSimilarity: (text1: string, text2: string) => Promise<boolean>;
|
|
31
57
|
assistantConfig: AssistantConfig;
|
|
32
58
|
};
|
|
33
59
|
type ScenarioEvaluationRequest = {
|
|
34
60
|
assistantConfig: AssistantConfig;
|
|
35
|
-
scenario:
|
|
61
|
+
scenario: EvalScenario;
|
|
36
62
|
languageModel: LanguageModel;
|
|
37
63
|
testTextSimilarity: (text1: string, text2: string) => Promise<boolean>;
|
|
38
64
|
};
|
|
39
|
-
|
|
65
|
+
type RunEvalCallbacks = {
|
|
66
|
+
onStepResult: (scenarioRef: string, report: StepEvaluationReport) => void | Promise<void>;
|
|
67
|
+
onScenarioComplete: (scenarioRef: string, overallPassed: boolean) => void | Promise<void>;
|
|
68
|
+
};
|
|
69
|
+
export { EvalConversationStep, EvalExpectedTool, EvalScenario, EvaluateIntelligenceRequest, EvaluateStepParams, RunEvalCallbacks, ScenarioEvaluationRequest };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/autopilot",
|
|
3
|
-
"version": "0.17.4",
|
|
3
|
+
"version": "0.18.0",
|
|
4
4
|
"description": "Voice AI for the Fonoster platform",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -35,11 +35,11 @@
|
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@aws-sdk/client-s3": "^3.958.0",
|
|
37
37
|
"@dmitryrechkin/json-schema-to-zod": "^1.0.1",
|
|
38
|
-
"@fonoster/common": "^0.
|
|
39
|
-
"@fonoster/logger": "^0.
|
|
40
|
-
"@fonoster/sdk": "^0.
|
|
41
|
-
"@fonoster/types": "^0.
|
|
42
|
-
"@fonoster/voice": "^0.
|
|
38
|
+
"@fonoster/common": "^0.18.0",
|
|
39
|
+
"@fonoster/logger": "^0.18.0",
|
|
40
|
+
"@fonoster/sdk": "^0.18.0",
|
|
41
|
+
"@fonoster/types": "^0.18.0",
|
|
42
|
+
"@fonoster/voice": "^0.18.0",
|
|
43
43
|
"@langchain/anthropic": "^1.3.3",
|
|
44
44
|
"@langchain/community": "^1.1.1",
|
|
45
45
|
"@langchain/core": "^1.1.8",
|
|
@@ -59,5 +59,5 @@
|
|
|
59
59
|
"xstate": "^5.17.3",
|
|
60
60
|
"zod": "^3.25.76"
|
|
61
61
|
},
|
|
62
|
-
"gitHead": "
|
|
62
|
+
"gitHead": "051f172b266db965cf1d1366f563da995a29a93d"
|
|
63
63
|
}
|