@fonoster/ctl 0.17.4 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -4,6 +4,8 @@ export default class EvalIntelligence extends AuthenticatedCommand<typeof EvalIn
|
|
|
4
4
|
static readonly examples: string[];
|
|
5
5
|
static readonly flags: {
|
|
6
6
|
file: import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
|
|
7
|
+
output: import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
|
|
8
|
+
"output-file": import("@oclif/core/lib/interfaces").OptionFlag<string, import("@oclif/core/lib/interfaces").CustomOptions>;
|
|
7
9
|
};
|
|
8
10
|
run(): Promise<void>;
|
|
9
11
|
}
|
|
@@ -41,6 +41,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
41
41
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
42
42
|
});
|
|
43
43
|
};
|
|
44
|
+
var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
45
|
+
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
|
|
46
|
+
var m = o[Symbol.asyncIterator], i;
|
|
47
|
+
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
|
|
48
|
+
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
|
|
49
|
+
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
|
|
50
|
+
};
|
|
44
51
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
45
52
|
/**
|
|
46
53
|
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
@@ -72,6 +79,8 @@ const printEval_1 = require("../../utils/printEval");
|
|
|
72
79
|
class EvalIntelligence extends AuthenticatedCommand_1.AuthenticatedCommand {
|
|
73
80
|
run() {
|
|
74
81
|
return __awaiter(this, void 0, void 0, function* () {
|
|
82
|
+
var _a, e_1, _b, _c;
|
|
83
|
+
var _d;
|
|
75
84
|
const { flags } = yield this.parse(EvalIntelligence);
|
|
76
85
|
const client = yield this.createSdkClient();
|
|
77
86
|
const applications = new SDK.Applications(client);
|
|
@@ -104,28 +113,93 @@ class EvalIntelligence extends AuthenticatedCommand_1.AuthenticatedCommand {
|
|
|
104
113
|
rawAutopilotApplication.intelligence.config.testCases.scenarios =
|
|
105
114
|
mappedScenarios;
|
|
106
115
|
const parsedAutopilotApplication = common_1.assistantSchema.parse(rawAutopilotApplication.intelligence.config);
|
|
107
|
-
// We only need the intelligence portion of the application
|
|
108
116
|
const autopilotApplication = {
|
|
109
117
|
intelligence: {
|
|
110
118
|
productRef: rawAutopilotApplication.intelligence.productRef,
|
|
111
119
|
config: parsedAutopilotApplication
|
|
112
120
|
}
|
|
113
121
|
};
|
|
114
|
-
const
|
|
115
|
-
|
|
122
|
+
const stream = applications.evaluateIntelligence(autopilotApplication);
|
|
123
|
+
const outputJson = flags.output === "json";
|
|
124
|
+
const writeOutputFile = Boolean(flags["output-file"]);
|
|
125
|
+
const collectEvents = outputJson || writeOutputFile;
|
|
126
|
+
const events = [];
|
|
127
|
+
let currentScenarioRef = null;
|
|
128
|
+
const stepIndexByScenario = new Map();
|
|
129
|
+
try {
|
|
130
|
+
for (var _e = true, stream_1 = __asyncValues(stream), stream_1_1; stream_1_1 = yield stream_1.next(), _a = stream_1_1.done, !_a; _e = true) {
|
|
131
|
+
_c = stream_1_1.value;
|
|
132
|
+
_e = false;
|
|
133
|
+
const event = _c;
|
|
134
|
+
if (collectEvents)
|
|
135
|
+
events.push(event);
|
|
136
|
+
if (outputJson)
|
|
137
|
+
continue;
|
|
138
|
+
if (event.type === "stepResult") {
|
|
139
|
+
if (currentScenarioRef !== event.scenarioRef) {
|
|
140
|
+
currentScenarioRef = event.scenarioRef;
|
|
141
|
+
(0, printEval_1.printScenarioHeader)(event.scenarioRef);
|
|
142
|
+
stepIndexByScenario.set(event.scenarioRef, 0);
|
|
143
|
+
}
|
|
144
|
+
const stepIndex = (_d = stepIndexByScenario.get(event.scenarioRef)) !== null && _d !== void 0 ? _d : 0;
|
|
145
|
+
(0, printEval_1.printStepResult)(event.scenarioRef, stepIndex, event.stepResult);
|
|
146
|
+
stepIndexByScenario.set(event.scenarioRef, stepIndex + 1);
|
|
147
|
+
}
|
|
148
|
+
else if (event.type === "scenarioSummary") {
|
|
149
|
+
(0, printEval_1.printScenarioSummary)(event.scenarioRef, event.overallPassed);
|
|
150
|
+
}
|
|
151
|
+
else if (event.type === "evalError") {
|
|
152
|
+
(0, printEval_1.printEvalError)(event.message);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
157
|
+
finally {
|
|
158
|
+
try {
|
|
159
|
+
if (!_e && !_a && (_b = stream_1.return)) yield _b.call(stream_1);
|
|
160
|
+
}
|
|
161
|
+
finally { if (e_1) throw e_1.error; }
|
|
162
|
+
}
|
|
163
|
+
if (!collectEvents)
|
|
164
|
+
return;
|
|
165
|
+
const summary = (0, printEval_1.buildEvalSummary)(events);
|
|
166
|
+
const jsonString = JSON.stringify(summary, null, 2);
|
|
167
|
+
if (outputJson) {
|
|
168
|
+
if (writeOutputFile && flags["output-file"]) {
|
|
169
|
+
fs.writeFileSync(flags["output-file"], jsonString, "utf8");
|
|
170
|
+
}
|
|
171
|
+
else {
|
|
172
|
+
console.log(jsonString);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
else if (writeOutputFile && flags["output-file"]) {
|
|
176
|
+
fs.writeFileSync(flags["output-file"], jsonString, "utf8");
|
|
177
|
+
}
|
|
116
178
|
});
|
|
117
179
|
}
|
|
118
180
|
}
|
|
119
181
|
EvalIntelligence.description = "experimental command to test an Autopilot application";
|
|
120
182
|
EvalIntelligence.examples = [
|
|
121
183
|
"<%= config.bin %> <%= command.id %> -f assistant.json",
|
|
122
|
-
"<%= config.bin %> <%= command.id %> -f assistant.yaml"
|
|
184
|
+
"<%= config.bin %> <%= command.id %> -f assistant.yaml",
|
|
185
|
+
"<%= config.bin %> <%= command.id %> -f assistant.yaml -o json",
|
|
186
|
+
"<%= config.bin %> <%= command.id %> -f assistant.yaml -o json --output-file results.json"
|
|
123
187
|
];
|
|
124
188
|
EvalIntelligence.flags = {
|
|
125
189
|
file: core_1.Flags.string({
|
|
126
190
|
char: "f",
|
|
127
191
|
description: "path to test cases file (json, yaml, or yml)",
|
|
128
192
|
required: true
|
|
193
|
+
}),
|
|
194
|
+
output: core_1.Flags.string({
|
|
195
|
+
char: "o",
|
|
196
|
+
description: "output format",
|
|
197
|
+
options: ["pretty", "json"],
|
|
198
|
+
default: "pretty"
|
|
199
|
+
}),
|
|
200
|
+
"output-file": core_1.Flags.string({
|
|
201
|
+
description: "write JSON summary to this file (with pretty: also show streamed output)",
|
|
202
|
+
required: false
|
|
129
203
|
})
|
|
130
204
|
};
|
|
131
205
|
exports.default = EvalIntelligence;
|
|
@@ -1,20 +1,25 @@
|
|
|
1
|
+
import type { EvaluateIntelligenceEvent, ScenarioEvaluationReport, StepEvaluationReport } from "@fonoster/types";
|
|
2
|
+
export type EvalSummary = {
|
|
3
|
+
scenarios: ScenarioEvaluationReport[];
|
|
4
|
+
errors: string[];
|
|
5
|
+
};
|
|
1
6
|
/**
|
|
2
|
-
*
|
|
3
|
-
* http://github.com/fonoster/fonoster
|
|
4
|
-
*
|
|
5
|
-
* This file is part of Fonoster
|
|
6
|
-
*
|
|
7
|
-
* Licensed under the MIT License (the "License");
|
|
8
|
-
* you may not use this file except in compliance with
|
|
9
|
-
* the License. You may obtain a copy of the License at
|
|
10
|
-
*
|
|
11
|
-
* https://opensource.org/licenses/MIT
|
|
12
|
-
*
|
|
13
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
14
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
-
* See the License for the specific language governing permissions and
|
|
17
|
-
* limitations under the License.
|
|
7
|
+
* Builds a single JSON summary from streamed eval events.
|
|
18
8
|
*/
|
|
19
|
-
|
|
20
|
-
|
|
9
|
+
export declare function buildEvalSummary(events: EvaluateIntelligenceEvent[]): EvalSummary;
|
|
10
|
+
/**
|
|
11
|
+
* Prints a single step result in vertical layout (Step, Human, AI Expected, AI Actual, Tool, Passed).
|
|
12
|
+
*/
|
|
13
|
+
export declare function printStepResult(_scenarioRef: string, stepIndex: number, step: StepEvaluationReport): void;
|
|
14
|
+
/**
|
|
15
|
+
* Prints scenario header (call once before first step of a scenario).
|
|
16
|
+
*/
|
|
17
|
+
export declare function printScenarioHeader(scenarioRef: string): void;
|
|
18
|
+
/**
|
|
19
|
+
* Prints scenario completion summary.
|
|
20
|
+
*/
|
|
21
|
+
export declare function printScenarioSummary(scenarioRef: string, overallPassed: boolean): void;
|
|
22
|
+
/**
|
|
23
|
+
* Prints an eval error event.
|
|
24
|
+
*/
|
|
25
|
+
export declare function printEvalError(message: string): void;
|
package/dist/utils/printEval.js
CHANGED
|
@@ -3,78 +3,134 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.
|
|
6
|
+
exports.buildEvalSummary = buildEvalSummary;
|
|
7
|
+
exports.printStepResult = printStepResult;
|
|
8
|
+
exports.printScenarioHeader = printScenarioHeader;
|
|
9
|
+
exports.printScenarioSummary = printScenarioSummary;
|
|
10
|
+
exports.printEvalError = printEvalError;
|
|
11
|
+
/**
|
|
12
|
+
* Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
|
|
13
|
+
* http://github.com/fonoster/fonoster
|
|
14
|
+
*
|
|
15
|
+
* This file is part of Fonoster
|
|
16
|
+
*
|
|
17
|
+
* Licensed under the MIT License (the "License");
|
|
18
|
+
* you may not use this file except in compliance with
|
|
19
|
+
* the License. You may obtain a copy of the License at
|
|
20
|
+
*
|
|
21
|
+
* https://opensource.org/licenses/MIT
|
|
22
|
+
*
|
|
23
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
24
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
25
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
26
|
+
* See the License for the specific language governing permissions and
|
|
27
|
+
* limitations under the License.
|
|
28
|
+
*/
|
|
7
29
|
const ansis_1 = __importDefault(require("ansis"));
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
28, // Human Input
|
|
25
|
-
28, // Expected
|
|
26
|
-
28, // AI Response
|
|
27
|
-
null, // Tool Calls - dynamic width
|
|
28
|
-
8 // Passed
|
|
29
|
-
],
|
|
30
|
-
wordWrap: true
|
|
31
|
-
});
|
|
32
|
-
result.steps.forEach((step, index) => {
|
|
33
|
-
// Format tool evaluations if they exist
|
|
34
|
-
let toolEvalText = "";
|
|
35
|
-
if (step.toolEvaluations && step.toolEvaluations.length > 0) {
|
|
36
|
-
toolEvalText = step.toolEvaluations
|
|
37
|
-
.map((toolEval) => {
|
|
38
|
-
if (!Object.keys(toolEval.actualParameters || {}).length) {
|
|
39
|
-
return `${toolEval.actualTool}()`;
|
|
40
|
-
}
|
|
41
|
-
const params = JSON.stringify(toolEval.actualParameters || {}, null, 1)
|
|
42
|
-
.split("\n")
|
|
43
|
-
.map((line, index, arr) => {
|
|
44
|
-
if (index === 0)
|
|
45
|
-
return "";
|
|
46
|
-
if (index === arr.length - 1)
|
|
47
|
-
return "";
|
|
48
|
-
return " " + line.trim();
|
|
49
|
-
})
|
|
50
|
-
.join("\n");
|
|
51
|
-
return `${toolEval.actualTool}({${params}})`;
|
|
52
|
-
})
|
|
53
|
-
.join("\n\n"); // Add extra line between multiple tool calls
|
|
30
|
+
/**
|
|
31
|
+
* Builds a single JSON summary from streamed eval events.
|
|
32
|
+
*/
|
|
33
|
+
function buildEvalSummary(events) {
|
|
34
|
+
const scenariosByRef = new Map();
|
|
35
|
+
const errors = [];
|
|
36
|
+
for (const event of events) {
|
|
37
|
+
if (event.type === "stepResult") {
|
|
38
|
+
let scenario = scenariosByRef.get(event.scenarioRef);
|
|
39
|
+
if (!scenario) {
|
|
40
|
+
scenario = {
|
|
41
|
+
scenarioRef: event.scenarioRef,
|
|
42
|
+
overallPassed: false,
|
|
43
|
+
steps: []
|
|
44
|
+
};
|
|
45
|
+
scenariosByRef.set(event.scenarioRef, scenario);
|
|
54
46
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
47
|
+
scenario.steps.push(event.stepResult);
|
|
48
|
+
}
|
|
49
|
+
else if (event.type === "scenarioSummary") {
|
|
50
|
+
const scenario = scenariosByRef.get(event.scenarioRef);
|
|
51
|
+
if (scenario)
|
|
52
|
+
scenario.overallPassed = event.overallPassed;
|
|
53
|
+
}
|
|
54
|
+
else if (event.type === "evalError") {
|
|
55
|
+
errors.push(event.message);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
return {
|
|
59
|
+
scenarios: Array.from(scenariosByRef.values()),
|
|
60
|
+
errors
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
function formatToolCalls(step) {
|
|
64
|
+
var _a;
|
|
65
|
+
if (!((_a = step.toolEvaluations) === null || _a === void 0 ? void 0 : _a.length))
|
|
66
|
+
return "—";
|
|
67
|
+
return step.toolEvaluations
|
|
68
|
+
.map((toolEval) => {
|
|
69
|
+
if (!Object.keys(toolEval.actualParameters || {}).length) {
|
|
70
|
+
return `${toolEval.actualTool}()`;
|
|
71
|
+
}
|
|
72
|
+
const params = JSON.stringify(toolEval.actualParameters || {}, null, 1)
|
|
73
|
+
.split("\n")
|
|
74
|
+
.map((line, idx, arr) => {
|
|
75
|
+
if (idx === 0 || idx === arr.length - 1)
|
|
76
|
+
return "";
|
|
77
|
+
return " " + line.trim();
|
|
78
|
+
})
|
|
79
|
+
.join("\n");
|
|
80
|
+
return `${toolEval.actualTool}({${params}})`;
|
|
81
|
+
})
|
|
82
|
+
.join(" ");
|
|
83
|
+
}
|
|
84
|
+
const LABEL_PAD = 14; // "AI Expected: " etc.
|
|
85
|
+
function formatLine(label, value) {
|
|
86
|
+
return ` ${(label + ":").padEnd(LABEL_PAD + 1)} ${value.replace(/\n/g, " ")}`;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Prints a single step result in vertical layout (Step, Human, AI Expected, AI Actual, Tool, Passed).
|
|
90
|
+
*/
|
|
91
|
+
function printStepResult(_scenarioRef, stepIndex, step) {
|
|
92
|
+
if (stepIndex > 0)
|
|
93
|
+
console.log("");
|
|
94
|
+
const toolText = formatToolCalls(step);
|
|
95
|
+
const passedStr = step.passed
|
|
96
|
+
? ansis_1.default.green("✔ Passed")
|
|
97
|
+
: ansis_1.default.red("✘ Failed");
|
|
98
|
+
console.log(ansis_1.default.bold(` Step: ${stepIndex + 1}`));
|
|
99
|
+
console.log(formatLine("Human", step.humanInput || "—"));
|
|
100
|
+
console.log(formatLine("AI Expected", step.expectedResponse || "—"));
|
|
101
|
+
console.log(formatLine("AI Actual", step.aiResponse || "(none)"));
|
|
102
|
+
console.log(formatLine("Tool", toolText));
|
|
103
|
+
console.log(formatLine("Passed", passedStr));
|
|
104
|
+
if (!step.passed && step.errorMessage) {
|
|
105
|
+
console.log(ansis_1.default.red(` ${step.errorMessage}`));
|
|
106
|
+
}
|
|
107
|
+
if (step.toolEvaluations) {
|
|
108
|
+
for (const toolEval of step.toolEvaluations) {
|
|
109
|
+
if (!toolEval.passed && toolEval.errorMessage) {
|
|
110
|
+
console.log(ansis_1.default.red(` Tool: ${toolEval.errorMessage}`));
|
|
76
111
|
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Prints scenario header (call once before first step of a scenario).
|
|
117
|
+
*/
|
|
118
|
+
function printScenarioHeader(scenarioRef) {
|
|
119
|
+
console.log("");
|
|
120
|
+
console.log(ansis_1.default.bold.blue(`Scenario: ${scenarioRef}`));
|
|
121
|
+
console.log(ansis_1.default.dim("—".repeat(Math.min(60, process.stdout.columns || 60))));
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Prints scenario completion summary.
|
|
125
|
+
*/
|
|
126
|
+
function printScenarioSummary(scenarioRef, overallPassed) {
|
|
127
|
+
console.log(ansis_1.default.bold(` Overall: ${overallPassed ? ansis_1.default.green("✔ Passed") : ansis_1.default.red("✘ Failed")}`));
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Prints an eval error event.
|
|
131
|
+
*/
|
|
132
|
+
function printEvalError(message) {
|
|
133
|
+
console.log("");
|
|
134
|
+
console.log(ansis_1.default.red("— Eval error —"));
|
|
135
|
+
console.log(ansis_1.default.red(` ${message}`));
|
|
80
136
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/ctl",
|
|
3
|
-
"version": "0.17.4",
|
|
3
|
+
"version": "0.18.0",
|
|
4
4
|
"description": "Fonoster Control Tool",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
"bugs": {
|
|
31
31
|
"url": "https://github.com/fonoster/fonoster/issues"
|
|
32
32
|
},
|
|
33
|
-
"gitHead": "
|
|
33
|
+
"gitHead": "051f172b266db965cf1d1366f563da995a29a93d",
|
|
34
34
|
"bin": {
|
|
35
35
|
"fonoster": "./bin/run.js"
|
|
36
36
|
},
|
|
@@ -51,7 +51,7 @@
|
|
|
51
51
|
}
|
|
52
52
|
},
|
|
53
53
|
"dependencies": {
|
|
54
|
-
"@fonoster/sdk": "^0.
|
|
54
|
+
"@fonoster/sdk": "^0.18.0",
|
|
55
55
|
"@inquirer/prompts": "^7.1.0",
|
|
56
56
|
"@oclif/core": "^4.0.34",
|
|
57
57
|
"@oclif/plugin-warn-if-update-available": "^3.1.28",
|