@princetheprogrammerbtw/husk 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +164 -6
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +167 -64
- package/dist/index.js +133 -1
- package/dist/index.js.map +1 -1
- package/dist/otel/index.d.ts +49 -0
- package/dist/otel/index.js +75 -0
- package/dist/otel/index.js.map +1 -0
- package/dist/tracer-y41CTrNG.d.ts +64 -0
- package/package.json +2 -1
package/dist/cli/index.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { existsSync, statSync, promises } from 'fs';
|
|
3
|
+
import { readdir } from 'fs/promises';
|
|
4
|
+
import { resolve, extname, dirname, join } from 'path';
|
|
5
|
+
import { pathToFileURL } from 'url';
|
|
2
6
|
import { promisify, parseArgs } from 'util';
|
|
3
|
-
import { promises } from 'fs';
|
|
4
|
-
import { resolve, dirname, join } from 'path';
|
|
5
7
|
import Anthropic from '@anthropic-ai/sdk';
|
|
6
8
|
import OpenAI from 'openai';
|
|
7
9
|
import { exec } from 'child_process';
|
|
@@ -1042,6 +1044,71 @@ function truncateOutput(output, limit) {
|
|
|
1042
1044
|
... (${lines.length - limit} more matches truncated)`;
|
|
1043
1045
|
}
|
|
1044
1046
|
|
|
1047
|
+
// src/evals/runner.ts
|
|
1048
|
+
/**
 * Executes the cases of an eval suite one after another and summarizes them.
 *
 * @param {{name: string, cases: Array<object>}} suite - Suite to execute; cases run in order.
 * @param {Function} factory - Forwarded unchanged to `runCase` for every case.
 * @param {object} [options] - Optional hooks and flags.
 * @param {Function} [options.onCaseStart] - Invoked with the case name before the case runs.
 * @param {Function} [options.onCaseEnd] - Invoked with the case result after the case runs.
 * @param {boolean} [options.failFast] - When truthy, stop after the first failing case.
 * @returns {Promise<{suiteName: string, results: Array<object>, passed: number, total: number, durationMs: number}>}
 *   `total` is the number of cases declared by the suite, even if `failFast`
 *   stopped the run before all of them executed.
 */
async function runSuite(suite, factory, options = {}) {
  const startedAt = Date.now();
  const collected = [];
  let passCount = 0;

  for (const evalCase of suite.cases) {
    options.onCaseStart?.(evalCase.name);

    const outcome = await runCase(evalCase, factory);
    collected.push(outcome);
    passCount += outcome.passed ? 1 : 0;

    options.onCaseEnd?.(outcome);

    const shouldStop = options.failFast && !outcome.passed;
    if (shouldStop) {
      break;
    }
  }

  const summary = {
    suiteName: suite.name,
    results: collected,
    passed: passCount,
    total: suite.cases.length,
    durationMs: Date.now() - startedAt
  };
  return summary;
}
|
|
1070
|
+
/** Extracts a printable message from anything that was thrown. */
function describeThrownValue(err) {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Builds the result for a case that could not produce an agent result at all.
 * The agent result is a zeroed placeholder so consumers always see the same shape.
 */
function makeErroredCaseResult(c, start, name, message) {
  const durationMs = Date.now() - start;
  return {
    caseName: c.name,
    passed: false,
    assertionResults: [{ pass: false, name, message }],
    agentResult: {
      output: "",
      messages: [],
      iterations: 0,
      usage: { inputTokens: 0, outputTokens: 0 },
      durationMs
    },
    durationMs
  };
}

/**
 * Runs one eval case: creates an agent, runs it on the case input and applies
 * every assertion of the case to the agent result.
 *
 * Failures are always reported as a failed case result rather than a rejected
 * promise, so one broken case cannot abort the surrounding suite:
 *   - `factory()` throwing/rejecting  -> single failed assertion named "agent.factory"
 *   - `agent.run()` throwing/rejecting -> single failed assertion named "agent.run"
 *   - an assertion throwing/rejecting  -> that assertion is recorded as failed,
 *     the remaining assertions still run.
 *
 * @param {{name: string, input: *, assertions: Array<Function>}} c - The case to run.
 * @param {Function} factory - Returns (or resolves to) an object with a `run(input)` method.
 * @returns {Promise<{caseName: string, passed: boolean, assertionResults: Array<object>, agentResult: object, durationMs: number}>}
 */
async function runCase(c, factory) {
  const start = Date.now();

  let agent;
  try {
    agent = await factory();
  } catch (err) {
    return makeErroredCaseResult(c, start, "agent.factory", `agent factory threw: ${describeThrownValue(err)}`);
  }

  let agentResult;
  try {
    agentResult = await agent.run(c.input);
  } catch (err) {
    return makeErroredCaseResult(c, start, "agent.run", `agent.run threw: ${describeThrownValue(err)}`);
  }

  const assertionResults = [];
  for (const a of c.assertions) {
    try {
      assertionResults.push(await a(agentResult));
    } catch (err) {
      // A crashing assertion is a failed assertion, not a reason to lose the
      // results of the assertions that already ran.
      const label = (typeof a === "function" && a.name) || "assertion";
      assertionResults.push({
        pass: false,
        name: label,
        message: `assertion threw: ${describeThrownValue(err)}`
      });
    }
  }

  return {
    caseName: c.name,
    passed: assertionResults.every((r) => r.pass),
    assertionResults,
    agentResult,
    durationMs: Date.now() - start
  };
}
|
|
1111
|
+
|
|
1045
1112
|
// src/cli/index.ts
|
|
1046
1113
|
// Tool objects keyed by short lowercase name (read, write, edit, bash, grep).
// NOTE(review): presumably used to resolve the names passed to `--tools`; the lookup site is not visible in this excerpt — confirm.
var TOOL_REGISTRY = { read: Read, write: Write, edit: Edit, bash: Bash, grep: Grep };
|
|
1047
1114
|
async function main() {
|
|
@@ -1054,6 +1121,10 @@ async function main() {
|
|
|
1054
1121
|
await runCommand();
|
|
1055
1122
|
return;
|
|
1056
1123
|
}
|
|
1124
|
+
if (subcommand === "eval") {
|
|
1125
|
+
await evalCommand();
|
|
1126
|
+
return;
|
|
1127
|
+
}
|
|
1057
1128
|
if (subcommand === "version" || subcommand === "--version" || subcommand === "-v") {
|
|
1058
1129
|
console.log(`husk ${VERSION}`);
|
|
1059
1130
|
return;
|
|
@@ -1079,7 +1150,7 @@ async function runCommand() {
|
|
|
1079
1150
|
printHelp();
|
|
1080
1151
|
return;
|
|
1081
1152
|
}
|
|
1082
|
-
const prompt =
|
|
1153
|
+
const prompt = process.argv[3];
|
|
1083
1154
|
if (!prompt) {
|
|
1084
1155
|
console.error("Error: husk run requires a prompt argument.");
|
|
1085
1156
|
console.error('Usage: husk run "your prompt here"');
|
|
@@ -1111,13 +1182,95 @@ async function runCommand() {
|
|
|
1111
1182
|
console.log(result.output);
|
|
1112
1183
|
process.exit(0);
|
|
1113
1184
|
}
|
|
1185
|
+
/**
 * Implements `husk eval <file-or-dir>`.
 *
 * Reads the target path from `process.argv[3]`. A directory is scanned
 * (non-recursively) for `.ts`, `.js` and `.mjs` files; a file is used as-is.
 * Each file is imported, every export that looks like an eval suite (an object
 * with a `name` and an array `cases`) is run with a default agent, and a
 * per-case / per-suite / total report is printed.
 *
 * Always terminates the process:
 *   - exit code 2: missing argument, path not found, or no candidate files
 *   - exit code 1: at least one case failed or a file raised an error
 *   - exit code 0: otherwise
 *
 * @returns {Promise<void>} Never resolves normally; ends with `process.exit`.
 */
async function evalCommand() {
  const target = process.argv[3];
  if (!target) {
    console.error("Error: husk eval requires a path argument.");
    console.error("Usage: husk eval <file-or-dir>");
    process.exit(2);
  }

  const resolved = resolve(target);
  if (!existsSync(resolved)) {
    console.error(`Error: path not found: ${resolved}`);
    process.exit(2);
  }

  const stat = statSync(resolved);
  const files = [];
  if (stat.isDirectory()) {
    const entries = await readdir(resolved, { withFileTypes: true });
    for (const e of entries) {
      if (!e.isFile()) continue;
      const ext = extname(e.name);
      if (ext === ".ts" || ext === ".js" || ext === ".mjs") {
        files.push(resolve(resolved, e.name));
      }
    }
    // Directory listing order depends on the platform; sort so reports are
    // reproducible from run to run.
    files.sort();
  } else {
    files.push(resolved);
  }

  if (files.length === 0) {
    console.error(`Error: no .ts/.js/.mjs files found in ${resolved}`);
    process.exit(2);
  }

  let totalPassed = 0;
  let totalCases = 0;
  let anyFailed = false;

  for (const file of files) {
    console.log(`\n=== ${file} ===`);
    try {
      const mod = await import(pathToFileURL(file).href);

      // A Set keeps a suite that is exported under several names (for example
      // both as a named and as the default export) from running twice.
      const suites = new Set();
      for (const value of Object.values(mod)) {
        if (value && typeof value === "object" && "name" in value && "cases" in value && Array.isArray(value.cases)) {
          suites.add(value);
        }
      }
      if (suites.size === 0) {
        console.error(` No EvalSuite found in ${file}`);
        continue;
      }

      for (const suite of suites) {
        const factory = () => Promise.resolve(makeDefaultAgent());
        const result = await runSuite(suite, factory);
        totalPassed += result.passed;
        totalCases += result.total;

        for (const r of result.results) {
          const icon = r.passed ? "\u2713" : "\u2717";
          console.log(` ${icon} ${r.caseName}`);
          if (r.passed) continue;

          anyFailed = true;
          // Only list the assertions that actually failed; the ones that
          // passed must not be reported with a failure mark.
          for (const a of r.assertionResults) {
            if (a.pass) continue;
            console.log(` \u2717 ${a.name}: ${a.message ?? "failed"}`);
          }
        }
        console.log(` ${result.passed}/${result.total} passed in ${result.durationMs}ms`);
      }
    } catch (err) {
      // Covers import failures as well as anything thrown while running the
      // suites of this file; either way the remaining files still run.
      const message = err instanceof Error ? err.message : String(err);
      console.error(` Error loading ${file}: ${message}`);
      anyFailed = true;
    }
  }

  console.log(`\n=== Total: ${totalPassed}/${totalCases} cases passed ===`);
  process.exit(anyFailed ? 1 : 0);
}
|
|
1260
|
+
/**
 * Builds the agent used by `husk eval`, configured from the environment.
 *
 *   HUSK_PROVIDER  "anthropic" (default) or "openai"; case and surrounding
 *                  whitespace are ignored.
 *   HUSK_MODEL     model id, default "claude-opus-4-6".
 *   OPENAI_API_KEY / ANTHROPIC_API_KEY  passed to the selected provider.
 *
 * Unset and blank variables are treated the same way, so `HUSK_MODEL=` does not
 * send an empty model id.
 *
 * NOTE(review): the default model id is an Anthropic model and is also used
 * when HUSK_PROVIDER=openai without HUSK_MODEL — callers selecting openai
 * most likely need to set HUSK_MODEL as well.
 *
 * @returns {Agent} A new agent without tools, backed by the selected provider.
 * @throws {Error} When HUSK_PROVIDER names an unsupported provider.
 */
function makeDefaultAgent() {
  const providerName = (process.env.HUSK_PROVIDER ?? "").trim().toLowerCase() || "anthropic";
  const modelId = (process.env.HUSK_MODEL ?? "").trim() || "claude-opus-4-6";

  let provider;
  if (providerName === "openai") {
    provider = new OpenAIProvider({ model: modelId, apiKey: process.env.OPENAI_API_KEY });
  } else if (providerName === "anthropic") {
    provider = new AnthropicProvider({ model: modelId, apiKey: process.env.ANTHROPIC_API_KEY });
  } else {
    // A typo must not silently select a different provider than intended.
    throw new Error(`Unknown HUSK_PROVIDER "${process.env.HUSK_PROVIDER}": expected 'anthropic' or 'openai'`);
  }
  return new Agent({ model: provider });
}
|
|
1114
1266
|
function printHelp() {
|
|
1115
|
-
console.log(`husk \u2014 run an agent from the command line
|
|
1267
|
+
console.log(`husk \u2014 run an agent or eval suite from the command line
|
|
1116
1268
|
|
|
1117
1269
|
Usage:
|
|
1118
1270
|
husk run "<prompt>" [options]
|
|
1271
|
+
husk eval <file-or-dir>
|
|
1119
1272
|
|
|
1120
|
-
|
|
1273
|
+
Run options:
|
|
1121
1274
|
--model <id> Model id (default: claude-opus-4-6)
|
|
1122
1275
|
--provider <name> 'anthropic' (default) or 'openai'
|
|
1123
1276
|
--tools <list> Comma-separated tool names: read,write,edit,bash,grep
|
|
@@ -1127,6 +1280,10 @@ Options:
|
|
|
1127
1280
|
-h, --help Show this help
|
|
1128
1281
|
-v, --version Show version
|
|
1129
1282
|
|
|
1283
|
+
Eval options:
|
|
1284
|
+
<file> A .ts/.js/.mjs file exporting one or more EvalSuite
|
|
1285
|
+
<dir> A directory; all *.ts/*.js/*.mjs files are loaded
|
|
1286
|
+
|
|
1130
1287
|
Environment:
|
|
1131
1288
|
ANTHROPIC_API_KEY Required for Anthropic provider
|
|
1132
1289
|
OPENAI_API_KEY Required for OpenAI provider
|
|
@@ -1137,9 +1294,10 @@ Examples:
|
|
|
1137
1294
|
husk run "What is the capital of France?"
|
|
1138
1295
|
husk run "Refactor src/foo.ts" --tools read,edit,write
|
|
1139
1296
|
husk run "Summarize README.md" --provider openai --model gpt-5
|
|
1297
|
+
husk eval ./evals/geography.ts
|
|
1140
1298
|
`);
|
|
1141
1299
|
}
|
|
1142
|
-
var VERSION = "0.
|
|
1300
|
+
// Version string printed by `husk version` / `--version` / `-v`.
// NOTE(review): this reads "0.3.0-dev.0" although the diff header gives the released package version as 0.3.0 — confirm the constant is updated at release time.
var VERSION = "0.3.0-dev.0";
|
|
1143
1301
|
await main();
|
|
1144
1302
|
//# sourceMappingURL=index.js.map
|
|
1145
1303
|
//# sourceMappingURL=index.js.map
|