@princetheprogrammerbtw/husk 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1,7 +1,9 @@
1
1
  #!/usr/bin/env node
2
+ import { existsSync, statSync, promises } from 'fs';
3
+ import { readdir } from 'fs/promises';
4
+ import { resolve, extname, dirname, join } from 'path';
5
+ import { pathToFileURL } from 'url';
2
6
  import { promisify, parseArgs } from 'util';
3
- import { promises } from 'fs';
4
- import { resolve, dirname, join } from 'path';
5
7
  import Anthropic from '@anthropic-ai/sdk';
6
8
  import OpenAI from 'openai';
7
9
  import { exec } from 'child_process';
@@ -1042,6 +1044,71 @@ function truncateOutput(output, limit) {
1042
1044
  ... (${lines.length - limit} more matches truncated)`;
1043
1045
  }
1044
1046
 
1047
+ // src/evals/runner.ts
1048
/**
 * Executes the cases of an eval suite sequentially and summarizes the outcome.
 *
 * @param {object} suite - Object with a `name` and an iterable `cases` collection.
 * @param {Function} factory - Forwarded unchanged to `runCase` for every case.
 * @param {object} [options] - Optional hooks `onCaseStart(caseName)` and
 *   `onCaseEnd(caseResult)`, plus a `failFast` flag that stops after the
 *   first case that did not pass.
 * @returns {Promise<object>} `{ suiteName, results, passed, total, durationMs }`.
 *   `total` is always `suite.cases.length`, even when `failFast` stopped early.
 */
async function runSuite(suite, factory, options = {}) {
  const startedAt = Date.now();
  const results = [];
  let passed = 0;

  for (const evalCase of suite.cases) {
    // Hooks are invoked as methods of `options` so their `this` stays intact.
    options.onCaseStart?.(evalCase.name);

    const outcome = await runCase(evalCase, factory);
    results.push(outcome);
    passed += outcome.passed ? 1 : 0;

    options.onCaseEnd?.(outcome);

    const stopNow = options.failFast && !outcome.passed;
    if (stopNow) {
      break;
    }
  }

  const durationMs = Date.now() - startedAt;
  return {
    suiteName: suite.name,
    results,
    passed,
    total: suite.cases.length,
    durationMs
  };
}
1070
/**
 * Runs a single eval case: builds an agent, feeds it the case input, then
 * evaluates every assertion against the agent's result.
 *
 * Failures never reject the returned promise. A throwing factory, a throwing
 * `agent.run`, or a throwing assertion is reported as a failed assertion
 * result, so one broken case cannot abort the rest of the suite.
 *
 * @param {object} c - Case with `name`, `input` and an iterable `assertions`
 *   of (possibly async) functions receiving the agent result.
 * @param {Function} factory - Returns (or resolves to) an object with `run(input)`.
 * @returns {Promise<object>} `{ caseName, passed, assertionResults, agentResult, durationMs }`.
 */
async function runCase(c, factory) {
  const start = Date.now();
  const errorMessage = (err) => (err instanceof Error ? err.message : String(err));

  // Result used when the agent never produced output; `agentResult` is an
  // empty placeholder so consumers can read the usual fields unconditionally.
  const failedBeforeAssertions = (name, message) => ({
    caseName: c.name,
    passed: false,
    assertionResults: [{ pass: false, name, message }],
    agentResult: {
      output: "",
      messages: [],
      iterations: 0,
      usage: { inputTokens: 0, outputTokens: 0 },
      durationMs: Date.now() - start
    },
    durationMs: Date.now() - start
  });

  let agent;
  try {
    agent = await factory();
  } catch (err) {
    return failedBeforeAssertions("agent.factory", `agent factory threw: ${errorMessage(err)}`);
  }

  let agentResult;
  try {
    agentResult = await agent.run(c.input);
  } catch (err) {
    return failedBeforeAssertions("agent.run", `agent.run threw: ${errorMessage(err)}`);
  }

  const assertionResults = [];
  let index = 0;
  for (const assertion of c.assertions) {
    try {
      assertionResults.push(await assertion(agentResult));
    } catch (err) {
      // A throwing assertion counts as failed; later assertions still run.
      assertionResults.push({
        pass: false,
        name: assertion.name || `assertion[${index}]`,
        message: `assertion threw: ${errorMessage(err)}`
      });
    }
    index += 1;
  }

  return {
    caseName: c.name,
    passed: assertionResults.every((r) => r.pass),
    assertionResults,
    agentResult,
    durationMs: Date.now() - start
  };
}
1111
+
1045
1112
  // src/cli/index.ts
1046
1113
  var TOOL_REGISTRY = { read: Read, write: Write, edit: Edit, bash: Bash, grep: Grep };
1047
1114
  async function main() {
@@ -1054,6 +1121,10 @@ async function main() {
1054
1121
  await runCommand();
1055
1122
  return;
1056
1123
  }
1124
+ if (subcommand === "eval") {
1125
+ await evalCommand();
1126
+ return;
1127
+ }
1057
1128
  if (subcommand === "version" || subcommand === "--version" || subcommand === "-v") {
1058
1129
  console.log(`husk ${VERSION}`);
1059
1130
  return;
@@ -1079,7 +1150,7 @@ async function runCommand() {
1079
1150
  printHelp();
1080
1151
  return;
1081
1152
  }
1082
- const prompt = values.help === void 0 ? process.argv[3] : void 0;
1153
+ const prompt = process.argv[3];
1083
1154
  if (!prompt) {
1084
1155
  console.error("Error: husk run requires a prompt argument.");
1085
1156
  console.error('Usage: husk run "your prompt here"');
@@ -1111,13 +1182,95 @@ async function runCommand() {
1111
1182
  console.log(result.output);
1112
1183
  process.exit(0);
1113
1184
  }
1185
/**
 * Lists the eval files to load for an existing, already-resolved path.
 * A file path is returned as-is. A directory yields its direct `.ts`/`.js`/
 * `.mjs` files, excluding `.d.ts` declaration files, in sorted order
 * (`readdir` does not guarantee any ordering).
 *
 * @param {string} resolved - Absolute path of a file or directory.
 * @returns {Promise<string[]>} Absolute file paths.
 */
async function collectEvalFiles(resolved) {
  if (!statSync(resolved).isDirectory()) {
    return [resolved];
  }
  const entries = await readdir(resolved, { withFileTypes: true });
  const files = [];
  for (const entry of entries) {
    if (!entry.isFile()) continue;
    // Declaration files match the ".ts" extension but carry no runtime exports.
    if (entry.name.endsWith(".d.ts")) continue;
    const ext = extname(entry.name);
    if (ext === ".ts" || ext === ".js" || ext === ".mjs") {
      files.push(resolve(resolved, entry.name));
    }
  }
  return files.sort();
}

/**
 * Picks the suite-shaped exports (objects with `name` and an array `cases`)
 * out of a module namespace. The same object exported under several names
 * (e.g. default and named) is returned once.
 *
 * @param {object} mod - Module namespace object.
 * @returns {object[]} Distinct suite objects in export order.
 */
function extractEvalSuites(mod) {
  const suites = new Set();
  for (const value of Object.values(mod)) {
    if (value && typeof value === "object" && "name" in value && Array.isArray(value.cases)) {
      suites.add(value);
    }
  }
  return [...suites];
}

/**
 * `husk eval <file-or-dir>`: imports each eval file, runs every exported
 * suite with a default agent, prints per-case results and exits.
 *
 * Exit codes: 2 for a missing/invalid path argument or no eval files,
 * 1 when any case failed or a file could not be loaded/run, 0 otherwise.
 */
async function evalCommand() {
  const target = process.argv[3];
  if (!target) {
    console.error("Error: husk eval requires a path argument.");
    console.error("Usage: husk eval <file-or-dir>");
    process.exit(2);
    return;
  }
  const resolved = resolve(target);
  if (!existsSync(resolved)) {
    console.error(`Error: path not found: ${resolved}`);
    process.exit(2);
    return;
  }
  const files = await collectEvalFiles(resolved);
  if (files.length === 0) {
    console.error(`Error: no .ts/.js/.mjs files found in ${resolved}`);
    process.exit(2);
    return;
  }

  let totalPassed = 0;
  let totalCases = 0;
  let anyFailed = false;
  for (const file of files) {
    console.log(`\n=== ${file} ===`);
    try {
      const mod = await import(pathToFileURL(file).href);
      const suites = extractEvalSuites(mod);
      if (suites.length === 0) {
        console.error(` No EvalSuite found in ${file}`);
        continue;
      }
      for (const suite of suites) {
        const factory = () => Promise.resolve(makeDefaultAgent());
        const result = await runSuite(suite, factory);
        totalPassed += result.passed;
        totalCases += result.total;
        for (const r of result.results) {
          const icon = r.passed ? "\u2713" : "\u2717";
          console.log(` ${icon} ${r.caseName}`);
          if (r.passed) continue;
          anyFailed = true;
          // List only the assertions that actually failed; passing ones in a
          // failed case must not be shown with a failure marker.
          for (const a of r.assertionResults) {
            if (a.pass) continue;
            console.log(` \u2717 ${a.name}: ${a.message ?? "failed"}`);
          }
        }
        console.log(` ${result.passed}/${result.total} passed in ${result.durationMs}ms`);
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(` Error loading ${file}: ${message}`);
      anyFailed = true;
    }
  }
  console.log(`\n=== Total: ${totalPassed}/${totalCases} cases passed ===`);
  process.exit(anyFailed ? 1 : 0);
}
1260
/**
 * Builds the agent used by `husk eval`, configured from the environment.
 *
 * Reads `HUSK_PROVIDER` (falls back to "anthropic" when unset) and
 * `HUSK_MODEL` (falls back to "claude-opus-4-6" when unset). Only the exact
 * value "openai" selects the OpenAI provider; anything else uses Anthropic.
 * The API key is taken from `OPENAI_API_KEY` or `ANTHROPIC_API_KEY`.
 *
 * @returns {Agent} A new agent whose `model` is the chosen provider.
 */
function makeDefaultAgent() {
  const { env } = process;
  const modelId = env.HUSK_MODEL ?? "claude-opus-4-6";
  const providerName = env.HUSK_PROVIDER ?? "anthropic";

  let provider;
  if (providerName === "openai") {
    provider = new OpenAIProvider({ model: modelId, apiKey: env.OPENAI_API_KEY });
  } else {
    provider = new AnthropicProvider({ model: modelId, apiKey: env.ANTHROPIC_API_KEY });
  }

  return new Agent({ model: provider });
}
1114
1266
  function printHelp() {
1115
- console.log(`husk \u2014 run an agent from the command line
1267
+ console.log(`husk \u2014 run an agent or eval suite from the command line
1116
1268
 
1117
1269
  Usage:
1118
1270
  husk run "<prompt>" [options]
1271
+ husk eval <file-or-dir>
1119
1272
 
1120
- Options:
1273
+ Run options:
1121
1274
  --model <id> Model id (default: claude-opus-4-6)
1122
1275
  --provider <name> 'anthropic' (default) or 'openai'
1123
1276
  --tools <list> Comma-separated tool names: read,write,edit,bash,grep
@@ -1127,6 +1280,10 @@ Options:
1127
1280
  -h, --help Show this help
1128
1281
  -v, --version Show version
1129
1282
 
1283
+ Eval options:
1284
+ <file> A .ts/.js/.mjs file exporting one or more EvalSuite
1285
+ <dir> A directory; all *.ts/*.js/*.mjs files are loaded
1286
+
1130
1287
  Environment:
1131
1288
  ANTHROPIC_API_KEY Required for Anthropic provider
1132
1289
  OPENAI_API_KEY Required for OpenAI provider
@@ -1137,9 +1294,10 @@ Examples:
1137
1294
  husk run "What is the capital of France?"
1138
1295
  husk run "Refactor src/foo.ts" --tools read,edit,write
1139
1296
  husk run "Summarize README.md" --provider openai --model gpt-5
1297
+ husk eval ./evals/geography.ts
1140
1298
  `);
1141
1299
  }
1142
- var VERSION = "0.1.0";
1300
+ var VERSION = "0.3.0-dev.0";
1143
1301
  await main();
1144
1302
  //# sourceMappingURL=index.js.map
1145
1303
  //# sourceMappingURL=index.js.map