@princetheprogrammerbtw/husk 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -11
- package/dist/index.d.ts +269 -1
- package/dist/index.js +306 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -821,6 +821,27 @@ function mapStopReason2(reason) {
|
|
|
821
821
|
}
|
|
822
822
|
}
|
|
823
823
|
|
|
824
|
+
// src/providers/ollama.ts
|
|
825
|
+
// Fallback values applied by OllamaProvider when the caller omits the matching option.
var DEFAULT_BASE_URL = "http://localhost:11434/v1";
var DEFAULT_MODEL = "llama3.2";
var PLACEHOLDER_API_KEY = "ollama";
/**
 * Provider named "ollama" that forwards all chat traffic to an internal
 * `OpenAIProvider` configured with Ollama-oriented defaults.
 *
 * Recognised options (all optional):
 *   - `model`:   model identifier; falls back to DEFAULT_MODEL.
 *   - `apiKey`:  key handed to the wrapped provider; falls back to PLACEHOLDER_API_KEY.
 *   - `baseURL`: endpoint handed to the wrapped provider; falls back to DEFAULT_BASE_URL.
 */
var OllamaProvider = class {
  name = "ollama";
  model;
  inner;
  /**
   * @param {{ model?: string, apiKey?: string, baseURL?: string }} [options]
   */
  constructor(options = {}) {
    const { model, apiKey, baseURL } = options;
    this.model = model ?? DEFAULT_MODEL;
    const innerConfig = {
      apiKey: apiKey ?? PLACEHOLDER_API_KEY,
      model: this.model,
      baseURL: baseURL ?? DEFAULT_BASE_URL
    };
    this.inner = new OpenAIProvider(innerConfig);
  }
  /**
   * Hands the request to the wrapped provider unchanged and returns whatever it returns.
   */
  chat(request) {
    const delegate = this.inner;
    return delegate.chat(request);
  }
};
|
|
844
|
+
|
|
824
845
|
// src/tools/registry.ts
|
|
825
846
|
function defineTool(tool) {
|
|
826
847
|
return {
|
|
@@ -1103,9 +1124,293 @@ function truncateOutput(output, limit) {
|
|
|
1103
1124
|
... (${lines.length - limit} more matches truncated)`;
|
|
1104
1125
|
}
|
|
1105
1126
|
|
|
1127
|
+
// src/evals/types.ts
|
|
1128
|
+
/**
 * Creates an assertion that passes when the agent output is strictly equal
 * (`===`) to `expected`.
 *
 * The assertion name embeds the JSON form of `expected`, cut to 40 characters.
 * On failure the message shows the full expected value and the first 200
 * characters of the JSON-encoded actual output.
 *
 * @param {string} expected value the output must equal
 * @returns {(result: { output: string }) => { name: string, pass: boolean, message?: string }}
 */
function equals(expected) {
  return (result) => {
    const isEqual = result.output === expected;
    const label = `equals(${JSON.stringify(expected).slice(0, 40)})`;
    if (isEqual) {
      return { name: label, pass: true };
    }
    return {
      name: label,
      pass: false,
      message: `Expected ${JSON.stringify(expected)}, got ${JSON.stringify(result.output).slice(0, 200)}`
    };
  };
}
|
|
1138
|
+
/**
 * Creates an assertion that passes when the agent output includes `needle`.
 *
 * The assertion name embeds the JSON form of `needle`, cut to 40 characters.
 * On failure the message shows the needle and the first 200 characters of the
 * JSON-encoded output.
 *
 * @param {string} needle substring to look for
 * @returns {(result: { output: string }) => { name: string, pass: boolean, message?: string }}
 */
function contains(needle) {
  return (result) => {
    const found = result.output.includes(needle);
    const label = `contains(${JSON.stringify(needle).slice(0, 40)})`;
    if (found) {
      return { name: label, pass: true };
    }
    return {
      name: label,
      pass: false,
      message: `Expected output to contain ${JSON.stringify(needle)}, got ${JSON.stringify(result.output).slice(0, 200)}`
    };
  };
}
|
|
1148
|
+
/**
 * Creates an assertion that passes when the agent output matches `pattern`.
 *
 * Each evaluation runs against a fresh copy of the regular expression. A
 * global (`g`) or sticky (`y`) regex keeps its position in `lastIndex` between
 * `exec` calls, so reusing the caller's instance would make the same assertion
 * give different answers for identical output depending on earlier cases. The
 * copy also leaves the caller's regex untouched.
 *
 * @param {RegExp} pattern expression the output must match
 * @returns {(result: { output: string }) => { name: string, pass: boolean, message?: string }}
 */
function matches(pattern) {
  return (result) => {
    // New instance per call: lastIndex always starts at 0.
    const probe = new RegExp(pattern.source, pattern.flags);
    const matched = probe.exec(result.output) !== null;
    const outcome = {
      name: `matches(${pattern})`,
      pass: matched
    };
    if (!matched) {
      outcome.message = `Output did not match ${pattern}: ${JSON.stringify(result.output).slice(0, 200)}`;
    }
    return outcome;
  };
}
|
|
1160
|
+
/**
 * Wraps an arbitrary predicate over the agent output as a named assertion.
 *
 * The predicate's return value is stored as-is in `pass`. When it is falsy,
 * `message` is attached, defaulting to "Predicate <name> failed" if `message`
 * is null or undefined.
 *
 * @param {string} name label reported for this assertion
 * @param {(output: string) => boolean} predicate check applied to `result.output`
 * @param {string} [message] failure text overriding the default
 * @returns {(result: { output: string }) => { name: string, pass: boolean, message?: string }}
 */
function fn(name, predicate, message) {
  return (result) => {
    const pass = predicate(result.output);
    const outcome = { name, pass };
    if (!pass) {
      outcome.message = message ?? `Predicate ${name} failed`;
    }
    return outcome;
  };
}
|
|
1170
|
+
/**
 * Creates an assertion that passes when the agent output does NOT include
 * `needle`.
 *
 * The assertion name embeds the JSON form of `needle`, cut to 40 characters.
 * On failure the message shows the needle and the first 200 characters of the
 * JSON-encoded output.
 *
 * @param {string} needle substring that must be absent
 * @returns {(result: { output: string }) => { name: string, pass: boolean, message?: string }}
 */
function notContains(needle) {
  return (result) => {
    const absent = !result.output.includes(needle);
    const label = `notContains(${JSON.stringify(needle).slice(0, 40)})`;
    if (absent) {
      return { name: label, pass: true };
    }
    return {
      name: label,
      pass: false,
      message: `Output should not contain ${JSON.stringify(needle)} but did: ${JSON.stringify(result.output).slice(0, 200)}`
    };
  };
}
|
|
1180
|
+
/**
 * Creates an assertion that passes when `result.output.length` lies in the
 * inclusive range [min, max].
 *
 * @param {number} min smallest accepted length
 * @param {number} max largest accepted length
 * @returns {(result: { output: string }) => { name: string, pass: boolean, message?: string }}
 */
function lengthBetween(min, max) {
  return (result) => {
    const { length: len } = result.output;
    const withinRange = len >= min && len <= max;
    const label = `lengthBetween(${min}, ${max})`;
    if (withinRange) {
      return { name: label, pass: true };
    }
    return {
      name: label,
      pass: false,
      message: `Output length ${len} not in [${min}, ${max}]`
    };
  };
}
|
|
1191
|
+
|
|
1192
|
+
// src/evals/runner.ts
|
|
1193
|
+
/**
 * Executes every case of a suite one after another and summarises the outcome.
 *
 * For each case: `options.onCaseStart` (if given) receives the case name, the
 * case is executed via `runCase`, and `options.onCaseEnd` (if given) receives
 * the case result. When `options.failFast` is truthy, the loop stops after the
 * first failing case.
 *
 * @param {{ name: string, cases: Iterable<object> & { length: number } }} suite
 * @param {Function} factory passed through to `runCase` for every case
 * @param {{ onCaseStart?: Function, onCaseEnd?: Function, failFast?: boolean }} [options]
 * @returns {Promise<{ suiteName: string, results: object[], passed: number, total: number, durationMs: number }>}
 *   `total` is the number of cases in the suite, including any that were not
 *   run because of `failFast`.
 */
async function runSuite(suite, factory, options = {}) {
  const startedAt = Date.now();
  const results = [];
  let passed = 0;
  for (const testCase of suite.cases) {
    options.onCaseStart?.(testCase.name);
    const outcome = await runCase(testCase, factory);
    results.push(outcome);
    passed += outcome.passed ? 1 : 0;
    options.onCaseEnd?.(outcome);
    const stopNow = options.failFast && !outcome.passed;
    if (stopNow) {
      break;
    }
  }
  const summary = {
    suiteName: suite.name,
    results,
    passed,
    total: suite.cases.length,
    durationMs: Date.now() - startedAt
  };
  return summary;
}
|
|
1215
|
+
/**
 * Runs a single eval case: builds an agent with `factory`, feeds it `c.input`,
 * then evaluates every assertion in `c.assertions` against the agent result.
 *
 * Failure handling:
 *   - If `agent.run` throws, the case fails with one synthetic "agent.run"
 *     assertion result and a placeholder agent result (empty output, zero usage).
 *   - If an assertion throws (for example a user predicate that parses the
 *     output), that assertion is recorded as failed and the remaining ones
 *     still run, so one bad predicate cannot abort the whole suite.
 *   - Errors from `factory` itself are not caught and propagate to the caller.
 *
 * @param {{ name: string, input: unknown, assertions: Iterable<Function> }} c
 * @param {() => object | Promise<object>} factory produces an object with `run(input)`
 * @returns {Promise<{ caseName: string, passed: boolean, assertionResults: object[], agentResult: object, durationMs: number }>}
 */
async function runCase(c, factory) {
  // Normalise anything thrown (Error or not) to a readable string.
  const describe = (err) => err instanceof Error ? err.message : String(err);
  const start = Date.now();
  const agent = await factory();
  let agentResult;
  try {
    agentResult = await agent.run(c.input);
  } catch (err) {
    const errorAssertionResult = {
      pass: false,
      name: "agent.run",
      message: `agent.run threw: ${describe(err)}`
    };
    return {
      caseName: c.name,
      passed: false,
      assertionResults: [errorAssertionResult],
      agentResult: {
        output: "",
        messages: [],
        iterations: 0,
        usage: { inputTokens: 0, outputTokens: 0 },
        durationMs: Date.now() - start
      },
      durationMs: Date.now() - start
    };
  }
  const assertionResults = [];
  let index = 0;
  for (const assertion of c.assertions) {
    try {
      assertionResults.push(await assertion(agentResult));
    } catch (err) {
      // Assertions built by factories are usually anonymous; fall back to the position.
      const label = typeof assertion === "function" && assertion.name ? assertion.name : `assertion[${index}]`;
      assertionResults.push({
        pass: false,
        name: label,
        message: `assertion threw: ${describe(err)}`
      });
    }
    index += 1;
  }
  const allPassed = assertionResults.every((r) => r.pass);
  return {
    caseName: c.name,
    passed: allPassed,
    assertionResults,
    agentResult,
    durationMs: Date.now() - start
  };
}
|
|
1256
|
+
/**
 * Returns a new suite object carrying only the `name` and `cases` of the
 * given definition; any other properties on the input are dropped.
 *
 * @param {{ name: string, cases: object[] }} suite
 * @returns {{ name: string, cases: object[] }}
 */
function defineSuite(suite) {
  const { name, cases } = suite;
  return { name, cases };
}
|
|
1262
|
+
|
|
1263
|
+
// src/obs/tracer.ts
|
|
1264
|
+
/**
 * Tracer that records nothing. Every span it hands out carries the fixed
 * context `{ traceId: "0", spanId: "0" }` and methods that do nothing.
 */
var NoopTracer = class {
  /**
   * Creates a new inert span. Both arguments are ignored.
   *
   * @returns {{ context: { traceId: string, spanId: string }, addEvent: Function, setAttribute: Function, recordException: Function, setStatus: Function, end: Function }}
   */
  startSpan(_options, _parent) {
    const context = { traceId: "0", spanId: "0" };
    return {
      context,
      addEvent() {},
      setAttribute() {},
      recordException() {},
      setStatus() {},
      end() {}
    };
  }
};
|
|
1285
|
+
|
|
1286
|
+
// src/obs/mapper.ts
|
|
1287
|
+
/**
 * Turns agent lifecycle events into spans on the supplied tracer.
 *
 * Span layout:
 *   - "agent.run"        root span, opened on `agent:start`
 *   - "iteration.<n>"    child of the root, opened on each `agent:iteration`
 *                        (the previous iteration span is ended first)
 *   - "tool.<name>"      child of the current iteration span (or the root when
 *                        no iteration is open), opened on `tool:call` and
 *                        ended on the `tool:result` with the same id
 *
 * On both `agent:end` and `agent:error`, child spans (open tool spans, then
 * the iteration span) are ended before the root span, and the tool-span map is
 * emptied so nothing carries over into a later run.
 */
var EventTracer = class {
  tracer;
  traceSpan = null;
  iterationSpan = null;
  toolSpans = /* @__PURE__ */ new Map();
  /**
   * @param {{ startSpan: Function }} tracer tracer used to create every span
   */
  constructor(tracer) {
    this.tracer = tracer;
  }
  /**
   * Event handler. It is an arrow-function field, so `this` is already fixed
   * to the instance: `agent.onAny(tracer.onEvent)` works, and the older
   * `agent.onAny(tracer.onEvent.bind(tracer))` form remains valid.
   */
  onEvent = (event) => {
    switch (event.type) {
      case "agent:start": {
        this.traceSpan = this.tracer.startSpan({
          name: "agent.run",
          kind: "internal",
          attributes: {
            "husk.input": event.input,
            "husk.session_id": event.sessionId
          }
        });
        break;
      }
      case "agent:iteration": {
        // Only one iteration span is open at a time.
        this.iterationSpan?.end();
        this.iterationSpan = this.tracer.startSpan(
          {
            name: `iteration.${event.iteration}`,
            kind: "internal",
            attributes: { "husk.iteration": event.iteration }
          },
          this.traceSpan?.context
        );
        break;
      }
      case "provider:request": {
        this.iterationSpan?.addEvent("provider.request", {
          "provider.model": event.request.model
        });
        break;
      }
      case "provider:response": {
        if (this.iterationSpan) {
          this.iterationSpan.setAttribute(
            "provider.input_tokens",
            event.response.usage.inputTokens
          );
          this.iterationSpan.setAttribute(
            "provider.output_tokens",
            event.response.usage.outputTokens
          );
          this.iterationSpan.setAttribute("provider.stop_reason", event.response.stopReason);
          this.iterationSpan.setAttribute("provider.duration_ms", event.durationMs);
        }
        break;
      }
      case "tool:call": {
        const span = this.tracer.startSpan(
          {
            name: `tool.${event.name}`,
            kind: "internal",
            attributes: {
              "tool.name": event.name,
              "tool.input": JSON.stringify(event.input)
            }
          },
          // Parent is the current iteration if any, otherwise the root span.
          this.iterationSpan?.context ?? this.traceSpan?.context
        );
        this.toolSpans.set(event.id, span);
        break;
      }
      case "tool:result": {
        const span = this.toolSpans.get(event.id);
        if (span) {
          span.setAttribute("tool.is_error", event.result.isError ?? false);
          span.setAttribute("tool.duration_ms", event.durationMs);
          if (event.result.isError) {
            span.setStatus("error", event.result.output);
          } else {
            span.setStatus("ok");
          }
          span.end();
          this.toolSpans.delete(event.id);
        }
        break;
      }
      case "agent:end": {
        // Also closes tool spans that never received a tool:result.
        this.#endChildSpans();
        if (this.traceSpan) {
          this.traceSpan.setAttribute("husk.iterations", event.iterations);
          this.traceSpan.setAttribute("husk.duration_ms", event.durationMs);
          this.traceSpan.setStatus("ok");
          this.traceSpan.end();
          this.traceSpan = null;
        }
        break;
      }
      case "agent:error": {
        // Children first, so no child span outlives its parent.
        this.#endChildSpans();
        if (this.traceSpan) {
          this.traceSpan.recordException(event.error);
          this.traceSpan.setStatus("error", event.error.message);
          this.traceSpan.end();
          this.traceSpan = null;
        }
        break;
      }
      case "agent:message": {
        this.iterationSpan?.addEvent("message", {
          "message.role": event.message.role
        });
        break;
      }
    }
  };
  /**
   * Ends every still-open tool span, then the current iteration span, and
   * resets both pieces of state. Leaves the root span untouched.
   */
  #endChildSpans() {
    for (const span of this.toolSpans.values()) {
      span.end();
    }
    this.toolSpans.clear();
    this.iterationSpan?.end();
    this.iterationSpan = null;
  }
};
|
|
1410
|
+
|
|
1106
1411
|
// src/index.ts
|
|
1107
1412
|
// Library version exported to consumers; keep in step with the package's published version.
var VERSION = "0.2.0";
|
|
1108
1413
|
|
|
1109
|
-
export { Agent, AgentEventEmitter, AnthropicProvider, Bash, ConsoleLogger, Edit, FileStore, Grep, InMemoryStore, OpenAIProvider, Read, VERSION, Write, arrayField, booleanField, buildExampleMessages, buildSystemPrompt, defineTool, integerField, logEventsTo, numberField, objectField, objectSchema, stringField };
|
|
1414
|
+
export { Agent, AgentEventEmitter, AnthropicProvider, Bash, ConsoleLogger, Edit, EventTracer, FileStore, Grep, InMemoryStore, NoopTracer, OllamaProvider, OpenAIProvider, Read, VERSION, Write, arrayField, booleanField, buildExampleMessages, buildSystemPrompt, contains, defineSuite, defineTool, equals, fn, integerField, lengthBetween, logEventsTo, matches, notContains, numberField, objectField, objectSchema, runSuite, stringField };
|
|
1110
1415
|
//# sourceMappingURL=index.js.map
|
|
1111
1416
|
//# sourceMappingURL=index.js.map
|