@grekt/cli 6.42.1 → 6.43.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +571 -8
  2. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -69100,25 +69100,43 @@ var __getProtoOf2 = Object.getPrototypeOf;
69100
69100
  var __defProp2 = Object.defineProperty;
69101
69101
  var __getOwnPropNames2 = Object.getOwnPropertyNames;
69102
69102
  var __hasOwnProp2 = Object.prototype.hasOwnProperty;
69103
+ function __accessProp2(key) {
69104
+ return this[key];
69105
+ }
69106
+ var __toESMCache_node2;
69107
+ var __toESMCache_esm2;
69103
69108
  var __toESM2 = (mod, isNodeMode, target) => {
69109
+ var canCache = mod != null && typeof mod === "object";
69110
+ if (canCache) {
69111
+ var cache2 = isNodeMode ? __toESMCache_node2 ??= new WeakMap : __toESMCache_esm2 ??= new WeakMap;
69112
+ var cached = cache2.get(mod);
69113
+ if (cached)
69114
+ return cached;
69115
+ }
69104
69116
  target = mod != null ? __create2(__getProtoOf2(mod)) : {};
69105
69117
  const to = isNodeMode || !mod || !mod.__esModule ? __defProp2(target, "default", { value: mod, enumerable: true }) : target;
69106
69118
  for (let key of __getOwnPropNames2(mod))
69107
69119
  if (!__hasOwnProp2.call(to, key))
69108
69120
  __defProp2(to, key, {
69109
- get: () => mod[key],
69121
+ get: __accessProp2.bind(mod, key),
69110
69122
  enumerable: true
69111
69123
  });
69124
+ if (canCache)
69125
+ cache2.set(mod, to);
69112
69126
  return to;
69113
69127
  };
69114
69128
  var __commonJS2 = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
69129
+ var __returnValue2 = (v) => v;
69130
+ function __exportSetter2(name2, newValue) {
69131
+ this[name2] = __returnValue2.bind(null, newValue);
69132
+ }
69115
69133
  var __export2 = (target, all) => {
69116
69134
  for (var name2 in all)
69117
69135
  __defProp2(target, name2, {
69118
69136
  get: all[name2],
69119
69137
  enumerable: true,
69120
69138
  configurable: true,
69121
- set: (newValue) => all[name2] = () => newValue
69139
+ set: __exportSetter2.bind(all, name2)
69122
69140
  });
69123
69141
  };
69124
69142
  var __require2 = /* @__PURE__ */ createRequire2(import.meta.url);
@@ -85174,10 +85192,15 @@ var StoredSessionSchema = exports_external.object({
85174
85192
  expires_at: exports_external.number().optional()
85175
85193
  });
85176
85194
  var TokensSchema = exports_external.record(exports_external.string(), exports_external.string());
85195
/**
 * Shape of the optional `eval` section of the local config.
 * Both keys are optional:
 *   - providers: list of provider identifier strings
 *   - server:    a single string
 */
var EvalLocalConfigSchema = exports_external.object({
  providers: exports_external
    .array(exports_external.string())
    .optional(),
  server: exports_external
    .string()
    .optional()
});
85177
85199
  var LocalConfigSchema = exports_external.object({
85178
85200
  registries: exports_external.record(exports_external.string().regex(/^@/, "Registry scope must start with @"), RegistryEntrySchema).optional(),
85179
85201
  session: StoredSessionSchema.optional(),
85180
- tokens: TokensSchema.optional()
85202
+ tokens: TokensSchema.optional(),
85203
+ eval: EvalLocalConfigSchema.optional()
85181
85204
  });
85182
85205
  var ComponentTypeSchema = exports_external.enum(CATEGORIES);
85183
85206
  var WorkspaceConfigSchema = exports_external.object({
@@ -90151,6 +90174,169 @@ function verifyTrustSignature(artifactId, signature, key) {
90151
90174
  return false;
90152
90175
  return timingSafeEqual(signatureBuffer, expectedBuffer);
90153
90176
  }
90177
/** Element types that discoverEvals accepts; anything else is skipped with a warning. */
var EVALUABLE_CATEGORIES = [
  "agents",
  "skills",
  "commands"
];

/**
 * One assertion inside a test case. Only `type` is required; `value` may be a
 * single string or a list of strings.
 */
var EvalAssertionSchema = exports_external.object({
  type: exports_external.string(),
  value: exports_external
    .union([
      exports_external.string(),
      exports_external.array(exports_external.string())
    ])
    .optional(),
  threshold: exports_external.number().optional(),
  weight: exports_external.number().optional()
});

/** One test case: string-to-string variables plus a non-empty assertion list. */
var EvalTestCaseSchema = exports_external.object({
  description: exports_external.string().optional(),
  vars: exports_external.record(
    exports_external.string(),
    exports_external.string()
  ),
  assert: exports_external
    .array(EvalAssertionSchema)
    .min(1, "At least one assertion is required")
});

/** Whole eval file: an optional provider override and a non-empty list of test cases. */
var EvalFileConfigSchema = exports_external.object({
  provider: exports_external.string().optional(),
  tests: exports_external
    .array(EvalTestCaseSchema)
    .min(1, "At least one test case is required")
});
90193
+ var EVAL_EXTENSION = ".eval.yaml";
90194
/**
 * Recursively lists every file below `dir`, as "/"-joined paths relative to
 * the directory the top-level call started from.
 *
 * @param {object} fs12 - File-system adapter; `readdir(dir)` must return an
 *   iterable of entry names and `stat(path)` an object with an `isDirectory`
 *   property (read as a property, not called).
 * @param {string} dir - Directory to walk.
 * @param {string} [basePath=""] - Relative prefix accumulated during recursion.
 * @returns {string[]} Relative file paths; empty when `dir` cannot be listed.
 */
function collectFiles2(fs12, dir, basePath = "") {
  let names;
  try {
    names = fs12.readdir(dir);
  } catch {
    // Unlistable directory: report no files rather than failing.
    return [];
  }
  const collected = [];
  for (const name of names) {
    const absolute = `${dir}/${name}`;
    const relative = basePath ? `${basePath}/${name}` : name;
    try {
      const { isDirectory } = fs12.stat(absolute);
      if (!isDirectory) {
        collected.push(relative);
        continue;
      }
      collected.push(...collectFiles2(fs12, absolute, relative));
    } catch {
      // Best effort: an entry that cannot be inspected is left out.
    }
  }
  return collected;
}
90216
/**
 * Finds every `*.eval.yaml` file below an artifact directory and pairs it with
 * the sibling `.md` element it belongs to.
 *
 * For each eval file the steps run in this order, and the first one that fails
 * records a warning and skips the file:
 *   1. the matching `<base>.md` must exist and be readable,
 *   2. its frontmatter must parse,
 *   3. its `grk-type` must be one of EVALUABLE_CATEGORIES,
 *   4. the eval file must be readable and validate against EvalFileConfigSchema.
 * The optional filter is applied last and skips silently (no warning).
 *
 * @param {object} fs12 - File-system adapter (`exists`, `readFile`, plus what collectFiles2 needs).
 * @param {{artifactDir: string, artifactId: string, filter?: {elementName?: string, elementType?: string}}} options2
 * @returns {{evals: object[], warnings: {evalFilePath: string, message: string}[]}}
 */
function discoverEvals(fs12, options2) {
  const { artifactDir, artifactId, filter } = options2;
  const evals = [];
  const warnings = [];

  // Every warning has the same shape and the same "Skipped <file>: " prefix.
  const skip = (evalFilePath, reason) => {
    warnings.push({
      evalFilePath,
      message: `Skipped ${evalFilePath}: ${reason}`
    });
  };

  const evalFiles = collectFiles2(fs12, artifactDir).filter((file) => file.endsWith(EVAL_EXTENSION));

  for (const evalFilePath of evalFiles) {
    // "foo/bar.eval.yaml" -> "foo/bar.md"
    const elementPath = `${evalFilePath.slice(0, -EVAL_EXTENSION.length)}.md`;
    const mdFullPath = `${artifactDir}/${elementPath}`;

    if (!fs12.exists(mdFullPath)) {
      skip(evalFilePath, `no matching .md file found (expected ${elementPath})`);
      continue;
    }

    let mdContent;
    try {
      mdContent = fs12.readFile(mdFullPath);
    } catch {
      skip(evalFilePath, `could not read ${elementPath}`);
      continue;
    }

    const frontmatterResult = parseFrontmatter(mdContent);
    if (!frontmatterResult.success) {
      skip(evalFilePath, `${elementPath} has invalid or missing frontmatter`);
      continue;
    }

    const { frontmatter: meta, content: body } = frontmatterResult.parsed;
    const elementType = meta["grk-type"];
    const elementName = meta["grk-name"];

    if (!EVALUABLE_CATEGORIES.includes(elementType)) {
      skip(evalFilePath, `${elementType} is not evaluable (only agents, skills, commands)`);
      continue;
    }

    let evalYamlContent;
    try {
      evalYamlContent = fs12.readFile(`${artifactDir}/${evalFilePath}`);
    } catch {
      skip(evalFilePath, "file could not be read");
      continue;
    }

    const parseResult = safeParseYaml(evalYamlContent, EvalFileConfigSchema, evalFilePath);
    if (!parseResult.success) {
      const details = parseResult.error.details?.join(", ") ?? "";
      skip(evalFilePath, `${parseResult.error.message}${details ? ` (${details})` : ""}`);
      continue;
    }

    // Filtering happens only after validation, so warnings above are emitted
    // even for elements the filter would exclude.
    const nameMismatch = filter?.elementName && elementName !== filter.elementName;
    const typeMismatch = filter?.elementType && elementType !== filter.elementType;
    if (nameMismatch || typeMismatch) {
      continue;
    }

    evals.push({
      artifactId,
      elementName,
      elementType,
      elementPath,
      systemPrompt: body.trim(),
      evalConfig: parseResult.data,
      evalFilePath
    });
  }

  return { evals, warnings };
}
90298
/**
 * Converts a pass count into a 0-100 integer percentage.
 *
 * @param {number} passed - Number of passing tests.
 * @param {number} total - Number of tests run.
 * @returns {number} Rounded percentage; 0 when `total` is 0.
 */
function calculateScore(passed, total) {
  if (total === 0) {
    return 0;
  }
  return Math.round((passed / total) * 100);
}

/**
 * Maps a 0-100 score to a letter grade.
 * Thresholds: A >= 95, B >= 80, C >= 65, D >= 50, otherwise F.
 *
 * @param {number} score
 * @returns {"A"|"B"|"C"|"D"|"F"}
 */
function scoreToGrade(score) {
  if (score >= 95) return "A";
  if (score >= 80) return "B";
  if (score >= 65) return "C";
  if (score >= 50) return "D";
  return "F";
}

/**
 * Aggregates per-element eval results into one summary.
 *
 * The overall score is computed from the summed pass/total counts. The
 * previous implementation averaged the per-element `score` fields weighted by
 * `total`; because those scores are already rounded, the rounding error was
 * carried into the overall score (e.g. 5/8 and 8/8 gave 82 instead of 81) and
 * could flip the grade at a threshold.
 *
 * @param {Array<{passed: number, total: number, failures: unknown[]}>} results
 * @returns {{results: object[], overallScore: number, overallGrade: string,
 *   totalPassed: number, totalTests: number, totalIssues: number}}
 *   For an empty input a fresh empty `results` array is returned with score 0 / grade "F".
 */
function summarizeResults(results) {
  if (results.length === 0) {
    return {
      results: [],
      overallScore: 0,
      overallGrade: "F",
      totalPassed: 0,
      totalTests: 0,
      totalIssues: 0
    };
  }

  let totalPassed = 0;
  let totalTests = 0;
  let totalIssues = 0;
  for (const result of results) {
    totalPassed += result.passed;
    totalTests += result.total;
    totalIssues += result.failures.length;
  }

  // calculateScore already yields 0 when no tests ran at all.
  const overallScore = calculateScore(totalPassed, totalTests);
  const overallGrade = scoreToGrade(overallScore);

  return {
    results,
    overallScore,
    overallGrade,
    totalPassed,
    totalTests,
    totalIssues
  };
}
90154
90340
 
90155
90341
  // src/constants.ts
90156
90342
  var REGISTRY_HOST2 = "registry.grekt.com";
@@ -104184,6 +104370,382 @@ var untrustCommand = new Command("untrust").description("Remove trusted status f
104184
104370
  success(`Removed trusted status from ${colors5.highlight(artifactId)}`);
104185
104371
  });
104186
104372
 
104373
+ // src/commands/eval.ts
104374
+ import { join as join35 } from "path";
104375
+
104376
+ // src/eval/promptfoo-engine.ts
104377
/**
 * Probes for a runnable promptfoo by spawning `--version`, first directly and
 * then through npx.
 *
 * @returns {"global"|"npx"|"none"} "global" when `promptfoo --version` exits 0,
 *   "npx" when `npx promptfoo --version` exits 0, otherwise "none".
 */
function detectPromptfoo() {
  // Probes are tried in order; the first one exiting with status 0 wins.
  const probes = [
    { argv: ["promptfoo", "--version"], mode: "global" },
    { argv: ["npx", "promptfoo", "--version"], mode: "npx" }
  ];
  for (const { argv, mode } of probes) {
    try {
      const outcome = Bun.spawnSync(argv, { stdout: "pipe", stderr: "pipe" });
      if (outcome.exitCode === 0) {
        return mode;
      }
    } catch {
      // A spawn that throws counts as "not available"; try the next probe.
    }
  }
  return "none";
}
104390
/**
 * Returns the argv prefix used to invoke promptfoo.
 *
 * @param {string} mode - Detection mode; only "npx" changes the result.
 * @returns {string[]} `["npx", "promptfoo"]` for "npx", `["promptfoo"]` for anything else.
 */
function getCommand(mode) {
  if (mode === "npx") {
    return ["npx", "promptfoo"];
  }
  return ["promptfoo"];
}
104393
/**
 * Builds the promptfoo configuration object for one element.
 *
 * The single prompt is a JSON-encoded two-message chat: the element's system
 * prompt followed by a user message containing the literal `{{input}}`
 * placeholder. Optional assertion fields (`value`, `threshold`, `weight`) are
 * copied only when they are not `undefined`.
 *
 * @param {{systemPrompt: string, provider: string, tests: Array<object>}} config
 * @returns {{prompts: Array<{raw: string}>, providers: string[], tests: object[]}}
 */
function assemblePromptfooConfig(config) {
  const chatMessages = [
    { role: "system", content: config.systemPrompt },
    { role: "user", content: "{{input}}" }
  ];

  const toAssertion = (source) => {
    const assertion = { type: source.type };
    if (source.value !== undefined) {
      assertion.value = source.value;
    }
    if (source.threshold !== undefined) {
      assertion.threshold = source.threshold;
    }
    if (source.weight !== undefined) {
      assertion.weight = source.weight;
    }
    return assertion;
  };

  const prompts = [{ raw: JSON.stringify(chatMessages) }];
  const providers = [config.provider];
  const tests = config.tests.map((test) => ({
    description: test.description,
    vars: test.vars,
    assert: test.assert.map(toAssertion)
  }));

  return { prompts, providers, tests };
}
104416
/**
 * Reduces raw promptfoo result rows to pass/total counts plus a flat list of
 * failure descriptions.
 *
 * Every row whose `success` is falsy now yields at least one failure entry.
 * Previously a failed row whose component results all passed produced no
 * entry at all, so callers that count failures saw zero issues for a failing
 * test. The row's `error` field is used as the reason when there is no
 * grading reason, instead of the generic "unknown error".
 *
 * @param {Iterable<object>} promptfooResults - Result rows; fields read are
 *   `success`, `description`, `error` and `gradingResult`
 *   (`reason`, `componentResults[].pass/.reason/.assertion`).
 * @returns {{passed: number, total: number,
 *   failures: Array<{testDescription: string, assertionType: string, expected: string, actual: string}>}}
 */
function extractFailures(promptfooResults) {
  let passed = 0;
  let total = 0;
  const failures = [];

  for (const row of promptfooResults) {
    total++;
    if (row?.success) {
      passed++;
      continue;
    }

    // Rows without a description are labelled by their 1-based position.
    const testDescription = row?.description ?? `Test ${total}`;
    const gradingResult = row?.gradingResult;
    const failedComponents = (gradingResult?.componentResults ?? []).filter((component) => !component?.pass);

    for (const component of failedComponents) {
      const assertion = component?.assertion;
      failures.push({
        testDescription,
        assertionType: assertion?.type ?? "unknown",
        expected: String(assertion?.value ?? ""),
        actual: String(component?.reason ?? "")
      });
    }

    if (failedComponents.length === 0) {
      // The row failed without any failing assertion (e.g. an error before
      // grading, or an aggregate failure). Still report it so the failure
      // count never under-reports failed tests.
      failures.push({
        testDescription,
        assertionType: "unknown",
        expected: "",
        actual: String(gradingResult?.reason ?? row?.error ?? "unknown error")
      });
    }
  }

  return { passed, total, failures };
}
104452
/**
 * Creates the promptfoo-backed eval engine.
 *
 * Availability is detected once at creation time and cached in `mode`;
 * `ensureAvailable` is the only method that refreshes it.
 *
 * @returns {{name: string, isAvailable(): boolean, ensureAvailable(): Promise<boolean>,
 *   run(config: object): Promise<object>, openReport(): void}}
 */
function createPromptfooEngine() {
  let mode = detectPromptfoo();
  return {
    name: "promptfoo",

    /** @returns {boolean} Whether the last detection found a runnable promptfoo. */
    isAvailable() {
      return mode !== "none";
    },

    /**
     * Tries to make promptfoo runnable by invoking it through npx, then
     * re-detects.
     *
     * @returns {Promise<boolean>} true when promptfoo is available afterwards.
     */
    async ensureAvailable() {
      if (mode !== "none") {
        return true;
      }
      const spin = spinner("Downloading promptfoo via npx...");
      spin.start();
      try {
        Bun.spawnSync(["npx", "promptfoo@latest", "--version"], { stdout: "pipe", stderr: "pipe" });
      } catch {
        // The spawn itself failed. This is reported as "not available" by the
        // re-detection below instead of rejecting, so the caller can print
        // its manual-install instructions.
      } finally {
        // Always stop the spinner, even when the spawn throws.
        spin.stop();
      }
      mode = detectPromptfoo();
      return mode !== "none";
    },

    /**
     * Runs the eval for one element.
     *
     * @param {object} config - Input for assemblePromptfooConfig.
     * @returns {Promise<object>} The extractFailures result.
     */
    async run(config) {
      const promptfooConfig = assemblePromptfooConfig(config);
      if (mode === "global") {
        try {
          const promptfoo = await import("promptfoo");
          const evaluate = promptfoo.evaluate;
          if (typeof evaluate === "function") {
            const evaluateResult = await evaluate(promptfooConfig);
            const results = evaluateResult.results ?? [];
            return extractFailures(results);
          }
        } catch {
          // Deliberate best effort: when the module cannot be imported or the
          // in-process run throws, fall back to the CLI below.
        }
      }
      return runViaCli(mode, promptfooConfig);
    },

    /** Runs `promptfoo view` with inherited stdio. */
    openReport() {
      const cmd = getCommand(mode);
      Bun.spawnSync([...cmd, "view"], { stdout: "inherit", stderr: "inherit" });
    }
  };
}
104490
/**
 * Runs `promptfoo eval` as a subprocess and parses the JSON it writes.
 *
 * Changes over the previous version:
 *  - The scratch directory is created with `mkdtempSync` under `os.tmpdir()`
 *    instead of a hand-built `${TMPDIR ?? "/tmp"}/grekt-eval-<timestamp>`
 *    path, which was predictable, could collide, and assumed a POSIX layout.
 *  - promptfoo's "some tests failed" exit status is no longer treated as a
 *    crash when an output file was produced. The status defaults to 100 and
 *    follows PROMPTFOO_FAILED_TEST_EXIT_CODE — see the promptfoo CLI docs.
 *  - Result rows are accepted both as a top-level `results` array and as the
 *    nested `results.results` array.
 *    NOTE(review): the nested shape is assumed from the promptfoo output
 *    format — confirm against the promptfoo version in use.
 *
 * @param {string} mode - Detection mode passed to getCommand.
 * @param {object} promptfooConfig - Config object serialised to the temp config file.
 * @returns {Promise<object>} The extractFailures result.
 * @throws {Error} When promptfoo exits with a status that does not indicate a completed run.
 */
async function runViaCli(mode, promptfooConfig) {
  const { mkdtempSync, writeFileSync: writeFile, readFileSync: readFile, rmSync: remove, existsSync: pathExists } = await import("fs");
  const { tmpdir } = await import("os");
  const { join: joinPath } = await import("path");

  const tempDir = mkdtempSync(joinPath(tmpdir(), "grekt-eval-"));
  const configPath = joinPath(tempDir, "promptfoo-config.json");
  const outputPath = joinPath(tempDir, "output.json");
  const failedTestsExitCode = Number(process.env.PROMPTFOO_FAILED_TEST_EXIT_CODE ?? 100);

  try {
    writeFile(configPath, JSON.stringify(promptfooConfig, null, 2));
    const cmd = getCommand(mode);
    const result = Bun.spawnSync([...cmd, "eval", "--config", configPath, "--output", outputPath, "--no-cache"], { stdout: "pipe", stderr: "pipe" });

    // A completed run with failing tests still writes the output file.
    const completedWithFailures = result.exitCode === failedTestsExitCode && pathExists(outputPath);
    if (result.exitCode !== 0 && !completedWithFailures) {
      const stderr = result.stderr.toString();
      throw new Error(`promptfoo eval failed: ${stderr}`);
    }

    const output = JSON.parse(readFile(outputPath, "utf-8"));
    const rawResults = output.results;
    let rows = [];
    if (Array.isArray(rawResults)) {
      rows = rawResults;
    } else if (Array.isArray(rawResults?.results)) {
      rows = rawResults.results;
    }
    return extractFailures(rows);
  } finally {
    // Remove the scratch directory on success and on failure.
    remove(tempDir, { recursive: true, force: true });
  }
}
104511
+
104512
+ // src/eval/engine-resolver.ts
104513
+ var AVAILABLE_ENGINES = [
104514
+ {
104515
+ name: "promptfoo",
104516
+ description: "Open source LLM eval framework. Runs via npx if not installed",
104517
+ create: createPromptfooEngine
104518
+ }
104519
+ ];
104520
/**
 * Picks an eval engine to run with.
 *
 * Returns the first engine in AVAILABLE_ENGINES that reports itself
 * available. When none does, the user is prompted to choose one and that
 * engine is asked to make itself available.
 *
 * The engine instances created during the availability pass are reused for
 * the selected entry. Previously the selected engine was created a second
 * time; `create` runs the engine's availability detection, so the second
 * call repeated that work for no benefit.
 *
 * @returns {Promise<object|null>} A ready engine, or null when the selection
 *   is unknown or setup failed (manual install instructions are printed in
 *   the latter case).
 */
async function resolveEvalEngine() {
  const unavailable = new Map();
  for (const candidate of AVAILABLE_ENGINES) {
    const engine = candidate.create();
    if (engine.isAvailable()) {
      return engine;
    }
    unavailable.set(candidate, engine);
  }

  newline();
  info("No eval engine detected");
  newline();
  const selected = await esm_default6({
    message: "Select an eval engine to use:",
    choices: AVAILABLE_ENGINES.map((candidate) => ({
      name: `${candidate.name} - ${candidate.description}`,
      value: candidate.name
    }))
  });

  const entry = AVAILABLE_ENGINES.find((e) => e.name === selected);
  if (!entry) {
    return null;
  }

  // Reuse the instance from the availability pass instead of creating it again.
  const engine2 = unavailable.get(entry) ?? entry.create();
  const ready = await engine2.ensureAvailable();
  if (!ready) {
    newline();
    error(`Failed to set up ${entry.name}`);
    newline();
    log(" Install it manually with one of:");
    log(` ${colors5.dim("$")} npm install -g ${entry.name}`);
    log(` ${colors5.dim("$")} brew install ${entry.name}`);
    return null;
  }
  return engine2;
}
104552
+
104553
+ // src/eval/runner.ts
104554
/**
 * Runs one discovered eval through the engine and scores the outcome.
 *
 * @param {object} discovered - One entry produced by discoverEvals.
 * @param {{engine: {run(config: object): Promise<object>}, defaultProvider: string}} options2 -
 *   `defaultProvider` is used when the eval file does not name a provider.
 * @returns {Promise<object>} Element identity plus passed/total/score/grade/failures.
 */
async function runEval(discovered, options2) {
  const { artifactId, elementName, elementType, systemPrompt, evalConfig } = discovered;

  const outcome = await options2.engine.run({
    systemPrompt,
    tests: evalConfig.tests,
    provider: evalConfig.provider ?? options2.defaultProvider
  });

  const { passed, total, failures } = outcome;
  const score = calculateScore(passed, total);

  return {
    artifactId,
    elementName,
    elementType,
    passed,
    total,
    score,
    grade: scoreToGrade(score),
    failures
  };
}
104574
/**
 * Runs every discovered eval one after another, in input order.
 *
 * `onProgress(index, total, label)` is invoked before each eval with the
 * zero-based index and an `elementType/elementName` label, and once more at
 * the end with `(total, total, "done")`.
 *
 * @param {object[]} discovered - Entries produced by discoverEvals.
 * @param {object} options2 - Passed through to runEval; may carry `onProgress`.
 * @returns {Promise<object[]>} One runEval result per entry.
 */
async function runAllEvals(discovered, options2) {
  const collected = [];
  for (let position = 0; position < discovered.length; position++) {
    const current = discovered[position];
    const label = `${current.elementType}/${current.elementName}`;
    options2.onProgress?.(position, discovered.length, label);
    collected.push(await runEval(current, options2));
  }
  options2.onProgress?.(discovered.length, discovered.length, "done");
  return collected;
}
104584
+
104585
+ // src/eval/display.ts
104586
/**
 * Selects the colour function for a letter grade: A/B use the success colour,
 * C/D the warning colour, and everything else (including F) the error colour.
 *
 * @param {string} grade
 * @returns {Function} One of `colors5.success`, `colors5.warning`, `colors5.error`.
 */
function gradeColor(grade) {
  if (grade === "A" || grade === "B") {
    return colors5.success;
  }
  if (grade === "C" || grade === "D") {
    return colors5.warning;
  }
  return colors5.error;
}
104599
/**
 * Builds the dimmed dot leader placed after an element name so that following
 * columns line up.
 *
 * @param {string} name2 - The text the leader follows.
 * @param {number} maxWidth - Target column width for name plus leader.
 * @returns {string} A space-padded run of dots, or a single space when fewer
 *   than three columns remain, passed through `colors5.dim`.
 */
function formatDots(name2, maxWidth) {
  const remaining = maxWidth - name2.length;
  // Two of the remaining columns are the spaces surrounding the dots.
  const leader = remaining > 2 ? ` ${".".repeat(remaining - 2)} ` : " ";
  return colors5.dim(leader);
}
104604
/**
 * Prints the per-artifact result table followed by the overall grade.
 *
 * @param {object} summary - Result of summarizeResults.
 * @param {{showDetailsHint?: boolean}} [options] - `showDetailsHint`
 *   (default true) controls the trailing "Run grekt eval --details" line.
 *   displayDetails turns it off, because that hint is pointless when details
 *   are already being shown.
 */
function displaySummary(summary, { showDetailsHint = true } = {}) {
  // Group results by artifact, keeping first-seen order.
  const byArtifact = new Map;
  for (const result of summary.results) {
    const existing = byArtifact.get(result.artifactId) ?? [];
    existing.push(result);
    byArtifact.set(result.artifactId, existing);
  }

  // Width of the name column: the longest "type/name" label, never below 20.
  const maxNameWidth = Math.max(...summary.results.map((r) => `${r.elementType}/${r.elementName}`.length), 20);

  for (const [artifactId, results] of byArtifact) {
    log(colors5.bold(artifactId));
    for (const result of results) {
      const elementPath = `${result.elementType}/${result.elementName}`;
      const dots = formatDots(elementPath, maxNameWidth);
      const passText = `${result.passed}/${result.total} passed`;
      const gradeText = gradeColor(result.grade)(result.grade);
      log(` ${elementPath}${dots}${passText} ${gradeText}`);
    }
    newline();
  }

  const overallColor = gradeColor(summary.overallGrade);
  log(`Overall: ${overallColor(summary.overallGrade)} (${summary.overallScore}/100)`);

  if (summary.totalIssues > 0) {
    log(`${summary.totalIssues} issue${summary.totalIssues === 1 ? "" : "s"} found`);
    if (showDetailsHint) {
      log(`Run ${colors5.highlight("grekt eval --details")} for more info`);
    }
  }
}

/**
 * Prints the summary table and then every recorded failure, grouped by element.
 *
 * @param {object} summary - Result of summarizeResults.
 */
function displayDetails(summary) {
  // Details follow immediately, so suppress the "--details" hint.
  displaySummary(summary, { showDetailsHint: false });

  const failingResults = summary.results.filter((r) => r.failures.length > 0);
  if (failingResults.length === 0) {
    return;
  }

  newline();
  log(colors5.bold("Failures:"));
  newline();
  for (const result of failingResults) {
    log(` ${colors5.bold(`${result.elementType}/${result.elementName}`)} (${result.artifactId})`);
    for (const failure of result.failures) {
      log(` ${symbols.error} ${failure.testDescription}`);
      log(` ${colors5.dim("assertion:")} ${failure.assertionType}`);
      // Empty expected/actual strings are omitted.
      if (failure.expected) {
        log(` ${colors5.dim("expected:")} ${failure.expected}`);
      }
      if (failure.actual) {
        log(` ${colors5.dim("actual:")} ${failure.actual}`);
      }
    }
    newline();
  }
}
104653
/**
 * Prints the summary as JSON indented with two spaces.
 *
 * @param {object} summary - Result of summarizeResults.
 */
function displayJson(summary) {
  const serialized = JSON.stringify(summary, null, 2);
  log(serialized);
}
104656
+
104657
+ // src/commands/eval.ts
104658
/**
 * Translates the `--skill` / `--agent` / `--command` options into a discovery
 * filter. Options are checked in that order and the first truthy one wins.
 *
 * @param {{skill?: string, agent?: string, command?: string}} options2
 * @returns {{elementName: string, elementType: string}|undefined} undefined when none is set.
 */
function buildFilter(options2) {
  const selectors = [
    ["skill", "skills"],
    ["agent", "agents"],
    ["command", "commands"]
  ];
  for (const [optionKey, elementType] of selectors) {
    const elementName = options2[optionKey];
    if (elementName) {
      return { elementName, elementType };
    }
  }
  return undefined;
}
104667
/**
 * Returns the first provider listed under `eval.providers` in the local
 * config. When there is none, prints instructions for adding one and exits
 * the process with status 1.
 *
 * @param {string} projectRoot - Directory passed to getLocalConfig.
 * @returns {string} The first configured provider.
 */
function requireProvider(projectRoot) {
  const firstProvider = getLocalConfig(projectRoot)?.eval?.providers?.[0];
  if (!firstProvider) {
    error("No eval provider configured");
    newline();
    log(" Add a provider to .grekt/config.yaml:");
    newline();
    // Example snippet, printed dimmed line by line.
    log(` ${colors5.dim("eval:")}`);
    log(` ${colors5.dim(" providers:")}`);
    log(` ${colors5.dim(" - openai:gpt-4.1-mini")}`);
    newline();
    return process.exit(1);
  }
  return firstProvider;
}
104684
/**
 * `grekt eval` — discovers eval files for installed artifacts, runs them
 * through the resolved engine and prints a summary.
 *
 * Exit status: 1 when no engine is available, when the run itself fails, or
 * when any test failed; 0 when there is nothing to run or everything passed.
 *
 * Fixes over the previous version:
 *  - A rejected eval run left the spinner running and surfaced as an
 *    unhandled rejection; it is now stopped and reported as an error.
 *  - The exit status was based only on the number of recorded failure
 *    entries; it now also compares passed against total, so a failing test
 *    without a recorded failure entry still yields a non-zero status.
 */
var evalCommand = new Command("eval")
  .description("Run eval tests against artifact elements (skills, agents, commands)")
  .option("--artifact <name>", "Run evals for a specific artifact only")
  .option("--skill <name>", "Run evals for a specific skill only")
  .option("--agent <name>", "Run evals for a specific agent only")
  .option("--command <name>", "Run evals for a specific command only")
  .option("--details", "Show failure details")
  .option("--report", "Open eval dashboard in browser")
  .option("--format <format>", "Output format: text (default), json")
  .action(async (options2) => {
    const projectRoot = process.cwd();
    requireInitialized(projectRoot);

    const engine2 = await resolveEvalEngine();
    if (!engine2)
      process.exit(1);

    // --report only opens the engine's report view; no evals are run.
    if (options2.report) {
      engine2.openReport?.();
      return;
    }

    const defaultProvider = requireProvider(projectRoot);
    const lockfile = getLockfile2(projectRoot);
    const artifactIds = Object.keys(lockfile.artifacts);
    if (artifactIds.length === 0) {
      info("No artifacts installed");
      process.exit(0);
    }

    const filter = buildFilter(options2);
    const allDiscovered = [];
    const allWarnings = [];
    for (const artifactId of artifactIds) {
      if (options2.artifact && artifactId !== options2.artifact)
        continue;
      const artifactDir = join35(projectRoot, ARTIFACTS_DIR, artifactId);
      const result = discoverEvals(fs, { artifactDir, artifactId, filter });
      allDiscovered.push(...result.evals);
      allWarnings.push(...result.warnings);
    }
    for (const w of allWarnings) {
      warning(w.message);
    }

    if (allDiscovered.length === 0) {
      info("No eval files found");
      // Only suggest creating eval files when the user did not narrow the search.
      if (!filter && !options2.artifact) {
        info("Create a .eval.yaml file next to any skill, agent, or command");
      }
      process.exit(0);
    }

    const spin = spinner("Running evals...");
    spin.start();
    let results;
    try {
      results = await runAllEvals(allDiscovered, {
        engine: engine2,
        defaultProvider,
        onProgress(completed, total, current) {
          if (current === "done") {
            spin.stop();
          } else {
            spin.text = `Running evals... (${completed + 1}/${total}) ${current}`;
          }
        }
      });
    } catch (err) {
      // The "done" progress callback never fired, so stop the spinner here.
      spin.stop();
      error(`Eval run failed: ${err instanceof Error ? err.message : String(err)}`);
      process.exit(1);
      return;
    }

    const summary = summarizeResults(results);
    newline();
    if (options2.format === "json") {
      displayJson(summary);
    } else if (options2.details) {
      displayDetails(summary);
    } else {
      displaySummary(summary);
    }

    const hasFailedTests = summary.totalPassed < summary.totalTests;
    if (summary.totalIssues > 0 || hasFailedTests) {
      process.exit(1);
    }
  });
104748
+
104187
104749
  // src/auth/oauth/oauth.ts
104188
104750
  import { spawn } from "child_process";
104189
104751
  import { randomUUID as randomUUID4 } from "crypto";
@@ -104409,7 +104971,7 @@ var whoamiCommand = new Command("whoami").description("Show current user").actio
104409
104971
  // package.json
104410
104972
  var package_default = {
104411
104973
  name: "@grekt/cli",
104412
- version: "6.42.1",
104974
+ version: "6.43.0-beta.2",
104413
104975
  description: "AI tools versioned, synced, and shared across tools and teams",
104414
104976
  type: "module",
104415
104977
  bin: {
@@ -104444,7 +105006,7 @@ var package_default = {
104444
105006
  },
104445
105007
  dependencies: {
104446
105008
  "@aws-sdk/client-s3": "^3.971.0",
104447
- "@grekt/engine": "6.1.1",
105009
+ "@grekt/engine": "6.2.0-beta.1",
104448
105010
  "@inquirer/prompts": "^7.2.0",
104449
105011
  "@supabase/supabase-js": "^2.91.0",
104450
105012
  chalk: "^5.4.1",
@@ -104478,13 +105040,13 @@ var package_default = {
104478
105040
  // src/update-check/update-check.ts
104479
105041
  import { existsSync as existsSync2, mkdirSync as mkdirSync3, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "fs";
104480
105042
  import { homedir as homedir3 } from "os";
104481
- import { join as join35 } from "path";
105043
+ import { join as join36 } from "path";
104482
105044
  var CACHE_FILENAME = ".update-check";
104483
105045
  var STALENESS_MS = 24 * 60 * 60 * 1000;
104484
105046
  var FETCH_TIMEOUT_MS = 1500;
104485
105047
  var GITHUB_RELEASES_URL = "https://api.github.com/repos/grekt-labs/cli/releases/latest";
104486
105048
  function getCachePath() {
104487
- return join35(homedir3(), ".grekt", CACHE_FILENAME);
105049
+ return join36(homedir3(), ".grekt", CACHE_FILENAME);
104488
105050
  }
104489
105051
  function isOptedOut() {
104490
105052
  return process.env.GREKT_NO_UPDATE_CHECK === "1";
@@ -104503,7 +105065,7 @@ function readCache() {
104503
105065
  }
104504
105066
  function writeCache(cache2) {
104505
105067
  try {
104506
- const dir = join35(homedir3(), ".grekt");
105068
+ const dir = join36(homedir3(), ".grekt");
104507
105069
  if (!existsSync2(dir)) {
104508
105070
  mkdirSync3(dir, { recursive: true });
104509
105071
  }
@@ -104617,6 +105179,7 @@ program2.addCommand(worktreeCommand);
104617
105179
  program2.addCommand(scanCommand);
104618
105180
  program2.addCommand(trustCommand);
104619
105181
  program2.addCommand(untrustCommand);
105182
+ program2.addCommand(evalCommand);
104620
105183
  setupUpdateCheck(package_default.version);
104621
105184
  try {
104622
105185
  await program2.parseAsync();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@grekt/cli",
3
- "version": "6.42.1",
3
+ "version": "6.43.0-beta.2",
4
4
  "description": "AI tools versioned, synced, and shared across tools and teams",
5
5
  "type": "module",
6
6
  "bin": {
@@ -35,7 +35,7 @@
35
35
  },
36
36
  "dependencies": {
37
37
  "@aws-sdk/client-s3": "^3.971.0",
38
- "@grekt/engine": "6.1.1",
38
+ "@grekt/engine": "6.2.0-beta.1",
39
39
  "@inquirer/prompts": "^7.2.0",
40
40
  "@supabase/supabase-js": "^2.91.0",
41
41
  "chalk": "^5.4.1",