@grekt/cli 6.42.0 → 6.43.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +576 -8
  2. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -69100,25 +69100,43 @@ var __getProtoOf2 = Object.getPrototypeOf;
69100
69100
  var __defProp2 = Object.defineProperty;
69101
69101
  var __getOwnPropNames2 = Object.getOwnPropertyNames;
69102
69102
  var __hasOwnProp2 = Object.prototype.hasOwnProperty;
69103
+ function __accessProp2(key) {
69104
+ return this[key];
69105
+ }
69106
+ var __toESMCache_node2;
69107
+ var __toESMCache_esm2;
69103
69108
  var __toESM2 = (mod, isNodeMode, target) => {
69109
+ var canCache = mod != null && typeof mod === "object";
69110
+ if (canCache) {
69111
+ var cache2 = isNodeMode ? __toESMCache_node2 ??= new WeakMap : __toESMCache_esm2 ??= new WeakMap;
69112
+ var cached = cache2.get(mod);
69113
+ if (cached)
69114
+ return cached;
69115
+ }
69104
69116
  target = mod != null ? __create2(__getProtoOf2(mod)) : {};
69105
69117
  const to = isNodeMode || !mod || !mod.__esModule ? __defProp2(target, "default", { value: mod, enumerable: true }) : target;
69106
69118
  for (let key of __getOwnPropNames2(mod))
69107
69119
  if (!__hasOwnProp2.call(to, key))
69108
69120
  __defProp2(to, key, {
69109
- get: () => mod[key],
69121
+ get: __accessProp2.bind(mod, key),
69110
69122
  enumerable: true
69111
69123
  });
69124
+ if (canCache)
69125
+ cache2.set(mod, to);
69112
69126
  return to;
69113
69127
  };
69114
69128
  var __commonJS2 = (cb, mod) => () => (mod || cb((mod = { exports: {} }).exports, mod), mod.exports);
69129
+ var __returnValue2 = (v) => v;
69130
+ function __exportSetter2(name2, newValue) {
69131
+ this[name2] = __returnValue2.bind(null, newValue);
69132
+ }
69115
69133
  var __export2 = (target, all) => {
69116
69134
  for (var name2 in all)
69117
69135
  __defProp2(target, name2, {
69118
69136
  get: all[name2],
69119
69137
  enumerable: true,
69120
69138
  configurable: true,
69121
- set: (newValue) => all[name2] = () => newValue
69139
+ set: __exportSetter2.bind(all, name2)
69122
69140
  });
69123
69141
  };
69124
69142
  var __require2 = /* @__PURE__ */ createRequire2(import.meta.url);
@@ -85174,10 +85192,15 @@ var StoredSessionSchema = exports_external.object({
85174
85192
  expires_at: exports_external.number().optional()
85175
85193
  });
85176
85194
  var TokensSchema = exports_external.record(exports_external.string(), exports_external.string());
85195
+ var EvalLocalConfigSchema = exports_external.object({
85196
+ providers: exports_external.array(exports_external.string()).optional(),
85197
+ server: exports_external.string().optional()
85198
+ });
85177
85199
  var LocalConfigSchema = exports_external.object({
85178
85200
  registries: exports_external.record(exports_external.string().regex(/^@/, "Registry scope must start with @"), RegistryEntrySchema).optional(),
85179
85201
  session: StoredSessionSchema.optional(),
85180
- tokens: TokensSchema.optional()
85202
+ tokens: TokensSchema.optional(),
85203
+ eval: EvalLocalConfigSchema.optional()
85181
85204
  });
85182
85205
  var ComponentTypeSchema = exports_external.enum(CATEGORIES);
85183
85206
  var WorkspaceConfigSchema = exports_external.object({
@@ -90151,6 +90174,169 @@ function verifyTrustSignature(artifactId, signature, key) {
90151
90174
  return false;
90152
90175
  return timingSafeEqual(signatureBuffer, expectedBuffer);
90153
90176
  }
90177
+ var EVALUABLE_CATEGORIES = ["agents", "skills", "commands"];
90178
+ var EvalAssertionSchema = exports_external.object({
90179
+ type: exports_external.string(),
90180
+ value: exports_external.union([exports_external.string(), exports_external.array(exports_external.string())]).optional(),
90181
+ threshold: exports_external.number().optional(),
90182
+ weight: exports_external.number().optional()
90183
+ });
90184
+ var EvalTestCaseSchema = exports_external.object({
90185
+ description: exports_external.string().optional(),
90186
+ vars: exports_external.record(exports_external.string(), exports_external.string()),
90187
+ assert: exports_external.array(EvalAssertionSchema).min(1, "At least one assertion is required")
90188
+ });
90189
+ var EvalFileConfigSchema = exports_external.object({
90190
+ provider: exports_external.string().optional(),
90191
+ tests: exports_external.array(EvalTestCaseSchema).min(1, "At least one test case is required")
90192
+ });
90193
+ var EVAL_EXTENSION = ".eval.yaml";
90194
+ function collectFiles2(fs12, dir, basePath = "") {
90195
+ const paths = [];
90196
+ let entries;
90197
+ try {
90198
+ entries = fs12.readdir(dir);
90199
+ } catch {
90200
+ return paths;
90201
+ }
90202
+ for (const entry of entries) {
90203
+ const fullPath = `${dir}/${entry}`;
90204
+ const relativePath = basePath ? `${basePath}/${entry}` : entry;
90205
+ try {
90206
+ const stat = fs12.stat(fullPath);
90207
+ if (stat.isDirectory) {
90208
+ paths.push(...collectFiles2(fs12, fullPath, relativePath));
90209
+ } else {
90210
+ paths.push(relativePath);
90211
+ }
90212
+ } catch {}
90213
+ }
90214
+ return paths;
90215
+ }
90216
+ function discoverEvals(fs12, options2) {
90217
+ const { artifactDir, artifactId, filter } = options2;
90218
+ const evals = [];
90219
+ const warnings = [];
90220
+ const allFiles = collectFiles2(fs12, artifactDir);
90221
+ const evalFiles = allFiles.filter((f) => f.endsWith(EVAL_EXTENSION));
90222
+ for (const evalRelativePath of evalFiles) {
90223
+ const evalFullPath = `${artifactDir}/${evalRelativePath}`;
90224
+ const baseName = evalRelativePath.slice(0, -EVAL_EXTENSION.length);
90225
+ const mdRelativePath = `${baseName}.md`;
90226
+ const mdFullPath = `${artifactDir}/${mdRelativePath}`;
90227
+ if (!fs12.exists(mdFullPath)) {
90228
+ warnings.push({
90229
+ evalFilePath: evalRelativePath,
90230
+ message: `Skipped ${evalRelativePath}: no matching .md file found (expected ${mdRelativePath})`
90231
+ });
90232
+ continue;
90233
+ }
90234
+ let mdContent;
90235
+ try {
90236
+ mdContent = fs12.readFile(mdFullPath);
90237
+ } catch {
90238
+ warnings.push({
90239
+ evalFilePath: evalRelativePath,
90240
+ message: `Skipped ${evalRelativePath}: could not read ${mdRelativePath}`
90241
+ });
90242
+ continue;
90243
+ }
90244
+ const frontmatterResult = parseFrontmatter(mdContent);
90245
+ if (!frontmatterResult.success) {
90246
+ warnings.push({
90247
+ evalFilePath: evalRelativePath,
90248
+ message: `Skipped ${evalRelativePath}: ${mdRelativePath} has invalid or missing frontmatter`
90249
+ });
90250
+ continue;
90251
+ }
90252
+ const { frontmatter: frontmatter2, content: systemPrompt } = frontmatterResult.parsed;
90253
+ const elementType = frontmatter2["grk-type"];
90254
+ const elementName = frontmatter2["grk-name"];
90255
+ if (!EVALUABLE_CATEGORIES.includes(elementType)) {
90256
+ warnings.push({
90257
+ evalFilePath: evalRelativePath,
90258
+ message: `Skipped ${evalRelativePath}: ${elementType} is not evaluable (only agents, skills, commands)`
90259
+ });
90260
+ continue;
90261
+ }
90262
+ let evalYamlContent;
90263
+ try {
90264
+ evalYamlContent = fs12.readFile(evalFullPath);
90265
+ } catch {
90266
+ warnings.push({
90267
+ evalFilePath: evalRelativePath,
90268
+ message: `Skipped ${evalRelativePath}: file could not be read`
90269
+ });
90270
+ continue;
90271
+ }
90272
+ const parseResult = safeParseYaml(evalYamlContent, EvalFileConfigSchema, evalRelativePath);
90273
+ if (!parseResult.success) {
90274
+ const details = parseResult.error.details?.join(", ") ?? "";
90275
+ warnings.push({
90276
+ evalFilePath: evalRelativePath,
90277
+ message: `Skipped ${evalRelativePath}: ${parseResult.error.message}${details ? ` (${details})` : ""}`
90278
+ });
90279
+ continue;
90280
+ }
90281
+ const evalConfig = parseResult.data;
90282
+ if (filter?.elementName && elementName !== filter.elementName)
90283
+ continue;
90284
+ if (filter?.elementType && elementType !== filter.elementType)
90285
+ continue;
90286
+ evals.push({
90287
+ artifactId,
90288
+ elementName,
90289
+ elementType,
90290
+ elementPath: mdRelativePath,
90291
+ systemPrompt: systemPrompt.trim(),
90292
+ evalConfig,
90293
+ evalFilePath: evalRelativePath
90294
+ });
90295
+ }
90296
+ return { evals, warnings };
90297
+ }
90298
+ function calculateScore(passed, total) {
90299
+ if (total === 0)
90300
+ return 0;
90301
+ return Math.round(passed / total * 100);
90302
+ }
90303
+ function scoreToGrade(score) {
90304
+ if (score >= 95)
90305
+ return "A";
90306
+ if (score >= 80)
90307
+ return "B";
90308
+ if (score >= 65)
90309
+ return "C";
90310
+ if (score >= 50)
90311
+ return "D";
90312
+ return "F";
90313
+ }
90314
+ function summarizeResults(results) {
90315
+ if (results.length === 0) {
90316
+ return {
90317
+ results: [],
90318
+ overallScore: 0,
90319
+ overallGrade: "F",
90320
+ totalPassed: 0,
90321
+ totalTests: 0,
90322
+ totalIssues: 0
90323
+ };
90324
+ }
90325
+ const totalPassed = results.reduce((sum, r) => sum + r.passed, 0);
90326
+ const totalTests = results.reduce((sum, r) => sum + r.total, 0);
90327
+ const totalIssues = results.reduce((sum, r) => sum + r.failures.length, 0);
90328
+ const weightedSum = results.reduce((sum, r) => sum + r.score * r.total, 0);
90329
+ const overallScore = totalTests > 0 ? Math.round(weightedSum / totalTests) : 0;
90330
+ const overallGrade = scoreToGrade(overallScore);
90331
+ return {
90332
+ results,
90333
+ overallScore,
90334
+ overallGrade,
90335
+ totalPassed,
90336
+ totalTests,
90337
+ totalIssues
90338
+ };
90339
+ }
90154
90340
 
90155
90341
  // src/constants.ts
90156
90342
  var REGISTRY_HOST2 = "registry.grekt.com";
@@ -102328,6 +102514,9 @@ function detectArtifactBaseRef(artifactName) {
102328
102514
  function createArtifactTag(artifactName, version3) {
102329
102515
  const tag = `${artifactName}@${version3}`;
102330
102516
  exec(["tag", tag]);
102517
+ if (process.env.CI) {
102518
+ execOrNull(["push", "origin", tag]);
102519
+ }
102331
102520
  }
102332
102521
 
102333
102522
  // src/workspace/workspace.ts
@@ -104181,6 +104370,384 @@ var untrustCommand = new Command("untrust").description("Remove trusted status f
104181
104370
  success(`Removed trusted status from ${colors5.highlight(artifactId)}`);
104182
104371
  });
104183
104372
 
104373
+ // src/commands/eval.ts
104374
+ import { join as join35 } from "path";
104375
+
104376
+ // src/eval/promptfoo-engine.ts
104377
+ function detectPromptfoo() {
104378
+ try {
104379
+ const result = Bun.spawnSync(["promptfoo", "--version"], { stdout: "pipe", stderr: "pipe" });
104380
+ if (result.exitCode === 0)
104381
+ return "global";
104382
+ } catch {}
104383
+ try {
104384
+ const result = Bun.spawnSync(["npx", "promptfoo", "--version"], { stdout: "pipe", stderr: "pipe" });
104385
+ if (result.exitCode === 0)
104386
+ return "npx";
104387
+ } catch {}
104388
+ return "none";
104389
+ }
104390
+ function getCommand(mode) {
104391
+ return mode === "npx" ? ["npx", "promptfoo"] : ["promptfoo"];
104392
+ }
104393
+ function assemblePromptfooConfig(config) {
104394
+ return {
104395
+ prompts: [
104396
+ {
104397
+ role: "system",
104398
+ content: config.systemPrompt
104399
+ },
104400
+ {
104401
+ role: "user",
104402
+ content: "{{input}}"
104403
+ }
104404
+ ],
104405
+ providers: [config.provider],
104406
+ tests: config.tests.map((test) => ({
104407
+ description: test.description,
104408
+ vars: test.vars,
104409
+ assert: test.assert.map((a) => ({
104410
+ type: a.type,
104411
+ ...a.value !== undefined && { value: a.value },
104412
+ ...a.threshold !== undefined && { threshold: a.threshold },
104413
+ ...a.weight !== undefined && { weight: a.weight }
104414
+ }))
104415
+ }))
104416
+ };
104417
+ }
104418
+ function extractFailures(promptfooResults) {
104419
+ let passed = 0;
104420
+ let total = 0;
104421
+ const failures = [];
104422
+ for (const result of promptfooResults) {
104423
+ const r = result;
104424
+ const success2 = r.success;
104425
+ total++;
104426
+ if (success2) {
104427
+ passed++;
104428
+ } else {
104429
+ const gradingResult = r.gradingResult;
104430
+ const componentResults = gradingResult?.componentResults ?? [];
104431
+ for (const component of componentResults) {
104432
+ if (!component.pass) {
104433
+ const assertion = component.assertion;
104434
+ failures.push({
104435
+ testDescription: r.description ?? `Test ${total}`,
104436
+ assertionType: assertion?.type ?? "unknown",
104437
+ expected: String(assertion?.value ?? ""),
104438
+ actual: String(component.reason ?? "")
104439
+ });
104440
+ }
104441
+ }
104442
+ if (componentResults.length === 0) {
104443
+ failures.push({
104444
+ testDescription: r.description ?? `Test ${total}`,
104445
+ assertionType: "unknown",
104446
+ expected: "",
104447
+ actual: String(gradingResult?.reason ?? "unknown error")
104448
+ });
104449
+ }
104450
+ }
104451
+ }
104452
+ return { passed, total, failures };
104453
+ }
104454
+ function createPromptfooEngine() {
104455
+ let mode = detectPromptfoo();
104456
+ return {
104457
+ name: "promptfoo",
104458
+ isAvailable() {
104459
+ return mode !== "none";
104460
+ },
104461
+ async ensureAvailable() {
104462
+ if (mode !== "none")
104463
+ return true;
104464
+ const spin = spinner("Downloading promptfoo via npx...");
104465
+ spin.start();
104466
+ Bun.spawnSync(["npx", "promptfoo@latest", "--version"], { stdout: "pipe", stderr: "pipe" });
104467
+ spin.stop();
104468
+ mode = detectPromptfoo();
104469
+ return mode !== "none";
104470
+ },
104471
+ async run(config) {
104472
+ const promptfooConfig = assemblePromptfooConfig(config);
104473
+ if (mode === "global") {
104474
+ try {
104475
+ const promptfoo = await import("promptfoo");
104476
+ const evaluate = promptfoo.evaluate;
104477
+ if (typeof evaluate === "function") {
104478
+ const evaluateResult = await evaluate(promptfooConfig);
104479
+ const results = evaluateResult.results ?? [];
104480
+ return extractFailures(results);
104481
+ }
104482
+ } catch {}
104483
+ }
104484
+ return runViaCli(mode, promptfooConfig);
104485
+ },
104486
+ openReport() {
104487
+ const cmd = getCommand(mode);
104488
+ Bun.spawnSync([...cmd, "view"], { stdout: "inherit", stderr: "inherit" });
104489
+ }
104490
+ };
104491
+ }
104492
+ async function runViaCli(mode, promptfooConfig) {
104493
+ const tempDir = `${process.env.TMPDIR ?? "/tmp"}/grekt-eval-${Date.now()}`;
104494
+ const configPath = `${tempDir}/promptfoo-config.json`;
104495
+ const outputPath = `${tempDir}/output.json`;
104496
+ const { mkdirSync: mkdirSync3, writeFileSync: writeFileSync2, readFileSync: readFileSync2, rmSync: rmSync2 } = await import("fs");
104497
+ mkdirSync3(tempDir, { recursive: true });
104498
+ try {
104499
+ writeFileSync2(configPath, JSON.stringify(promptfooConfig, null, 2));
104500
+ const cmd = getCommand(mode);
104501
+ const result = Bun.spawnSync([...cmd, "eval", "--config", configPath, "--output", outputPath, "--no-cache"], { stdout: "pipe", stderr: "pipe" });
104502
+ if (result.exitCode !== 0) {
104503
+ const stderr = result.stderr.toString();
104504
+ throw new Error(`promptfoo eval failed: ${stderr}`);
104505
+ }
104506
+ const output = JSON.parse(readFileSync2(outputPath, "utf-8"));
104507
+ const results = output.results ?? [];
104508
+ return extractFailures(results);
104509
+ } finally {
104510
+ rmSync2(tempDir, { recursive: true, force: true });
104511
+ }
104512
+ }
104513
+
104514
+ // src/eval/engine-resolver.ts
104515
+ var AVAILABLE_ENGINES = [
104516
+ {
104517
+ name: "promptfoo",
104518
+ description: "Open source LLM eval framework. Runs via npx if not installed",
104519
+ create: createPromptfooEngine
104520
+ }
104521
+ ];
104522
+ async function resolveEvalEngine() {
104523
+ for (const entry2 of AVAILABLE_ENGINES) {
104524
+ const engine3 = entry2.create();
104525
+ if (engine3.isAvailable())
104526
+ return engine3;
104527
+ }
104528
+ newline();
104529
+ info("No eval engine detected");
104530
+ newline();
104531
+ const selected = await esm_default6({
104532
+ message: "Select an eval engine to use:",
104533
+ choices: AVAILABLE_ENGINES.map((entry2) => ({
104534
+ name: `${entry2.name} - ${entry2.description}`,
104535
+ value: entry2.name
104536
+ }))
104537
+ });
104538
+ const entry = AVAILABLE_ENGINES.find((e) => e.name === selected);
104539
+ if (!entry)
104540
+ return null;
104541
+ const engine2 = entry.create();
104542
+ const ready = await engine2.ensureAvailable();
104543
+ if (!ready) {
104544
+ newline();
104545
+ error(`Failed to set up ${entry.name}`);
104546
+ newline();
104547
+ log(" Install it manually with one of:");
104548
+ log(` ${colors5.dim("$")} npm install -g ${entry.name}`);
104549
+ log(` ${colors5.dim("$")} brew install ${entry.name}`);
104550
+ return null;
104551
+ }
104552
+ return engine2;
104553
+ }
104554
+
104555
+ // src/eval/runner.ts
104556
+ async function runEval(discovered, options2) {
104557
+ const provider = discovered.evalConfig.provider ?? options2.defaultProvider;
104558
+ const result = await options2.engine.run({
104559
+ systemPrompt: discovered.systemPrompt,
104560
+ tests: discovered.evalConfig.tests,
104561
+ provider
104562
+ });
104563
+ const score = calculateScore(result.passed, result.total);
104564
+ const grade = scoreToGrade(score);
104565
+ return {
104566
+ artifactId: discovered.artifactId,
104567
+ elementName: discovered.elementName,
104568
+ elementType: discovered.elementType,
104569
+ passed: result.passed,
104570
+ total: result.total,
104571
+ score,
104572
+ grade,
104573
+ failures: result.failures
104574
+ };
104575
+ }
104576
+ async function runAllEvals(discovered, options2) {
104577
+ const results = [];
104578
+ for (const [index, disc] of discovered.entries()) {
104579
+ options2.onProgress?.(index, discovered.length, `${disc.elementType}/${disc.elementName}`);
104580
+ const result = await runEval(disc, options2);
104581
+ results.push(result);
104582
+ }
104583
+ options2.onProgress?.(discovered.length, discovered.length, "done");
104584
+ return results;
104585
+ }
104586
+
104587
+ // src/eval/display.ts
104588
+ function gradeColor(grade) {
104589
+ switch (grade) {
104590
+ case "A":
104591
+ case "B":
104592
+ return colors5.success;
104593
+ case "C":
104594
+ case "D":
104595
+ return colors5.warning;
104596
+ case "F":
104597
+ default:
104598
+ return colors5.error;
104599
+ }
104600
+ }
104601
+ function formatDots(name2, maxWidth) {
104602
+ const dotsNeeded = maxWidth - name2.length;
104603
+ const dots = dotsNeeded > 2 ? " " + ".".repeat(dotsNeeded - 2) + " " : " ";
104604
+ return colors5.dim(dots);
104605
+ }
104606
+ function displaySummary(summary) {
104607
+ const byArtifact = new Map;
104608
+ for (const result of summary.results) {
104609
+ const existing = byArtifact.get(result.artifactId) ?? [];
104610
+ existing.push(result);
104611
+ byArtifact.set(result.artifactId, existing);
104612
+ }
104613
+ const maxNameWidth = Math.max(...summary.results.map((r) => `${r.elementType}/${r.elementName}`.length), 20);
104614
+ for (const [artifactId, results] of byArtifact) {
104615
+ log(colors5.bold(artifactId));
104616
+ for (const result of results) {
104617
+ const elementPath = `${result.elementType}/${result.elementName}`;
104618
+ const dots = formatDots(elementPath, maxNameWidth);
104619
+ const passText = `${result.passed}/${result.total} passed`;
104620
+ const gradeText = gradeColor(result.grade)(result.grade);
104621
+ log(` ${elementPath}${dots}${passText} ${gradeText}`);
104622
+ }
104623
+ newline();
104624
+ }
104625
+ const overallColor = gradeColor(summary.overallGrade);
104626
+ log(`Overall: ${overallColor(summary.overallGrade)} (${summary.overallScore}/100)`);
104627
+ if (summary.totalIssues > 0) {
104628
+ log(`${summary.totalIssues} issue${summary.totalIssues === 1 ? "" : "s"} found`);
104629
+ log(`Run ${colors5.highlight("grekt eval --details")} for more info`);
104630
+ }
104631
+ }
104632
+ function displayDetails(summary) {
104633
+ displaySummary(summary);
104634
+ const failingResults = summary.results.filter((r) => r.failures.length > 0);
104635
+ if (failingResults.length === 0)
104636
+ return;
104637
+ newline();
104638
+ log(colors5.bold("Failures:"));
104639
+ newline();
104640
+ for (const result of failingResults) {
104641
+ log(` ${colors5.bold(`${result.elementType}/${result.elementName}`)} (${result.artifactId})`);
104642
+ for (const failure of result.failures) {
104643
+ log(` ${symbols.error} ${failure.testDescription}`);
104644
+ log(` ${colors5.dim("assertion:")} ${failure.assertionType}`);
104645
+ if (failure.expected) {
104646
+ log(` ${colors5.dim("expected:")} ${failure.expected}`);
104647
+ }
104648
+ if (failure.actual) {
104649
+ log(` ${colors5.dim("actual:")} ${failure.actual}`);
104650
+ }
104651
+ }
104652
+ newline();
104653
+ }
104654
+ }
104655
+ function displayJson(summary) {
104656
+ log(JSON.stringify(summary, null, 2));
104657
+ }
104658
+
104659
+ // src/commands/eval.ts
104660
+ function buildFilter(options2) {
104661
+ if (options2.skill)
104662
+ return { elementName: options2.skill, elementType: "skills" };
104663
+ if (options2.agent)
104664
+ return { elementName: options2.agent, elementType: "agents" };
104665
+ if (options2.command)
104666
+ return { elementName: options2.command, elementType: "commands" };
104667
+ return;
104668
+ }
104669
+ function requireProvider(projectRoot) {
104670
+ const localConfig = getLocalConfig(projectRoot);
104671
+ const providers = localConfig?.eval?.providers;
104672
+ const firstProvider = providers?.[0];
104673
+ if (firstProvider) {
104674
+ return firstProvider;
104675
+ }
104676
+ error("No eval provider configured");
104677
+ newline();
104678
+ log(" Add a provider to .grekt/config.yaml:");
104679
+ newline();
104680
+ log(` ${colors5.dim("eval:")}`);
104681
+ log(` ${colors5.dim(" providers:")}`);
104682
+ log(` ${colors5.dim(" - openai:gpt-4.1-mini")}`);
104683
+ newline();
104684
+ return process.exit(1);
104685
+ }
104686
+ var evalCommand = new Command("eval").description("Run eval tests against artifact elements (skills, agents, commands)").option("--artifact <name>", "Run evals for a specific artifact only").option("--skill <name>", "Run evals for a specific skill only").option("--agent <name>", "Run evals for a specific agent only").option("--command <name>", "Run evals for a specific command only").option("--details", "Show failure details").option("--report", "Open eval dashboard in browser").option("--format <format>", "Output format: text (default), json").action(async (options2) => {
104687
+ const projectRoot = process.cwd();
104688
+ requireInitialized(projectRoot);
104689
+ const engine2 = await resolveEvalEngine();
104690
+ if (!engine2)
104691
+ process.exit(1);
104692
+ if (options2.report) {
104693
+ engine2.openReport?.();
104694
+ return;
104695
+ }
104696
+ const defaultProvider = requireProvider(projectRoot);
104697
+ const lockfile = getLockfile2(projectRoot);
104698
+ const artifactIds = Object.keys(lockfile.artifacts);
104699
+ if (artifactIds.length === 0) {
104700
+ info("No artifacts installed");
104701
+ process.exit(0);
104702
+ }
104703
+ const filter = buildFilter(options2);
104704
+ const allDiscovered = [];
104705
+ const allWarnings = [];
104706
+ for (const artifactId of artifactIds) {
104707
+ if (options2.artifact && artifactId !== options2.artifact)
104708
+ continue;
104709
+ const artifactDir = join35(projectRoot, ARTIFACTS_DIR, artifactId);
104710
+ const result = discoverEvals(fs, { artifactDir, artifactId, filter });
104711
+ allDiscovered.push(...result.evals);
104712
+ allWarnings.push(...result.warnings);
104713
+ }
104714
+ for (const w of allWarnings) {
104715
+ warning(w.message);
104716
+ }
104717
+ if (allDiscovered.length === 0) {
104718
+ info("No eval files found");
104719
+ if (!filter && !options2.artifact) {
104720
+ info("Create a .eval.yaml file next to any skill, agent, or command");
104721
+ }
104722
+ process.exit(0);
104723
+ }
104724
+ const spin = spinner("Running evals...");
104725
+ spin.start();
104726
+ const results = await runAllEvals(allDiscovered, {
104727
+ engine: engine2,
104728
+ defaultProvider,
104729
+ onProgress(completed, total, current) {
104730
+ if (current === "done") {
104731
+ spin.stop();
104732
+ } else {
104733
+ spin.text = `Running evals... (${completed + 1}/${total}) ${current}`;
104734
+ }
104735
+ }
104736
+ });
104737
+ const summary = summarizeResults(results);
104738
+ newline();
104739
+ if (options2.format === "json") {
104740
+ displayJson(summary);
104741
+ } else if (options2.details) {
104742
+ displayDetails(summary);
104743
+ } else {
104744
+ displaySummary(summary);
104745
+ }
104746
+ if (summary.totalIssues > 0) {
104747
+ process.exit(1);
104748
+ }
104749
+ });
104750
+
104184
104751
  // src/auth/oauth/oauth.ts
104185
104752
  import { spawn } from "child_process";
104186
104753
  import { randomUUID as randomUUID4 } from "crypto";
@@ -104406,7 +104973,7 @@ var whoamiCommand = new Command("whoami").description("Show current user").actio
104406
104973
  // package.json
104407
104974
  var package_default = {
104408
104975
  name: "@grekt/cli",
104409
- version: "6.42.0",
104976
+ version: "6.43.0-beta.1",
104410
104977
  description: "AI tools versioned, synced, and shared across tools and teams",
104411
104978
  type: "module",
104412
104979
  bin: {
@@ -104441,7 +105008,7 @@ var package_default = {
104441
105008
  },
104442
105009
  dependencies: {
104443
105010
  "@aws-sdk/client-s3": "^3.971.0",
104444
- "@grekt/engine": "6.1.1",
105011
+ "@grekt/engine": "6.2.0-beta.1",
104445
105012
  "@inquirer/prompts": "^7.2.0",
104446
105013
  "@supabase/supabase-js": "^2.91.0",
104447
105014
  chalk: "^5.4.1",
@@ -104475,13 +105042,13 @@ var package_default = {
104475
105042
  // src/update-check/update-check.ts
104476
105043
  import { existsSync as existsSync2, mkdirSync as mkdirSync3, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "fs";
104477
105044
  import { homedir as homedir3 } from "os";
104478
- import { join as join35 } from "path";
105045
+ import { join as join36 } from "path";
104479
105046
  var CACHE_FILENAME = ".update-check";
104480
105047
  var STALENESS_MS = 24 * 60 * 60 * 1000;
104481
105048
  var FETCH_TIMEOUT_MS = 1500;
104482
105049
  var GITHUB_RELEASES_URL = "https://api.github.com/repos/grekt-labs/cli/releases/latest";
104483
105050
  function getCachePath() {
104484
- return join35(homedir3(), ".grekt", CACHE_FILENAME);
105051
+ return join36(homedir3(), ".grekt", CACHE_FILENAME);
104485
105052
  }
104486
105053
  function isOptedOut() {
104487
105054
  return process.env.GREKT_NO_UPDATE_CHECK === "1";
@@ -104500,7 +105067,7 @@ function readCache() {
104500
105067
  }
104501
105068
  function writeCache(cache2) {
104502
105069
  try {
104503
- const dir = join35(homedir3(), ".grekt");
105070
+ const dir = join36(homedir3(), ".grekt");
104504
105071
  if (!existsSync2(dir)) {
104505
105072
  mkdirSync3(dir, { recursive: true });
104506
105073
  }
@@ -104614,6 +105181,7 @@ program2.addCommand(worktreeCommand);
104614
105181
  program2.addCommand(scanCommand);
104615
105182
  program2.addCommand(trustCommand);
104616
105183
  program2.addCommand(untrustCommand);
105184
+ program2.addCommand(evalCommand);
104617
105185
  setupUpdateCheck(package_default.version);
104618
105186
  try {
104619
105187
  await program2.parseAsync();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@grekt/cli",
3
- "version": "6.42.0",
3
+ "version": "6.43.0-beta.1",
4
4
  "description": "AI tools versioned, synced, and shared across tools and teams",
5
5
  "type": "module",
6
6
  "bin": {
@@ -35,7 +35,7 @@
35
35
  },
36
36
  "dependencies": {
37
37
  "@aws-sdk/client-s3": "^3.971.0",
38
- "@grekt/engine": "6.1.1",
38
+ "@grekt/engine": "6.2.0-beta.1",
39
39
  "@inquirer/prompts": "^7.2.0",
40
40
  "@supabase/supabase-js": "^2.91.0",
41
41
  "chalk": "^5.4.1",