struere 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19269,32 +19269,6 @@ function getApiKey() {
19269
19269
 
19270
19270
  // src/cli/utils/convex.ts
19271
19271
  var CONVEX_URL = process.env.STRUERE_CONVEX_URL || "https://rapid-wildebeest-172.convex.cloud";
19272
- async function listAgents() {
19273
- const credentials = loadCredentials();
19274
- const apiKey = getApiKey();
19275
- const token = apiKey || credentials?.token;
19276
- if (!token) {
19277
- return { agents: [], error: "Not authenticated" };
19278
- }
19279
- const response = await fetch(`${CONVEX_URL}/api/query`, {
19280
- method: "POST",
19281
- headers: {
19282
- "Content-Type": "application/json",
19283
- Authorization: `Bearer ${token}`
19284
- },
19285
- body: JSON.stringify({
19286
- path: "agents:list",
19287
- args: {}
19288
- })
19289
- });
19290
- if (!response.ok) {
19291
- const error = await response.text();
19292
- return { agents: [], error };
19293
- }
19294
- const result = await response.json();
19295
- const agents = Array.isArray(result) ? result : result?.value || [];
19296
- return { agents };
19297
- }
19298
19272
  async function listMyOrganizations(token) {
19299
19273
  const response = await fetch(`${CONVEX_URL}/api/query`, {
19300
19274
  method: "POST",
@@ -19556,267 +19530,6 @@ async function getPullState(organizationId, environment = "development", include
19556
19530
  }
19557
19531
  return { error: `Unexpected response: ${JSON.stringify(result)}` };
19558
19532
  }
19559
- async function resolveAgentId(agentSlug) {
19560
- const { agents } = await listAgents();
19561
- const agent = agents.find((a) => a.slug === agentSlug || a._id === agentSlug);
19562
- return agent?._id;
19563
- }
19564
- async function syncEvalSuites(suites) {
19565
- const credentials = loadCredentials();
19566
- const apiKey = getApiKey();
19567
- const token = apiKey || credentials?.token;
19568
- if (!token) {
19569
- return { error: "Not authenticated" };
19570
- }
19571
- const suiteIds = {};
19572
- for (const suite of suites) {
19573
- const agentId = await resolveAgentId(suite.agent);
19574
- if (!agentId) {
19575
- return { error: `Agent "${suite.agent}" not found. Make sure the agent exists before syncing evals.` };
19576
- }
19577
- const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
19578
- method: "POST",
19579
- headers: {
19580
- "Content-Type": "application/json",
19581
- Authorization: `Bearer ${token}`
19582
- },
19583
- body: JSON.stringify({
19584
- path: "evals:listAllSuites",
19585
- args: { environment: "development" }
19586
- })
19587
- });
19588
- let existingSuiteId;
19589
- if (listResponse.ok) {
19590
- const listResult = await listResponse.json();
19591
- const existing = (listResult.value || []).find((s) => s.slug === suite.slug);
19592
- if (existing) {
19593
- existingSuiteId = existing._id;
19594
- }
19595
- }
19596
- if (!existingSuiteId) {
19597
- const createResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19598
- method: "POST",
19599
- headers: {
19600
- "Content-Type": "application/json",
19601
- Authorization: `Bearer ${token}`
19602
- },
19603
- body: JSON.stringify({
19604
- path: "evals:createSuite",
19605
- args: {
19606
- agentId,
19607
- name: suite.suite,
19608
- slug: suite.slug,
19609
- description: suite.description,
19610
- tags: suite.tags,
19611
- judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined,
19612
- environment: "development"
19613
- }
19614
- })
19615
- });
19616
- if (!createResponse.ok) {
19617
- const error = await createResponse.text();
19618
- return { error: `Failed to create suite "${suite.suite}": ${error}` };
19619
- }
19620
- const createResult = await createResponse.json();
19621
- existingSuiteId = createResult.value;
19622
- } else {
19623
- const updateResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19624
- method: "POST",
19625
- headers: {
19626
- "Content-Type": "application/json",
19627
- Authorization: `Bearer ${token}`
19628
- },
19629
- body: JSON.stringify({
19630
- path: "evals:updateSuite",
19631
- args: {
19632
- id: existingSuiteId,
19633
- name: suite.suite,
19634
- description: suite.description,
19635
- tags: suite.tags,
19636
- judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined
19637
- }
19638
- })
19639
- });
19640
- if (!updateResponse.ok) {
19641
- const error = await updateResponse.text();
19642
- return { error: `Failed to update suite "${suite.suite}": ${error}` };
19643
- }
19644
- const deleteResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19645
- method: "POST",
19646
- headers: {
19647
- "Content-Type": "application/json",
19648
- Authorization: `Bearer ${token}`
19649
- },
19650
- body: JSON.stringify({
19651
- path: "evals:deleteCasesBySuite",
19652
- args: { suiteId: existingSuiteId }
19653
- })
19654
- });
19655
- if (!deleteResponse.ok) {
19656
- const error = await deleteResponse.text();
19657
- return { error: `Failed to clear cases for suite "${suite.suite}": ${error}` };
19658
- }
19659
- }
19660
- if (existingSuiteId) {
19661
- suiteIds[suite.slug] = existingSuiteId;
19662
- for (let i = 0;i < suite.cases.length; i++) {
19663
- const c = suite.cases[i];
19664
- const turns = c.turns.map((t) => ({
19665
- userMessage: t.user,
19666
- assertions: t.assertions?.map((a) => ({
19667
- type: a.type,
19668
- ...a.criteria ? { criteria: a.criteria } : {},
19669
- ...a.value ? { value: a.value } : {},
19670
- ...a.weight ? { weight: a.weight } : {}
19671
- }))
19672
- }));
19673
- const finalAssertions = c.finalAssertions?.map((a) => ({
19674
- type: a.type,
19675
- ...a.criteria ? { criteria: a.criteria } : {},
19676
- ...a.value ? { value: a.value } : {},
19677
- ...a.weight ? { weight: a.weight } : {}
19678
- }));
19679
- const caseResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19680
- method: "POST",
19681
- headers: {
19682
- "Content-Type": "application/json",
19683
- Authorization: `Bearer ${token}`
19684
- },
19685
- body: JSON.stringify({
19686
- path: "evals:createCase",
19687
- args: {
19688
- suiteId: existingSuiteId,
19689
- name: c.name,
19690
- description: c.description,
19691
- tags: c.tags,
19692
- turns,
19693
- finalAssertions
19694
- }
19695
- })
19696
- });
19697
- if (!caseResponse.ok) {
19698
- const error = await caseResponse.text();
19699
- return { error: `Failed to create case "${c.name}": ${error}` };
19700
- }
19701
- }
19702
- }
19703
- }
19704
- return { suiteIds };
19705
- }
19706
- async function startEvalRun(suiteSlug) {
19707
- const credentials = loadCredentials();
19708
- const apiKey = getApiKey();
19709
- const token = apiKey || credentials?.token;
19710
- if (!token) {
19711
- return { error: "Not authenticated" };
19712
- }
19713
- const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
19714
- method: "POST",
19715
- headers: {
19716
- "Content-Type": "application/json",
19717
- Authorization: `Bearer ${token}`
19718
- },
19719
- body: JSON.stringify({
19720
- path: "evals:listAllSuites",
19721
- args: { environment: "development" }
19722
- })
19723
- });
19724
- if (!listResponse.ok) {
19725
- return { error: "Failed to list suites" };
19726
- }
19727
- const listResult = await listResponse.json();
19728
- const suite = (listResult.value || []).find((s) => s.slug === suiteSlug);
19729
- if (!suite) {
19730
- return { error: `Suite "${suiteSlug}" not found` };
19731
- }
19732
- const response = await fetch(`${CONVEX_URL}/api/mutation`, {
19733
- method: "POST",
19734
- headers: {
19735
- "Content-Type": "application/json",
19736
- Authorization: `Bearer ${token}`
19737
- },
19738
- body: JSON.stringify({
19739
- path: "evals:startRun",
19740
- args: {
19741
- suiteId: suite._id,
19742
- triggerSource: "cli"
19743
- }
19744
- })
19745
- });
19746
- if (!response.ok) {
19747
- const error = await response.text();
19748
- return { error };
19749
- }
19750
- const result = await response.json();
19751
- return { runId: result.value, suiteId: suite._id };
19752
- }
19753
- async function pollEvalRun(runId, onProgress) {
19754
- const maxAttempts = 300;
19755
- const pollInterval = 2000;
19756
- for (let i = 0;i < maxAttempts; i++) {
19757
- const currentCredentials = loadCredentials();
19758
- const currentApiKey = getApiKey();
19759
- const token = currentApiKey || currentCredentials?.token;
19760
- if (!token) {
19761
- return { error: 'Authentication expired. Please run "struere login" again.' };
19762
- }
19763
- const response = await fetch(`${CONVEX_URL}/api/query`, {
19764
- method: "POST",
19765
- headers: {
19766
- "Content-Type": "application/json",
19767
- Authorization: `Bearer ${token}`
19768
- },
19769
- body: JSON.stringify({
19770
- path: "evals:getRun",
19771
- args: { id: runId }
19772
- })
19773
- });
19774
- if (!response.ok) {
19775
- if (response.status === 401) {
19776
- return { error: 'Authentication expired. Please run "struere login" again.' };
19777
- }
19778
- return { error: `Failed to poll run status (HTTP ${response.status})` };
19779
- }
19780
- const result = await response.json();
19781
- const run = result.value;
19782
- if (!run) {
19783
- return { error: "Run not found" };
19784
- }
19785
- if (onProgress) {
19786
- onProgress(run);
19787
- }
19788
- if (run.status === "completed" || run.status === "failed" || run.status === "cancelled") {
19789
- return { run };
19790
- }
19791
- await new Promise((resolve) => setTimeout(resolve, pollInterval));
19792
- }
19793
- return { error: "Run timed out after 10 minutes" };
19794
- }
19795
- async function getEvalRunResults(runId) {
19796
- const credentials = loadCredentials();
19797
- const apiKey = getApiKey();
19798
- const token = apiKey || credentials?.token;
19799
- if (!token) {
19800
- return { error: "Not authenticated" };
19801
- }
19802
- const response = await fetch(`${CONVEX_URL}/api/query`, {
19803
- method: "POST",
19804
- headers: {
19805
- "Content-Type": "application/json",
19806
- Authorization: `Bearer ${token}`
19807
- },
19808
- body: JSON.stringify({
19809
- path: "evals:getRunResults",
19810
- args: { runId }
19811
- })
19812
- });
19813
- if (!response.ok) {
19814
- const error = await response.text();
19815
- return { error };
19816
- }
19817
- const result = await response.json();
19818
- return { results: result.value || [] };
19819
- }
19820
19533
 
19821
19534
  // src/cli/commands/login.ts
19822
19535
  var AUTH_CALLBACK_PORT = 9876;
@@ -21248,7 +20961,8 @@ import { join as join5 } from "path";
21248
20961
  import { existsSync as existsSync5, writeFileSync as writeFileSync4 } from "fs";
21249
20962
 
21250
20963
  // src/cli/utils/loader.ts
21251
- import { existsSync as existsSync4, readdirSync } from "fs";
20964
+ var import_yaml = __toESM(require_dist(), 1);
20965
+ import { existsSync as existsSync4, readdirSync, readFileSync as readFileSync4 } from "fs";
21252
20966
  import { join as join4 } from "path";
21253
20967
  async function loadAllResources(cwd) {
21254
20968
  const errors2 = [];
@@ -21259,7 +20973,9 @@ async function loadAllResources(cwd) {
21259
20973
  if (toolsError) {
21260
20974
  errors2.push(toolsError);
21261
20975
  }
21262
- return { agents, entityTypes, roles, customTools, errors: errors2 };
20976
+ const { suites: evalSuites, errors: evalErrors } = loadAllEvalSuites(join4(cwd, "evals"));
20977
+ errors2.push(...evalErrors);
20978
+ return { agents, entityTypes, roles, customTools, evalSuites, errors: errors2 };
21263
20979
  }
21264
20980
  async function loadAllAgents(dir) {
21265
20981
  if (!existsSync4(dir)) {
@@ -21349,12 +21065,31 @@ async function loadFromDirectory(dir) {
21349
21065
  }
21350
21066
  return items;
21351
21067
  }
21068
+ function loadAllEvalSuites(dir) {
21069
+ const suites = [];
21070
+ const errors2 = [];
21071
+ if (!existsSync4(dir)) {
21072
+ return { suites, errors: errors2 };
21073
+ }
21074
+ const files = readdirSync(dir).filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
21075
+ for (const file of files) {
21076
+ try {
21077
+ const content = readFileSync4(join4(dir, file), "utf-8");
21078
+ const parsed = import_yaml.default.parse(content);
21079
+ suites.push(parsed);
21080
+ } catch (err) {
21081
+ errors2.push(`Failed to parse ${file}: ${err instanceof Error ? err.message : String(err)}`);
21082
+ }
21083
+ }
21084
+ return { suites, errors: errors2 };
21085
+ }
21352
21086
  function getResourceDirectories(cwd) {
21353
21087
  return {
21354
21088
  agents: join4(cwd, "agents"),
21355
21089
  entityTypes: join4(cwd, "entity-types"),
21356
21090
  roles: join4(cwd, "roles"),
21357
- tools: join4(cwd, "tools")
21091
+ tools: join4(cwd, "tools"),
21092
+ evals: join4(cwd, "evals")
21358
21093
  };
21359
21094
  }
21360
21095
 
@@ -21407,7 +21142,25 @@ function extractSyncPayload(resources) {
21407
21142
  maskConfig: fm.maskConfig
21408
21143
  }))
21409
21144
  }));
21410
- return { agents, entityTypes, roles };
21145
+ const evalSuites = resources.evalSuites.length > 0 ? resources.evalSuites.map((suite) => ({
21146
+ name: suite.suite,
21147
+ slug: suite.slug,
21148
+ agentSlug: suite.agent,
21149
+ description: suite.description,
21150
+ tags: suite.tags,
21151
+ judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined,
21152
+ cases: suite.cases.map((c) => ({
21153
+ name: c.name,
21154
+ description: c.description,
21155
+ tags: c.tags,
21156
+ turns: c.turns.map((t) => ({
21157
+ userMessage: t.user,
21158
+ assertions: t.assertions
21159
+ })),
21160
+ finalAssertions: c.finalAssertions
21161
+ }))
21162
+ })) : undefined;
21163
+ return { agents, entityTypes, roles, evalSuites };
21411
21164
  }
21412
21165
  function extractAgentPayload(agent, customToolsMap) {
21413
21166
  let systemPrompt;
@@ -21723,7 +21476,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
21723
21476
  spinner.start("Loading resources");
21724
21477
  try {
21725
21478
  const resources = await loadAllResources(cwd);
21726
- spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
21479
+ spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
21727
21480
  for (const err of resources.errors) {
21728
21481
  console.log(source_default.red(" \u2716"), err);
21729
21482
  }
@@ -21781,6 +21534,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
21781
21534
  dirs.entityTypes,
21782
21535
  dirs.roles,
21783
21536
  dirs.tools,
21537
+ dirs.evals,
21784
21538
  join5(cwd, "struere.config.ts")
21785
21539
  ].filter((p) => existsSync5(p));
21786
21540
  const watcher = import_chokidar.default.watch(watchPaths, {
@@ -22055,7 +21809,7 @@ var buildCommand = new Command("build").description("Build and validate agent fo
22055
21809
  });
22056
21810
 
22057
21811
  // src/cli/commands/test.ts
22058
- var import_yaml = __toESM(require_dist(), 1);
21812
+ var import_yaml2 = __toESM(require_dist(), 1);
22059
21813
  import { join as join9 } from "path";
22060
21814
  import { readdir, readFile } from "fs/promises";
22061
21815
  var testCommand = new Command("test").description("Run test conversations").argument("[pattern]", "Test file pattern", "*.test.yaml").option("-v, --verbose", "Show detailed output").option("--dry-run", "Parse tests without executing (no API calls)").action(async (pattern, options) => {
@@ -22107,7 +21861,7 @@ var testCommand = new Command("test").description("Run test conversations").argu
22107
21861
  for (const file of testFiles) {
22108
21862
  const filePath = join9(testsDir, file);
22109
21863
  const content = await readFile(filePath, "utf-8");
22110
- const testCase = import_yaml.default.parse(content);
21864
+ const testCase = import_yaml2.default.parse(content);
22111
21865
  if (options.verbose) {
22112
21866
  console.log();
22113
21867
  console.log(source_default.gray("Running:"), testCase.name);
@@ -22240,174 +21994,6 @@ function formatAssertionError(assertion, context) {
22240
21994
  }
22241
21995
  }
22242
21996
 
22243
- // src/cli/commands/eval.ts
22244
- var import_yaml2 = __toESM(require_dist(), 1);
22245
- import { join as join10 } from "path";
22246
- import { readdir as readdir2, readFile as readFile2 } from "fs/promises";
22247
- var evalCommand = new Command("eval").description("Run agent evaluations with LLM judge support").option("-s, --suite <name>", "Run a specific suite by name").option("-v, --verbose", "Show detailed output including judge reasoning").option("--dry-run", "Parse eval files without executing").option("--json", "Output results as JSON").option("--no-sync", "Skip syncing eval files to Convex").action(async (options) => {
22248
- const spinner = ora();
22249
- const cwd = process.cwd();
22250
- if (!options.json) {
22251
- console.log();
22252
- console.log(source_default.bold("Running Evaluations"));
22253
- console.log();
22254
- }
22255
- if (!hasProject(cwd)) {
22256
- console.log(source_default.yellow("No struere.json found"));
22257
- console.log();
22258
- console.log(source_default.gray("Run"), source_default.cyan("struere init"), source_default.gray("to initialize this project"));
22259
- console.log();
22260
- process.exit(1);
22261
- }
22262
- const project = loadProject(cwd);
22263
- if (!project) {
22264
- console.log(source_default.red("Failed to load struere.json"));
22265
- process.exit(1);
22266
- }
22267
- spinner.start("Finding eval files");
22268
- const evalsDir = join10(cwd, "evals");
22269
- let evalFiles = [];
22270
- try {
22271
- const files = await readdir2(evalsDir);
22272
- evalFiles = files.filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
22273
- } catch {
22274
- spinner.warn("No evals directory found");
22275
- console.log();
22276
- console.log(source_default.gray("Create eval files in"), source_default.cyan("evals/*.eval.yaml"));
22277
- console.log();
22278
- return;
22279
- }
22280
- if (evalFiles.length === 0) {
22281
- spinner.warn("No eval files found");
22282
- console.log();
22283
- return;
22284
- }
22285
- spinner.succeed(`Found ${evalFiles.length} eval file(s)`);
22286
- const suites = [];
22287
- for (const file of evalFiles) {
22288
- const filePath = join10(evalsDir, file);
22289
- const content = await readFile2(filePath, "utf-8");
22290
- const parsed = import_yaml2.default.parse(content);
22291
- suites.push(parsed);
22292
- }
22293
- if (options.suite) {
22294
- const filtered = suites.filter((s) => s.suite.toLowerCase() === options.suite.toLowerCase() || s.slug === options.suite);
22295
- if (filtered.length === 0) {
22296
- console.log(source_default.red(`Suite "${options.suite}" not found`));
22297
- process.exit(1);
22298
- }
22299
- suites.length = 0;
22300
- suites.push(...filtered);
22301
- }
22302
- if (options.dryRun) {
22303
- console.log();
22304
- console.log(source_default.yellow("Dry run mode \u2014 parsed successfully"));
22305
- console.log();
22306
- for (const suite of suites) {
22307
- console.log(source_default.cyan(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
22308
- for (const c of suite.cases) {
22309
- const assertionCount = (c.turns || []).reduce((sum, t) => sum + (t.assertions?.length || 0), 0) + (c.finalAssertions?.length || 0);
22310
- console.log(source_default.gray(` - ${c.name}`), source_default.gray(`(${c.turns.length} turns, ${assertionCount} assertions)`));
22311
- }
22312
- }
22313
- console.log();
22314
- return;
22315
- }
22316
- if (options.sync !== false) {
22317
- spinner.start("Syncing eval suites to Convex");
22318
- const syncResult = await syncEvalSuites(suites);
22319
- if (syncResult.error) {
22320
- spinner.fail(`Sync failed: ${syncResult.error}`);
22321
- process.exit(1);
22322
- }
22323
- spinner.succeed("Eval suites synced");
22324
- }
22325
- const allResults = [];
22326
- for (const suite of suites) {
22327
- if (!options.json) {
22328
- console.log();
22329
- console.log(source_default.bold(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
22330
- }
22331
- spinner.start(`Starting run for "${suite.suite}"`);
22332
- const { runId, suiteId, error: startError } = await startEvalRun(suite.slug);
22333
- if (startError || !runId) {
22334
- spinner.fail(`Failed to start: ${startError}`);
22335
- continue;
22336
- }
22337
- spinner.succeed(`Run started`);
22338
- spinner.start("Executing cases...");
22339
- const { run, error: pollError } = await pollEvalRun(runId, (status) => {
22340
- spinner.text = `Executing cases... ${status.completedCases}/${status.totalCases}`;
22341
- });
22342
- if (pollError || !run) {
22343
- spinner.fail(`Run failed: ${pollError}`);
22344
- continue;
22345
- }
22346
- spinner.succeed(`Run completed: ${run.passedCases}/${run.totalCases} passed`);
22347
- const { results, error: resultsError } = await getEvalRunResults(runId);
22348
- if (resultsError) {
22349
- console.log(source_default.red(` Failed to get results: ${resultsError}`));
22350
- continue;
22351
- }
22352
- allResults.push({ suite: suite.suite, run, results: results || [] });
22353
- if (!options.json) {
22354
- for (let ri = 0;ri < (results || []).length; ri++) {
22355
- const result = results[ri];
22356
- const caseName = ri < suite.cases.length ? suite.cases[ri].name : result.caseId;
22357
- if (result.overallPassed) {
22358
- console.log(source_default.green(" \u2713"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
22359
- } else {
22360
- console.log(source_default.red(" \u2717"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
22361
- if (result.errorMessage) {
22362
- console.log(source_default.red(" Error:"), result.errorMessage);
22363
- }
22364
- if (options.verbose && result.turnResults) {
22365
- for (const turn of result.turnResults) {
22366
- if (turn.assertionResults) {
22367
- for (const ar of turn.assertionResults) {
22368
- if (!ar.passed) {
22369
- console.log(source_default.red(` [${ar.type}]`), ar.reason || "");
22370
- if (ar.criteria) {
22371
- console.log(source_default.gray(` Criteria: ${ar.criteria}`));
22372
- }
22373
- }
22374
- }
22375
- }
22376
- }
22377
- }
22378
- }
22379
- }
22380
- }
22381
- }
22382
- if (options.json) {
22383
- console.log(JSON.stringify(allResults, null, 2));
22384
- const anyFailed = allResults.some((r) => r.run.failedCases > 0);
22385
- process.exit(anyFailed ? 1 : 0);
22386
- return;
22387
- }
22388
- const totalPassed = allResults.reduce((sum, r) => sum + r.run.passedCases, 0);
22389
- const totalCases = allResults.reduce((sum, r) => sum + r.run.totalCases, 0);
22390
- const totalFailed = allResults.reduce((sum, r) => sum + r.run.failedCases, 0);
22391
- console.log();
22392
- if (totalFailed === 0) {
22393
- console.log(source_default.green("All evaluations passed!"), source_default.gray(`(${totalPassed}/${totalCases})`));
22394
- } else {
22395
- console.log(source_default.red("Evaluations failed:"), source_default.gray(`${totalPassed}/${totalCases} passed`));
22396
- }
22397
- const totalTokens = allResults.reduce((sum, r) => {
22398
- if (r.run.totalTokens)
22399
- return sum + r.run.totalTokens.agent + r.run.totalTokens.judge;
22400
- return sum;
22401
- }, 0);
22402
- if (totalTokens > 0) {
22403
- console.log(source_default.gray(` Total tokens used: ${totalTokens.toLocaleString()}`));
22404
- }
22405
- console.log();
22406
- if (totalFailed > 0) {
22407
- process.exit(1);
22408
- }
22409
- });
22410
-
22411
21997
  // src/cli/commands/deploy.ts
22412
21998
  var isAuthError = (error) => {
22413
21999
  const message = error instanceof Error ? error.message : String(error);
@@ -22463,7 +22049,7 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
22463
22049
  let resources;
22464
22050
  try {
22465
22051
  resources = await loadAllResources(cwd);
22466
- spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
22052
+ spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
22467
22053
  for (const err of resources.errors) {
22468
22054
  console.log(source_default.red(" \u2716"), err);
22469
22055
  }
@@ -22501,6 +22087,13 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
22501
22087
  for (const role of resources.roles) {
22502
22088
  console.log(source_default.gray(" -"), source_default.cyan(role.name));
22503
22089
  }
22090
+ if (resources.evalSuites.length > 0) {
22091
+ console.log();
22092
+ console.log("Eval suites:");
22093
+ for (const suite of resources.evalSuites) {
22094
+ console.log(source_default.gray(" -"), source_default.cyan(suite.suite), source_default.gray(`(${suite.cases.length} cases)`));
22095
+ }
22096
+ }
22504
22097
  console.log();
22505
22098
  return;
22506
22099
  }
@@ -22937,7 +22530,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
22937
22530
  let localResources;
22938
22531
  try {
22939
22532
  localResources = await loadAllResources(cwd);
22940
- spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools`);
22533
+ spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools, ${localResources.evalSuites.length} eval suites`);
22941
22534
  for (const err of localResources.errors) {
22942
22535
  console.log(source_default.red(" \u2716"), err);
22943
22536
  }
@@ -23044,7 +22637,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
23044
22637
 
23045
22638
  // src/cli/commands/pull.ts
23046
22639
  import { existsSync as existsSync6, mkdirSync as mkdirSync3, writeFileSync as writeFileSync5 } from "fs";
23047
- import { join as join11 } from "path";
22640
+ import { join as join10 } from "path";
23048
22641
 
23049
22642
  // src/cli/utils/generator.ts
23050
22643
  var BUILTIN_TOOLS2 = [
@@ -23351,7 +22944,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
23351
22944
  }
23352
22945
  };
23353
22946
  const writeOrSkip = (relativePath, content) => {
23354
- const fullPath = join11(cwd, relativePath);
22947
+ const fullPath = join10(cwd, relativePath);
23355
22948
  if (existsSync6(fullPath) && !options.force) {
23356
22949
  skipped.push(relativePath);
23357
22950
  return false;
@@ -23360,15 +22953,15 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
23360
22953
  created.push(relativePath);
23361
22954
  return true;
23362
22955
  }
23363
- ensureDir2(join11(cwd, relativePath.split("/").slice(0, -1).join("/")));
22956
+ ensureDir2(join10(cwd, relativePath.split("/").slice(0, -1).join("/")));
23364
22957
  writeFileSync5(fullPath, content);
23365
22958
  created.push(relativePath);
23366
22959
  return true;
23367
22960
  };
23368
- ensureDir2(join11(cwd, "agents"));
23369
- ensureDir2(join11(cwd, "entity-types"));
23370
- ensureDir2(join11(cwd, "roles"));
23371
- ensureDir2(join11(cwd, "tools"));
22961
+ ensureDir2(join10(cwd, "agents"));
22962
+ ensureDir2(join10(cwd, "entity-types"));
22963
+ ensureDir2(join10(cwd, "roles"));
22964
+ ensureDir2(join10(cwd, "tools"));
23372
22965
  const agentSlugs = [];
23373
22966
  for (const agent of state.agents) {
23374
22967
  if (!agent.systemPrompt && agent.tools.length === 0)
@@ -23454,7 +23047,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
23454
23047
  // package.json
23455
23048
  var package_default = {
23456
23049
  name: "struere",
23457
- version: "0.5.3",
23050
+ version: "0.5.4",
23458
23051
  description: "Build, test, and deploy AI agents",
23459
23052
  keywords: [
23460
23053
  "ai",
@@ -23556,7 +23149,6 @@ program.addCommand(whoamiCommand);
23556
23149
  program.addCommand(devCommand);
23557
23150
  program.addCommand(buildCommand);
23558
23151
  program.addCommand(testCommand);
23559
- program.addCommand(evalCommand);
23560
23152
  program.addCommand(deployCommand);
23561
23153
  program.addCommand(validateCommand);
23562
23154
  program.addCommand(logsCommand);
@@ -1 +1 @@
1
- {"version":3,"file":"deploy.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/deploy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AA0BnC,eAAO,MAAM,aAAa,SA6LtB,CAAA"}
1
+ {"version":3,"file":"deploy.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/deploy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AA0BnC,eAAO,MAAM,aAAa,SAoMtB,CAAA"}
@@ -1 +1 @@
1
- {"version":3,"file":"dev.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/dev.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAenC,eAAO,MAAM,UAAU,SAySnB,CAAA"}
1
+ {"version":3,"file":"dev.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/dev.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAenC,eAAO,MAAM,UAAU,SA0SnB,CAAA"}