struere 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19269,32 +19269,6 @@ function getApiKey() {
19269
19269
 
19270
19270
  // src/cli/utils/convex.ts
19271
19271
  var CONVEX_URL = process.env.STRUERE_CONVEX_URL || "https://rapid-wildebeest-172.convex.cloud";
19272
- async function listAgents() {
19273
- const credentials = loadCredentials();
19274
- const apiKey = getApiKey();
19275
- const token = apiKey || credentials?.token;
19276
- if (!token) {
19277
- return { agents: [], error: "Not authenticated" };
19278
- }
19279
- const response = await fetch(`${CONVEX_URL}/api/query`, {
19280
- method: "POST",
19281
- headers: {
19282
- "Content-Type": "application/json",
19283
- Authorization: `Bearer ${token}`
19284
- },
19285
- body: JSON.stringify({
19286
- path: "agents:list",
19287
- args: {}
19288
- })
19289
- });
19290
- if (!response.ok) {
19291
- const error = await response.text();
19292
- return { agents: [], error };
19293
- }
19294
- const result = await response.json();
19295
- const agents = Array.isArray(result) ? result : result?.value || [];
19296
- return { agents };
19297
- }
19298
19272
  async function listMyOrganizations(token) {
19299
19273
  const response = await fetch(`${CONVEX_URL}/api/query`, {
19300
19274
  method: "POST",
@@ -19481,18 +19455,24 @@ async function syncOrganization(payload) {
19481
19455
  }
19482
19456
  return { success: false, error: `Network error: ${err instanceof Error ? err.message : String(err)}` };
19483
19457
  }
19458
+ const text = await response.text();
19459
+ let json;
19460
+ try {
19461
+ json = JSON.parse(text);
19462
+ } catch {
19463
+ return { success: false, error: text || `HTTP ${response.status}` };
19464
+ }
19484
19465
  if (!response.ok) {
19485
- const error = await response.text();
19486
- return { success: false, error };
19466
+ const msg = json.errorMessage || json.message || text;
19467
+ return { success: false, error: msg };
19487
19468
  }
19488
- const json = await response.json();
19489
19469
  if (json.status === "success" && json.value) {
19490
19470
  return json.value;
19491
19471
  }
19492
19472
  if (json.status === "error") {
19493
19473
  return { success: false, error: json.errorMessage || "Unknown error from Convex" };
19494
19474
  }
19495
- return { success: false, error: `Unexpected response: ${JSON.stringify(json)}` };
19475
+ return { success: false, error: `Unexpected response: ${text}` };
19496
19476
  }
19497
19477
  async function getSyncState(organizationId, environment) {
19498
19478
  const credentials = loadCredentials();
@@ -19556,267 +19536,6 @@ async function getPullState(organizationId, environment = "development", include
19556
19536
  }
19557
19537
  return { error: `Unexpected response: ${JSON.stringify(result)}` };
19558
19538
  }
19559
- async function resolveAgentId(agentSlug) {
19560
- const { agents } = await listAgents();
19561
- const agent = agents.find((a) => a.slug === agentSlug || a._id === agentSlug);
19562
- return agent?._id;
19563
- }
19564
- async function syncEvalSuites(suites) {
19565
- const credentials = loadCredentials();
19566
- const apiKey = getApiKey();
19567
- const token = apiKey || credentials?.token;
19568
- if (!token) {
19569
- return { error: "Not authenticated" };
19570
- }
19571
- const suiteIds = {};
19572
- for (const suite of suites) {
19573
- const agentId = await resolveAgentId(suite.agent);
19574
- if (!agentId) {
19575
- return { error: `Agent "${suite.agent}" not found. Make sure the agent exists before syncing evals.` };
19576
- }
19577
- const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
19578
- method: "POST",
19579
- headers: {
19580
- "Content-Type": "application/json",
19581
- Authorization: `Bearer ${token}`
19582
- },
19583
- body: JSON.stringify({
19584
- path: "evals:listAllSuites",
19585
- args: { environment: "development" }
19586
- })
19587
- });
19588
- let existingSuiteId;
19589
- if (listResponse.ok) {
19590
- const listResult = await listResponse.json();
19591
- const existing = (listResult.value || []).find((s) => s.slug === suite.slug);
19592
- if (existing) {
19593
- existingSuiteId = existing._id;
19594
- }
19595
- }
19596
- if (!existingSuiteId) {
19597
- const createResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19598
- method: "POST",
19599
- headers: {
19600
- "Content-Type": "application/json",
19601
- Authorization: `Bearer ${token}`
19602
- },
19603
- body: JSON.stringify({
19604
- path: "evals:createSuite",
19605
- args: {
19606
- agentId,
19607
- name: suite.suite,
19608
- slug: suite.slug,
19609
- description: suite.description,
19610
- tags: suite.tags,
19611
- judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined,
19612
- environment: "development"
19613
- }
19614
- })
19615
- });
19616
- if (!createResponse.ok) {
19617
- const error = await createResponse.text();
19618
- return { error: `Failed to create suite "${suite.suite}": ${error}` };
19619
- }
19620
- const createResult = await createResponse.json();
19621
- existingSuiteId = createResult.value;
19622
- } else {
19623
- const updateResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19624
- method: "POST",
19625
- headers: {
19626
- "Content-Type": "application/json",
19627
- Authorization: `Bearer ${token}`
19628
- },
19629
- body: JSON.stringify({
19630
- path: "evals:updateSuite",
19631
- args: {
19632
- id: existingSuiteId,
19633
- name: suite.suite,
19634
- description: suite.description,
19635
- tags: suite.tags,
19636
- judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined
19637
- }
19638
- })
19639
- });
19640
- if (!updateResponse.ok) {
19641
- const error = await updateResponse.text();
19642
- return { error: `Failed to update suite "${suite.suite}": ${error}` };
19643
- }
19644
- const deleteResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19645
- method: "POST",
19646
- headers: {
19647
- "Content-Type": "application/json",
19648
- Authorization: `Bearer ${token}`
19649
- },
19650
- body: JSON.stringify({
19651
- path: "evals:deleteCasesBySuite",
19652
- args: { suiteId: existingSuiteId }
19653
- })
19654
- });
19655
- if (!deleteResponse.ok) {
19656
- const error = await deleteResponse.text();
19657
- return { error: `Failed to clear cases for suite "${suite.suite}": ${error}` };
19658
- }
19659
- }
19660
- if (existingSuiteId) {
19661
- suiteIds[suite.slug] = existingSuiteId;
19662
- for (let i = 0;i < suite.cases.length; i++) {
19663
- const c = suite.cases[i];
19664
- const turns = c.turns.map((t) => ({
19665
- userMessage: t.user,
19666
- assertions: t.assertions?.map((a) => ({
19667
- type: a.type,
19668
- ...a.criteria ? { criteria: a.criteria } : {},
19669
- ...a.value ? { value: a.value } : {},
19670
- ...a.weight ? { weight: a.weight } : {}
19671
- }))
19672
- }));
19673
- const finalAssertions = c.finalAssertions?.map((a) => ({
19674
- type: a.type,
19675
- ...a.criteria ? { criteria: a.criteria } : {},
19676
- ...a.value ? { value: a.value } : {},
19677
- ...a.weight ? { weight: a.weight } : {}
19678
- }));
19679
- const caseResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
19680
- method: "POST",
19681
- headers: {
19682
- "Content-Type": "application/json",
19683
- Authorization: `Bearer ${token}`
19684
- },
19685
- body: JSON.stringify({
19686
- path: "evals:createCase",
19687
- args: {
19688
- suiteId: existingSuiteId,
19689
- name: c.name,
19690
- description: c.description,
19691
- tags: c.tags,
19692
- turns,
19693
- finalAssertions
19694
- }
19695
- })
19696
- });
19697
- if (!caseResponse.ok) {
19698
- const error = await caseResponse.text();
19699
- return { error: `Failed to create case "${c.name}": ${error}` };
19700
- }
19701
- }
19702
- }
19703
- }
19704
- return { suiteIds };
19705
- }
19706
- async function startEvalRun(suiteSlug) {
19707
- const credentials = loadCredentials();
19708
- const apiKey = getApiKey();
19709
- const token = apiKey || credentials?.token;
19710
- if (!token) {
19711
- return { error: "Not authenticated" };
19712
- }
19713
- const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
19714
- method: "POST",
19715
- headers: {
19716
- "Content-Type": "application/json",
19717
- Authorization: `Bearer ${token}`
19718
- },
19719
- body: JSON.stringify({
19720
- path: "evals:listAllSuites",
19721
- args: { environment: "development" }
19722
- })
19723
- });
19724
- if (!listResponse.ok) {
19725
- return { error: "Failed to list suites" };
19726
- }
19727
- const listResult = await listResponse.json();
19728
- const suite = (listResult.value || []).find((s) => s.slug === suiteSlug);
19729
- if (!suite) {
19730
- return { error: `Suite "${suiteSlug}" not found` };
19731
- }
19732
- const response = await fetch(`${CONVEX_URL}/api/mutation`, {
19733
- method: "POST",
19734
- headers: {
19735
- "Content-Type": "application/json",
19736
- Authorization: `Bearer ${token}`
19737
- },
19738
- body: JSON.stringify({
19739
- path: "evals:startRun",
19740
- args: {
19741
- suiteId: suite._id,
19742
- triggerSource: "cli"
19743
- }
19744
- })
19745
- });
19746
- if (!response.ok) {
19747
- const error = await response.text();
19748
- return { error };
19749
- }
19750
- const result = await response.json();
19751
- return { runId: result.value, suiteId: suite._id };
19752
- }
19753
- async function pollEvalRun(runId, onProgress) {
19754
- const maxAttempts = 300;
19755
- const pollInterval = 2000;
19756
- for (let i = 0;i < maxAttempts; i++) {
19757
- const currentCredentials = loadCredentials();
19758
- const currentApiKey = getApiKey();
19759
- const token = currentApiKey || currentCredentials?.token;
19760
- if (!token) {
19761
- return { error: 'Authentication expired. Please run "struere login" again.' };
19762
- }
19763
- const response = await fetch(`${CONVEX_URL}/api/query`, {
19764
- method: "POST",
19765
- headers: {
19766
- "Content-Type": "application/json",
19767
- Authorization: `Bearer ${token}`
19768
- },
19769
- body: JSON.stringify({
19770
- path: "evals:getRun",
19771
- args: { id: runId }
19772
- })
19773
- });
19774
- if (!response.ok) {
19775
- if (response.status === 401) {
19776
- return { error: 'Authentication expired. Please run "struere login" again.' };
19777
- }
19778
- return { error: `Failed to poll run status (HTTP ${response.status})` };
19779
- }
19780
- const result = await response.json();
19781
- const run = result.value;
19782
- if (!run) {
19783
- return { error: "Run not found" };
19784
- }
19785
- if (onProgress) {
19786
- onProgress(run);
19787
- }
19788
- if (run.status === "completed" || run.status === "failed" || run.status === "cancelled") {
19789
- return { run };
19790
- }
19791
- await new Promise((resolve) => setTimeout(resolve, pollInterval));
19792
- }
19793
- return { error: "Run timed out after 10 minutes" };
19794
- }
19795
- async function getEvalRunResults(runId) {
19796
- const credentials = loadCredentials();
19797
- const apiKey = getApiKey();
19798
- const token = apiKey || credentials?.token;
19799
- if (!token) {
19800
- return { error: "Not authenticated" };
19801
- }
19802
- const response = await fetch(`${CONVEX_URL}/api/query`, {
19803
- method: "POST",
19804
- headers: {
19805
- "Content-Type": "application/json",
19806
- Authorization: `Bearer ${token}`
19807
- },
19808
- body: JSON.stringify({
19809
- path: "evals:getRunResults",
19810
- args: { runId }
19811
- })
19812
- });
19813
- if (!response.ok) {
19814
- const error = await response.text();
19815
- return { error };
19816
- }
19817
- const result = await response.json();
19818
- return { results: result.value || [] };
19819
- }
19820
19539
 
19821
19540
  // src/cli/commands/login.ts
19822
19541
  var AUTH_CALLBACK_PORT = 9876;
@@ -21248,7 +20967,8 @@ import { join as join5 } from "path";
21248
20967
  import { existsSync as existsSync5, writeFileSync as writeFileSync4 } from "fs";
21249
20968
 
21250
20969
  // src/cli/utils/loader.ts
21251
- import { existsSync as existsSync4, readdirSync } from "fs";
20970
+ var import_yaml = __toESM(require_dist(), 1);
20971
+ import { existsSync as existsSync4, readdirSync, readFileSync as readFileSync4 } from "fs";
21252
20972
  import { join as join4 } from "path";
21253
20973
  async function loadAllResources(cwd) {
21254
20974
  const errors2 = [];
@@ -21259,7 +20979,9 @@ async function loadAllResources(cwd) {
21259
20979
  if (toolsError) {
21260
20980
  errors2.push(toolsError);
21261
20981
  }
21262
- return { agents, entityTypes, roles, customTools, errors: errors2 };
20982
+ const { suites: evalSuites, errors: evalErrors } = loadAllEvalSuites(join4(cwd, "evals"));
20983
+ errors2.push(...evalErrors);
20984
+ return { agents, entityTypes, roles, customTools, evalSuites, errors: errors2 };
21263
20985
  }
21264
20986
  async function loadAllAgents(dir) {
21265
20987
  if (!existsSync4(dir)) {
@@ -21349,16 +21071,42 @@ async function loadFromDirectory(dir) {
21349
21071
  }
21350
21072
  return items;
21351
21073
  }
21074
+ function loadAllEvalSuites(dir) {
21075
+ const suites = [];
21076
+ const errors2 = [];
21077
+ if (!existsSync4(dir)) {
21078
+ return { suites, errors: errors2 };
21079
+ }
21080
+ const files = readdirSync(dir).filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
21081
+ for (const file of files) {
21082
+ try {
21083
+ const content = readFileSync4(join4(dir, file), "utf-8");
21084
+ const parsed = import_yaml.default.parse(content);
21085
+ suites.push(parsed);
21086
+ } catch (err) {
21087
+ errors2.push(`Failed to parse ${file}: ${err instanceof Error ? err.message : String(err)}`);
21088
+ }
21089
+ }
21090
+ return { suites, errors: errors2 };
21091
+ }
21352
21092
  function getResourceDirectories(cwd) {
21353
21093
  return {
21354
21094
  agents: join4(cwd, "agents"),
21355
21095
  entityTypes: join4(cwd, "entity-types"),
21356
21096
  roles: join4(cwd, "roles"),
21357
- tools: join4(cwd, "tools")
21097
+ tools: join4(cwd, "tools"),
21098
+ evals: join4(cwd, "evals")
21358
21099
  };
21359
21100
  }
21360
21101
 
21361
21102
  // src/cli/utils/extractor.ts
21103
+ function inferProvider(modelName) {
21104
+ if (modelName.startsWith("gpt-") || modelName.startsWith("o1") || modelName.startsWith("o3") || modelName.startsWith("o4"))
21105
+ return "openai";
21106
+ if (modelName.startsWith("gemini"))
21107
+ return "google";
21108
+ return "anthropic";
21109
+ }
21362
21110
  var BUILTIN_TOOLS = [
21363
21111
  "entity.create",
21364
21112
  "entity.get",
@@ -21407,7 +21155,25 @@ function extractSyncPayload(resources) {
21407
21155
  maskConfig: fm.maskConfig
21408
21156
  }))
21409
21157
  }));
21410
- return { agents, entityTypes, roles };
21158
+ const evalSuites = resources.evalSuites.length > 0 ? resources.evalSuites.map((suite) => ({
21159
+ name: suite.suite,
21160
+ slug: suite.slug,
21161
+ agentSlug: suite.agent,
21162
+ description: suite.description,
21163
+ tags: suite.tags,
21164
+ judgeModel: suite.judgeModel ? { provider: inferProvider(suite.judgeModel), name: suite.judgeModel } : undefined,
21165
+ cases: suite.cases.map((c) => ({
21166
+ name: c.name,
21167
+ description: c.description,
21168
+ tags: c.tags,
21169
+ turns: c.turns.map((t) => ({
21170
+ userMessage: t.user,
21171
+ assertions: t.assertions
21172
+ })),
21173
+ finalAssertions: c.finalAssertions
21174
+ }))
21175
+ })) : undefined;
21176
+ return { agents, entityTypes, roles, evalSuites };
21411
21177
  }
21412
21178
  function extractAgentPayload(agent, customToolsMap) {
21413
21179
  let systemPrompt;
@@ -21723,7 +21489,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
21723
21489
  spinner.start("Loading resources");
21724
21490
  try {
21725
21491
  const resources = await loadAllResources(cwd);
21726
- spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
21492
+ spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
21727
21493
  for (const err of resources.errors) {
21728
21494
  console.log(source_default.red(" \u2716"), err);
21729
21495
  }
@@ -21781,6 +21547,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
21781
21547
  dirs.entityTypes,
21782
21548
  dirs.roles,
21783
21549
  dirs.tools,
21550
+ dirs.evals,
21784
21551
  join5(cwd, "struere.config.ts")
21785
21552
  ].filter((p) => existsSync5(p));
21786
21553
  const watcher = import_chokidar.default.watch(watchPaths, {
@@ -22055,7 +21822,7 @@ var buildCommand = new Command("build").description("Build and validate agent fo
22055
21822
  });
22056
21823
 
22057
21824
  // src/cli/commands/test.ts
22058
- var import_yaml = __toESM(require_dist(), 1);
21825
+ var import_yaml2 = __toESM(require_dist(), 1);
22059
21826
  import { join as join9 } from "path";
22060
21827
  import { readdir, readFile } from "fs/promises";
22061
21828
  var testCommand = new Command("test").description("Run test conversations").argument("[pattern]", "Test file pattern", "*.test.yaml").option("-v, --verbose", "Show detailed output").option("--dry-run", "Parse tests without executing (no API calls)").action(async (pattern, options) => {
@@ -22107,7 +21874,7 @@ var testCommand = new Command("test").description("Run test conversations").argu
22107
21874
  for (const file of testFiles) {
22108
21875
  const filePath = join9(testsDir, file);
22109
21876
  const content = await readFile(filePath, "utf-8");
22110
- const testCase = import_yaml.default.parse(content);
21877
+ const testCase = import_yaml2.default.parse(content);
22111
21878
  if (options.verbose) {
22112
21879
  console.log();
22113
21880
  console.log(source_default.gray("Running:"), testCase.name);
@@ -22240,174 +22007,6 @@ function formatAssertionError(assertion, context) {
22240
22007
  }
22241
22008
  }
22242
22009
 
22243
- // src/cli/commands/eval.ts
22244
- var import_yaml2 = __toESM(require_dist(), 1);
22245
- import { join as join10 } from "path";
22246
- import { readdir as readdir2, readFile as readFile2 } from "fs/promises";
22247
- var evalCommand = new Command("eval").description("Run agent evaluations with LLM judge support").option("-s, --suite <name>", "Run a specific suite by name").option("-v, --verbose", "Show detailed output including judge reasoning").option("--dry-run", "Parse eval files without executing").option("--json", "Output results as JSON").option("--no-sync", "Skip syncing eval files to Convex").action(async (options) => {
22248
- const spinner = ora();
22249
- const cwd = process.cwd();
22250
- if (!options.json) {
22251
- console.log();
22252
- console.log(source_default.bold("Running Evaluations"));
22253
- console.log();
22254
- }
22255
- if (!hasProject(cwd)) {
22256
- console.log(source_default.yellow("No struere.json found"));
22257
- console.log();
22258
- console.log(source_default.gray("Run"), source_default.cyan("struere init"), source_default.gray("to initialize this project"));
22259
- console.log();
22260
- process.exit(1);
22261
- }
22262
- const project = loadProject(cwd);
22263
- if (!project) {
22264
- console.log(source_default.red("Failed to load struere.json"));
22265
- process.exit(1);
22266
- }
22267
- spinner.start("Finding eval files");
22268
- const evalsDir = join10(cwd, "evals");
22269
- let evalFiles = [];
22270
- try {
22271
- const files = await readdir2(evalsDir);
22272
- evalFiles = files.filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
22273
- } catch {
22274
- spinner.warn("No evals directory found");
22275
- console.log();
22276
- console.log(source_default.gray("Create eval files in"), source_default.cyan("evals/*.eval.yaml"));
22277
- console.log();
22278
- return;
22279
- }
22280
- if (evalFiles.length === 0) {
22281
- spinner.warn("No eval files found");
22282
- console.log();
22283
- return;
22284
- }
22285
- spinner.succeed(`Found ${evalFiles.length} eval file(s)`);
22286
- const suites = [];
22287
- for (const file of evalFiles) {
22288
- const filePath = join10(evalsDir, file);
22289
- const content = await readFile2(filePath, "utf-8");
22290
- const parsed = import_yaml2.default.parse(content);
22291
- suites.push(parsed);
22292
- }
22293
- if (options.suite) {
22294
- const filtered = suites.filter((s) => s.suite.toLowerCase() === options.suite.toLowerCase() || s.slug === options.suite);
22295
- if (filtered.length === 0) {
22296
- console.log(source_default.red(`Suite "${options.suite}" not found`));
22297
- process.exit(1);
22298
- }
22299
- suites.length = 0;
22300
- suites.push(...filtered);
22301
- }
22302
- if (options.dryRun) {
22303
- console.log();
22304
- console.log(source_default.yellow("Dry run mode \u2014 parsed successfully"));
22305
- console.log();
22306
- for (const suite of suites) {
22307
- console.log(source_default.cyan(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
22308
- for (const c of suite.cases) {
22309
- const assertionCount = (c.turns || []).reduce((sum, t) => sum + (t.assertions?.length || 0), 0) + (c.finalAssertions?.length || 0);
22310
- console.log(source_default.gray(` - ${c.name}`), source_default.gray(`(${c.turns.length} turns, ${assertionCount} assertions)`));
22311
- }
22312
- }
22313
- console.log();
22314
- return;
22315
- }
22316
- if (options.sync !== false) {
22317
- spinner.start("Syncing eval suites to Convex");
22318
- const syncResult = await syncEvalSuites(suites);
22319
- if (syncResult.error) {
22320
- spinner.fail(`Sync failed: ${syncResult.error}`);
22321
- process.exit(1);
22322
- }
22323
- spinner.succeed("Eval suites synced");
22324
- }
22325
- const allResults = [];
22326
- for (const suite of suites) {
22327
- if (!options.json) {
22328
- console.log();
22329
- console.log(source_default.bold(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
22330
- }
22331
- spinner.start(`Starting run for "${suite.suite}"`);
22332
- const { runId, suiteId, error: startError } = await startEvalRun(suite.slug);
22333
- if (startError || !runId) {
22334
- spinner.fail(`Failed to start: ${startError}`);
22335
- continue;
22336
- }
22337
- spinner.succeed(`Run started`);
22338
- spinner.start("Executing cases...");
22339
- const { run, error: pollError } = await pollEvalRun(runId, (status) => {
22340
- spinner.text = `Executing cases... ${status.completedCases}/${status.totalCases}`;
22341
- });
22342
- if (pollError || !run) {
22343
- spinner.fail(`Run failed: ${pollError}`);
22344
- continue;
22345
- }
22346
- spinner.succeed(`Run completed: ${run.passedCases}/${run.totalCases} passed`);
22347
- const { results, error: resultsError } = await getEvalRunResults(runId);
22348
- if (resultsError) {
22349
- console.log(source_default.red(` Failed to get results: ${resultsError}`));
22350
- continue;
22351
- }
22352
- allResults.push({ suite: suite.suite, run, results: results || [] });
22353
- if (!options.json) {
22354
- for (let ri = 0;ri < (results || []).length; ri++) {
22355
- const result = results[ri];
22356
- const caseName = ri < suite.cases.length ? suite.cases[ri].name : result.caseId;
22357
- if (result.overallPassed) {
22358
- console.log(source_default.green(" \u2713"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
22359
- } else {
22360
- console.log(source_default.red(" \u2717"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
22361
- if (result.errorMessage) {
22362
- console.log(source_default.red(" Error:"), result.errorMessage);
22363
- }
22364
- if (options.verbose && result.turnResults) {
22365
- for (const turn of result.turnResults) {
22366
- if (turn.assertionResults) {
22367
- for (const ar of turn.assertionResults) {
22368
- if (!ar.passed) {
22369
- console.log(source_default.red(` [${ar.type}]`), ar.reason || "");
22370
- if (ar.criteria) {
22371
- console.log(source_default.gray(` Criteria: ${ar.criteria}`));
22372
- }
22373
- }
22374
- }
22375
- }
22376
- }
22377
- }
22378
- }
22379
- }
22380
- }
22381
- }
22382
- if (options.json) {
22383
- console.log(JSON.stringify(allResults, null, 2));
22384
- const anyFailed = allResults.some((r) => r.run.failedCases > 0);
22385
- process.exit(anyFailed ? 1 : 0);
22386
- return;
22387
- }
22388
- const totalPassed = allResults.reduce((sum, r) => sum + r.run.passedCases, 0);
22389
- const totalCases = allResults.reduce((sum, r) => sum + r.run.totalCases, 0);
22390
- const totalFailed = allResults.reduce((sum, r) => sum + r.run.failedCases, 0);
22391
- console.log();
22392
- if (totalFailed === 0) {
22393
- console.log(source_default.green("All evaluations passed!"), source_default.gray(`(${totalPassed}/${totalCases})`));
22394
- } else {
22395
- console.log(source_default.red("Evaluations failed:"), source_default.gray(`${totalPassed}/${totalCases} passed`));
22396
- }
22397
- const totalTokens = allResults.reduce((sum, r) => {
22398
- if (r.run.totalTokens)
22399
- return sum + r.run.totalTokens.agent + r.run.totalTokens.judge;
22400
- return sum;
22401
- }, 0);
22402
- if (totalTokens > 0) {
22403
- console.log(source_default.gray(` Total tokens used: ${totalTokens.toLocaleString()}`));
22404
- }
22405
- console.log();
22406
- if (totalFailed > 0) {
22407
- process.exit(1);
22408
- }
22409
- });
22410
-
22411
22010
  // src/cli/commands/deploy.ts
22412
22011
  var isAuthError = (error) => {
22413
22012
  const message = error instanceof Error ? error.message : String(error);
@@ -22463,7 +22062,7 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
22463
22062
  let resources;
22464
22063
  try {
22465
22064
  resources = await loadAllResources(cwd);
22466
- spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
22065
+ spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
22467
22066
  for (const err of resources.errors) {
22468
22067
  console.log(source_default.red(" \u2716"), err);
22469
22068
  }
@@ -22501,6 +22100,13 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
22501
22100
  for (const role of resources.roles) {
22502
22101
  console.log(source_default.gray(" -"), source_default.cyan(role.name));
22503
22102
  }
22103
+ if (resources.evalSuites.length > 0) {
22104
+ console.log();
22105
+ console.log("Eval suites:");
22106
+ for (const suite of resources.evalSuites) {
22107
+ console.log(source_default.gray(" -"), source_default.cyan(suite.suite), source_default.gray(`(${suite.cases.length} cases)`));
22108
+ }
22109
+ }
22504
22110
  console.log();
22505
22111
  return;
22506
22112
  }
@@ -22937,7 +22543,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
22937
22543
  let localResources;
22938
22544
  try {
22939
22545
  localResources = await loadAllResources(cwd);
22940
- spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools`);
22546
+ spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools, ${localResources.evalSuites.length} eval suites`);
22941
22547
  for (const err of localResources.errors) {
22942
22548
  console.log(source_default.red(" \u2716"), err);
22943
22549
  }
@@ -23044,7 +22650,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
23044
22650
 
23045
22651
  // src/cli/commands/pull.ts
23046
22652
  import { existsSync as existsSync6, mkdirSync as mkdirSync3, writeFileSync as writeFileSync5 } from "fs";
23047
- import { join as join11 } from "path";
22653
+ import { join as join10 } from "path";
23048
22654
 
23049
22655
  // src/cli/utils/generator.ts
23050
22656
  var BUILTIN_TOOLS2 = [
@@ -23351,7 +22957,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
23351
22957
  }
23352
22958
  };
23353
22959
  const writeOrSkip = (relativePath, content) => {
23354
- const fullPath = join11(cwd, relativePath);
22960
+ const fullPath = join10(cwd, relativePath);
23355
22961
  if (existsSync6(fullPath) && !options.force) {
23356
22962
  skipped.push(relativePath);
23357
22963
  return false;
@@ -23360,15 +22966,15 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
23360
22966
  created.push(relativePath);
23361
22967
  return true;
23362
22968
  }
23363
- ensureDir2(join11(cwd, relativePath.split("/").slice(0, -1).join("/")));
22969
+ ensureDir2(join10(cwd, relativePath.split("/").slice(0, -1).join("/")));
23364
22970
  writeFileSync5(fullPath, content);
23365
22971
  created.push(relativePath);
23366
22972
  return true;
23367
22973
  };
23368
- ensureDir2(join11(cwd, "agents"));
23369
- ensureDir2(join11(cwd, "entity-types"));
23370
- ensureDir2(join11(cwd, "roles"));
23371
- ensureDir2(join11(cwd, "tools"));
22974
+ ensureDir2(join10(cwd, "agents"));
22975
+ ensureDir2(join10(cwd, "entity-types"));
22976
+ ensureDir2(join10(cwd, "roles"));
22977
+ ensureDir2(join10(cwd, "tools"));
23372
22978
  const agentSlugs = [];
23373
22979
  for (const agent of state.agents) {
23374
22980
  if (!agent.systemPrompt && agent.tools.length === 0)
@@ -23454,7 +23060,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
23454
23060
  // package.json
23455
23061
  var package_default = {
23456
23062
  name: "struere",
23457
- version: "0.5.3",
23063
+ version: "0.5.5",
23458
23064
  description: "Build, test, and deploy AI agents",
23459
23065
  keywords: [
23460
23066
  "ai",
@@ -23556,7 +23162,6 @@ program.addCommand(whoamiCommand);
23556
23162
  program.addCommand(devCommand);
23557
23163
  program.addCommand(buildCommand);
23558
23164
  program.addCommand(testCommand);
23559
- program.addCommand(evalCommand);
23560
23165
  program.addCommand(deployCommand);
23561
23166
  program.addCommand(validateCommand);
23562
23167
  program.addCommand(logsCommand);