struere 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/struere.js +65 -473
- package/dist/cli/commands/deploy.d.ts.map +1 -1
- package/dist/cli/commands/dev.d.ts.map +1 -1
- package/dist/cli/index.js +249 -660
- package/dist/cli/utils/convex.d.ts +10 -18
- package/dist/cli/utils/convex.d.ts.map +1 -1
- package/dist/cli/utils/extractor.d.ts +31 -0
- package/dist/cli/utils/extractor.d.ts.map +1 -1
- package/dist/cli/utils/loader.d.ts +7 -1
- package/dist/cli/utils/loader.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/bin/struere.js
CHANGED
|
@@ -19269,32 +19269,6 @@ function getApiKey() {
|
|
|
19269
19269
|
|
|
19270
19270
|
// src/cli/utils/convex.ts
|
|
19271
19271
|
var CONVEX_URL = process.env.STRUERE_CONVEX_URL || "https://rapid-wildebeest-172.convex.cloud";
|
|
19272
|
-
async function listAgents() {
|
|
19273
|
-
const credentials = loadCredentials();
|
|
19274
|
-
const apiKey = getApiKey();
|
|
19275
|
-
const token = apiKey || credentials?.token;
|
|
19276
|
-
if (!token) {
|
|
19277
|
-
return { agents: [], error: "Not authenticated" };
|
|
19278
|
-
}
|
|
19279
|
-
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19280
|
-
method: "POST",
|
|
19281
|
-
headers: {
|
|
19282
|
-
"Content-Type": "application/json",
|
|
19283
|
-
Authorization: `Bearer ${token}`
|
|
19284
|
-
},
|
|
19285
|
-
body: JSON.stringify({
|
|
19286
|
-
path: "agents:list",
|
|
19287
|
-
args: {}
|
|
19288
|
-
})
|
|
19289
|
-
});
|
|
19290
|
-
if (!response.ok) {
|
|
19291
|
-
const error = await response.text();
|
|
19292
|
-
return { agents: [], error };
|
|
19293
|
-
}
|
|
19294
|
-
const result = await response.json();
|
|
19295
|
-
const agents = Array.isArray(result) ? result : result?.value || [];
|
|
19296
|
-
return { agents };
|
|
19297
|
-
}
|
|
19298
19272
|
async function listMyOrganizations(token) {
|
|
19299
19273
|
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19300
19274
|
method: "POST",
|
|
@@ -19556,267 +19530,6 @@ async function getPullState(organizationId, environment = "development", include
|
|
|
19556
19530
|
}
|
|
19557
19531
|
return { error: `Unexpected response: ${JSON.stringify(result)}` };
|
|
19558
19532
|
}
|
|
19559
|
-
async function resolveAgentId(agentSlug) {
|
|
19560
|
-
const { agents } = await listAgents();
|
|
19561
|
-
const agent = agents.find((a) => a.slug === agentSlug || a._id === agentSlug);
|
|
19562
|
-
return agent?._id;
|
|
19563
|
-
}
|
|
19564
|
-
async function syncEvalSuites(suites) {
|
|
19565
|
-
const credentials = loadCredentials();
|
|
19566
|
-
const apiKey = getApiKey();
|
|
19567
|
-
const token = apiKey || credentials?.token;
|
|
19568
|
-
if (!token) {
|
|
19569
|
-
return { error: "Not authenticated" };
|
|
19570
|
-
}
|
|
19571
|
-
const suiteIds = {};
|
|
19572
|
-
for (const suite of suites) {
|
|
19573
|
-
const agentId = await resolveAgentId(suite.agent);
|
|
19574
|
-
if (!agentId) {
|
|
19575
|
-
return { error: `Agent "${suite.agent}" not found. Make sure the agent exists before syncing evals.` };
|
|
19576
|
-
}
|
|
19577
|
-
const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19578
|
-
method: "POST",
|
|
19579
|
-
headers: {
|
|
19580
|
-
"Content-Type": "application/json",
|
|
19581
|
-
Authorization: `Bearer ${token}`
|
|
19582
|
-
},
|
|
19583
|
-
body: JSON.stringify({
|
|
19584
|
-
path: "evals:listAllSuites",
|
|
19585
|
-
args: { environment: "development" }
|
|
19586
|
-
})
|
|
19587
|
-
});
|
|
19588
|
-
let existingSuiteId;
|
|
19589
|
-
if (listResponse.ok) {
|
|
19590
|
-
const listResult = await listResponse.json();
|
|
19591
|
-
const existing = (listResult.value || []).find((s) => s.slug === suite.slug);
|
|
19592
|
-
if (existing) {
|
|
19593
|
-
existingSuiteId = existing._id;
|
|
19594
|
-
}
|
|
19595
|
-
}
|
|
19596
|
-
if (!existingSuiteId) {
|
|
19597
|
-
const createResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19598
|
-
method: "POST",
|
|
19599
|
-
headers: {
|
|
19600
|
-
"Content-Type": "application/json",
|
|
19601
|
-
Authorization: `Bearer ${token}`
|
|
19602
|
-
},
|
|
19603
|
-
body: JSON.stringify({
|
|
19604
|
-
path: "evals:createSuite",
|
|
19605
|
-
args: {
|
|
19606
|
-
agentId,
|
|
19607
|
-
name: suite.suite,
|
|
19608
|
-
slug: suite.slug,
|
|
19609
|
-
description: suite.description,
|
|
19610
|
-
tags: suite.tags,
|
|
19611
|
-
judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined,
|
|
19612
|
-
environment: "development"
|
|
19613
|
-
}
|
|
19614
|
-
})
|
|
19615
|
-
});
|
|
19616
|
-
if (!createResponse.ok) {
|
|
19617
|
-
const error = await createResponse.text();
|
|
19618
|
-
return { error: `Failed to create suite "${suite.suite}": ${error}` };
|
|
19619
|
-
}
|
|
19620
|
-
const createResult = await createResponse.json();
|
|
19621
|
-
existingSuiteId = createResult.value;
|
|
19622
|
-
} else {
|
|
19623
|
-
const updateResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19624
|
-
method: "POST",
|
|
19625
|
-
headers: {
|
|
19626
|
-
"Content-Type": "application/json",
|
|
19627
|
-
Authorization: `Bearer ${token}`
|
|
19628
|
-
},
|
|
19629
|
-
body: JSON.stringify({
|
|
19630
|
-
path: "evals:updateSuite",
|
|
19631
|
-
args: {
|
|
19632
|
-
id: existingSuiteId,
|
|
19633
|
-
name: suite.suite,
|
|
19634
|
-
description: suite.description,
|
|
19635
|
-
tags: suite.tags,
|
|
19636
|
-
judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined
|
|
19637
|
-
}
|
|
19638
|
-
})
|
|
19639
|
-
});
|
|
19640
|
-
if (!updateResponse.ok) {
|
|
19641
|
-
const error = await updateResponse.text();
|
|
19642
|
-
return { error: `Failed to update suite "${suite.suite}": ${error}` };
|
|
19643
|
-
}
|
|
19644
|
-
const deleteResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19645
|
-
method: "POST",
|
|
19646
|
-
headers: {
|
|
19647
|
-
"Content-Type": "application/json",
|
|
19648
|
-
Authorization: `Bearer ${token}`
|
|
19649
|
-
},
|
|
19650
|
-
body: JSON.stringify({
|
|
19651
|
-
path: "evals:deleteCasesBySuite",
|
|
19652
|
-
args: { suiteId: existingSuiteId }
|
|
19653
|
-
})
|
|
19654
|
-
});
|
|
19655
|
-
if (!deleteResponse.ok) {
|
|
19656
|
-
const error = await deleteResponse.text();
|
|
19657
|
-
return { error: `Failed to clear cases for suite "${suite.suite}": ${error}` };
|
|
19658
|
-
}
|
|
19659
|
-
}
|
|
19660
|
-
if (existingSuiteId) {
|
|
19661
|
-
suiteIds[suite.slug] = existingSuiteId;
|
|
19662
|
-
for (let i = 0;i < suite.cases.length; i++) {
|
|
19663
|
-
const c = suite.cases[i];
|
|
19664
|
-
const turns = c.turns.map((t) => ({
|
|
19665
|
-
userMessage: t.user,
|
|
19666
|
-
assertions: t.assertions?.map((a) => ({
|
|
19667
|
-
type: a.type,
|
|
19668
|
-
...a.criteria ? { criteria: a.criteria } : {},
|
|
19669
|
-
...a.value ? { value: a.value } : {},
|
|
19670
|
-
...a.weight ? { weight: a.weight } : {}
|
|
19671
|
-
}))
|
|
19672
|
-
}));
|
|
19673
|
-
const finalAssertions = c.finalAssertions?.map((a) => ({
|
|
19674
|
-
type: a.type,
|
|
19675
|
-
...a.criteria ? { criteria: a.criteria } : {},
|
|
19676
|
-
...a.value ? { value: a.value } : {},
|
|
19677
|
-
...a.weight ? { weight: a.weight } : {}
|
|
19678
|
-
}));
|
|
19679
|
-
const caseResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19680
|
-
method: "POST",
|
|
19681
|
-
headers: {
|
|
19682
|
-
"Content-Type": "application/json",
|
|
19683
|
-
Authorization: `Bearer ${token}`
|
|
19684
|
-
},
|
|
19685
|
-
body: JSON.stringify({
|
|
19686
|
-
path: "evals:createCase",
|
|
19687
|
-
args: {
|
|
19688
|
-
suiteId: existingSuiteId,
|
|
19689
|
-
name: c.name,
|
|
19690
|
-
description: c.description,
|
|
19691
|
-
tags: c.tags,
|
|
19692
|
-
turns,
|
|
19693
|
-
finalAssertions
|
|
19694
|
-
}
|
|
19695
|
-
})
|
|
19696
|
-
});
|
|
19697
|
-
if (!caseResponse.ok) {
|
|
19698
|
-
const error = await caseResponse.text();
|
|
19699
|
-
return { error: `Failed to create case "${c.name}": ${error}` };
|
|
19700
|
-
}
|
|
19701
|
-
}
|
|
19702
|
-
}
|
|
19703
|
-
}
|
|
19704
|
-
return { suiteIds };
|
|
19705
|
-
}
|
|
19706
|
-
async function startEvalRun(suiteSlug) {
|
|
19707
|
-
const credentials = loadCredentials();
|
|
19708
|
-
const apiKey = getApiKey();
|
|
19709
|
-
const token = apiKey || credentials?.token;
|
|
19710
|
-
if (!token) {
|
|
19711
|
-
return { error: "Not authenticated" };
|
|
19712
|
-
}
|
|
19713
|
-
const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19714
|
-
method: "POST",
|
|
19715
|
-
headers: {
|
|
19716
|
-
"Content-Type": "application/json",
|
|
19717
|
-
Authorization: `Bearer ${token}`
|
|
19718
|
-
},
|
|
19719
|
-
body: JSON.stringify({
|
|
19720
|
-
path: "evals:listAllSuites",
|
|
19721
|
-
args: { environment: "development" }
|
|
19722
|
-
})
|
|
19723
|
-
});
|
|
19724
|
-
if (!listResponse.ok) {
|
|
19725
|
-
return { error: "Failed to list suites" };
|
|
19726
|
-
}
|
|
19727
|
-
const listResult = await listResponse.json();
|
|
19728
|
-
const suite = (listResult.value || []).find((s) => s.slug === suiteSlug);
|
|
19729
|
-
if (!suite) {
|
|
19730
|
-
return { error: `Suite "${suiteSlug}" not found` };
|
|
19731
|
-
}
|
|
19732
|
-
const response = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19733
|
-
method: "POST",
|
|
19734
|
-
headers: {
|
|
19735
|
-
"Content-Type": "application/json",
|
|
19736
|
-
Authorization: `Bearer ${token}`
|
|
19737
|
-
},
|
|
19738
|
-
body: JSON.stringify({
|
|
19739
|
-
path: "evals:startRun",
|
|
19740
|
-
args: {
|
|
19741
|
-
suiteId: suite._id,
|
|
19742
|
-
triggerSource: "cli"
|
|
19743
|
-
}
|
|
19744
|
-
})
|
|
19745
|
-
});
|
|
19746
|
-
if (!response.ok) {
|
|
19747
|
-
const error = await response.text();
|
|
19748
|
-
return { error };
|
|
19749
|
-
}
|
|
19750
|
-
const result = await response.json();
|
|
19751
|
-
return { runId: result.value, suiteId: suite._id };
|
|
19752
|
-
}
|
|
19753
|
-
async function pollEvalRun(runId, onProgress) {
|
|
19754
|
-
const maxAttempts = 300;
|
|
19755
|
-
const pollInterval = 2000;
|
|
19756
|
-
for (let i = 0;i < maxAttempts; i++) {
|
|
19757
|
-
const currentCredentials = loadCredentials();
|
|
19758
|
-
const currentApiKey = getApiKey();
|
|
19759
|
-
const token = currentApiKey || currentCredentials?.token;
|
|
19760
|
-
if (!token) {
|
|
19761
|
-
return { error: 'Authentication expired. Please run "struere login" again.' };
|
|
19762
|
-
}
|
|
19763
|
-
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19764
|
-
method: "POST",
|
|
19765
|
-
headers: {
|
|
19766
|
-
"Content-Type": "application/json",
|
|
19767
|
-
Authorization: `Bearer ${token}`
|
|
19768
|
-
},
|
|
19769
|
-
body: JSON.stringify({
|
|
19770
|
-
path: "evals:getRun",
|
|
19771
|
-
args: { id: runId }
|
|
19772
|
-
})
|
|
19773
|
-
});
|
|
19774
|
-
if (!response.ok) {
|
|
19775
|
-
if (response.status === 401) {
|
|
19776
|
-
return { error: 'Authentication expired. Please run "struere login" again.' };
|
|
19777
|
-
}
|
|
19778
|
-
return { error: `Failed to poll run status (HTTP ${response.status})` };
|
|
19779
|
-
}
|
|
19780
|
-
const result = await response.json();
|
|
19781
|
-
const run = result.value;
|
|
19782
|
-
if (!run) {
|
|
19783
|
-
return { error: "Run not found" };
|
|
19784
|
-
}
|
|
19785
|
-
if (onProgress) {
|
|
19786
|
-
onProgress(run);
|
|
19787
|
-
}
|
|
19788
|
-
if (run.status === "completed" || run.status === "failed" || run.status === "cancelled") {
|
|
19789
|
-
return { run };
|
|
19790
|
-
}
|
|
19791
|
-
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
|
19792
|
-
}
|
|
19793
|
-
return { error: "Run timed out after 10 minutes" };
|
|
19794
|
-
}
|
|
19795
|
-
async function getEvalRunResults(runId) {
|
|
19796
|
-
const credentials = loadCredentials();
|
|
19797
|
-
const apiKey = getApiKey();
|
|
19798
|
-
const token = apiKey || credentials?.token;
|
|
19799
|
-
if (!token) {
|
|
19800
|
-
return { error: "Not authenticated" };
|
|
19801
|
-
}
|
|
19802
|
-
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19803
|
-
method: "POST",
|
|
19804
|
-
headers: {
|
|
19805
|
-
"Content-Type": "application/json",
|
|
19806
|
-
Authorization: `Bearer ${token}`
|
|
19807
|
-
},
|
|
19808
|
-
body: JSON.stringify({
|
|
19809
|
-
path: "evals:getRunResults",
|
|
19810
|
-
args: { runId }
|
|
19811
|
-
})
|
|
19812
|
-
});
|
|
19813
|
-
if (!response.ok) {
|
|
19814
|
-
const error = await response.text();
|
|
19815
|
-
return { error };
|
|
19816
|
-
}
|
|
19817
|
-
const result = await response.json();
|
|
19818
|
-
return { results: result.value || [] };
|
|
19819
|
-
}
|
|
19820
19533
|
|
|
19821
19534
|
// src/cli/commands/login.ts
|
|
19822
19535
|
var AUTH_CALLBACK_PORT = 9876;
|
|
@@ -21248,7 +20961,8 @@ import { join as join5 } from "path";
|
|
|
21248
20961
|
import { existsSync as existsSync5, writeFileSync as writeFileSync4 } from "fs";
|
|
21249
20962
|
|
|
21250
20963
|
// src/cli/utils/loader.ts
|
|
21251
|
-
|
|
20964
|
+
var import_yaml = __toESM(require_dist(), 1);
|
|
20965
|
+
import { existsSync as existsSync4, readdirSync, readFileSync as readFileSync4 } from "fs";
|
|
21252
20966
|
import { join as join4 } from "path";
|
|
21253
20967
|
async function loadAllResources(cwd) {
|
|
21254
20968
|
const errors2 = [];
|
|
@@ -21259,7 +20973,9 @@ async function loadAllResources(cwd) {
|
|
|
21259
20973
|
if (toolsError) {
|
|
21260
20974
|
errors2.push(toolsError);
|
|
21261
20975
|
}
|
|
21262
|
-
|
|
20976
|
+
const { suites: evalSuites, errors: evalErrors } = loadAllEvalSuites(join4(cwd, "evals"));
|
|
20977
|
+
errors2.push(...evalErrors);
|
|
20978
|
+
return { agents, entityTypes, roles, customTools, evalSuites, errors: errors2 };
|
|
21263
20979
|
}
|
|
21264
20980
|
async function loadAllAgents(dir) {
|
|
21265
20981
|
if (!existsSync4(dir)) {
|
|
@@ -21349,12 +21065,31 @@ async function loadFromDirectory(dir) {
|
|
|
21349
21065
|
}
|
|
21350
21066
|
return items;
|
|
21351
21067
|
}
|
|
21068
|
+
function loadAllEvalSuites(dir) {
|
|
21069
|
+
const suites = [];
|
|
21070
|
+
const errors2 = [];
|
|
21071
|
+
if (!existsSync4(dir)) {
|
|
21072
|
+
return { suites, errors: errors2 };
|
|
21073
|
+
}
|
|
21074
|
+
const files = readdirSync(dir).filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
|
|
21075
|
+
for (const file of files) {
|
|
21076
|
+
try {
|
|
21077
|
+
const content = readFileSync4(join4(dir, file), "utf-8");
|
|
21078
|
+
const parsed = import_yaml.default.parse(content);
|
|
21079
|
+
suites.push(parsed);
|
|
21080
|
+
} catch (err) {
|
|
21081
|
+
errors2.push(`Failed to parse ${file}: ${err instanceof Error ? err.message : String(err)}`);
|
|
21082
|
+
}
|
|
21083
|
+
}
|
|
21084
|
+
return { suites, errors: errors2 };
|
|
21085
|
+
}
|
|
21352
21086
|
function getResourceDirectories(cwd) {
|
|
21353
21087
|
return {
|
|
21354
21088
|
agents: join4(cwd, "agents"),
|
|
21355
21089
|
entityTypes: join4(cwd, "entity-types"),
|
|
21356
21090
|
roles: join4(cwd, "roles"),
|
|
21357
|
-
tools: join4(cwd, "tools")
|
|
21091
|
+
tools: join4(cwd, "tools"),
|
|
21092
|
+
evals: join4(cwd, "evals")
|
|
21358
21093
|
};
|
|
21359
21094
|
}
|
|
21360
21095
|
|
|
@@ -21407,7 +21142,25 @@ function extractSyncPayload(resources) {
|
|
|
21407
21142
|
maskConfig: fm.maskConfig
|
|
21408
21143
|
}))
|
|
21409
21144
|
}));
|
|
21410
|
-
|
|
21145
|
+
const evalSuites = resources.evalSuites.length > 0 ? resources.evalSuites.map((suite) => ({
|
|
21146
|
+
name: suite.suite,
|
|
21147
|
+
slug: suite.slug,
|
|
21148
|
+
agentSlug: suite.agent,
|
|
21149
|
+
description: suite.description,
|
|
21150
|
+
tags: suite.tags,
|
|
21151
|
+
judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined,
|
|
21152
|
+
cases: suite.cases.map((c) => ({
|
|
21153
|
+
name: c.name,
|
|
21154
|
+
description: c.description,
|
|
21155
|
+
tags: c.tags,
|
|
21156
|
+
turns: c.turns.map((t) => ({
|
|
21157
|
+
userMessage: t.user,
|
|
21158
|
+
assertions: t.assertions
|
|
21159
|
+
})),
|
|
21160
|
+
finalAssertions: c.finalAssertions
|
|
21161
|
+
}))
|
|
21162
|
+
})) : undefined;
|
|
21163
|
+
return { agents, entityTypes, roles, evalSuites };
|
|
21411
21164
|
}
|
|
21412
21165
|
function extractAgentPayload(agent, customToolsMap) {
|
|
21413
21166
|
let systemPrompt;
|
|
@@ -21723,7 +21476,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
|
|
|
21723
21476
|
spinner.start("Loading resources");
|
|
21724
21477
|
try {
|
|
21725
21478
|
const resources = await loadAllResources(cwd);
|
|
21726
|
-
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
|
|
21479
|
+
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
|
|
21727
21480
|
for (const err of resources.errors) {
|
|
21728
21481
|
console.log(source_default.red(" \u2716"), err);
|
|
21729
21482
|
}
|
|
@@ -21781,6 +21534,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
|
|
|
21781
21534
|
dirs.entityTypes,
|
|
21782
21535
|
dirs.roles,
|
|
21783
21536
|
dirs.tools,
|
|
21537
|
+
dirs.evals,
|
|
21784
21538
|
join5(cwd, "struere.config.ts")
|
|
21785
21539
|
].filter((p) => existsSync5(p));
|
|
21786
21540
|
const watcher = import_chokidar.default.watch(watchPaths, {
|
|
@@ -22055,7 +21809,7 @@ var buildCommand = new Command("build").description("Build and validate agent fo
|
|
|
22055
21809
|
});
|
|
22056
21810
|
|
|
22057
21811
|
// src/cli/commands/test.ts
|
|
22058
|
-
var import_yaml = __toESM(require_dist(), 1);
|
|
21812
|
+
var import_yaml2 = __toESM(require_dist(), 1);
|
|
22059
21813
|
import { join as join9 } from "path";
|
|
22060
21814
|
import { readdir, readFile } from "fs/promises";
|
|
22061
21815
|
var testCommand = new Command("test").description("Run test conversations").argument("[pattern]", "Test file pattern", "*.test.yaml").option("-v, --verbose", "Show detailed output").option("--dry-run", "Parse tests without executing (no API calls)").action(async (pattern, options) => {
|
|
@@ -22107,7 +21861,7 @@ var testCommand = new Command("test").description("Run test conversations").argu
|
|
|
22107
21861
|
for (const file of testFiles) {
|
|
22108
21862
|
const filePath = join9(testsDir, file);
|
|
22109
21863
|
const content = await readFile(filePath, "utf-8");
|
|
22110
|
-
const testCase = import_yaml.default.parse(content);
|
|
21864
|
+
const testCase = import_yaml2.default.parse(content);
|
|
22111
21865
|
if (options.verbose) {
|
|
22112
21866
|
console.log();
|
|
22113
21867
|
console.log(source_default.gray("Running:"), testCase.name);
|
|
@@ -22240,174 +21994,6 @@ function formatAssertionError(assertion, context) {
|
|
|
22240
21994
|
}
|
|
22241
21995
|
}
|
|
22242
21996
|
|
|
22243
|
-
// src/cli/commands/eval.ts
|
|
22244
|
-
var import_yaml2 = __toESM(require_dist(), 1);
|
|
22245
|
-
import { join as join10 } from "path";
|
|
22246
|
-
import { readdir as readdir2, readFile as readFile2 } from "fs/promises";
|
|
22247
|
-
var evalCommand = new Command("eval").description("Run agent evaluations with LLM judge support").option("-s, --suite <name>", "Run a specific suite by name").option("-v, --verbose", "Show detailed output including judge reasoning").option("--dry-run", "Parse eval files without executing").option("--json", "Output results as JSON").option("--no-sync", "Skip syncing eval files to Convex").action(async (options) => {
|
|
22248
|
-
const spinner = ora();
|
|
22249
|
-
const cwd = process.cwd();
|
|
22250
|
-
if (!options.json) {
|
|
22251
|
-
console.log();
|
|
22252
|
-
console.log(source_default.bold("Running Evaluations"));
|
|
22253
|
-
console.log();
|
|
22254
|
-
}
|
|
22255
|
-
if (!hasProject(cwd)) {
|
|
22256
|
-
console.log(source_default.yellow("No struere.json found"));
|
|
22257
|
-
console.log();
|
|
22258
|
-
console.log(source_default.gray("Run"), source_default.cyan("struere init"), source_default.gray("to initialize this project"));
|
|
22259
|
-
console.log();
|
|
22260
|
-
process.exit(1);
|
|
22261
|
-
}
|
|
22262
|
-
const project = loadProject(cwd);
|
|
22263
|
-
if (!project) {
|
|
22264
|
-
console.log(source_default.red("Failed to load struere.json"));
|
|
22265
|
-
process.exit(1);
|
|
22266
|
-
}
|
|
22267
|
-
spinner.start("Finding eval files");
|
|
22268
|
-
const evalsDir = join10(cwd, "evals");
|
|
22269
|
-
let evalFiles = [];
|
|
22270
|
-
try {
|
|
22271
|
-
const files = await readdir2(evalsDir);
|
|
22272
|
-
evalFiles = files.filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
|
|
22273
|
-
} catch {
|
|
22274
|
-
spinner.warn("No evals directory found");
|
|
22275
|
-
console.log();
|
|
22276
|
-
console.log(source_default.gray("Create eval files in"), source_default.cyan("evals/*.eval.yaml"));
|
|
22277
|
-
console.log();
|
|
22278
|
-
return;
|
|
22279
|
-
}
|
|
22280
|
-
if (evalFiles.length === 0) {
|
|
22281
|
-
spinner.warn("No eval files found");
|
|
22282
|
-
console.log();
|
|
22283
|
-
return;
|
|
22284
|
-
}
|
|
22285
|
-
spinner.succeed(`Found ${evalFiles.length} eval file(s)`);
|
|
22286
|
-
const suites = [];
|
|
22287
|
-
for (const file of evalFiles) {
|
|
22288
|
-
const filePath = join10(evalsDir, file);
|
|
22289
|
-
const content = await readFile2(filePath, "utf-8");
|
|
22290
|
-
const parsed = import_yaml2.default.parse(content);
|
|
22291
|
-
suites.push(parsed);
|
|
22292
|
-
}
|
|
22293
|
-
if (options.suite) {
|
|
22294
|
-
const filtered = suites.filter((s) => s.suite.toLowerCase() === options.suite.toLowerCase() || s.slug === options.suite);
|
|
22295
|
-
if (filtered.length === 0) {
|
|
22296
|
-
console.log(source_default.red(`Suite "${options.suite}" not found`));
|
|
22297
|
-
process.exit(1);
|
|
22298
|
-
}
|
|
22299
|
-
suites.length = 0;
|
|
22300
|
-
suites.push(...filtered);
|
|
22301
|
-
}
|
|
22302
|
-
if (options.dryRun) {
|
|
22303
|
-
console.log();
|
|
22304
|
-
console.log(source_default.yellow("Dry run mode \u2014 parsed successfully"));
|
|
22305
|
-
console.log();
|
|
22306
|
-
for (const suite of suites) {
|
|
22307
|
-
console.log(source_default.cyan(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
|
|
22308
|
-
for (const c of suite.cases) {
|
|
22309
|
-
const assertionCount = (c.turns || []).reduce((sum, t) => sum + (t.assertions?.length || 0), 0) + (c.finalAssertions?.length || 0);
|
|
22310
|
-
console.log(source_default.gray(` - ${c.name}`), source_default.gray(`(${c.turns.length} turns, ${assertionCount} assertions)`));
|
|
22311
|
-
}
|
|
22312
|
-
}
|
|
22313
|
-
console.log();
|
|
22314
|
-
return;
|
|
22315
|
-
}
|
|
22316
|
-
if (options.sync !== false) {
|
|
22317
|
-
spinner.start("Syncing eval suites to Convex");
|
|
22318
|
-
const syncResult = await syncEvalSuites(suites);
|
|
22319
|
-
if (syncResult.error) {
|
|
22320
|
-
spinner.fail(`Sync failed: ${syncResult.error}`);
|
|
22321
|
-
process.exit(1);
|
|
22322
|
-
}
|
|
22323
|
-
spinner.succeed("Eval suites synced");
|
|
22324
|
-
}
|
|
22325
|
-
const allResults = [];
|
|
22326
|
-
for (const suite of suites) {
|
|
22327
|
-
if (!options.json) {
|
|
22328
|
-
console.log();
|
|
22329
|
-
console.log(source_default.bold(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
|
|
22330
|
-
}
|
|
22331
|
-
spinner.start(`Starting run for "${suite.suite}"`);
|
|
22332
|
-
const { runId, suiteId, error: startError } = await startEvalRun(suite.slug);
|
|
22333
|
-
if (startError || !runId) {
|
|
22334
|
-
spinner.fail(`Failed to start: ${startError}`);
|
|
22335
|
-
continue;
|
|
22336
|
-
}
|
|
22337
|
-
spinner.succeed(`Run started`);
|
|
22338
|
-
spinner.start("Executing cases...");
|
|
22339
|
-
const { run, error: pollError } = await pollEvalRun(runId, (status) => {
|
|
22340
|
-
spinner.text = `Executing cases... ${status.completedCases}/${status.totalCases}`;
|
|
22341
|
-
});
|
|
22342
|
-
if (pollError || !run) {
|
|
22343
|
-
spinner.fail(`Run failed: ${pollError}`);
|
|
22344
|
-
continue;
|
|
22345
|
-
}
|
|
22346
|
-
spinner.succeed(`Run completed: ${run.passedCases}/${run.totalCases} passed`);
|
|
22347
|
-
const { results, error: resultsError } = await getEvalRunResults(runId);
|
|
22348
|
-
if (resultsError) {
|
|
22349
|
-
console.log(source_default.red(` Failed to get results: ${resultsError}`));
|
|
22350
|
-
continue;
|
|
22351
|
-
}
|
|
22352
|
-
allResults.push({ suite: suite.suite, run, results: results || [] });
|
|
22353
|
-
if (!options.json) {
|
|
22354
|
-
for (let ri = 0;ri < (results || []).length; ri++) {
|
|
22355
|
-
const result = results[ri];
|
|
22356
|
-
const caseName = ri < suite.cases.length ? suite.cases[ri].name : result.caseId;
|
|
22357
|
-
if (result.overallPassed) {
|
|
22358
|
-
console.log(source_default.green(" \u2713"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
|
|
22359
|
-
} else {
|
|
22360
|
-
console.log(source_default.red(" \u2717"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
|
|
22361
|
-
if (result.errorMessage) {
|
|
22362
|
-
console.log(source_default.red(" Error:"), result.errorMessage);
|
|
22363
|
-
}
|
|
22364
|
-
if (options.verbose && result.turnResults) {
|
|
22365
|
-
for (const turn of result.turnResults) {
|
|
22366
|
-
if (turn.assertionResults) {
|
|
22367
|
-
for (const ar of turn.assertionResults) {
|
|
22368
|
-
if (!ar.passed) {
|
|
22369
|
-
console.log(source_default.red(` [${ar.type}]`), ar.reason || "");
|
|
22370
|
-
if (ar.criteria) {
|
|
22371
|
-
console.log(source_default.gray(` Criteria: ${ar.criteria}`));
|
|
22372
|
-
}
|
|
22373
|
-
}
|
|
22374
|
-
}
|
|
22375
|
-
}
|
|
22376
|
-
}
|
|
22377
|
-
}
|
|
22378
|
-
}
|
|
22379
|
-
}
|
|
22380
|
-
}
|
|
22381
|
-
}
|
|
22382
|
-
if (options.json) {
|
|
22383
|
-
console.log(JSON.stringify(allResults, null, 2));
|
|
22384
|
-
const anyFailed = allResults.some((r) => r.run.failedCases > 0);
|
|
22385
|
-
process.exit(anyFailed ? 1 : 0);
|
|
22386
|
-
return;
|
|
22387
|
-
}
|
|
22388
|
-
const totalPassed = allResults.reduce((sum, r) => sum + r.run.passedCases, 0);
|
|
22389
|
-
const totalCases = allResults.reduce((sum, r) => sum + r.run.totalCases, 0);
|
|
22390
|
-
const totalFailed = allResults.reduce((sum, r) => sum + r.run.failedCases, 0);
|
|
22391
|
-
console.log();
|
|
22392
|
-
if (totalFailed === 0) {
|
|
22393
|
-
console.log(source_default.green("All evaluations passed!"), source_default.gray(`(${totalPassed}/${totalCases})`));
|
|
22394
|
-
} else {
|
|
22395
|
-
console.log(source_default.red("Evaluations failed:"), source_default.gray(`${totalPassed}/${totalCases} passed`));
|
|
22396
|
-
}
|
|
22397
|
-
const totalTokens = allResults.reduce((sum, r) => {
|
|
22398
|
-
if (r.run.totalTokens)
|
|
22399
|
-
return sum + r.run.totalTokens.agent + r.run.totalTokens.judge;
|
|
22400
|
-
return sum;
|
|
22401
|
-
}, 0);
|
|
22402
|
-
if (totalTokens > 0) {
|
|
22403
|
-
console.log(source_default.gray(` Total tokens used: ${totalTokens.toLocaleString()}`));
|
|
22404
|
-
}
|
|
22405
|
-
console.log();
|
|
22406
|
-
if (totalFailed > 0) {
|
|
22407
|
-
process.exit(1);
|
|
22408
|
-
}
|
|
22409
|
-
});
|
|
22410
|
-
|
|
22411
21997
|
// src/cli/commands/deploy.ts
|
|
22412
21998
|
var isAuthError = (error) => {
|
|
22413
21999
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -22463,7 +22049,7 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
|
|
|
22463
22049
|
let resources;
|
|
22464
22050
|
try {
|
|
22465
22051
|
resources = await loadAllResources(cwd);
|
|
22466
|
-
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
|
|
22052
|
+
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
|
|
22467
22053
|
for (const err of resources.errors) {
|
|
22468
22054
|
console.log(source_default.red(" \u2716"), err);
|
|
22469
22055
|
}
|
|
@@ -22501,6 +22087,13 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
|
|
|
22501
22087
|
for (const role of resources.roles) {
|
|
22502
22088
|
console.log(source_default.gray(" -"), source_default.cyan(role.name));
|
|
22503
22089
|
}
|
|
22090
|
+
if (resources.evalSuites.length > 0) {
|
|
22091
|
+
console.log();
|
|
22092
|
+
console.log("Eval suites:");
|
|
22093
|
+
for (const suite of resources.evalSuites) {
|
|
22094
|
+
console.log(source_default.gray(" -"), source_default.cyan(suite.suite), source_default.gray(`(${suite.cases.length} cases)`));
|
|
22095
|
+
}
|
|
22096
|
+
}
|
|
22504
22097
|
console.log();
|
|
22505
22098
|
return;
|
|
22506
22099
|
}
|
|
@@ -22937,7 +22530,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
|
|
|
22937
22530
|
let localResources;
|
|
22938
22531
|
try {
|
|
22939
22532
|
localResources = await loadAllResources(cwd);
|
|
22940
|
-
spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools`);
|
|
22533
|
+
spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools, ${localResources.evalSuites.length} eval suites`);
|
|
22941
22534
|
for (const err of localResources.errors) {
|
|
22942
22535
|
console.log(source_default.red(" \u2716"), err);
|
|
22943
22536
|
}
|
|
@@ -23044,7 +22637,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
|
|
|
23044
22637
|
|
|
23045
22638
|
// src/cli/commands/pull.ts
|
|
23046
22639
|
import { existsSync as existsSync6, mkdirSync as mkdirSync3, writeFileSync as writeFileSync5 } from "fs";
|
|
23047
|
-
import { join as join11 } from "path";
|
|
22640
|
+
import { join as join10 } from "path";
|
|
23048
22641
|
|
|
23049
22642
|
// src/cli/utils/generator.ts
|
|
23050
22643
|
var BUILTIN_TOOLS2 = [
|
|
@@ -23351,7 +22944,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
|
|
|
23351
22944
|
}
|
|
23352
22945
|
};
|
|
23353
22946
|
const writeOrSkip = (relativePath, content) => {
|
|
23354
|
-
const fullPath = join11(cwd, relativePath);
|
|
22947
|
+
const fullPath = join10(cwd, relativePath);
|
|
23355
22948
|
if (existsSync6(fullPath) && !options.force) {
|
|
23356
22949
|
skipped.push(relativePath);
|
|
23357
22950
|
return false;
|
|
@@ -23360,15 +22953,15 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
|
|
|
23360
22953
|
created.push(relativePath);
|
|
23361
22954
|
return true;
|
|
23362
22955
|
}
|
|
23363
|
-
ensureDir2(join11(cwd, relativePath.split("/").slice(0, -1).join("/")));
|
|
22956
|
+
ensureDir2(join10(cwd, relativePath.split("/").slice(0, -1).join("/")));
|
|
23364
22957
|
writeFileSync5(fullPath, content);
|
|
23365
22958
|
created.push(relativePath);
|
|
23366
22959
|
return true;
|
|
23367
22960
|
};
|
|
23368
|
-
ensureDir2(join11(cwd, "agents"));
|
|
23369
|
-
ensureDir2(join11(cwd, "entity-types"));
|
|
23370
|
-
ensureDir2(join11(cwd, "roles"));
|
|
23371
|
-
ensureDir2(join11(cwd, "tools"));
|
|
22961
|
+
ensureDir2(join10(cwd, "agents"));
|
|
22962
|
+
ensureDir2(join10(cwd, "entity-types"));
|
|
22963
|
+
ensureDir2(join10(cwd, "roles"));
|
|
22964
|
+
ensureDir2(join10(cwd, "tools"));
|
|
23372
22965
|
const agentSlugs = [];
|
|
23373
22966
|
for (const agent of state.agents) {
|
|
23374
22967
|
if (!agent.systemPrompt && agent.tools.length === 0)
|
|
@@ -23454,7 +23047,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
|
|
|
23454
23047
|
// package.json
|
|
23455
23048
|
var package_default = {
|
|
23456
23049
|
name: "struere",
|
|
23457
|
-
version: "0.5.3",
|
|
23050
|
+
version: "0.5.4",
|
|
23458
23051
|
description: "Build, test, and deploy AI agents",
|
|
23459
23052
|
keywords: [
|
|
23460
23053
|
"ai",
|
|
@@ -23556,7 +23149,6 @@ program.addCommand(whoamiCommand);
|
|
|
23556
23149
|
program.addCommand(devCommand);
|
|
23557
23150
|
program.addCommand(buildCommand);
|
|
23558
23151
|
program.addCommand(testCommand);
|
|
23559
|
-
program.addCommand(evalCommand);
|
|
23560
23152
|
program.addCommand(deployCommand);
|
|
23561
23153
|
program.addCommand(validateCommand);
|
|
23562
23154
|
program.addCommand(logsCommand);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"deploy.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/deploy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AA0BnC,eAAO,MAAM,aAAa,
|
|
1
|
+
{"version":3,"file":"deploy.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/deploy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AA0BnC,eAAO,MAAM,aAAa,SAoMtB,CAAA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dev.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/dev.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAenC,eAAO,MAAM,UAAU,
|
|
1
|
+
{"version":3,"file":"dev.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/dev.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAenC,eAAO,MAAM,UAAU,SA0SnB,CAAA"}
|