struere 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/struere.js +82 -477
- package/dist/cli/commands/deploy.d.ts.map +1 -1
- package/dist/cli/commands/dev.d.ts.map +1 -1
- package/dist/cli/index.js +266 -664
- package/dist/cli/utils/convex.d.ts +42 -18
- package/dist/cli/utils/convex.d.ts.map +1 -1
- package/dist/cli/utils/extractor.d.ts +31 -0
- package/dist/cli/utils/extractor.d.ts.map +1 -1
- package/dist/cli/utils/loader.d.ts +7 -1
- package/dist/cli/utils/loader.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/bin/struere.js
CHANGED
|
@@ -19269,32 +19269,6 @@ function getApiKey() {
|
|
|
19269
19269
|
|
|
19270
19270
|
// src/cli/utils/convex.ts
|
|
19271
19271
|
var CONVEX_URL = process.env.STRUERE_CONVEX_URL || "https://rapid-wildebeest-172.convex.cloud";
|
|
19272
|
-
async function listAgents() {
|
|
19273
|
-
const credentials = loadCredentials();
|
|
19274
|
-
const apiKey = getApiKey();
|
|
19275
|
-
const token = apiKey || credentials?.token;
|
|
19276
|
-
if (!token) {
|
|
19277
|
-
return { agents: [], error: "Not authenticated" };
|
|
19278
|
-
}
|
|
19279
|
-
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19280
|
-
method: "POST",
|
|
19281
|
-
headers: {
|
|
19282
|
-
"Content-Type": "application/json",
|
|
19283
|
-
Authorization: `Bearer ${token}`
|
|
19284
|
-
},
|
|
19285
|
-
body: JSON.stringify({
|
|
19286
|
-
path: "agents:list",
|
|
19287
|
-
args: {}
|
|
19288
|
-
})
|
|
19289
|
-
});
|
|
19290
|
-
if (!response.ok) {
|
|
19291
|
-
const error = await response.text();
|
|
19292
|
-
return { agents: [], error };
|
|
19293
|
-
}
|
|
19294
|
-
const result = await response.json();
|
|
19295
|
-
const agents = Array.isArray(result) ? result : result?.value || [];
|
|
19296
|
-
return { agents };
|
|
19297
|
-
}
|
|
19298
19272
|
async function listMyOrganizations(token) {
|
|
19299
19273
|
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19300
19274
|
method: "POST",
|
|
@@ -19481,18 +19455,24 @@ async function syncOrganization(payload) {
|
|
|
19481
19455
|
}
|
|
19482
19456
|
return { success: false, error: `Network error: ${err instanceof Error ? err.message : String(err)}` };
|
|
19483
19457
|
}
|
|
19458
|
+
const text = await response.text();
|
|
19459
|
+
let json;
|
|
19460
|
+
try {
|
|
19461
|
+
json = JSON.parse(text);
|
|
19462
|
+
} catch {
|
|
19463
|
+
return { success: false, error: text || `HTTP ${response.status}` };
|
|
19464
|
+
}
|
|
19484
19465
|
if (!response.ok) {
|
|
19485
|
-
const error = await response.text();
|
|
19486
|
-
return { success: false, error };
|
|
19466
|
+
const msg = json.errorMessage || json.message || text;
|
|
19467
|
+
return { success: false, error: msg };
|
|
19487
19468
|
}
|
|
19488
|
-
const json = await response.json();
|
|
19489
19469
|
if (json.status === "success" && json.value) {
|
|
19490
19470
|
return json.value;
|
|
19491
19471
|
}
|
|
19492
19472
|
if (json.status === "error") {
|
|
19493
19473
|
return { success: false, error: json.errorMessage || "Unknown error from Convex" };
|
|
19494
19474
|
}
|
|
19495
|
-
return { success: false, error: `Unexpected response: ${JSON.stringify(json)}` };
|
|
19475
|
+
return { success: false, error: `Unexpected response: ${text}` };
|
|
19496
19476
|
}
|
|
19497
19477
|
async function getSyncState(organizationId, environment) {
|
|
19498
19478
|
const credentials = loadCredentials();
|
|
@@ -19556,267 +19536,6 @@ async function getPullState(organizationId, environment = "development", include
|
|
|
19556
19536
|
}
|
|
19557
19537
|
return { error: `Unexpected response: ${JSON.stringify(result)}` };
|
|
19558
19538
|
}
|
|
19559
|
-
async function resolveAgentId(agentSlug) {
|
|
19560
|
-
const { agents } = await listAgents();
|
|
19561
|
-
const agent = agents.find((a) => a.slug === agentSlug || a._id === agentSlug);
|
|
19562
|
-
return agent?._id;
|
|
19563
|
-
}
|
|
19564
|
-
async function syncEvalSuites(suites) {
|
|
19565
|
-
const credentials = loadCredentials();
|
|
19566
|
-
const apiKey = getApiKey();
|
|
19567
|
-
const token = apiKey || credentials?.token;
|
|
19568
|
-
if (!token) {
|
|
19569
|
-
return { error: "Not authenticated" };
|
|
19570
|
-
}
|
|
19571
|
-
const suiteIds = {};
|
|
19572
|
-
for (const suite of suites) {
|
|
19573
|
-
const agentId = await resolveAgentId(suite.agent);
|
|
19574
|
-
if (!agentId) {
|
|
19575
|
-
return { error: `Agent "${suite.agent}" not found. Make sure the agent exists before syncing evals.` };
|
|
19576
|
-
}
|
|
19577
|
-
const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19578
|
-
method: "POST",
|
|
19579
|
-
headers: {
|
|
19580
|
-
"Content-Type": "application/json",
|
|
19581
|
-
Authorization: `Bearer ${token}`
|
|
19582
|
-
},
|
|
19583
|
-
body: JSON.stringify({
|
|
19584
|
-
path: "evals:listAllSuites",
|
|
19585
|
-
args: { environment: "development" }
|
|
19586
|
-
})
|
|
19587
|
-
});
|
|
19588
|
-
let existingSuiteId;
|
|
19589
|
-
if (listResponse.ok) {
|
|
19590
|
-
const listResult = await listResponse.json();
|
|
19591
|
-
const existing = (listResult.value || []).find((s) => s.slug === suite.slug);
|
|
19592
|
-
if (existing) {
|
|
19593
|
-
existingSuiteId = existing._id;
|
|
19594
|
-
}
|
|
19595
|
-
}
|
|
19596
|
-
if (!existingSuiteId) {
|
|
19597
|
-
const createResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19598
|
-
method: "POST",
|
|
19599
|
-
headers: {
|
|
19600
|
-
"Content-Type": "application/json",
|
|
19601
|
-
Authorization: `Bearer ${token}`
|
|
19602
|
-
},
|
|
19603
|
-
body: JSON.stringify({
|
|
19604
|
-
path: "evals:createSuite",
|
|
19605
|
-
args: {
|
|
19606
|
-
agentId,
|
|
19607
|
-
name: suite.suite,
|
|
19608
|
-
slug: suite.slug,
|
|
19609
|
-
description: suite.description,
|
|
19610
|
-
tags: suite.tags,
|
|
19611
|
-
judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined,
|
|
19612
|
-
environment: "development"
|
|
19613
|
-
}
|
|
19614
|
-
})
|
|
19615
|
-
});
|
|
19616
|
-
if (!createResponse.ok) {
|
|
19617
|
-
const error = await createResponse.text();
|
|
19618
|
-
return { error: `Failed to create suite "${suite.suite}": ${error}` };
|
|
19619
|
-
}
|
|
19620
|
-
const createResult = await createResponse.json();
|
|
19621
|
-
existingSuiteId = createResult.value;
|
|
19622
|
-
} else {
|
|
19623
|
-
const updateResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19624
|
-
method: "POST",
|
|
19625
|
-
headers: {
|
|
19626
|
-
"Content-Type": "application/json",
|
|
19627
|
-
Authorization: `Bearer ${token}`
|
|
19628
|
-
},
|
|
19629
|
-
body: JSON.stringify({
|
|
19630
|
-
path: "evals:updateSuite",
|
|
19631
|
-
args: {
|
|
19632
|
-
id: existingSuiteId,
|
|
19633
|
-
name: suite.suite,
|
|
19634
|
-
description: suite.description,
|
|
19635
|
-
tags: suite.tags,
|
|
19636
|
-
judgeModel: suite.judgeModel ? { provider: "anthropic", name: suite.judgeModel } : undefined
|
|
19637
|
-
}
|
|
19638
|
-
})
|
|
19639
|
-
});
|
|
19640
|
-
if (!updateResponse.ok) {
|
|
19641
|
-
const error = await updateResponse.text();
|
|
19642
|
-
return { error: `Failed to update suite "${suite.suite}": ${error}` };
|
|
19643
|
-
}
|
|
19644
|
-
const deleteResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19645
|
-
method: "POST",
|
|
19646
|
-
headers: {
|
|
19647
|
-
"Content-Type": "application/json",
|
|
19648
|
-
Authorization: `Bearer ${token}`
|
|
19649
|
-
},
|
|
19650
|
-
body: JSON.stringify({
|
|
19651
|
-
path: "evals:deleteCasesBySuite",
|
|
19652
|
-
args: { suiteId: existingSuiteId }
|
|
19653
|
-
})
|
|
19654
|
-
});
|
|
19655
|
-
if (!deleteResponse.ok) {
|
|
19656
|
-
const error = await deleteResponse.text();
|
|
19657
|
-
return { error: `Failed to clear cases for suite "${suite.suite}": ${error}` };
|
|
19658
|
-
}
|
|
19659
|
-
}
|
|
19660
|
-
if (existingSuiteId) {
|
|
19661
|
-
suiteIds[suite.slug] = existingSuiteId;
|
|
19662
|
-
for (let i = 0;i < suite.cases.length; i++) {
|
|
19663
|
-
const c = suite.cases[i];
|
|
19664
|
-
const turns = c.turns.map((t) => ({
|
|
19665
|
-
userMessage: t.user,
|
|
19666
|
-
assertions: t.assertions?.map((a) => ({
|
|
19667
|
-
type: a.type,
|
|
19668
|
-
...a.criteria ? { criteria: a.criteria } : {},
|
|
19669
|
-
...a.value ? { value: a.value } : {},
|
|
19670
|
-
...a.weight ? { weight: a.weight } : {}
|
|
19671
|
-
}))
|
|
19672
|
-
}));
|
|
19673
|
-
const finalAssertions = c.finalAssertions?.map((a) => ({
|
|
19674
|
-
type: a.type,
|
|
19675
|
-
...a.criteria ? { criteria: a.criteria } : {},
|
|
19676
|
-
...a.value ? { value: a.value } : {},
|
|
19677
|
-
...a.weight ? { weight: a.weight } : {}
|
|
19678
|
-
}));
|
|
19679
|
-
const caseResponse = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19680
|
-
method: "POST",
|
|
19681
|
-
headers: {
|
|
19682
|
-
"Content-Type": "application/json",
|
|
19683
|
-
Authorization: `Bearer ${token}`
|
|
19684
|
-
},
|
|
19685
|
-
body: JSON.stringify({
|
|
19686
|
-
path: "evals:createCase",
|
|
19687
|
-
args: {
|
|
19688
|
-
suiteId: existingSuiteId,
|
|
19689
|
-
name: c.name,
|
|
19690
|
-
description: c.description,
|
|
19691
|
-
tags: c.tags,
|
|
19692
|
-
turns,
|
|
19693
|
-
finalAssertions
|
|
19694
|
-
}
|
|
19695
|
-
})
|
|
19696
|
-
});
|
|
19697
|
-
if (!caseResponse.ok) {
|
|
19698
|
-
const error = await caseResponse.text();
|
|
19699
|
-
return { error: `Failed to create case "${c.name}": ${error}` };
|
|
19700
|
-
}
|
|
19701
|
-
}
|
|
19702
|
-
}
|
|
19703
|
-
}
|
|
19704
|
-
return { suiteIds };
|
|
19705
|
-
}
|
|
19706
|
-
async function startEvalRun(suiteSlug) {
|
|
19707
|
-
const credentials = loadCredentials();
|
|
19708
|
-
const apiKey = getApiKey();
|
|
19709
|
-
const token = apiKey || credentials?.token;
|
|
19710
|
-
if (!token) {
|
|
19711
|
-
return { error: "Not authenticated" };
|
|
19712
|
-
}
|
|
19713
|
-
const listResponse = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19714
|
-
method: "POST",
|
|
19715
|
-
headers: {
|
|
19716
|
-
"Content-Type": "application/json",
|
|
19717
|
-
Authorization: `Bearer ${token}`
|
|
19718
|
-
},
|
|
19719
|
-
body: JSON.stringify({
|
|
19720
|
-
path: "evals:listAllSuites",
|
|
19721
|
-
args: { environment: "development" }
|
|
19722
|
-
})
|
|
19723
|
-
});
|
|
19724
|
-
if (!listResponse.ok) {
|
|
19725
|
-
return { error: "Failed to list suites" };
|
|
19726
|
-
}
|
|
19727
|
-
const listResult = await listResponse.json();
|
|
19728
|
-
const suite = (listResult.value || []).find((s) => s.slug === suiteSlug);
|
|
19729
|
-
if (!suite) {
|
|
19730
|
-
return { error: `Suite "${suiteSlug}" not found` };
|
|
19731
|
-
}
|
|
19732
|
-
const response = await fetch(`${CONVEX_URL}/api/mutation`, {
|
|
19733
|
-
method: "POST",
|
|
19734
|
-
headers: {
|
|
19735
|
-
"Content-Type": "application/json",
|
|
19736
|
-
Authorization: `Bearer ${token}`
|
|
19737
|
-
},
|
|
19738
|
-
body: JSON.stringify({
|
|
19739
|
-
path: "evals:startRun",
|
|
19740
|
-
args: {
|
|
19741
|
-
suiteId: suite._id,
|
|
19742
|
-
triggerSource: "cli"
|
|
19743
|
-
}
|
|
19744
|
-
})
|
|
19745
|
-
});
|
|
19746
|
-
if (!response.ok) {
|
|
19747
|
-
const error = await response.text();
|
|
19748
|
-
return { error };
|
|
19749
|
-
}
|
|
19750
|
-
const result = await response.json();
|
|
19751
|
-
return { runId: result.value, suiteId: suite._id };
|
|
19752
|
-
}
|
|
19753
|
-
async function pollEvalRun(runId, onProgress) {
|
|
19754
|
-
const maxAttempts = 300;
|
|
19755
|
-
const pollInterval = 2000;
|
|
19756
|
-
for (let i = 0;i < maxAttempts; i++) {
|
|
19757
|
-
const currentCredentials = loadCredentials();
|
|
19758
|
-
const currentApiKey = getApiKey();
|
|
19759
|
-
const token = currentApiKey || currentCredentials?.token;
|
|
19760
|
-
if (!token) {
|
|
19761
|
-
return { error: 'Authentication expired. Please run "struere login" again.' };
|
|
19762
|
-
}
|
|
19763
|
-
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19764
|
-
method: "POST",
|
|
19765
|
-
headers: {
|
|
19766
|
-
"Content-Type": "application/json",
|
|
19767
|
-
Authorization: `Bearer ${token}`
|
|
19768
|
-
},
|
|
19769
|
-
body: JSON.stringify({
|
|
19770
|
-
path: "evals:getRun",
|
|
19771
|
-
args: { id: runId }
|
|
19772
|
-
})
|
|
19773
|
-
});
|
|
19774
|
-
if (!response.ok) {
|
|
19775
|
-
if (response.status === 401) {
|
|
19776
|
-
return { error: 'Authentication expired. Please run "struere login" again.' };
|
|
19777
|
-
}
|
|
19778
|
-
return { error: `Failed to poll run status (HTTP ${response.status})` };
|
|
19779
|
-
}
|
|
19780
|
-
const result = await response.json();
|
|
19781
|
-
const run = result.value;
|
|
19782
|
-
if (!run) {
|
|
19783
|
-
return { error: "Run not found" };
|
|
19784
|
-
}
|
|
19785
|
-
if (onProgress) {
|
|
19786
|
-
onProgress(run);
|
|
19787
|
-
}
|
|
19788
|
-
if (run.status === "completed" || run.status === "failed" || run.status === "cancelled") {
|
|
19789
|
-
return { run };
|
|
19790
|
-
}
|
|
19791
|
-
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
|
19792
|
-
}
|
|
19793
|
-
return { error: "Run timed out after 10 minutes" };
|
|
19794
|
-
}
|
|
19795
|
-
async function getEvalRunResults(runId) {
|
|
19796
|
-
const credentials = loadCredentials();
|
|
19797
|
-
const apiKey = getApiKey();
|
|
19798
|
-
const token = apiKey || credentials?.token;
|
|
19799
|
-
if (!token) {
|
|
19800
|
-
return { error: "Not authenticated" };
|
|
19801
|
-
}
|
|
19802
|
-
const response = await fetch(`${CONVEX_URL}/api/query`, {
|
|
19803
|
-
method: "POST",
|
|
19804
|
-
headers: {
|
|
19805
|
-
"Content-Type": "application/json",
|
|
19806
|
-
Authorization: `Bearer ${token}`
|
|
19807
|
-
},
|
|
19808
|
-
body: JSON.stringify({
|
|
19809
|
-
path: "evals:getRunResults",
|
|
19810
|
-
args: { runId }
|
|
19811
|
-
})
|
|
19812
|
-
});
|
|
19813
|
-
if (!response.ok) {
|
|
19814
|
-
const error = await response.text();
|
|
19815
|
-
return { error };
|
|
19816
|
-
}
|
|
19817
|
-
const result = await response.json();
|
|
19818
|
-
return { results: result.value || [] };
|
|
19819
|
-
}
|
|
19820
19539
|
|
|
19821
19540
|
// src/cli/commands/login.ts
|
|
19822
19541
|
var AUTH_CALLBACK_PORT = 9876;
|
|
@@ -21248,7 +20967,8 @@ import { join as join5 } from "path";
|
|
|
21248
20967
|
import { existsSync as existsSync5, writeFileSync as writeFileSync4 } from "fs";
|
|
21249
20968
|
|
|
21250
20969
|
// src/cli/utils/loader.ts
|
|
21251
|
-
|
|
20970
|
+
var import_yaml = __toESM(require_dist(), 1);
|
|
20971
|
+
import { existsSync as existsSync4, readdirSync, readFileSync as readFileSync4 } from "fs";
|
|
21252
20972
|
import { join as join4 } from "path";
|
|
21253
20973
|
async function loadAllResources(cwd) {
|
|
21254
20974
|
const errors2 = [];
|
|
@@ -21259,7 +20979,9 @@ async function loadAllResources(cwd) {
|
|
|
21259
20979
|
if (toolsError) {
|
|
21260
20980
|
errors2.push(toolsError);
|
|
21261
20981
|
}
|
|
21262
|
-
|
|
20982
|
+
const { suites: evalSuites, errors: evalErrors } = loadAllEvalSuites(join4(cwd, "evals"));
|
|
20983
|
+
errors2.push(...evalErrors);
|
|
20984
|
+
return { agents, entityTypes, roles, customTools, evalSuites, errors: errors2 };
|
|
21263
20985
|
}
|
|
21264
20986
|
async function loadAllAgents(dir) {
|
|
21265
20987
|
if (!existsSync4(dir)) {
|
|
@@ -21349,16 +21071,42 @@ async function loadFromDirectory(dir) {
|
|
|
21349
21071
|
}
|
|
21350
21072
|
return items;
|
|
21351
21073
|
}
|
|
21074
|
+
function loadAllEvalSuites(dir) {
|
|
21075
|
+
const suites = [];
|
|
21076
|
+
const errors2 = [];
|
|
21077
|
+
if (!existsSync4(dir)) {
|
|
21078
|
+
return { suites, errors: errors2 };
|
|
21079
|
+
}
|
|
21080
|
+
const files = readdirSync(dir).filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
|
|
21081
|
+
for (const file of files) {
|
|
21082
|
+
try {
|
|
21083
|
+
const content = readFileSync4(join4(dir, file), "utf-8");
|
|
21084
|
+
const parsed = import_yaml.default.parse(content);
|
|
21085
|
+
suites.push(parsed);
|
|
21086
|
+
} catch (err) {
|
|
21087
|
+
errors2.push(`Failed to parse ${file}: ${err instanceof Error ? err.message : String(err)}`);
|
|
21088
|
+
}
|
|
21089
|
+
}
|
|
21090
|
+
return { suites, errors: errors2 };
|
|
21091
|
+
}
|
|
21352
21092
|
function getResourceDirectories(cwd) {
|
|
21353
21093
|
return {
|
|
21354
21094
|
agents: join4(cwd, "agents"),
|
|
21355
21095
|
entityTypes: join4(cwd, "entity-types"),
|
|
21356
21096
|
roles: join4(cwd, "roles"),
|
|
21357
|
-
tools: join4(cwd, "tools")
|
|
21097
|
+
tools: join4(cwd, "tools"),
|
|
21098
|
+
evals: join4(cwd, "evals")
|
|
21358
21099
|
};
|
|
21359
21100
|
}
|
|
21360
21101
|
|
|
21361
21102
|
// src/cli/utils/extractor.ts
|
|
21103
|
+
function inferProvider(modelName) {
|
|
21104
|
+
if (modelName.startsWith("gpt-") || modelName.startsWith("o1") || modelName.startsWith("o3") || modelName.startsWith("o4"))
|
|
21105
|
+
return "openai";
|
|
21106
|
+
if (modelName.startsWith("gemini"))
|
|
21107
|
+
return "google";
|
|
21108
|
+
return "anthropic";
|
|
21109
|
+
}
|
|
21362
21110
|
var BUILTIN_TOOLS = [
|
|
21363
21111
|
"entity.create",
|
|
21364
21112
|
"entity.get",
|
|
@@ -21407,7 +21155,25 @@ function extractSyncPayload(resources) {
|
|
|
21407
21155
|
maskConfig: fm.maskConfig
|
|
21408
21156
|
}))
|
|
21409
21157
|
}));
|
|
21410
|
-
|
|
21158
|
+
const evalSuites = resources.evalSuites.length > 0 ? resources.evalSuites.map((suite) => ({
|
|
21159
|
+
name: suite.suite,
|
|
21160
|
+
slug: suite.slug,
|
|
21161
|
+
agentSlug: suite.agent,
|
|
21162
|
+
description: suite.description,
|
|
21163
|
+
tags: suite.tags,
|
|
21164
|
+
judgeModel: suite.judgeModel ? { provider: inferProvider(suite.judgeModel), name: suite.judgeModel } : undefined,
|
|
21165
|
+
cases: suite.cases.map((c) => ({
|
|
21166
|
+
name: c.name,
|
|
21167
|
+
description: c.description,
|
|
21168
|
+
tags: c.tags,
|
|
21169
|
+
turns: c.turns.map((t) => ({
|
|
21170
|
+
userMessage: t.user,
|
|
21171
|
+
assertions: t.assertions
|
|
21172
|
+
})),
|
|
21173
|
+
finalAssertions: c.finalAssertions
|
|
21174
|
+
}))
|
|
21175
|
+
})) : undefined;
|
|
21176
|
+
return { agents, entityTypes, roles, evalSuites };
|
|
21411
21177
|
}
|
|
21412
21178
|
function extractAgentPayload(agent, customToolsMap) {
|
|
21413
21179
|
let systemPrompt;
|
|
@@ -21723,7 +21489,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
|
|
|
21723
21489
|
spinner.start("Loading resources");
|
|
21724
21490
|
try {
|
|
21725
21491
|
const resources = await loadAllResources(cwd);
|
|
21726
|
-
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
|
|
21492
|
+
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
|
|
21727
21493
|
for (const err of resources.errors) {
|
|
21728
21494
|
console.log(source_default.red(" \u2716"), err);
|
|
21729
21495
|
}
|
|
@@ -21781,6 +21547,7 @@ var devCommand = new Command("dev").description("Sync all resources to developme
|
|
|
21781
21547
|
dirs.entityTypes,
|
|
21782
21548
|
dirs.roles,
|
|
21783
21549
|
dirs.tools,
|
|
21550
|
+
dirs.evals,
|
|
21784
21551
|
join5(cwd, "struere.config.ts")
|
|
21785
21552
|
].filter((p) => existsSync5(p));
|
|
21786
21553
|
const watcher = import_chokidar.default.watch(watchPaths, {
|
|
@@ -22055,7 +21822,7 @@ var buildCommand = new Command("build").description("Build and validate agent fo
|
|
|
22055
21822
|
});
|
|
22056
21823
|
|
|
22057
21824
|
// src/cli/commands/test.ts
|
|
22058
|
-
var import_yaml = __toESM(require_dist(), 1);
|
|
21825
|
+
var import_yaml2 = __toESM(require_dist(), 1);
|
|
22059
21826
|
import { join as join9 } from "path";
|
|
22060
21827
|
import { readdir, readFile } from "fs/promises";
|
|
22061
21828
|
var testCommand = new Command("test").description("Run test conversations").argument("[pattern]", "Test file pattern", "*.test.yaml").option("-v, --verbose", "Show detailed output").option("--dry-run", "Parse tests without executing (no API calls)").action(async (pattern, options) => {
|
|
@@ -22107,7 +21874,7 @@ var testCommand = new Command("test").description("Run test conversations").argu
|
|
|
22107
21874
|
for (const file of testFiles) {
|
|
22108
21875
|
const filePath = join9(testsDir, file);
|
|
22109
21876
|
const content = await readFile(filePath, "utf-8");
|
|
22110
|
-
const testCase = import_yaml.default.parse(content);
|
|
21877
|
+
const testCase = import_yaml2.default.parse(content);
|
|
22111
21878
|
if (options.verbose) {
|
|
22112
21879
|
console.log();
|
|
22113
21880
|
console.log(source_default.gray("Running:"), testCase.name);
|
|
@@ -22240,174 +22007,6 @@ function formatAssertionError(assertion, context) {
|
|
|
22240
22007
|
}
|
|
22241
22008
|
}
|
|
22242
22009
|
|
|
22243
|
-
// src/cli/commands/eval.ts
|
|
22244
|
-
var import_yaml2 = __toESM(require_dist(), 1);
|
|
22245
|
-
import { join as join10 } from "path";
|
|
22246
|
-
import { readdir as readdir2, readFile as readFile2 } from "fs/promises";
|
|
22247
|
-
var evalCommand = new Command("eval").description("Run agent evaluations with LLM judge support").option("-s, --suite <name>", "Run a specific suite by name").option("-v, --verbose", "Show detailed output including judge reasoning").option("--dry-run", "Parse eval files without executing").option("--json", "Output results as JSON").option("--no-sync", "Skip syncing eval files to Convex").action(async (options) => {
|
|
22248
|
-
const spinner = ora();
|
|
22249
|
-
const cwd = process.cwd();
|
|
22250
|
-
if (!options.json) {
|
|
22251
|
-
console.log();
|
|
22252
|
-
console.log(source_default.bold("Running Evaluations"));
|
|
22253
|
-
console.log();
|
|
22254
|
-
}
|
|
22255
|
-
if (!hasProject(cwd)) {
|
|
22256
|
-
console.log(source_default.yellow("No struere.json found"));
|
|
22257
|
-
console.log();
|
|
22258
|
-
console.log(source_default.gray("Run"), source_default.cyan("struere init"), source_default.gray("to initialize this project"));
|
|
22259
|
-
console.log();
|
|
22260
|
-
process.exit(1);
|
|
22261
|
-
}
|
|
22262
|
-
const project = loadProject(cwd);
|
|
22263
|
-
if (!project) {
|
|
22264
|
-
console.log(source_default.red("Failed to load struere.json"));
|
|
22265
|
-
process.exit(1);
|
|
22266
|
-
}
|
|
22267
|
-
spinner.start("Finding eval files");
|
|
22268
|
-
const evalsDir = join10(cwd, "evals");
|
|
22269
|
-
let evalFiles = [];
|
|
22270
|
-
try {
|
|
22271
|
-
const files = await readdir2(evalsDir);
|
|
22272
|
-
evalFiles = files.filter((f) => f.endsWith(".eval.yaml") || f.endsWith(".eval.yml"));
|
|
22273
|
-
} catch {
|
|
22274
|
-
spinner.warn("No evals directory found");
|
|
22275
|
-
console.log();
|
|
22276
|
-
console.log(source_default.gray("Create eval files in"), source_default.cyan("evals/*.eval.yaml"));
|
|
22277
|
-
console.log();
|
|
22278
|
-
return;
|
|
22279
|
-
}
|
|
22280
|
-
if (evalFiles.length === 0) {
|
|
22281
|
-
spinner.warn("No eval files found");
|
|
22282
|
-
console.log();
|
|
22283
|
-
return;
|
|
22284
|
-
}
|
|
22285
|
-
spinner.succeed(`Found ${evalFiles.length} eval file(s)`);
|
|
22286
|
-
const suites = [];
|
|
22287
|
-
for (const file of evalFiles) {
|
|
22288
|
-
const filePath = join10(evalsDir, file);
|
|
22289
|
-
const content = await readFile2(filePath, "utf-8");
|
|
22290
|
-
const parsed = import_yaml2.default.parse(content);
|
|
22291
|
-
suites.push(parsed);
|
|
22292
|
-
}
|
|
22293
|
-
if (options.suite) {
|
|
22294
|
-
const filtered = suites.filter((s) => s.suite.toLowerCase() === options.suite.toLowerCase() || s.slug === options.suite);
|
|
22295
|
-
if (filtered.length === 0) {
|
|
22296
|
-
console.log(source_default.red(`Suite "${options.suite}" not found`));
|
|
22297
|
-
process.exit(1);
|
|
22298
|
-
}
|
|
22299
|
-
suites.length = 0;
|
|
22300
|
-
suites.push(...filtered);
|
|
22301
|
-
}
|
|
22302
|
-
if (options.dryRun) {
|
|
22303
|
-
console.log();
|
|
22304
|
-
console.log(source_default.yellow("Dry run mode \u2014 parsed successfully"));
|
|
22305
|
-
console.log();
|
|
22306
|
-
for (const suite of suites) {
|
|
22307
|
-
console.log(source_default.cyan(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
|
|
22308
|
-
for (const c of suite.cases) {
|
|
22309
|
-
const assertionCount = (c.turns || []).reduce((sum, t) => sum + (t.assertions?.length || 0), 0) + (c.finalAssertions?.length || 0);
|
|
22310
|
-
console.log(source_default.gray(` - ${c.name}`), source_default.gray(`(${c.turns.length} turns, ${assertionCount} assertions)`));
|
|
22311
|
-
}
|
|
22312
|
-
}
|
|
22313
|
-
console.log();
|
|
22314
|
-
return;
|
|
22315
|
-
}
|
|
22316
|
-
if (options.sync !== false) {
|
|
22317
|
-
spinner.start("Syncing eval suites to Convex");
|
|
22318
|
-
const syncResult = await syncEvalSuites(suites);
|
|
22319
|
-
if (syncResult.error) {
|
|
22320
|
-
spinner.fail(`Sync failed: ${syncResult.error}`);
|
|
22321
|
-
process.exit(1);
|
|
22322
|
-
}
|
|
22323
|
-
spinner.succeed("Eval suites synced");
|
|
22324
|
-
}
|
|
22325
|
-
const allResults = [];
|
|
22326
|
-
for (const suite of suites) {
|
|
22327
|
-
if (!options.json) {
|
|
22328
|
-
console.log();
|
|
22329
|
-
console.log(source_default.bold(` ${suite.suite}`), source_default.gray(`(${suite.cases.length} cases)`));
|
|
22330
|
-
}
|
|
22331
|
-
spinner.start(`Starting run for "${suite.suite}"`);
|
|
22332
|
-
const { runId, suiteId, error: startError } = await startEvalRun(suite.slug);
|
|
22333
|
-
if (startError || !runId) {
|
|
22334
|
-
spinner.fail(`Failed to start: ${startError}`);
|
|
22335
|
-
continue;
|
|
22336
|
-
}
|
|
22337
|
-
spinner.succeed(`Run started`);
|
|
22338
|
-
spinner.start("Executing cases...");
|
|
22339
|
-
const { run, error: pollError } = await pollEvalRun(runId, (status) => {
|
|
22340
|
-
spinner.text = `Executing cases... ${status.completedCases}/${status.totalCases}`;
|
|
22341
|
-
});
|
|
22342
|
-
if (pollError || !run) {
|
|
22343
|
-
spinner.fail(`Run failed: ${pollError}`);
|
|
22344
|
-
continue;
|
|
22345
|
-
}
|
|
22346
|
-
spinner.succeed(`Run completed: ${run.passedCases}/${run.totalCases} passed`);
|
|
22347
|
-
const { results, error: resultsError } = await getEvalRunResults(runId);
|
|
22348
|
-
if (resultsError) {
|
|
22349
|
-
console.log(source_default.red(` Failed to get results: ${resultsError}`));
|
|
22350
|
-
continue;
|
|
22351
|
-
}
|
|
22352
|
-
allResults.push({ suite: suite.suite, run, results: results || [] });
|
|
22353
|
-
if (!options.json) {
|
|
22354
|
-
for (let ri = 0;ri < (results || []).length; ri++) {
|
|
22355
|
-
const result = results[ri];
|
|
22356
|
-
const caseName = ri < suite.cases.length ? suite.cases[ri].name : result.caseId;
|
|
22357
|
-
if (result.overallPassed) {
|
|
22358
|
-
console.log(source_default.green(" \u2713"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
|
|
22359
|
-
} else {
|
|
22360
|
-
console.log(source_default.red(" \u2717"), caseName, result.overallScore !== undefined ? source_default.gray(`(${result.overallScore.toFixed(1)}/5)`) : "");
|
|
22361
|
-
if (result.errorMessage) {
|
|
22362
|
-
console.log(source_default.red(" Error:"), result.errorMessage);
|
|
22363
|
-
}
|
|
22364
|
-
if (options.verbose && result.turnResults) {
|
|
22365
|
-
for (const turn of result.turnResults) {
|
|
22366
|
-
if (turn.assertionResults) {
|
|
22367
|
-
for (const ar of turn.assertionResults) {
|
|
22368
|
-
if (!ar.passed) {
|
|
22369
|
-
console.log(source_default.red(` [${ar.type}]`), ar.reason || "");
|
|
22370
|
-
if (ar.criteria) {
|
|
22371
|
-
console.log(source_default.gray(` Criteria: ${ar.criteria}`));
|
|
22372
|
-
}
|
|
22373
|
-
}
|
|
22374
|
-
}
|
|
22375
|
-
}
|
|
22376
|
-
}
|
|
22377
|
-
}
|
|
22378
|
-
}
|
|
22379
|
-
}
|
|
22380
|
-
}
|
|
22381
|
-
}
|
|
22382
|
-
if (options.json) {
|
|
22383
|
-
console.log(JSON.stringify(allResults, null, 2));
|
|
22384
|
-
const anyFailed = allResults.some((r) => r.run.failedCases > 0);
|
|
22385
|
-
process.exit(anyFailed ? 1 : 0);
|
|
22386
|
-
return;
|
|
22387
|
-
}
|
|
22388
|
-
const totalPassed = allResults.reduce((sum, r) => sum + r.run.passedCases, 0);
|
|
22389
|
-
const totalCases = allResults.reduce((sum, r) => sum + r.run.totalCases, 0);
|
|
22390
|
-
const totalFailed = allResults.reduce((sum, r) => sum + r.run.failedCases, 0);
|
|
22391
|
-
console.log();
|
|
22392
|
-
if (totalFailed === 0) {
|
|
22393
|
-
console.log(source_default.green("All evaluations passed!"), source_default.gray(`(${totalPassed}/${totalCases})`));
|
|
22394
|
-
} else {
|
|
22395
|
-
console.log(source_default.red("Evaluations failed:"), source_default.gray(`${totalPassed}/${totalCases} passed`));
|
|
22396
|
-
}
|
|
22397
|
-
const totalTokens = allResults.reduce((sum, r) => {
|
|
22398
|
-
if (r.run.totalTokens)
|
|
22399
|
-
return sum + r.run.totalTokens.agent + r.run.totalTokens.judge;
|
|
22400
|
-
return sum;
|
|
22401
|
-
}, 0);
|
|
22402
|
-
if (totalTokens > 0) {
|
|
22403
|
-
console.log(source_default.gray(` Total tokens used: ${totalTokens.toLocaleString()}`));
|
|
22404
|
-
}
|
|
22405
|
-
console.log();
|
|
22406
|
-
if (totalFailed > 0) {
|
|
22407
|
-
process.exit(1);
|
|
22408
|
-
}
|
|
22409
|
-
});
|
|
22410
|
-
|
|
22411
22010
|
// src/cli/commands/deploy.ts
|
|
22412
22011
|
var isAuthError = (error) => {
|
|
22413
22012
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -22463,7 +22062,7 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
|
|
|
22463
22062
|
let resources;
|
|
22464
22063
|
try {
|
|
22465
22064
|
resources = await loadAllResources(cwd);
|
|
22466
|
-
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools`);
|
|
22065
|
+
spinner.succeed(`Loaded ${resources.agents.length} agents, ${resources.entityTypes.length} entity types, ${resources.roles.length} roles, ${resources.customTools.length} custom tools, ${resources.evalSuites.length} eval suites`);
|
|
22467
22066
|
for (const err of resources.errors) {
|
|
22468
22067
|
console.log(source_default.red(" \u2716"), err);
|
|
22469
22068
|
}
|
|
@@ -22501,6 +22100,13 @@ var deployCommand = new Command("deploy").description("Deploy all resources to p
|
|
|
22501
22100
|
for (const role of resources.roles) {
|
|
22502
22101
|
console.log(source_default.gray(" -"), source_default.cyan(role.name));
|
|
22503
22102
|
}
|
|
22103
|
+
if (resources.evalSuites.length > 0) {
|
|
22104
|
+
console.log();
|
|
22105
|
+
console.log("Eval suites:");
|
|
22106
|
+
for (const suite of resources.evalSuites) {
|
|
22107
|
+
console.log(source_default.gray(" -"), source_default.cyan(suite.suite), source_default.gray(`(${suite.cases.length} cases)`));
|
|
22108
|
+
}
|
|
22109
|
+
}
|
|
22504
22110
|
console.log();
|
|
22505
22111
|
return;
|
|
22506
22112
|
}
|
|
@@ -22937,7 +22543,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
|
|
|
22937
22543
|
let localResources;
|
|
22938
22544
|
try {
|
|
22939
22545
|
localResources = await loadAllResources(cwd);
|
|
22940
|
-
spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools`);
|
|
22546
|
+
spinner.succeed(`Loaded ${localResources.agents.length} agents, ${localResources.entityTypes.length} entity types, ${localResources.roles.length} roles, ${localResources.customTools.length} custom tools, ${localResources.evalSuites.length} eval suites`);
|
|
22941
22547
|
for (const err of localResources.errors) {
|
|
22942
22548
|
console.log(source_default.red(" \u2716"), err);
|
|
22943
22549
|
}
|
|
@@ -23044,7 +22650,7 @@ var statusCommand = new Command("status").description("Compare local vs remote s
|
|
|
23044
22650
|
|
|
23045
22651
|
// src/cli/commands/pull.ts
|
|
23046
22652
|
import { existsSync as existsSync6, mkdirSync as mkdirSync3, writeFileSync as writeFileSync5 } from "fs";
|
|
23047
|
-
import { join as join11 } from "path";
|
|
22653
|
+
import { join as join10 } from "path";
|
|
23048
22654
|
|
|
23049
22655
|
// src/cli/utils/generator.ts
|
|
23050
22656
|
var BUILTIN_TOOLS2 = [
|
|
@@ -23351,7 +22957,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
|
|
|
23351
22957
|
}
|
|
23352
22958
|
};
|
|
23353
22959
|
const writeOrSkip = (relativePath, content) => {
|
|
23354
|
-
const fullPath = join11(cwd, relativePath);
|
|
22960
|
+
const fullPath = join10(cwd, relativePath);
|
|
23355
22961
|
if (existsSync6(fullPath) && !options.force) {
|
|
23356
22962
|
skipped.push(relativePath);
|
|
23357
22963
|
return false;
|
|
@@ -23360,15 +22966,15 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
|
|
|
23360
22966
|
created.push(relativePath);
|
|
23361
22967
|
return true;
|
|
23362
22968
|
}
|
|
23363
|
-
ensureDir2(join11(cwd, relativePath.split("/").slice(0, -1).join("/")));
|
|
22969
|
+
ensureDir2(join10(cwd, relativePath.split("/").slice(0, -1).join("/")));
|
|
23364
22970
|
writeFileSync5(fullPath, content);
|
|
23365
22971
|
created.push(relativePath);
|
|
23366
22972
|
return true;
|
|
23367
22973
|
};
|
|
23368
|
-
ensureDir2(join11(cwd, "agents"));
|
|
23369
|
-
ensureDir2(join11(cwd, "entity-types"));
|
|
23370
|
-
ensureDir2(join11(cwd, "roles"));
|
|
23371
|
-
ensureDir2(join11(cwd, "tools"));
|
|
22974
|
+
ensureDir2(join10(cwd, "agents"));
|
|
22975
|
+
ensureDir2(join10(cwd, "entity-types"));
|
|
22976
|
+
ensureDir2(join10(cwd, "roles"));
|
|
22977
|
+
ensureDir2(join10(cwd, "tools"));
|
|
23372
22978
|
const agentSlugs = [];
|
|
23373
22979
|
for (const agent of state.agents) {
|
|
23374
22980
|
if (!agent.systemPrompt && agent.tools.length === 0)
|
|
@@ -23454,7 +23060,7 @@ var pullCommand = new Command("pull").description("Pull remote resources to loca
|
|
|
23454
23060
|
// package.json
|
|
23455
23061
|
var package_default = {
|
|
23456
23062
|
name: "struere",
|
|
23457
|
-
version: "0.5.3",
|
|
23063
|
+
version: "0.5.5",
|
|
23458
23064
|
description: "Build, test, and deploy AI agents",
|
|
23459
23065
|
keywords: [
|
|
23460
23066
|
"ai",
|
|
@@ -23556,7 +23162,6 @@ program.addCommand(whoamiCommand);
|
|
|
23556
23162
|
program.addCommand(devCommand);
|
|
23557
23163
|
program.addCommand(buildCommand);
|
|
23558
23164
|
program.addCommand(testCommand);
|
|
23559
|
-
program.addCommand(evalCommand);
|
|
23560
23165
|
program.addCommand(deployCommand);
|
|
23561
23166
|
program.addCommand(validateCommand);
|
|
23562
23167
|
program.addCommand(logsCommand);
|