thinkwork-cli 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -0
- package/dist/cli.js +1927 -10
- package/dist/terraform/modules/app/agentcore-runtime/main.tf +39 -4
- package/dist/terraform/modules/app/lambda-api/handlers.tf +112 -8
- package/dist/terraform/modules/app/lambda-api/main.tf +78 -3
- package/dist/terraform/modules/foundation/cognito/variables.tf +5 -2
- package/dist/terraform/modules/thinkwork/variables.tf +12 -4
- package/dist/terraform/schema.graphql +28 -0
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -68,6 +68,32 @@ function printKeyValue(pairs) {
|
|
|
68
68
|
console.log(` ${label}${v ?? chalk.dim("\u2014")}`);
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
|
+
/**
 * Print `rows` as an aligned text table on stdout.
 *
 * Does nothing when the module-level `jsonMode` flag is set. When `rows` is
 * empty a dimmed "(no results)" placeholder is printed instead of a table.
 *
 * @param {Array<Object>} rows - Records to display; each column reads `row[column.key]`.
 * @param {Array<{key: string, header: string, format?: function(*): *}>} columns
 *   Column descriptors. `format`, when present, receives the raw cell value and
 *   its result is used as the cell text; otherwise the raw value is stringified
 *   (null/undefined become an empty string).
 * @returns {void}
 */
function printTable(rows, columns) {
  if (jsonMode) return;
  if (rows.length === 0) {
    console.log(chalk.dim(" (no results)"));
    return;
  }

  // Formatters may return colourised text. SGR escape sequences occupy no
  // terminal columns, so they must be ignored when measuring and padding.
  const sgrPattern = /\u001b\[[0-9;]*m/g;
  const visibleLength = (text) => text.replace(sgrPattern, "").length;

  // Render every cell exactly once; the result is reused for both the width
  // calculation and the output. Non-string formatter results are coerced so
  // that measuring and padding cannot throw.
  const renderCell = (column, row) => {
    const raw = row[column.key];
    return String((column.format ? column.format(raw) : raw) ?? "");
  };
  const cells = rows.map((row) => columns.map((column) => renderCell(column, row)));

  // A fold (rather than Math.max(...values)) keeps this safe for very large
  // row counts, where spreading would exceed the engine's argument limit.
  const widths = columns.map((column, i) =>
    cells.reduce(
      (widest, rowCells) => Math.max(widest, visibleLength(rowCells[i])),
      column.header.length
    )
  );

  // Headers are padded before styling so the escape codes do not affect width.
  const header = columns.map((c, i) => chalk.bold(c.header.padEnd(widths[i]))).join(" ");
  console.log(` ${header}`);
  console.log(
    " " + widths.map((w) => chalk.dim("\u2500".repeat(w))).join(" ")
  );

  for (const rowCells of cells) {
    const line = rowCells
      .map((cell, i) => cell + " ".repeat(widths[i] - visibleLength(cell)))
      .join(" ");
    console.log(` ${line}`);
  }
}
|
|
71
97
|
/**
 * Write a single line to stderr.
 *
 * @param {string} message - Text to emit; a trailing newline is appended.
 * @returns {void}
 */
function logStderr(message) {
  const line = message + "\n";
  process.stderr.write(line);
}
|
|
@@ -312,6 +338,18 @@ function requireTty(label) {
|
|
|
312
338
|
process.exit(1);
|
|
313
339
|
}
|
|
314
340
|
}
|
|
341
|
+
/**
 * Run a prompt callback, turning a user cancellation into a clean exit.
 *
 * @param {function(): (Promise<*>|*)} fn - Callback to invoke and await.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws Any error raised by `fn` that `isCancellation` does not recognise.
 */
async function promptOrExit(fn) {
  let answer;
  try {
    answer = await fn();
  } catch (promptError) {
    const cancelled = isCancellation(promptError);
    if (cancelled) {
      // Cancelling is not a failure: print a short notice and exit with code 0.
      console.log("");
      console.log(" Cancelled.");
      process.exit(0);
    }
    throw promptError;
  }
  return answer;
}
|
|
315
353
|
|
|
316
354
|
// src/lib/resolve-stage.ts
|
|
317
355
|
async function resolveStage(opts = {}) {
|
|
@@ -2049,9 +2087,9 @@ function registerInitCommand(program2) {
|
|
|
2049
2087
|
printError("Stage name is required. Pass -s <name> or re-run in an interactive terminal.");
|
|
2050
2088
|
process.exit(1);
|
|
2051
2089
|
}
|
|
2052
|
-
const { input:
|
|
2090
|
+
const { input: input5 } = await import("@inquirer/prompts");
|
|
2053
2091
|
try {
|
|
2054
|
-
stage = await
|
|
2092
|
+
stage = await input5({
|
|
2055
2093
|
message: "Stage name (e.g. dev, staging, prod):",
|
|
2056
2094
|
validate: (v) => validateStage(v).error ?? true
|
|
2057
2095
|
});
|
|
@@ -2912,13 +2950,13 @@ Examples:
|
|
|
2912
2950
|
--auth-type tenant_api_key --api-key sk-abc -s dev -t acme
|
|
2913
2951
|
|
|
2914
2952
|
# OAuth connector (users connect from the mobile app)
|
|
2915
|
-
$ thinkwork mcp add lastmile --url https://mcp
|
|
2953
|
+
$ thinkwork mcp add lastmile --url https://dev-mcp.lastmile-tei.com/crm \\
|
|
2916
2954
|
--auth-type per_user_oauth --oauth-provider lastmile -s dev -t acme
|
|
2917
2955
|
`
|
|
2918
2956
|
).action(
|
|
2919
2957
|
async (nameArg, opts) => {
|
|
2920
2958
|
try {
|
|
2921
|
-
const { input:
|
|
2959
|
+
const { input: input5 } = await import("@inquirer/prompts");
|
|
2922
2960
|
const { stage, api, tenant } = await resolveMcpContext(opts);
|
|
2923
2961
|
let name = nameArg;
|
|
2924
2962
|
if (!name) {
|
|
@@ -2926,7 +2964,7 @@ Examples:
|
|
|
2926
2964
|
printError("Name is required. Pass it as a positional arg.");
|
|
2927
2965
|
process.exit(1);
|
|
2928
2966
|
}
|
|
2929
|
-
name = await
|
|
2967
|
+
name = await input5({ message: "Server name:" });
|
|
2930
2968
|
}
|
|
2931
2969
|
let url = opts.url;
|
|
2932
2970
|
if (!url) {
|
|
@@ -2934,7 +2972,7 @@ Examples:
|
|
|
2934
2972
|
printError("--url is required. Pass it as a flag.");
|
|
2935
2973
|
process.exit(1);
|
|
2936
2974
|
}
|
|
2937
|
-
url = await
|
|
2975
|
+
url = await input5({
|
|
2938
2976
|
message: "MCP server URL:",
|
|
2939
2977
|
validate: (v) => v.startsWith("http://") || v.startsWith("https://") ? true : "URL must start with http:// or https://"
|
|
2940
2978
|
});
|
|
@@ -3094,7 +3132,7 @@ Examples:
|
|
|
3094
3132
|
).option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("--agent <id>", "Agent ID").action(
|
|
3095
3133
|
async (mcpServerArg, opts) => {
|
|
3096
3134
|
try {
|
|
3097
|
-
const { input:
|
|
3135
|
+
const { input: input5 } = await import("@inquirer/prompts");
|
|
3098
3136
|
const { api, tenant } = await resolveMcpContext(opts);
|
|
3099
3137
|
const server = await resolveServer(mcpServerArg, api, tenant.slug);
|
|
3100
3138
|
let agent = opts.agent;
|
|
@@ -3103,7 +3141,7 @@ Examples:
|
|
|
3103
3141
|
printError("--agent is required. Pass it as a flag.");
|
|
3104
3142
|
process.exit(1);
|
|
3105
3143
|
}
|
|
3106
|
-
agent = await
|
|
3144
|
+
agent = await input5({ message: "Agent ID:" });
|
|
3107
3145
|
}
|
|
3108
3146
|
const result = await apiFetch(
|
|
3109
3147
|
api.apiUrl,
|
|
@@ -3126,7 +3164,7 @@ Examples:
|
|
|
3126
3164
|
).option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("--agent <id>", "Agent ID").action(
|
|
3127
3165
|
async (mcpServerArg, opts) => {
|
|
3128
3166
|
try {
|
|
3129
|
-
const { input:
|
|
3167
|
+
const { input: input5 } = await import("@inquirer/prompts");
|
|
3130
3168
|
const { api, tenant } = await resolveMcpContext(opts);
|
|
3131
3169
|
const server = await resolveServer(mcpServerArg, api, tenant.slug);
|
|
3132
3170
|
let agent = opts.agent;
|
|
@@ -3135,7 +3173,7 @@ Examples:
|
|
|
3135
3173
|
printError("--agent is required. Pass it as a flag.");
|
|
3136
3174
|
process.exit(1);
|
|
3137
3175
|
}
|
|
3138
|
-
agent = await
|
|
3176
|
+
agent = await input5({ message: "Agent ID:" });
|
|
3139
3177
|
}
|
|
3140
3178
|
await apiFetch(
|
|
3141
3179
|
api.apiUrl,
|
|
@@ -3866,6 +3904,24 @@ async function getGqlClient(opts) {
|
|
|
3866
3904
|
tenantSlug: auth.tenantSlug
|
|
3867
3905
|
};
|
|
3868
3906
|
}
|
|
3907
|
+
/**
 * Execute a GraphQL query and return its data payload.
 *
 * @param {Object} client - Object exposing `query(doc, variables)` whose result
 *   has a `toPromise()` method (looks like an urql client — confirm at the call site).
 * @param {*} doc - Query document passed through to `client.query`.
 * @param {Object} [variables] - Variables passed through to `client.query`.
 * @returns {Promise<*>} The value returned by `unwrap` for the settled result.
 * @throws {Error} Whatever `unwrap` throws for the settled result.
 */
async function gqlQuery(client, doc, variables) {
  return unwrap(await client.query(doc, variables).toPromise());
}
|
|
3911
|
+
/**
 * Execute a GraphQL mutation and return its data payload.
 *
 * @param {Object} client - Object exposing `mutation(doc, variables)` whose result
 *   has a `toPromise()` method (looks like an urql client — confirm at the call site).
 * @param {*} doc - Mutation document passed through to `client.mutation`.
 * @param {Object} [variables] - Variables passed through to `client.mutation`.
 * @returns {Promise<*>} The value returned by `unwrap` for the settled result.
 * @throws {Error} Whatever `unwrap` throws for the settled result.
 */
async function gqlMutate(client, doc, variables) {
  return unwrap(await client.mutation(doc, variables).toPromise());
}
|
|
3915
|
+
/**
 * Extract the data payload from a GraphQL operation result, or throw.
 *
 * @param {{data?: *, error?: Object}} res - Settled operation result.
 * @returns {*} `res.data` when the operation succeeded.
 * @throws {Error} When `res.error` is set. The message is the non-empty
 *   GraphQL error messages joined with "; ", else the network error message,
 *   else (for errors that carry no `graphQLErrors` array) the error's own
 *   message, else a generic fallback. The original error is attached as `cause`.
 * @throws {Error} When the result carries neither an error nor data.
 */
function unwrap(res) {
  const failure = res.error;
  if (failure) {
    // Only trust `graphQLErrors` when it really is an array, so an unexpected
    // error shape cannot raise a TypeError that hides the real failure.
    const hasGraphQLErrors = Array.isArray(failure.graphQLErrors);
    const graphQLMessage = hasGraphQLErrors
      ? failure.graphQLErrors.map((e) => e?.message).filter(Boolean).join("; ")
      : "";
    const ownMessage = hasGraphQLErrors ? undefined : failure.message;
    const msg =
      graphQLMessage ||
      failure.networkError?.message ||
      ownMessage ||
      "GraphQL request failed";
    throw new Error(msg, { cause: failure });
  }
  if (!res.data) {
    throw new Error("GraphQL request returned no data.");
  }
  return res.data;
}
|
|
3869
3925
|
|
|
3870
3926
|
// src/commands/me.ts
|
|
3871
3927
|
var ME_QUERY = gql`
|
|
@@ -4612,6 +4668,1865 @@ Examples:
|
|
|
4612
4668
|
).action(() => notYetImplemented("dashboard", 5));
|
|
4613
4669
|
}
|
|
4614
4670
|
|
|
4671
|
+
// src/commands/eval/run.ts
|
|
4672
|
+
import { select as select8, checkbox, confirm as confirm2, input as input3 } from "@inquirer/prompts";
|
|
4673
|
+
import ora2 from "ora";
|
|
4674
|
+
|
|
4675
|
+
// src/gql/graphql.ts
|
|
4676
|
+
var CliEvalRunsDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliEvalRuns" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "agentId" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "limit" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "Int" } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "offset" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "Int" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "evalRuns" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "agentId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "agentId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "limit" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "limit" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "offset" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "offset" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "totalCount" } }, { "kind": "Field", "name": { "kind": "Name", "value": "items" }, "selectionSet": { "kind": "SelectionSet", 
"selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "model" } }, { "kind": "Field", "name": { "kind": "Name", "value": "categories" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "totalTests" } }, { "kind": "Field", "name": { "kind": "Name", "value": "passed" } }, { "kind": "Field", "name": { "kind": "Name", "value": "failed" } }, { "kind": "Field", "name": { "kind": "Name", "value": "passRate" } }, { "kind": "Field", "name": { "kind": "Name", "value": "regression" } }, { "kind": "Field", "name": { "kind": "Name", "value": "costUsd" } }, { "kind": "Field", "name": { "kind": "Name", "value": "errorMessage" } }, { "kind": "Field", "name": { "kind": "Name", "value": "startedAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "completedAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }] } }] } }] } }] };
|
|
4677
|
+
var CliEvalRunDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliEvalRun" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "evalRun" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "id" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "model" } }, { "kind": "Field", "name": { "kind": "Name", "value": "categories" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "totalTests" } }, { "kind": "Field", "name": { "kind": "Name", "value": "passed" } }, { "kind": "Field", "name": { "kind": "Name", "value": "failed" } }, { "kind": "Field", "name": { "kind": "Name", "value": "passRate" } }, { "kind": "Field", "name": { "kind": "Name", "value": "regression" } }, { "kind": "Field", "name": { "kind": "Name", "value": "costUsd" } }, { "kind": "Field", "name": { "kind": "Name", "value": "errorMessage" } }, { "kind": "Field", "name": { "kind": "Name", "value": "startedAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "completedAt" } }, { "kind": "Field", "name": { "kind": 
"Name", "value": "createdAt" } }] } }] } }] };
|
|
4678
|
+
var CliEvalRunResultsDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliEvalRunResults" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "runId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "evalRunResults" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "runId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "runId" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "testCaseId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "testCaseName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "category" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "score" } }, { "kind": "Field", "name": { "kind": "Name", "value": "durationMs" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentSessionId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "input" } }, { "kind": "Field", "name": { "kind": "Name", "value": "expected" } }, { "kind": "Field", "name": { "kind": "Name", "value": "actualOutput" } }, { "kind": "Field", "name": { "kind": "Name", "value": "evaluatorResults" } }, { "kind": "Field", "name": { "kind": "Name", "value": "assertions" } }, { "kind": "Field", "name": { "kind": "Name", "value": "errorMessage" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }] } }] } }] };
|
|
4679
|
+
var CliEvalTestCasesDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliEvalTestCases" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "category" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "String" } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "search" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "String" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "evalTestCases" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "category" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "category" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "search" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "search" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "category" } }, { "kind": "Field", "name": { "kind": "Name", "value": "query" } }, { "kind": "Field", "name": { "kind": "Name", "value": "systemPrompt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateName" } }, { 
"kind": "Field", "name": { "kind": "Name", "value": "agentcoreEvaluatorIds" } }, { "kind": "Field", "name": { "kind": "Name", "value": "tags" } }, { "kind": "Field", "name": { "kind": "Name", "value": "enabled" } }, { "kind": "Field", "name": { "kind": "Name", "value": "source" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "updatedAt" } }] } }] } }] };
|
|
4680
|
+
var CliEvalTestCaseDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliEvalTestCase" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "evalTestCase" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "id" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "tenantId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "category" } }, { "kind": "Field", "name": { "kind": "Name", "value": "query" } }, { "kind": "Field", "name": { "kind": "Name", "value": "systemPrompt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "assertions" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentcoreEvaluatorIds" } }, { "kind": "Field", "name": { "kind": "Name", "value": "tags" } }, { "kind": "Field", "name": { "kind": "Name", "value": "enabled" } }, { "kind": "Field", "name": { "kind": "Name", "value": "source" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "updatedAt" } }] } }] } }] };
|
|
4681
|
+
var CliAgentTemplatesForEvalDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliAgentTemplatesForEval" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "agentTemplates" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "slug" } }, { "kind": "Field", "name": { "kind": "Name", "value": "model" } }, { "kind": "Field", "name": { "kind": "Name", "value": "isPublished" } }] } }] } }] };
|
|
4682
|
+
var CliTenantBySlugDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliTenantBySlug" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "slug" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "String" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "tenantBySlug" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "slug" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "slug" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "slug" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }] } }] } }] };
|
|
4683
|
+
var CliStartEvalRunDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliStartEvalRun" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "input" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "StartEvalRunInput" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "startEvalRun" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "input" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "input" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "model" } }, { "kind": "Field", "name": { "kind": "Name", "value": "categories" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "agentTemplateName" } }, { "kind": "Field", "name": { "kind": "Name", "value": "totalTests" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }] } }] } }] };
|
|
4684
|
+
var CliCancelEvalRunDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliCancelEvalRun" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "cancelEvalRun" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "id" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "completedAt" } }] } }] } }] };
|
|
4685
|
+
var CliDeleteEvalRunDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliDeleteEvalRun" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "deleteEvalRun" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "id" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } } }] }] } }] };
|
|
4686
|
+
var CliCreateEvalTestCaseDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliCreateEvalTestCase" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "input" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "CreateEvalTestCaseInput" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "createEvalTestCase" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "input" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "input" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "category" } }] } }] } }] };
|
|
4687
|
+
var CliUpdateEvalTestCaseDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliUpdateEvalTestCase" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "input" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "UpdateEvalTestCaseInput" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "updateEvalTestCase" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "id" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "input" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "input" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "category" } }, { "kind": "Field", "name": { "kind": "Name", "value": "enabled" } }] } }] } }] };
|
|
4688
|
+
var CliDeleteEvalTestCaseDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliDeleteEvalTestCase" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "deleteEvalTestCase" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "id" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "id" } } }] }] } }] };
|
|
4689
|
+
var CliSeedEvalTestCasesDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliSeedEvalTestCases" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "categories" } }, "type": { "kind": "ListType", "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "String" } } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "seedEvalTestCases" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "categories" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "categories" } } }] }] } }] };
|
|
4690
|
+
var CliMeDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliMe" }, "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "me" }, "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "email" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "tenantId" } }] } }] } }] };
|
|
4691
|
+
var CliWikiTenantBySlugDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliWikiTenantBySlug" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "slug" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "String" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "tenantBySlug" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "slug" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "slug" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "slug" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }] } }] } }] };
|
|
4692
|
+
var CliAllTenantAgentsForWikiDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliAllTenantAgentsForWiki" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "allTenantAgents" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "includeSystem" }, "value": { "kind": "BooleanValue", "value": false } }, { "kind": "Argument", "name": { "kind": "Name", "value": "includeSubAgents" }, "value": { "kind": "BooleanValue", "value": false } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "name" } }, { "kind": "Field", "name": { "kind": "Name", "value": "slug" } }, { "kind": "Field", "name": { "kind": "Name", "value": "type" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }] } }] } }] };
|
|
4693
|
+
var CliCompileWikiNowDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliCompileWikiNow" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "ownerId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "modelId" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "String" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "compileWikiNow" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "ownerId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "ownerId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "modelId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "modelId" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "tenantId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "ownerId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "trigger" } }, { "kind": "Field", "name": { "kind": "Name", "value": "dedupeKey" } }, { "kind": "Field", "name": { "kind": "Name", "value": 
"attempt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }] } }] } }] };
|
|
4694
|
+
var CliResetWikiCursorDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "mutation", "name": { "kind": "Name", "value": "CliResetWikiCursor" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "ownerId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "force" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "Boolean" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "resetWikiCursor" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "ownerId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "ownerId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "force" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "force" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "tenantId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "ownerId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "cursorCleared" } }, { "kind": "Field", "name": { "kind": "Name", "value": "pagesArchived" } }] } }] } }] };
|
|
4695
|
+
var CliWikiCompileJobsDocument = { "kind": "Document", "definitions": [{ "kind": "OperationDefinition", "operation": "query", "name": { "kind": "Name", "value": "CliWikiCompileJobs" }, "variableDefinitions": [{ "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } }, "type": { "kind": "NonNullType", "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "ownerId" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "ID" } } }, { "kind": "VariableDefinition", "variable": { "kind": "Variable", "name": { "kind": "Name", "value": "limit" } }, "type": { "kind": "NamedType", "name": { "kind": "Name", "value": "Int" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "wikiCompileJobs" }, "arguments": [{ "kind": "Argument", "name": { "kind": "Name", "value": "tenantId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "tenantId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "ownerId" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "ownerId" } } }, { "kind": "Argument", "name": { "kind": "Name", "value": "limit" }, "value": { "kind": "Variable", "name": { "kind": "Name", "value": "limit" } } }], "selectionSet": { "kind": "SelectionSet", "selections": [{ "kind": "Field", "name": { "kind": "Name", "value": "id" } }, { "kind": "Field", "name": { "kind": "Name", "value": "tenantId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "ownerId" } }, { "kind": "Field", "name": { "kind": "Name", "value": "status" } }, { "kind": "Field", "name": { "kind": "Name", "value": "trigger" } }, { "kind": "Field", "name": { "kind": "Name", "value": "dedupeKey" } }, { "kind": "Field", "name": { "kind": "Name", "value": "attempt" } }, { "kind": "Field", "name": { 
"kind": "Name", "value": "claimedAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "startedAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "finishedAt" } }, { "kind": "Field", "name": { "kind": "Name", "value": "error" } }, { "kind": "Field", "name": { "kind": "Name", "value": "metrics" } }, { "kind": "Field", "name": { "kind": "Name", "value": "createdAt" } }] } }] } }] };
|
|
4696
|
+
|
|
4697
|
+
// src/gql/gql.ts
|
|
4698
|
+
var documents = {
|
|
4699
|
+
"\n query CliEvalRuns($tenantId: ID!, $agentId: ID, $limit: Int, $offset: Int) {\n evalRuns(tenantId: $tenantId, agentId: $agentId, limit: $limit, offset: $offset) {\n totalCount\n items {\n id\n status\n model\n categories\n agentId\n agentName\n agentTemplateId\n agentTemplateName\n totalTests\n passed\n failed\n passRate\n regression\n costUsd\n errorMessage\n startedAt\n completedAt\n createdAt\n }\n }\n }\n": CliEvalRunsDocument,
|
|
4700
|
+
"\n query CliEvalRun($id: ID!) {\n evalRun(id: $id) {\n id\n status\n model\n categories\n agentId\n agentName\n agentTemplateId\n agentTemplateName\n totalTests\n passed\n failed\n passRate\n regression\n costUsd\n errorMessage\n startedAt\n completedAt\n createdAt\n }\n }\n": CliEvalRunDocument,
|
|
4701
|
+
"\n query CliEvalRunResults($runId: ID!) {\n evalRunResults(runId: $runId) {\n id\n testCaseId\n testCaseName\n category\n status\n score\n durationMs\n agentSessionId\n input\n expected\n actualOutput\n evaluatorResults\n assertions\n errorMessage\n createdAt\n }\n }\n": CliEvalRunResultsDocument,
|
|
4702
|
+
"\n query CliEvalTestCases($tenantId: ID!, $category: String, $search: String) {\n evalTestCases(tenantId: $tenantId, category: $category, search: $search) {\n id\n name\n category\n query\n systemPrompt\n agentTemplateId\n agentTemplateName\n agentcoreEvaluatorIds\n tags\n enabled\n source\n createdAt\n updatedAt\n }\n }\n": CliEvalTestCasesDocument,
|
|
4703
|
+
"\n query CliEvalTestCase($id: ID!) {\n evalTestCase(id: $id) {\n id\n tenantId\n name\n category\n query\n systemPrompt\n agentTemplateId\n agentTemplateName\n assertions\n agentcoreEvaluatorIds\n tags\n enabled\n source\n createdAt\n updatedAt\n }\n }\n": CliEvalTestCaseDocument,
|
|
4704
|
+
"\n query CliAgentTemplatesForEval($tenantId: ID!) {\n agentTemplates(tenantId: $tenantId) {\n id\n name\n slug\n model\n isPublished\n }\n }\n": CliAgentTemplatesForEvalDocument,
|
|
4705
|
+
"\n query CliTenantBySlug($slug: String!) {\n tenantBySlug(slug: $slug) {\n id\n slug\n name\n }\n }\n": CliTenantBySlugDocument,
|
|
4706
|
+
"\n mutation CliStartEvalRun($tenantId: ID!, $input: StartEvalRunInput!) {\n startEvalRun(tenantId: $tenantId, input: $input) {\n id\n status\n model\n categories\n agentTemplateId\n agentTemplateName\n totalTests\n createdAt\n }\n }\n": CliStartEvalRunDocument,
|
|
4707
|
+
"\n mutation CliCancelEvalRun($id: ID!) {\n cancelEvalRun(id: $id) {\n id\n status\n completedAt\n }\n }\n": CliCancelEvalRunDocument,
|
|
4708
|
+
"\n mutation CliDeleteEvalRun($id: ID!) {\n deleteEvalRun(id: $id)\n }\n": CliDeleteEvalRunDocument,
|
|
4709
|
+
"\n mutation CliCreateEvalTestCase($tenantId: ID!, $input: CreateEvalTestCaseInput!) {\n createEvalTestCase(tenantId: $tenantId, input: $input) {\n id\n name\n category\n }\n }\n": CliCreateEvalTestCaseDocument,
|
|
4710
|
+
"\n mutation CliUpdateEvalTestCase($id: ID!, $input: UpdateEvalTestCaseInput!) {\n updateEvalTestCase(id: $id, input: $input) {\n id\n name\n category\n enabled\n }\n }\n": CliUpdateEvalTestCaseDocument,
|
|
4711
|
+
"\n mutation CliDeleteEvalTestCase($id: ID!) {\n deleteEvalTestCase(id: $id)\n }\n": CliDeleteEvalTestCaseDocument,
|
|
4712
|
+
"\n mutation CliSeedEvalTestCases($tenantId: ID!, $categories: [String!]) {\n seedEvalTestCases(tenantId: $tenantId, categories: $categories)\n }\n": CliSeedEvalTestCasesDocument,
|
|
4713
|
+
"\n query CliMe {\n me {\n id\n email\n name\n tenantId\n }\n }\n": CliMeDocument,
|
|
4714
|
+
"\n query CliWikiTenantBySlug($slug: String!) {\n tenantBySlug(slug: $slug) {\n id\n slug\n name\n }\n }\n": CliWikiTenantBySlugDocument,
|
|
4715
|
+
"\n query CliAllTenantAgentsForWiki($tenantId: ID!) {\n allTenantAgents(tenantId: $tenantId, includeSystem: false, includeSubAgents: false) {\n id\n name\n slug\n type\n status\n }\n }\n": CliAllTenantAgentsForWikiDocument,
|
|
4716
|
+
"\n mutation CliCompileWikiNow($tenantId: ID!, $ownerId: ID!, $modelId: String) {\n compileWikiNow(tenantId: $tenantId, ownerId: $ownerId, modelId: $modelId) {\n id\n tenantId\n ownerId\n status\n trigger\n dedupeKey\n attempt\n createdAt\n }\n }\n": CliCompileWikiNowDocument,
|
|
4717
|
+
"\n mutation CliResetWikiCursor($tenantId: ID!, $ownerId: ID!, $force: Boolean) {\n resetWikiCursor(tenantId: $tenantId, ownerId: $ownerId, force: $force) {\n tenantId\n ownerId\n cursorCleared\n pagesArchived\n }\n }\n": CliResetWikiCursorDocument,
|
|
4718
|
+
"\n query CliWikiCompileJobs($tenantId: ID!, $ownerId: ID, $limit: Int) {\n wikiCompileJobs(tenantId: $tenantId, ownerId: $ownerId, limit: $limit) {\n id\n tenantId\n ownerId\n status\n trigger\n dedupeKey\n attempt\n claimedAt\n startedAt\n finishedAt\n error\n metrics\n createdAt\n }\n }\n": CliWikiCompileJobsDocument
|
|
4719
|
+
};
|
|
4720
|
+
/**
 * Resolves a GraphQL operation's source text to its pre-built document.
 *
 * @param {string} source - Operation text, used verbatim as the key into `documents`.
 * @returns {object} The registered document, or an empty object when nothing
 *   (or a nullish value) is registered under that exact text.
 */
function graphql(source) {
  const registered = documents[source];
  if (registered === null || registered === undefined) {
    return {};
  }
  return registered;
}
|
|
4723
|
+
|
|
4724
|
+
// src/commands/eval/gql.ts
|
|
4725
|
+
// GraphQL operations used by the `eval` commands.
//
// graphql() looks each operation up in `documents` by its exact source text,
// so every template below must match its registered key character for
// character — including the single leading space on each line and the
// trailing newline. A mismatch does not throw: graphql() falls back to `{}`.
var EvalRunsDoc = graphql(`
 query CliEvalRuns($tenantId: ID!, $agentId: ID, $limit: Int, $offset: Int) {
 evalRuns(tenantId: $tenantId, agentId: $agentId, limit: $limit, offset: $offset) {
 totalCount
 items {
 id
 status
 model
 categories
 agentId
 agentName
 agentTemplateId
 agentTemplateName
 totalTests
 passed
 failed
 passRate
 regression
 costUsd
 errorMessage
 startedAt
 completedAt
 createdAt
 }
 }
 }
`);
var EvalRunDoc = graphql(`
 query CliEvalRun($id: ID!) {
 evalRun(id: $id) {
 id
 status
 model
 categories
 agentId
 agentName
 agentTemplateId
 agentTemplateName
 totalTests
 passed
 failed
 passRate
 regression
 costUsd
 errorMessage
 startedAt
 completedAt
 createdAt
 }
 }
`);
var EvalRunResultsDoc = graphql(`
 query CliEvalRunResults($runId: ID!) {
 evalRunResults(runId: $runId) {
 id
 testCaseId
 testCaseName
 category
 status
 score
 durationMs
 agentSessionId
 input
 expected
 actualOutput
 evaluatorResults
 assertions
 errorMessage
 createdAt
 }
 }
`);
var EvalTestCasesDoc = graphql(`
 query CliEvalTestCases($tenantId: ID!, $category: String, $search: String) {
 evalTestCases(tenantId: $tenantId, category: $category, search: $search) {
 id
 name
 category
 query
 systemPrompt
 agentTemplateId
 agentTemplateName
 agentcoreEvaluatorIds
 tags
 enabled
 source
 createdAt
 updatedAt
 }
 }
`);
var EvalTestCaseDoc = graphql(`
 query CliEvalTestCase($id: ID!) {
 evalTestCase(id: $id) {
 id
 tenantId
 name
 category
 query
 systemPrompt
 agentTemplateId
 agentTemplateName
 assertions
 agentcoreEvaluatorIds
 tags
 enabled
 source
 createdAt
 updatedAt
 }
 }
`);
var AgentTemplatesForEvalDoc = graphql(`
 query CliAgentTemplatesForEval($tenantId: ID!) {
 agentTemplates(tenantId: $tenantId) {
 id
 name
 slug
 model
 isPublished
 }
 }
`);
var TenantBySlugDoc = graphql(`
 query CliTenantBySlug($slug: String!) {
 tenantBySlug(slug: $slug) {
 id
 slug
 name
 }
 }
`);
var StartEvalRunDoc = graphql(`
 mutation CliStartEvalRun($tenantId: ID!, $input: StartEvalRunInput!) {
 startEvalRun(tenantId: $tenantId, input: $input) {
 id
 status
 model
 categories
 agentTemplateId
 agentTemplateName
 totalTests
 createdAt
 }
 }
`);
var CancelEvalRunDoc = graphql(`
 mutation CliCancelEvalRun($id: ID!) {
 cancelEvalRun(id: $id) {
 id
 status
 completedAt
 }
 }
`);
var DeleteEvalRunDoc = graphql(`
 mutation CliDeleteEvalRun($id: ID!) {
 deleteEvalRun(id: $id)
 }
`);
var CreateEvalTestCaseDoc = graphql(`
 mutation CliCreateEvalTestCase($tenantId: ID!, $input: CreateEvalTestCaseInput!) {
 createEvalTestCase(tenantId: $tenantId, input: $input) {
 id
 name
 category
 }
 }
`);
var UpdateEvalTestCaseDoc = graphql(`
 mutation CliUpdateEvalTestCase($id: ID!, $input: UpdateEvalTestCaseInput!) {
 updateEvalTestCase(id: $id, input: $input) {
 id
 name
 category
 enabled
 }
 }
`);
var DeleteEvalTestCaseDoc = graphql(`
 mutation CliDeleteEvalTestCase($id: ID!) {
 deleteEvalTestCase(id: $id)
 }
`);
var SeedEvalTestCasesDoc = graphql(`
 mutation CliSeedEvalTestCases($tenantId: ID!, $categories: [String!]) {
 seedEvalTestCases(tenantId: $tenantId, categories: $categories)
 }
`);
|
|
4914
|
+
|
|
4915
|
+
// src/commands/eval/helpers.ts
|
|
4916
|
+
/**
 * Works out the stage, region, GraphQL client and tenant an eval command
 * should run against.
 *
 * Tenant precedence:
 *   1. `opts.tenant`, then the THINKWORK_TENANT environment variable. Served
 *      from the stage session when its slug matches and it has a tenant id,
 *      otherwise looked up by slug (exits with status 1 when not found).
 *   2. The tenant recorded in the stage session.
 *   3. The tenant slug returned by `getGqlClient`, looked up by slug.
 * When none of these yields a tenant, prints an error and exits with status 1.
 *
 * @param {object} opts - Command options; reads `region`, `stage` and `tenant`.
 * @returns {Promise<object>} `{ stage, region, client, tenantId, tenantSlug }`.
 */
async function resolveEvalContext(opts) {
  const region = opts.region ?? "us-east-1";
  const stage = await resolveStage({ flag: opts.stage, region });
  const session = loadStageSession(stage);
  const { client, tenantSlug: ctxTenantSlug } = await getGqlClient({ stage, region });

  // Every successful path returns the same shape; only the tenant differs.
  const withTenant = (tenantId, tenantSlug) => ({ stage, region, client, tenantId, tenantSlug });
  const lookupTenant = async (slug) => {
    const data = await gqlQuery(client, TenantBySlugDoc, { slug });
    return data.tenantBySlug;
  };

  const requested = opts.tenant ?? process.env.THINKWORK_TENANT;
  if (requested) {
    const sessionMatches = session?.tenantSlug === requested && session.tenantId;
    if (sessionMatches) {
      return withTenant(session.tenantId, requested);
    }
    const tenant = await lookupTenant(requested);
    if (!tenant) {
      printError(`Tenant "${requested}" not found.`);
      process.exit(1);
    }
    return withTenant(tenant.id, tenant.slug);
  }

  if (session?.tenantId && session.tenantSlug) {
    return withTenant(session.tenantId, session.tenantSlug);
  }

  if (ctxTenantSlug) {
    const tenant = await lookupTenant(ctxTenantSlug);
    if (tenant) {
      return withTenant(tenant.id, tenant.slug);
    }
  }

  printError(
    `No tenant resolved for stage "${stage}". Pass --tenant <slug>, set THINKWORK_TENANT, or run \`thinkwork login --stage ${stage}\`.`
  );
  process.exit(1);
}
|
|
4965
|
+
/**
 * Formats a timestamp as an ISO-8601 UTC string without milliseconds.
 *
 * @param {*} iso - Timestamp to format.
 * @returns {*} An em dash for falsy input, the input itself when it does not
 *   parse as a date, otherwise e.g. "2024-01-02T03:04:05Z".
 */
function fmtIso(iso) {
  if (!iso) {
    return "\u2014";
  }
  const parsed = new Date(iso);
  const unparseable = Number.isNaN(parsed.getTime());
  return unparseable ? iso : parsed.toISOString().replace(/\.\d{3}Z$/, "Z");
}
|
|
4971
|
+
/**
 * Formats a 0..1 ratio as a percentage with one decimal place.
 *
 * @param {*} value - Ratio to format; numeric strings are accepted.
 * @returns {string} e.g. "87.5%", or an em dash when the value is nullish or
 *   does not convert to a finite number (previously rendered as "NaN%").
 */
function fmtPercent(value) {
  if (value == null) return "\u2014";
  const ratio = Number(value);
  if (!Number.isFinite(ratio)) return "\u2014";
  return `${(ratio * 100).toFixed(1)}%`;
}
|
|
4975
|
+
/**
 * Formats a dollar amount with four decimal places.
 *
 * The value is coerced with Number() first, so an amount delivered as a
 * numeric string is formatted instead of throwing on a missing `toFixed`.
 *
 * @param {*} value - Amount in USD; numeric strings are accepted.
 * @returns {string} e.g. "$0.0123", or an em dash when the value is nullish
 *   or does not convert to a finite number.
 */
function fmtUsd(value) {
  if (value == null) return "\u2014";
  const amount = Number(value);
  if (!Number.isFinite(amount)) return "\u2014";
  return `$${amount.toFixed(4)}`;
}
|
|
4979
|
+
/**
 * Tells whether an eval run status is one the polling loops stop on.
 *
 * @param {*} status - Run status value.
 * @returns {boolean} True for "completed", "failed" or "cancelled".
 */
function isTerminalStatus(status) {
  switch (status) {
    case "completed":
    case "failed":
    case "cancelled":
      return true;
    default:
      return false;
  }
}
|
|
4982
|
+
|
|
4983
|
+
// src/commands/eval/run.ts
|
|
4984
|
+
/**
 * `eval run`: starts an eval run and, with `opts.watch`, waits for it to finish.
 *
 * Anything not supplied through flags is asked for interactively: the agent
 * template, the test-case scope (all / categories / specific cases) and an
 * optional model override. In a non-interactive session a missing agent
 * template or scope is an error (exit status 1).
 *
 * The caller's `opts` object is never modified; prompt answers are kept in
 * locals.
 *
 * @param {object} opts - Command options. Reads `agentTemplate`, `category`,
 *   `testCase`, `all`, `model`, `agent`, `watch`, `timeout`, plus whatever
 *   `resolveEvalContext` reads.
 * @returns {Promise<void>}
 */
async function runEvalRun(opts) {
  // Validate --timeout before anything is started: an unparseable value used
  // to become NaN, which made the watch loop give up instantly after the run
  // had already been created.
  let timeoutSec = 900;
  if (opts.watch) {
    timeoutSec = Number.parseInt(opts.timeout ?? "900", 10);
    if (!Number.isFinite(timeoutSec) || timeoutSec <= 0) {
      printError(`Invalid --timeout "${opts.timeout}": expected a positive whole number of seconds.`);
      process.exit(1);
    }
  }

  const ctx = await resolveEvalContext(opts);
  const interactive = isInteractive();
  let agentTemplateId = opts.agentTemplate ?? null;
  let categories = opts.category ?? null;
  let testCaseIds = opts.testCase ?? null;
  let model = opts.model;
  let runAll = Boolean(opts.all);

  const scopeSatisfied =
    Boolean(testCaseIds && testCaseIds.length > 0) ||
    Boolean(categories && categories.length > 0) ||
    opts.all === true;

  if (!interactive && (!agentTemplateId || !scopeSatisfied)) {
    const missing = [];
    if (!agentTemplateId) missing.push("--agent-template");
    if (!scopeSatisfied) missing.push("one of --all | --category | --test-case");
    printError(
      `Missing required flag(s) in non-interactive session: ${missing.join(", ")}.`
    );
    process.exit(1);
  }

  if (!agentTemplateId) {
    const data = await gqlQuery(ctx.client, AgentTemplatesForEvalDoc, {
      tenantId: ctx.tenantId
    });
    const templates = data.agentTemplates ?? [];
    if (templates.length === 0) {
      printError("No agent templates defined for this tenant. Create one first.");
      process.exit(1);
    }
    requireTty("Agent template");
    agentTemplateId = await promptOrExit(
      () => select8({
        message: "Agent template to run against?",
        choices: templates.map((t) => ({
          name: `${t.name}${t.model ? ` (${t.model})` : ""}${t.isPublished ? "" : " [draft]"}`,
          value: t.id
        })),
        loop: false
      })
    );
  }

  if (!scopeSatisfied) {
    const scope = await promptOrExit(
      () => select8({
        message: "How should we pick test cases?",
        choices: [
          { name: "All enabled test cases", value: "all" },
          { name: "Filter by category", value: "category" },
          { name: "Pick specific test cases", value: "specific" }
        ],
        loop: false
      })
    );
    if (scope === "all") {
      categories = null;
      testCaseIds = null;
      runAll = true;
    } else if (scope === "category") {
      const tcData = await gqlQuery(ctx.client, EvalTestCasesDoc, {
        tenantId: ctx.tenantId
      });
      const distinctCategories = Array.from(
        new Set((tcData.evalTestCases ?? []).map((tc) => tc.category))
      ).sort();
      if (distinctCategories.length === 0) {
        printError(
          "No test cases exist for this tenant yet. Run `thinkwork eval seed` to load the starter pack."
        );
        process.exit(1);
      }
      categories = await promptOrExit(
        () => checkbox({
          message: "Which categories? (space to toggle, enter to confirm)",
          choices: distinctCategories.map((c) => ({ name: c, value: c })),
          required: true,
          loop: false
        })
      );
    } else {
      const tcData = await gqlQuery(ctx.client, EvalTestCasesDoc, {
        tenantId: ctx.tenantId
      });
      const options = (tcData.evalTestCases ?? []).filter((tc) => tc.enabled);
      if (options.length === 0) {
        printError("No enabled test cases to pick from.");
        process.exit(1);
      }
      testCaseIds = await promptOrExit(
        () => checkbox({
          message: "Which test cases?",
          choices: options.map((tc) => ({
            name: `${tc.name} (${tc.category})`,
            value: tc.id
          })),
          required: true,
          loop: false
        })
      );
    }
  }

  if (!model && interactive) {
    const entered = await promptOrExit(
      () => input3({
        message: "Model override? (blank for template default)",
        default: ""
      })
    );
    if (entered.trim()) model = entered.trim();
  }

  // Show what is about to run and ask for confirmation (skipped in JSON mode).
  if (interactive && !isJsonMode()) {
    const summaryLines = [
      ["Stage", ctx.stage],
      ["Tenant", ctx.tenantSlug],
      ["Agent template", agentTemplateId]
    ];
    if (model) summaryLines.push(["Model", model]);
    if (categories && categories.length) summaryLines.push(["Categories", categories.join(", ")]);
    if (testCaseIds && testCaseIds.length) {
      summaryLines.push(["Test cases", `${testCaseIds.length} picked`]);
    }
    if (runAll && !categories?.length && !testCaseIds?.length) {
      summaryLines.push(["Scope", "all enabled test cases"]);
    }
    printKeyValue(summaryLines);
    const proceed = await promptOrExit(
      () => confirm2({ message: "Start run?", default: true })
    );
    if (!proceed) {
      logStderr("Cancelled.");
      process.exit(0);
    }
  }

  const mutRes = await gqlMutate(ctx.client, StartEvalRunDoc, {
    tenantId: ctx.tenantId,
    input: {
      agentTemplateId,
      agentId: opts.agent ?? null,
      model: model ?? null,
      categories: categories ?? null,
      testCaseIds: testCaseIds ?? null
    }
  });
  const run2 = mutRes.startEvalRun;
  if (isJsonMode()) {
    printJson({ runId: run2.id, status: run2.status, model: run2.model, categories: run2.categories });
  } else {
    printSuccess(`Started eval run ${run2.id} (status: ${run2.status}).`);
  }

  if (!opts.watch) return;
  await pollUntilTerminal(ctx.client, run2.id, 3, timeoutSec);
}
|
|
5134
|
+
/**
 * Polls an eval run until it reaches a terminal status, then exits the process.
 *
 * Exit status: 0 when the run completed, 1 when it failed, was cancelled or
 * could not be found, 2 when `timeoutSec` elapsed first. Outside JSON mode
 * progress is shown on a spinner; in JSON mode the final state is printed with
 * printJson, and the "not found" / timeout conditions are reported through
 * printError instead of exiting silently.
 *
 * @param {object} client - GraphQL client passed through to gqlQuery.
 * @param {string} runId - Id of the run to poll.
 * @param {number} intervalSec - Seconds to wait between polls.
 * @param {number} timeoutSec - Seconds to keep polling before giving up.
 * @returns {Promise<void>} Does not return normally; every path ends in process.exit.
 */
async function pollUntilTerminal(client, runId, intervalSec, timeoutSec) {
  const deadline = Date.now() + timeoutSec * 1e3;
  const spinner = isJsonMode() ? null : ora2({ text: "Waiting for run to complete\u2026" }).start();
  // Reports a problem on the spinner when there is one, otherwise via printError.
  const report = (level, message) => {
    if (spinner) spinner[level](message);
    else printError(message);
  };
  try {
    while (Date.now() < deadline) {
      const data = await gqlQuery(client, EvalRunDoc, { id: runId });
      const run2 = data.evalRun;
      if (!run2) {
        report("fail", "Run disappeared from the database.");
        process.exit(1);
      }
      if (spinner) {
        spinner.text = `status=${run2.status} ${run2.passed}/${run2.totalTests} passed (${fmtPercent(run2.passRate)})`;
      }
      if (isTerminalStatus(run2.status)) {
        if (spinner) {
          if (run2.status === "completed") {
            spinner.succeed(`completed \u2014 ${run2.passed}/${run2.totalTests} (${fmtPercent(run2.passRate)})`);
          } else if (run2.status === "failed") {
            spinner.fail(`failed \u2014 ${run2.errorMessage ?? "unknown error"}`);
          } else {
            spinner.warn("cancelled");
          }
        }
        if (isJsonMode()) {
          printJson({
            runId: run2.id,
            status: run2.status,
            passed: run2.passed,
            failed: run2.failed,
            totalTests: run2.totalTests,
            passRate: run2.passRate,
            errorMessage: run2.errorMessage
          });
        }
        process.exit(run2.status === "completed" ? 0 : 1);
      }
      // Never sleep past the deadline, so the timeout is honoured on time.
      const remainingMs = deadline - Date.now();
      if (remainingMs <= 0) break;
      await new Promise((resolve) => setTimeout(resolve, Math.min(intervalSec * 1e3, remainingMs)));
    }
    report("warn", `timeout after ${timeoutSec}s`);
    process.exit(2);
  } catch (err) {
    if (spinner) spinner.fail(err instanceof Error ? err.message : String(err));
    throw err;
  }
}
|
|
5177
|
+
|
|
5178
|
+
// src/commands/eval/list.ts
|
|
5179
|
+
/**
 * `eval list`: lists eval runs for the resolved tenant.
 *
 * In JSON mode the raw `totalCount` and `items` from the API are printed and
 * nothing else is computed. Otherwise each run is rendered as a table row.
 *
 * @param {object} opts - Command options. Reads `agent`, `limit` (default "25")
 *   and `offset` (default "0"), plus whatever `resolveEvalContext` reads.
 * @returns {Promise<void>}
 */
async function runEvalList(opts) {
  const ctx = await resolveEvalContext(opts);
  const data = await gqlQuery(ctx.client, EvalRunsDoc, {
    tenantId: ctx.tenantId,
    agentId: opts.agent ?? null,
    limit: Number.parseInt(opts.limit ?? "25", 10),
    offset: Number.parseInt(opts.offset ?? "0", 10)
  });
  const { evalRuns } = data;

  // Handle JSON first: table formatting is irrelevant to it and must not be
  // able to get in the way of the raw output.
  if (isJsonMode()) {
    printJson({ totalCount: evalRuns.totalCount, items: evalRuns.items });
    return;
  }

  const rows = (evalRuns.items ?? []).map((r) => ({
    id: r.id,
    status: r.status,
    template: r.agentTemplateName ?? r.agentTemplateId ?? "\u2014",
    categories: (r.categories ?? []).join(", ") || "\u2014",
    tests: `${r.passed}/${r.totalTests}`,
    passRate: fmtPercent(r.passRate),
    cost: fmtUsd(r.costUsd),
    started: fmtIso(r.startedAt)
  }));
  printTable(rows, [
    { key: "id", header: "RUN ID" },
    { key: "status", header: "STATUS" },
    { key: "template", header: "TEMPLATE" },
    { key: "categories", header: "CATEGORIES" },
    { key: "tests", header: "PASS/TOTAL" },
    { key: "passRate", header: "PASS RATE" },
    { key: "cost", header: "COST" },
    { key: "started", header: "STARTED" }
  ]);
}
|
|
5212
|
+
|
|
5213
|
+
// src/commands/eval/get.ts
|
|
5214
|
+
/**
 * `eval get`: shows one eval run and, unless `opts.results` is `false`, its
 * per-test-case results.
 *
 * Prints an error and exits with status 1 when the run does not exist. In
 * JSON mode the output is `{ run, results }`; otherwise a key/value summary
 * is printed, followed by a results table when there are any results.
 *
 * @param {string} runId - Id of the run to show.
 * @param {object} opts - Command options. Reads `results`, plus whatever
 *   `resolveEvalContext` reads.
 * @returns {Promise<void>}
 */
async function runEvalGet(runId, opts) {
  const ctx = await resolveEvalContext(opts);
  const runData = await gqlQuery(ctx.client, EvalRunDoc, { id: runId });
  if (!runData.evalRun) {
    printError(`Run ${runId} not found.`);
    process.exit(1);
  }
  const run2 = runData.evalRun;

  // Results are only fetched when they were not explicitly switched off.
  let results = [];
  if (opts.results !== false) {
    const resultData = await gqlQuery(ctx.client, EvalRunResultsDoc, { runId });
    results = resultData.evalRunResults ?? [];
  }

  if (isJsonMode()) {
    printJson({ run: run2, results });
    return;
  }

  const dash = "\u2014";
  printKeyValue([
    ["Run ID", run2.id],
    ["Status", run2.status],
    ["Agent template", run2.agentTemplateName ?? run2.agentTemplateId ?? dash],
    ["Agent", run2.agentName ?? run2.agentId ?? dash],
    ["Model", run2.model ?? dash],
    ["Categories", (run2.categories ?? []).join(", ") || dash],
    ["Pass/Total", `${run2.passed}/${run2.totalTests}`],
    ["Pass rate", fmtPercent(run2.passRate)],
    ["Regression", run2.regression ? "YES" : "no"],
    ["Cost", fmtUsd(run2.costUsd)],
    ["Error", run2.errorMessage ?? dash],
    ["Started", fmtIso(run2.startedAt)],
    ["Completed", fmtIso(run2.completedAt)]
  ]);

  // `results` is already empty when results were switched off.
  if (results.length === 0) {
    return;
  }
  console.log("");
  const rows = results.map((r) => {
    const score = r.score == null ? dash : r.score.toFixed(3);
    const duration = r.durationMs == null ? dash : `${r.durationMs}ms`;
    return {
      name: r.testCaseName ?? dash,
      category: r.category ?? dash,
      status: r.status,
      score,
      duration
    };
  });
  printTable(rows, [
    { key: "name", header: "TEST CASE" },
    { key: "category", header: "CATEGORY" },
    { key: "status", header: "STATUS" },
    { key: "score", header: "SCORE" },
    { key: "duration", header: "DURATION" }
  ]);
}
|
|
5260
|
+
|
|
5261
|
+
// src/commands/eval/watch.ts
|
|
5262
|
+
import ora3 from "ora";
|
|
5263
|
+
/**
 * Parse a seconds-valued option for `eval watch`.
 *
 * @param {string|undefined} raw - Value supplied on the command line.
 * @param {string} fallback - Value used when `raw` is null/undefined.
 * @param {number} min - Smallest accepted value.
 * @returns {number|null} The parsed integer, or null when the value is not
 *   a finite integer >= `min`.
 */
function parseEvalWatchSeconds(raw, fallback, min) {
  const seconds = Number.parseInt(raw ?? fallback, 10);
  return Number.isFinite(seconds) && seconds >= min ? seconds : null;
}

/**
 * `thinkwork eval watch <runId>`: poll an eval run until it reaches a
 * terminal status or the timeout elapses.
 *
 * Exit codes: 0 when the run completed, 1 when it ended in any other
 * terminal status, was not found, or an option was invalid, 2 on timeout.
 *
 * @param {string} runId - ID of the eval run to poll.
 * @param {object} opts - Parsed CLI options. `opts.interval` (seconds between
 *   polls, default "3") and `opts.timeout` (max wait in seconds, default
 *   "900") are read here; the whole object is handed to `resolveEvalContext`.
 * @returns {Promise<void>} Never resolves normally; every path ends in
 *   `process.exit` or a rethrown error.
 */
async function runEvalWatch(runId, opts) {
  // Reject bad numbers up front. An unvalidated NaN interval makes setTimeout
  // fire almost immediately (a tight polling loop), and a NaN timeout makes
  // the loop condition false so the command "times out" at once.
  const intervalSec = parseEvalWatchSeconds(opts.interval, "3", 1);
  if (intervalSec === null) {
    printError(`--interval must be a whole number of seconds >= 1 (got "${opts.interval}").`);
    process.exit(1);
  }
  const timeoutSec = parseEvalWatchSeconds(opts.timeout, "900", 0);
  if (timeoutSec === null) {
    printError(`--timeout must be a whole number of seconds >= 0 (got "${opts.timeout}").`);
    process.exit(1);
  }

  const ctx = await resolveEvalContext(opts);
  const deadline = Date.now() + timeoutSec * 1e3;
  // No spinner in JSON mode.
  const spinner = isJsonMode() ? null : ora3({ text: `Watching run ${runId}\u2026` }).start();
  try {
    while (Date.now() < deadline) {
      const data = await gqlQuery(ctx.client, EvalRunDoc, { id: runId });
      const run2 = data.evalRun;
      if (!run2) {
        const notFound = `Run ${runId} not found.`;
        // Without a spinner the failure would otherwise be silent.
        if (spinner) spinner.fail(notFound);
        else printError(notFound);
        process.exit(1);
      }
      if (spinner) {
        spinner.text = `status=${run2.status} ${run2.passed}/${run2.totalTests} passed (${fmtPercent(run2.passRate)})`;
      }
      if (isTerminalStatus(run2.status)) {
        if (spinner) {
          if (run2.status === "completed") {
            spinner.succeed(`completed \u2014 ${run2.passed}/${run2.totalTests} (${fmtPercent(run2.passRate)})`);
          } else if (run2.status === "failed") {
            spinner.fail(`failed \u2014 ${run2.errorMessage ?? "unknown error"}`);
          } else {
            spinner.warn("cancelled");
          }
        }
        if (isJsonMode()) {
          printJson({
            runId: run2.id,
            status: run2.status,
            passed: run2.passed,
            failed: run2.failed,
            totalTests: run2.totalTests,
            passRate: run2.passRate,
            errorMessage: run2.errorMessage
          });
        }
        process.exit(run2.status === "completed" ? 0 : 1);
      }
      await new Promise((resolve) => setTimeout(resolve, intervalSec * 1e3));
    }
    const timedOut = `timeout after ${timeoutSec}s`;
    if (spinner) spinner.warn(timedOut);
    else printError(timedOut);
    process.exit(2);
  } catch (err) {
    // Stop the spinner with the failure text, then let the caller handle it.
    if (spinner) spinner.fail(err instanceof Error ? err.message : String(err));
    throw err;
  }
}
|
|
5309
|
+
|
|
5310
|
+
// src/commands/eval/cancel.ts
|
|
5311
|
+
/**
 * `thinkwork eval cancel <runId>`: ask the server to cancel an eval run and
 * report the id and status it returns.
 *
 * @param {string} runId - ID of the eval run to cancel.
 * @param {object} opts - Parsed CLI options, handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalCancel(runId, opts) {
  const { client } = await resolveEvalContext(opts);
  const response = await gqlMutate(client, CancelEvalRunDoc, { id: runId });

  if (!isJsonMode()) {
    printSuccess(`Cancelled run ${response.cancelEvalRun.id} (status: ${response.cancelEvalRun.status}).`);
    return;
  }

  const payload = {
    runId: response.cancelEvalRun.id,
    status: response.cancelEvalRun.status
  };
  printJson(payload);
}
|
|
5320
|
+
|
|
5321
|
+
// src/commands/eval/delete.ts
|
|
5322
|
+
import { confirm as confirm3 } from "@inquirer/prompts";
|
|
5323
|
+
/**
 * `thinkwork eval delete <runId>`: delete an eval run and its results.
 *
 * Unless `opts.yes` is set, the user must confirm interactively; in a
 * non-interactive session the command refuses and exits 1. Declining the
 * prompt exits 0 without deleting anything.
 *
 * When the server reports that nothing was deleted, the process exit code
 * is set to 1 so scripts can detect the failure (previously the error was
 * printed but the command still exited 0).
 *
 * @param {string} runId - ID of the eval run to delete.
 * @param {object} opts - Parsed CLI options; `opts.yes` skips confirmation.
 *   The whole object is handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalDelete(runId, opts) {
  const ctx = await resolveEvalContext(opts);
  if (!opts.yes) {
    if (!isInteractive()) {
      printError("Refusing to delete without --yes in a non-interactive session.");
      process.exit(1);
    }
    requireTty("Confirmation");
    const go = await promptOrExit(
      () => confirm3({
        message: `Permanently delete run ${runId} and its results?`,
        default: false
      })
    );
    if (!go) {
      logStderr("Cancelled.");
      process.exit(0);
    }
  }

  const data = await gqlMutate(ctx.client, DeleteEvalRunDoc, { id: runId });
  const deleted = data.deleteEvalRun;
  // exitCode (not exit) so pending output is flushed before the process ends.
  if (!deleted) process.exitCode = 1;

  if (isJsonMode()) {
    printJson({ runId, deleted });
    return;
  }
  if (deleted) printSuccess(`Deleted run ${runId}.`);
  else printError(`Server reported not-deleted for ${runId}.`);
}
|
|
5350
|
+
|
|
5351
|
+
// src/commands/eval/categories.ts
|
|
5352
|
+
/**
 * `thinkwork eval categories`: list each distinct test-case category for
 * the tenant with its enabled and total test-case counts, sorted by
 * category name.
 *
 * @param {object} opts - Parsed CLI options, handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalCategories(opts) {
  const { client, tenantId } = await resolveEvalContext(opts);
  const response = await gqlQuery(client, EvalTestCasesDoc, { tenantId });

  // category -> { total, enabled }, in first-seen order.
  const tallies = /* @__PURE__ */ new Map();
  (response.evalTestCases ?? []).forEach((testCase) => {
    if (!tallies.has(testCase.category)) {
      tallies.set(testCase.category, { total: 0, enabled: 0 });
    }
    const tally = tallies.get(testCase.category);
    tally.total += 1;
    if (testCase.enabled) {
      tally.enabled += 1;
    }
  });

  const sorted = [...tallies.entries()];
  sorted.sort(([left], [right]) => left.localeCompare(right));
  // Counts are stringified for both the table and the JSON output.
  const rows = sorted.map(([category, tally]) => ({
    category,
    enabled: String(tally.enabled),
    total: String(tally.total)
  }));

  if (isJsonMode()) {
    printJson(rows);
    return;
  }
  printTable(rows, [
    { key: "category", header: "CATEGORY" },
    { key: "enabled", header: "ENABLED" },
    { key: "total", header: "TOTAL" }
  ]);
}
|
|
5377
|
+
|
|
5378
|
+
// src/commands/eval/seed.ts
|
|
5379
|
+
/**
 * `thinkwork eval seed`: run the seed mutation for the tenant and report
 * the value it returns as the number of newly inserted test cases.
 *
 * @param {object} opts - Parsed CLI options. A non-empty `opts.category`
 *   array restricts seeding to those categories; otherwise `null` is sent.
 *   The whole object is handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalSeed(opts) {
  const { client, tenantId } = await resolveEvalContext(opts);

  const requested = opts.category;
  const categories = requested && requested.length > 0 ? requested : null;
  const response = await gqlMutate(client, SeedEvalTestCasesDoc, { tenantId, categories });

  if (!isJsonMode()) {
    printSuccess(`Seeded ${response.seedEvalTestCases} new test case(s). (Duplicates were skipped.)`);
    return;
  }
  printJson({ inserted: response.seedEvalTestCases });
}
|
|
5391
|
+
|
|
5392
|
+
// src/commands/eval/test-case/list.ts
|
|
5393
|
+
/**
 * `thinkwork eval test-case list`: list the tenant's test cases, optionally
 * filtered by category and/or a search string.
 *
 * JSON mode prints the raw test-case objects returned by the query; table
 * mode prints a formatted summary row per test case.
 *
 * @param {object} opts - Parsed CLI options. `opts.category` and
 *   `opts.search` are forwarded as query filters (`null` when absent). The
 *   whole object is handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalTestCaseList(opts) {
  const { client, tenantId } = await resolveEvalContext(opts);
  const filters = {
    tenantId,
    category: opts.category ?? null,
    search: opts.search ?? null
  };
  const response = await gqlQuery(client, EvalTestCasesDoc, filters);

  const toRow = (testCase) => {
    const evaluatorIds = testCase.agentcoreEvaluatorIds ?? [];
    return {
      id: testCase.id,
      name: testCase.name,
      category: testCase.category,
      template: testCase.agentTemplateName ?? "\u2014",
      evaluators: evaluatorIds.join(", ") || "\u2014",
      enabled: testCase.enabled ? "yes" : "no",
      updated: fmtIso(testCase.updatedAt)
    };
  };
  const rows = (response.evalTestCases ?? []).map(toRow);

  if (isJsonMode()) {
    printJson(response.evalTestCases ?? []);
    return;
  }

  const columns = [
    { key: "id", header: "ID" },
    { key: "name", header: "NAME" },
    { key: "category", header: "CATEGORY" },
    { key: "template", header: "TEMPLATE" },
    { key: "evaluators", header: "EVALUATORS" },
    { key: "enabled", header: "ENABLED" },
    { key: "updated", header: "UPDATED" }
  ];
  printTable(rows, columns);
}
|
|
5423
|
+
|
|
5424
|
+
// src/commands/eval/test-case/get.ts
|
|
5425
|
+
/**
 * `thinkwork eval test-case get <id>`: show a single test case, including
 * its query and, when present, its system prompt and assertions.
 *
 * Assertions that arrive as a string are printed verbatim. Any other value
 * (for example a parsed array) is pretty-printed as JSON, because
 * interpolating it directly would print "[object Object]".
 *
 * @param {string} id - ID of the test case to fetch.
 * @param {object} opts - Parsed CLI options, handed to `resolveEvalContext`.
 * @returns {Promise<void>} Calls `process.exit(1)` when the test case does
 *   not exist.
 */
async function runEvalTestCaseGet(id, opts) {
  const ctx = await resolveEvalContext(opts);
  const data = await gqlQuery(ctx.client, EvalTestCaseDoc, { id });
  if (!data.evalTestCase) {
    printError(`Test case ${id} not found.`);
    process.exit(1);
  }
  const tc = data.evalTestCase;
  if (isJsonMode()) {
    printJson(tc);
    return;
  }
  printKeyValue([
    ["ID", tc.id],
    ["Name", tc.name],
    ["Category", tc.category],
    ["Agent template", tc.agentTemplateName ?? tc.agentTemplateId ?? "\u2014"],
    ["Source", tc.source],
    ["Enabled", tc.enabled ? "yes" : "no"],
    ["Evaluators", (tc.agentcoreEvaluatorIds ?? []).join(", ") || "\u2014"],
    ["Tags", (tc.tags ?? []).join(", ") || "\u2014"],
    ["Created", fmtIso(tc.createdAt)],
    ["Updated", fmtIso(tc.updatedAt)]
  ]);
  console.log("");
  console.log(" QUERY");
  console.log(" \u2500\u2500\u2500\u2500\u2500");
  // Continuation lines get the same one-space indent as the first line.
  console.log(` ${tc.query.split("\n").join("\n ")}`);
  if (tc.systemPrompt) {
    console.log("");
    console.log(" SYSTEM PROMPT");
    console.log(" \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
    console.log(` ${tc.systemPrompt.split("\n").join("\n ")}`);
  }
  if (tc.assertions) {
    // Strings keep the previous output byte-for-byte; structured values are
    // serialized and indented like the other sections.
    const rendered = typeof tc.assertions === "string"
      ? tc.assertions
      : JSON.stringify(tc.assertions, null, 2).split("\n").join("\n ");
    console.log("");
    console.log(" ASSERTIONS");
    console.log(" \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
    console.log(` ${rendered}`);
  }
}
|
|
5466
|
+
|
|
5467
|
+
// src/commands/eval/test-case/create.ts
|
|
5468
|
+
import { readFileSync as readFileSync6 } from "fs";
|
|
5469
|
+
import { input as input4, select as select9, checkbox as checkbox2 } from "@inquirer/prompts";
|
|
5470
|
+
// Evaluator IDs offered in the interactive "Evaluators" checkbox prompt.
var DEFAULT_EVALUATORS = [
  "Builtin.Helpfulness",
  "Builtin.Correctness",
  "Builtin.Faithfulness",
  "Builtin.ToolSelectionAccuracy",
  "Builtin.ToolParameterAccuracy",
  "Builtin.GoalSuccessRate"
];

/**
 * `thinkwork eval test-case create`: create a test case for the tenant.
 *
 * `name`, `category` and `query` are required. In an interactive session
 * missing values are prompted for, along with an optional agent template
 * and evaluator selection; in a non-interactive session missing required
 * flags cause an error and exit code 1.
 *
 * The assertions file is loaded and validated before any prompt is shown,
 * so a bad path or malformed JSON fails fast with a readable message
 * instead of surfacing as a raw exception after the user has answered
 * every prompt.
 *
 * @param {object} opts - Parsed CLI options: `name`, `category`, `query`,
 *   `systemPrompt`, `agentTemplate`, `evaluator` (array), `tag` (array),
 *   `enabled`, `assertionsFile`. The whole object is handed to
 *   `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalTestCaseCreate(opts) {
  const ctx = await resolveEvalContext(opts);
  const interactive = isInteractive();
  let name = opts.name;
  let category = opts.category;
  let query = opts.query;
  let evaluators = opts.evaluator;
  let agentTemplateId = opts.agentTemplate ?? null;

  if (!interactive && (!name || !category || !query)) {
    const missing = [];
    if (!name) missing.push("--name");
    if (!category) missing.push("--category");
    if (!query) missing.push("--query");
    printError(`Missing required flag(s): ${missing.join(", ")}.`);
    process.exit(1);
  }

  let assertions = null;
  if (opts.assertionsFile) {
    let parsed;
    try {
      parsed = JSON.parse(readFileSync6(opts.assertionsFile, "utf8"));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      printError(`Could not load --assertions-file "${opts.assertionsFile}": ${reason}`);
      process.exit(1);
    }
    if (!Array.isArray(parsed)) {
      printError(`--assertions-file must contain a JSON array.`);
      process.exit(1);
    }
    assertions = parsed;
  }

  if (!name) {
    requireTty("Name");
    name = await promptOrExit(
      () => input4({ message: "Test case name?", validate: (v) => v.trim().length > 0 || "Required" })
    );
  }
  if (!category) {
    category = await promptOrExit(
      () => input4({ message: "Category (free-form label)?", validate: (v) => v.trim().length > 0 || "Required" })
    );
  }
  if (!query) {
    query = await promptOrExit(
      () => input4({ message: "Query the agent under test will receive?", validate: (v) => v.trim().length > 0 || "Required" })
    );
  }

  if (interactive && agentTemplateId === null) {
    const tpls = await gqlQuery(ctx.client, AgentTemplatesForEvalDoc, { tenantId: ctx.tenantId });
    const templates = tpls.agentTemplates ?? [];
    if (templates.length > 0) {
      const choice = await promptOrExit(
        () => select9({
          message: "Pin to an agent template? (Enter for none)",
          choices: [
            // The empty string stands for "no template" and maps back to null.
            { name: "\u2014 none \u2014 (runner picks)", value: "" },
            ...templates.map((t) => ({ name: `${t.name}${t.model ? ` (${t.model})` : ""}`, value: t.id }))
          ],
          loop: false
        })
      );
      agentTemplateId = choice === "" ? null : choice;
    }
  }

  if (interactive && (!evaluators || evaluators.length === 0)) {
    evaluators = await promptOrExit(
      () => checkbox2({
        message: "Evaluators to run for this test case?",
        choices: DEFAULT_EVALUATORS.map((e) => ({ name: e, value: e, checked: e === "Builtin.Helpfulness" })),
        loop: false
      })
    );
  }

  const mutation = await gqlMutate(ctx.client, CreateEvalTestCaseDoc, {
    tenantId: ctx.tenantId,
    input: {
      name,
      category,
      query,
      systemPrompt: opts.systemPrompt ?? null,
      agentTemplateId,
      // Empty selections are sent as null rather than as empty arrays.
      agentcoreEvaluatorIds: evaluators && evaluators.length > 0 ? evaluators : null,
      tags: opts.tag && opts.tag.length > 0 ? opts.tag : null,
      enabled: opts.enabled ?? true,
      assertions
    }
  });
  if (isJsonMode()) {
    printJson(mutation.createEvalTestCase);
    return;
  }
  printSuccess(
    `Created test case ${mutation.createEvalTestCase.id} "${mutation.createEvalTestCase.name}" (${mutation.createEvalTestCase.category}).`
  );
}
|
|
5570
|
+
|
|
5571
|
+
// src/commands/eval/test-case/update.ts
|
|
5572
|
+
import { readFileSync as readFileSync7 } from "fs";
|
|
5573
|
+
/**
 * `thinkwork eval test-case update <id>`: update only the supplied fields
 * of a test case.
 *
 * A field is included in the mutation input only when its option is not
 * `undefined`. Supplying no fields at all is an error (exit code 1).
 * Failures to read or parse the assertions file are reported with a
 * readable message and exit code 1 rather than as a raw exception.
 *
 * @param {string} id - ID of the test case to update.
 * @param {object} opts - Parsed CLI options: `name`, `category`, `query`,
 *   `systemPrompt`, `agentTemplate`, `evaluator`, `tag`, `enabled`,
 *   `assertionsFile`. The whole object is handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalTestCaseUpdate(id, opts) {
  const ctx = await resolveEvalContext(opts);
  const input5 = {};
  if (opts.name !== void 0) input5.name = opts.name;
  if (opts.category !== void 0) input5.category = opts.category;
  if (opts.query !== void 0) input5.query = opts.query;
  if (opts.systemPrompt !== void 0) input5.systemPrompt = opts.systemPrompt;
  if (opts.agentTemplate !== void 0) input5.agentTemplateId = opts.agentTemplate;
  if (opts.evaluator !== void 0) input5.agentcoreEvaluatorIds = opts.evaluator;
  if (opts.tag !== void 0) input5.tags = opts.tag;
  if (opts.enabled !== void 0) input5.enabled = opts.enabled;

  if (opts.assertionsFile) {
    let parsed;
    try {
      parsed = JSON.parse(readFileSync7(opts.assertionsFile, "utf8"));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      printError(`Could not load --assertions-file "${opts.assertionsFile}": ${reason}`);
      process.exit(1);
    }
    if (!Array.isArray(parsed)) {
      printError(`--assertions-file must contain a JSON array.`);
      process.exit(1);
    }
    input5.assertions = parsed;
  }

  if (Object.keys(input5).length === 0) {
    printError("No fields to update. Pass at least one --<field>.");
    process.exit(1);
  }

  const res = await gqlMutate(ctx.client, UpdateEvalTestCaseDoc, { id, input: input5 });
  if (isJsonMode()) {
    printJson(res.updateEvalTestCase);
    return;
  }
  printSuccess(`Updated test case ${res.updateEvalTestCase.id}.`);
}
|
|
5603
|
+
|
|
5604
|
+
// src/commands/eval/test-case/delete.ts
|
|
5605
|
+
import { confirm as confirm4 } from "@inquirer/prompts";
|
|
5606
|
+
/**
 * `thinkwork eval test-case delete <id>`: delete a test case.
 *
 * Unless `opts.yes` is set, the user must confirm interactively; in a
 * non-interactive session the command refuses and exits 1. Declining the
 * prompt exits 0 without deleting anything.
 *
 * When the server reports that nothing was deleted, the process exit code
 * is set to 1 so scripts can detect the failure (previously the error was
 * printed but the command still exited 0).
 *
 * @param {string} id - ID of the test case to delete.
 * @param {object} opts - Parsed CLI options; `opts.yes` skips confirmation.
 *   The whole object is handed to `resolveEvalContext`.
 * @returns {Promise<void>}
 */
async function runEvalTestCaseDelete(id, opts) {
  const ctx = await resolveEvalContext(opts);
  if (!opts.yes) {
    if (!isInteractive()) {
      printError("Refusing to delete without --yes in a non-interactive session.");
      process.exit(1);
    }
    requireTty("Confirmation");
    const go = await promptOrExit(
      () => confirm4({ message: `Permanently delete test case ${id}?`, default: false })
    );
    if (!go) {
      logStderr("Cancelled.");
      process.exit(0);
    }
  }

  const res = await gqlMutate(ctx.client, DeleteEvalTestCaseDoc, { id });
  const deleted = res.deleteEvalTestCase;
  // exitCode (not exit) so pending output is flushed before the process ends.
  if (!deleted) process.exitCode = 1;

  if (isJsonMode()) {
    printJson({ id, deleted });
    return;
  }
  if (deleted) printSuccess(`Deleted test case ${id}.`);
  else printError(`Server reported not-deleted for ${id}.`);
}
|
|
5630
|
+
|
|
5631
|
+
// src/commands/eval.ts
|
|
5632
|
+
/**
 * Register the `eval` command group (alias `evals`) and all of its
 * subcommands on the given program object.
 *
 * Subcommands wired here: run, list (alias ls), get, watch, cancel, delete,
 * categories, seed, and the nested `test-case` group (alias `test-cases`)
 * with list (alias ls), get, create, update and delete. Every subcommand
 * accepts --stage, --tenant and --region (region defaults to "us-east-1")
 * and dispatches to the matching runEval* handler via `.action(...)`.
 *
 * @param program2 - Root command exposing `.command()`; presumably a
 *   Commander `Command` instance (confirm against the caller).
 */
function registerEvalCommand(program2) {
|
|
5633
|
+
const evals = program2.command("eval").alias("evals").description(
|
|
5634
|
+
"Run evaluations against your agents and manage eval test cases. Integrates with the Evaluations Studio in the admin UI."
|
|
5635
|
+
);
|
|
5636
|
+
evals.command("run").description(
|
|
5637
|
+
"Start an evaluation run. Prompts for missing values in a TTY; fails fast in non-interactive sessions."
|
|
5638
|
+
).option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--agent-template <id>", "Run-level agent template ID").option("--agent <id>", "Optional agent under test").option("--model <id>", "Optional model override").option("--category <name...>", "Only run these categories (repeatable)").option("--test-case <id...>", "Only run these specific test case IDs (repeatable)").option("--all", "Run all enabled test cases for the tenant").option("--watch", "Block and poll until the run reaches a terminal status").option("--timeout <seconds>", "Max wait seconds for --watch (default 900)", "900").addHelpText(
|
|
5639
|
+
"after",
|
|
5640
|
+
`
|
|
5641
|
+
Examples:
|
|
5642
|
+
# Fire and return \u2014 prints the runId; view results in the admin UI
|
|
5643
|
+
$ thinkwork eval run --agent-template tpl-abc --category tool-safety
|
|
5644
|
+
|
|
5645
|
+
# Pick categories + test cases interactively
|
|
5646
|
+
$ thinkwork eval run
|
|
5647
|
+
|
|
5648
|
+
# Block until done
|
|
5649
|
+
$ thinkwork eval run --agent-template tpl-abc --all --watch --timeout 1800
|
|
5650
|
+
`
|
|
5651
|
+
).action(runEvalRun);
|
|
5652
|
+
evals.command("list").alias("ls").description("List recent eval runs for the tenant.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--agent <id>", "Filter by agent under test").option("--limit <n>", "Max rows (default 25)", "25").option("--offset <n>", "Skip N rows", "0").action(runEvalList);
|
|
5653
|
+
evals.command("get <runId>").description("Show one eval run with its per-test-case results.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--results", "Also fetch per-test-case results (default: true)", true).option("--no-results", "Skip fetching per-test-case results").action(runEvalGet);
|
|
5654
|
+
evals.command("watch <runId>").description("Poll an eval run until it reaches a terminal status.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--interval <seconds>", "Poll interval (default 3)", "3").option("--timeout <seconds>", "Max wait seconds (default 900)", "900").action(runEvalWatch);
|
|
5655
|
+
evals.command("cancel <runId>").description("Cancel a running or pending eval run.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").action(runEvalCancel);
|
|
5656
|
+
evals.command("delete <runId>").description("Delete an eval run and its results. Requires confirmation unless --yes.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("-y, --yes", "Skip the confirmation prompt").action(runEvalDelete);
|
|
5657
|
+
evals.command("categories").description("List distinct categories present across the tenant's test cases.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").action(runEvalCategories);
|
|
5658
|
+
evals.command("seed").description(
|
|
5659
|
+
"Idempotently seed the maniflow starter pack (96 test cases across 9 categories). Safe to re-run."
|
|
5660
|
+
).option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--category <name...>", "Only seed these categories (repeatable)").action(runEvalSeed);
|
|
5661
|
+
const tc = evals.command("test-case").alias("test-cases").description("Manage individual eval test cases (CRUD).");
|
|
5662
|
+
tc.command("list").alias("ls").description("List test cases, optionally filtered by category or search.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--category <name>", "Filter by a single category").option("--search <q>", "Substring match on test case name").action(runEvalTestCaseList);
|
|
5663
|
+
tc.command("get <id>").description("Show a single test case.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").action(runEvalTestCaseGet);
|
|
5664
|
+
tc.command("create").description("Create a new test case. Prompts for missing values in a TTY.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--name <text>", "Human-readable name").option("--category <name>", "Category label (e.g. tool-safety, red-team)").option("--query <text>", "The user-facing query this agent will receive").option("--system-prompt <text>", "Optional system-prompt override").option("--agent-template <id>", "Pin to a specific agent template").option("--evaluator <id...>", "AgentCore evaluator IDs (repeatable)").option("--tag <name...>", "Tags (repeatable)").option("--enabled", "Mark enabled (default)", true).option("--no-enabled", "Mark disabled").option("--assertions-file <path>", "JSON file containing an array of assertions").action(runEvalTestCaseCreate);
|
|
5665
|
+
tc.command("update <id>").description("Update a test case. Only supplied fields are changed.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("--name <text>").option("--category <name>").option("--query <text>").option("--system-prompt <text>").option("--agent-template <id>").option("--evaluator <id...>", "Replace AgentCore evaluator IDs (repeatable)").option("--tag <name...>", "Replace tags (repeatable)").option("--enabled", "Mark enabled").option("--no-enabled", "Mark disabled").option("--assertions-file <path>", "JSON file containing an array of assertions (replaces all)").action(runEvalTestCaseUpdate);
|
|
5666
|
+
tc.command("delete <id>").description("Delete a test case. Requires confirmation unless --yes.").option("-s, --stage <name>", "Deployment stage").option("-t, --tenant <slug>", "Tenant slug").option("-r, --region <region>", "AWS region", "us-east-1").option("-y, --yes", "Skip the confirmation prompt").action(runEvalTestCaseDelete);
|
|
5667
|
+
}
|
|
5668
|
+
|
|
5669
|
+
// src/commands/wiki/compile.ts
|
|
5670
|
+
import ora4 from "ora";
|
|
5671
|
+
|
|
5672
|
+
// src/commands/wiki/gql.ts
|
|
5673
|
+
// GraphQL query document: look up one tenant by slug, selecting id, slug and name.
// Passed to gqlQuery by resolveWikiContext.
var TenantBySlugDoc2 = graphql(`
|
|
5674
|
+
query CliWikiTenantBySlug($slug: String!) {
|
|
5675
|
+
tenantBySlug(slug: $slug) {
|
|
5676
|
+
id
|
|
5677
|
+
slug
|
|
5678
|
+
name
|
|
5679
|
+
}
|
|
5680
|
+
}
|
|
5681
|
+
`);
|
|
5682
|
+
// GraphQL query document: list a tenant's agents (system agents and sub-agents
// excluded via the includeSystem/includeSubAgents arguments), selecting id,
// name, slug, type and status. Passed to gqlQuery by resolveAgentScope.
var AllTenantAgentsForWikiDoc = graphql(`
|
|
5683
|
+
query CliAllTenantAgentsForWiki($tenantId: ID!) {
|
|
5684
|
+
allTenantAgents(tenantId: $tenantId, includeSystem: false, includeSubAgents: false) {
|
|
5685
|
+
id
|
|
5686
|
+
name
|
|
5687
|
+
slug
|
|
5688
|
+
type
|
|
5689
|
+
status
|
|
5690
|
+
}
|
|
5691
|
+
}
|
|
5692
|
+
`);
|
|
5693
|
+
// GraphQL mutation document: compileWikiNow for a tenant/owner pair with an
// optional modelId. Selects the returned object's id, tenantId, ownerId,
// status, trigger, dedupeKey, attempt and createdAt.
var CompileWikiNowDoc = graphql(`
|
|
5694
|
+
mutation CliCompileWikiNow($tenantId: ID!, $ownerId: ID!, $modelId: String) {
|
|
5695
|
+
compileWikiNow(tenantId: $tenantId, ownerId: $ownerId, modelId: $modelId) {
|
|
5696
|
+
id
|
|
5697
|
+
tenantId
|
|
5698
|
+
ownerId
|
|
5699
|
+
status
|
|
5700
|
+
trigger
|
|
5701
|
+
dedupeKey
|
|
5702
|
+
attempt
|
|
5703
|
+
createdAt
|
|
5704
|
+
}
|
|
5705
|
+
}
|
|
5706
|
+
`);
|
|
5707
|
+
// GraphQL mutation document: resetWikiCursor for a tenant/owner pair with an
// optional force flag. Selects tenantId, ownerId, cursorCleared and
// pagesArchived from the result.
var ResetWikiCursorDoc = graphql(`
|
|
5708
|
+
mutation CliResetWikiCursor($tenantId: ID!, $ownerId: ID!, $force: Boolean) {
|
|
5709
|
+
resetWikiCursor(tenantId: $tenantId, ownerId: $ownerId, force: $force) {
|
|
5710
|
+
tenantId
|
|
5711
|
+
ownerId
|
|
5712
|
+
cursorCleared
|
|
5713
|
+
pagesArchived
|
|
5714
|
+
}
|
|
5715
|
+
}
|
|
5716
|
+
`);
|
|
5717
|
+
// GraphQL query document: list wiki compile jobs for a tenant, optionally
// narrowed by ownerId and capped by limit. Selects each job's identifiers,
// status, trigger, dedupeKey, attempt, timestamps (claimedAt, startedAt,
// finishedAt, createdAt), error and metrics.
var WikiCompileJobsDoc = graphql(`
|
|
5718
|
+
query CliWikiCompileJobs($tenantId: ID!, $ownerId: ID, $limit: Int) {
|
|
5719
|
+
wikiCompileJobs(tenantId: $tenantId, ownerId: $ownerId, limit: $limit) {
|
|
5720
|
+
id
|
|
5721
|
+
tenantId
|
|
5722
|
+
ownerId
|
|
5723
|
+
status
|
|
5724
|
+
trigger
|
|
5725
|
+
dedupeKey
|
|
5726
|
+
attempt
|
|
5727
|
+
claimedAt
|
|
5728
|
+
startedAt
|
|
5729
|
+
finishedAt
|
|
5730
|
+
error
|
|
5731
|
+
metrics
|
|
5732
|
+
createdAt
|
|
5733
|
+
}
|
|
5734
|
+
}
|
|
5735
|
+
`);
|
|
5736
|
+
|
|
5737
|
+
// src/commands/wiki/helpers.ts
|
|
5738
|
+
import { select as select10 } from "@inquirer/prompts";
|
|
5739
|
+
/**
 * Resolve the stage, region, GraphQL client and tenant for a wiki command.
 *
 * Tenant resolution order:
 *   1. `opts.tenant` or the THINKWORK_TENANT environment variable. The
 *      stored session is reused when its slug matches and it has a tenant
 *      id; otherwise the slug is looked up on the server, and an unknown
 *      slug is a fatal error.
 *   2. The stored stage session, when it has both a tenant id and slug.
 *   3. The tenant slug reported by `getGqlClient`, looked up on the server.
 * If none of these yields a tenant, an error is printed and the process
 * exits with code 1.
 *
 * @param {object} opts - Parsed CLI options; `region`, `stage` and `tenant`
 *   are read here.
 * @returns {Promise<{stage, region, client, tenantId, tenantSlug}>}
 */
async function resolveWikiContext(opts) {
  const region = opts.region ?? "us-east-1";
  const stage = await resolveStage({ flag: opts.stage, region });
  const session = loadStageSession(stage);
  const gql = await getGqlClient({ stage, region });
  const client = gql.client;
  const ctxTenantSlug = gql.tenantSlug;

  // Every successful path returns the same shape; only the tenant differs.
  const withTenant = (tenantId, tenantSlug) => ({
    stage,
    region,
    client,
    tenantId,
    tenantSlug
  });
  const lookupTenant = async (slug) => {
    const data = await gqlQuery(client, TenantBySlugDoc2, { slug });
    return data.tenantBySlug;
  };

  const flagOrEnv = opts.tenant ?? process.env.THINKWORK_TENANT;
  if (flagOrEnv) {
    const sessionMatches = session?.tenantSlug === flagOrEnv && session.tenantId;
    if (sessionMatches) {
      return withTenant(session.tenantId, flagOrEnv);
    }
    const requested = await lookupTenant(flagOrEnv);
    if (!requested) {
      printError(`Tenant "${flagOrEnv}" not found.`);
      process.exit(1);
    }
    return withTenant(requested.id, requested.slug);
  }

  if (session?.tenantId && session.tenantSlug) {
    return withTenant(session.tenantId, session.tenantSlug);
  }

  if (ctxTenantSlug) {
    const fromClient = await lookupTenant(ctxTenantSlug);
    if (fromClient) {
      return withTenant(fromClient.id, fromClient.slug);
    }
  }

  printError(
    `No tenant resolved for stage "${stage}". Pass --tenant <slug>, set THINKWORK_TENANT, or run \`thinkwork login --stage ${stage}\`.`
  );
  process.exit(1);
}
|
|
5799
|
+
/**
 * Decide which agent(s) a wiki command should operate on.
 *
 * Resolution order:
 *   1. `opts.agent` that `isUuid` accepts: used as-is, with no server
 *      lookup.
 *   2. `opts.agent` as a name or slug: matched case-insensitively against
 *      the tenant's agents. Zero matches or more than one match is a fatal
 *      error (exit code 1).
 *   3. `opts.all`: every agent of the tenant, unless `config.allowAll` is
 *      false, in which case it is a fatal error.
 *   4. Otherwise an interactive picker, which requires an interactive
 *      session.
 *
 * The original computed a `needList` flag and branched on it before loading
 * agents for `--all`, but both branches loaded the list exactly once; that
 * dead logic has been removed.
 *
 * @param {object} ctx - Context with `client` and `tenantId`.
 * @param {object} opts - Parsed CLI options; `agent` and `all` are read.
 * @param {object} [config] - `allowAll` (default true) controls whether
 *   "all agents" is an accepted answer.
 * @returns {Promise<object>} `{ mode: "single", agentId, agentLabel }` or
 *   `{ mode: "all", agentIds, agentLabels }`, where `agentLabels` maps an
 *   agent id to its name (or to the id when the name is null).
 */
async function resolveAgentScope(ctx, opts, config = {}) {
  const allowAll = config.allowAll ?? true;

  const loadAgents = async () => {
    const data = await gqlQuery(ctx.client, AllTenantAgentsForWikiDoc, {
      tenantId: ctx.tenantId
    });
    return data.allTenantAgents ?? [];
  };
  const singleScope = (agent) => ({
    mode: "single",
    agentId: agent.id,
    agentLabel: agent.name ?? agent.id
  });
  const allScope = (agents) => ({
    mode: "all",
    agentIds: agents.map((a) => a.id),
    agentLabels: Object.fromEntries(
      agents.map((a) => [a.id, a.name ?? a.id])
    )
  });

  if (opts.agent) {
    if (isUuid(opts.agent)) {
      return {
        mode: "single",
        agentId: opts.agent,
        agentLabel: opts.agent
      };
    }
    const candidates = await loadAgents();
    const needle = opts.agent.toLowerCase();
    const matches = candidates.filter(
      (a) => [a.name, a.slug].some(
        (v) => v != null && String(v).toLowerCase() === needle
      )
    );
    if (matches.length === 0) {
      printError(
        `Agent "${opts.agent}" not found. Pass the UUID or a matching name/slug.`
      );
      process.exit(1);
    }
    if (matches.length > 1) {
      printError(
        `"${opts.agent}" matches ${matches.length} agents. Pass the UUID instead \u2014 candidates: ${matches.map((a) => a.id).join(", ")}`
      );
      process.exit(1);
    }
    return singleScope(matches[0]);
  }

  if (opts.all) {
    if (!allowAll) {
      printError(
        "--all is not supported for this command. Rebuild one agent at a time."
      );
      process.exit(1);
    }
    return allScope(await loadAgents());
  }

  if (!isInteractive()) {
    requireTty(allowAll ? "Agent (or --all)" : "Agent");
    // requireTty is expected not to return in a non-interactive session.
    throw new Error("unreachable");
  }

  const agents = await loadAgents();
  if (agents.length === 0) {
    printError("No agents found for this tenant.");
    process.exit(1);
  }
  const choices = [];
  if (allowAll) {
    // Sentinel value distinguishing "all agents" from a real agent id.
    choices.push({ name: "All agents (fan out)", value: "__all__" });
  }
  for (const a of agents) {
    const label = a.name ?? a.id;
    const slugPart = a.slug ? ` (${a.slug})` : "";
    choices.push({ name: `${label}${slugPart} [${a.id}]`, value: a.id });
  }
  const pick = await promptOrExit(
    () => select10({
      message: "Which agent?",
      choices,
      loop: false
    })
  );
  if (pick === "__all__") {
    return allScope(agents);
  }
  return singleScope(agents.find((a) => a.id === pick));
}
|
|
5900
|
+
/**
 * Turn a thrown value into a printable message plus a permission flag.
 *
 * @param {unknown} err - The caught value; its `message` property is used
 *   when present, otherwise the value is stringified.
 * @returns {{ forbidden: boolean, message: string }} `forbidden` is true when
 *   the message matches one of the known access-denial phrases
 *   (case-insensitive).
 */
function classifyMutationError(err) {
  const accessDeniedPattern = /Admin-only|Access denied|tenant mismatch|outside tenant/i;
  const text = err?.message ?? String(err);
  return {
    forbidden: accessDeniedPattern.test(text),
    message: text
  };
}
|
|
5905
|
+
/**
 * Print the standard "you need admin access" guidance via `printError`.
 *
 * @param {string} tenantSlug - Tenant slug interpolated into the message.
 */
function printForbiddenHint(tenantSlug) {
  const hint = `Admin access to tenant "${tenantSlug}" is required for wiki operations. Ask your tenant owner to promote your membership or use an admin API key.`;
  printError(hint);
}
|
|
5910
|
+
/**
 * Report whether a compile-job status means the job will not change again.
 *
 * @param {unknown} status - Status value as returned for a compile job.
 * @returns {boolean} True for "succeeded", "failed", "cancelled" or "skipped".
 */
function isTerminalCompileStatus(status) {
  switch (status) {
    case "succeeded":
    case "failed":
    case "cancelled":
    case "skipped":
      return true;
    default:
      return false;
  }
}
|
|
5913
|
+
/**
 * Abbreviate a job id for display.
 *
 * @param {string} id - Full job id.
 * @returns {string} The first eight characters of `id` (or all of it when shorter).
 */
function shortJobId(id) {
  const prefixLength = 8;
  return id.slice(0, prefixLength);
}
|
|
5916
|
+
|
|
5917
|
+
// src/commands/wiki/compile.ts
|
|
5918
|
+
/**
 * `wiki compile`: enqueue a compile job for one agent, or for every agent the
 * resolved scope lists when the scope mode is "all".
 *
 * Output: in JSON mode a single summary object is printed after all enqueue
 * attempts; otherwise per-agent progress is shown (a spinner for a single
 * agent, plain check/cross lines when fanning out) followed by a key/value
 * summary for fan-out runs.
 *
 * Exit behavior visible here: exits 2 after printing the forbidden hint when
 * any enqueue failed with an access-denial message, exits 1 when any enqueue
 * failed otherwise, and returns normally on success (optionally handing off
 * to `watchSingleJob`).
 *
 * @param {object} opts - Parsed command options; `model` and `watch` are read
 *   here, the rest is forwarded to the context/scope resolvers.
 * @returns {Promise<void>}
 */
async function runWikiCompile(opts) {
  const ctx = await resolveWikiContext(opts);
  const scope = await resolveAgentScope(ctx, opts, { allowAll: true });

  let targets;
  if (scope.mode === "single") {
    targets = [{ id: scope.agentId, label: scope.agentLabel }];
  } else {
    targets = scope.agentIds.map((id) => ({
      id,
      label: scope.agentLabels[id] ?? id
    }));
  }

  // Nothing to do: report an empty (successful) run and stop.
  if (targets.length === 0) {
    if (!isJsonMode()) {
      printWarning("No agents found for this tenant \u2014 nothing to compile.");
      return;
    }
    printJson({
      ok: true,
      scope: { tenantId: ctx.tenantId, tenantSlug: ctx.tenantSlug, agentIds: [] },
      jobs: [],
      errors: []
    });
    return;
  }

  const jobs = [];
  const errors = [];
  let forbiddenHit = false;

  for (const target of targets) {
    // A spinner is only used for a human-readable, single-agent run.
    let spinner = null;
    if (!isJsonMode() && scope.mode !== "all") {
      spinner = ora4({
        text: `Enqueuing compile for ${target.label}\u2026`,
        prefixText: " "
      }).start();
    }

    try {
      const response = await gqlMutate(ctx.client, CompileWikiNowDoc, {
        tenantId: ctx.tenantId,
        ownerId: target.id,
        modelId: opts.model ?? null
      });
      const job = response.compileWikiNow;
      jobs.push({
        agentId: target.id,
        agentLabel: target.label,
        jobId: job.id,
        status: job.status,
        error: null
      });
      if (spinner) {
        spinner.succeed(
          `${target.label} \u2192 job=${shortJobId(job.id)} (${job.status})`
        );
      } else if (!isJsonMode()) {
        console.log(
          ` \u2713 ${target.label} \u2192 job=${shortJobId(job.id)} (${job.status})`
        );
      }
    } catch (err) {
      const failure = classifyMutationError(err);
      errors.push({ agentId: target.id, message: failure.message });
      jobs.push({
        agentId: target.id,
        agentLabel: target.label,
        jobId: null,
        status: null,
        error: failure.message
      });
      if (spinner) {
        spinner.fail(`${target.label} \u2192 ${failure.message}`);
      } else if (!isJsonMode()) {
        console.log(` \u2717 ${target.label} \u2192 ${failure.message}`);
      }
      if (failure.forbidden) {
        forbiddenHit = true;
        // During fan-out, stop at the first access-denial failure.
        if (scope.mode === "all") {
          break;
        }
      }
    }
  }

  const anyFailed = errors.length > 0;

  if (isJsonMode()) {
    printJson({
      ok: !anyFailed,
      scope: {
        tenantId: ctx.tenantId,
        tenantSlug: ctx.tenantSlug,
        mode: scope.mode,
        agentIds: targets.map((t) => t.id)
      },
      model: opts.model ?? null,
      jobs,
      errors
    });
  } else if (scope.mode === "all") {
    const queuedCount = jobs.filter((j) => j.jobId).length;
    console.log("");
    printKeyValue([
      ["Tenant", ctx.tenantSlug],
      ["Agents queued", `${queuedCount} / ${targets.length}`],
      ["Failures", String(errors.length)],
      ["Model override", opts.model ?? "(default)"]
    ]);
  }

  if (forbiddenHit) {
    printForbiddenHint(ctx.tenantSlug);
    process.exit(2);
  }
  if (anyFailed) {
    process.exit(1);
  }

  if (!isJsonMode() && jobs.length === 1) {
    const [onlyJob] = jobs;
    printSuccess(`Compile enqueued for ${onlyJob.agentLabel}.`);
    console.log(
      ` Use \`thinkwork wiki status --tenant ${ctx.tenantSlug} --agent ${onlyJob.agentId} --watch\` to follow the job.`
    );
  }

  if (!opts.watch) {
    return;
  }
  if (scope.mode === "single" && jobs.length === 1 && jobs[0].jobId) {
    await watchSingleJob(ctx, {
      agentId: jobs[0].agentId,
      jobId: jobs[0].jobId,
      agentLabel: jobs[0].agentLabel
    });
    return;
  }
  if (scope.mode === "all") {
    printWarning(
      `--watch is ignored for --all. Use \`thinkwork wiki status --tenant ${ctx.tenantSlug} --watch\` instead.`
    );
  }
}
|
|
6039
|
+
/**
 * Poll the compile-job list for one agent until the given job reaches a
 * terminal status, then exit the process.
 *
 * Polls every 3 seconds for at most 15 minutes, asking for the 5 most recent
 * jobs of the agent each time. Exit codes as implemented below: 0 when the
 * job succeeded, 1 when it ended in any other terminal status or when polling
 * threw, 2 when the deadline passed without a terminal status.
 *
 * @param {object} ctx - Wiki context; `client` and `tenantId` are read.
 * @param {{ agentId: string, jobId: string, agentLabel: string }} target -
 *   The job to follow and the label used in the spinner text.
 * @returns {Promise<void>} In practice the process exits before this resolves.
 */
async function watchSingleJob(ctx, target) {
  const spinner = isJsonMode() ? null : ora4({
    text: `Watching job ${shortJobId(target.jobId)} for ${target.agentLabel}\u2026`,
    prefixText: " "
  }).start();
  const intervalMs = 3e3;
  const deadline = Date.now() + 15 * 60 * 1e3;
  try {
    while (Date.now() < deadline) {
      const data = await gqlQuery(ctx.client, WikiCompileJobsDoc, {
        tenantId: ctx.tenantId,
        ownerId: target.agentId,
        limit: 5
      });
      const job = data.wikiCompileJobs.find((j) => j.id === target.jobId);
      if (!job) {
        // Update the live text instead of calling spinner.warn(): warn() stops
        // and persists the spinner, which would freeze all later status
        // updates and print a new warning line on every poll.
        if (spinner) spinner.text = "job not visible yet \u2014 polling\u2026";
      } else {
        if (spinner) spinner.text = `status=${job.status} attempt=${job.attempt}`;
        if (isTerminalCompileStatus(job.status)) {
          if (spinner) {
            if (job.status === "succeeded") spinner.succeed(`succeeded`);
            else if (job.status === "skipped") spinner.info("skipped");
            else spinner.fail(`${job.status}${job.error ? ` \u2014 ${job.error}` : ""}`);
          }
          if (isJsonMode()) {
            printJson({
              ok: job.status === "succeeded",
              jobId: job.id,
              status: job.status,
              error: job.error,
              metrics: job.metrics
            });
          }
          process.exit(job.status === "succeeded" ? 0 : 1);
        }
      }
      await new Promise((r) => setTimeout(r, intervalMs));
    }
    if (spinner) spinner.warn("watch timeout \u2014 job still in progress.");
    process.exit(2);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // Without a spinner (JSON mode) the underlying cause would otherwise be lost.
    if (spinner) spinner.fail(message);
    else printError(message);
    printError("Watch failed. The compile job itself may still complete.");
    process.exit(1);
  }
}
|
|
6086
|
+
|
|
6087
|
+
// src/commands/wiki/rebuild.ts
|
|
6088
|
+
import { confirm as confirm5 } from "@inquirer/prompts";
|
|
6089
|
+
import ora5 from "ora";
|
|
6090
|
+
/**
 * `wiki rebuild`: reset one agent's wiki cursor (archiving its active pages)
 * and then enqueue a fresh compile.
 *
 * Flow: reject `--all`, resolve context and a single-agent scope, ask for
 * confirmation unless `--yes` was given or JSON mode is on, run the reset
 * mutation, run the compile mutation, print the result, and optionally watch
 * the new job.
 *
 * Exit behavior visible here: 1 for `--all`, a non-single scope, or a failed
 * mutation; 2 when a failed mutation is classified as forbidden; 0 when the
 * user declines the confirmation prompt.
 *
 * @param {object} opts - Parsed command options; `all`, `yes`, `model` and
 *   `watch` are read here, the rest is forwarded to the resolvers.
 * @returns {Promise<void>}
 */
async function runWikiRebuild(opts) {
  if (opts.all) {
    printError(
      "--all is not supported for rebuild. Rebuild one agent at a time to avoid mass-archiving pages across the tenant."
    );
    process.exit(1);
  }

  const ctx = await resolveWikiContext(opts);
  const scope = await resolveAgentScope(ctx, opts, { allowAll: false });
  if (scope.mode !== "single") {
    printError("rebuild requires a single agent.");
    process.exit(1);
  }
  const { agentId, agentLabel } = scope;

  // Shared shape for every JSON result this command prints.
  const buildResult = (ok, archived, enqueuedJobId, error) => ({
    ok,
    scope: {
      tenantId: ctx.tenantId,
      tenantSlug: ctx.tenantSlug,
      agentId
    },
    pagesArchived: archived,
    jobId: enqueuedJobId,
    error
  });
  const archivedPhrase = (count) => `${count} page${count === 1 ? "" : "s"} archived`;
  // Forbidden failures get the admin hint and exit code 2; everything else exits 1.
  const exitAfterFailure = (forbidden) => {
    if (forbidden) {
      printForbiddenHint(ctx.tenantSlug);
      process.exit(2);
    }
    process.exit(1);
  };

  const needsConfirm = opts.yes !== true && !isJsonMode();
  if (needsConfirm) {
    if (!isInteractive()) {
      requireTty("Rebuild confirmation (--yes)");
    }
    const confirmed = await promptOrExit(
      () => confirm5({
        message: `Rebuild wiki for ${agentLabel}? This archives every active page in the scope and recompiles from scratch.`,
        default: false
      })
    );
    if (!confirmed) {
      if (!isJsonMode()) console.log(" Cancelled.");
      process.exit(0);
    }
  }

  // Step 1: reset the cursor, which reports how many pages were archived.
  const resetSpinner = isJsonMode() ? null : ora5({
    text: `Archiving active pages for ${agentLabel}\u2026`,
    prefixText: " "
  }).start();
  let pagesArchived = 0;
  try {
    const resetData = await gqlMutate(ctx.client, ResetWikiCursorDoc, {
      tenantId: ctx.tenantId,
      ownerId: agentId,
      force: true
    });
    pagesArchived = resetData.resetWikiCursor.pagesArchived;
    if (resetSpinner) {
      resetSpinner.succeed(`${archivedPhrase(pagesArchived)}, cursor cleared.`);
    }
  } catch (err) {
    const failure = classifyMutationError(err);
    if (resetSpinner) resetSpinner.fail(`Reset failed: ${failure.message}`);
    if (isJsonMode()) printJson(buildResult(false, null, null, failure.message));
    exitAfterFailure(failure.forbidden);
  }

  // Step 2: enqueue the fresh compile.
  const compileSpinner = isJsonMode() ? null : ora5({
    text: `Enqueuing fresh compile for ${agentLabel}\u2026`,
    prefixText: " "
  }).start();
  let jobId = null;
  try {
    const compileData = await gqlMutate(ctx.client, CompileWikiNowDoc, {
      tenantId: ctx.tenantId,
      ownerId: agentId,
      modelId: opts.model ?? null
    });
    jobId = compileData.compileWikiNow.id;
    if (compileSpinner) {
      compileSpinner.succeed(
        `Compile enqueued \u2014 job=${shortJobId(jobId)} status=${compileData.compileWikiNow.status}`
      );
    }
  } catch (err) {
    const failure = classifyMutationError(err);
    if (compileSpinner) {
      compileSpinner.fail(`Compile enqueue failed: ${failure.message}`);
    }
    if (isJsonMode()) {
      printJson(buildResult(false, pagesArchived, null, failure.message));
    } else {
      // The reset already happened, so tell the user how to finish the job.
      printWarning(
        `Reset succeeded (${archivedPhrase(pagesArchived)}) but compile enqueue failed. Retry with:\nthinkwork wiki compile --tenant ${ctx.tenantSlug} --agent ${agentId}`
      );
    }
    exitAfterFailure(failure.forbidden);
  }

  if (isJsonMode()) {
    printJson(buildResult(true, pagesArchived, jobId, null));
  } else {
    console.log("");
    printKeyValue([
      ["Tenant", ctx.tenantSlug],
      ["Agent", agentLabel],
      ["Pages archived", String(pagesArchived)],
      ["Compile job", jobId ?? "\u2014"],
      ["Model override", opts.model ?? "(default)"]
    ]);
    printSuccess(`Rebuild enqueued for ${agentLabel}.`);
  }

  if (opts.watch && jobId) {
    await watchRebuildJob(ctx, { agentId, jobId, agentLabel });
  }
}
|
|
6229
|
+
/**
 * Poll the compile-job list for one agent until the rebuild's compile job
 * reaches a terminal status, then exit the process.
 *
 * Polls every 3 seconds for at most 15 minutes, asking for the 5 most recent
 * jobs of the agent each time. Exit codes as implemented below: 0 when the
 * job succeeded, 1 for any other terminal status or when polling threw,
 * 2 when the deadline passed.
 *
 * @param {object} ctx - Wiki context; `client` and `tenantId` are read.
 * @param {{ agentId: string, jobId: string, agentLabel: string }} target -
 *   The job to follow.
 * @returns {Promise<void>} In practice the process exits before this resolves.
 */
async function watchRebuildJob(ctx, target) {
  const spinner = isJsonMode() ? null : ora5({
    text: `Watching rebuild job ${shortJobId(target.jobId)}\u2026`,
    prefixText: " "
  }).start();
  const intervalMs = 3e3;
  const deadline = Date.now() + 15 * 60 * 1e3;
  try {
    while (Date.now() < deadline) {
      const data = await gqlQuery(ctx.client, WikiCompileJobsDoc, {
        tenantId: ctx.tenantId,
        ownerId: target.agentId,
        limit: 5
      });
      const job = data.wikiCompileJobs.find((j) => j.id === target.jobId);
      if (job) {
        if (spinner) spinner.text = `status=${job.status} attempt=${job.attempt}`;
        if (isTerminalCompileStatus(job.status)) {
          if (spinner) {
            if (job.status === "succeeded") spinner.succeed("rebuild succeeded");
            else if (job.status === "skipped") spinner.info("rebuild skipped");
            else spinner.fail(`${job.status}${job.error ? ` \u2014 ${job.error}` : ""}`);
          }
          process.exit(job.status === "succeeded" ? 0 : 1);
        }
      }
      await new Promise((r) => setTimeout(r, intervalMs));
    }
    if (spinner) spinner.warn("watch timeout \u2014 rebuild still in progress.");
    process.exit(2);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // In JSON mode there is no spinner; report the failure through printError
    // so the process does not exit 1 without any diagnostic.
    if (spinner) spinner.fail(message);
    else printError(message);
    process.exit(1);
  }
}
|
|
6264
|
+
|
|
6265
|
+
// src/commands/wiki/status.ts
|
|
6266
|
+
import ora6 from "ora";
|
|
6267
|
+
var DEFAULT_WATCH_INTERVAL_MS = 3e3;
|
|
6268
|
+
/**
 * `wiki status`: list recent compile jobs for a tenant (or one agent), or with
 * `--watch` follow the most recent job until it reaches a terminal status.
 *
 * Without `--watch` the jobs are rendered once and the process exits 0.
 * With `--watch`: exits 0 immediately when there are no jobs; otherwise polls
 * every `DEFAULT_WATCH_INTERVAL_MS` until the timeout. Exit codes in watch
 * mode as implemented below: 0 when the job succeeded, 2 for any other
 * terminal status and for a timeout, 1 when polling threw.
 * A failed initial fetch exits 2 when classified as forbidden, else 1.
 *
 * @param {object} opts - Parsed command options; `agent`, `limit`, `timeout`
 *   and `watch` are read here, the rest is forwarded to the context resolver.
 * @returns {Promise<void>} In practice the process exits before this resolves.
 */
async function runWikiStatus(opts) {
  const ctx = await resolveWikiContext(opts);
  const ownerId = opts.agent ?? null;
  const limit = toInt(opts.limit, 10);
  const timeoutSec = toInt(opts.timeout, 900);

  // Lazily fetched id -> display-name map; cached after the first call.
  let agentNameById = null;
  const resolveAgentName = async () => {
    if (agentNameById) return agentNameById;
    const data = await gqlQuery(ctx.client, AllTenantAgentsForWikiDoc, {
      tenantId: ctx.tenantId
    });
    agentNameById = Object.fromEntries(
      (data.allTenantAgents ?? []).map((a) => [a.id, a.name ?? a.id])
    );
    return agentNameById;
  };

  let jobs;
  try {
    jobs = await fetchJobs(ctx, { ownerId, limit });
  } catch (err) {
    const classified = classifyMutationError(err);
    printError(classified.message);
    if (classified.forbidden) {
      printForbiddenHint(ctx.tenantSlug);
      process.exit(2);
    }
    process.exit(1);
  }

  if (!opts.watch) {
    await renderJobs(jobs, {
      tenantSlug: ctx.tenantSlug,
      scope: ownerId ? { agentId: ownerId } : { tenantWide: true },
      resolveAgentName
    });
    process.exit(0);
  }

  if (jobs.length === 0) {
    printWarning(
      `No compile jobs yet for this scope. --watch exits immediately; re-run once activity starts.`
    );
    if (isJsonMode()) {
      printJson({
        ok: true,
        scope: { tenantId: ctx.tenantId, ownerId },
        jobs: []
      });
    }
    process.exit(0);
  }

  // Follow the first job of the initial fetch.
  const latestId = jobs[0].id;
  const spinner = isJsonMode() ? null : ora6({
    text: `Watching job ${shortJobId(latestId)}\u2026`,
    prefixText: " "
  }).start();
  const deadline = Date.now() + timeoutSec * 1e3;
  try {
    while (Date.now() < deadline) {
      const next = await fetchJobs(ctx, { ownerId, limit: 5 });
      // Fall back to the first returned job when the watched one is not in the page.
      const latest = next.find((j) => j.id === latestId) ?? next[0];
      if (latest) {
        if (spinner)
          spinner.text = `status=${latest.status} attempt=${latest.attempt}`;
        if (isTerminalCompileStatus(latest.status)) {
          if (spinner) {
            if (latest.status === "succeeded") spinner.succeed("succeeded");
            else if (latest.status === "skipped") spinner.info("skipped");
            else
              spinner.fail(
                `${latest.status}${latest.error ? ` \u2014 ${latest.error}` : ""}`
              );
          }
          if (isJsonMode()) {
            printJson({
              ok: latest.status === "succeeded",
              scope: { tenantId: ctx.tenantId, ownerId },
              job: latest
            });
          }
          process.exit(latest.status === "succeeded" ? 0 : 2);
        }
      }
      await new Promise((r) => setTimeout(r, DEFAULT_WATCH_INTERVAL_MS));
    }
    if (spinner) spinner.warn(`watch timeout after ${timeoutSec}s.`);
    process.exit(2);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    // In JSON mode there is no spinner; report the failure through printError
    // (as the initial-fetch path does) instead of exiting 1 silently.
    if (spinner) spinner.fail(message);
    else printError(message);
    process.exit(1);
  }
}
|
|
6358
|
+
/**
 * Run the compile-jobs query for the context's tenant.
 *
 * @param {object} ctx - Wiki context; `client` and `tenantId` are read.
 * @param {{ ownerId: (string|null), limit: number }} args - Query variables
 *   passed through as `ownerId` and `limit`.
 * @returns {Promise<Array<object>>} The `wikiCompileJobs` field of the response.
 */
async function fetchJobs(ctx, args) {
  const response = await gqlQuery(ctx.client, WikiCompileJobsDoc, {
    tenantId: ctx.tenantId,
    ownerId: args.ownerId,
    limit: args.limit
  });
  const { wikiCompileJobs: jobList } = response;
  return jobList;
}
|
|
6366
|
+
/**
 * Print a list of compile jobs, as JSON in JSON mode or as a table otherwise.
 *
 * The AGENT column is only included when the scope object has a `tenantWide`
 * key; in that case agent names come from `args.resolveAgentName()`, falling
 * back to the first eight characters of the owner id.
 *
 * @param {Array<object>} jobs - Jobs to display.
 * @param {{ tenantSlug: string, scope: object, resolveAgentName: Function }} args -
 *   `scope` is either `{ agentId }` or `{ tenantWide: true }`.
 * @returns {Promise<void>}
 */
async function renderJobs(jobs, args) {
  if (isJsonMode()) {
    const scopeOut = "agentId" in args.scope ? { agentId: args.scope.agentId } : { tenantWide: true };
    printJson({ ok: true, scope: scopeOut, jobs });
    return;
  }

  if (jobs.length === 0) {
    let label;
    if ("agentId" in args.scope) {
      label = `agent ${args.scope.agentId}`;
    } else {
      label = `tenant ${args.tenantSlug}`;
    }
    console.log(` No recent compile jobs for ${label}.`);
    return;
  }

  const tenantWide = "tenantWide" in args.scope;
  const names = tenantWide ? await args.resolveAgentName() : {};

  const rows = [];
  for (const job of jobs) {
    rows.push({
      id: job.id.slice(0, 8),
      agent: tenantWide ? (names[job.ownerId] ?? job.ownerId.slice(0, 8)) : "\u2014",
      status: job.status,
      trigger: job.trigger,
      attempt: String(job.attempt),
      duration: fmtDuration(job.startedAt, job.finishedAt),
      records: extractMetric(job.metrics, "records_read"),
      pages: extractMetric(job.metrics, "pages_upserted"),
      started: fmtIso2(job.startedAt ?? job.createdAt)
    });
  }

  const columns = [
    { key: "id", header: "JOB" },
    ...(tenantWide ? [{ key: "agent", header: "AGENT" }] : []),
    { key: "status", header: "STATUS" },
    { key: "trigger", header: "TRIGGER" },
    { key: "attempt", header: "TRY" },
    { key: "duration", header: "DUR" },
    { key: "records", header: "RECS" },
    { key: "pages", header: "PAGES" },
    { key: "started", header: "STARTED" }
  ];
  printTable(rows, columns);
}
|
|
6407
|
+
/**
 * Coerce an option value to a positive integer, or return a fallback.
 *
 * Strings are parsed with `Number.parseInt(v, 10)`; numbers are truncated
 * toward zero so that numeric and string inputs agree (`2.7` and `"2.7"`
 * both yield 2). Previously a numeric `2.7` was returned unchanged.
 *
 * @param {unknown} v - Raw value (typically a CLI option string or a number).
 * @param {number} fallback - Returned when `v` is null/undefined/empty, does
 *   not parse to a finite number, or is not greater than zero after truncation.
 * @returns {number} A positive integer, or `fallback`.
 */
function toInt(v, fallback) {
  if (v == null || v === "") return fallback;
  const n = typeof v === "number" ? Math.trunc(v) : Number.parseInt(v, 10);
  return Number.isFinite(n) && n > 0 ? n : fallback;
}
|
|
6412
|
+
/**
 * Format a timestamp as an ISO-8601 UTC string without milliseconds.
 *
 * @param {string|number|Date|null|undefined} iso - Value accepted by `new Date()`.
 * @returns {*} An em dash for falsy input, the input itself when it does not
 *   parse to a valid date, otherwise e.g. "2024-01-02T03:04:05Z".
 */
function fmtIso2(iso) {
  if (!iso) {
    return "\u2014";
  }
  const parsed = new Date(iso);
  const isValid = !Number.isNaN(parsed.getTime());
  if (!isValid) {
    return iso;
  }
  const withMillis = parsed.toISOString();
  // Drop the ".sss" fraction but keep the trailing "Z".
  return withMillis.replace(/\.\d{3}Z$/, "Z");
}
|
|
6418
|
+
/**
 * Format the elapsed time between two timestamps as "Ns" or "MmSSs".
 *
 * When `finishedAt` is falsy the current time is used as the end point.
 *
 * @param {string|number|Date|null|undefined} startedAt - Start timestamp.
 * @param {string|number|Date|null|undefined} finishedAt - End timestamp.
 * @returns {string} An em dash when the start is missing, either timestamp
 *   is invalid, or the end precedes the start; otherwise the duration
 *   rounded to whole seconds.
 */
function fmtDuration(startedAt, finishedAt) {
  const placeholder = "\u2014";
  if (!startedAt) return placeholder;

  const startMs = new Date(startedAt).getTime();
  const endMs = finishedAt ? new Date(finishedAt).getTime() : Date.now();
  const usable = Number.isFinite(startMs) && Number.isFinite(endMs) && endMs >= startMs;
  if (!usable) return placeholder;

  const totalSeconds = Math.round((endMs - startMs) / 1e3);
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  const minutes = Math.floor(totalSeconds / 60);
  const paddedSeconds = String(totalSeconds % 60).padStart(2, "0");
  return `${minutes}m${paddedSeconds}s`;
}
|
|
6429
|
+
/**
 * Read one metric from a job's metrics object as a display string.
 *
 * @param {unknown} metrics - Metrics container; anything that is not a
 *   non-null object yields the placeholder.
 * @param {string} key - Metric name to look up.
 * @returns {string} The stringified value, or an em dash when the container
 *   is unusable or the value is null/undefined.
 */
function extractMetric(metrics, key) {
  const isRecord = Boolean(metrics) && typeof metrics === "object";
  const value = isRecord ? metrics[key] : null;
  return value == null ? "\u2014" : String(value);
}
|
|
6436
|
+
|
|
6437
|
+
// src/commands/wiki.ts
|
|
6438
|
+
/**
 * Register the `wiki` command group (`compile`, `rebuild`, `status`) on the
 * given program object.
 *
 * Each subcommand's action forwards its own options to the matching runner,
 * filling `stage` from the grandparent command's options when it was not
 * given locally, and setting `json` from the grandparent's `json` option.
 *
 * @param {object} program2 - Program object exposing a chainable
 *   `command()/description()/option()/addHelpText()/action()` API.
 * @returns {void}
 */
function registerWikiCommand(program2) {
  // Merge subcommand options with the stage/json flags read from cmd.parent.parent.
  const withInheritedFlags = (opts, cmd) => {
    const parent = cmd.parent?.parent;
    return {
      ...opts,
      stage: opts.stage ?? parent?.opts().stage,
      json: parent?.opts().json === true
    };
  };

  const compileHelp = `
Examples:
# Interactive: pick the agent (or "All agents") from a list
$ thinkwork wiki compile

# Single agent, scripted
$ thinkwork wiki compile --tenant acme --agent agt-xyz --json

# Fan-out across every agent in the tenant
$ thinkwork wiki compile --tenant acme --all

# Spike a different Bedrock model for one run
$ thinkwork wiki compile --tenant acme --agent agt-xyz \\
--model anthropic.claude-sonnet-4-6-v1:0
`;

  const rebuildHelp = `
Examples:
# Interactive confirm
$ thinkwork wiki rebuild --tenant acme --agent agt-xyz

# Scripted (no prompt)
$ thinkwork wiki rebuild --tenant acme --agent agt-xyz --yes --json
`;

  const statusHelp = `
Examples:
# Tenant-wide (admin)
$ thinkwork wiki status --tenant acme

# Single agent, watching until a job settles
$ thinkwork wiki status --tenant acme --agent agt-xyz --watch
`;

  const wiki = program2
    .command("wiki")
    .description("Compile and rebuild agent wiki pages (Compounding Memory). Admin-only.");

  wiki
    .command("compile")
    .description("Enqueue a wiki compile for a single agent (--agent) or all tenant agents (--all).")
    .option("-s, --stage <name>", "Deployment stage")
    .option("-t, --tenant <slug>", "Tenant slug")
    .option("--agent <id>", "Agent ID, slug, or name. Bypasses the picker.")
    .option("--all", "Fan out to every non-system agent in the tenant.")
    .option(
      "--model <id>",
      "Bedrock model ID override for this run. Defaults to server BEDROCK_MODEL_ID."
    )
    .option(
      "--watch",
      "After enqueue, poll wiki_compile_jobs until the job reaches a terminal state (single-agent only)."
    )
    .addHelpText("after", compileHelp)
    .action(async (opts, cmd) => {
      await runWikiCompile(withInheritedFlags(opts, cmd));
    });

  wiki
    .command("rebuild")
    .description(
      "Destructive: archive an agent's active wiki pages and enqueue a fresh compile. Single-agent only."
    )
    .option("-s, --stage <name>", "Deployment stage")
    .option("-t, --tenant <slug>", "Tenant slug")
    .option("--agent <id>", "Agent ID, slug, or name. Bypasses the picker.")
    .option("--model <id>", "Bedrock model ID override for the post-reset compile.")
    .option("-y, --yes", "Skip the confirmation prompt.")
    .option(
      "--watch",
      "After enqueue, poll wiki_compile_jobs until the job reaches a terminal state."
    )
    .addHelpText("after", rebuildHelp)
    .action(async (opts, cmd) => {
      await runWikiRebuild(withInheritedFlags(opts, cmd));
    });

  wiki
    .command("status")
    .description("Show recent compile jobs for a tenant (optionally filtered to one agent).")
    .option("-s, --stage <name>", "Deployment stage")
    .option("-t, --tenant <slug>", "Tenant slug")
    .option("--agent <id>", "Restrict to a single agent. Omit for tenant-wide recent activity.")
    .option("-n, --limit <n>", "Max jobs to return.", "10")
    .option("--watch", "Poll until the most-recent job reaches a terminal state.")
    .option("--timeout <seconds>", "Max seconds to watch (default 900 = 15m).", "900")
    .addHelpText("after", statusHelp)
    .action(async (opts, cmd) => {
      await runWikiStatus(withInheritedFlags(opts, cmd));
    });
}
|
|
6529
|
+
|
|
4615
6530
|
// src/cli.ts
|
|
4616
6531
|
var program = new Command();
|
|
4617
6532
|
program.name("thinkwork").description(
|
|
@@ -4676,6 +6591,8 @@ registerBudgetCommand(program);
|
|
|
4676
6591
|
registerPerformanceCommand(program);
|
|
4677
6592
|
registerTraceCommand(program);
|
|
4678
6593
|
registerDashboardCommand(program);
|
|
6594
|
+
registerEvalCommand(program);
|
|
6595
|
+
registerWikiCommand(program);
|
|
4679
6596
|
for (const cmd of program.commands) {
|
|
4680
6597
|
if (!cmd.options.some((o) => o.long === "--json")) {
|
|
4681
6598
|
cmd.option("--json", "Emit machine-readable JSON on stdout.");
|