agent-inspect 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -1
- package/README.md +11 -6
- package/docs/ADAPTER-CONFORMANCE.md +7 -3
- package/docs/ADAPTERS.md +120 -5
- package/docs/API.md +123 -21
- package/docs/CLI.md +154 -6
- package/docs/KNOWN-ISSUES.md +7 -1
- package/docs/LIMITATIONS.md +7 -1
- package/docs/SCHEMA.md +1 -0
- package/package.json +12 -2
- package/packages/cli/dist/index.cjs +2057 -33
- package/packages/cli/dist/index.cjs.map +1 -1
- package/packages/cli/dist/index.mjs +2057 -33
- package/packages/cli/dist/index.mjs.map +1 -1
- package/packages/core/dist/advanced.d.cts +4 -4
- package/packages/core/dist/advanced.d.ts +4 -4
- package/packages/core/dist/checks.cjs +1535 -0
- package/packages/core/dist/checks.cjs.map +1 -0
- package/packages/core/dist/checks.d.cts +585 -0
- package/packages/core/dist/checks.d.ts +585 -0
- package/packages/core/dist/checks.mjs +1512 -0
- package/packages/core/dist/checks.mjs.map +1 -0
- package/packages/core/dist/diff.d.cts +3 -3
- package/packages/core/dist/diff.d.ts +3 -3
- package/packages/core/dist/exporters.d.cts +3 -3
- package/packages/core/dist/exporters.d.ts +3 -3
- package/packages/core/dist/index.d.cts +6 -6
- package/packages/core/dist/index.d.ts +6 -6
- package/packages/core/dist/{inspect-event-Des4JDHo.d.cts → inspect-event-CevRYp58.d.cts} +1 -1
- package/packages/core/dist/{inspect-event-Des4JDHo.d.ts → inspect-event-CevRYp58.d.ts} +1 -1
- package/packages/core/dist/{log-config-C1GcJPIM.d.ts → log-config-BPHS4Sds.d.ts} +1 -1
- package/packages/core/dist/{log-config-BnH8Ykcb.d.cts → log-config-DanPV3P9.d.cts} +1 -1
- package/packages/core/dist/logs.d.cts +3 -3
- package/packages/core/dist/logs.d.ts +3 -3
- package/packages/core/dist/{persisted-inspect-event-DiFto0K2.d.ts → persisted-inspect-event-Cw7TeYGr.d.ts} +1 -1
- package/packages/core/dist/{persisted-inspect-event-0kaRADsp.d.cts → persisted-inspect-event-DHPfzUd8.d.cts} +1 -1
- package/packages/core/dist/persisted.d.cts +5 -5
- package/packages/core/dist/persisted.d.ts +5 -5
- package/packages/core/dist/readers.d.cts +2 -2
- package/packages/core/dist/readers.d.ts +2 -2
- package/packages/core/dist/{types-tSix7tfv.d.ts → types-Ap9uMdx_.d.ts} +1 -1
- package/packages/core/dist/{types-DB8jB6Jg.d.cts → types-B2-BU5CS.d.cts} +1 -1
- package/packages/core/dist/writers.d.cts +2 -2
- package/packages/core/dist/writers.d.ts +2 -2
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
var fs = require('fs');
|
|
5
|
-
var
|
|
5
|
+
var path10 = require('path');
|
|
6
6
|
var url = require('url');
|
|
7
7
|
var commander = require('commander');
|
|
8
8
|
var promises = require('fs/promises');
|
|
@@ -16,14 +16,14 @@ var tty = require('tty');
|
|
|
16
16
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
17
17
|
function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
18
18
|
|
|
19
|
-
var
|
|
19
|
+
var path10__default = /*#__PURE__*/_interopDefault(path10);
|
|
20
20
|
var crypto__default = /*#__PURE__*/_interopDefault(crypto);
|
|
21
21
|
var os__default = /*#__PURE__*/_interopDefault(os);
|
|
22
22
|
var process2__default = /*#__PURE__*/_interopDefault(process2);
|
|
23
23
|
var tty__default = /*#__PURE__*/_interopDefault(tty);
|
|
24
24
|
|
|
25
25
|
// package.json
|
|
26
|
-
var version = "1.
|
|
26
|
+
var version = "1.8.0";
|
|
27
27
|
|
|
28
28
|
// packages/core/src/types.ts
|
|
29
29
|
var STEP_TYPES = [
|
|
@@ -1905,7 +1905,7 @@ function formatDuration(ms) {
|
|
|
1905
1905
|
// packages/core/src/utils.ts
|
|
1906
1906
|
var DEFAULT_TRACE_DIR_NAME = ".agent-inspect";
|
|
1907
1907
|
var RUNS_DIR_NAME = "runs";
|
|
1908
|
-
var FALLBACK_TRACE_DIR =
|
|
1908
|
+
var FALLBACK_TRACE_DIR = path10__default.default.join(
|
|
1909
1909
|
os__default.default.tmpdir(),
|
|
1910
1910
|
"agent-inspect",
|
|
1911
1911
|
RUNS_DIR_NAME
|
|
@@ -1943,7 +1943,7 @@ function getDefaultTraceDir() {
|
|
|
1943
1943
|
if (typeof home !== "string" || home.trim() === "") {
|
|
1944
1944
|
return FALLBACK_TRACE_DIR;
|
|
1945
1945
|
}
|
|
1946
|
-
return
|
|
1946
|
+
return path10__default.default.join(home, DEFAULT_TRACE_DIR_NAME, RUNS_DIR_NAME);
|
|
1947
1947
|
} catch {
|
|
1948
1948
|
return FALLBACK_TRACE_DIR;
|
|
1949
1949
|
}
|
|
@@ -1951,20 +1951,20 @@ function getDefaultTraceDir() {
|
|
|
1951
1951
|
function getTraceFilePath(runId, traceDir) {
|
|
1952
1952
|
const baseDir = traceDir ?? getDefaultTraceDir();
|
|
1953
1953
|
let safeId = typeof runId === "string" && runId.trim() !== "" ? runId.trim() : "run_unknown";
|
|
1954
|
-
safeId =
|
|
1954
|
+
safeId = path10__default.default.basename(safeId);
|
|
1955
1955
|
if (safeId === "" || safeId === "." || safeId === "..") {
|
|
1956
1956
|
safeId = "run_unknown";
|
|
1957
1957
|
}
|
|
1958
|
-
return
|
|
1958
|
+
return path10__default.default.join(baseDir, `${safeId}.jsonl`);
|
|
1959
1959
|
}
|
|
1960
1960
|
async function ensureTraceDir(traceDir) {
|
|
1961
|
-
const primary =
|
|
1961
|
+
const primary = path10__default.default.resolve(traceDir);
|
|
1962
1962
|
try {
|
|
1963
1963
|
await promises.mkdir(primary, { recursive: true });
|
|
1964
1964
|
return primary;
|
|
1965
1965
|
} catch {
|
|
1966
1966
|
warn(`Failed to create trace directory: ${primary}`);
|
|
1967
|
-
const fallback =
|
|
1967
|
+
const fallback = path10__default.default.resolve(FALLBACK_TRACE_DIR);
|
|
1968
1968
|
try {
|
|
1969
1969
|
await promises.mkdir(fallback, { recursive: true });
|
|
1970
1970
|
return fallback;
|
|
@@ -2638,7 +2638,7 @@ var TraceDirectory = class {
|
|
|
2638
2638
|
this.#dir = resolveTraceDir(options);
|
|
2639
2639
|
}
|
|
2640
2640
|
getPath(filename) {
|
|
2641
|
-
return filename ?
|
|
2641
|
+
return filename ? path10__default.default.join(this.#dir, filename) : this.#dir;
|
|
2642
2642
|
}
|
|
2643
2643
|
async list() {
|
|
2644
2644
|
try {
|
|
@@ -2665,7 +2665,7 @@ function parseIsoToMs3(value) {
|
|
|
2665
2665
|
}
|
|
2666
2666
|
async function extractMetadata(filePath, _quickScan) {
|
|
2667
2667
|
const stats = await promises.stat(filePath);
|
|
2668
|
-
let runIdFromFile =
|
|
2668
|
+
let runIdFromFile = path10__default.default.basename(filePath);
|
|
2669
2669
|
if (runIdFromFile.endsWith(".jsonl")) {
|
|
2670
2670
|
runIdFromFile = runIdFromFile.slice(0, -".jsonl".length);
|
|
2671
2671
|
}
|
|
@@ -4420,8 +4420,8 @@ function matchStepLevel(m, events, opts) {
|
|
|
4420
4420
|
fields.push("step.name");
|
|
4421
4421
|
}
|
|
4422
4422
|
if (opts.toolQuery) {
|
|
4423
|
-
const
|
|
4424
|
-
if (!nameMatches(
|
|
4423
|
+
const toolName2 = typeof s.metadata?.toolName === "string" ? s.metadata.toolName : s.name;
|
|
4424
|
+
if (!nameMatches(toolName2, opts.toolQuery)) continue;
|
|
4425
4425
|
fields.push("step.tool");
|
|
4426
4426
|
}
|
|
4427
4427
|
if (opts.durationFilter) {
|
|
@@ -4626,7 +4626,7 @@ function findReaderByFormat(format, readers) {
|
|
|
4626
4626
|
}
|
|
4627
4627
|
async function jsonlFilesInDirectory(dirPath) {
|
|
4628
4628
|
const entries = await promises.readdir(dirPath, { withFileTypes: true });
|
|
4629
|
-
return entries.filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) =>
|
|
4629
|
+
return entries.filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => path10__default.default.join(dirPath, entry.name)).sort((a, b) => a.localeCompare(b));
|
|
4630
4630
|
}
|
|
4631
4631
|
async function resolveInput(input3) {
|
|
4632
4632
|
const cached = resolvedInputCache.get(input3);
|
|
@@ -6206,13 +6206,13 @@ function pairSteps(left, right) {
|
|
|
6206
6206
|
return pairs;
|
|
6207
6207
|
}
|
|
6208
6208
|
function compareLeafSteps(L, R, segments, opts, out) {
|
|
6209
|
-
const
|
|
6209
|
+
const path12 = buildPath(segments);
|
|
6210
6210
|
if (L.name !== R.name) {
|
|
6211
6211
|
out.push({
|
|
6212
6212
|
kind: "structure",
|
|
6213
6213
|
severity: "warning",
|
|
6214
6214
|
message: "Step name differs",
|
|
6215
|
-
path:
|
|
6215
|
+
path: path12,
|
|
6216
6216
|
left: L.name,
|
|
6217
6217
|
right: R.name
|
|
6218
6218
|
});
|
|
@@ -6222,7 +6222,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6222
6222
|
kind: "step-type",
|
|
6223
6223
|
severity: "warning",
|
|
6224
6224
|
message: "Step type differs",
|
|
6225
|
-
path:
|
|
6225
|
+
path: path12,
|
|
6226
6226
|
left: L.type,
|
|
6227
6227
|
right: R.type
|
|
6228
6228
|
});
|
|
@@ -6232,7 +6232,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6232
6232
|
kind: "step-status",
|
|
6233
6233
|
severity: "warning",
|
|
6234
6234
|
message: "Step status differs",
|
|
6235
|
-
path:
|
|
6235
|
+
path: path12,
|
|
6236
6236
|
left: L.status,
|
|
6237
6237
|
right: R.status
|
|
6238
6238
|
});
|
|
@@ -6244,7 +6244,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6244
6244
|
kind: "error",
|
|
6245
6245
|
severity: "error",
|
|
6246
6246
|
message: "Step error message differs",
|
|
6247
|
-
path:
|
|
6247
|
+
path: path12,
|
|
6248
6248
|
left: le || void 0,
|
|
6249
6249
|
right: re || void 0
|
|
6250
6250
|
});
|
|
@@ -6262,7 +6262,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6262
6262
|
kind: "duration",
|
|
6263
6263
|
severity: "info",
|
|
6264
6264
|
message: "Step duration differs",
|
|
6265
|
-
path:
|
|
6265
|
+
path: path12,
|
|
6266
6266
|
left: ld,
|
|
6267
6267
|
right: rd
|
|
6268
6268
|
});
|
|
@@ -6275,7 +6275,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6275
6275
|
kind: "metadata",
|
|
6276
6276
|
severity: "info",
|
|
6277
6277
|
message: "Step metadata differs",
|
|
6278
|
-
path:
|
|
6278
|
+
path: path12,
|
|
6279
6279
|
left: L.metadata,
|
|
6280
6280
|
right: R.metadata
|
|
6281
6281
|
});
|
|
@@ -6287,7 +6287,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6287
6287
|
kind: "output",
|
|
6288
6288
|
severity: "info",
|
|
6289
6289
|
message: "Output preview differs",
|
|
6290
|
-
path:
|
|
6290
|
+
path: path12,
|
|
6291
6291
|
left: L.outputPreview,
|
|
6292
6292
|
right: R.outputPreview
|
|
6293
6293
|
});
|
|
@@ -6937,11 +6937,11 @@ createChalk({ level: stderrColor ? stderrColor.level : 0 });
|
|
|
6937
6937
|
var source_default = chalk;
|
|
6938
6938
|
|
|
6939
6939
|
// packages/core/src/diff/renderer.ts
|
|
6940
|
-
function formatPath(
|
|
6941
|
-
if (
|
|
6940
|
+
function formatPath(path12) {
|
|
6941
|
+
if (path12 === void 0 || path12.path.length === 0) {
|
|
6942
6942
|
return "(run)";
|
|
6943
6943
|
}
|
|
6944
|
-
return
|
|
6944
|
+
return path12.path.map((s) => s.name).join(" > ");
|
|
6945
6945
|
}
|
|
6946
6946
|
function formatValue(v, verbose) {
|
|
6947
6947
|
if (v === void 0) return "(undefined)";
|
|
@@ -7222,8 +7222,8 @@ async function stepLlm(model, fn) {
|
|
|
7222
7222
|
metadata: { model: modelName }
|
|
7223
7223
|
});
|
|
7224
7224
|
}
|
|
7225
|
-
async function stepTool(
|
|
7226
|
-
const normalized = typeof
|
|
7225
|
+
async function stepTool(toolName2, fn) {
|
|
7226
|
+
const normalized = typeof toolName2 === "string" && toolName2.trim() !== "" ? toolName2.trim() : "unknown-tool";
|
|
7227
7227
|
return stepImpl(`tool:${normalized}`, fn, {
|
|
7228
7228
|
type: "tool",
|
|
7229
7229
|
metadata: { toolName: normalized }
|
|
@@ -8534,9 +8534,9 @@ Trace directory: ${traceDir}`);
|
|
|
8534
8534
|
if (validation !== void 0 && !validation.ok) {
|
|
8535
8535
|
process.exitCode = 1;
|
|
8536
8536
|
}
|
|
8537
|
-
const outPath = options.output !== void 0 && options.output.trim() !== "" ?
|
|
8537
|
+
const outPath = options.output !== void 0 && options.output.trim() !== "" ? path10__default.default.resolve(options.output.trim()) : void 0;
|
|
8538
8538
|
if (outPath !== void 0) {
|
|
8539
|
-
await promises.mkdir(
|
|
8539
|
+
await promises.mkdir(path10__default.default.dirname(outPath), { recursive: true });
|
|
8540
8540
|
await promises.writeFile(outPath, result.content, "utf-8");
|
|
8541
8541
|
const vlabel = validation !== void 0 ? validation.ok ? "ok" : "failed" : "skipped";
|
|
8542
8542
|
console.log(`Wrote ${result.fileExtension} export to ${outPath} (validation: ${vlabel})`);
|
|
@@ -8928,9 +8928,9 @@ async function reportCommand(runId, options = {}) {
|
|
|
8928
8928
|
redactionProfile,
|
|
8929
8929
|
correlation: !options.noCorrelation
|
|
8930
8930
|
});
|
|
8931
|
-
const outPath = options.output !== void 0 && options.output.trim() !== "" ?
|
|
8931
|
+
const outPath = options.output !== void 0 && options.output.trim() !== "" ? path10__default.default.resolve(options.output.trim()) : void 0;
|
|
8932
8932
|
if (outPath !== void 0) {
|
|
8933
|
-
await promises.mkdir(
|
|
8933
|
+
await promises.mkdir(path10__default.default.dirname(outPath), { recursive: true });
|
|
8934
8934
|
await promises.writeFile(outPath, result.content, "utf-8");
|
|
8935
8935
|
console.log(`Wrote ${result.fileExtension} report to ${outPath}`);
|
|
8936
8936
|
}
|
|
@@ -9094,6 +9094,1982 @@ async function openCommand(input3, options = {}, stdin = process.stdin) {
|
|
|
9094
9094
|
}
|
|
9095
9095
|
}
|
|
9096
9096
|
|
|
9097
|
+
// packages/core/src/checks/index.ts
|
|
9098
|
+
var SEVERITY_RANK = {
|
|
9099
|
+
error: 0,
|
|
9100
|
+
warning: 1,
|
|
9101
|
+
info: 2
|
|
9102
|
+
};
|
|
9103
|
+
var STATUS_RANK = {
|
|
9104
|
+
fail: 0,
|
|
9105
|
+
warning: 1,
|
|
9106
|
+
pass: 2
|
|
9107
|
+
};
|
|
9108
|
+
var CONFIDENCE_RANK = {
|
|
9109
|
+
unknown: 0,
|
|
9110
|
+
heuristic: 1,
|
|
9111
|
+
correlated: 2,
|
|
9112
|
+
explicit: 3
|
|
9113
|
+
};
|
|
9114
|
+
var DEFAULT_SENSITIVE_KEYS = [
|
|
9115
|
+
"authorization",
|
|
9116
|
+
"cookie",
|
|
9117
|
+
"token",
|
|
9118
|
+
"apikey",
|
|
9119
|
+
"api_key",
|
|
9120
|
+
"password",
|
|
9121
|
+
"secret",
|
|
9122
|
+
"email"
|
|
9123
|
+
];
|
|
9124
|
+
var DEFAULT_RAW_CONTENT_KEYS = [
|
|
9125
|
+
"body",
|
|
9126
|
+
"headers",
|
|
9127
|
+
"input",
|
|
9128
|
+
"messages",
|
|
9129
|
+
"output",
|
|
9130
|
+
"payload",
|
|
9131
|
+
"prompt",
|
|
9132
|
+
"requestbody",
|
|
9133
|
+
"request_body",
|
|
9134
|
+
"responsebody",
|
|
9135
|
+
"response_body",
|
|
9136
|
+
"rawprompt",
|
|
9137
|
+
"raw_prompt",
|
|
9138
|
+
"rawoutput",
|
|
9139
|
+
"raw_output",
|
|
9140
|
+
"toolinput",
|
|
9141
|
+
"tool_input",
|
|
9142
|
+
"tooloutput",
|
|
9143
|
+
"tool_output"
|
|
9144
|
+
];
|
|
9145
|
+
var DEFAULT_SECRET_PATTERNS = [
|
|
9146
|
+
{ id: "bearer-token", pattern: /Bearer\s+[A-Za-z0-9._~+/-]{12,}=*/ },
|
|
9147
|
+
{ id: "openai-key", pattern: /sk-[A-Za-z0-9_-]{16,}/ },
|
|
9148
|
+
{ id: "aws-access-key", pattern: /AKIA[0-9A-Z]{16}/ },
|
|
9149
|
+
{ id: "github-token", pattern: /gh[opsu]_[A-Za-z0-9_]{20,}/ },
|
|
9150
|
+
{ id: "key-value-secret", pattern: /(api[_-]?key|token|password|secret)=\S{8,}/i }
|
|
9151
|
+
];
|
|
9152
|
+
/**
 * Orders two optional strings with `localeCompare`.
 * A nullish operand is treated as the empty string.
 */
function compareStrings(a, b) {
  const left = a ?? "";
  const right = b ?? "";
  return left.localeCompare(right);
}
|
|
9155
|
+
/**
 * Builds an error-severity diagnostic record.
 * `ruleId` is attached only when it is truthy.
 */
function diagnostic(code, message, ruleId) {
  const entry = { code, message, severity: "error" };
  if (ruleId) {
    entry.ruleId = ruleId;
  }
  return entry;
}
|
|
9163
|
+
/**
 * Returns a fresh summary object with every counter set to zero.
 */
function emptySummary() {
  const counters = ["passed", "failed", "warnings", "errors"];
  return Object.fromEntries(counters.map((counter) => [counter, 0]));
}
|
|
9171
|
+
/**
 * Builds the result returned when checks could not be executed.
 *
 * The result is never ok, carries no findings, and copies the diagnostics.
 * Its summary is an empty summary whose `errors` counter is the number of
 * error-severity diagnostics. `runId` is included only when a run was selected.
 */
function errorResult(input3, diagnostics, selectedRun) {
  const result = {
    ok: false,
    status: "error",
    format: input3.read.format
  };
  if (selectedRun) {
    result.runId = selectedRun.runId;
  }
  const summary = emptySummary();
  summary.errors = diagnostics.filter((item) => item.severity === "error").length;
  result.summary = { ...summary };
  result.findings = [];
  result.diagnostics = [...diagnostics];
  return result;
}
|
|
9185
|
+
/**
 * Flattens a forest of nodes into one array in pre-order:
 * each node is followed by all of its descendants.
 */
function flattenNodes(nodes) {
  const flat = [];
  for (const node of nodes) {
    flat.push(node);
    for (const descendant of flattenNodes(node.children)) {
      flat.push(descendant);
    }
  }
  return flat;
}
|
|
9188
|
+
/**
 * Assembles the fact tables for one read result.
 *
 * With a truthy `selectedRun`, root nodes come from that run only; otherwise
 * every run in `input3.read.runs` contributes. Events are filtered by run id
 * whenever `selectedRun` is not `undefined`. The returned `runs` list always
 * holds all runs from `input3.read`, whether or not a run was selected.
 *
 * @returns an object with frozen copies of the read arrays, a map from event
 *   id to node, a map from parent id to child nodes, and the frozen root nodes.
 */
function buildFacts(input3, selectedRun) {
  const { read } = input3;
  const scopedRuns = selectedRun ? [selectedRun] : read.runs;
  const scopedRunIds = new Set(scopedRuns.map((run) => run.runId));
  let scopedEvents = read.events;
  if (selectedRun !== void 0) {
    scopedEvents = scopedEvents.filter((event) => scopedRunIds.has(event.runId));
  }
  const rootNodes = scopedRuns.flatMap((run) => run.children);
  const nodesByEventId = /* @__PURE__ */ new Map();
  const childrenByParentId = /* @__PURE__ */ new Map();
  for (const node of flattenNodes(rootNodes)) {
    const { eventId, parentId } = node.event;
    nodesByEventId.set(eventId, node);
    if (!parentId) continue;
    const siblings = childrenByParentId.get(parentId);
    if (siblings) {
      siblings.push(node);
    } else {
      childrenByParentId.set(parentId, [node]);
    }
  }
  const frozenCopy = (items) => Object.freeze([...items]);
  return {
    format: read.format,
    runs: frozenCopy(read.runs),
    events: frozenCopy(scopedEvents),
    readerWarnings: frozenCopy(read.warnings),
    unsupportedFields: frozenCopy(read.unsupportedFields),
    sourceFiles: frozenCopy(read.sourceFiles),
    nodesByEventId,
    childrenByParentId,
    rootNodes: Object.freeze(rootNodes)
  };
}
|
|
9216
|
+
/**
 * Decides which run the checks should execute against.
 *
 * Resolution order:
 *  1. A pre-selected run on `input3` wins, unless `runId` names a different run.
 *  2. Otherwise `runId`, when given, is looked up among `input3.read.runs`.
 *  3. Otherwise the only available run is used.
 *
 * @returns `{ run, diagnostics: [] }` on success, or `{ diagnostics }` holding
 *   one error diagnostic when no run can be chosen.
 */
function resolveSelectedRun(input3, runId) {
  const chosen = (run) => ({ run, diagnostics: [] });
  const rejected = (code, message) => ({ diagnostics: [diagnostic(code, message)] });

  const preselected = input3.selectedRun;
  if (preselected) {
    const conflicts = runId && preselected.runId !== runId;
    return conflicts ? rejected(
      "AI_CHECK_INVALID_ARGUMENTS",
      `Selected run ${preselected.runId} does not match requested run ${runId}.`
    ) : chosen(preselected);
  }

  const available = input3.read.runs;
  if (runId) {
    const match = available.find((candidate) => candidate.runId === runId);
    return match ? chosen(match) : rejected("AI_CHECK_RUN_SELECTION_REQUIRED", `Run not found: ${runId}.`);
  }

  switch (available.length) {
    case 1:
      return chosen(available[0]);
    case 0:
      return rejected("AI_CHECK_RUN_SELECTION_REQUIRED", "No runs are available for checks.");
    default:
      return rejected(
        "AI_CHECK_RUN_SELECTION_REQUIRED",
        "Multiple runs are available; select a run before executing checks."
      );
  }
}
|
|
9260
|
+
/**
 * Deduplicates the rule list and optionally narrows it to `selectedIds`.
 *
 * The first rule registered under an id is kept. Later duplicates and
 * requested ids that match no rule each produce an AI_CHECK_INVALID_CONFIG
 * diagnostic. A missing or empty `selectedIds` selects every rule.
 *
 * @returns `{ rules, diagnostics }` with rules sorted by `compareRules`.
 */
function selectRules(rules, selectedIds) {
  const diagnostics = [];
  const registry = /* @__PURE__ */ new Map();
  for (const rule of rules) {
    if (!registry.has(rule.id)) {
      registry.set(rule.id, rule);
    } else {
      diagnostics.push(
        diagnostic("AI_CHECK_INVALID_CONFIG", `Duplicate trace check rule id: ${rule.id}.`, rule.id)
      );
    }
  }
  let chosen = [...registry.values()];
  if (selectedIds && selectedIds.length > 0) {
    const wanted = new Set(selectedIds);
    for (const id of wanted) {
      if (registry.has(id)) continue;
      diagnostics.push(
        diagnostic("AI_CHECK_INVALID_CONFIG", `Unknown trace check rule id: ${id}.`, id)
      );
    }
    chosen = chosen.filter((rule) => wanted.has(rule.id));
  }
  return { rules: chosen.sort(compareRules), diagnostics };
}
|
|
9288
|
+
/**
 * Sort comparator that orders rules by `id` using `localeCompare`.
 */
function compareRules(a, b) {
  const { id: leftId } = a;
  const { id: rightId } = b;
  return leftId.localeCompare(rightId);
}
|
|
9291
|
+
/**
 * Returns the timestamp of the event named by the finding's first evidence
 * entry. Returns "" when there is no such entry, no event id, no matching
 * event in `eventById`, or no timestamp on the event.
 */
function eventTimestamp(finding, eventById) {
  const [firstEvidence] = finding.evidence;
  const eventId = firstEvidence?.eventId;
  if (!eventId) {
    return "";
  }
  const event = eventById.get(eventId);
  return event?.timestamp ?? "";
}
|
|
9295
|
+
/**
 * Creates the comparator used to order findings.
 *
 * Keys, in priority order: severity rank, rule id, status rank, then for the
 * first evidence entry: run id, event timestamp (looked up through
 * `eventById`), event id and path.
 */
function compareFindings(eventById) {
  return (a, b) => {
    const severityA = SEVERITY_RANK[a.severity];
    const severityB = SEVERITY_RANK[b.severity];
    // Compare with !== before subtracting so two unknown severities (both
    // undefined) fall through to the next key instead of yielding NaN.
    if (severityA !== severityB) {
      return severityA - severityB;
    }
    const ruleOrder = a.ruleId.localeCompare(b.ruleId);
    if (ruleOrder !== 0) return ruleOrder;
    const statusA = STATUS_RANK[a.status];
    const statusB = STATUS_RANK[b.status];
    if (statusA !== statusB) {
      return statusA - statusB;
    }
    const evidenceA = a.evidence[0];
    const evidenceB = b.evidence[0];
    // Tie-breakers are evaluated lazily, in order, until one is non-zero.
    const tieBreakers = [
      () => compareStrings(evidenceA?.runId, evidenceB?.runId),
      () => eventTimestamp(a, eventById).localeCompare(eventTimestamp(b, eventById)),
      () => compareStrings(evidenceA?.eventId, evidenceB?.eventId)
    ];
    for (const tieBreaker of tieBreakers) {
      const order = tieBreaker();
      if (order !== 0) return order;
    }
    return compareStrings(evidenceA?.path, evidenceB?.path);
  };
}
|
|
9314
|
+
/**
 * Produces the canonical form of a finding emitted by `rule`.
 *
 * A falsy `ruleId` falls back to `rule.id` and a nullish `severity` to
 * `rule.defaultSeverity`. `expected` and `actual` are kept only when defined.
 * `evidence` is copied, defaulting to an empty array.
 */
function normalizeFinding(rule, finding) {
  const normalized = {
    ruleId: finding.ruleId || rule.id,
    severity: finding.severity ?? rule.defaultSeverity,
    status: finding.status,
    message: finding.message
  };
  for (const optionalField of ["expected", "actual"]) {
    const fieldValue = finding[optionalField];
    if (fieldValue !== void 0) {
      normalized[optionalField] = fieldValue;
    }
  }
  normalized.evidence = [...finding.evidence ?? []];
  return normalized;
}
|
|
9325
|
+
/**
 * Tallies findings and diagnostics into summary counters.
 *
 *  - passed:   findings with status "pass"
 *  - failed:   findings with status "fail" and severity "error"
 *  - warnings: findings with status "warning" or severity "warning"
 *  - errors:   diagnostics with severity "error"
 *
 * The categories are independent: one finding can count toward several, or none.
 */
function summarize(findings, diagnostics) {
  const summary = { passed: 0, failed: 0, warnings: 0, errors: 0 };
  for (const { status, severity } of findings) {
    if (status === "pass") summary.passed += 1;
    if (status === "fail" && severity === "error") summary.failed += 1;
    if (status === "warning" || severity === "warning") summary.warnings += 1;
  }
  for (const item of diagnostics) {
    if (item.severity === "error") summary.errors += 1;
  }
  return summary;
}
|
|
9337
|
+
/**
 * Returns the first attribute among `keys`, in order, whose value is a string
 * that is not blank after trimming. The value is returned untrimmed.
 * Returns `undefined` when no key qualifies.
 */
function stringAttr2(event, keys) {
  const attributes = event.attributes;
  for (const key of keys) {
    const candidate = attributes?.[key];
    if (typeof candidate !== "string") continue;
    if (candidate.trim() === "") continue;
    return candidate;
  }
  return void 0;
}
|
|
9344
|
+
/**
 * Returns the first attribute among `keys`, in order, whose value is a finite
 * number, or `undefined` when none qualifies.
 */
function numericAttr(event, keys) {
  const attributes = event.attributes;
  for (const key of keys) {
    const candidate = attributes?.[key];
    // Number.isFinite never coerces, so it also rejects non-number values.
    if (Number.isFinite(candidate)) {
      return candidate;
    }
  }
  return void 0;
}
|
|
9351
|
+
/**
 * Returns the first attribute among `keys`, in order, whose value is a real
 * boolean, or `undefined` when none qualifies.
 */
function booleanAttr(event, keys) {
  const attributes = event.attributes;
  for (const key of keys) {
    const candidate = attributes?.[key];
    if (candidate === true || candidate === false) {
      return candidate;
    }
  }
  return void 0;
}
|
|
9358
|
+
/**
 * Removes the first prefix in `prefixes` that `name` starts with.
 * Returns `name` unchanged when no prefix matches.
 */
function stripPrefix(name, prefixes) {
  const matched = prefixes.find((prefix) => name.startsWith(prefix));
  // Compare against undefined: an empty-string prefix is a legitimate match.
  return matched === void 0 ? name : name.slice(matched.length);
}
|
|
9364
|
+
/**
 * Builds an evidence record that points at `event`.
 *
 * Identity fields are copied even when undefined. `traceId` and `spanId` come
 * from `event.trace` when it exists. `path` is added only when `path12` is truthy.
 */
function eventEvidence(event, path12) {
  const { runId, eventId, parentId, trace, kind, name, status } = event;
  const evidence = {
    runId,
    eventId,
    parentId,
    traceId: trace?.traceId,
    spanId: trace?.spanId,
    kind,
    name,
    status
  };
  if (path12) {
    evidence.path = path12;
  }
  return evidence;
}
|
|
9377
|
+
/**
 * Wraps a run's id, name and status in a one-element evidence list.
 * Returns an empty list when `run` is falsy.
 */
function runEvidence(run) {
  if (!run) {
    return [];
  }
  const { runId, name, status } = run;
  return [{ runId, name, status }];
}
|
|
9380
|
+
/**
 * Creates a failing, error-severity finding for `ruleId`.
 * `expected` and `actual` are included only when defined; `evidence` is copied.
 */
function failFinding(ruleId, message, evidence, expected, actual) {
  const finding = { ruleId, severity: "error", status: "fail", message };
  if (expected !== void 0) {
    finding.expected = expected;
  }
  if (actual !== void 0) {
    finding.actual = actual;
  }
  finding.evidence = [...evidence];
  return finding;
}
|
|
9391
|
+
/**
 * Resolves a tool event's name: the `toolName` or `tool` string attribute when
 * present, otherwise `event.name` with a leading "tool:", "function:" or
 * "mcp-tools:" prefix removed.
 */
function toolName(event) {
  const fromAttributes = stringAttr2(event, ["toolName", "tool"]);
  if (fromAttributes != null) {
    return fromAttributes;
  }
  return stripPrefix(event.name, ["tool:", "function:", "mcp-tools:"]);
}
|
|
9394
|
+
/**
 * Resolves an LLM event's model: the first usable string among the model
 * attributes, otherwise `event.name` with a leading "llm:", "generation:",
 * "transcription:" or "speech:" prefix removed.
 */
function llmModel(event) {
  const attributeKeys = ["model", "modelId", "responseModelId", "modelName", "model_name"];
  const fromAttributes = stringAttr2(event, attributeKeys);
  if (fromAttributes != null) {
    return fromAttributes;
  }
  return stripPrefix(event.name, ["llm:", "generation:", "transcription:", "speech:"]);
}
|
|
9397
|
+
/**
 * Returns the provider recorded in the event's attributes, checking
 * `provider`, `providerName` and `provider_name` in that order.
 */
function llmProvider(event) {
  const providerKeys = ["provider", "providerName", "provider_name"];
  return stringAttr2(event, providerKeys);
}
|
|
9400
|
+
/**
 * Returns the finish reason recorded in the event's attributes, checking
 * `finishReason`, `rawFinishReason` and `finish_reason` in that order.
 */
function llmFinishReason(event) {
  const finishReasonKeys = ["finishReason", "rawFinishReason", "finish_reason"];
  return stringAttr2(event, finishReasonKeys);
}
|
|
9403
|
+
/**
 * Returns the retry counter recorded in the event's attributes, checking
 * `retryCount`, `retryAttempt`, `retry_attempt` and `attempt` in that order.
 */
function retryCount(event) {
  const retryKeys = ["retryCount", "retryAttempt", "retry_attempt", "attempt"];
  return numericAttr(event, retryKeys);
}
|
|
9406
|
+
/**
 * Lists the context's events whose status is not "running".
 * When `kind` is given, only events of that kind are kept.
 */
function finishedEvents(context, kind) {
  const anyKind = kind === void 0;
  return context.events.filter((event) => {
    if (!anyKind && event.kind !== kind) {
      return false;
    }
    return event.status !== "running";
  });
}
|
|
9411
|
+
/**
 * Tells whether `value` is a non-null object that is not an array.
 */
function isRecord14(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
9414
|
+
/**
 * Indexes events by `eventId`.
 * When ids repeat, the later event replaces the earlier one.
 */
function eventMap(events) {
  const index = new Map();
  for (const event of events) {
    index.set(event.eventId, event);
  }
  return index;
}
|
|
9417
|
+
/**
 * Parses a timestamp string with `Date.parse`.
 * Returns epoch milliseconds, or `undefined` for falsy or unparseable input.
 */
function parseEventTime(value) {
  if (!value) {
    return void 0;
  }
  const epochMs = Date.parse(value);
  // Date.parse yields NaN for unparseable input and a finite number otherwise.
  return Number.isNaN(epochMs) ? void 0 : epochMs;
}
|
|
9422
|
+
/**
 * Returns the event's start time in epoch milliseconds: `startedAt` when it
 * parses, otherwise `timestamp`. The result may be `undefined`.
 */
function eventStartMs(event) {
  const startedAt = parseEventTime(event.startedAt);
  if (startedAt != null) {
    return startedAt;
  }
  return parseEventTime(event.timestamp);
}
|
|
9425
|
+
/**
 * Returns the event's end time in epoch milliseconds.
 *
 * `endedAt` is used when it parses. Otherwise the end is start + `durationMs`,
 * provided both a start time and a finite duration are available. The result
 * is `undefined` in every other case.
 */
function eventEndMs(event) {
  const explicitEnd = parseEventTime(event.endedAt);
  if (explicitEnd !== void 0) {
    return explicitEnd;
  }
  const start = eventStartMs(event);
  const { durationMs } = event;
  const derivable = start !== void 0 && durationMs !== void 0 && Number.isFinite(durationMs);
  return derivable ? start + durationMs : void 0;
}
|
|
9434
|
+
/**
 * Canonicalizes a key for comparison: lower-cases it, then removes every
 * character other than a-z, 0-9 and underscore.
 */
function normalizedKey(value) {
  const lowered = value.toLowerCase();
  return lowered.replace(/[^a-z0-9_]/g, "");
}
|
|
9437
|
+
/**
 * Returns the text after the last "." in a dotted path, or the whole path
 * when it contains no dot.
 */
function lastPathSegment(path12) {
  const segments = path12.split(".");
  const tail = segments.pop();
  return tail ?? path12;
}
|
|
9441
|
+
/**
 * Names a value's type like `typeof`, but reports "array" for arrays and
 * "null" for null.
 */
function valueType(value) {
  if (value === null) {
    return "null";
  }
  return Array.isArray(value) ? "array" : typeof value;
}
|
|
9446
|
+
/**
 * Returns the UTF-8 byte length of `JSON.stringify(value)`.
 * Returns `undefined` when serialization or measurement throws, for example
 * on circular structures or when `JSON.stringify` yields `undefined`.
 */
function serializedByteLength(value) {
  try {
    const json = JSON.stringify(value);
    return Buffer.byteLength(json, "utf-8");
  } catch {
    return void 0;
  }
}
|
|
9453
|
+
/**
 * Appends `value` and every nested value to `entries`, depth-first.
 *
 * Each entry is `{ event, path, key, value }`. Array items extend the path
 * with their index; record properties extend it with their name and are
 * visited in `localeCompare` key order. Nothing below depth 8 is visited.
 */
function pushValueEntries(entries, event, value, path12, key, depth = 0) {
  entries.push({ event, path: path12, key, value });
  if (depth >= 8) return;
  const nextDepth = depth + 1;
  if (Array.isArray(value)) {
    // Index loop rather than forEach so holes in sparse arrays are still
    // visited (as undefined).
    for (let index = 0; index < value.length; index += 1) {
      pushValueEntries(entries, event, value[index], `${path12}.${index}`, String(index), nextDepth);
    }
    return;
  }
  if (!isRecord14(value)) return;
  const orderedKeys = Object.keys(value);
  orderedKeys.sort((a, b) => a.localeCompare(b));
  for (const nestedKey of orderedKeys) {
    pushValueEntries(entries, event, value[nestedKey], `${path12}.${nestedKey}`, nestedKey, nextDepth);
  }
}
|
|
9474
|
+
/**
 * Collects value entries for an event.
 *
 * `attributes` is always scanned when defined. `inputSummary` and
 * `outputSummary` are scanned only with `options.includeSummaries`, and
 * `error` only with `options.includeError`. Undefined sections are skipped.
 */
function eventValueEntries(event, options = {}) {
  const entries = [];
  const scan = (section, label) => {
    if (section !== void 0) {
      pushValueEntries(entries, event, section, label, label);
    }
  };
  scan(event.attributes, "attributes");
  if (options.includeSummaries) {
    scan(event.inputSummary, "inputSummary");
    scan(event.outputSummary, "outputSummary");
  }
  if (options.includeError) {
    scan(event.error, "error");
  }
  return entries;
}
|
|
9492
|
+
/**
 * Caps a findings list at `maxFindings`.
 * Returns the same array when no cap is set or the list already fits;
 * otherwise returns a truncated copy. A negative cap yields an empty array.
 */
function limitFindings(findings, maxFindings) {
  const fits = maxFindings === void 0 || findings.length <= maxFindings;
  if (fits) {
    return findings;
  }
  const cap = Math.max(0, maxFindings);
  return findings.slice(0, cap);
}
|
|
9496
|
+
/**
 * Tells whether a string value looks redacted.
 *
 * A value counts as redacted when it contains one of the configured markers,
 * or when it is exactly a `[HASH:<id>]` token.
 *
 * @param value - string to inspect.
 * @param markers - marker substrings that indicate redaction.
 * @returns true when the value carries a redaction marker.
 */
function hasRedactionMarker(value, markers) {
  // Ignore empty markers: "".includes-style matching is true for every
  // string, so a single blank entry would make all values look redacted.
  const containsMarker = markers.some((marker) => marker !== "" && value.includes(marker));
  return containsMarker || /^\[HASH:[A-Za-z0-9_-]+\]$/.test(value);
}
|
|
9499
|
+
/**
 * Tells whether a key name refers to sensitive data.
 *
 * Both the key and each configured name are canonicalized with
 * `normalizedKey`. The key is sensitive when its canonical form contains the
 * canonical form of any configured name.
 *
 * @param key - key name to classify; falsy keys are never sensitive.
 * @param sensitiveKeys - configured sensitive key names.
 * @returns true when the key matches a configured sensitive name.
 */
function isSensitiveKey(key, sensitiveKeys) {
  if (!key) return false;
  const normalized = normalizedKey(key);
  return sensitiveKeys.some((sensitive) => {
    const needle = normalizedKey(sensitive);
    // Skip names that canonicalize to "": every string contains the empty
    // string, so such an entry would flag every key as sensitive.
    return needle !== "" && normalized.includes(needle);
  });
}
|
|
9504
|
+
/**
 * Tells whether a key name is one of the forbidden raw-content keys.
 * Key and candidates are compared by exact equality after `normalizedKey`.
 * Falsy keys never match.
 */
function isRawContentKey(key, forbiddenKeys) {
  if (!key) {
    return false;
  }
  const canonical = normalizedKey(key);
  const matchIndex = forbiddenKeys.findIndex((forbidden) => normalizedKey(forbidden) === canonical);
  return matchIndex !== -1;
}
|
|
9509
|
+
/**
 * Tells whether the event's attributes flag its parent relationship as
 * unresolved: either a boolean flag attribute is `true`, or a resolution
 * attribute reads "unresolved" or "missing-parent".
 */
function parentMarkedUnresolved(event) {
  const flagKeys = [
    "parentUnresolved",
    "unresolvedParent",
    "relationshipUnresolved",
    "unresolvedRelationship"
  ];
  if (booleanAttr(event, flagKeys) === true) {
    return true;
  }
  const resolutionKeys = [
    "parentResolution",
    "relationshipResolution",
    "relationshipStatus"
  ];
  const resolution = stringAttr2(event, resolutionKeys);
  return ["unresolved", "missing-parent"].includes(resolution);
}
|
|
9525
|
+
/**
 * Resolves a signal's name: the first usable string attribute among
 * `attributeKeys`, otherwise `event.name` with a matching prefix removed.
 */
function signalName(event, attributeKeys, prefixes) {
  const fromAttributes = stringAttr2(event, attributeKeys);
  if (fromAttributes != null) {
    return fromAttributes;
  }
  return stripPrefix(event.name, prefixes);
}
|
|
9528
|
+
/**
 * Lists the non-running events that represent guardrails.
 *
 * An event qualifies when its lower-cased name starts with "guardrail:" or
 * contains ".guardrail.", or when it carries a `guardrailName`, `guardrail`
 * or `guardrailId` string attribute.
 */
function guardrailEvents(context) {
  const settled = context.events.filter((event) => event.status !== "running");
  return settled.filter((event) => {
    const lowered = event.name.toLowerCase();
    const namedAsGuardrail = lowered.startsWith("guardrail:") || lowered.includes(".guardrail.");
    if (namedAsGuardrail) {
      return true;
    }
    return stringAttr2(event, ["guardrailName", "guardrail", "guardrailId"]) !== void 0;
  });
}
|
|
9538
|
+
/**
 * Returns the event's retry count, or 0 when none is recorded.
 */
function retryValue(event) {
  const recorded = retryCount(event);
  return recorded != null ? recorded : 0;
}
|
|
9541
|
+
/**
 * Returns the event's duration in milliseconds.
 *
 * A finite `durationMs` is returned as-is. Otherwise the duration is derived
 * from the start and end times, provided both exist and the end is not before
 * the start.
 *
 * @param event - event to measure.
 * @returns the duration in milliseconds, or `undefined` when it is unknown.
 */
function eventDurationMs(event) {
  // Require a finite number, matching the check eventEndMs applies to the same
  // field. A NaN or null duration falls back to the timestamps instead of
  // being returned to callers.
  if (Number.isFinite(event.durationMs)) return event.durationMs;
  const start = eventStartMs(event);
  const end = eventEndMs(event);
  return start !== void 0 && end !== void 0 && end >= start ? end - start : void 0;
}
|
|
9547
|
+
/**
 * Flattens a forest of event nodes into one descriptive line per node.
 *
 * Nodes are visited depth-first in pre-order. Each line has the form
 * `<path>:<kind>:<name>:<status>`, where the path is the dot-separated list
 * of sibling indexes leading to the node and a missing status is rendered
 * as "unknown".
 *
 * @param {object[]} nodes - Root nodes; each has an `event` and a `children` array.
 * @returns {string[]} One line per node in visiting order.
 */
function treeShape(nodes) {
  const shape = [];
  function describe(node, position) {
    const { kind, name, status } = node.event;
    shape.push(`${position}:${kind}:${name}:${status ?? "unknown"}`);
    for (const [childIndex, child] of node.children.entries()) {
      describe(child, `${position}.${childIndex}`);
    }
  }
  for (const [rootIndex, root] of nodes.entries()) {
    describe(root, String(rootIndex));
  }
  return shape;
}
|
|
9556
|
+
/**
 * Summarises every event of a context as `<kind>:<name>:<status>` and returns
 * the summaries sorted with `localeCompare`. A missing status is rendered
 * as "unknown".
 *
 * @param {object} context - Check context; only `context.events` is read.
 * @returns {string[]} Sorted summaries, one per event.
 */
function statusShape(context) {
  const summaries = [];
  for (const { kind, name, status } of context.events) {
    summaries.push(`${kind}:${name}:${status ?? "unknown"}`);
  }
  summaries.sort((left, right) => left.localeCompare(right));
  return summaries;
}
|
|
9559
|
+
/**
 * Describes every event returned by `finishedEvents(context, "TOOL")` as a
 * colon-joined string of tool name, status, retry value and duration.
 * A missing status or duration is rendered as "unknown".
 *
 * @param {object} context - Check context handed to `finishedEvents`.
 * @returns {string[]} One description per event, in the order `finishedEvents` returns them.
 */
function toolShape(context) {
  const describeTool = (event) => {
    const name = toolName(event);
    const status = event.status ?? "unknown";
    const retries = retryValue(event);
    const duration = eventDurationMs(event) ?? "unknown";
    // Array#join is used on purpose: it renders null/undefined parts as empty strings.
    return [name, status, retries, duration].join(":");
  };
  return finishedEvents(context, "TOOL").map((event) => describeTool(event));
}
|
|
9569
|
+
/**
 * Describes every event returned by `finishedEvents(context, "LLM")` as a
 * colon-joined string of provider, model, finish reason and the four
 * token-usage counters (input, output, total, cached).
 *
 * Missing provider/model/finish reason become "unknown"; missing token
 * counters become 0.
 *
 * @param {object} context - Check context handed to `finishedEvents`.
 * @returns {string[]} One description per event, in the order `finishedEvents` returns them.
 */
function llmShape(context) {
  const describeCall = (event) => {
    const provider = llmProvider(event) ?? "unknown";
    const model = llmModel(event) ?? "unknown";
    const finishReason = llmFinishReason(event) ?? "unknown";
    const usage = event.tokenUsage;
    const parts = [
      provider,
      model,
      finishReason,
      usage?.input ?? 0,
      usage?.output ?? 0,
      usage?.total ?? 0,
      usage?.cached ?? 0
    ];
    return parts.join(":");
  };
  return finishedEvents(context, "LLM").map((event) => describeCall(event));
}
|
|
9582
|
+
/**
 * Builds a sorted error profile for a context.
 *
 * Every event whose status is "error", or whose `error` property is anything
 * other than undefined, contributes a `<kind>:<name>:<errorName>:<errorCode>`
 * entry. A missing error name becomes "Error" and a missing code "unknown".
 *
 * @param {object} context - Check context; only `context.events` is read.
 * @returns {string[]} Entries sorted with `localeCompare`.
 */
function errorShape(context) {
  const entries = [];
  for (const event of context.events) {
    const failed = event.status === "error" || event.error !== void 0;
    if (!failed) {
      continue;
    }
    const errorName = event.error?.name ?? "Error";
    const errorCode = event.error?.code ?? "unknown";
    entries.push([event.kind, event.name, errorName, errorCode].join(":"));
  }
  return entries.sort((left, right) => left.localeCompare(right));
}
|
|
9592
|
+
/**
 * Lists the signal names of all events returned by
 * `finishedEvents(context, "RETRIEVER")`, sorted with `localeCompare`.
 *
 * @param {object} context - Check context handed to `finishedEvents`.
 * @returns {string[]} Sorted retrieval signal names.
 */
function retrievalShape(context) {
  const attributeKeys = ["retrievalName", "retrieverName", "retriever"];
  const prefixes = ["retriever:", "retrieval:"];
  const names = [];
  for (const event of finishedEvents(context, "RETRIEVER")) {
    // Fresh copies per call, matching the per-event literals of the inline form.
    names.push(signalName(event, [...attributeKeys], [...prefixes]));
  }
  return names.sort((left, right) => left.localeCompare(right));
}
|
|
9597
|
+
/**
 * Lists the signal names of all events returned by `guardrailEvents(context)`,
 * sorted with `localeCompare`.
 *
 * @param {object} context - Check context handed to `guardrailEvents`.
 * @returns {string[]} Sorted guardrail signal names.
 */
function guardrailShape(context) {
  const names = [];
  for (const event of guardrailEvents(context)) {
    names.push(signalName(event, ["guardrailName", "guardrail", "guardrailId"], ["guardrail:"]));
  }
  names.sort((left, right) => left.localeCompare(right));
  return names;
}
|
|
9600
|
+
/**
 * Picks evidence for a finding about a given event kind.
 *
 * When the context holds at least one event of `kind`, the first one is
 * wrapped with `eventEvidence(event, path12)` in a single-element array.
 * Otherwise whatever `runEvidence(context.selectedRun)` produces is returned.
 *
 * @param {object} context - Check context; `events` and `selectedRun` are read.
 * @param {string} kind - Event kind to look for.
 * @param {string} path12 - Path forwarded to `eventEvidence`.
 * @returns {*} Event-level evidence array, or the result of `runEvidence`.
 */
function firstEvidenceForKind(context, kind, path12) {
  const firstOfKind = context.events.find((candidate) => candidate.kind === kind);
  if (firstOfKind) {
    return [eventEvidence(firstOfKind, path12)];
  }
  return runEvidence(context.selectedRun);
}
|
|
9604
|
+
/**
 * Creates a failing finding for the "baseline.regression" rule by delegating
 * to `failFinding` with that rule id.
 *
 * @param {string} message - Finding message.
 * @param {*} evidence - Evidence forwarded unchanged.
 * @param {*} expected - Baseline-side value forwarded unchanged.
 * @param {*} actual - Candidate-side value forwarded unchanged.
 * @returns {*} Whatever `failFinding` returns.
 */
function baselineDiffFinding(message, evidence, expected, actual) {
  const ruleId = "baseline.regression";
  return failFinding(ruleId, message, evidence, expected, actual);
}
|
|
9607
|
+
/**
 * Builds the "run.status" rule.
 *
 * The rule fails when the selected run's status (or "unknown" when absent)
 * differs from the expected status, and, unless `allowIncomplete` is strictly
 * `true`, also when any event still has the status "running".
 *
 * @param {object} [options] - Rule options.
 * @param {string} [options.expected] - Expected run status; defaults to "ok".
 * @param {boolean} [options.allowIncomplete] - Only `true` disables the running-events check.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createRunStatusRule(options = {}) {
  const ruleId = "run.status";
  const expected = options.expected ?? "ok";
  const allowIncomplete = options.allowIncomplete === true;

  function evaluate(context) {
    const findings = [];
    const actual = context.selectedRun?.status ?? "unknown";
    if (actual !== expected) {
      findings.push(
        failFinding(
          ruleId,
          `Run status ${actual} did not match expected ${expected}.`,
          runEvidence(context.selectedRun),
          expected,
          actual
        )
      );
    }
    if (allowIncomplete) {
      return findings;
    }
    const stillRunning = context.events.filter((event) => event.status === "running");
    if (stillRunning.length > 0) {
      findings.push(
        failFinding(
          ruleId,
          "Run contains incomplete running events.",
          stillRunning.map((event) => eventEvidence(event)),
          "no running events",
          stillRunning.length
        )
      );
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "run",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9646
|
+
/**
 * Builds the "run.duration" rule.
 *
 * The rule passes when the selected run has no `durationMs` or when that
 * duration is less than or equal to `options.maxDurationMs`; otherwise it
 * reports a single failing finding.
 *
 * @param {object} options - Rule options.
 * @param {number} options.maxDurationMs - Upper bound the run duration is compared against.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createRunDurationRule(options) {
  const ruleId = "run.duration";

  function evaluate(context) {
    const actual = context.selectedRun?.durationMs;
    const withinBudget = actual === void 0 || actual <= options.maxDurationMs;
    if (withinBudget) {
      return [];
    }
    const finding = failFinding(
      ruleId,
      `Run duration ${actual}ms exceeded ${options.maxDurationMs}ms.`,
      runEvidence(context.selectedRun),
      { maxDurationMs: options.maxDurationMs },
      actual
    );
    return [finding];
  }

  return {
    id: ruleId,
    category: "run",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9666
|
+
/**
 * Builds the "run.depth" rule.
 *
 * The rule takes the largest `depth` among the nodes in
 * `context.nodesByEventId` (0 when there are none) and fails when it is
 * greater than `options.maxDepth`. The evidence lists every node at that
 * deepest level.
 *
 * @param {object} options - Rule options.
 * @param {number} options.maxDepth - Largest depth that still passes.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createRunDepthRule(options) {
  const ruleId = "run.depth";

  // Evidence entry for one tree node, built from the node's own event.
  const nodeEvidence = ({ event }) => ({
    runId: event.runId,
    eventId: event.eventId,
    parentId: event.parentId,
    kind: event.kind,
    name: event.name,
    status: event.status
  });

  function evaluate(context) {
    const nodes = Array.from(context.nodesByEventId.values());
    let maxDepth = 0;
    for (const node of nodes) {
      maxDepth = Math.max(maxDepth, node.depth);
    }
    if (maxDepth <= options.maxDepth) {
      return [];
    }
    const deepest = nodes.filter((node) => node.depth === maxDepth);
    return [
      failFinding(
        ruleId,
        `Run depth ${maxDepth} exceeded ${options.maxDepth}.`,
        deepest.map((node) => nodeEvidence(node)),
        { maxDepth: options.maxDepth },
        maxDepth
      )
    ];
  }

  return {
    id: ruleId,
    category: "run",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9695
|
+
/**
 * Builds the "tool.usage" rule.
 *
 * Working on the events from `finishedEvents(context, "TOOL")`, the rule
 * reports, in this order:
 *   1. every `options.required` tool name that never appeared,
 *   2. per event: a forbidden tool, and a tool missing from `options.allowed`
 *      (only when an allow-list is given),
 *   3. a tool count below `options.minCount`,
 *   4. a tool count above `options.maxCount`.
 *
 * @param {object} options - Rule options (`required`, `forbidden`, `allowed`, `minCount`, `maxCount`).
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createToolUsageRule(options) {
  const ruleId = "tool.usage";

  function evaluate(context) {
    const tools = finishedEvents(context, "TOOL");
    const names = tools.map(toolName);
    const seenNames = new Set(names);
    const findings = [];
    const report = (message, evidence, expected, actual) => {
      findings.push(failFinding(ruleId, message, evidence, expected, actual));
    };

    for (const required of options.required ?? []) {
      if (seenNames.has(required)) {
        continue;
      }
      report(`Required tool ${required} did not appear.`, runEvidence(context.selectedRun), required, names);
    }

    const forbidden = new Set(options.forbidden ?? []);
    const allowed = options.allowed ? new Set(options.allowed) : void 0;
    for (const event of tools) {
      const name = toolName(event);
      if (forbidden.has(name)) {
        report(`Forbidden tool ${name} appeared.`, [eventEvidence(event)], "tool absent", name);
      }
      if (allowed && !allowed.has(name)) {
        report(`Tool ${name} is not in the allowed tool set.`, [eventEvidence(event)], [...allowed].sort(), name);
      }
    }

    const toolCount = tools.length;
    if (options.minCount !== void 0 && toolCount < options.minCount) {
      report(
        `Tool count ${toolCount} was below minimum ${options.minCount}.`,
        runEvidence(context.selectedRun),
        { minCount: options.minCount },
        toolCount
      );
    }
    if (options.maxCount !== void 0 && toolCount > options.maxCount) {
      report(
        `Tool count ${toolCount} exceeded maximum ${options.maxCount}.`,
        tools.map((event) => eventEvidence(event)),
        { maxCount: options.maxCount },
        toolCount
      );
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "tool",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9741
|
+
/**
 * Builds the "llm.usage" rule.
 *
 * Working on the events from `finishedEvents(context, "LLM")`, the rule
 * reports, in this order:
 *   1. a call count above `options.maxCalls`,
 *   2. per event: a model, provider or finish reason that is missing or not in
 *      the matching allow-list (`allowedModels`, `allowedProviders`,
 *      `finishReasons`), each checked only when its list is given,
 *   3. summed input/output/total/cached token counts above
 *      `maxInputTokens` / `maxOutputTokens` / `maxTotalTokens` / `maxCachedTokens`.
 * Missing token counters count as 0.
 *
 * @param {object} options - Rule options as listed above.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createLlmUsageRule(options) {
  const ruleId = "llm.usage";
  // Token counter name paired with the option that caps it.
  const tokenLimitOptions = [
    ["input", "maxInputTokens"],
    ["output", "maxOutputTokens"],
    ["total", "maxTotalTokens"],
    ["cached", "maxCachedTokens"]
  ];
  const toSet = (values) => (values ? new Set(values) : void 0);

  function evaluate(context) {
    const llms = finishedEvents(context, "LLM");
    const findings = [];
    const allowedModels = toSet(options.allowedModels);
    const allowedProviders = toSet(options.allowedProviders);
    const finishReasons = toSet(options.finishReasons);

    if (options.maxCalls !== void 0 && llms.length > options.maxCalls) {
      findings.push(
        failFinding(
          ruleId,
          `LLM call count ${llms.length} exceeded ${options.maxCalls}.`,
          llms.map((event) => eventEvidence(event)),
          { maxCalls: options.maxCalls },
          llms.length
        )
      );
    }

    for (const event of llms) {
      const model = llmModel(event);
      const provider = llmProvider(event);
      const finishReason = llmFinishReason(event);
      const allowListChecks = [
        { label: "model", value: model, allowed: allowedModels, path: "attributes.model" },
        { label: "provider", value: provider, allowed: allowedProviders, path: "attributes.provider" },
        { label: "finish reason", value: finishReason, allowed: finishReasons, path: "attributes.finishReason" }
      ];
      for (const { label, value, allowed, path } of allowListChecks) {
        if (!allowed) {
          continue;
        }
        if (value && allowed.has(value)) {
          continue;
        }
        findings.push(
          failFinding(
            ruleId,
            `LLM ${label} ${value ?? "unknown"} is not allowed.`,
            [eventEvidence(event, path)],
            [...allowed].sort(),
            value ?? "unknown"
          )
        );
      }
    }

    const tokenTotals = { input: 0, output: 0, total: 0, cached: 0 };
    for (const event of llms) {
      const usage = event.tokenUsage;
      tokenTotals.input += usage?.input ?? 0;
      tokenTotals.output += usage?.output ?? 0;
      tokenTotals.total += usage?.total ?? 0;
      tokenTotals.cached += usage?.cached ?? 0;
    }

    for (const [key, optionName] of tokenLimitOptions) {
      const limit = options[optionName];
      if (limit === void 0 || !(tokenTotals[key] > limit)) {
        continue;
      }
      findings.push(
        failFinding(
          ruleId,
          `LLM ${key} token count ${tokenTotals[key]} exceeded ${limit}.`,
          llms.map((event) => eventEvidence(event, `tokenUsage.${key}`)),
          { [optionName]: limit },
          tokenTotals[key]
        )
      );
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "llm",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9815
|
+
/**
 * Builds the "structure.orphan" rule.
 *
 * An event is an orphan when it has a truthy `parentId` that is not a key of
 * `eventMap(context.events)`. Unless `allowMarkedUnresolved` is switched off,
 * orphans for which `parentMarkedUnresolved` returns true are tolerated.
 * All remaining orphans are reported in one finding.
 *
 * @param {object} [options] - Rule options.
 * @param {boolean} [options.allowMarkedUnresolved] - Defaults to `true` when null or undefined.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createStructureOrphanRule(options = {}) {
  const ruleId = "structure.orphan";
  const allowMarkedUnresolved = options.allowMarkedUnresolved ?? true;

  function evaluate(context) {
    const byId = eventMap(context.events);
    const isOrphan = (event) => {
      if (!event.parentId) {
        return false;
      }
      if (byId.has(event.parentId)) {
        return false;
      }
      if (!allowMarkedUnresolved) {
        return true;
      }
      return !parentMarkedUnresolved(event);
    };
    const orphans = context.events.filter((event) => isOrphan(event));
    if (orphans.length === 0) {
      return [];
    }
    return [
      failFinding(
        ruleId,
        "Trace contains events whose parentId is not present in the selected run.",
        orphans.map((event) => eventEvidence(event, "parentId")),
        "parentId resolves to an event in the selected run",
        orphans.length
      )
    ];
  }

  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9840
|
+
/**
 * Builds the "structure.cycle" rule.
 *
 * Starting from every event (visited in `eventId` order), the rule follows
 * `parentId` links through `eventMap(context.events)` until the chain ends or
 * an event repeats. A repeat means the events from its first occurrence
 * onwards form a cycle. Each distinct cycle, identified by its sorted event
 * ids, is reported once.
 *
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createStructureCycleRule() {
  const ruleId = "structure.cycle";

  function evaluate(context) {
    const byId = eventMap(context.events);
    const reportedCycles = /* @__PURE__ */ new Set();
    const findings = [];
    const startingPoints = [...context.events].sort((a, b) => a.eventId.localeCompare(b.eventId));

    for (const start of startingPoints) {
      const trail = [];
      // eventId -> index in `trail` at which that event was first reached.
      const positionById = /* @__PURE__ */ new Map();
      for (
        let current = start;
        current;
        current = current.parentId ? byId.get(current.parentId) : void 0
      ) {
        const firstSeenAt = positionById.get(current.eventId);
        if (firstSeenAt === void 0) {
          positionById.set(current.eventId, trail.length);
          trail.push(current);
          continue;
        }
        // The walk came back to an event already on the trail: that suffix is the cycle.
        const cycle = trail.slice(firstSeenAt);
        const cycleKey = cycle.map((item) => item.eventId).sort().join("\0");
        if (!reportedCycles.has(cycleKey)) {
          reportedCycles.add(cycleKey);
          findings.push(
            failFinding(
              ruleId,
              "Trace contains a parentId cycle.",
              cycle.map((item) => eventEvidence(item, "parentId")),
              "acyclic parentId graph",
              cycle.map((item) => item.eventId).sort()
            )
          );
        }
        break;
      }
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9881
|
+
/**
 * Builds the "structure.relationship" rule.
 *
 * For every event the rule can report:
 *   - a confidence whose `CONFIDENCE_RANK` is below that of `options.minConfidence`
 *     (only when `minConfidence` is truthy),
 *   - a `parentId` equal to the event's own `eventId`,
 *   - with `options.requireParentBeforeChild`: a parent whose start time
 *     (`eventStartMs`) is later than the child's,
 *   - with `options.requireTraceParentSpan`: a `trace.parentSpanId` that differs
 *     from the parent's `trace.spanId` (checked only when the parent has a span
 *     id and the child has a `trace`).
 * Events without a parentId, or whose parent is not in the event map, are
 * skipped after the confidence check.
 *
 * @param {object} [options] - Rule options as listed above.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createStructureRelationshipRule(options = {}) {
  const ruleId = "structure.relationship";

  function evaluate(context) {
    const byId = eventMap(context.events);
    const findings = [];
    const minConfidence = options.minConfidence;
    const report = (message, evidence, expected, actual) => {
      findings.push(failFinding(ruleId, message, evidence, expected, actual));
    };

    // Parent must not start after its child.
    const checkStartOrder = (parent, child) => {
      const parentTime = eventStartMs(parent);
      const childTime = eventStartMs(child);
      if (parentTime === void 0 || childTime === void 0) {
        return;
      }
      if (parentTime > childTime) {
        report(
          "Parent event starts after child event.",
          [eventEvidence(parent), eventEvidence(child, "parentId")],
          "parent start <= child start",
          { parentEventId: parent.eventId, childEventId: child.eventId }
        );
      }
    };

    // Child's trace.parentSpanId must equal the parent's trace.spanId.
    const checkTraceSpan = (parent, child) => {
      const actual = child.trace.parentSpanId;
      if (actual === parent.trace.spanId) {
        return;
      }
      report(
        "Trace parentSpanId does not match parent spanId.",
        [eventEvidence(child, "trace.parentSpanId")],
        { parentSpanId: parent.trace.spanId },
        actual ?? "missing"
      );
    };

    for (const event of context.events) {
      if (minConfidence && CONFIDENCE_RANK[event.confidence] < CONFIDENCE_RANK[minConfidence]) {
        report(
          `Event confidence ${event.confidence} is below ${minConfidence}.`,
          [eventEvidence(event, "confidence")],
          { minConfidence },
          event.confidence
        );
      }
      if (!event.parentId) {
        continue;
      }
      if (event.parentId === event.eventId) {
        report(
          "Event parentId points to itself.",
          [eventEvidence(event, "parentId")],
          "parentId references a distinct event",
          "self"
        );
        continue;
      }
      const parent = byId.get(event.parentId);
      if (!parent) {
        continue;
      }
      if (options.requireParentBeforeChild) {
        checkStartOrder(parent, event);
      }
      if (options.requireTraceParentSpan && parent.trace?.spanId && event.trace) {
        checkTraceSpan(parent, event);
      }
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
9951
|
+
/**
 * Builds the "structure.parallelWidth" rule.
 *
 * Two independent limits are supported:
 *   - `options.maxChildren`: every entry of `context.childrenByParentId` with
 *     more children than the limit yields a finding.
 *   - `options.maxConcurrent`: events with a defined start and an end strictly
 *     after it are swept in time order; when the largest set of simultaneously
 *     active events exceeds the limit, one finding lists that set.
 * At equal timestamps ends are processed before starts, so an event ending
 * exactly when another starts does not overlap with it.
 *
 * @param {object} options - Rule options (`maxChildren`, `maxConcurrent`).
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createStructureParallelWidthRule(options) {
  const ruleId = "structure.parallelWidth";

  // Evidence entry for a child node, built from the node's own event.
  const childEvidence = ({ event }) => ({
    runId: event.runId,
    eventId: event.eventId,
    parentId: event.parentId,
    kind: event.kind,
    name: event.name,
    status: event.status
  });

  // Sweep order: time first, then ends (-1) before starts (+1), then eventId.
  const comparePoints = (a, b) => {
    const byTime = a.time - b.time;
    if (byTime !== 0) {
      return byTime;
    }
    const byDelta = a.delta - b.delta;
    if (byDelta !== 0) {
      return byDelta;
    }
    return a.event.eventId.localeCompare(b.event.eventId);
  };

  function evaluate(context) {
    const findings = [];
    const byId = eventMap(context.events);

    if (options.maxChildren !== void 0) {
      for (const [parentId, children] of context.childrenByParentId.entries()) {
        if (children.length <= options.maxChildren) {
          continue;
        }
        const parent = byId.get(parentId);
        const parentEvidence = parent
          ? [eventEvidence(parent)]
          : [{ runId: context.selectedRun?.runId, eventId: parentId }];
        findings.push(
          failFinding(
            ruleId,
            `Parent ${parentId} has ${children.length} children, exceeding ${options.maxChildren}.`,
            [...parentEvidence, ...children.map((child) => childEvidence(child))],
            { maxChildren: options.maxChildren },
            children.length
          )
        );
      }
    }

    if (options.maxConcurrent !== void 0) {
      const points = [];
      for (const event of context.events) {
        const start = eventStartMs(event);
        const end = eventEndMs(event);
        const hasSpan = start !== void 0 && end !== void 0 && end > start;
        if (!hasSpan) {
          continue;
        }
        points.push({ time: start, delta: 1, event });
        points.push({ time: end, delta: -1, event });
      }
      points.sort(comparePoints);

      const active = /* @__PURE__ */ new Map();
      let widest = [];
      for (const { delta, event } of points) {
        if (delta > 0) {
          active.set(event.eventId, event);
          if (active.size > widest.length) {
            // Snapshot the new record-wide set in a stable order.
            widest = [...active.values()].sort((a, b) => a.eventId.localeCompare(b.eventId));
          }
        } else {
          active.delete(event.eventId);
        }
      }

      if (widest.length > options.maxConcurrent) {
        findings.push(
          failFinding(
            ruleId,
            `Concurrent event width ${widest.length} exceeded ${options.maxConcurrent}.`,
            widest.map((event) => eventEvidence(event)),
            { maxConcurrent: options.maxConcurrent },
            widest.length
          )
        );
      }
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
10027
|
+
/**
 * Builds the "safety.redaction" rule.
 *
 * Every value entry of every event (summaries and error included) whose key
 * is judged sensitive by `isSensitiveKey` must be a string for which
 * `hasRedactionMarker` returns true; any other entry produces a finding.
 * The entry key falls back to the last path segment when the entry has no key.
 * The finding list is passed through `limitFindings` with `options.maxFindings`.
 *
 * @param {object} [options] - Rule options.
 * @param {string[]} [options.sensitiveKeys] - Defaults to `DEFAULT_SENSITIVE_KEYS`.
 * @param {string[]} [options.redactedMarkers] - Defaults to `["[REDACTED]", "[REDACTED:"]`.
 * @param {number} [options.maxFindings] - Forwarded to `limitFindings`.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createSafetyRedactionRule(options = {}) {
  const ruleId = "safety.redaction";
  const sensitiveKeys = options.sensitiveKeys ?? DEFAULT_SENSITIVE_KEYS;
  const markers = options.redactedMarkers ?? ["[REDACTED]", "[REDACTED:"];

  function evaluate(context) {
    const findings = [];
    for (const event of context.events) {
      const entries = eventValueEntries(event, { includeSummaries: true, includeError: true });
      for (const entry of entries) {
        const key = entry.key ?? lastPathSegment(entry.path);
        if (!isSensitiveKey(key, sensitiveKeys)) {
          continue;
        }
        const alreadyRedacted = typeof entry.value === "string" && hasRedactionMarker(entry.value, markers);
        if (alreadyRedacted) {
          continue;
        }
        findings.push(
          failFinding(
            ruleId,
            `Sensitive-looking field at ${entry.path} is not redacted.`,
            [eventEvidence(event, entry.path)],
            "redaction marker",
            { path: entry.path, valueType: valueType(entry.value) }
          )
        );
      }
    }
    return limitFindings(findings, options.maxFindings);
  }

  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
10055
|
+
/**
 * Builds the raw-content rule, whose id is "safety.rawPrompt".
 *
 * Every value entry of every event whose key (or, lacking one, last path
 * segment) is accepted by `isRawContentKey` produces a finding. Whether
 * summaries are scanned is controlled by `options.includeSummaries`, which is
 * forwarded to `eventValueEntries`. The finding list is passed through
 * `limitFindings` with `options.maxFindings`.
 *
 * @param {object} [options] - Rule options.
 * @param {string[]} [options.forbiddenKeys] - Defaults to `DEFAULT_RAW_CONTENT_KEYS`.
 * @param {*} [options.includeSummaries] - Forwarded unchanged to `eventValueEntries`.
 * @param {number} [options.maxFindings] - Forwarded to `limitFindings`.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createSafetyRawContentRule(options = {}) {
  const ruleId = "safety.rawPrompt";
  const forbiddenKeys = options.forbiddenKeys ?? DEFAULT_RAW_CONTENT_KEYS;

  function evaluate(context) {
    const findings = [];
    for (const event of context.events) {
      const entries = eventValueEntries(event, { includeSummaries: options.includeSummaries });
      for (const entry of entries) {
        const key = entry.key ?? lastPathSegment(entry.path);
        if (isRawContentKey(key, forbiddenKeys)) {
          findings.push(
            failFinding(
              ruleId,
              `Raw content-like field ${entry.path} is present.`,
              [eventEvidence(event, entry.path)],
              "metadata-only trace fields",
              { path: entry.path, valueType: valueType(entry.value) }
            )
          );
        }
      }
    }
    return limitFindings(findings, options.maxFindings);
  }

  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
10082
|
+
/**
 * Builds the "safety.secretPattern" rule.
 *
 * Every string value entry of every event (summaries and error included) is
 * truncated to `maxStringLength` characters and tested against each pattern
 * in turn. The first matching pattern produces a finding and ends the search
 * for that entry. The finding list is passed through `limitFindings` with
 * `options.maxFindings`.
 *
 * @param {object} [options] - Rule options.
 * @param {Array<{id: string, pattern: RegExp}>} [options.patterns] - Defaults to `DEFAULT_SECRET_PATTERNS`.
 * @param {number} [options.maxStringLength] - Characters scanned per string; defaults to 4096.
 * @param {number} [options.maxFindings] - Forwarded to `limitFindings`.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createSafetySecretPatternRule(options = {}) {
  const ruleId = "safety.secretPattern";
  const patterns = options.patterns ?? DEFAULT_SECRET_PATTERNS;
  const maxStringLength = options.maxStringLength ?? 4096;

  // `lastIndex` is reset before testing and again after a hit, so stateful
  // (global/sticky) regexes start from the beginning each time.
  const matches = (regex, sample) => {
    regex.lastIndex = 0;
    const hit = regex.test(sample);
    if (hit) {
      regex.lastIndex = 0;
    }
    return hit;
  };

  function evaluate(context) {
    const findings = [];
    for (const event of context.events) {
      const entries = eventValueEntries(event, { includeSummaries: true, includeError: true });
      for (const entry of entries) {
        if (typeof entry.value !== "string") {
          continue;
        }
        const sample = entry.value.slice(0, maxStringLength);
        for (const pattern of patterns) {
          if (!matches(pattern.pattern, sample)) {
            continue;
          }
          findings.push(
            failFinding(
              ruleId,
              `Secret-like pattern ${pattern.id} matched at ${entry.path}.`,
              [eventEvidence(event, entry.path)],
              "no secret-like strings",
              { pattern: pattern.id, path: entry.path }
            )
          );
          // One finding per entry is enough; skip the remaining patterns.
          break;
        }
      }
    }
    return limitFindings(findings, options.maxFindings);
  }

  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
10116
|
+
/**
 * Builds the "safety.oversizedAttribute" rule.
 *
 * For every value entry of every event (summaries and error included) up to
 * four independent size checks run, each only when its limit is defined:
 *   - strings longer than `options.maxStringLength`,
 *   - arrays longer than `options.maxArrayLength`,
 *   - records (per `isRecord14`) with more keys than `options.maxObjectKeys`,
 *   - values whose `serializedByteLength` exceeds `options.maxSerializedBytes`.
 * One entry can therefore produce several findings. The finding list is
 * passed through `limitFindings` with `options.maxFindings`.
 *
 * @param {object} options - Rule options as listed above.
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createSafetyOversizedAttributeRule(options) {
  const ruleId = "safety.oversizedAttribute";

  function evaluate(context) {
    const findings = [];
    for (const event of context.events) {
      const entries = eventValueEntries(event, { includeSummaries: true, includeError: true });
      for (const entry of entries) {
        const { path, value } = entry;
        const report = (message, expected, actual) => {
          findings.push(failFinding(ruleId, message, [eventEvidence(event, path)], expected, actual));
        };

        if (typeof value === "string" && options.maxStringLength !== void 0 && value.length > options.maxStringLength) {
          report(
            `String at ${path} exceeds ${options.maxStringLength} characters.`,
            { maxStringLength: options.maxStringLength },
            { path, length: value.length }
          );
        }

        if (Array.isArray(value) && options.maxArrayLength !== void 0 && value.length > options.maxArrayLength) {
          report(
            `Array at ${path} exceeds ${options.maxArrayLength} items.`,
            { maxArrayLength: options.maxArrayLength },
            { path, length: value.length }
          );
        }

        if (isRecord14(value) && options.maxObjectKeys !== void 0 && Object.keys(value).length > options.maxObjectKeys) {
          report(
            `Object at ${path} exceeds ${options.maxObjectKeys} keys.`,
            { maxObjectKeys: options.maxObjectKeys },
            { path, keys: Object.keys(value).length }
          );
        }

        if (options.maxSerializedBytes === void 0) {
          continue;
        }
        const bytes = serializedByteLength(value);
        if (bytes !== void 0 && bytes > options.maxSerializedBytes) {
          report(
            `Value at ${path} exceeds ${options.maxSerializedBytes} serialized bytes.`,
            { maxSerializedBytes: options.maxSerializedBytes },
            { path, bytes }
          );
        }
      }
    }
    return limitFindings(findings, options.maxFindings);
  }

  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
10178
|
+
/**
 * Builds the "baseline.regression" rule, which compares the checked run
 * against a baseline trace.
 *
 * The baseline run is picked with `resolveSelectedRun(options.baseline,
 * options.baselineRunId)`. If that yields diagnostics or no run, a single
 * "could not be selected" finding is returned. Otherwise findings are
 * produced, in this order, for:
 *   1. a different trace format,
 *   2. a different run status (missing status counts as "unknown"),
 *   3. run durations, when both are defined, differing by more than
 *      `options.durationToleranceMs` (default 0) in either direction,
 *   4. any of the tree, status, tool, LLM, error, retrieval and guardrail
 *      shapes whose JSON serialisation differs from the baseline's.
 *
 * @param {object} options - Rule options (`baseline`, `baselineRunId`, `durationToleranceMs`).
 * @returns {object} Rule object with `id`, `category`, `defaultSeverity` and `evaluate(context)`.
 */
function createBaselineRegressionRule(options) {
  const ruleId = "baseline.regression";

  function evaluate(context) {
    const selection = resolveSelectedRun(options.baseline, options.baselineRunId);
    if (selection.diagnostics.length > 0 || !selection.run) {
      return [
        failFinding(
          ruleId,
          "Baseline run could not be selected.",
          runEvidence(context.selectedRun),
          "selectable baseline run",
          selection.diagnostics.map((item) => item.code)
        )
      ];
    }

    const baselineContext = {
      ...buildFacts(options.baseline, selection.run),
      selectedRun: selection.run,
      sourceLabel: options.baseline.sourceLabel
    };
    const findings = [];
    const durationToleranceMs = options.durationToleranceMs ?? 0;
    const candidateRunEvidence = () => runEvidence(context.selectedRun);

    if (baselineContext.format !== context.format) {
      findings.push(
        baselineDiffFinding(
          "Trace format differs from baseline.",
          candidateRunEvidence(),
          baselineContext.format,
          context.format
        )
      );
    }

    const baselineRunStatus = baselineContext.selectedRun?.status ?? "unknown";
    const candidateRunStatus = context.selectedRun?.status ?? "unknown";
    if (baselineRunStatus !== candidateRunStatus) {
      findings.push(
        baselineDiffFinding(
          "Run status differs from baseline.",
          candidateRunEvidence(),
          baselineRunStatus,
          candidateRunStatus
        )
      );
    }

    const baselineDuration = baselineContext.selectedRun?.durationMs;
    const candidateDuration = context.selectedRun?.durationMs;
    const bothDurationsKnown = baselineDuration !== void 0 && candidateDuration !== void 0;
    if (bothDurationsKnown && Math.abs(candidateDuration - baselineDuration) > durationToleranceMs) {
      findings.push(
        baselineDiffFinding(
          "Run duration differs from baseline beyond tolerance.",
          candidateRunEvidence(),
          { durationMs: baselineDuration, toleranceMs: durationToleranceMs },
          candidateDuration
        )
      );
    }

    // Evidence for the guardrail comparison: the first guardrail event if any, else the run.
    const guardrailEvidence = () => {
      const [firstGuardrail] = guardrailEvents(context);
      return firstGuardrail ? [eventEvidence(firstGuardrail, "guardrail")] : candidateRunEvidence();
    };

    // Builds one comparison: baseline shape, then candidate shape, then evidence.
    const compare = (label, path, shapeOf, evidenceFor) => ({
      label,
      path,
      expected: shapeOf(baselineContext),
      actual: shapeOf(context),
      evidence: evidenceFor()
    });

    const comparisons = [
      compare("Tree shape", "tree", (ctx) => treeShape(ctx.rootNodes), candidateRunEvidence),
      compare("Event statuses", "status", (ctx) => statusShape(ctx), candidateRunEvidence),
      compare("Tool usage", "tool", (ctx) => toolShape(ctx), () => firstEvidenceForKind(context, "TOOL", "tool")),
      compare("LLM usage", "llm", (ctx) => llmShape(ctx), () => firstEvidenceForKind(context, "LLM", "llm")),
      compare("Error profile", "error", (ctx) => errorShape(ctx), () => firstEvidenceForKind(context, "ERROR", "error")),
      compare(
        "Retrieval signals",
        "retrieval",
        (ctx) => retrievalShape(ctx),
        () => firstEvidenceForKind(context, "RETRIEVER", "retrieval")
      ),
      compare("Guardrail signals", "guardrail", (ctx) => guardrailShape(ctx), guardrailEvidence)
    ];

    for (const { label, path, expected, actual, evidence } of comparisons) {
      const unchanged = JSON.stringify(expected) === JSON.stringify(actual);
      if (unchanged) {
        continue;
      }
      findings.push(
        baselineDiffFinding(
          `${label} differs from baseline.`,
          evidence.length > 0 ? evidence : [{ runId: context.selectedRun?.runId, path }],
          expected,
          actual
        )
      );
    }
    return findings;
  }

  return {
    id: ruleId,
    category: "baseline",
    defaultSeverity: "error",
    evaluate
  };
}
|
|
10306
|
+
/**
 * Evaluates the selected rules against one run of an already-opened trace.
 *
 * Returns early with an error result when run selection or rule selection
 * reports diagnostics, or when any rule throws while evaluating.
 *
 * @param {object} input3 - Check input; `input3.read` supplies `events` and `format`.
 * @param {object} [options] - Optional `runId`, `rules` and `select`.
 * @returns {object} Result with `ok`, `status`, `format`, optional `runId`,
 *   `summary`, `findings` and `diagnostics`.
 */
function runTraceChecks(input3, options = {}) {
  const { diagnostics: selectionDiagnostics, run: selectedRun } = resolveSelectedRun(input3, options.runId);
  if (selectionDiagnostics.length > 0) {
    return errorResult(input3, selectionDiagnostics, selectedRun);
  }
  const ruleSelection = selectRules(options.rules ?? [], options.select);
  if (ruleSelection.diagnostics.length > 0) {
    return errorResult(input3, ruleSelection.diagnostics, selectedRun);
  }

  // Rule context: the derived facts plus the optional selected run and source label.
  const context = { ...buildFacts(input3, selectedRun) };
  if (selectedRun) context.selectedRun = selectedRun;
  if (input3.sourceLabel) context.sourceLabel = input3.sourceLabel;

  const diagnostics = [];
  const findings = [];
  for (const rule of ruleSelection.rules) {
    try {
      const normalized = rule.evaluate(context).map((finding) => normalizeFinding(rule, finding));
      findings.push(...normalized);
    } catch (error) {
      // A throwing rule is reported as an internal error instead of aborting the run.
      const reason = error instanceof Error ? error.message : String(error);
      diagnostics.push(
        diagnostic("AI_CHECK_INTERNAL_ERROR", `Rule ${rule.id} failed: ${reason}`, rule.id)
      );
    }
  }
  if (diagnostics.length > 0) {
    return errorResult(input3, diagnostics, selectedRun);
  }

  const eventById = new Map();
  for (const event of input3.read.events) {
    eventById.set(event.eventId, event);
  }
  // Array#sort works in place, so `findings` itself is the ordered list.
  findings.sort(compareFindings(eventById));
  const summary = summarize(findings, diagnostics);
  const status = summary.failed > 0 ? "fail" : "pass";

  const outcome = { ok: status === "pass", status, format: input3.read.format };
  if (selectedRun) outcome.runId = selectedRun.runId;
  outcome.summary = summary;
  outcome.findings = findings;
  outcome.diagnostics = diagnostics;
  return outcome;
}
|
|
10350
|
+
/**
 * Reads an entire stream into a single string.
 *
 * @param {object} stdin - Async-iterable stream exposing `setEncoding`.
 * @returns {Promise<string>} Concatenation of every chunk, in arrival order.
 */
async function readStdin2(stdin) {
  stdin.setEncoding("utf8");
  const pieces = [];
  for await (const chunk of stdin) {
    // Non-string chunks are coerced so the result is always text.
    pieces.push(typeof chunk === "string" ? chunk : String(chunk));
  }
  return pieces.join("");
}
|
|
10358
|
+
/**
 * Tells whether a thrown value is an object whose `code` is "ENOENT".
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} True only for non-null objects with `code === "ENOENT"`.
 */
function isMissingFileError2(error) {
  if (error === null || typeof error !== "object") return false;
  return "code" in error && error.code === "ENOENT";
}
|
|
10361
|
+
/**
 * Turns a CLI target into a trace input descriptor.
 *
 * "-" reads stdin. Otherwise the target is tried as a filesystem path; when
 * that path does not exist it is resolved through `getTraceFilePath` against
 * the trace directory derived from `options.dir`.
 *
 * @param {string} target - "-", a file/directory path, or a value for `getTraceFilePath`.
 * @param {object} options - Only `options.dir` is read here.
 * @param {object} stdin - Stream consumed when `target` is "-".
 * @returns {Promise<object>} `{ type: "string", content }` or `{ type: "file" | "directory", path }`.
 */
async function inputFromTarget(target, options, stdin) {
  if (target === "-") {
    const content = await readStdin2(stdin);
    return { type: "string", content };
  }
  const classify = (info, location) => ({
    type: info.isDirectory() ? "directory" : "file",
    path: location
  });
  let directStats;
  try {
    directStats = await promises.stat(target);
  } catch (error) {
    // Only a missing path falls through to the trace-directory lookup.
    if (!isMissingFileError2(error)) throw error;
  }
  if (directStats !== void 0) {
    return classify(directStats, target);
  }
  const runPath = getTraceFilePath(target, resolveTraceDir({ dir: options.dir }));
  return classify(await promises.stat(runPath), runPath);
}
|
|
10377
|
+
|
|
10378
|
+
// packages/cli/src/check.ts
|
|
10379
|
+
// Rule selection that buildRules falls back to when neither the config nor the CLI options select any rule.
var DEFAULT_SELECT = ["run.status"];
|
|
10380
|
+
// Config file extensions loadConfig accepts; anything else is rejected as unsupported.
var CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([".json", ".js", ".mjs", ".cjs"]);
|
|
10381
|
+
// TypeScript extensions loadConfig rejects with a dedicated "precompile first" message.
var TS_CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([".ts", ".mts", ".cts"]);
|
|
10382
|
+
/**
 * Builds a diagnostic record.
 *
 * @param {string} code - Diagnostic code.
 * @param {string} message - Human-readable description.
 * @param {string} [severity="error"] - Severity label.
 * @returns {{code: string, message: string, severity: string}}
 */
function diagnostic2(code, message, severity = "error") {
  const entry = { code, message, severity };
  return entry;
}
|
|
10385
|
+
/**
 * Builds a check result in the "error" state carrying one error diagnostic.
 *
 * @param {string} code - Diagnostic code.
 * @param {string} message - Diagnostic message.
 * @param {string} [format="unknown"] - Trace format to report.
 * @returns {object} Result with `ok: false`, `status: "error"`, no findings
 *   and a summary that counts exactly one error.
 */
function errorResult2(code, message, format = "unknown") {
  const onlyDiagnostic = diagnostic2(code, message);
  const summary = { passed: 0, failed: 0, warnings: 0, errors: 1 };
  return {
    ok: false,
    status: "error",
    format,
    summary,
    findings: [],
    diagnostics: [onlyDiagnostic]
  };
}
|
|
10401
|
+
/**
 * Parses an optional numeric CLI value.
 *
 * @param {unknown} value - Raw option value; `undefined` means "not provided".
 * @param {string} label - Option name used as the prefix of the error message.
 * @returns {number|undefined} The parsed number, or `undefined` when absent.
 * @throws {Error} When the value is blank, not finite, or negative.
 */
function parseNumber(value, label) {
  if (value === void 0) return void 0;
  // Number("") and Number("   ") evaluate to 0, so a blank flag value would
  // otherwise be accepted silently as a zero limit.
  const isBlank = typeof value === "string" && value.trim() === "";
  const parsed = Number(value);
  if (isBlank || !Number.isFinite(parsed) || parsed < 0) {
    throw new Error(`${label} must be a non-negative number.`);
  }
  return parsed;
}
|
|
10409
|
+
/**
 * Validates that an optional value is an array containing only strings.
 *
 * @param {unknown} value - Value to validate; `undefined` passes through.
 * @returns {string[]|undefined} The same array instance, or `undefined`.
 * @throws {Error} "Expected an array of strings." for any other shape.
 */
function asStringArray(value) {
  if (value === void 0) return void 0;
  const allStrings = Array.isArray(value) && value.every((item) => typeof item === "string");
  if (allStrings) return value;
  throw new Error("Expected an array of strings.");
}
|
|
10416
|
+
/**
 * Validates the top-level value of a loaded config.
 *
 * @param {unknown} value - Parsed JSON or module export.
 * @returns {object} `{}` for `undefined`/`null`, otherwise the same object.
 * @throws {Error} "Config must export an object." for arrays and non-objects.
 */
function asConfig(value) {
  if (value === void 0 || value === null) return {};
  const isObjectRecord = typeof value === "object" && !Array.isArray(value);
  if (isObjectRecord) return value;
  throw new Error("Config must export an object.");
}
|
|
10423
|
+
/**
 * Loads a check config from disk.
 *
 * `.json` files are read and parsed; `.js`, `.mjs` and `.cjs` files are
 * imported and their `default` export (or the module namespace when there is
 * no `default`) is used.
 *
 * @param {string|undefined} configPath - Path to the config; `undefined` yields `{}`.
 * @returns {Promise<object>} The validated config object.
 * @throws {Error} For TypeScript or unsupported extensions, unreadable or
 *   invalid files, and non-object exports.
 */
async function loadConfig(configPath) {
  if (configPath === void 0) return {};
  const pathApi = path10__default.default;
  const extension = pathApi.extname(configPath);
  if (TS_CONFIG_EXTENSIONS.has(extension)) {
    throw new Error(
      "TypeScript check configs require an explicit precompiled JavaScript config or future --config-loader support."
    );
  }
  if (!CONFIG_EXTENSIONS.has(extension)) {
    throw new Error("Unsupported check config extension. Use .json, .js, .mjs, or .cjs.");
  }
  const absolute = pathApi.resolve(configPath);
  if (extension !== ".json") {
    // import() needs a file: URL, not a bare filesystem path.
    const mod = await import(url.pathToFileURL(absolute).href);
    return asConfig("default" in mod ? mod.default : mod);
  }
  const raw = await promises.readFile(absolute, "utf-8");
  return asConfig(JSON.parse(raw));
}
|
|
10442
|
+
/**
 * Extracts the `checks` section of a config.
 *
 * @param {object} config - Validated config object.
 * @returns {object} `config.checks`, or `{}` when it is `undefined` or `null`.
 * @throws {Error} "checks config must be an object." for arrays and non-objects.
 */
function normalizeConfig(config) {
  // `typeof null === "object"`, so null has to be handled before the shape
  // check; otherwise it would be returned as-is and callers would crash on
  // property access. It is treated as "absent", matching asConfig.
  if (config.checks === void 0 || config.checks === null) return {};
  if (typeof config.checks !== "object" || Array.isArray(config.checks)) {
    throw new Error("checks config must be an object.");
  }
  return config.checks;
}
|
|
10449
|
+
/**
 * Assembles the rule list and rule selection for the `check` command.
 *
 * Five rules are always created (run status, structure orphan, structure
 * cycle, safety raw content, safety secret pattern). The remaining rules are
 * added only when the config or CLI options supply a relevant setting.
 * `--max-duration-ms` and `--max-total-tokens` take precedence over the
 * config values; tool and model lists from the CLI are appended to the
 * config lists.
 *
 * @param {object} config - Loaded config; its `checks` section is used.
 * @param {object} options - CLI options.
 * @returns {{rules: object[], select: string[], diagnostics: object[]}}
 * @throws {Error} When a numeric option or `checks.select` is invalid.
 */
function buildRules(config, options) {
  const isSet = (value) => value !== void 0;
  const anyNonEmpty = (...lists) => lists.some((list) => list?.length);

  const diagnostics = [];
  const checks = normalizeConfig(config);
  const run = checks.run ?? {};
  const tool = checks.tool ?? {};
  const llm = checks.llm ?? {};
  const structure = checks.structure ?? {};
  const safety = checks.safety ?? {};

  // CLI values win over config values.
  const durationLimit = parseNumber(options.maxDurationMs, "--max-duration-ms") ?? run.maxDurationMs;
  const tokenLimit = parseNumber(options.maxTotalTokens, "--max-total-tokens") ?? llm.maxTotalTokens;

  const rules = [
    createRunStatusRule(run),
    createStructureOrphanRule(),
    createStructureCycleRule(),
    createSafetyRawContentRule(),
    createSafetySecretPatternRule()
  ];
  if (isSet(durationLimit)) {
    rules.push(createRunDurationRule({ maxDurationMs: durationLimit }));
  }
  if (isSet(run.maxDepth)) {
    rules.push(createRunDepthRule({ maxDepth: run.maxDepth }));
  }

  const toolOptions = {
    ...tool,
    required: [...tool.required ?? [], ...options.requiredTool ?? []],
    forbidden: [...tool.forbidden ?? [], ...options.forbiddenTool ?? []]
  };
  const toolConfigured = anyNonEmpty(toolOptions.required, toolOptions.forbidden, toolOptions.allowed) || isSet(toolOptions.minCount) || isSet(toolOptions.maxCount);
  if (toolConfigured) {
    rules.push(createToolUsageRule(toolOptions));
  }

  const llmOptions = {
    ...llm,
    allowedModels: [...llm.allowedModels ?? [], ...options.allowedModel ?? []]
  };
  if (isSet(tokenLimit)) {
    llmOptions.maxTotalTokens = tokenLimit;
  }
  const llmLimits = [
    llmOptions.maxCalls,
    llmOptions.maxInputTokens,
    llmOptions.maxOutputTokens,
    llmOptions.maxTotalTokens,
    llmOptions.maxCachedTokens
  ];
  const llmConfigured = anyNonEmpty(llmOptions.allowedModels, llmOptions.allowedProviders, llmOptions.finishReasons) || llmLimits.some(isSet);
  if (llmConfigured) {
    rules.push(createLlmUsageRule(llmOptions));
  }

  const relationshipSettings = [
    structure.minConfidence,
    structure.requireParentBeforeChild,
    structure.requireTraceParentSpan
  ];
  if (relationshipSettings.some(isSet)) {
    rules.push(createStructureRelationshipRule(structure));
  }
  const { maxChildren, maxConcurrent } = structure;
  if (isSet(maxChildren) || isSet(maxConcurrent)) {
    rules.push(createStructureParallelWidthRule({ maxChildren, maxConcurrent }));
  }

  if (safety.redaction) {
    rules.push(createSafetyRedactionRule());
  }
  const sizeLimits = [
    safety.maxStringLength,
    safety.maxArrayLength,
    safety.maxObjectKeys,
    safety.maxSerializedBytes
  ];
  if (sizeLimits.some(isSet)) {
    rules.push(createSafetyOversizedAttributeRule(safety));
  }

  // Config selections come first, then --rule values.
  const select = [
    ...asStringArray(checks.select) ?? [],
    ...options.rule ?? []
  ];
  return {
    rules,
    select: select.length > 0 ? select : DEFAULT_SELECT,
    diagnostics
  };
}
|
|
10513
|
+
/**
 * Maps a check result to a process exit code.
 *
 * "pass" is 0 and "fail" is 1. For any other status the diagnostic codes are
 * inspected in priority order: format problems (4), unreadable trace or
 * baseline (3), invalid arguments/config or missing run selection (2). Any
 * other error is 1.
 *
 * @param {object} result - Check result with `status` and `diagnostics`.
 * @returns {number} Exit code.
 */
function exitCodeFor(result) {
  if (result.status === "pass") return 0;
  if (result.status === "fail") return 1;
  const seen = new Set(result.diagnostics.map((item) => item.code));
  // Earlier tiers take precedence when several kinds of diagnostics are present.
  const tiers = [
    [4, ["AI_CHECK_UNSUPPORTED_FORMAT", "AI_CHECK_AMBIGUOUS_FORMAT"]],
    [3, ["AI_CHECK_TRACE_UNREADABLE", "AI_CHECK_BASELINE_UNREADABLE"]],
    [2, ["AI_CHECK_INVALID_ARGUMENTS", "AI_CHECK_INVALID_CONFIG", "AI_CHECK_CONFIG_LOAD_FAILED", "AI_CHECK_RUN_SELECTION_REQUIRED"]]
  ];
  for (const [exitCode, codes] of tiers) {
    if (codes.some((code) => seen.has(code))) return exitCode;
  }
  return 1;
}
|
|
10534
|
+
/**
 * Returns a deep copy of a JSON-like value in which every object's keys are
 * ordered with `localeCompare`.
 *
 * @param {unknown} value - Value to normalise.
 * @returns {unknown} Primitives and `null` unchanged; arrays and objects copied.
 */
function stable(value) {
  if (Array.isArray(value)) {
    return value.map((item) => stable(item));
  }
  const isRecord = value !== null && typeof value === "object";
  if (!isRecord) return value;
  const orderedKeys = Object.keys(value).sort((left, right) => left.localeCompare(right));
  const entries = orderedKeys.map((key) => [key, stable(value[key])]);
  return Object.fromEntries(entries);
}
|
|
10542
|
+
/**
 * Prints a check result to stdout as two-space-indented JSON with ordered keys.
 *
 * @param {object} result - Check result to print.
 */
function printJson(result) {
  const ordered = stable(result);
  const text = JSON.stringify(ordered, null, 2);
  console.log(text);
}
|
|
10545
|
+
/**
 * Prints a check result to stdout as plain text: status, format, optional
 * run id, a summary line, then one line per diagnostic and per finding.
 *
 * @param {object} result - Check result to print.
 */
function printHuman(result) {
  const say = (line) => console.log(line);
  say(`Check status: ${result.status}`);
  say(`Format: ${result.format}`);
  if (result.runId !== void 0) {
    say(`Run: ${result.runId}`);
  }
  const { failed, warnings, errors } = result.summary;
  say(`Summary: ${failed} failed, ${warnings} warning(s), ${errors} error(s)`);
  for (const { code, message } of result.diagnostics) {
    say(`- ${code}: ${message}`);
  }
  for (const finding of result.findings) {
    // Only the first evidence entry's path is shown, and only when present.
    const path12 = finding.evidence[0]?.path;
    const suffix = path12 ? ` (${path12})` : "";
    say(`- ${finding.ruleId}: ${finding.message}${suffix}`);
  }
}
|
|
10560
|
+
/**
 * Converts a failure raised while reading a trace into an error result.
 *
 * @param {unknown} error - Caught value.
 * @returns {object} Error result. A `TraceReadError` with code
 *   "unsupported_format" or "ambiguous_format" gets the matching AI_CHECK_*
 *   code; everything else is reported as AI_CHECK_TRACE_UNREADABLE.
 */
function readErrorResult(error) {
  if (!(error instanceof TraceReadError)) {
    const text = error instanceof Error ? error.message : String(error);
    return errorResult2("AI_CHECK_TRACE_UNREADABLE", text);
  }
  let code;
  switch (error.code) {
    case "unsupported_format":
      code = "AI_CHECK_UNSUPPORTED_FORMAT";
      break;
    case "ambiguous_format":
      code = "AI_CHECK_AMBIGUOUS_FORMAT";
      break;
    default:
      code = "AI_CHECK_TRACE_UNREADABLE";
  }
  return errorResult2(code, error.message);
}
|
|
10570
|
+
/**
 * Implements the `check` command: loads the config, builds rules, opens the
 * trace, runs the checks, sets `process.exitCode` and prints the result.
 *
 * Failures are never rethrown. They are turned into an error result whose
 * code depends on whether the failure happened while preparing the config
 * and rules or while reading and checking the trace.
 *
 * @param {string} target - Trace target ("-", a path, or a value resolved by `inputFromTarget`).
 * @param {object} [options] - CLI options (`config`, `format`, `run`, `json`, ...).
 * @param {object} [stdin=process.stdin] - Stream used when `target` is "-".
 * @returns {Promise<void>}
 */
async function checkCommand(target, options = {}, stdin = process.stdin) {
  // Classifies a config-phase failure from the error type and message text.
  const configErrorCode = (error, message) => {
    if (message.startsWith("--")) return "AI_CHECK_INVALID_ARGUMENTS";
    const invalidMarkers = [
      "Unsupported check config extension",
      "TypeScript check configs",
      "Config must",
      "checks config",
      "Expected an array"
    ];
    const invalid = error instanceof SyntaxError || invalidMarkers.some((marker) => message.includes(marker));
    return invalid ? "AI_CHECK_INVALID_CONFIG" : "AI_CHECK_CONFIG_LOAD_FAILED";
  };

  let phase = "config";
  let result;
  try {
    const config = await loadConfig(options.config);
    const built = buildRules(config, options);
    const hasConfigError = built.diagnostics.some((item) => item.severity === "error");
    if (hasConfigError) {
      result = errorResult2("AI_CHECK_INVALID_CONFIG", "Invalid check configuration.");
      result.diagnostics = [...built.diagnostics];
    } else {
      // From here on, failures are attributed to reading the trace.
      phase = "read";
      const input3 = await inputFromTarget(target, options, stdin);
      const readOptions = options.format !== void 0 ? { format: options.format } : {};
      const read = await openTrace(input3, readOptions);
      const checkOptions = { rules: built.rules, select: built.select };
      if (options.run !== void 0) {
        checkOptions.runId = options.run;
      }
      result = runTraceChecks({ read }, checkOptions);
    }
  } catch (error) {
    if (phase === "config") {
      const message = error instanceof Error ? error.message : String(error);
      result = errorResult2(configErrorCode(error, message), message);
    } else {
      result = readErrorResult(error);
    }
  }
  process.exitCode = exitCodeFor(result);
  if (options.json) {
    printJson(result);
  } else {
    printHuman(result);
  }
}
|
|
10610
|
+
|
|
10611
|
+
// packages/cli/src/safety.ts
|
|
10612
|
+
// Disclaimer attached as `note` to every safety result and printed by the human-readable output.
var BEST_EFFORT_NOTE = "Best-effort local safety verification only; not a compliance, privacy, security, or regulatory certification.";
|
|
10613
|
+
// Fallback string-length limit used by buildSafetyRules when --max-string-length is not given.
var DEFAULT_MAX_STRING_LENGTH = 16384;
|
|
10614
|
+
// Fallback array-length limit used by buildSafetyRules when --max-array-length is not given.
var DEFAULT_MAX_ARRAY_LENGTH = 1e3;
|
|
10615
|
+
// Fallback object-key-count limit used by buildSafetyRules when --max-object-keys is not given.
var DEFAULT_MAX_OBJECT_KEYS = 200;
|
|
10616
|
+
// Fallback serialized-size limit (128 KiB) used by buildSafetyRules when --max-serialized-bytes is not given.
var DEFAULT_MAX_SERIALIZED_BYTES = 128 * 1024;
|
|
10617
|
+
/**
 * Parses an optional numeric limit supplied on the command line.
 *
 * @param {unknown} value - Raw option value; `undefined` means "not provided".
 * @param {string} label - Option name used as the prefix of the error message.
 * @returns {number|undefined} The parsed limit, or `undefined` when absent.
 * @throws {Error} When the value is blank, not finite, or negative.
 */
function parseLimit3(value, label) {
  if (value === void 0) return void 0;
  // Number("") and Number("   ") evaluate to 0, so a blank flag value would
  // otherwise be accepted silently as a zero limit.
  const isBlank = typeof value === "string" && value.trim() === "";
  const parsed = Number(value);
  if (isBlank || !Number.isFinite(parsed) || parsed < 0) {
    throw new Error(`${label} must be a non-negative number.`);
  }
  return parsed;
}
|
|
10625
|
+
/**
 * Returns a deep copy of a JSON-like value in which every object's keys are
 * ordered with `localeCompare`.
 *
 * @param {unknown} value - Value to normalise.
 * @returns {unknown} Primitives and `null` unchanged; arrays and objects copied.
 */
function stable2(value) {
  if (Array.isArray(value)) {
    return value.map((item) => stable2(item));
  }
  const isRecord = value !== null && typeof value === "object";
  if (!isRecord) return value;
  const orderedKeys = Object.keys(value).sort((left, right) => left.localeCompare(right));
  const entries = orderedKeys.map((key) => [key, stable2(value[key])]);
  return Object.fromEntries(entries);
}
|
|
10633
|
+
/**
 * Builds a diagnostic record for the safety commands.
 *
 * @param {string} code - Diagnostic code.
 * @param {string} message - Human-readable description.
 * @param {string} [severity="error"] - Severity label.
 * @returns {{code: string, message: string, severity: string}}
 */
function safetyDiagnostic(code, message, severity = "error") {
  const entry = { code, message, severity };
  return entry;
}
|
|
10636
|
+
/**
 * Converts reader warnings and unsupported-field names into diagnostics.
 *
 * @param {object[]} warnings - Reader warnings with `code`, `message`, `severity`.
 * @param {string[]} unsupportedFields - Field names the reader could not handle.
 * @returns {object[]} Warning diagnostics first, then one "unsupported_field"
 *   warning per field.
 */
function warningDiagnostics(warnings, unsupportedFields) {
  // Any severity other than "error" is reported as a warning.
  const fromWarnings = warnings.map(
    (warning) => safetyDiagnostic(warning.code, warning.message, warning.severity === "error" ? "error" : "warning")
  );
  const fromFields = unsupportedFields.map(
    (field) => safetyDiagnostic("unsupported_field", `Reader reported unsupported field: ${field}`, "warning")
  );
  return [...fromWarnings, ...fromFields];
}
|
|
10654
|
+
/**
 * Copies a check diagnostic into the safety diagnostic shape.
 *
 * @param {object} item - Diagnostic with `code`, `message` and `severity`.
 * @returns {object} Diagnostic built by `safetyDiagnostic`.
 */
function diagnosticFromCheck(item) {
  const code = item.code;
  const message = item.message;
  const severity = item.severity;
  return safetyDiagnostic(code, message, severity);
}
|
|
10657
|
+
/**
 * Derives the overall safety status.
 *
 * Precedence: an error diagnostic gives "UNKNOWN"; otherwise an error finding
 * gives "UNSAFE"; otherwise any warning gives "SAFE WITH WARNINGS"; otherwise
 * "SAFE".
 *
 * @param {object[]} findings - Findings with a `severity`.
 * @param {object[]} diagnostics - Diagnostics with a `severity`.
 * @returns {string} One of the four status labels.
 */
function statusFrom(findings, diagnostics) {
  const has = (items, level) => items.some((item) => item.severity === level);
  if (has(diagnostics, "error")) return "UNKNOWN";
  if (has(findings, "error")) return "UNSAFE";
  if (has(diagnostics, "warning") || has(findings, "warning")) {
    return "SAFE WITH WARNINGS";
  }
  return "SAFE";
}
|
|
10664
|
+
/**
 * Assembles a safety result from its parts.
 *
 * The list inputs are copied, the status is derived with `statusFrom`, and
 * the summary counts warnings and errors across findings and diagnostics.
 *
 * @param {object} parts - `command`, `format`, optional `runId`, and optional
 *   `findings`, `diagnostics`, `warnings`, `unsupportedFields` arrays.
 * @returns {object} Safety result including the best-effort note.
 */
function resultFromParts(parts) {
  const findings = [...parts.findings ?? []];
  const diagnostics = [...parts.diagnostics ?? []];
  const warnings = [...parts.warnings ?? []];
  const unsupportedFields = [...parts.unsupportedFields ?? []];
  const status = statusFrom(findings, diagnostics);
  const countAt = (level) => {
    const matches = (item) => item.severity === level;
    return diagnostics.filter(matches).length + findings.filter(matches).length;
  };

  const result = {
    ok: status === "SAFE" || status === "SAFE WITH WARNINGS",
    command: parts.command,
    status,
    format: parts.format
  };
  if (parts.runId !== void 0) {
    result.runId = parts.runId;
  }
  result.summary = {
    findings: findings.length,
    warnings: countAt("warning"),
    errors: countAt("error")
  };
  result.findings = findings;
  result.diagnostics = diagnostics;
  result.warnings = warnings;
  result.unsupportedFields = unsupportedFields;
  result.note = BEST_EFFORT_NOTE;
  return result;
}
|
|
10688
|
+
/**
 * Converts a failure raised while reading a trace into a safety result.
 *
 * @param {string} command - Name of the safety command being run.
 * @param {unknown} error - Caught value.
 * @returns {object} Safety result with one error diagnostic. For a
 *   `TraceReadError` the code follows `error.code` and the error's warnings
 *   are carried over.
 */
function readErrorResult2(command, error) {
  if (!(error instanceof TraceReadError)) {
    const text = error instanceof Error ? error.message : String(error);
    return resultFromParts({
      command,
      format: "unknown",
      diagnostics: [safetyDiagnostic("AI_SAFETY_TRACE_UNREADABLE", text)]
    });
  }
  let code;
  switch (error.code) {
    case "unsupported_format":
      code = "AI_SAFETY_UNSUPPORTED_FORMAT";
      break;
    case "ambiguous_format":
      code = "AI_SAFETY_AMBIGUOUS_FORMAT";
      break;
    default:
      code = "AI_SAFETY_TRACE_UNREADABLE";
  }
  return resultFromParts({
    command,
    format: "unknown",
    diagnostics: [safetyDiagnostic(code, error.message)],
    warnings: error.warnings
  });
}
|
|
10709
|
+
/**
 * Builds the safety result for an invalid command-line argument.
 *
 * @param {string} command - Name of the safety command being run.
 * @param {unknown} error - Caught value describing the bad argument.
 * @returns {object} Safety result with one AI_SAFETY_INVALID_ARGUMENTS diagnostic.
 */
function invalidArgumentResult(command, error) {
  const text = error instanceof Error ? error.message : String(error);
  const diagnostics = [safetyDiagnostic("AI_SAFETY_INVALID_ARGUMENTS", text)];
  return resultFromParts({ command, format: "unknown", diagnostics });
}
|
|
10721
|
+
/**
 * Creates the rule set used by the safety commands.
 *
 * Each size limit comes from its CLI option when given and from the matching
 * DEFAULT_MAX_* constant otherwise.
 *
 * @param {object} options - CLI options holding the optional limits.
 * @returns {object[]} Raw-content, redaction, secret-pattern and oversized-attribute rules.
 * @throws {Error} When a limit is not a non-negative number.
 */
function buildSafetyRules(options) {
  // Limits are parsed in this fixed order so the first invalid flag is the one reported.
  const limits = {
    maxStringLength: parseLimit3(options.maxStringLength, "--max-string-length") ?? DEFAULT_MAX_STRING_LENGTH,
    maxArrayLength: parseLimit3(options.maxArrayLength, "--max-array-length") ?? DEFAULT_MAX_ARRAY_LENGTH,
    maxObjectKeys: parseLimit3(options.maxObjectKeys, "--max-object-keys") ?? DEFAULT_MAX_OBJECT_KEYS,
    maxSerializedBytes: parseLimit3(options.maxSerializedBytes, "--max-serialized-bytes") ?? DEFAULT_MAX_SERIALIZED_BYTES
  };
  const rules = [];
  rules.push(createSafetyRawContentRule());
  rules.push(createSafetyRedactionRule());
  rules.push(createSafetySecretPatternRule());
  rules.push(createSafetyOversizedAttributeRule(limits));
  return rules;
}
|
|
10738
|
+
/**
 * Maps a safety result to a process exit code.
 *
 * @param {object} result - Safety result with a `status`.
 * @returns {number} 0 for "SAFE" or "SAFE WITH WARNINGS", 1 for "UNSAFE", 2 otherwise.
 */
function exitCodeFor2(result) {
  switch (result.status) {
    case "SAFE":
    case "SAFE WITH WARNINGS":
      return 0;
    case "UNSAFE":
      return 1;
    default:
      return 2;
  }
}
|
|
10743
|
+
/**
 * Prints a safety result to stdout as two-space-indented JSON with ordered keys.
 *
 * @param {object} result - Safety result to print.
 */
function printJson2(result) {
  const ordered = stable2(result);
  const text = JSON.stringify(ordered, null, 2);
  console.log(text);
}
|
|
10746
|
+
/**
 * Prints a safety result to stdout as plain text: status, format, optional
 * run id, a summary line, one line per diagnostic and per finding, and
 * finally the result's note.
 *
 * @param {object} result - Safety result to print.
 */
function printHuman2(result) {
  const say = (line) => console.log(line);
  say(`Safety status: ${result.status}`);
  say(`Format: ${result.format}`);
  if (result.runId !== void 0) {
    say(`Run: ${result.runId}`);
  }
  const { findings: findingCount, warnings, errors } = result.summary;
  say(`Summary: ${findingCount} finding(s), ${warnings} warning(s), ${errors} error(s)`);
  for (const { code, message } of result.diagnostics) {
    say(`- ${code}: ${message}`);
  }
  for (const finding of result.findings) {
    // Only the first evidence entry's path is shown, and only when present.
    const path12 = finding.evidence[0]?.path;
    const suffix = path12 ? ` (${path12})` : "";
    say(`- ${finding.ruleId}: ${finding.message}${suffix}`);
  }
  say(`Note: ${result.note}`);
}
|
|
10762
|
+
/**
 * Shared implementation of the safety commands: builds the safety rules,
 * opens the trace, runs the checks, sets `process.exitCode` and prints the
 * result.
 *
 * Failures are never rethrown. A message starting with "--" is reported as an
 * invalid argument; anything else is reported as a read error.
 *
 * @param {string} command - Command name recorded in the result.
 * @param {string} target - Trace target.
 * @param {object} options - CLI options (`format`, `run`, `json`, size limits).
 * @param {object} stdin - Stream used when `target` is "-".
 * @returns {Promise<void>}
 */
async function safetyCommand(command, target, options, stdin) {
  let result;
  try {
    const rules = buildSafetyRules(options);
    const input3 = await inputFromTarget(target, options, stdin);
    const readOptions = options.format !== void 0 ? { format: options.format } : {};
    const read = await openTrace(input3, readOptions);
    const checkOptions = { rules };
    if (options.run !== void 0) {
      checkOptions.runId = options.run;
    }
    const checkResult = runTraceChecks({ read }, checkOptions);
    // Check diagnostics come first, then the ones derived from reader warnings.
    const diagnostics = [
      ...checkResult.diagnostics.map(diagnosticFromCheck),
      ...warningDiagnostics(read.warnings, read.unsupportedFields)
    ];
    result = resultFromParts({
      command,
      format: checkResult.format,
      runId: checkResult.runId,
      findings: checkResult.findings,
      diagnostics,
      warnings: read.warnings,
      unsupportedFields: read.unsupportedFields
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    if (message.startsWith("--")) {
      result = invalidArgumentResult(command, error);
    } else {
      result = readErrorResult2(command, error);
    }
  }
  process.exitCode = exitCodeFor2(result);
  if (options.json) {
    printJson2(result);
  } else {
    printHuman2(result);
  }
}
|
|
10797
|
+
/**
 * Entry point of the `scan` command; delegates to `safetyCommand`.
 *
 * @param {string} target - Trace target.
 * @param {object} [options] - CLI options.
 * @param {object} [stdin=process.stdin] - Stream used when `target` is "-".
 * @returns {Promise<void>} Whatever `safetyCommand` returns.
 */
function scanCommand(target, options = {}, stdin = process.stdin) {
  const commandName = "scan";
  return safetyCommand(commandName, target, options, stdin);
}
|
|
10800
|
+
/**
 * Entry point of the `verify-safe` command; delegates to `safetyCommand`.
 *
 * @param {string} target - Trace target.
 * @param {object} [options] - CLI options.
 * @param {object} [stdin=process.stdin] - Stream used when `target` is "-".
 * @returns {Promise<void>} Whatever `safetyCommand` returns.
 */
function verifySafeCommand(target, options = {}, stdin = process.stdin) {
  const commandName = "verify-safe";
  return safetyCommand(commandName, target, options, stdin);
}
|
|
10803
|
+
// Disclaimer embedded in both the Markdown and the HTML artifact renderings.
var NOTE = "Generated locally by AgentInspect. Artifacts are best-effort summaries, not compliance or security certification.";
|
|
10804
|
+
// Fixed rule set used when generating artifacts. The size limits reuse the
// DEFAULT_MAX_* constants (same values as the former inline literals) so the
// artifact rules cannot drift from the defaults buildSafetyRules applies.
var SAFETY_RULES = [
  createSafetyRawContentRule(),
  createSafetyRedactionRule(),
  createSafetySecretPatternRule(),
  createSafetyOversizedAttributeRule({
    maxStringLength: DEFAULT_MAX_STRING_LENGTH,
    maxArrayLength: DEFAULT_MAX_ARRAY_LENGTH,
    maxObjectKeys: DEFAULT_MAX_OBJECT_KEYS,
    maxSerializedBytes: DEFAULT_MAX_SERIALIZED_BYTES
  })
];
|
|
10815
|
+
/**
 * Returns a deep copy of a JSON-like value in which every object's keys are
 * ordered with `localeCompare`.
 *
 * @param {unknown} value - Value to normalise.
 * @returns {unknown} Primitives and `null` unchanged; arrays and objects copied.
 */
function stable3(value) {
  if (Array.isArray(value)) {
    return value.map((item) => stable3(item));
  }
  const isRecord = value !== null && typeof value === "object";
  if (!isRecord) return value;
  const orderedKeys = Object.keys(value).sort((left, right) => left.localeCompare(right));
  const entries = orderedKeys.map((key) => [key, stable3(value[key])]);
  return Object.fromEntries(entries);
}
|
|
10823
|
+
/**
 * Serialises a value as key-ordered, two-space-indented JSON followed by a newline.
 *
 * @param {unknown} value - Value to serialise.
 * @returns {string} JSON text ending with "\n".
 */
function writeJson2(value) {
  const json = JSON.stringify(stable3(value), null, 2);
  return `${json}\n`;
}
|
|
10827
|
+
/**
 * Escapes the characters that are significant in HTML text and in
 * double-quoted attribute values.
 *
 * @param {string} value - Text to embed in HTML.
 * @returns {string} Text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtml2(value) {
  // "&" must be replaced first so the entities produced below are not escaped again.
  return value
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;");
}
|
|
10830
|
+
/**
 * Renders key/value pairs as a two-column Markdown table.
 *
 * @param {Iterable<[string, unknown]>} rows - `[field, value]` pairs; a
 *   `null` or `undefined` value is shown as "unknown".
 * @returns {string} Table text with lines joined by "\n".
 */
function markdownTable(rows) {
  const header = ["| Field | Value |", "| --- | --- |"];
  const body = [...rows].map(([field, value]) => `| ${field} | ${value ?? "unknown"} |`);
  return header.concat(body).join("\n");
}
|
|
10837
|
+
/**
 * Adds one to the counter stored under `key`, creating it when needed.
 *
 * @param {Object<string, number>} record - Counter map, mutated in place.
 * @param {string|undefined} key - Counter name; a missing or blank key is
 *   counted under "unknown".
 */
function increment(record, key) {
  const isMissing = !key || key.trim() === "";
  const label = isMissing ? "unknown" : key;
  const previous = record[label] ?? 0;
  record[label] = previous + 1;
}
|
|
10841
|
+
/**
 * Picks the run to summarise.
 *
 * @param {object} read - Opened trace exposing a `runs` array.
 * @param {string|undefined} runId - Explicit run id, if any.
 * @returns {object|undefined} The run with that id. Without an id, the only
 *   run when exactly one exists; otherwise `undefined`.
 */
function selectRun2(read, runId) {
  if (runId === void 0) {
    // Without an explicit id the choice is only unambiguous for a single run.
    return read.runs.length === 1 ? read.runs[0] : void 0;
  }
  for (const run of read.runs) {
    if (run.runId === runId) return run;
  }
  return void 0;
}
|
|
10847
|
+
/**
 * Summarises an opened trace, scoped to one run when a run is given.
 *
 * @param {object} read - Opened trace with `format`, `runs`, `events`,
 *   `warnings` and `unsupportedFields`.
 * @param {object|undefined} run - Selected run, if any.
 * @returns {object} Format, optional run details, run and event counts,
 *   per-kind and per-status event counts, and reader warning counts.
 */
function summarizeTrace(read, run) {
  const runId = run?.runId;
  // Without a run id every event in the trace is counted.
  const scopedEvents = runId === void 0 ? read.events : read.events.filter((event) => event.runId === runId);
  const eventsByKind = {};
  const eventsByStatus = {};
  for (const { kind, status } of scopedEvents) {
    increment(eventsByKind, kind);
    increment(eventsByStatus, status);
  }
  const sortedCounts = (counts) => Object.fromEntries(Object.entries(counts).sort());

  const summary = { format: read.format };
  if (runId !== void 0) summary.runId = runId;
  if (run?.status !== void 0) summary.runStatus = run.status;
  if (run?.durationMs !== void 0) summary.runDurationMs = run.durationMs;
  summary.runCount = read.runs.length;
  summary.eventCount = scopedEvents.length;
  summary.eventsByKind = sortedCounts(eventsByKind);
  summary.eventsByStatus = sortedCounts(eventsByStatus);
  summary.readerWarnings = read.warnings.length;
  summary.unsupportedFields = read.unsupportedFields.length;
  return summary;
}
|
|
10869
|
+
/**
 * Renders a check result as plain text: three header lines, then up to ten
 * findings and up to ten diagnostics.
 *
 * @param {object} result - Check result with `status`, `findings` and `diagnostics`.
 * @returns {string} Lines joined by "\n".
 */
function renderCheckSection(result) {
  const header = [
    `Status: ${result.status}`,
    `Findings: ${result.findings.length}`,
    `Diagnostics: ${result.diagnostics.length}`
  ];
  const findingLines = result.findings.slice(0, 10).map((finding) => {
    // A finding without an evidence path is shown as "(run)".
    const path12 = finding.evidence[0]?.path ?? "(run)";
    return `- ${finding.ruleId}: ${finding.message} (${path12})`;
  });
  const diagnosticLines = result.diagnostics.slice(0, 10).map(
    (diagnostic3) => `- ${diagnostic3.code}: ${diagnostic3.message}`
  );
  return [...header, ...findingLines, ...diagnosticLines].join("\n");
}
|
|
10884
|
+
/**
 * Renders the CI artifact summary as Markdown.
 *
 * @param {object} trace - Trace summary (format, run details, counts).
 * @param {object} check - Safety check result.
 * @param {object|undefined} diff - Baseline diff result, if a baseline was supplied.
 * @returns {string} Markdown document with lines joined by "\n".
 */
function renderMarkdown(trace, check, diff) {
  const traceRows = [
    ["Format", trace.format],
    ["Run", trace.runId],
    ["Run status", trace.runStatus],
    ["Run duration ms", trace.runDurationMs],
    ["Runs", trace.runCount],
    ["Events", trace.eventCount],
    ["Reader warnings", trace.readerWarnings],
    ["Unsupported fields", trace.unsupportedFields]
  ];
  const sections = [
    "# AgentInspect CI Artifacts",
    "",
    NOTE,
    "",
    "## Trace",
    "",
    markdownTable(traceRows),
    "",
    "## Safety check",
    "",
    "```text",
    renderCheckSection(check),
    "```",
    "",
    "## Baseline diff",
    ""
  ];
  // The trailing empty entry in each variant makes the document end with a newline.
  const baselineLines = diff ? ["```text", renderCheckSection(diff), "```", ""] : ["No baseline was supplied.", ""];
  return sections.concat(baselineLines).join("\n");
}
|
|
10919
|
+
/**
 * Renders the CI artifact summary as a standalone HTML page.
 *
 * Every interpolated value is passed through `escapeHtml2`.
 *
 * @param {object} trace - Trace summary (format, run details, counts).
 * @param {object} check - Safety check result.
 * @param {object|undefined} diff - Baseline diff result, if a baseline was supplied.
 * @returns {string} Complete HTML document ending with a newline.
 */
function renderHtml(trace, check, diff) {
  const rows = [
    ["Format", trace.format],
    ["Run", trace.runId],
    ["Run status", trace.runStatus],
    ["Run duration ms", trace.runDurationMs],
    ["Runs", trace.runCount],
    ["Events", trace.eventCount],
    ["Reader warnings", trace.readerWarnings],
    ["Unsupported fields", trace.unsupportedFields]
  ];
  let table = "";
  for (const [key, value] of rows) {
    // Missing values are rendered as "unknown", like the Markdown table.
    const shown = String(value ?? "unknown");
    table += `<tr><th>${escapeHtml2(key)}</th><td>${escapeHtml2(shown)}</td></tr>`;
  }
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>AgentInspect CI Artifacts</title>
<style>body{font-family:system-ui,sans-serif;line-height:1.5;margin:1.5rem;max-width:960px;color:#111}table{border-collapse:collapse}th,td{border:1px solid #ddd;padding:0.35rem 0.5rem;text-align:left}pre{white-space:pre-wrap;background:#f8f8f8;padding:0.75rem;overflow:auto}</style>
</head>
<body>
<h1>AgentInspect CI Artifacts</h1>
<p>${escapeHtml2(NOTE)}</p>
<h2>Trace</h2>
<table><tbody>${table}</tbody></table>
<h2>Safety check</h2>
<pre>${escapeHtml2(renderCheckSection(check))}</pre>
<h2>Baseline diff</h2>
<pre>${escapeHtml2(diff ? renderCheckSection(diff) : "No baseline was supplied.")}</pre>
</body>
</html>
`;
}
|
|
10954
|
+
/**
 * Writes one artifact file beneath the output directory and records it.
 *
 * @param {string} outputDir - Root directory for artifacts.
 * @param {string} relativePath - File path relative to `outputDir`.
 * @param {string} content - Text written as UTF-8.
 * @param {string[]} files - List that receives `relativePath` after the write succeeds.
 * @returns {Promise<void>}
 */
async function writeArtifact(outputDir, relativePath, content, files) {
  const pathApi = path10__default.default;
  const outPath = pathApi.join(outputDir, relativePath);
  const parentDir = pathApi.dirname(outPath);
  // Create intermediate directories so nested relative paths work.
  await promises.mkdir(parentDir, { recursive: true });
  await promises.writeFile(outPath, content, "utf-8");
  files.push(relativePath);
}
|
|
10960
|
+
/**
 * Extracts a printable message from a caught value.
 *
 * @param {unknown} error - Caught value.
 * @returns {string} `error.message` for `TraceReadError` and `Error`
 *   instances, otherwise `String(error)`.
 */
function readErrorMessage(error) {
  const carriesMessage = error instanceof TraceReadError || error instanceof Error;
  return carriesMessage ? error.message : String(error);
}
|
|
10964
|
+
/**
 * Collapses the safety check and optional baseline diff into one status.
 *
 * Precedence: "unknown" when either result errored, "unsafe" when the check
 * failed, "regression" when the diff failed, "warning" when either summary
 * has warnings, otherwise "ok".
 *
 * @param {object} check - Safety check result.
 * @param {object|undefined} diff - Baseline diff result, if any.
 * @returns {string} Manifest status label.
 */
function manifestStatus(check, diff) {
  const errored = check.status === "error" || diff?.status === "error";
  if (errored) return "unknown";
  if (check.status === "fail") return "unsafe";
  if (diff?.status === "fail") return "regression";
  if (check.summary.warnings > 0) return "warning";
  if ((diff?.summary.warnings ?? 0) > 0) return "warning";
  return "ok";
}
|
|
10971
|
+
/**
 * CLI handler for the `artifacts` command.
 *
 * Reads a trace, runs the safety rules against it, optionally runs a
 * baseline-regression check, and writes the results into `options.outputDir`:
 * `trace.json`, `check.json`, `diff.json`, `summary.md`, `report.html` and
 * `manifest.json`. When a summary target is configured (via
 * `options.githubSummary`, falling back to the `GITHUB_STEP_SUMMARY`
 * environment variable) the Markdown summary is also appended to that file.
 *
 * A missing output directory, or a failure while reading the trace or the
 * baseline, is reported on stderr and sets `process.exitCode = 1`; the
 * function returns before any artifact is written in those cases.
 *
 * @param {string} target - Trace target, forwarded to `inputFromTarget`.
 * @param {object} [options] - Parsed CLI options (also forwarded to `inputFromTarget`).
 * @param {string} [options.outputDir] - Required; directory that receives the artifacts.
 * @param {string} [options.format] - Trace input format passed to `openTrace`.
 * @param {string} [options.run] - Run id used for run selection and checks.
 * @param {string} [options.baseline] - Baseline trace target; enables the diff artifact.
 * @param {string} [options.baselineRun] - Run id inside the baseline trace.
 * @param {string} [options.githubSummary] - File the Markdown summary is appended to.
 * @param {boolean} [options.json] - Print the manifest as JSON instead of a text listing.
 * @param {NodeJS.ReadableStream} [stdin=process.stdin] - Stream forwarded to `inputFromTarget`.
 * @returns {Promise<void>}
 */
async function artifactsCommand(target, options = {}, stdin = process.stdin) {
  // `?.` keeps a null/undefined outputDir on the friendly error path.
  const requestedOutputDir = options.outputDir?.trim() ?? "";
  if (requestedOutputDir === "") {
    console.error("--output-dir is required.");
    process.exitCode = 1;
    return;
  }
  const outputDir = path10__default.default.resolve(requestedOutputDir);

  // Optional settings shared by the primary and the baseline reads/checks.
  const readOptions = options.format !== void 0 ? { format: options.format } : {};
  const runSelection = options.run !== void 0 ? { runId: options.run } : {};

  let read;
  try {
    const input3 = await inputFromTarget(target, options, stdin);
    read = await openTrace(input3, { ...readOptions });
  } catch (error) {
    console.error(`[AgentInspect] artifacts failed: ${readErrorMessage(error)}`);
    process.exitCode = 1;
    return;
  }

  const selectedRun = selectRun2(read, options.run);
  const check = runTraceChecks({ read }, { rules: SAFETY_RULES, ...runSelection });
  const trace = summarizeTrace(read, selectedRun);

  let diff;
  if (options.baseline !== void 0 && options.baseline.trim() !== "") {
    try {
      const baselineInput = await inputFromTarget(options.baseline, options, stdin);
      const baselineRead = await openTrace(baselineInput, { ...readOptions });
      const regressionRule = createBaselineRegressionRule({
        baseline: { read: baselineRead },
        ...options.baselineRun !== void 0 ? { baselineRunId: options.baselineRun } : {},
        compareFormat: true
      });
      diff = runTraceChecks({ read }, { rules: [regressionRule], ...runSelection });
    } catch (error) {
      console.error(`[AgentInspect] baseline diff failed: ${readErrorMessage(error)}`);
      process.exitCode = 1;
      return;
    }
  }

  // Rendered once and reused for summary.md and the appended step summary.
  const markdown = renderMarkdown(trace, check, diff);

  const files = [];
  await promises.mkdir(outputDir, { recursive: true });
  await writeArtifact(outputDir, "trace.json", writeJson2(trace), files);
  await writeArtifact(outputDir, "check.json", writeJson2(check), files);
  await writeArtifact(
    outputDir,
    "diff.json",
    writeJson2(diff ?? { status: "not_requested", findings: [], diagnostics: [] }),
    files
  );
  await writeArtifact(outputDir, "summary.md", markdown, files);
  await writeArtifact(outputDir, "report.html", renderHtml(trace, check, diff), files);

  // Trim before resolving, matching how --output-dir is handled above, and
  // resolve only once so the appended file and the manifest entry agree.
  const summaryTarget = (options.githubSummary ?? process.env.GITHUB_STEP_SUMMARY)?.trim() ?? "";
  const summaryPath = summaryTarget !== "" ? path10__default.default.resolve(summaryTarget) : void 0;
  if (summaryPath !== void 0) {
    await promises.mkdir(path10__default.default.dirname(summaryPath), { recursive: true });
    await promises.appendFile(summaryPath, `\n${markdown}`, "utf-8");
  }

  // manifest.json lists itself, so it is added to the list before being written.
  const manifestFiles = [...files, "manifest.json"].sort((a, b) => a.localeCompare(b));
  const manifest = {
    status: manifestStatus(check, diff),
    outputDir,
    files: manifestFiles,
    trace,
    check: {
      status: check.status,
      findings: check.findings.length,
      diagnostics: check.diagnostics.length
    },
    diff: {
      status: diff?.status ?? "not_requested",
      findings: diff?.findings.length ?? 0,
      diagnostics: diff?.diagnostics.length ?? 0
    },
    ...summaryPath !== void 0 ? { githubSummary: summaryPath } : {},
    note: NOTE
  };
  await promises.writeFile(path10__default.default.join(outputDir, "manifest.json"), writeJson2(manifest), "utf-8");

  if (options.json === true) {
    console.log(writeJson2(manifest).trimEnd());
    return;
  }
  console.log(`Wrote AgentInspect artifacts to ${outputDir}`);
  console.log(`Status: ${manifest.status}`);
  for (const file of manifest.files) {
    console.log(`- ${file}`);
  }
}
|
|
11072
|
+
|
|
9097
11073
|
// packages/cli/src/index.ts
|
|
9098
11074
|
function runCommand(action) {
|
|
9099
11075
|
void action().catch((error) => {
|
|
@@ -9195,6 +11171,54 @@ function createCliProgram() {
|
|
|
9195
11171
|
).option("--json", "print result as JSON").option("--diagnostics", "print reader warnings and unsupported fields").option("--run <run-id>", "select a run when the trace contains multiple runs").action((input3, opts) => {
|
|
9196
11172
|
runCommand(() => openCommand(input3, opts));
|
|
9197
11173
|
});
|
|
11174
|
+
program.command("check").description("Run deterministic checks against a local trace").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11175
|
+
new commander.Option("--format <format>", "trace input format").choices([
|
|
11176
|
+
"agent-inspect-jsonl",
|
|
11177
|
+
"openinference-json",
|
|
11178
|
+
"otlp-json"
|
|
11179
|
+
])
|
|
11180
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--config <path>", "path to check config (.json, .js, .mjs, .cjs)").option("--json", "print deterministic JSON check result").option("--rule <id>", "select a rule id (repeatable)", (value, previous = []) => [
|
|
11181
|
+
...previous,
|
|
11182
|
+
value
|
|
11183
|
+
]).option("--max-duration-ms <number>", "add run.duration with a max duration").option("--required-tool <name>", "require a tool name (repeatable)", (value, previous = []) => [
|
|
11184
|
+
...previous,
|
|
11185
|
+
value
|
|
11186
|
+
]).option("--forbidden-tool <name>", "forbid a tool name (repeatable)", (value, previous = []) => [
|
|
11187
|
+
...previous,
|
|
11188
|
+
value
|
|
11189
|
+
]).option("--allowed-model <model>", "allow an LLM model (repeatable)", (value, previous = []) => [
|
|
11190
|
+
...previous,
|
|
11191
|
+
value
|
|
11192
|
+
]).option("--max-total-tokens <number>", "add llm.usage with a max total-token budget").action((target, opts) => {
|
|
11193
|
+
runCommand(() => checkCommand(target, opts));
|
|
11194
|
+
});
|
|
11195
|
+
program.command("scan").description("Best-effort local safety scan for trace capture risks").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11196
|
+
new commander.Option("--format <format>", "trace input format").choices([
|
|
11197
|
+
"agent-inspect-jsonl",
|
|
11198
|
+
"openinference-json",
|
|
11199
|
+
"otlp-json"
|
|
11200
|
+
])
|
|
11201
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--json", "print deterministic JSON safety result").option("--max-string-length <number>", "unsafe threshold for string values").option("--max-array-length <number>", "unsafe threshold for array values").option("--max-object-keys <number>", "unsafe threshold for object key counts").option("--max-serialized-bytes <number>", "unsafe threshold for serialized values").action((target, opts) => {
|
|
11202
|
+
runCommand(() => scanCommand(target, opts));
|
|
11203
|
+
});
|
|
11204
|
+
program.command("verify-safe").description("Best-effort local trace safety verification").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11205
|
+
new commander.Option("--format <format>", "trace input format").choices([
|
|
11206
|
+
"agent-inspect-jsonl",
|
|
11207
|
+
"openinference-json",
|
|
11208
|
+
"otlp-json"
|
|
11209
|
+
])
|
|
11210
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--json", "print deterministic JSON safety result").option("--max-string-length <number>", "unsafe threshold for string values").option("--max-array-length <number>", "unsafe threshold for array values").option("--max-object-keys <number>", "unsafe threshold for object key counts").option("--max-serialized-bytes <number>", "unsafe threshold for serialized values").action((target, opts) => {
|
|
11211
|
+
runCommand(() => verifySafeCommand(target, opts));
|
|
11212
|
+
});
|
|
11213
|
+
program.command("artifacts").description("Create safe local CI trace artifacts").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").requiredOption("--output-dir <path>", "directory for generated artifacts").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11214
|
+
new commander.Option("--format <format>", "trace input format").choices([
|
|
11215
|
+
"agent-inspect-jsonl",
|
|
11216
|
+
"openinference-json",
|
|
11217
|
+
"otlp-json"
|
|
11218
|
+
])
|
|
11219
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--baseline <trace-path-or-run-id>", "optional baseline trace for diff artifacts").option("--baseline-run <run-id>", "select a run from the baseline trace").option("--github-summary <path>", "append a safe summary to this file, e.g. GITHUB_STEP_SUMMARY").option("--json", "print deterministic JSON manifest").action((target, opts) => {
|
|
11220
|
+
runCommand(() => artifactsCommand(target, opts));
|
|
11221
|
+
});
|
|
9198
11222
|
program.command("diff").description("Compare two local AgentInspect JSONL traces (read-only)").argument("<left-run-id>", "first run id").argument("<right-run-id>", "second run id").option("--dir <path>", "trace directory").option("--json", "print diff result as JSON").option("--ignore-duration", "omit duration comparisons").option(
|
|
9199
11223
|
"--duration-threshold <duration>",
|
|
9200
11224
|
"ignore duration deltas at or below this (e.g. 500ms, 2s, 1m)"
|
|
@@ -9262,9 +11286,9 @@ function isPrimaryModule() {
|
|
|
9262
11286
|
if (!entry) return false;
|
|
9263
11287
|
const selfPath = url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)));
|
|
9264
11288
|
try {
|
|
9265
|
-
return fs.realpathSync(
|
|
11289
|
+
return fs.realpathSync(path10__default.default.resolve(entry)) === fs.realpathSync(path10__default.default.resolve(selfPath));
|
|
9266
11290
|
} catch {
|
|
9267
|
-
return
|
|
11291
|
+
return path10__default.default.resolve(entry) === path10__default.default.resolve(selfPath);
|
|
9268
11292
|
}
|
|
9269
11293
|
}
|
|
9270
11294
|
if (isPrimaryModule()) {
|