agent-inspect 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +12 -5
- package/docs/ADAPTER-CONFORMANCE.md +39 -0
- package/docs/ADAPTERS.md +194 -1
- package/docs/API.md +171 -10
- package/docs/CLI.md +154 -6
- package/docs/KNOWN-ISSUES.md +9 -0
- package/docs/LIMITATIONS.md +14 -0
- package/docs/SCHEMA.md +2 -1
- package/package.json +13 -2
- package/packages/cli/dist/index.cjs +2057 -33
- package/packages/cli/dist/index.cjs.map +1 -1
- package/packages/cli/dist/index.mjs +2057 -33
- package/packages/cli/dist/index.mjs.map +1 -1
- package/packages/core/dist/advanced.d.cts +4 -4
- package/packages/core/dist/advanced.d.ts +4 -4
- package/packages/core/dist/checks.cjs +1535 -0
- package/packages/core/dist/checks.cjs.map +1 -0
- package/packages/core/dist/checks.d.cts +585 -0
- package/packages/core/dist/checks.d.ts +585 -0
- package/packages/core/dist/checks.mjs +1512 -0
- package/packages/core/dist/checks.mjs.map +1 -0
- package/packages/core/dist/diff.d.cts +3 -3
- package/packages/core/dist/diff.d.ts +3 -3
- package/packages/core/dist/exporters.d.cts +3 -3
- package/packages/core/dist/exporters.d.ts +3 -3
- package/packages/core/dist/index.d.cts +6 -6
- package/packages/core/dist/index.d.ts +6 -6
- package/packages/core/dist/{inspect-event-Des4JDHo.d.cts → inspect-event-CevRYp58.d.cts} +1 -1
- package/packages/core/dist/{inspect-event-Des4JDHo.d.ts → inspect-event-CevRYp58.d.ts} +1 -1
- package/packages/core/dist/{log-config-C1GcJPIM.d.ts → log-config-BPHS4Sds.d.ts} +1 -1
- package/packages/core/dist/{log-config-BnH8Ykcb.d.cts → log-config-DanPV3P9.d.cts} +1 -1
- package/packages/core/dist/logs.d.cts +3 -3
- package/packages/core/dist/logs.d.ts +3 -3
- package/packages/core/dist/{persisted-inspect-event-DiFto0K2.d.ts → persisted-inspect-event-Cw7TeYGr.d.ts} +1 -1
- package/packages/core/dist/{persisted-inspect-event-0kaRADsp.d.cts → persisted-inspect-event-DHPfzUd8.d.cts} +1 -1
- package/packages/core/dist/persisted.d.cts +5 -5
- package/packages/core/dist/persisted.d.ts +5 -5
- package/packages/core/dist/readers.d.cts +2 -2
- package/packages/core/dist/readers.d.ts +2 -2
- package/packages/core/dist/{types-tSix7tfv.d.ts → types-Ap9uMdx_.d.ts} +1 -1
- package/packages/core/dist/{types-DB8jB6Jg.d.cts → types-B2-BU5CS.d.cts} +1 -1
- package/packages/core/dist/writers.d.cts +2 -2
- package/packages/core/dist/writers.d.ts +2 -2
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { realpathSync, createReadStream } from 'fs';
|
|
3
|
-
import
|
|
4
|
-
import { fileURLToPath } from 'url';
|
|
3
|
+
import path10 from 'path';
|
|
4
|
+
import { fileURLToPath, pathToFileURL } from 'url';
|
|
5
5
|
import { Command, Option } from 'commander';
|
|
6
6
|
import { unlink, stat, mkdir, writeFile, appendFile, readdir, readFile, access, open } from 'fs/promises';
|
|
7
7
|
import crypto, { webcrypto } from 'crypto';
|
|
@@ -12,7 +12,7 @@ import process2, { stdin, stdout } from 'process';
|
|
|
12
12
|
import tty from 'tty';
|
|
13
13
|
|
|
14
14
|
// package.json
|
|
15
|
-
var version = "1.
|
|
15
|
+
var version = "1.8.0";
|
|
16
16
|
|
|
17
17
|
// packages/core/src/types.ts
|
|
18
18
|
var STEP_TYPES = [
|
|
@@ -1894,7 +1894,7 @@ function formatDuration(ms) {
|
|
|
1894
1894
|
// packages/core/src/utils.ts
|
|
1895
1895
|
var DEFAULT_TRACE_DIR_NAME = ".agent-inspect";
|
|
1896
1896
|
var RUNS_DIR_NAME = "runs";
|
|
1897
|
-
var FALLBACK_TRACE_DIR =
|
|
1897
|
+
var FALLBACK_TRACE_DIR = path10.join(
|
|
1898
1898
|
os.tmpdir(),
|
|
1899
1899
|
"agent-inspect",
|
|
1900
1900
|
RUNS_DIR_NAME
|
|
@@ -1932,7 +1932,7 @@ function getDefaultTraceDir() {
|
|
|
1932
1932
|
if (typeof home !== "string" || home.trim() === "") {
|
|
1933
1933
|
return FALLBACK_TRACE_DIR;
|
|
1934
1934
|
}
|
|
1935
|
-
return
|
|
1935
|
+
return path10.join(home, DEFAULT_TRACE_DIR_NAME, RUNS_DIR_NAME);
|
|
1936
1936
|
} catch {
|
|
1937
1937
|
return FALLBACK_TRACE_DIR;
|
|
1938
1938
|
}
|
|
@@ -1940,20 +1940,20 @@ function getDefaultTraceDir() {
|
|
|
1940
1940
|
function getTraceFilePath(runId, traceDir) {
|
|
1941
1941
|
const baseDir = traceDir ?? getDefaultTraceDir();
|
|
1942
1942
|
let safeId = typeof runId === "string" && runId.trim() !== "" ? runId.trim() : "run_unknown";
|
|
1943
|
-
safeId =
|
|
1943
|
+
safeId = path10.basename(safeId);
|
|
1944
1944
|
if (safeId === "" || safeId === "." || safeId === "..") {
|
|
1945
1945
|
safeId = "run_unknown";
|
|
1946
1946
|
}
|
|
1947
|
-
return
|
|
1947
|
+
return path10.join(baseDir, `${safeId}.jsonl`);
|
|
1948
1948
|
}
|
|
1949
1949
|
async function ensureTraceDir(traceDir) {
|
|
1950
|
-
const primary =
|
|
1950
|
+
const primary = path10.resolve(traceDir);
|
|
1951
1951
|
try {
|
|
1952
1952
|
await mkdir(primary, { recursive: true });
|
|
1953
1953
|
return primary;
|
|
1954
1954
|
} catch {
|
|
1955
1955
|
warn(`Failed to create trace directory: ${primary}`);
|
|
1956
|
-
const fallback =
|
|
1956
|
+
const fallback = path10.resolve(FALLBACK_TRACE_DIR);
|
|
1957
1957
|
try {
|
|
1958
1958
|
await mkdir(fallback, { recursive: true });
|
|
1959
1959
|
return fallback;
|
|
@@ -2627,7 +2627,7 @@ var TraceDirectory = class {
|
|
|
2627
2627
|
this.#dir = resolveTraceDir(options);
|
|
2628
2628
|
}
|
|
2629
2629
|
getPath(filename) {
|
|
2630
|
-
return filename ?
|
|
2630
|
+
return filename ? path10.join(this.#dir, filename) : this.#dir;
|
|
2631
2631
|
}
|
|
2632
2632
|
async list() {
|
|
2633
2633
|
try {
|
|
@@ -2654,7 +2654,7 @@ function parseIsoToMs3(value) {
|
|
|
2654
2654
|
}
|
|
2655
2655
|
async function extractMetadata(filePath, _quickScan) {
|
|
2656
2656
|
const stats = await stat(filePath);
|
|
2657
|
-
let runIdFromFile =
|
|
2657
|
+
let runIdFromFile = path10.basename(filePath);
|
|
2658
2658
|
if (runIdFromFile.endsWith(".jsonl")) {
|
|
2659
2659
|
runIdFromFile = runIdFromFile.slice(0, -".jsonl".length);
|
|
2660
2660
|
}
|
|
@@ -4409,8 +4409,8 @@ function matchStepLevel(m, events, opts) {
|
|
|
4409
4409
|
fields.push("step.name");
|
|
4410
4410
|
}
|
|
4411
4411
|
if (opts.toolQuery) {
|
|
4412
|
-
const
|
|
4413
|
-
if (!nameMatches(
|
|
4412
|
+
const toolName2 = typeof s.metadata?.toolName === "string" ? s.metadata.toolName : s.name;
|
|
4413
|
+
if (!nameMatches(toolName2, opts.toolQuery)) continue;
|
|
4414
4414
|
fields.push("step.tool");
|
|
4415
4415
|
}
|
|
4416
4416
|
if (opts.durationFilter) {
|
|
@@ -4615,7 +4615,7 @@ function findReaderByFormat(format, readers) {
|
|
|
4615
4615
|
}
|
|
4616
4616
|
async function jsonlFilesInDirectory(dirPath) {
|
|
4617
4617
|
const entries = await readdir(dirPath, { withFileTypes: true });
|
|
4618
|
-
return entries.filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) =>
|
|
4618
|
+
return entries.filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => path10.join(dirPath, entry.name)).sort((a, b) => a.localeCompare(b));
|
|
4619
4619
|
}
|
|
4620
4620
|
async function resolveInput(input3) {
|
|
4621
4621
|
const cached = resolvedInputCache.get(input3);
|
|
@@ -6195,13 +6195,13 @@ function pairSteps(left, right) {
|
|
|
6195
6195
|
return pairs;
|
|
6196
6196
|
}
|
|
6197
6197
|
function compareLeafSteps(L, R, segments, opts, out) {
|
|
6198
|
-
const
|
|
6198
|
+
const path12 = buildPath(segments);
|
|
6199
6199
|
if (L.name !== R.name) {
|
|
6200
6200
|
out.push({
|
|
6201
6201
|
kind: "structure",
|
|
6202
6202
|
severity: "warning",
|
|
6203
6203
|
message: "Step name differs",
|
|
6204
|
-
path:
|
|
6204
|
+
path: path12,
|
|
6205
6205
|
left: L.name,
|
|
6206
6206
|
right: R.name
|
|
6207
6207
|
});
|
|
@@ -6211,7 +6211,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6211
6211
|
kind: "step-type",
|
|
6212
6212
|
severity: "warning",
|
|
6213
6213
|
message: "Step type differs",
|
|
6214
|
-
path:
|
|
6214
|
+
path: path12,
|
|
6215
6215
|
left: L.type,
|
|
6216
6216
|
right: R.type
|
|
6217
6217
|
});
|
|
@@ -6221,7 +6221,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6221
6221
|
kind: "step-status",
|
|
6222
6222
|
severity: "warning",
|
|
6223
6223
|
message: "Step status differs",
|
|
6224
|
-
path:
|
|
6224
|
+
path: path12,
|
|
6225
6225
|
left: L.status,
|
|
6226
6226
|
right: R.status
|
|
6227
6227
|
});
|
|
@@ -6233,7 +6233,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6233
6233
|
kind: "error",
|
|
6234
6234
|
severity: "error",
|
|
6235
6235
|
message: "Step error message differs",
|
|
6236
|
-
path:
|
|
6236
|
+
path: path12,
|
|
6237
6237
|
left: le || void 0,
|
|
6238
6238
|
right: re || void 0
|
|
6239
6239
|
});
|
|
@@ -6251,7 +6251,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6251
6251
|
kind: "duration",
|
|
6252
6252
|
severity: "info",
|
|
6253
6253
|
message: "Step duration differs",
|
|
6254
|
-
path:
|
|
6254
|
+
path: path12,
|
|
6255
6255
|
left: ld,
|
|
6256
6256
|
right: rd
|
|
6257
6257
|
});
|
|
@@ -6264,7 +6264,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6264
6264
|
kind: "metadata",
|
|
6265
6265
|
severity: "info",
|
|
6266
6266
|
message: "Step metadata differs",
|
|
6267
|
-
path:
|
|
6267
|
+
path: path12,
|
|
6268
6268
|
left: L.metadata,
|
|
6269
6269
|
right: R.metadata
|
|
6270
6270
|
});
|
|
@@ -6276,7 +6276,7 @@ function compareLeafSteps(L, R, segments, opts, out) {
|
|
|
6276
6276
|
kind: "output",
|
|
6277
6277
|
severity: "info",
|
|
6278
6278
|
message: "Output preview differs",
|
|
6279
|
-
path:
|
|
6279
|
+
path: path12,
|
|
6280
6280
|
left: L.outputPreview,
|
|
6281
6281
|
right: R.outputPreview
|
|
6282
6282
|
});
|
|
@@ -6926,11 +6926,11 @@ createChalk({ level: stderrColor ? stderrColor.level : 0 });
|
|
|
6926
6926
|
var source_default = chalk;
|
|
6927
6927
|
|
|
6928
6928
|
// packages/core/src/diff/renderer.ts
|
|
6929
|
-
function formatPath(
|
|
6930
|
-
if (
|
|
6929
|
+
function formatPath(path12) {
|
|
6930
|
+
if (path12 === void 0 || path12.path.length === 0) {
|
|
6931
6931
|
return "(run)";
|
|
6932
6932
|
}
|
|
6933
|
-
return
|
|
6933
|
+
return path12.path.map((s) => s.name).join(" > ");
|
|
6934
6934
|
}
|
|
6935
6935
|
function formatValue(v, verbose) {
|
|
6936
6936
|
if (v === void 0) return "(undefined)";
|
|
@@ -7211,8 +7211,8 @@ async function stepLlm(model, fn) {
|
|
|
7211
7211
|
metadata: { model: modelName }
|
|
7212
7212
|
});
|
|
7213
7213
|
}
|
|
7214
|
-
async function stepTool(
|
|
7215
|
-
const normalized = typeof
|
|
7214
|
+
async function stepTool(toolName2, fn) {
|
|
7215
|
+
const normalized = typeof toolName2 === "string" && toolName2.trim() !== "" ? toolName2.trim() : "unknown-tool";
|
|
7216
7216
|
return stepImpl(`tool:${normalized}`, fn, {
|
|
7217
7217
|
type: "tool",
|
|
7218
7218
|
metadata: { toolName: normalized }
|
|
@@ -8523,9 +8523,9 @@ Trace directory: ${traceDir}`);
|
|
|
8523
8523
|
if (validation !== void 0 && !validation.ok) {
|
|
8524
8524
|
process.exitCode = 1;
|
|
8525
8525
|
}
|
|
8526
|
-
const outPath = options.output !== void 0 && options.output.trim() !== "" ?
|
|
8526
|
+
const outPath = options.output !== void 0 && options.output.trim() !== "" ? path10.resolve(options.output.trim()) : void 0;
|
|
8527
8527
|
if (outPath !== void 0) {
|
|
8528
|
-
await mkdir(
|
|
8528
|
+
await mkdir(path10.dirname(outPath), { recursive: true });
|
|
8529
8529
|
await writeFile(outPath, result.content, "utf-8");
|
|
8530
8530
|
const vlabel = validation !== void 0 ? validation.ok ? "ok" : "failed" : "skipped";
|
|
8531
8531
|
console.log(`Wrote ${result.fileExtension} export to ${outPath} (validation: ${vlabel})`);
|
|
@@ -8917,9 +8917,9 @@ async function reportCommand(runId, options = {}) {
|
|
|
8917
8917
|
redactionProfile,
|
|
8918
8918
|
correlation: !options.noCorrelation
|
|
8919
8919
|
});
|
|
8920
|
-
const outPath = options.output !== void 0 && options.output.trim() !== "" ?
|
|
8920
|
+
const outPath = options.output !== void 0 && options.output.trim() !== "" ? path10.resolve(options.output.trim()) : void 0;
|
|
8921
8921
|
if (outPath !== void 0) {
|
|
8922
|
-
await mkdir(
|
|
8922
|
+
await mkdir(path10.dirname(outPath), { recursive: true });
|
|
8923
8923
|
await writeFile(outPath, result.content, "utf-8");
|
|
8924
8924
|
console.log(`Wrote ${result.fileExtension} report to ${outPath}`);
|
|
8925
8925
|
}
|
|
@@ -9083,6 +9083,1982 @@ async function openCommand(input3, options = {}, stdin = process.stdin) {
|
|
|
9083
9083
|
}
|
|
9084
9084
|
}
|
|
9085
9085
|
|
|
9086
|
+
// packages/core/src/checks/index.ts
|
|
9087
|
+
// Sort weight per finding severity. compareFindings subtracts these ranks,
// so lower numbers ("error") sort first.
var SEVERITY_RANK = {
  error: 0,
  warning: 1,
  info: 2
};
// Sort weight per finding status. compareFindings subtracts these ranks,
// so "fail" sorts before "warning", which sorts before "pass".
var STATUS_RANK = {
  fail: 0,
  warning: 1,
  pass: 2
};
// Numeric rank per confidence label.
// NOTE(review): consumers are outside this view; presumably a higher rank
// means stronger evidence — confirm at the call sites.
var CONFIDENCE_RANK = {
  unknown: 0,
  heuristic: 1,
  correlated: 2,
  explicit: 3
};
// Key names treated as sensitive.
// NOTE(review): presumably the default list handed to isSensitiveKey — confirm.
// Both "apikey" and "api_key" are listed because normalizedKey keeps
// underscores, so the two spellings normalize to different strings.
var DEFAULT_SENSITIVE_KEYS = [
  "authorization",
  "cookie",
  "token",
  "apikey",
  "api_key",
  "password",
  "secret",
  "email"
];
// Key names treated as raw (unsummarized) content.
// NOTE(review): presumably the default list handed to isRawContentKey — confirm.
// Entries come in lowercase pairs with and without "_" because normalizedKey
// lowercases keys but keeps underscores.
var DEFAULT_RAW_CONTENT_KEYS = [
  "body",
  "headers",
  "input",
  "messages",
  "output",
  "payload",
  "prompt",
  "requestbody",
  "request_body",
  "responsebody",
  "response_body",
  "rawprompt",
  "raw_prompt",
  "rawoutput",
  "raw_output",
  "toolinput",
  "tool_input",
  "tooloutput",
  "tool_output"
];
// Regular expressions for secret-looking string values, each tagged with an id.
// None of them carries the g/y flag, so .test() keeps no lastIndex state.
var DEFAULT_SECRET_PATTERNS = [
  // "Bearer", whitespace, then at least 12 token characters and optional "=" padding.
  { id: "bearer-token", pattern: /Bearer\s+[A-Za-z0-9._~+/-]{12,}=*/ },
  // "sk-" followed by at least 16 letters, digits, "_" or "-".
  { id: "openai-key", pattern: /sk-[A-Za-z0-9_-]{16,}/ },
  // "AKIA" followed by exactly 16 digits or uppercase letters.
  { id: "aws-access-key", pattern: /AKIA[0-9A-Z]{16}/ },
  // "gho_", "ghp_", "ghs_" or "ghu_" followed by at least 20 word characters.
  { id: "github-token", pattern: /gh[opsu]_[A-Za-z0-9_]{20,}/ },
  // Case-insensitive "<api key|token|password|secret>=<8+ non-space characters>".
  { id: "key-value-secret", pattern: /(api[_-]?key|token|password|secret)=\S{8,}/i }
];
|
|
9141
|
+
/**
 * Orders two optional strings with localeCompare; null and undefined compare as "".
 * @param {string | null | undefined} a
 * @param {string | null | undefined} b
 * @returns {number} Negative, zero, or positive, as returned by localeCompare.
 */
function compareStrings(a, b) {
  const left = a ?? "";
  const right = b ?? "";
  return left.localeCompare(right);
}
|
|
9144
|
+
/**
 * Builds an error-severity diagnostic record.
 * @param {string} code - Diagnostic code.
 * @param {string} message - Human-readable description.
 * @param {string} [ruleId] - Attached as `ruleId` only when truthy.
 * @returns {{code: string, message: string, severity: "error", ruleId?: string}}
 */
function diagnostic(code, message, ruleId) {
  const entry = { code, message, severity: "error" };
  if (ruleId) {
    entry.ruleId = ruleId;
  }
  return entry;
}
|
|
9152
|
+
/**
 * Creates a fresh summary object with every counter set to zero.
 * @returns {{passed: number, failed: number, warnings: number, errors: number}}
 */
function emptySummary() {
  const summary = {};
  for (const counter of ["passed", "failed", "warnings", "errors"]) {
    summary[counter] = 0;
  }
  return summary;
}
|
|
9160
|
+
/**
 * Builds a failed check result that carries diagnostics and no findings.
 * @param {object} input3 - Check input; `input3.read.format` is copied to the result.
 * @param {Array<{severity: string}>} diagnostics - Copied into the result; entries
 *   with severity "error" are counted in `summary.errors`.
 * @param {{runId: string}} [selectedRun] - When truthy, its runId is included.
 * @returns {object} Result with `ok: false` and `status: "error"`.
 */
function errorResult(input3, diagnostics, selectedRun) {
  const result = {
    ok: false,
    status: "error",
    format: input3.read.format
  };
  if (selectedRun) {
    result.runId = selectedRun.runId;
  }
  const summary = emptySummary();
  summary.errors = diagnostics.filter((item) => item.severity === "error").length;
  result.summary = summary;
  result.findings = [];
  result.diagnostics = [...diagnostics];
  return result;
}
|
|
9174
|
+
/**
 * Flattens a forest of nodes depth-first, each parent before its descendants.
 * @param {Array<{children: Array}>} nodes - Root nodes; every node needs a `children` array.
 * @returns {Array} All nodes in pre-order.
 */
function flattenNodes(nodes) {
  const flat = [];
  const collect = (node) => {
    flat.push(node);
    node.children.forEach(collect);
  };
  nodes.forEach(collect);
  return flat;
}
|
|
9177
|
+
/**
 * Assembles the fact tables that checks run against.
 *
 * Events and nodes are limited to `selectedRun` when one is given, while
 * `runs` always lists every run from the read result.
 *
 * @param {object} input3 - Check input; reads `input3.read.{format, runs, events,
 *   warnings, unsupportedFields, sourceFiles}`.
 * @param {object} [selectedRun] - Run to scope events and nodes to.
 * @returns {object} Frozen array copies plus `nodesByEventId` and
 *   `childrenByParentId` maps.
 */
function buildFacts(input3, selectedRun) {
  const { read } = input3;
  const scopedRuns = selectedRun ? [selectedRun] : read.runs;
  const scopedRunIds = new Set(scopedRuns.map((run) => run.runId));
  let scopedEvents = read.events;
  if (selectedRun !== void 0) {
    scopedEvents = read.events.filter((event) => scopedRunIds.has(event.runId));
  }

  const rootNodes = scopedRuns.flatMap((run) => run.children);
  const nodesByEventId = /* @__PURE__ */ new Map();
  const childrenByParentId = /* @__PURE__ */ new Map();
  for (const node of flattenNodes(rootNodes)) {
    const { eventId, parentId } = node.event;
    nodesByEventId.set(eventId, node);
    if (!parentId) continue;
    // Group nodes under the parent id recorded on their own event.
    const siblings = childrenByParentId.get(parentId) ?? [];
    siblings.push(node);
    childrenByParentId.set(parentId, siblings);
  }

  const frozenCopy = (items) => Object.freeze([...items]);
  return {
    format: read.format,
    runs: frozenCopy(read.runs),
    events: frozenCopy(scopedEvents),
    readerWarnings: frozenCopy(read.warnings),
    unsupportedFields: frozenCopy(read.unsupportedFields),
    sourceFiles: frozenCopy(read.sourceFiles),
    nodesByEventId,
    childrenByParentId,
    rootNodes: Object.freeze(rootNodes)
  };
}
|
|
9205
|
+
/**
 * Decides which run the checks operate on.
 *
 * Order of precedence: a run already selected on the input (it must match
 * `runId` when one is given), then a lookup by `runId`, then the only run
 * when exactly one exists. Every other case yields a diagnostic and no run.
 *
 * @param {object} input3 - Check input; reads `selectedRun` and `read.runs`.
 * @param {string} [runId] - Requested run id.
 * @returns {{run?: object, diagnostics: object[]}}
 */
function resolveSelectedRun(input3, runId) {
  const reject = (code, message) => ({ diagnostics: [diagnostic(code, message)] });
  const accept = (run) => ({ run, diagnostics: [] });

  const preselected = input3.selectedRun;
  if (preselected) {
    const mismatch = runId && preselected.runId !== runId;
    return mismatch
      ? reject(
          "AI_CHECK_INVALID_ARGUMENTS",
          `Selected run ${preselected.runId} does not match requested run ${runId}.`
        )
      : accept(preselected);
  }

  const runs = input3.read.runs;
  if (runId) {
    const match = runs.find((candidate) => candidate.runId === runId);
    return match
      ? accept(match)
      : reject("AI_CHECK_RUN_SELECTION_REQUIRED", `Run not found: ${runId}.`);
  }

  switch (runs.length) {
    case 1:
      return accept(runs[0]);
    case 0:
      return reject("AI_CHECK_RUN_SELECTION_REQUIRED", "No runs are available for checks.");
    default:
      return reject(
        "AI_CHECK_RUN_SELECTION_REQUIRED",
        "Multiple runs are available; select a run before executing checks."
      );
  }
}
|
|
9249
|
+
/**
 * De-duplicates rules by id and optionally narrows them to a selected id list.
 *
 * The first rule with a given id wins; later duplicates and unknown selected
 * ids each produce an AI_CHECK_INVALID_CONFIG diagnostic.
 *
 * @param {Array<{id: string}>} rules - Candidate rules.
 * @param {string[]} [selectedIds] - When non-empty, only these ids are kept.
 * @returns {{rules: object[], diagnostics: object[]}} Rules sorted with compareRules.
 */
function selectRules(rules, selectedIds) {
  const diagnostics = [];
  const registry = /* @__PURE__ */ new Map();
  for (const rule of rules) {
    if (!registry.has(rule.id)) {
      registry.set(rule.id, rule);
      continue;
    }
    diagnostics.push(
      diagnostic("AI_CHECK_INVALID_CONFIG", `Duplicate trace check rule id: ${rule.id}.`, rule.id)
    );
  }

  const unique = [...registry.values()];
  if (!(selectedIds && selectedIds.length > 0)) {
    return { rules: unique.sort(compareRules), diagnostics };
  }

  const wanted = new Set(selectedIds);
  wanted.forEach((id) => {
    if (registry.has(id)) return;
    diagnostics.push(
      diagnostic("AI_CHECK_INVALID_CONFIG", `Unknown trace check rule id: ${id}.`, id)
    );
  });
  const chosen = unique.filter((rule) => wanted.has(rule.id));
  return { rules: chosen.sort(compareRules), diagnostics };
}
|
|
9277
|
+
/**
 * Sort comparator that orders rules by `id` using localeCompare.
 * @param {{id: string}} a
 * @param {{id: string}} b
 * @returns {number}
 */
function compareRules(a, b) {
  const leftId = a.id;
  const rightId = b.id;
  return leftId.localeCompare(rightId);
}
|
|
9280
|
+
/**
 * Looks up the timestamp of the event referenced by a finding's first evidence entry.
 * @param {{evidence: Array<{eventId?: string}>}} finding
 * @param {Map<string, {timestamp?: string}>} eventById
 * @returns {string} The timestamp, or "" when there is no event id, no such
 *   event, or the event has no timestamp.
 */
function eventTimestamp(finding, eventById) {
  const eventId = finding.evidence[0]?.eventId;
  if (!eventId) {
    return "";
  }
  const event = eventById.get(eventId);
  return event?.timestamp ?? "";
}
|
|
9284
|
+
/**
 * Creates the sort comparator for findings.
 *
 * Tie-breakers run in this order: severity rank, rule id, status rank, then
 * the first evidence entry's run id, event timestamp, event id, and path.
 *
 * @param {Map<string, {timestamp?: string}>} eventById - Used to resolve event timestamps.
 * @returns {(a: object, b: object) => number}
 */
function compareFindings(eventById) {
  // Ranks are subtracted only when they differ, so two unranked values tie (0)
  // instead of producing NaN.
  const rankDelta = (ranks, left, right) =>
    ranks[left] !== ranks[right] ? ranks[left] - ranks[right] : 0;
  const tieBreakers = [
    (a, b) => rankDelta(SEVERITY_RANK, a.severity, b.severity),
    (a, b) => a.ruleId.localeCompare(b.ruleId),
    (a, b) => rankDelta(STATUS_RANK, a.status, b.status),
    (a, b) => compareStrings(a.evidence[0]?.runId, b.evidence[0]?.runId),
    (a, b) => eventTimestamp(a, eventById).localeCompare(eventTimestamp(b, eventById)),
    (a, b) => compareStrings(a.evidence[0]?.eventId, b.evidence[0]?.eventId),
    (a, b) => compareStrings(a.evidence[0]?.path, b.evidence[0]?.path)
  ];
  return (a, b) => {
    for (const compare of tieBreakers) {
      const order = compare(a, b);
      if (order !== 0) return order;
    }
    return 0;
  };
}
|
|
9303
|
+
/**
 * Fills in rule defaults on a finding and copies its evidence list.
 * @param {{id: string, defaultSeverity: string}} rule - Supplies the fallback
 *   `ruleId` and `severity`.
 * @param {object} finding - Raw finding; `expected` / `actual` are copied only
 *   when they are not undefined.
 * @returns {object} New finding object with an `evidence` array ([] when absent).
 */
function normalizeFinding(rule, finding) {
  const normalized = {
    ruleId: finding.ruleId || rule.id,
    severity: finding.severity ?? rule.defaultSeverity,
    status: finding.status,
    message: finding.message
  };
  for (const optional of ["expected", "actual"]) {
    if (finding[optional] !== void 0) {
      normalized[optional] = finding[optional];
    }
  }
  normalized.evidence = [...(finding.evidence ?? [])];
  return normalized;
}
|
|
9314
|
+
/**
 * Counts findings and diagnostics for the result summary.
 *
 * - `passed`:   findings with status "pass".
 * - `failed`:   findings with status "fail" and severity "error".
 * - `warnings`: non-passing findings whose status or severity is "warning".
 * - `errors`:   diagnostics with severity "error".
 *
 * A passing finding is never counted as a warning. Previously a finding with
 * status "pass" and severity "warning" (for example a passing check from a
 * warning-level rule) was tallied under both `passed` and `warnings`.
 *
 * @param {Array<{status: string, severity: string}>} findings
 * @param {Array<{severity: string}>} diagnostics
 * @returns {{passed: number, failed: number, warnings: number, errors: number}}
 */
function summarize(findings, diagnostics) {
  const summary = { passed: 0, failed: 0, warnings: 0, errors: 0 };
  for (const { status, severity } of findings) {
    if (status === "pass") {
      summary.passed += 1;
      continue;
    }
    if (status === "fail" && severity === "error") {
      summary.failed += 1;
    }
    if (status === "warning" || severity === "warning") {
      summary.warnings += 1;
    }
  }
  for (const item of diagnostics) {
    if (item.severity === "error") {
      summary.errors += 1;
    }
  }
  return summary;
}
|
|
9326
|
+
/**
 * Returns the first attribute value among `keys` that is a non-blank string.
 * @param {{attributes?: object}} event
 * @param {string[]} keys - Attribute names, tried in order.
 * @returns {string | undefined} The untrimmed value, or undefined when none qualifies.
 */
function stringAttr2(event, keys) {
  for (const name of keys) {
    const candidate = event.attributes?.[name];
    if (typeof candidate !== "string") continue;
    if (candidate.trim() === "") continue;
    return candidate;
  }
  return void 0;
}
|
|
9333
|
+
/**
 * Returns the first attribute value among `keys` that is a finite number.
 * @param {{attributes?: object}} event
 * @param {string[]} keys - Attribute names, tried in order.
 * @returns {number | undefined}
 */
function numericAttr(event, keys) {
  for (const name of keys) {
    const candidate = event.attributes?.[name];
    const usable = typeof candidate === "number" && Number.isFinite(candidate);
    if (usable) {
      return candidate;
    }
  }
  return void 0;
}
|
|
9340
|
+
/**
 * Returns the first attribute value among `keys` that is a boolean.
 * @param {{attributes?: object}} event
 * @param {string[]} keys - Attribute names, tried in order.
 * @returns {boolean | undefined}
 */
function booleanAttr(event, keys) {
  for (const name of keys) {
    const candidate = event.attributes?.[name];
    if (candidate === true || candidate === false) {
      return candidate;
    }
  }
  return void 0;
}
|
|
9347
|
+
/**
 * Removes the first matching prefix from a name.
 * @param {string} name
 * @param {string[]} prefixes - Tried in order; only the first match is removed.
 * @returns {string} `name` without the matched prefix, or unchanged when none matches.
 */
function stripPrefix(name, prefixes) {
  const matchIndex = prefixes.findIndex((prefix) => name.startsWith(prefix));
  if (matchIndex === -1) {
    return name;
  }
  return name.slice(prefixes[matchIndex].length);
}
|
|
9353
|
+
/**
 * Projects an event onto the evidence record attached to findings.
 * @param {object} event - Reads runId, eventId, parentId, trace, kind, name, status.
 * @param {string} [path12] - Included as `path` only when truthy.
 * @returns {object} Evidence record.
 */
function eventEvidence(event, path12) {
  const { runId, eventId, parentId, trace, kind, name, status } = event;
  const evidence = {
    runId,
    eventId,
    parentId,
    traceId: trace?.traceId,
    spanId: trace?.spanId,
    kind,
    name,
    status
  };
  if (path12) {
    evidence.path = path12;
  }
  return evidence;
}
|
|
9366
|
+
/**
 * Wraps a run's identifying fields in a single-item evidence list.
 * @param {{runId: string, name?: string, status?: string}} [run]
 * @returns {Array<{runId: string, name: *, status: *}>} Empty when `run` is falsy.
 */
function runEvidence(run) {
  if (!run) {
    return [];
  }
  const { runId, name, status } = run;
  return [{ runId, name, status }];
}
|
|
9369
|
+
/**
 * Builds a failing, error-severity finding.
 * @param {string} ruleId
 * @param {string} message
 * @param {Iterable<object>} evidence - Copied into a new array.
 * @param {*} [expected] - Included only when not undefined.
 * @param {*} [actual] - Included only when not undefined.
 * @returns {object}
 */
function failFinding(ruleId, message, evidence, expected, actual) {
  const finding = { ruleId, severity: "error", status: "fail", message };
  if (expected !== void 0) {
    finding.expected = expected;
  }
  if (actual !== void 0) {
    finding.actual = actual;
  }
  finding.evidence = [...evidence];
  return finding;
}
|
|
9380
|
+
/**
 * Resolves a tool event's name: the `toolName` / `tool` attribute when present,
 * otherwise the event name with a "tool:", "function:" or "mcp-tools:" prefix removed.
 * @param {{name: string, attributes?: object}} event
 * @returns {string}
 */
function toolName(event) {
  const fromAttributes = stringAttr2(event, ["toolName", "tool"]);
  if (fromAttributes != null) {
    return fromAttributes;
  }
  return stripPrefix(event.name, ["tool:", "function:", "mcp-tools:"]);
}
|
|
9383
|
+
/**
 * Resolves an LLM event's model: the first non-blank model attribute, otherwise
 * the event name with an "llm:", "generation:", "transcription:" or "speech:"
 * prefix removed.
 * @param {{name: string, attributes?: object}} event
 * @returns {string}
 */
function llmModel(event) {
  const attributeKeys = ["model", "modelId", "responseModelId", "modelName", "model_name"];
  const fromAttributes = stringAttr2(event, attributeKeys);
  if (fromAttributes != null) {
    return fromAttributes;
  }
  const namePrefixes = ["llm:", "generation:", "transcription:", "speech:"];
  return stripPrefix(event.name, namePrefixes);
}
|
|
9386
|
+
/**
 * Reads the provider from the `provider`, `providerName` or `provider_name` attribute.
 * @param {{attributes?: object}} event
 * @returns {string | undefined}
 */
function llmProvider(event) {
  const attributeKeys = ["provider", "providerName", "provider_name"];
  return stringAttr2(event, attributeKeys);
}
|
|
9389
|
+
/**
 * Reads the finish reason from the `finishReason`, `rawFinishReason` or
 * `finish_reason` attribute.
 * @param {{attributes?: object}} event
 * @returns {string | undefined}
 */
function llmFinishReason(event) {
  const attributeKeys = ["finishReason", "rawFinishReason", "finish_reason"];
  return stringAttr2(event, attributeKeys);
}
|
|
9392
|
+
/**
 * Reads the retry counter from the `retryCount`, `retryAttempt`, `retry_attempt`
 * or `attempt` attribute.
 * @param {{attributes?: object}} event
 * @returns {number | undefined}
 */
function retryCount(event) {
  const attributeKeys = ["retryCount", "retryAttempt", "retry_attempt", "attempt"];
  return numericAttr(event, attributeKeys);
}
|
|
9395
|
+
/**
 * Lists events that are no longer running, optionally restricted to one kind.
 * @param {{events: object[]}} context
 * @param {string} [kind] - When given, only events of this kind are returned.
 * @returns {object[]}
 */
function finishedEvents(context, kind) {
  const anyKind = kind === void 0;
  return context.events.filter((event) => {
    if (!anyKind && event.kind !== kind) {
      return false;
    }
    return event.status !== "running";
  });
}
|
|
9400
|
+
/**
 * Tells whether a value is a non-null object that is not an array.
 * @param {*} value
 * @returns {boolean}
 */
function isRecord14(value) {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
9403
|
+
/**
 * Indexes events by `eventId`; a later event with the same id replaces an earlier one.
 * @param {Array<{eventId: string}>} events
 * @returns {Map<string, object>}
 */
function eventMap(events) {
  const byId = new Map();
  for (const event of events) {
    byId.set(event.eventId, event);
  }
  return byId;
}
|
|
9406
|
+
/**
 * Parses a timestamp string with Date.parse.
 * @param {string | null | undefined} value
 * @returns {number | undefined} Epoch milliseconds, or undefined when the value
 *   is falsy or cannot be parsed.
 */
function parseEventTime(value) {
  if (!value) {
    return void 0;
  }
  const epochMs = Date.parse(value);
  if (!Number.isFinite(epochMs)) {
    return void 0;
  }
  return epochMs;
}
|
|
9411
|
+
/**
 * Resolves an event's start time in epoch milliseconds, preferring `startedAt`
 * and falling back to `timestamp`.
 * @param {{startedAt?: string, timestamp?: string}} event
 * @returns {number | undefined}
 */
function eventStartMs(event) {
  const fromStartedAt = parseEventTime(event.startedAt);
  if (fromStartedAt != null) {
    return fromStartedAt;
  }
  return parseEventTime(event.timestamp);
}
|
|
9414
|
+
/**
 * Resolves an event's end time in epoch milliseconds.
 *
 * Uses `endedAt` when it parses; otherwise adds a finite `durationMs` to the
 * start time when both are available.
 *
 * @param {{endedAt?: string, durationMs?: number}} event
 * @returns {number | undefined}
 */
function eventEndMs(event) {
  const explicitEnd = parseEventTime(event.endedAt);
  if (explicitEnd !== void 0) {
    return explicitEnd;
  }
  const start = eventStartMs(event);
  const { durationMs } = event;
  const canDerive = start !== void 0 && durationMs !== void 0 && Number.isFinite(durationMs);
  return canDerive ? start + durationMs : void 0;
}
|
|
9423
|
+
/**
 * Normalizes a key for comparison: lowercases it, then drops every character
 * other than a-z, 0-9 and "_".
 * @param {string} value
 * @returns {string}
 */
function normalizedKey(value) {
  const lowered = value.toLowerCase();
  return lowered.replaceAll(/[^a-z0-9_]/g, "");
}
|
|
9426
|
+
/**
 * Returns the portion of a dotted path after its final "."; a path without a
 * dot is returned whole.
 * @param {string} path12
 * @returns {string}
 */
function lastPathSegment(path12) {
  const lastDot = path12.lastIndexOf(".");
  // lastIndexOf yields -1 when there is no dot, so the slice then starts at 0.
  return path12.slice(lastDot + 1);
}
|
|
9430
|
+
/**
 * Names a value's type like `typeof`, but reports "array" for arrays and
 * "null" for null.
 * @param {*} value
 * @returns {string}
 */
function valueType(value) {
  if (value === null) {
    return "null";
  }
  return Array.isArray(value) ? "array" : typeof value;
}
|
|
9435
|
+
/**
 * Measures the UTF-8 byte length of a value's JSON serialization.
 * @param {*} value
 * @returns {number | undefined} Byte length, or undefined when the value has no
 *   JSON text (for example `undefined`) or serialization throws (for example on
 *   a circular structure).
 */
function serializedByteLength(value) {
  let json;
  try {
    json = JSON.stringify(value);
  } catch {
    return void 0;
  }
  // JSON.stringify returns undefined rather than a string for undefined,
  // functions and symbols; there is nothing to measure in that case.
  if (typeof json !== "string") {
    return void 0;
  }
  return Buffer.byteLength(json, "utf-8");
}
|
|
9442
|
+
/**
 * Appends an entry for `value` and, recursively, for everything nested in it.
 *
 * Array items are visited in index order and object properties in
 * localeCompare key order. A value at depth 8 is still recorded, but its
 * children are not visited.
 *
 * @param {object[]} entries - Output list; receives `{event, path, key, value}` records.
 * @param {object} event - Event the value belongs to; stored on every entry.
 * @param {*} value - Value to record and walk.
 * @param {string} path12 - Dotted path of `value`.
 * @param {string} key - Last path segment of `value`.
 * @param {number} [depth=0] - Current nesting depth.
 * @returns {void}
 */
function pushValueEntries(entries, event, value, path12, key, depth = 0) {
  entries.push({ event, path: path12, key, value });
  if (depth >= 8) return;

  let children;
  if (Array.isArray(value)) {
    children = Array.from(value, (item, index) => [String(index), item]);
  } else if (isRecord14(value)) {
    children = Object.keys(value)
      .sort((a, b) => a.localeCompare(b))
      .map((nestedKey) => [nestedKey, value[nestedKey]]);
  } else {
    return;
  }

  for (const [childKey, childValue] of children) {
    pushValueEntries(entries, event, childValue, `${path12}.${childKey}`, childKey, depth + 1);
  }
}
|
|
9463
|
+
/**
 * Collects value entries from an event's payload fields.
 *
 * `attributes` is always walked. `inputSummary` / `outputSummary` are walked
 * when `options.includeSummaries` is set, and `error` when
 * `options.includeError` is set. Undefined fields are skipped.
 *
 * @param {object} event
 * @param {{includeSummaries?: boolean, includeError?: boolean}} [options]
 * @returns {object[]} Entries in the order attributes, inputSummary, outputSummary, error.
 */
function eventValueEntries(event, options = {}) {
  const roots = ["attributes"];
  if (options.includeSummaries) {
    roots.push("inputSummary", "outputSummary");
  }
  if (options.includeError) {
    roots.push("error");
  }

  const entries = [];
  for (const root of roots) {
    const value = event[root];
    if (value !== void 0) {
      pushValueEntries(entries, event, value, root, root);
    }
  }
  return entries;
}
|
|
9481
|
+
/**
 * Caps a findings list at `maxFindings` entries.
 * @param {object[]} findings
 * @param {number} [maxFindings] - No cap when undefined; a negative cap keeps nothing.
 * @returns {object[]} The same array when it already fits, otherwise a truncated copy.
 */
function limitFindings(findings, maxFindings) {
  const fits = maxFindings === void 0 || findings.length <= maxFindings;
  if (fits) {
    return findings;
  }
  const cap = maxFindings > 0 ? maxFindings : 0;
  return findings.slice(0, cap);
}
|
|
9485
|
+
/**
 * Tells whether a string value looks redacted.
 *
 * A value counts as redacted when it contains one of the configured markers
 * or consists solely of a `[HASH:<id>]` token.
 *
 * Empty-string markers are ignored. `String.prototype.includes("")` is true
 * for every string, so a single empty marker would otherwise classify every
 * value as redacted.
 *
 * @param {string} value - Value to inspect.
 * @param {string[]} markers - Substrings that indicate redaction.
 * @returns {boolean}
 */
function hasRedactionMarker(value, markers) {
  const containsMarker = markers.some((marker) => marker !== "" && value.includes(marker));
  return containsMarker || /^\[HASH:[A-Za-z0-9_-]+\]$/.test(value);
}
|
|
9488
|
+
/**
 * Tells whether a key name contains one of the sensitive key names, compared
 * after normalization with normalizedKey.
 *
 * Configured names that normalize to an empty string (for example "" or "--")
 * are ignored. `includes("")` is always true, so such an entry would otherwise
 * flag every key as sensitive.
 *
 * @param {string | undefined} key - Key to test; falsy keys are never sensitive.
 * @param {string[]} sensitiveKeys - Names to look for inside the key.
 * @returns {boolean}
 */
function isSensitiveKey(key, sensitiveKeys) {
  if (!key) return false;
  const normalized = normalizedKey(key);
  return sensitiveKeys.some((sensitive) => {
    const needle = normalizedKey(sensitive);
    return needle !== "" && normalized.includes(needle);
  });
}
|
|
9493
|
+
/**
 * Tells whether a key name equals one of the forbidden raw-content key names,
 * compared after normalization with normalizedKey (exact match, not substring).
 * @param {string | undefined} key - Key to test; falsy keys never match.
 * @param {string[]} forbiddenKeys
 * @returns {boolean}
 */
function isRawContentKey(key, forbiddenKeys) {
  if (!key) {
    return false;
  }
  const normalized = normalizedKey(key);
  const forbidden = forbiddenKeys.map((name) => normalizedKey(name));
  return forbidden.includes(normalized);
}
|
|
9498
|
+
/**
 * Tells whether an event's attributes flag its parent relationship as unresolved.
 *
 * True when one of the boolean flag attributes resolves to `true`, or when one
 * of the resolution attributes reads "unresolved" or "missing-parent".
 *
 * @param {{attributes?: object}} event
 * @returns {boolean}
 */
function parentMarkedUnresolved(event) {
  const flagKeys = [
    "parentUnresolved",
    "unresolvedParent",
    "relationshipUnresolved",
    "unresolvedRelationship"
  ];
  const flagged = booleanAttr(event, flagKeys) === true;
  if (flagged) {
    return true;
  }
  const resolutionKeys = ["parentResolution", "relationshipResolution", "relationshipStatus"];
  const resolution = stringAttr2(event, resolutionKeys);
  return ["unresolved", "missing-parent"].includes(resolution);
}
|
|
9514
|
+
/**
 * Resolves a display name for an event: the first non-blank attribute among
 * `attributeKeys`, otherwise the event name with a matching prefix removed.
 * @param {{name: string, attributes?: object}} event
 * @param {string[]} attributeKeys
 * @param {string[]} prefixes
 * @returns {string}
 */
function signalName(event, attributeKeys, prefixes) {
  const fromAttributes = stringAttr2(event, attributeKeys);
  if (fromAttributes != null) {
    return fromAttributes;
  }
  return stripPrefix(event.name, prefixes);
}
|
|
9517
|
+
/**
 * Lists finished (not "running") events that represent guardrails.
 *
 * An event qualifies when its lowercased name starts with "guardrail:" or
 * contains ".guardrail.", or when it has a non-blank `guardrailName`,
 * `guardrail` or `guardrailId` attribute.
 *
 * @param {{events: object[]}} context
 * @returns {object[]}
 */
function guardrailEvents(context) {
  const guardrailAttributeKeys = ["guardrailName", "guardrail", "guardrailId"];
  return context.events.filter((event) => {
    if (event.status === "running") {
      return false;
    }
    const lowered = event.name.toLowerCase();
    const namedAsGuardrail = lowered.startsWith("guardrail:") || lowered.includes(".guardrail.");
    return namedAsGuardrail || stringAttr2(event, guardrailAttributeKeys) !== void 0;
  });
}
|
|
9527
|
+
/**
 * Returns the event's retry count, defaulting to 0 when none is recorded.
 * @param {object} event
 * @returns {number}
 */
function retryValue(event) {
  const recorded = retryCount(event);
  return recorded == null ? 0 : recorded;
}
|
|
9530
|
+
/**
 * Resolves an event's duration in milliseconds.
 *
 * A recorded `durationMs` is used only when it is a finite number, matching
 * the `Number.isFinite` guard in eventEndMs. Anything else (null, NaN,
 * Infinity, a non-number) previously leaked straight to callers; the duration
 * is now derived from the start and end times instead, when both exist and
 * the end is not before the start.
 *
 * @param {{durationMs?: number}} event
 * @returns {number | undefined} Duration, or undefined when it cannot be determined.
 */
function eventDurationMs(event) {
  const recorded = event.durationMs;
  if (typeof recorded === "number" && Number.isFinite(recorded)) {
    return recorded;
  }
  const start = eventStartMs(event);
  const end = eventEndMs(event);
  return start !== void 0 && end !== void 0 && end >= start ? end - start : void 0;
}
|
|
9536
|
+
/**
 * Renders a node forest as one line per node, depth-first, in the form
 * `<index path>:<kind>:<name>:<status>`; a missing status prints as "unknown".
 * The index path joins each ancestor's sibling index with "." (for example "0.2.1").
 * @param {Array<{event: object, children: Array}>} nodes - Root nodes.
 * @returns {string[]}
 */
function treeShape(nodes) {
  const lines = [];
  function walk(siblings, parentPath) {
    siblings.forEach((node, index) => {
      const position = parentPath === null ? String(index) : `${parentPath}.${index}`;
      const { kind, name, status } = node.event;
      lines.push(`${position}:${kind}:${name}:${status ?? "unknown"}`);
      walk(node.children, position);
    });
  }
  walk(nodes, null);
  return lines;
}
|
|
9545
|
+
/**
 * Summarises every event of a context as `<kind>:<name>:<status>` (nullish
 * status written as "unknown") and returns the summaries sorted with
 * `String.prototype.localeCompare`.
 *
 * @param {{ events: object[] }} context - Check context holding the events.
 * @returns {string[]} Sorted status summaries.
 */
function statusShape(context) {
  const summaries = context.events.map((event) => {
    const status = event.status ?? "unknown";
    return `${event.kind}:${event.name}:${status}`;
  });
  summaries.sort((left, right) => left.localeCompare(right));
  return summaries;
}
|
|
9548
|
+
/**
 * Describes each event returned by `finishedEvents(context, "TOOL")` as a
 * colon-joined string of tool name, status, retry value and duration.
 * A nullish status or duration is written as "unknown".
 *
 * @param {object} context - Check context.
 * @returns {string[]} One descriptor per tool event, in helper order.
 */
function toolShape(context) {
  const describeTool = (event) => {
    const parts = [];
    parts.push(toolName(event));
    parts.push(event.status ?? "unknown");
    parts.push(retryValue(event));
    parts.push(eventDurationMs(event) ?? "unknown");
    // Array#join is kept on purpose: it renders nullish parts as empty strings.
    return parts.join(":");
  };
  return finishedEvents(context, "TOOL").map((event) => describeTool(event));
}
|
|
9558
|
+
/**
 * Describes each event returned by `finishedEvents(context, "LLM")` as a
 * colon-joined string of provider, model, finish reason and the four token
 * counters (input, output, total, cached).
 *
 * Missing provider/model/finish reason become "unknown"; missing counters
 * become 0.
 *
 * @param {object} context - Check context.
 * @returns {string[]} One descriptor per LLM event, in helper order.
 */
function llmShape(context) {
  const describeCall = (event) => {
    const provider = llmProvider(event) ?? "unknown";
    const model = llmModel(event) ?? "unknown";
    const finishReason = llmFinishReason(event) ?? "unknown";
    const usage = event.tokenUsage;
    const counters = [
      usage?.input ?? 0,
      usage?.output ?? 0,
      usage?.total ?? 0,
      usage?.cached ?? 0
    ];
    return [provider, model, finishReason, ...counters].join(":");
  };
  return finishedEvents(context, "LLM").map((event) => describeCall(event));
}
|
|
9571
|
+
/**
 * Builds a sorted error profile for a context.
 *
 * An event is included when its status is "error" or it carries an `error`
 * value. Each entry is `<kind>:<name>:<error name>:<error code>`, defaulting
 * the error name to "Error" and the code to "unknown".
 *
 * @param {{ events: object[] }} context - Check context holding the events.
 * @returns {string[]} Entries sorted with `String.prototype.localeCompare`.
 */
function errorShape(context) {
  const failed = context.events.filter(
    (event) => event.status === "error" || event.error !== void 0
  );
  const profile = failed.map((event) => {
    const errorName = event.error?.name ?? "Error";
    const errorCode = event.error?.code ?? "unknown";
    // Array#join is kept on purpose: it renders nullish parts as empty strings.
    return [event.kind, event.name, errorName, errorCode].join(":");
  });
  profile.sort((left, right) => left.localeCompare(right));
  return profile;
}
|
|
9581
|
+
/**
 * Lists the signal names of the events returned by
 * `finishedEvents(context, "RETRIEVER")`, sorted with
 * `String.prototype.localeCompare`.
 *
 * @param {object} context - Check context.
 * @returns {Array} Sorted retrieval signal names.
 */
function retrievalShape(context) {
  const retrievals = finishedEvents(context, "RETRIEVER");
  const names = retrievals.map((event) => {
    return signalName(event, ["retrievalName", "retrieverName", "retriever"], ["retriever:", "retrieval:"]);
  });
  names.sort((left, right) => left.localeCompare(right));
  return names;
}
|
|
9586
|
+
/**
 * Lists the signal names of the events returned by `guardrailEvents(context)`,
 * sorted with `String.prototype.localeCompare`.
 *
 * @param {object} context - Check context.
 * @returns {Array} Sorted guardrail signal names.
 */
function guardrailShape(context) {
  const names = guardrailEvents(context).map((event) => {
    return signalName(event, ["guardrailName", "guardrail", "guardrailId"], ["guardrail:"]);
  });
  names.sort((left, right) => left.localeCompare(right));
  return names;
}
|
|
9589
|
+
/**
 * Produces evidence pointing at the first event of a given kind.
 *
 * When no event of that kind exists, the result of
 * `runEvidence(context.selectedRun)` is returned instead.
 *
 * @param {object} context - Check context.
 * @param {string} kind - Event kind to look for.
 * @param {string} path12 - Path forwarded to `eventEvidence`.
 * @returns {*} A one-element evidence array, or the run evidence.
 */
function firstEvidenceForKind(context, kind, path12) {
  const match = context.events.find((candidate) => candidate.kind === kind);
  if (!match) {
    return runEvidence(context.selectedRun);
  }
  return [eventEvidence(match, path12)];
}
|
|
9593
|
+
/**
 * Creates a failing finding attributed to the "baseline.regression" rule by
 * delegating to `failFinding`.
 *
 * @param {string} message - Finding message.
 * @param {*} evidence - Evidence forwarded to `failFinding`.
 * @param {*} expected - Baseline-side value.
 * @param {*} actual - Candidate-side value.
 * @returns {*} Whatever `failFinding` returns.
 */
function baselineDiffFinding(message, evidence, expected, actual) {
  const ruleId = "baseline.regression";
  return failFinding(ruleId, message, evidence, expected, actual);
}
|
|
9596
|
+
/**
 * Creates the "run.status" rule.
 *
 * The rule reports a finding when the selected run's status (or "unknown" when
 * absent) differs from the expected status. Unless incomplete runs are
 * allowed, it reports a second finding when any event is still "running".
 *
 * @param {object} [options]
 * @param {string} [options.expected="ok"] - Status the run must have.
 * @param {boolean} [options.allowIncomplete] - Only a literal `true` disables the running-event check.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createRunStatusRule(options = {}) {
  const ruleId = "run.status";
  const expected = options.expected ?? "ok";
  const allowIncomplete = options.allowIncomplete === true;
  return {
    id: ruleId,
    category: "run",
    defaultSeverity: "error",
    evaluate(context) {
      const findings = [];
      const actual = context.selectedRun?.status ?? "unknown";
      if (actual !== expected) {
        const message = `Run status ${actual} did not match expected ${expected}.`;
        findings.push(failFinding(ruleId, message, runEvidence(context.selectedRun), expected, actual));
      }
      if (allowIncomplete) {
        return findings;
      }
      const stillRunning = context.events.filter((event) => event.status === "running");
      if (stillRunning.length > 0) {
        findings.push(
          failFinding(
            ruleId,
            "Run contains incomplete running events.",
            stillRunning.map((event) => eventEvidence(event)),
            "no running events",
            stillRunning.length
          )
        );
      }
      return findings;
    }
  };
}
|
|
9635
|
+
/**
 * Creates the "run.duration" rule.
 *
 * The rule passes when the selected run has no `durationMs` or when the
 * duration does not exceed `options.maxDurationMs`; otherwise it reports a
 * single finding.
 *
 * @param {{ maxDurationMs: number }} options - Duration limit, read at evaluation time.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createRunDurationRule(options) {
  const ruleId = "run.duration";
  return {
    id: ruleId,
    category: "run",
    defaultSeverity: "error",
    evaluate(context) {
      const actual = context.selectedRun?.durationMs;
      if (actual === void 0) {
        return [];
      }
      const limit = options.maxDurationMs;
      if (actual <= limit) {
        return [];
      }
      const finding = failFinding(
        ruleId,
        `Run duration ${actual}ms exceeded ${limit}ms.`,
        runEvidence(context.selectedRun),
        { maxDurationMs: limit },
        actual
      );
      return [finding];
    }
  };
}
|
|
9655
|
+
/**
 * Creates the "run.depth" rule.
 *
 * The rule takes the largest `depth` among the values of
 * `context.nodesByEventId` (starting from 0) and reports one finding, citing
 * every node at that depth, when it exceeds `options.maxDepth`.
 *
 * @param {{ maxDepth: number }} options - Depth limit, read at evaluation time.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createRunDepthRule(options) {
  const ruleId = "run.depth";
  const toEvidence = ({ event }) => ({
    runId: event.runId,
    eventId: event.eventId,
    parentId: event.parentId,
    kind: event.kind,
    name: event.name,
    status: event.status
  });
  return {
    id: ruleId,
    category: "run",
    defaultSeverity: "error",
    evaluate(context) {
      const nodes = [...context.nodesByEventId.values()];
      let maxDepth = 0;
      for (const node of nodes) {
        maxDepth = Math.max(maxDepth, node.depth);
      }
      if (maxDepth <= options.maxDepth) {
        return [];
      }
      const deepest = nodes.filter((node) => node.depth === maxDepth);
      return [
        failFinding(
          ruleId,
          `Run depth ${maxDepth} exceeded ${options.maxDepth}.`,
          deepest.map((node) => toEvidence(node)),
          { maxDepth: options.maxDepth },
          maxDepth
        )
      ];
    }
  };
}
|
|
9684
|
+
/**
 * Creates the "tool.usage" rule.
 *
 * Against the events returned by `finishedEvents(context, "TOOL")` the rule
 * reports, in this order:
 *   1. each name in `options.required` that never appeared;
 *   2. per event, a forbidden tool and/or a tool outside `options.allowed`;
 *   3. a tool count below `options.minCount`;
 *   4. a tool count above `options.maxCount`.
 *
 * @param {object} options - Usage constraints, read at evaluation time.
 * @param {string[]} [options.required]
 * @param {string[]} [options.forbidden]
 * @param {string[]} [options.allowed]
 * @param {number} [options.minCount]
 * @param {number} [options.maxCount]
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createToolUsageRule(options) {
  const ruleId = "tool.usage";
  return {
    id: ruleId,
    category: "tool",
    defaultSeverity: "error",
    evaluate(context) {
      const toolEvents = finishedEvents(context, "TOOL");
      const observedNames = toolEvents.map(toolName);
      const observed = new Set(observedNames);
      const findings = [];
      const report = (message, evidence, expected, actual) => {
        findings.push(failFinding(ruleId, message, evidence, expected, actual));
      };

      for (const required of options.required ?? []) {
        if (observed.has(required)) continue;
        report(`Required tool ${required} did not appear.`, runEvidence(context.selectedRun), required, observedNames);
      }

      const forbidden = new Set(options.forbidden ?? []);
      const allowed = options.allowed ? new Set(options.allowed) : void 0;
      for (const event of toolEvents) {
        const name = toolName(event);
        if (forbidden.has(name)) {
          report(`Forbidden tool ${name} appeared.`, [eventEvidence(event)], "tool absent", name);
        }
        if (allowed && !allowed.has(name)) {
          report(`Tool ${name} is not in the allowed tool set.`, [eventEvidence(event)], [...allowed].sort(), name);
        }
      }

      const total = toolEvents.length;
      if (options.minCount !== void 0 && total < options.minCount) {
        report(
          `Tool count ${total} was below minimum ${options.minCount}.`,
          runEvidence(context.selectedRun),
          { minCount: options.minCount },
          total
        );
      }
      if (options.maxCount !== void 0 && total > options.maxCount) {
        report(
          `Tool count ${total} exceeded maximum ${options.maxCount}.`,
          toolEvents.map((event) => eventEvidence(event)),
          { maxCount: options.maxCount },
          total
        );
      }
      return findings;
    }
  };
}
|
|
9730
|
+
/**
 * Creates the "llm.usage" rule.
 *
 * Against the events returned by `finishedEvents(context, "LLM")` the rule
 * reports, in this order:
 *   1. a call count above `options.maxCalls`;
 *   2. per event, a model, provider or finish reason that is missing or not
 *      in the corresponding allow-list (only for lists that were supplied);
 *   3. summed input/output/total/cached token counts above their limits.
 *
 * @param {object} options - Usage constraints, read at evaluation time.
 * @param {number} [options.maxCalls]
 * @param {string[]} [options.allowedModels]
 * @param {string[]} [options.allowedProviders]
 * @param {string[]} [options.finishReasons]
 * @param {number} [options.maxInputTokens]
 * @param {number} [options.maxOutputTokens]
 * @param {number} [options.maxTotalTokens]
 * @param {number} [options.maxCachedTokens]
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createLlmUsageRule(options) {
  const ruleId = "llm.usage";
  // Token counter name paired with the option that limits it.
  const tokenLimitOptions = [
    ["input", "maxInputTokens"],
    ["output", "maxOutputTokens"],
    ["total", "maxTotalTokens"],
    ["cached", "maxCachedTokens"]
  ];
  return {
    id: ruleId,
    category: "llm",
    defaultSeverity: "error",
    evaluate(context) {
      const calls = finishedEvents(context, "LLM");
      const findings = [];
      const allowedModels = options.allowedModels ? new Set(options.allowedModels) : void 0;
      const allowedProviders = options.allowedProviders ? new Set(options.allowedProviders) : void 0;
      const finishReasons = options.finishReasons ? new Set(options.finishReasons) : void 0;

      if (options.maxCalls !== void 0 && calls.length > options.maxCalls) {
        findings.push(
          failFinding(
            ruleId,
            `LLM call count ${calls.length} exceeded ${options.maxCalls}.`,
            calls.map((event) => eventEvidence(event)),
            { maxCalls: options.maxCalls },
            calls.length
          )
        );
      }

      for (const event of calls) {
        const model = llmModel(event);
        const provider = llmProvider(event);
        const finishReason = llmFinishReason(event);
        if (allowedModels && (!model || !allowedModels.has(model))) {
          const shown = model ?? "unknown";
          findings.push(
            failFinding(ruleId, `LLM model ${shown} is not allowed.`, [eventEvidence(event, "attributes.model")], [...allowedModels].sort(), shown)
          );
        }
        if (allowedProviders && (!provider || !allowedProviders.has(provider))) {
          const shown = provider ?? "unknown";
          findings.push(
            failFinding(ruleId, `LLM provider ${shown} is not allowed.`, [eventEvidence(event, "attributes.provider")], [...allowedProviders].sort(), shown)
          );
        }
        if (finishReasons && (!finishReason || !finishReasons.has(finishReason))) {
          const shown = finishReason ?? "unknown";
          findings.push(
            failFinding(ruleId, `LLM finish reason ${shown} is not allowed.`, [eventEvidence(event, "attributes.finishReason")], [...finishReasons].sort(), shown)
          );
        }
      }

      // Sum every counter across all calls; a missing counter contributes 0.
      const tokenTotals = { input: 0, output: 0, total: 0, cached: 0 };
      for (const event of calls) {
        const usage = event.tokenUsage;
        tokenTotals.input = tokenTotals.input + (usage?.input ?? 0);
        tokenTotals.output = tokenTotals.output + (usage?.output ?? 0);
        tokenTotals.total = tokenTotals.total + (usage?.total ?? 0);
        tokenTotals.cached = tokenTotals.cached + (usage?.cached ?? 0);
      }

      for (const [key, optionName] of tokenLimitOptions) {
        const limit = options[optionName];
        if (limit === void 0) continue;
        const used = tokenTotals[key];
        if (!(used > limit)) continue;
        findings.push(
          failFinding(
            ruleId,
            `LLM ${key} token count ${used} exceeded ${limit}.`,
            calls.map((event) => eventEvidence(event, `tokenUsage.${key}`)),
            { [optionName]: limit },
            used
          )
        );
      }
      return findings;
    }
  };
}
|
|
9804
|
+
/**
 * Creates the "structure.orphan" rule.
 *
 * An event is an orphan when it has a truthy `parentId` that is not a key of
 * `eventMap(context.events)`. While `allowMarkedUnresolved` is in effect,
 * orphans for which `parentMarkedUnresolved` returns a truthy value are
 * tolerated. All remaining orphans are reported in one finding.
 *
 * @param {object} [options]
 * @param {boolean} [options.allowMarkedUnresolved=true] - Tolerate orphans explicitly marked unresolved.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createStructureOrphanRule(options = {}) {
  const ruleId = "structure.orphan";
  const allowMarkedUnresolved = options.allowMarkedUnresolved ?? true;
  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate(context) {
      const byId = eventMap(context.events);
      const isOrphan = (event) => {
        const parentMissing = Boolean(event.parentId) && !byId.has(event.parentId);
        if (!parentMissing) return false;
        if (!allowMarkedUnresolved) return true;
        return !parentMarkedUnresolved(event);
      };
      const orphans = context.events.filter((event) => isOrphan(event));
      if (orphans.length === 0) {
        return [];
      }
      const finding = failFinding(
        ruleId,
        "Trace contains events whose parentId is not present in the selected run.",
        orphans.map((event) => eventEvidence(event, "parentId")),
        "parentId resolves to an event in the selected run",
        orphans.length
      );
      return [finding];
    }
  };
}
|
|
9829
|
+
/**
 * Creates the "structure.cycle" rule.
 *
 * Starting from every event (in `eventId` order), the rule follows `parentId`
 * links through `eventMap(context.events)`. Reaching an event already on the
 * current chain means the chain's tail forms a cycle. Each distinct cycle,
 * identified by its sorted member ids, is reported once.
 *
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createStructureCycleRule() {
  const ruleId = "structure.cycle";
  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate(context) {
      const byId = eventMap(context.events);
      const reportedCycles = /* @__PURE__ */ new Set();
      const findings = [];
      const ordered = [...context.events].sort((a, b) => a.eventId.localeCompare(b.eventId));
      for (const origin of ordered) {
        const chain = [];
        const positionById = /* @__PURE__ */ new Map();
        for (
          let cursor = origin;
          cursor;
          cursor = cursor.parentId ? byId.get(cursor.parentId) : void 0
        ) {
          const firstSeenAt = positionById.get(cursor.eventId);
          if (firstSeenAt !== void 0) {
            // Everything from the first visit onward belongs to the cycle.
            const members = chain.slice(firstSeenAt);
            const memberIds = members.map((item) => item.eventId).sort();
            const signature = memberIds.join("\0");
            if (!reportedCycles.has(signature)) {
              reportedCycles.add(signature);
              findings.push(
                failFinding(
                  ruleId,
                  "Trace contains a parentId cycle.",
                  members.map((item) => eventEvidence(item, "parentId")),
                  "acyclic parentId graph",
                  memberIds
                )
              );
            }
            break;
          }
          positionById.set(cursor.eventId, chain.length);
          chain.push(cursor);
        }
      }
      return findings;
    }
  };
}
|
|
9870
|
+
/**
 * Creates the "structure.relationship" rule.
 *
 * For every event the rule can report:
 *   - a confidence ranked (via `CONFIDENCE_RANK`) below `options.minConfidence`;
 *   - a `parentId` equal to the event's own `eventId`;
 *   - with `options.requireParentBeforeChild`, a resolved parent whose start
 *     time is later than the child's;
 *   - with `options.requireTraceParentSpan`, a `trace.parentSpanId` that does
 *     not equal the resolved parent's `trace.spanId`.
 * Events whose parent cannot be resolved are skipped after the first two checks.
 *
 * @param {object} [options] - Read at evaluation time.
 * @param {string} [options.minConfidence]
 * @param {boolean} [options.requireParentBeforeChild]
 * @param {boolean} [options.requireTraceParentSpan]
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createStructureRelationshipRule(options = {}) {
  const ruleId = "structure.relationship";
  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate(context) {
      const byId = eventMap(context.events);
      const findings = [];
      const minConfidence = options.minConfidence;
      const report = (message, evidence, expected, actual) => {
        findings.push(failFinding(ruleId, message, evidence, expected, actual));
      };

      for (const event of context.events) {
        if (minConfidence && CONFIDENCE_RANK[event.confidence] < CONFIDENCE_RANK[minConfidence]) {
          report(
            `Event confidence ${event.confidence} is below ${minConfidence}.`,
            [eventEvidence(event, "confidence")],
            { minConfidence },
            event.confidence
          );
        }

        if (!event.parentId) {
          continue;
        }
        if (event.parentId === event.eventId) {
          report(
            "Event parentId points to itself.",
            [eventEvidence(event, "parentId")],
            "parentId references a distinct event",
            "self"
          );
          continue;
        }

        const parent = byId.get(event.parentId);
        if (!parent) {
          continue;
        }

        if (options.requireParentBeforeChild) {
          const parentTime = eventStartMs(parent);
          const childTime = eventStartMs(event);
          const bothKnown = parentTime !== void 0 && childTime !== void 0;
          if (bothKnown && parentTime > childTime) {
            report(
              "Parent event starts after child event.",
              [eventEvidence(parent), eventEvidence(event, "parentId")],
              "parent start <= child start",
              { parentEventId: parent.eventId, childEventId: event.eventId }
            );
          }
        }

        if (options.requireTraceParentSpan && parent.trace?.spanId && event.trace) {
          const actual = event.trace.parentSpanId;
          if (actual !== parent.trace.spanId) {
            report(
              "Trace parentSpanId does not match parent spanId.",
              [eventEvidence(event, "trace.parentSpanId")],
              { parentSpanId: parent.trace.spanId },
              actual ?? "missing"
            );
          }
        }
      }
      return findings;
    }
  };
}
|
|
9940
|
+
/**
 * Creates the "structure.parallelWidth" rule.
 *
 * With `options.maxChildren`, every entry of `context.childrenByParentId`
 * holding more children than allowed is reported. With
 * `options.maxConcurrent`, events with a known start and a strictly later end
 * are swept in time order; the largest set of simultaneously open events is
 * reported when it exceeds the limit. At equal timestamps ends are processed
 * before starts, so back-to-back events do not count as overlapping.
 *
 * @param {object} options - Read at evaluation time.
 * @param {number} [options.maxChildren]
 * @param {number} [options.maxConcurrent]
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createStructureParallelWidthRule(options) {
  const ruleId = "structure.parallelWidth";
  const childEvidence = ({ event }) => ({
    runId: event.runId,
    eventId: event.eventId,
    parentId: event.parentId,
    kind: event.kind,
    name: event.name,
    status: event.status
  });
  const byEventId = (a, b) => a.eventId.localeCompare(b.eventId);
  return {
    id: ruleId,
    category: "structure",
    defaultSeverity: "error",
    evaluate(context) {
      const findings = [];
      const byId = eventMap(context.events);

      if (options.maxChildren !== void 0) {
        for (const [parentId, children] of context.childrenByParentId.entries()) {
          if (children.length <= options.maxChildren) continue;
          const parent = byId.get(parentId);
          const parentEvidence = parent
            ? [eventEvidence(parent)]
            : [{ runId: context.selectedRun?.runId, eventId: parentId }];
          findings.push(
            failFinding(
              ruleId,
              `Parent ${parentId} has ${children.length} children, exceeding ${options.maxChildren}.`,
              [...parentEvidence, ...children.map((child) => childEvidence(child))],
              { maxChildren: options.maxChildren },
              children.length
            )
          );
        }
      }

      if (options.maxConcurrent !== void 0) {
        // One +1 point at the start and one -1 point at the end of each usable interval.
        const points = [];
        for (const event of context.events) {
          const start = eventStartMs(event);
          const end = eventEndMs(event);
          const usable = start !== void 0 && end !== void 0 && end > start;
          if (!usable) continue;
          points.push({ time: start, delta: 1, event });
          points.push({ time: end, delta: -1, event });
        }
        points.sort((a, b) => {
          const byTime = a.time - b.time;
          if (byTime !== 0) return byTime;
          const byDelta = a.delta - b.delta;
          if (byDelta !== 0) return byDelta;
          return byEventId(a.event, b.event);
        });

        const open = /* @__PURE__ */ new Map();
        let widest = [];
        for (const { delta, event } of points) {
          if (delta > 0) {
            open.set(event.eventId, event);
            if (open.size > widest.length) {
              widest = [...open.values()].sort(byEventId);
            }
          } else {
            open.delete(event.eventId);
          }
        }

        if (widest.length > options.maxConcurrent) {
          findings.push(
            failFinding(
              ruleId,
              `Concurrent event width ${widest.length} exceeded ${options.maxConcurrent}.`,
              widest.map((event) => eventEvidence(event)),
              { maxConcurrent: options.maxConcurrent },
              widest.length
            )
          );
        }
      }
      return findings;
    }
  };
}
|
|
10016
|
+
/**
 * Creates the "safety.redaction" rule.
 *
 * Every entry yielded by `eventValueEntries` (summaries and error included)
 * whose key is judged sensitive by `isSensitiveKey` must be a string for which
 * `hasRedactionMarker` returns a truthy value; anything else is reported. The
 * findings are passed through `limitFindings` with `options.maxFindings`.
 *
 * @param {object} [options]
 * @param {*} [options.sensitiveKeys] - Defaults to `DEFAULT_SENSITIVE_KEYS`.
 * @param {string[]} [options.redactedMarkers] - Defaults to "[REDACTED]" and "[REDACTED:".
 * @param {number} [options.maxFindings] - Forwarded to `limitFindings`.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createSafetyRedactionRule(options = {}) {
  const ruleId = "safety.redaction";
  const sensitiveKeys = options.sensitiveKeys ?? DEFAULT_SENSITIVE_KEYS;
  const markers = options.redactedMarkers ?? ["[REDACTED]", "[REDACTED:"];
  const isRedacted = (value) => typeof value === "string" && hasRedactionMarker(value, markers);
  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate(context) {
      const findings = [];
      for (const event of context.events) {
        const entries = eventValueEntries(event, { includeSummaries: true, includeError: true });
        for (const entry of entries) {
          const keyName = entry.key ?? lastPathSegment(entry.path);
          const exposed = isSensitiveKey(keyName, sensitiveKeys) && !isRedacted(entry.value);
          if (!exposed) continue;
          findings.push(
            failFinding(
              ruleId,
              `Sensitive-looking field at ${entry.path} is not redacted.`,
              [eventEvidence(event, entry.path)],
              "redaction marker",
              { path: entry.path, valueType: valueType(entry.value) }
            )
          );
        }
      }
      return limitFindings(findings, options.maxFindings);
    }
  };
}
|
|
10044
|
+
/**
 * Creates the raw-content safety rule (rule id "safety.rawPrompt").
 *
 * Every entry yielded by `eventValueEntries` whose key is matched by
 * `isRawContentKey` is reported. The findings are passed through
 * `limitFindings` with `options.maxFindings`.
 *
 * @param {object} [options]
 * @param {*} [options.forbiddenKeys] - Defaults to `DEFAULT_RAW_CONTENT_KEYS`.
 * @param {boolean} [options.includeSummaries] - Forwarded to `eventValueEntries`.
 * @param {number} [options.maxFindings] - Forwarded to `limitFindings`.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createSafetyRawContentRule(options = {}) {
  const ruleId = "safety.rawPrompt";
  const forbiddenKeys = options.forbiddenKeys ?? DEFAULT_RAW_CONTENT_KEYS;
  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate(context) {
      const findings = [];
      for (const event of context.events) {
        const entries = eventValueEntries(event, { includeSummaries: options.includeSummaries });
        for (const entry of entries) {
          const keyName = entry.key ?? lastPathSegment(entry.path);
          if (isRawContentKey(keyName, forbiddenKeys)) {
            findings.push(
              failFinding(
                ruleId,
                `Raw content-like field ${entry.path} is present.`,
                [eventEvidence(event, entry.path)],
                "metadata-only trace fields",
                { path: entry.path, valueType: valueType(entry.value) }
              )
            );
          }
        }
      }
      return limitFindings(findings, options.maxFindings);
    }
  };
}
|
|
10071
|
+
/**
 * Creates the "safety.secretPattern" rule.
 *
 * Each string entry yielded by `eventValueEntries` (summaries and error
 * included) is truncated to `maxStringLength` characters and tested against
 * the patterns in order. Only the first matching pattern per entry is
 * reported. The findings are passed through `limitFindings` with
 * `options.maxFindings`.
 *
 * @param {object} [options]
 * @param {Iterable<{ id: string, pattern: RegExp }>} [options.patterns] - Defaults to `DEFAULT_SECRET_PATTERNS`.
 * @param {number} [options.maxStringLength=4096] - Number of leading characters scanned.
 * @param {number} [options.maxFindings] - Forwarded to `limitFindings`.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createSafetySecretPatternRule(options = {}) {
  const ruleId = "safety.secretPattern";
  const patterns = options.patterns ?? DEFAULT_SECRET_PATTERNS;
  const maxStringLength = options.maxStringLength ?? 4096;

  // Returns the first pattern matching `sample`, or undefined when none does.
  // lastIndex is reset around each test so stateful (/g, /y) regexes start clean.
  const firstMatch = (sample) => {
    for (const candidate of patterns) {
      candidate.pattern.lastIndex = 0;
      const matched = candidate.pattern.test(sample);
      if (matched) {
        candidate.pattern.lastIndex = 0;
        return candidate;
      }
    }
    return void 0;
  };

  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate(context) {
      const findings = [];
      for (const event of context.events) {
        const entries = eventValueEntries(event, { includeSummaries: true, includeError: true });
        for (const entry of entries) {
          if (typeof entry.value !== "string") continue;
          const hit = firstMatch(entry.value.slice(0, maxStringLength));
          if (hit === void 0) continue;
          findings.push(
            failFinding(
              ruleId,
              `Secret-like pattern ${hit.id} matched at ${entry.path}.`,
              [eventEvidence(event, entry.path)],
              "no secret-like strings",
              { pattern: hit.id, path: entry.path }
            )
          );
        }
      }
      return limitFindings(findings, options.maxFindings);
    }
  };
}
|
|
10105
|
+
/**
 * Creates the "safety.oversizedAttribute" rule.
 *
 * Each entry yielded by `eventValueEntries` (summaries and error included) is
 * checked against every limit that was supplied:
 *   - string length vs `options.maxStringLength`;
 *   - array length vs `options.maxArrayLength`;
 *   - own key count (values accepted by `isRecord14`) vs `options.maxObjectKeys`;
 *   - `serializedByteLength(value)` vs `options.maxSerializedBytes`.
 * The checks are independent, so one entry may produce several findings. The
 * findings are passed through `limitFindings` with `options.maxFindings`.
 *
 * @param {object} options - Limits, read at evaluation time.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createSafetyOversizedAttributeRule(options) {
  const ruleId = "safety.oversizedAttribute";
  return {
    id: ruleId,
    category: "safety",
    defaultSeverity: "error",
    evaluate(context) {
      const findings = [];
      const report = (event, entry, message, expected, actual) => {
        findings.push(failFinding(ruleId, message, [eventEvidence(event, entry.path)], expected, actual));
      };

      for (const event of context.events) {
        const entries = eventValueEntries(event, { includeSummaries: true, includeError: true });
        for (const entry of entries) {
          const value = entry.value;

          if (typeof value === "string" && options.maxStringLength !== void 0) {
            if (value.length > options.maxStringLength) {
              report(
                event,
                entry,
                `String at ${entry.path} exceeds ${options.maxStringLength} characters.`,
                { maxStringLength: options.maxStringLength },
                { path: entry.path, length: value.length }
              );
            }
          }

          if (Array.isArray(value) && options.maxArrayLength !== void 0) {
            if (value.length > options.maxArrayLength) {
              report(
                event,
                entry,
                `Array at ${entry.path} exceeds ${options.maxArrayLength} items.`,
                { maxArrayLength: options.maxArrayLength },
                { path: entry.path, length: value.length }
              );
            }
          }

          if (isRecord14(value) && options.maxObjectKeys !== void 0) {
            const keyCount = Object.keys(value).length;
            if (keyCount > options.maxObjectKeys) {
              report(
                event,
                entry,
                `Object at ${entry.path} exceeds ${options.maxObjectKeys} keys.`,
                { maxObjectKeys: options.maxObjectKeys },
                { path: entry.path, keys: keyCount }
              );
            }
          }

          if (options.maxSerializedBytes !== void 0) {
            const bytes = serializedByteLength(value);
            if (bytes !== void 0 && bytes > options.maxSerializedBytes) {
              report(
                event,
                entry,
                `Value at ${entry.path} exceeds ${options.maxSerializedBytes} serialized bytes.`,
                { maxSerializedBytes: options.maxSerializedBytes },
                { path: entry.path, bytes }
              );
            }
          }
        }
      }
      return limitFindings(findings, options.maxFindings);
    }
  };
}
|
|
10167
|
+
/**
 * Creates the "baseline.regression" rule, which compares the candidate run in
 * the check context against a baseline trace.
 *
 * The baseline run is selected with `resolveSelectedRun(options.baseline,
 * options.baselineRunId)`. If selection yields diagnostics or no run, a single
 * finding is returned. Otherwise the rule reports, in order:
 *   1. a trace format that differs from the baseline;
 *   2. a run status that differs (absent status is treated as "unknown");
 *   3. a run duration differing by more than `options.durationToleranceMs`
 *      (default 0), only when both durations are defined;
 *   4. any difference in the tree, status, tool, LLM, error, retrieval or
 *      guardrail shapes, compared via their `JSON.stringify` output.
 *
 * @param {object} options - Read at evaluation time.
 * @param {object} options.baseline - Baseline input passed to `resolveSelectedRun` and `buildFacts`.
 * @param {string} [options.baselineRunId] - Run to select inside the baseline.
 * @param {number} [options.durationToleranceMs=0] - Allowed absolute duration difference.
 * @returns {{ id: string, category: string, defaultSeverity: string, evaluate: Function }} Rule object.
 */
function createBaselineRegressionRule(options) {
  return {
    id: "baseline.regression",
    category: "baseline",
    defaultSeverity: "error",
    evaluate(context) {
      const baselineSelection = resolveSelectedRun(options.baseline, options.baselineRunId);
      if (baselineSelection.diagnostics.length > 0 || !baselineSelection.run) {
        return [
          failFinding(
            "baseline.regression",
            "Baseline run could not be selected.",
            runEvidence(context.selectedRun),
            "selectable baseline run",
            baselineSelection.diagnostics.map((item) => item.code)
          )
        ];
      }
      const baselineFacts = buildFacts(options.baseline, baselineSelection.run);
      const baselineContext = {
        ...baselineFacts,
        selectedRun: baselineSelection.run,
        sourceLabel: options.baseline.sourceLabel
      };
      const findings = [];
      const durationToleranceMs = options.durationToleranceMs ?? 0;
      if (baselineContext.format !== context.format) {
        findings.push(
          baselineDiffFinding(
            "Trace format differs from baseline.",
            runEvidence(context.selectedRun),
            baselineContext.format,
            context.format
          )
        );
      }
      const baselineRunStatus = baselineContext.selectedRun?.status ?? "unknown";
      const candidateRunStatus = context.selectedRun?.status ?? "unknown";
      if (baselineRunStatus !== candidateRunStatus) {
        findings.push(
          baselineDiffFinding(
            "Run status differs from baseline.",
            runEvidence(context.selectedRun),
            baselineRunStatus,
            candidateRunStatus
          )
        );
      }
      const baselineDuration = baselineContext.selectedRun?.durationMs;
      const candidateDuration = context.selectedRun?.durationMs;
      if (baselineDuration !== void 0 && candidateDuration !== void 0 && Math.abs(candidateDuration - baselineDuration) > durationToleranceMs) {
        findings.push(
          baselineDiffFinding(
            "Run duration differs from baseline beyond tolerance.",
            runEvidence(context.selectedRun),
            { durationMs: baselineDuration, toleranceMs: durationToleranceMs },
            candidateDuration
          )
        );
      }
      // Filter the candidate's guardrail events once; only the first one is
      // needed as evidence, so there is no reason to scan the events twice.
      const firstGuardrailEvent = guardrailEvents(context)[0];
      const comparisons = [
        {
          label: "Tree shape",
          path: "tree",
          expected: treeShape(baselineContext.rootNodes),
          actual: treeShape(context.rootNodes),
          evidence: runEvidence(context.selectedRun)
        },
        {
          label: "Event statuses",
          path: "status",
          expected: statusShape(baselineContext),
          actual: statusShape(context),
          evidence: runEvidence(context.selectedRun)
        },
        {
          label: "Tool usage",
          path: "tool",
          expected: toolShape(baselineContext),
          actual: toolShape(context),
          evidence: firstEvidenceForKind(context, "TOOL", "tool")
        },
        {
          label: "LLM usage",
          path: "llm",
          expected: llmShape(baselineContext),
          actual: llmShape(context),
          evidence: firstEvidenceForKind(context, "LLM", "llm")
        },
        {
          label: "Error profile",
          path: "error",
          expected: errorShape(baselineContext),
          actual: errorShape(context),
          evidence: firstEvidenceForKind(context, "ERROR", "error")
        },
        {
          label: "Retrieval signals",
          path: "retrieval",
          expected: retrievalShape(baselineContext),
          actual: retrievalShape(context),
          evidence: firstEvidenceForKind(context, "RETRIEVER", "retrieval")
        },
        {
          label: "Guardrail signals",
          path: "guardrail",
          expected: guardrailShape(baselineContext),
          actual: guardrailShape(context),
          evidence: firstGuardrailEvent ? [eventEvidence(firstGuardrailEvent, "guardrail")] : runEvidence(context.selectedRun)
        }
      ];
      for (const comparison of comparisons) {
        // Shapes are arrays of strings, so their JSON text is a sufficient equality check.
        if (JSON.stringify(comparison.expected) === JSON.stringify(comparison.actual)) {
          continue;
        }
        findings.push(
          baselineDiffFinding(
            `${comparison.label} differs from baseline.`,
            // Fall back to a run-level pointer when no evidence was collected.
            comparison.evidence.length > 0 ? comparison.evidence : [{ runId: context.selectedRun?.runId, path: comparison.path }],
            comparison.expected,
            comparison.actual
          )
        );
      }
      return findings;
    }
  };
}
|
|
10295
|
+
/**
 * Evaluates the selected rules against one run of an already-read trace.
 *
 * Returns an error result as soon as run selection, rule selection, or any
 * rule evaluation produces diagnostics; otherwise returns the sorted findings
 * with a "pass"/"fail" status derived from the summary's failed count.
 *
 * @param {object} input3 - Check input; `input3.read` is the opened trace and
 *   `input3.sourceLabel` is copied into the rule context when truthy.
 * @param {object} [options] - `runId`, `rules` and `select`.
 * @returns {object} Check result (`ok`, `status`, `format`, optional `runId`,
 *   `summary`, `findings`, `diagnostics`).
 */
function runTraceChecks(input3, options = {}) {
  const selection = resolveSelectedRun(input3, options.runId);
  if (selection.diagnostics.length > 0) {
    return errorResult(input3, selection.diagnostics, selection.run);
  }

  const chosen = selectRules(options.rules ?? [], options.select);
  if (chosen.diagnostics.length > 0) {
    return errorResult(input3, chosen.diagnostics, selection.run);
  }

  // Rule context: derived facts plus the optional selected run / source label.
  const context = { ...buildFacts(input3, selection.run) };
  if (selection.run) context.selectedRun = selection.run;
  if (input3.sourceLabel) context.sourceLabel = input3.sourceLabel;

  const ruleFailures = [];
  const collected = [];
  for (const rule of chosen.rules) {
    try {
      const normalized = rule.evaluate(context).map((finding) => normalizeFinding(rule, finding));
      collected.push(...normalized);
    } catch (error) {
      // A throwing rule becomes a diagnostic instead of aborting the whole run.
      const reason = error instanceof Error ? error.message : String(error);
      ruleFailures.push(
        diagnostic("AI_CHECK_INTERNAL_ERROR", `Rule ${rule.id} failed: ${reason}`, rule.id)
      );
    }
  }
  if (ruleFailures.length > 0) {
    return errorResult(input3, ruleFailures, selection.run);
  }

  const eventById = new Map(input3.read.events.map((event) => [event.eventId, event]));
  const ordered = collected.sort(compareFindings(eventById));
  const summary = summarize(ordered, ruleFailures);
  const outcome = summary.failed > 0 ? "fail" : "pass";
  return {
    ok: outcome === "pass",
    status: outcome,
    format: input3.read.format,
    ...(selection.run ? { runId: selection.run.runId } : {}),
    summary,
    findings: ordered,
    diagnostics: ruleFailures
  };
}
|
|
10339
|
+
/**
 * Reads an entire stream as UTF-8 text.
 *
 * @param {object} stdin - Async-iterable stream exposing `setEncoding`.
 * @returns {Promise<string>} Concatenation of every chunk, in order.
 */
async function readStdin2(stdin) {
  stdin.setEncoding("utf8");
  const pieces = [];
  for await (const piece of stdin) {
    // Non-string chunks are stringified before being appended.
    pieces.push(typeof piece === "string" ? piece : String(piece));
  }
  return pieces.join("");
}
|
|
10347
|
+
/**
 * Tells whether a thrown value is an object whose `code` is "ENOENT".
 *
 * @param {unknown} error - Any caught value.
 * @returns {boolean} True only for non-null objects with `code === "ENOENT"`.
 */
function isMissingFileError2(error) {
  if (error === null || typeof error !== "object") return false;
  return "code" in error && error.code === "ENOENT";
}
|
|
10350
|
+
/**
 * Turns a CLI target into a trace input descriptor.
 *
 * "-" reads the whole of `stdin`. Otherwise the target is first tried as a
 * filesystem path; if that path does not exist it is resolved through
 * `getTraceFilePath` against the trace directory from `options.dir`.
 *
 * @param {string} target - "-", a file/directory path, or a value accepted by
 *   `getTraceFilePath`.
 * @param {object} options - Only `options.dir` is read here.
 * @param {object} stdin - Stream consumed when `target` is "-".
 * @returns {Promise<object>} `{type:"string",content}` or
 *   `{type:"file"|"directory",path}`.
 * @throws Any `stat` failure other than ENOENT on the first attempt, and any
 *   `stat` failure on the resolved trace path.
 */
async function inputFromTarget(target, options, stdin) {
  if (target === "-") {
    const content = await readStdin2(stdin);
    return { type: "string", content };
  }

  const describe = (info, location) => ({
    type: info.isDirectory() ? "directory" : "file",
    path: location
  });

  try {
    return describe(await stat(target), target);
  } catch (error) {
    // Only a missing path falls through to the trace-directory lookup.
    if (!isMissingFileError2(error)) throw error;
  }

  const runPath = getTraceFilePath(target, resolveTraceDir({ dir: options.dir }));
  return describe(await stat(runPath), runPath);
}
|
|
10366
|
+
|
|
10367
|
+
// packages/cli/src/check.ts
|
|
10368
|
+
// Rule selection used by buildRules when neither the config's `checks.select`
// nor `options.rule` names any rule.
var DEFAULT_SELECT = ["run.status"];

// Config file extensions that loadConfig accepts.
var CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([
  ".json",
  ".js",
  ".mjs",
  ".cjs"
]);

// TypeScript extensions that loadConfig rejects with a dedicated message.
var TS_CONFIG_EXTENSIONS = /* @__PURE__ */ new Set([
  ".ts",
  ".mts",
  ".cts"
]);
|
|
10371
|
+
/**
 * Builds a diagnostic record for the check command.
 *
 * @param {string} code - Diagnostic code.
 * @param {string} message - Human-readable description.
 * @param {string} [severity="error"] - Severity label.
 * @returns {{code: string, message: string, severity: string}}
 */
function diagnostic2(code, message, severity = "error") {
  const entry = { code, message, severity };
  return entry;
}
|
|
10374
|
+
/**
 * Builds an "error" check result carrying exactly one error diagnostic.
 *
 * @param {string} code - Diagnostic code.
 * @param {string} message - Diagnostic message.
 * @param {string} [format="unknown"] - Trace format to report.
 * @returns {object} Result with `ok: false`, `status: "error"`, no findings
 *   and a summary counting a single error.
 */
function errorResult2(code, message, format = "unknown") {
  const summary = { passed: 0, failed: 0, warnings: 0, errors: 1 };
  return {
    ok: false,
    status: "error",
    format,
    summary,
    findings: [],
    diagnostics: [diagnostic2(code, message)]
  };
}
|
|
10390
|
+
/**
 * Parses an optional non-negative numeric option.
 *
 * @param {unknown} value - Raw option value; `undefined` means "not given".
 * @param {string} label - Option name used as the start of the error message.
 * @returns {number|undefined} The parsed number, or `undefined` when absent.
 * @throws {Error} When the value is blank, not finite, or negative.
 */
function parseNumber(value, label) {
  if (value === void 0) return void 0;
  // Number("") and Number("   ") are 0, which would silently turn an empty
  // option value into a limit of zero; treat blank strings as invalid.
  const isBlank = typeof value === "string" && value.trim() === "";
  const parsed = isBlank ? Number.NaN : Number(value);
  if (!Number.isFinite(parsed) || parsed < 0) {
    throw new Error(`${label} must be a non-negative number.`);
  }
  return parsed;
}
|
|
10398
|
+
/**
 * Validates an optional array of strings.
 *
 * @param {unknown} value - Candidate value; `undefined` is passed through.
 * @returns {string[]|undefined} The same array instance, or `undefined`.
 * @throws {Error} When the value is not an array made only of strings.
 */
function asStringArray(value) {
  if (value === void 0) return void 0;
  const allStrings = Array.isArray(value) && value.every((item) => typeof item === "string");
  if (!allStrings) {
    throw new Error("Expected an array of strings.");
  }
  return value;
}
|
|
10405
|
+
/**
 * Validates the value exported by a check config.
 *
 * @param {unknown} value - Parsed JSON or module export.
 * @returns {object} `{}` for `undefined`/`null`, otherwise the object itself.
 * @throws {Error} When the value is an array or not an object.
 */
function asConfig(value) {
  if (value === void 0 || value === null) return {};
  const isPlainContainer = typeof value === "object" && !Array.isArray(value);
  if (!isPlainContainer) {
    throw new Error("Config must export an object.");
  }
  return value;
}
|
|
10412
|
+
/**
 * Loads a check config from a .json, .js, .mjs or .cjs file.
 *
 * JSON files are read and parsed; other supported files are loaded with a
 * dynamic `import()` and their `default` export is used when present.
 *
 * @param {string|undefined} configPath - Config path; `undefined` yields `{}`.
 * @returns {Promise<object>} The validated config object.
 * @throws {Error} For TypeScript or otherwise unsupported extensions, and for
 *   any read, parse, import or validation failure.
 */
async function loadConfig(configPath) {
  if (configPath === void 0) return {};

  const ext = path10.extname(configPath);
  if (TS_CONFIG_EXTENSIONS.has(ext)) {
    throw new Error(
      "TypeScript check configs require an explicit precompiled JavaScript config or future --config-loader support."
    );
  }
  if (!CONFIG_EXTENSIONS.has(ext)) {
    throw new Error("Unsupported check config extension. Use .json, .js, .mjs, or .cjs.");
  }

  const resolved = path10.resolve(configPath);
  if (ext !== ".json") {
    const loaded = await import(pathToFileURL(resolved).href);
    return asConfig("default" in loaded ? loaded.default : loaded);
  }
  const text = await readFile(resolved, "utf-8");
  return asConfig(JSON.parse(text));
}
|
|
10431
|
+
/**
 * Extracts the `checks` section of a loaded config.
 *
 * @param {object} config - Config object as returned by `asConfig`.
 * @returns {object} The `checks` object, or `{}` when it is absent or null.
 * @throws {Error} When `checks` is an array or not an object.
 */
function normalizeConfig(config) {
  const { checks } = config;
  // `typeof null` is "object", so an explicit `"checks": null` used to slip
  // through and crash later on `checks.run`. Treat it as absent, the same way
  // asConfig treats a null config.
  if (checks === void 0 || checks === null) return {};
  if (typeof checks !== "object" || Array.isArray(checks)) {
    throw new Error("checks config must be an object.");
  }
  return checks;
}
|
|
10438
|
+
/**
 * Assembles the rule list and rule selection for the check command from the
 * loaded config and the CLI options.
 *
 * Five rules are always created (run status, structure orphan, structure
 * cycle, safety raw content, safety secret pattern); the others are added only
 * when a matching config value or option is present. CLI values for
 * `maxDurationMs` / `maxTotalTokens` take precedence over the config, and CLI
 * tool/model lists are appended to the config lists.
 *
 * @param {object} config - Config object; its `checks` section is used.
 * @param {object} options - CLI options.
 * @returns {{rules: object[], select: string[], diagnostics: object[]}}
 *   `select` falls back to `DEFAULT_SELECT` when nothing was selected;
 *   `diagnostics` is always empty here.
 * @throws {Error} From `normalizeConfig`, `parseNumber` or `asStringArray`.
 */
function buildRules(config, options) {
  const diagnostics = [];
  const checks = normalizeConfig(config);
  const runChecks = checks.run ?? {};
  const toolChecks = checks.tool ?? {};
  const llmChecks = checks.llm ?? {};
  const structureChecks = checks.structure ?? {};
  const safetyChecks = checks.safety ?? {};

  const isSet = (value) => value !== void 0;
  const hasEntries = (list) => Boolean(list?.length);
  const anySet = (source, keys) => keys.some((key) => isSet(source[key]));

  const durationLimit = parseNumber(options.maxDurationMs, "--max-duration-ms") ?? runChecks.maxDurationMs;
  const tokenLimit = parseNumber(options.maxTotalTokens, "--max-total-tokens") ?? llmChecks.maxTotalTokens;

  const rules = [
    createRunStatusRule(runChecks),
    createStructureOrphanRule(),
    createStructureCycleRule(),
    createSafetyRawContentRule(),
    createSafetySecretPatternRule()
  ];

  if (isSet(durationLimit)) {
    rules.push(createRunDurationRule({ maxDurationMs: durationLimit }));
  }
  if (isSet(runChecks.maxDepth)) {
    rules.push(createRunDepthRule({ maxDepth: runChecks.maxDepth }));
  }

  // Tool usage: config lists first, CLI additions after.
  const toolOptions = {
    ...toolChecks,
    required: [...(toolChecks.required ?? []), ...(options.requiredTool ?? [])],
    forbidden: [...(toolChecks.forbidden ?? []), ...(options.forbiddenTool ?? [])]
  };
  const wantsToolRule =
    hasEntries(toolOptions.required) ||
    hasEntries(toolOptions.forbidden) ||
    hasEntries(toolOptions.allowed) ||
    anySet(toolOptions, ["minCount", "maxCount"]);
  if (wantsToolRule) {
    rules.push(createToolUsageRule(toolOptions));
  }

  // LLM usage: same merge pattern, plus the resolved total-token limit.
  const llmOptions = {
    ...llmChecks,
    allowedModels: [...(llmChecks.allowedModels ?? []), ...(options.allowedModel ?? [])],
    ...(isSet(tokenLimit) ? { maxTotalTokens: tokenLimit } : {})
  };
  const wantsLlmRule =
    hasEntries(llmOptions.allowedModels) ||
    hasEntries(llmOptions.allowedProviders) ||
    hasEntries(llmOptions.finishReasons) ||
    anySet(llmOptions, [
      "maxCalls",
      "maxInputTokens",
      "maxOutputTokens",
      "maxTotalTokens",
      "maxCachedTokens"
    ]);
  if (wantsLlmRule) {
    rules.push(createLlmUsageRule(llmOptions));
  }

  if (anySet(structureChecks, ["minConfidence", "requireParentBeforeChild", "requireTraceParentSpan"])) {
    rules.push(createStructureRelationshipRule(structureChecks));
  }
  if (anySet(structureChecks, ["maxChildren", "maxConcurrent"])) {
    rules.push(
      createStructureParallelWidthRule({
        maxChildren: structureChecks.maxChildren,
        maxConcurrent: structureChecks.maxConcurrent
      })
    );
  }

  if (safetyChecks.redaction) {
    rules.push(createSafetyRedactionRule());
  }
  if (anySet(safetyChecks, ["maxStringLength", "maxArrayLength", "maxObjectKeys", "maxSerializedBytes"])) {
    rules.push(createSafetyOversizedAttributeRule(safetyChecks));
  }

  const select = [...(asStringArray(checks.select) ?? []), ...(options.rule ?? [])];
  return {
    rules,
    select: select.length > 0 ? select : DEFAULT_SELECT,
    diagnostics
  };
}
|
|
10502
|
+
/**
 * Maps a check result to a process exit code.
 *
 * "pass" is 0 and "fail" is 1. For any other status the diagnostic codes
 * decide: format problems give 4, unreadable trace/baseline gives 3,
 * argument/config/run-selection problems give 2, anything else gives 1.
 *
 * @param {object} result - Check result with `status` and `diagnostics`.
 * @returns {number} Exit code.
 */
function exitCodeFor(result) {
  switch (result.status) {
    case "pass":
      return 0;
    case "fail":
      return 1;
    default:
      break;
  }

  const seen = new Set(result.diagnostics.map((item) => item.code));
  // Ordered from highest to lowest priority.
  const tiers = [
    [4, ["AI_CHECK_UNSUPPORTED_FORMAT", "AI_CHECK_AMBIGUOUS_FORMAT"]],
    [3, ["AI_CHECK_TRACE_UNREADABLE", "AI_CHECK_BASELINE_UNREADABLE"]],
    [
      2,
      [
        "AI_CHECK_INVALID_ARGUMENTS",
        "AI_CHECK_INVALID_CONFIG",
        "AI_CHECK_CONFIG_LOAD_FAILED",
        "AI_CHECK_RUN_SELECTION_REQUIRED"
      ]
    ]
  ];
  for (const [exitCode, codes] of tiers) {
    if (codes.some((code) => seen.has(code))) return exitCode;
  }
  return 1;
}
|
|
10523
|
+
/**
 * Returns a copy of a JSON-like value whose object keys are sorted with
 * `localeCompare`, recursively, so serialized output has a fixed key order.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {unknown} Arrays are mapped element-wise, primitives and `null`
 *   are returned unchanged, objects are rebuilt with sorted keys.
 */
function stable(value) {
  if (Array.isArray(value)) return value.map(stable);
  if (value === null || typeof value !== "object") return value;

  const sortedKeys = Object.keys(value).sort((left, right) => left.localeCompare(right));
  const entries = [];
  for (const key of sortedKeys) {
    entries.push([key, stable(value[key])]);
  }
  return Object.fromEntries(entries);
}
|
|
10531
|
+
/**
 * Prints a check result to stdout as 2-space-indented JSON with sorted keys.
 *
 * @param {object} result - Check result.
 */
function printJson(result) {
  const payload = JSON.stringify(stable(result), null, 2);
  console.log(payload);
}
|
|
10534
|
+
/**
 * Prints a check result to stdout in human-readable form: status, format,
 * optional run id, summary counts, then one line per diagnostic and finding.
 *
 * @param {object} result - Check result.
 */
function printHuman(result) {
  const { status, format, runId, summary, diagnostics, findings } = result;

  console.log(`Check status: ${status}`);
  console.log(`Format: ${format}`);
  if (runId !== void 0) {
    console.log(`Run: ${runId}`);
  }
  console.log(
    `Summary: ${summary.failed} failed, ${summary.warnings} warning(s), ${summary.errors} error(s)`
  );

  for (const { code, message } of diagnostics) {
    console.log(`- ${code}: ${message}`);
  }
  for (const finding of findings) {
    // Only the first evidence path is shown, and only when it is truthy.
    const location = finding.evidence[0]?.path;
    const suffix = location ? ` (${location})` : "";
    console.log(`- ${finding.ruleId}: ${finding.message}${suffix}`);
  }
}
|
|
10549
|
+
/**
 * Converts an error raised while reading/checking a trace into an error result.
 *
 * A `TraceReadError` with code "unsupported_format" or "ambiguous_format" maps
 * to the matching AI_CHECK_* code; everything else is reported as
 * AI_CHECK_TRACE_UNREADABLE.
 *
 * @param {unknown} error - Caught value.
 * @returns {object} Error result built by `errorResult2`.
 */
function readErrorResult(error) {
  if (!(error instanceof TraceReadError)) {
    const text = error instanceof Error ? error.message : String(error);
    return errorResult2("AI_CHECK_TRACE_UNREADABLE", text);
  }

  let code = "AI_CHECK_TRACE_UNREADABLE";
  if (error.code === "unsupported_format") {
    code = "AI_CHECK_UNSUPPORTED_FORMAT";
  } else if (error.code === "ambiguous_format") {
    code = "AI_CHECK_AMBIGUOUS_FORMAT";
  }
  return errorResult2(code, error.message);
}
|
|
10559
|
+
/**
 * Implements the `check` command: loads the config, builds the rules, reads
 * the trace, runs the checks, sets `process.exitCode` and prints the result.
 *
 * Failures before the trace is read are classified as argument/config errors
 * from the error message; failures afterwards go through `readErrorResult`.
 *
 * @param {string} target - Trace target passed to `inputFromTarget`.
 * @param {object} [options] - CLI options (`config`, `format`, `run`, `json`, ...).
 * @param {object} [stdin=process.stdin] - Stream used when the target is "-".
 * @returns {Promise<void>}
 */
async function checkCommand(target, options = {}, stdin = process.stdin) {
  // Message fragments that mark a config error as "invalid" rather than
  // "failed to load".
  const invalidConfigMarkers = [
    "Unsupported check config extension",
    "TypeScript check configs",
    "Config must",
    "checks config",
    "Expected an array"
  ];
  const configFailure = (error) => {
    const message = error instanceof Error ? error.message : String(error);
    let code = "AI_CHECK_CONFIG_LOAD_FAILED";
    if (message.startsWith("--")) {
      code = "AI_CHECK_INVALID_ARGUMENTS";
    } else if (
      error instanceof SyntaxError ||
      invalidConfigMarkers.some((marker) => message.includes(marker))
    ) {
      code = "AI_CHECK_INVALID_CONFIG";
    }
    return errorResult2(code, message);
  };

  let result;
  let readingTrace = false;
  try {
    const config = await loadConfig(options.config);
    const built = buildRules(config, options);
    const configBroken = built.diagnostics.some((item) => item.severity === "error");
    if (configBroken) {
      result = errorResult2("AI_CHECK_INVALID_CONFIG", "Invalid check configuration.");
      result.diagnostics = [...built.diagnostics];
    } else {
      readingTrace = true;
      const input3 = await inputFromTarget(target, options, stdin);
      const openOptions = options.format !== void 0 ? { format: options.format } : {};
      const read = await openTrace(input3, openOptions);
      const checkOptions = { rules: built.rules, select: built.select };
      if (options.run !== void 0) checkOptions.runId = options.run;
      result = runTraceChecks({ read }, checkOptions);
    }
  } catch (error) {
    result = readingTrace ? readErrorResult(error) : configFailure(error);
  }

  process.exitCode = exitCodeFor(result);
  if (options.json) {
    printJson(result);
  } else {
    printHuman(result);
  }
}
|
|
10599
|
+
|
|
10600
|
+
// packages/cli/src/safety.ts
|
|
10601
|
+
// Disclaimer attached to every safety result as its `note` field.
var BEST_EFFORT_NOTE = "Best-effort local safety verification only; not a compliance, privacy, security, or regulatory certification.";

// Fallback limits used by buildSafetyRules when the matching option is absent.
var DEFAULT_MAX_STRING_LENGTH = 16384;
var DEFAULT_MAX_ARRAY_LENGTH = 1e3;
var DEFAULT_MAX_OBJECT_KEYS = 200;
var DEFAULT_MAX_SERIALIZED_BYTES = 128 * 1024; // 128 KiB
|
|
10606
|
+
/**
 * Parses an optional non-negative numeric limit for the safety commands.
 *
 * @param {unknown} value - Raw option value; `undefined` means "not given".
 * @param {string} label - Option name used as the start of the error message.
 * @returns {number|undefined} The parsed number, or `undefined` when absent.
 * @throws {Error} When the value is blank, not finite, or negative.
 */
function parseLimit3(value, label) {
  if (value === void 0) return void 0;
  // Number("") and Number("   ") are 0; a blank option value must not be
  // accepted as a limit of zero.
  const isBlank = typeof value === "string" && value.trim() === "";
  const parsed = isBlank ? Number.NaN : Number(value);
  if (!Number.isFinite(parsed) || parsed < 0) {
    throw new Error(`${label} must be a non-negative number.`);
  }
  return parsed;
}
|
|
10614
|
+
/**
 * Recursively rebuilds a JSON-like value with object keys sorted by
 * `localeCompare`; arrays keep their order and primitives pass through.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {unknown} Normalized copy (or the primitive itself).
 */
function stable2(value) {
  if (Array.isArray(value)) return value.map(stable2);
  if (value === null || typeof value !== "object") return value;

  const orderedKeys = Object.keys(value).sort((left, right) => left.localeCompare(right));
  return Object.fromEntries(orderedKeys.map((name) => [name, stable2(value[name])]));
}
|
|
10622
|
+
/**
 * Builds a diagnostic record for the safety commands.
 *
 * @param {string} code - Diagnostic code.
 * @param {string} message - Human-readable description.
 * @param {string} [severity="error"] - Severity label.
 * @returns {{code: string, message: string, severity: string}}
 */
function safetyDiagnostic(code, message, severity = "error") {
  const entry = { code, message, severity };
  return entry;
}
|
|
10625
|
+
/**
 * Converts reader warnings and unsupported-field names into safety
 * diagnostics.
 *
 * Reader warnings keep severity "error" when they have it and otherwise become
 * "warning"; every unsupported field becomes an "unsupported_field" warning.
 *
 * @param {object[]} warnings - Reader warnings (`code`, `message`, `severity`).
 * @param {string[]} unsupportedFields - Field names reported by the reader.
 * @returns {object[]} Warning diagnostics followed by field diagnostics.
 */
function warningDiagnostics(warnings, unsupportedFields) {
  const fromWarnings = warnings.map((warning) => {
    const severity = warning.severity === "error" ? "error" : "warning";
    return safetyDiagnostic(warning.code, warning.message, severity);
  });
  const fromFields = unsupportedFields.map((field) =>
    safetyDiagnostic("unsupported_field", `Reader reported unsupported field: ${field}`, "warning")
  );
  return [...fromWarnings, ...fromFields];
}
|
|
10643
|
+
/**
 * Re-wraps a check diagnostic as a safety diagnostic, keeping only its code,
 * message and severity.
 *
 * @param {object} item - Diagnostic produced by the check runner.
 * @returns {{code: string, message: string, severity: string}}
 */
function diagnosticFromCheck(item) {
  const { code, message, severity } = item;
  return safetyDiagnostic(code, message, severity);
}
|
|
10646
|
+
/**
 * Derives the overall safety status.
 *
 * Precedence: an error diagnostic gives "UNKNOWN", an error finding gives
 * "UNSAFE", any warning (diagnostic or finding) gives "SAFE WITH WARNINGS",
 * otherwise "SAFE".
 *
 * @param {object[]} findings - Findings with a `severity` field.
 * @param {object[]} diagnostics - Diagnostics with a `severity` field.
 * @returns {string} One of the four status labels.
 */
function statusFrom(findings, diagnostics) {
  const has = (items, severity) => items.some((item) => item.severity === severity);

  if (has(diagnostics, "error")) return "UNKNOWN";
  if (has(findings, "error")) return "UNSAFE";
  if (has(diagnostics, "warning") || has(findings, "warning")) {
    return "SAFE WITH WARNINGS";
  }
  return "SAFE";
}
|
|
10653
|
+
/**
 * Assembles a safety result from its parts, computing status and summary.
 *
 * @param {object} parts - `command`, `format`, optional `runId`, and optional
 *   `findings`, `diagnostics`, `warnings`, `unsupportedFields` lists (each is
 *   copied; missing lists become empty).
 * @returns {object} Safety result; `ok` is true for "SAFE" and
 *   "SAFE WITH WARNINGS".
 */
function resultFromParts(parts) {
  const findings = [...(parts.findings ?? [])];
  const diagnostics = [...(parts.diagnostics ?? [])];
  const warnings = [...(parts.warnings ?? [])];
  const unsupportedFields = [...(parts.unsupportedFields ?? [])];

  const countWith = (items, severity) => items.filter((item) => item.severity === severity).length;
  const status = statusFrom(findings, diagnostics);
  const summary = {
    findings: findings.length,
    warnings: countWith(diagnostics, "warning") + countWith(findings, "warning"),
    errors: countWith(diagnostics, "error") + countWith(findings, "error")
  };

  return {
    ok: status === "SAFE" || status === "SAFE WITH WARNINGS",
    command: parts.command,
    status,
    format: parts.format,
    ...(parts.runId !== void 0 ? { runId: parts.runId } : {}),
    summary,
    findings,
    diagnostics,
    warnings,
    unsupportedFields,
    note: BEST_EFFORT_NOTE
  };
}
|
|
10677
|
+
/**
 * Converts an error raised while reading/checking a trace into a safety result.
 *
 * A `TraceReadError` maps its "unsupported_format" / "ambiguous_format" code
 * to the matching AI_SAFETY_* code and forwards its `warnings`; any other
 * value is reported as AI_SAFETY_TRACE_UNREADABLE.
 *
 * @param {string} command - Name of the safety command being run.
 * @param {unknown} error - Caught value.
 * @returns {object} Safety result with format "unknown".
 */
function readErrorResult2(command, error) {
  if (!(error instanceof TraceReadError)) {
    const text = error instanceof Error ? error.message : String(error);
    return resultFromParts({
      command,
      format: "unknown",
      diagnostics: [safetyDiagnostic("AI_SAFETY_TRACE_UNREADABLE", text)]
    });
  }

  let code = "AI_SAFETY_TRACE_UNREADABLE";
  if (error.code === "unsupported_format") {
    code = "AI_SAFETY_UNSUPPORTED_FORMAT";
  } else if (error.code === "ambiguous_format") {
    code = "AI_SAFETY_AMBIGUOUS_FORMAT";
  }
  return resultFromParts({
    command,
    format: "unknown",
    diagnostics: [safetyDiagnostic(code, error.message)],
    warnings: error.warnings
  });
}
|
|
10698
|
+
/**
 * Builds the safety result for an invalid command-line argument.
 *
 * @param {string} command - Name of the safety command being run.
 * @param {unknown} error - Caught value; its message (or string form) becomes
 *   the diagnostic message.
 * @returns {object} Safety result with one AI_SAFETY_INVALID_ARGUMENTS error.
 */
function invalidArgumentResult(command, error) {
  const text = error instanceof Error ? error.message : String(error);
  const diagnostics = [safetyDiagnostic("AI_SAFETY_INVALID_ARGUMENTS", text)];
  return resultFromParts({ command, format: "unknown", diagnostics });
}
|
|
10710
|
+
/**
 * Creates the rule set used by the safety commands: raw content, redaction,
 * secret pattern and oversized attribute rules.
 *
 * @param {object} options - CLI options; the four `max*` values override the
 *   DEFAULT_MAX_* limits when provided.
 * @returns {object[]} The four rules, in the order listed above.
 * @throws {Error} From `parseLimit3` when a limit is invalid.
 */
function buildSafetyRules(options) {
  // Property order matters: limits are parsed (and may throw) top to bottom.
  const limits = {
    maxStringLength:
      parseLimit3(options.maxStringLength, "--max-string-length") ?? DEFAULT_MAX_STRING_LENGTH,
    maxArrayLength:
      parseLimit3(options.maxArrayLength, "--max-array-length") ?? DEFAULT_MAX_ARRAY_LENGTH,
    maxObjectKeys:
      parseLimit3(options.maxObjectKeys, "--max-object-keys") ?? DEFAULT_MAX_OBJECT_KEYS,
    maxSerializedBytes:
      parseLimit3(options.maxSerializedBytes, "--max-serialized-bytes") ?? DEFAULT_MAX_SERIALIZED_BYTES
  };
  return [
    createSafetyRawContentRule(),
    createSafetyRedactionRule(),
    createSafetySecretPatternRule(),
    createSafetyOversizedAttributeRule(limits)
  ];
}
|
|
10727
|
+
/**
 * Maps a safety result to a process exit code: 0 for "SAFE" and
 * "SAFE WITH WARNINGS", 1 for "UNSAFE", 2 for anything else.
 *
 * @param {object} result - Safety result with a `status` field.
 * @returns {number} Exit code.
 */
function exitCodeFor2(result) {
  switch (result.status) {
    case "SAFE":
    case "SAFE WITH WARNINGS":
      return 0;
    case "UNSAFE":
      return 1;
    default:
      return 2;
  }
}
|
|
10732
|
+
/**
 * Prints a safety result to stdout as 2-space-indented JSON with sorted keys.
 *
 * @param {object} result - Safety result.
 */
function printJson2(result) {
  const payload = JSON.stringify(stable2(result), null, 2);
  console.log(payload);
}
|
|
10735
|
+
/**
 * Prints a safety result to stdout in human-readable form: status, format,
 * optional run id, summary counts, one line per diagnostic and finding, and
 * finally the result's note.
 *
 * @param {object} result - Safety result.
 */
function printHuman2(result) {
  const { status, format, runId, summary, diagnostics, findings } = result;

  console.log(`Safety status: ${status}`);
  console.log(`Format: ${format}`);
  if (runId !== void 0) {
    console.log(`Run: ${runId}`);
  }
  console.log(
    `Summary: ${summary.findings} finding(s), ${summary.warnings} warning(s), ${summary.errors} error(s)`
  );

  for (const { code, message } of diagnostics) {
    console.log(`- ${code}: ${message}`);
  }
  for (const finding of findings) {
    // Only the first evidence path is shown, and only when it is truthy.
    const location = finding.evidence[0]?.path;
    const suffix = location ? ` (${location})` : "";
    console.log(`- ${finding.ruleId}: ${finding.message}${suffix}`);
  }
  console.log(`Note: ${result.note}`);
}
|
|
10751
|
+
/**
 * Shared implementation of the safety commands: builds the safety rules,
 * reads the trace, runs the checks, sets `process.exitCode` and prints the
 * result as JSON or human-readable text.
 *
 * Errors whose message starts with "--" are reported as invalid arguments;
 * all other errors go through `readErrorResult2`.
 *
 * @param {string} command - Command name recorded in the result.
 * @param {string} target - Trace target passed to `inputFromTarget`.
 * @param {object} options - CLI options (`format`, `run`, `json`, limits, ...).
 * @param {object} stdin - Stream used when the target is "-".
 * @returns {Promise<void>}
 */
async function safetyCommand(command, target, options, stdin) {
  let result;
  try {
    const rules = buildSafetyRules(options);
    const input3 = await inputFromTarget(target, options, stdin);
    const openOptions = options.format !== void 0 ? { format: options.format } : {};
    const read = await openTrace(input3, openOptions);

    const checkOptions = { rules };
    if (options.run !== void 0) checkOptions.runId = options.run;
    const checkResult = runTraceChecks({ read }, checkOptions);

    // Check diagnostics come first, then reader warnings / unsupported fields.
    const diagnostics = [
      ...checkResult.diagnostics.map(diagnosticFromCheck),
      ...warningDiagnostics(read.warnings, read.unsupportedFields)
    ];
    result = resultFromParts({
      command,
      format: checkResult.format,
      runId: checkResult.runId,
      findings: checkResult.findings,
      diagnostics,
      warnings: read.warnings,
      unsupportedFields: read.unsupportedFields
    });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    if (message.startsWith("--")) {
      result = invalidArgumentResult(command, error);
    } else {
      result = readErrorResult2(command, error);
    }
  }

  process.exitCode = exitCodeFor2(result);
  if (options.json) {
    printJson2(result);
  } else {
    printHuman2(result);
  }
}
|
|
10786
|
+
/**
 * Entry point of the "scan" command; delegates to `safetyCommand`.
 *
 * @param {string} target - Trace target.
 * @param {object} [options] - CLI options.
 * @param {object} [stdin=process.stdin] - Stream used when the target is "-".
 * @returns {Promise<void>} The promise returned by `safetyCommand`.
 */
function scanCommand(target, options = {}, stdin = process.stdin) {
  const commandName = "scan";
  return safetyCommand(commandName, target, options, stdin);
}
|
|
10789
|
+
/**
 * Entry point of the "verify-safe" command; delegates to `safetyCommand`.
 *
 * @param {string} target - Trace target.
 * @param {object} [options] - CLI options.
 * @param {object} [stdin=process.stdin] - Stream used when the target is "-".
 * @returns {Promise<void>} The promise returned by `safetyCommand`.
 */
function verifySafeCommand(target, options = {}, stdin = process.stdin) {
  const commandName = "verify-safe";
  return safetyCommand(commandName, target, options, stdin);
}
|
|
10792
|
+
// Disclaimer embedded in the Markdown and HTML artifacts.
var NOTE = "Generated locally by AgentInspect. Artifacts are best-effort summaries, not compliance or security certification.";

// Fixed safety rule set used for the "Safety check" section of the artifacts.
var SAFETY_RULES = [
  createSafetyRawContentRule(),
  createSafetyRedactionRule(),
  createSafetySecretPatternRule(),
  createSafetyOversizedAttributeRule({
    maxStringLength: 16384,
    maxArrayLength: 1e3,
    maxObjectKeys: 200,
    maxSerializedBytes: 128 * 1024 // 128 KiB
  })
];
|
|
10804
|
+
/**
 * Produces a key-sorted copy of a JSON-like value (keys ordered with
 * `localeCompare`, applied recursively) for deterministic serialization.
 *
 * @param {unknown} value - Value to normalize.
 * @returns {unknown} Arrays mapped element-wise, objects rebuilt with sorted
 *   keys, everything else returned unchanged.
 */
function stable3(value) {
  if (Array.isArray(value)) return value.map(stable3);
  if (value === null || typeof value !== "object") return value;

  const names = Object.keys(value);
  names.sort((left, right) => left.localeCompare(right));
  const pairs = [];
  for (const name of names) {
    pairs.push([name, stable3(value[name])]);
  }
  return Object.fromEntries(pairs);
}
|
|
10812
|
+
/**
 * Serializes a value as key-sorted, 2-space-indented JSON followed by a
 * single trailing newline.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} JSON text ending in "\n".
 */
function writeJson2(value) {
  const serialized = JSON.stringify(stable3(value), null, 2);
  return `${serialized}\n`;
}
|
|
10816
|
+
/**
 * Escapes the characters `&`, `<`, `>` and `"` so a string can be embedded in
 * HTML text or a double-quoted attribute.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with the four characters replaced by entities.
 */
function escapeHtml2(value) {
  // "&" must be replaced first, otherwise the ampersands introduced by the
  // later replacements would be escaped a second time.
  return value
    .replaceAll("&", "&amp;")
    .replaceAll("<", "&lt;")
    .replaceAll(">", "&gt;")
    .replaceAll('"', "&quot;");
}
|
|
10819
|
+
/**
 * Renders `[field, value]` pairs as a two-column Markdown table.
 *
 * @param {Array<[string, unknown]>} rows - Table rows; a `null`/`undefined`
 *   value is shown as "unknown".
 * @returns {string} Table lines joined with "\n" (no trailing newline).
 */
function markdownTable(rows) {
  // A literal "|" ends a table cell and a line break ends the row, so both
  // must be neutralized for values that come from trace data.
  const cell = (raw) => String(raw).replaceAll("|", "\\|").replace(/\r\n|\r|\n/g, " ");

  const lines = ["| Field | Value |", "| --- | --- |"];
  for (const [key, value] of rows) {
    lines.push(`| ${cell(key)} | ${cell(value ?? "unknown")} |`);
  }
  return lines.join("\n");
}
|
|
10826
|
+
/**
 * Increments the counter stored under `key` in `record`.
 *
 * @param {object} record - Counter object, mutated in place.
 * @param {string|undefined|null} key - Counter name; a falsy or
 *   whitespace-only key is counted under "unknown".
 */
function increment(record, key) {
  const label = key && key.trim() !== "" ? key : "unknown";
  // Read only own properties: with a plain `record[label]` lookup a key such
  // as "constructor" or "toString" would pick up the inherited function and
  // produce a garbage string instead of a count.
  const isOwn = Object.prototype.hasOwnProperty.call(record, label);
  const previous = (isOwn ? record[label] : void 0) ?? 0;
  // defineProperty (rather than assignment) so that "__proto__" is stored as a
  // normal counter instead of being swallowed by the prototype setter.
  Object.defineProperty(record, label, {
    value: previous + 1,
    writable: true,
    enumerable: true,
    configurable: true
  });
}
|
|
10830
|
+
/**
 * Picks the run to summarize.
 *
 * @param {object} read - Opened trace with a `runs` array.
 * @param {string|undefined} runId - Requested run id, if any.
 * @returns {object|undefined} The run with that id; without an id, the only
 *   run when exactly one exists; otherwise `undefined`.
 */
function selectRun2(read, runId) {
  if (runId === void 0) {
    const { runs } = read;
    return runs.length === 1 ? runs[0] : void 0;
  }
  return read.runs.find((candidate) => candidate.runId === runId);
}
|
|
10836
|
+
/**
 * Builds the trace summary written to the artifacts.
 *
 * Events are limited to the given run when one is supplied; otherwise every
 * event in the trace is counted.
 *
 * @param {object} read - Opened trace (`format`, `runs`, `events`, `warnings`,
 *   `unsupportedFields`).
 * @param {object} [run] - Selected run, if any.
 * @returns {object} Summary with run details (when known), run and event
 *   counts, per-kind and per-status event counts, and reader warning counts.
 */
function summarizeTrace(read, run) {
  const runId = run?.runId;
  const scopedEvents =
    runId === void 0 ? read.events : read.events.filter((event) => event.runId === runId);

  const eventsByKind = {};
  const eventsByStatus = {};
  for (const event of scopedEvents) {
    increment(eventsByKind, event.kind);
    increment(eventsByStatus, event.status);
  }

  // Keys are added in a fixed order; optional ones only when defined.
  const summary = { format: read.format };
  if (runId !== void 0) summary.runId = runId;
  if (run?.status !== void 0) summary.runStatus = run.status;
  if (run?.durationMs !== void 0) summary.runDurationMs = run.durationMs;
  summary.runCount = read.runs.length;
  summary.eventCount = scopedEvents.length;
  summary.eventsByKind = Object.fromEntries(Object.entries(eventsByKind).sort());
  summary.eventsByStatus = Object.fromEntries(Object.entries(eventsByStatus).sort());
  summary.readerWarnings = read.warnings.length;
  summary.unsupportedFields = read.unsupportedFields.length;
  return summary;
}
|
|
10858
|
+
/**
 * Renders a check result as plain text: three header lines followed by at
 * most ten findings and at most ten diagnostics.
 *
 * @param {object} result - Check result (`status`, `findings`, `diagnostics`).
 * @returns {string} Lines joined with "\n".
 */
function renderCheckSection(result) {
  const header = [
    `Status: ${result.status}`,
    `Findings: ${result.findings.length}`,
    `Diagnostics: ${result.diagnostics.length}`
  ];
  const findingLines = result.findings.slice(0, 10).map((finding) => {
    // Findings without an evidence path are attributed to the run as a whole.
    const location = finding.evidence[0]?.path ?? "(run)";
    return `- ${finding.ruleId}: ${finding.message} (${location})`;
  });
  const diagnosticLines = result.diagnostics
    .slice(0, 10)
    .map((entry) => `- ${entry.code}: ${entry.message}`);
  return [...header, ...findingLines, ...diagnosticLines].join("\n");
}
|
|
10873
|
+
/**
 * Renders the Markdown artifact: title, disclaimer, trace table, safety check
 * section and baseline diff section.
 *
 * @param {object} trace - Trace summary as produced by `summarizeTrace`.
 * @param {object} check - Safety check result.
 * @param {object} [diff] - Baseline diff result; when falsy the section says
 *   that no baseline was supplied.
 * @returns {string} Markdown document (lines joined with "\n").
 */
function renderMarkdown(trace, check, diff) {
  const traceRows = [
    ["Format", trace.format],
    ["Run", trace.runId],
    ["Run status", trace.runStatus],
    ["Run duration ms", trace.runDurationMs],
    ["Runs", trace.runCount],
    ["Events", trace.eventCount],
    ["Reader warnings", trace.readerWarnings],
    ["Unsupported fields", trace.unsupportedFields]
  ];

  const document = [
    "# AgentInspect CI Artifacts",
    "",
    NOTE,
    "",
    "## Trace",
    "",
    markdownTable(traceRows),
    "",
    "## Safety check",
    "",
    "```text",
    renderCheckSection(check),
    "```",
    "",
    "## Baseline diff",
    ""
  ];

  if (!diff) {
    document.push("No baseline was supplied.", "");
  } else {
    document.push("```text", renderCheckSection(diff), "```", "");
  }
  return document.join("\n");
}
|
|
10908
|
+
/**
 * Renders the HTML artifact: a standalone page with the disclaimer, the trace
 * table, the safety check section and the baseline diff section. Every
 * interpolated value is passed through `escapeHtml2`.
 *
 * @param {object} trace - Trace summary as produced by `summarizeTrace`.
 * @param {object} check - Safety check result.
 * @param {object} [diff] - Baseline diff result; when falsy the section says
 *   that no baseline was supplied.
 * @returns {string} Complete HTML document ending in a newline.
 */
function renderHtml(trace, check, diff) {
  const traceRows = [
    ["Format", trace.format],
    ["Run", trace.runId],
    ["Run status", trace.runStatus],
    ["Run duration ms", trace.runDurationMs],
    ["Runs", trace.runCount],
    ["Events", trace.eventCount],
    ["Reader warnings", trace.readerWarnings],
    ["Unsupported fields", trace.unsupportedFields]
  ];
  const table = traceRows
    .map(([label, value]) => {
      const shown = String(value ?? "unknown");
      return `<tr><th>${escapeHtml2(label)}</th><td>${escapeHtml2(shown)}</td></tr>`;
    })
    .join("");
  return `<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>AgentInspect CI Artifacts</title>
<style>body{font-family:system-ui,sans-serif;line-height:1.5;margin:1.5rem;max-width:960px;color:#111}table{border-collapse:collapse}th,td{border:1px solid #ddd;padding:0.35rem 0.5rem;text-align:left}pre{white-space:pre-wrap;background:#f8f8f8;padding:0.75rem;overflow:auto}</style>
</head>
<body>
<h1>AgentInspect CI Artifacts</h1>
<p>${escapeHtml2(NOTE)}</p>
<h2>Trace</h2>
<table><tbody>${table}</tbody></table>
<h2>Safety check</h2>
<pre>${escapeHtml2(renderCheckSection(check))}</pre>
<h2>Baseline diff</h2>
<pre>${escapeHtml2(diff ? renderCheckSection(diff) : "No baseline was supplied.")}</pre>
</body>
</html>
`;
}
|
|
10943
|
+
/**
 * Writes one artifact file below the output directory, creating parent
 * directories as needed, and records its relative path.
 *
 * @param {string} outputDir - Root directory for artifacts.
 * @param {string} relativePath - File path relative to `outputDir`.
 * @param {string} content - File content, written as UTF-8.
 * @param {string[]} files - List that receives `relativePath` after the write.
 * @returns {Promise<void>}
 */
async function writeArtifact(outputDir, relativePath, content, files) {
  const destination = path10.join(outputDir, relativePath);
  const parentDir = path10.dirname(destination);
  await mkdir(parentDir, { recursive: true });
  await writeFile(destination, content, "utf-8");
  files.push(relativePath);
}
|
|
10949
|
+
/**
 * Extracts a printable message from a caught value.
 *
 * @param {unknown} error - Caught value.
 * @returns {string} `error.message` for `TraceReadError` and `Error`
 *   instances, otherwise the value converted with `String`.
 */
function readErrorMessage(error) {
  if (error instanceof TraceReadError || error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
10953
|
+
/**
 * Derives the overall manifest status from the safety check and the optional
 * baseline diff.
 *
 * Precedence: any "error" status gives "unknown"; a failed check gives
 * "unsafe"; a failed diff gives "regression"; any warning gives "warning";
 * otherwise "ok".
 *
 * @param {object} check - Safety check result (`status`, `summary.warnings`).
 * @param {object} [diff] - Baseline diff result, if a baseline was supplied.
 * @returns {string} One of "unknown", "unsafe", "regression", "warning", "ok".
 */
function manifestStatus(check, diff) {
  const diffStatus = diff?.status;
  if (check.status === "error" || diffStatus === "error") return "unknown";
  if (check.status === "fail") return "unsafe";
  if (diffStatus === "fail") return "regression";

  const hasWarnings = check.summary.warnings > 0 || (diff?.summary.warnings ?? 0) > 0;
  return hasWarnings ? "warning" : "ok";
}
|
|
10960
|
+
/**
 * CLI handler for the `artifacts` command.
 *
 * Reads a trace, runs the safety rules against it, optionally runs a
 * baseline-regression check, and writes `trace.json`, `check.json`,
 * `diff.json`, `summary.md`, `report.html` and `manifest.json` into the
 * output directory. When a GitHub summary target is configured (option or
 * `GITHUB_STEP_SUMMARY`), the same Markdown summary is appended to it.
 *
 * Failure handling:
 * - A blank/missing `outputDir`, an unreadable trace, or a failing baseline
 *   read/diff logs to stderr, sets `process.exitCode = 1`, and returns
 *   before any artifact is written.
 * - Errors from run selection, the main check, or the file writes are not
 *   caught here and reject the returned promise.
 *
 * @param {string} target - Trace reference handed to `inputFromTarget`.
 * @param {object} [options] - Parsed CLI options. Fields read here:
 *   `outputDir`, `format`, `run`, `baseline`, `baselineRun`,
 *   `githubSummary`, `json`. The whole object is also forwarded to
 *   `inputFromTarget`.
 * @param {NodeJS.ReadableStream} [stdin] - Stream forwarded to
 *   `inputFromTarget`; defaults to `process.stdin`.
 * @returns {Promise<void>}
 */
async function artifactsCommand(target, options = {}, stdin = process.stdin) {
  const requestedOutputDir = options.outputDir?.trim() ?? "";
  if (requestedOutputDir === "") {
    console.error("--output-dir is required.");
    process.exitCode = 1;
    return;
  }
  const outputDir = path10.resolve(requestedOutputDir);

  // Optional settings are only forwarded when set; each call site spreads
  // these into a fresh object so no options object is shared between calls.
  const formatOption = options.format !== void 0 ? { format: options.format } : {};
  const runOption = options.run !== void 0 ? { runId: options.run } : {};

  let read;
  try {
    const input3 = await inputFromTarget(target, options, stdin);
    read = await openTrace(input3, { ...formatOption });
  } catch (error) {
    console.error(`[AgentInspect] artifacts failed: ${readErrorMessage(error)}`);
    process.exitCode = 1;
    return;
  }

  const selectedRun = selectRun2(read, options.run);
  const check = runTraceChecks({ read }, { rules: SAFETY_RULES, ...runOption });
  const trace = summarizeTrace(read, selectedRun);

  let diff;
  if (options.baseline !== void 0 && options.baseline.trim() !== "") {
    try {
      const baselineInput = await inputFromTarget(options.baseline, options, stdin);
      const baselineRead = await openTrace(baselineInput, { ...formatOption });
      diff = runTraceChecks(
        { read },
        {
          rules: [
            createBaselineRegressionRule({
              baseline: { read: baselineRead },
              ...options.baselineRun !== void 0 ? { baselineRunId: options.baselineRun } : {},
              compareFormat: true
            })
          ],
          ...runOption
        }
      );
    } catch (error) {
      console.error(`[AgentInspect] baseline diff failed: ${readErrorMessage(error)}`);
      process.exitCode = 1;
      return;
    }
  }

  // Normalize the summary target once. The path is trimmed before it is
  // resolved, matching how --output-dir is handled, so stray whitespace in
  // the option or environment variable cannot become part of the file name.
  const rawSummaryTarget = options.githubSummary ?? process.env.GITHUB_STEP_SUMMARY;
  const trimmedSummaryTarget = rawSummaryTarget?.trim() ?? "";
  const summaryPath = trimmedSummaryTarget !== "" ? path10.resolve(trimmedSummaryTarget) : void 0;

  const files = [];
  await mkdir(outputDir, { recursive: true });
  await writeArtifact(outputDir, "trace.json", writeJson2(trace), files);
  await writeArtifact(outputDir, "check.json", writeJson2(check), files);
  await writeArtifact(
    outputDir,
    "diff.json",
    writeJson2(diff ?? { status: "not_requested", findings: [], diagnostics: [] }),
    files
  );

  // Render the Markdown once so summary.md and the appended step summary
  // are guaranteed to carry identical content.
  const markdown = renderMarkdown(trace, check, diff);
  await writeArtifact(outputDir, "summary.md", markdown, files);
  await writeArtifact(outputDir, "report.html", renderHtml(trace, check, diff), files);

  if (summaryPath !== void 0) {
    await mkdir(path10.dirname(summaryPath), { recursive: true });
    // Leading newline separates this entry from any existing file content.
    await appendFile(summaryPath, `\n${markdown}`, "utf-8");
  }

  // manifest.json lists itself, so it is added before sorting rather than
  // going through writeArtifact.
  const manifestFiles = [...files, "manifest.json"].sort((a, b) => a.localeCompare(b));
  const manifest = {
    status: manifestStatus(check, diff),
    outputDir,
    files: manifestFiles,
    trace,
    check: {
      status: check.status,
      findings: check.findings.length,
      diagnostics: check.diagnostics.length
    },
    diff: {
      status: diff?.status ?? "not_requested",
      findings: diff?.findings.length ?? 0,
      diagnostics: diff?.diagnostics.length ?? 0
    },
    ...summaryPath !== void 0 ? { githubSummary: summaryPath } : {},
    note: NOTE
  };
  await writeFile(path10.join(outputDir, "manifest.json"), writeJson2(manifest), "utf-8");

  if (options.json === true) {
    console.log(writeJson2(manifest).trimEnd());
    return;
  }
  console.log(`Wrote AgentInspect artifacts to ${outputDir}`);
  console.log(`Status: ${manifest.status}`);
  for (const file of manifest.files) {
    console.log(`- ${file}`);
  }
}
|
|
11061
|
+
|
|
9086
11062
|
// packages/cli/src/index.ts
|
|
9087
11063
|
function runCommand(action) {
|
|
9088
11064
|
void action().catch((error) => {
|
|
@@ -9184,6 +11160,54 @@ function createCliProgram() {
|
|
|
9184
11160
|
).option("--json", "print result as JSON").option("--diagnostics", "print reader warnings and unsupported fields").option("--run <run-id>", "select a run when the trace contains multiple runs").action((input3, opts) => {
|
|
9185
11161
|
runCommand(() => openCommand(input3, opts));
|
|
9186
11162
|
});
|
|
11163
|
+
program.command("check").description("Run deterministic checks against a local trace").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11164
|
+
new Option("--format <format>", "trace input format").choices([
|
|
11165
|
+
"agent-inspect-jsonl",
|
|
11166
|
+
"openinference-json",
|
|
11167
|
+
"otlp-json"
|
|
11168
|
+
])
|
|
11169
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--config <path>", "path to check config (.json, .js, .mjs, .cjs)").option("--json", "print deterministic JSON check result").option("--rule <id>", "select a rule id (repeatable)", (value, previous = []) => [
|
|
11170
|
+
...previous,
|
|
11171
|
+
value
|
|
11172
|
+
]).option("--max-duration-ms <number>", "add run.duration with a max duration").option("--required-tool <name>", "require a tool name (repeatable)", (value, previous = []) => [
|
|
11173
|
+
...previous,
|
|
11174
|
+
value
|
|
11175
|
+
]).option("--forbidden-tool <name>", "forbid a tool name (repeatable)", (value, previous = []) => [
|
|
11176
|
+
...previous,
|
|
11177
|
+
value
|
|
11178
|
+
]).option("--allowed-model <model>", "allow an LLM model (repeatable)", (value, previous = []) => [
|
|
11179
|
+
...previous,
|
|
11180
|
+
value
|
|
11181
|
+
]).option("--max-total-tokens <number>", "add llm.usage with a max total-token budget").action((target, opts) => {
|
|
11182
|
+
runCommand(() => checkCommand(target, opts));
|
|
11183
|
+
});
|
|
11184
|
+
program.command("scan").description("Best-effort local safety scan for trace capture risks").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11185
|
+
new Option("--format <format>", "trace input format").choices([
|
|
11186
|
+
"agent-inspect-jsonl",
|
|
11187
|
+
"openinference-json",
|
|
11188
|
+
"otlp-json"
|
|
11189
|
+
])
|
|
11190
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--json", "print deterministic JSON safety result").option("--max-string-length <number>", "unsafe threshold for string values").option("--max-array-length <number>", "unsafe threshold for array values").option("--max-object-keys <number>", "unsafe threshold for object key counts").option("--max-serialized-bytes <number>", "unsafe threshold for serialized values").action((target, opts) => {
|
|
11191
|
+
runCommand(() => scanCommand(target, opts));
|
|
11192
|
+
});
|
|
11193
|
+
program.command("verify-safe").description("Best-effort local trace safety verification").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11194
|
+
new Option("--format <format>", "trace input format").choices([
|
|
11195
|
+
"agent-inspect-jsonl",
|
|
11196
|
+
"openinference-json",
|
|
11197
|
+
"otlp-json"
|
|
11198
|
+
])
|
|
11199
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--json", "print deterministic JSON safety result").option("--max-string-length <number>", "unsafe threshold for string values").option("--max-array-length <number>", "unsafe threshold for array values").option("--max-object-keys <number>", "unsafe threshold for object key counts").option("--max-serialized-bytes <number>", "unsafe threshold for serialized values").action((target, opts) => {
|
|
11200
|
+
runCommand(() => verifySafeCommand(target, opts));
|
|
11201
|
+
});
|
|
11202
|
+
program.command("artifacts").description("Create safe local CI trace artifacts").argument("<trace-path-or-run-id>", "trace file, directory, stdin -, or run id").requiredOption("--output-dir <path>", "directory for generated artifacts").option("--dir <path>", "trace directory for run-id lookup").addOption(
|
|
11203
|
+
new Option("--format <format>", "trace input format").choices([
|
|
11204
|
+
"agent-inspect-jsonl",
|
|
11205
|
+
"openinference-json",
|
|
11206
|
+
"otlp-json"
|
|
11207
|
+
])
|
|
11208
|
+
).option("--run <run-id>", "select a run when the trace contains multiple runs").option("--baseline <trace-path-or-run-id>", "optional baseline trace for diff artifacts").option("--baseline-run <run-id>", "select a run from the baseline trace").option("--github-summary <path>", "append a safe summary to this file, e.g. GITHUB_STEP_SUMMARY").option("--json", "print deterministic JSON manifest").action((target, opts) => {
|
|
11209
|
+
runCommand(() => artifactsCommand(target, opts));
|
|
11210
|
+
});
|
|
9187
11211
|
program.command("diff").description("Compare two local AgentInspect JSONL traces (read-only)").argument("<left-run-id>", "first run id").argument("<right-run-id>", "second run id").option("--dir <path>", "trace directory").option("--json", "print diff result as JSON").option("--ignore-duration", "omit duration comparisons").option(
|
|
9188
11212
|
"--duration-threshold <duration>",
|
|
9189
11213
|
"ignore duration deltas at or below this (e.g. 500ms, 2s, 1m)"
|
|
@@ -9251,9 +11275,9 @@ function isPrimaryModule() {
|
|
|
9251
11275
|
if (!entry) return false;
|
|
9252
11276
|
const selfPath = fileURLToPath(import.meta.url);
|
|
9253
11277
|
try {
|
|
9254
|
-
return realpathSync(
|
|
11278
|
+
return realpathSync(path10.resolve(entry)) === realpathSync(path10.resolve(selfPath));
|
|
9255
11279
|
} catch {
|
|
9256
|
-
return
|
|
11280
|
+
return path10.resolve(entry) === path10.resolve(selfPath);
|
|
9257
11281
|
}
|
|
9258
11282
|
}
|
|
9259
11283
|
if (isPrimaryModule()) {
|