agent-inspect 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +13 -1
  2. package/README.md +11 -6
  3. package/docs/ADAPTER-CONFORMANCE.md +7 -3
  4. package/docs/ADAPTERS.md +120 -5
  5. package/docs/API.md +123 -21
  6. package/docs/CLI.md +154 -6
  7. package/docs/KNOWN-ISSUES.md +7 -1
  8. package/docs/LIMITATIONS.md +7 -1
  9. package/docs/SCHEMA.md +1 -0
  10. package/package.json +12 -2
  11. package/packages/cli/dist/index.cjs +2057 -33
  12. package/packages/cli/dist/index.cjs.map +1 -1
  13. package/packages/cli/dist/index.mjs +2057 -33
  14. package/packages/cli/dist/index.mjs.map +1 -1
  15. package/packages/core/dist/advanced.d.cts +4 -4
  16. package/packages/core/dist/advanced.d.ts +4 -4
  17. package/packages/core/dist/checks.cjs +1535 -0
  18. package/packages/core/dist/checks.cjs.map +1 -0
  19. package/packages/core/dist/checks.d.cts +585 -0
  20. package/packages/core/dist/checks.d.ts +585 -0
  21. package/packages/core/dist/checks.mjs +1512 -0
  22. package/packages/core/dist/checks.mjs.map +1 -0
  23. package/packages/core/dist/diff.d.cts +3 -3
  24. package/packages/core/dist/diff.d.ts +3 -3
  25. package/packages/core/dist/exporters.d.cts +3 -3
  26. package/packages/core/dist/exporters.d.ts +3 -3
  27. package/packages/core/dist/index.d.cts +6 -6
  28. package/packages/core/dist/index.d.ts +6 -6
  29. package/packages/core/dist/{inspect-event-Des4JDHo.d.cts → inspect-event-CevRYp58.d.cts} +1 -1
  30. package/packages/core/dist/{inspect-event-Des4JDHo.d.ts → inspect-event-CevRYp58.d.ts} +1 -1
  31. package/packages/core/dist/{log-config-C1GcJPIM.d.ts → log-config-BPHS4Sds.d.ts} +1 -1
  32. package/packages/core/dist/{log-config-BnH8Ykcb.d.cts → log-config-DanPV3P9.d.cts} +1 -1
  33. package/packages/core/dist/logs.d.cts +3 -3
  34. package/packages/core/dist/logs.d.ts +3 -3
  35. package/packages/core/dist/{persisted-inspect-event-DiFto0K2.d.ts → persisted-inspect-event-Cw7TeYGr.d.ts} +1 -1
  36. package/packages/core/dist/{persisted-inspect-event-0kaRADsp.d.cts → persisted-inspect-event-DHPfzUd8.d.cts} +1 -1
  37. package/packages/core/dist/persisted.d.cts +5 -5
  38. package/packages/core/dist/persisted.d.ts +5 -5
  39. package/packages/core/dist/readers.d.cts +2 -2
  40. package/packages/core/dist/readers.d.ts +2 -2
  41. package/packages/core/dist/{types-tSix7tfv.d.ts → types-Ap9uMdx_.d.ts} +1 -1
  42. package/packages/core/dist/{types-DB8jB6Jg.d.cts → types-B2-BU5CS.d.cts} +1 -1
  43. package/packages/core/dist/writers.d.cts +2 -2
  44. package/packages/core/dist/writers.d.ts +2 -2
package/docs/CLI.md CHANGED
@@ -26,6 +26,10 @@ Core commands:
26
26
  - `tail` — live-tail logs into updating local trees
27
27
  - `export` — export manual traces to Markdown/HTML/OpenInference/OTLP JSON (local only)
28
28
  - `open` — read supported local trace files, directories, or stdin through the canonical reader pipeline
29
+ - `check` — run deterministic local trace checks with stable JSON and exit codes
30
+ - `scan` — best-effort local safety scan for trace capture risks
31
+ - `verify-safe` — best-effort local trace safety verification
32
+ - `artifacts` — create safe local CI trace artifact bundles and optional step summaries
29
33
  - `diff` — compare two manual traces (local, read-only)
30
34
  - `timeline` — chronological view of one run (local JSONL)
31
35
  - `stats` — local aggregate stats over a trace directory
@@ -44,6 +48,20 @@ Core commands:
44
48
  - **0**: command succeeded (even if a diff reports “differences”)
45
49
  - **1**: command error (invalid args, missing files, missing runs, parse failures, validation failures, etc.)
46
50
 
51
+ Exception: `check` uses CI-oriented semantic exit codes:
52
+
53
+ - **0**: all selected checks passed
54
+ - **1**: checks ran and at least one error-severity rule failed
55
+ - **2**: invalid arguments or invalid config
56
+ - **3**: trace input could not be read
57
+ - **4**: unsupported or ambiguous trace format
58
+
59
+ Exception: `scan` and `verify-safe` use local safety status exit codes:
60
+
61
+ - **0**: status is SAFE or SAFE WITH WARNINGS
62
+ - **1**: status is UNSAFE
63
+ - **2**: status is UNKNOWN, including unreadable, unsupported, ambiguous, or invalid inputs
64
+
47
65
  AgentInspect favors **human-readable errors without stack traces** for expected user mistakes.
48
66
 
49
67
  ## 4. JSON output policy
@@ -59,6 +77,8 @@ Many commands support `--json` for scripting. JSON output is intended to be:
59
77
  - Log-derived output includes **confidence** labels and avoids inventing parent-child relationships.
60
78
  - Redaction defaults are conservative (e.g. `authorization`, `cookie`, `token`, `apiKey`, `password`, `secret`, `email`).
61
79
  - Exported payloads are **redacted by default** unless explicitly configured otherwise.
80
+ - `scan` and `verify-safe` are best-effort local checks, not compliance, privacy, security, or regulatory certifications.
81
+ - `artifacts` renders structural summaries and check evidence only; it does not include raw prompt/output bodies, request/response bodies, headers, API keys, secrets, or full tool payloads.
62
82
 
63
83
  ## 6. Command reference
64
84
 
@@ -222,7 +242,135 @@ cat packages/core/test/fixtures/openinference-basic.json | npx agent-inspect ope
222
242
 
223
243
  When a directory or payload contains multiple runs, `open` lists the run ids and exits until you pass `--run <run-id>`.
224
244
 
225
- ### 6.8 `diff`
245
+ ### 6.8 `check`
246
+
247
+ Run deterministic checks against a local trace. This command is local and read-only: it does not rerun agents, call models, upload traces, or mutate input files.
248
+
249
+ ```bash
250
+ agent-inspect check <trace-path-or-run-id> [options]
251
+ ```
252
+
253
+ `<trace-path-or-run-id>` may be a trace file, directory, `-` for stdin, or a run id resolved with `--dir`.
254
+
255
+ Options:
256
+
257
+ - `--dir <path>`: trace directory for run-id lookup
258
+ - `--format <agent-inspect-jsonl|openinference-json|otlp-json>`: explicit reader format override
259
+ - `--run <run-id>`: select a run when input contains multiple runs
260
+ - `--config <path>`: check config (`.json`, `.js`, `.mjs`, or `.cjs`)
261
+ - `--json`: print deterministic `TraceCheckResult` JSON
262
+ - `--rule <id>`: select a rule id; repeatable
263
+ - `--max-duration-ms <number>`: add `run.duration`
264
+ - `--required-tool <name>` / `--forbidden-tool <name>`: add `tool.usage`
265
+ - `--allowed-model <model>` / `--max-total-tokens <number>`: add `llm.usage`
266
+
267
+ By default, `check` runs `run.status`. Additional built-in rules can be selected with `--rule` or config when their options are available.
268
+
269
+ Config files use this shape:
270
+
271
+ ```json
272
+ {
273
+ "checks": {
274
+ "select": ["run.status", "run.duration"],
275
+ "run": { "maxDurationMs": 30000 },
276
+ "tool": { "required": ["search_docs"] },
277
+ "llm": { "allowedModels": ["gpt-4.1-mini"], "maxTotalTokens": 12000 }
278
+ }
279
+ }
280
+ ```
281
+
282
+ YAML is not supported. TypeScript config files (`.ts`, `.mts`, `.cts`) are rejected with a clear error, because no explicit TypeScript loader strategy exists yet; use a precompiled JavaScript config instead.
283
+
284
+ Examples:
285
+
286
+ ```bash
287
+ npx agent-inspect check fixtures/traces-v0.2/manual-basic.jsonl --json
288
+ npx agent-inspect check minimal-success --dir fixtures/traces --rule run.status
289
+ npx agent-inspect check trace.jsonl --max-duration-ms 30000 --required-tool search_docs --json
290
+ ```
291
+
292
+ Recipe: [examples/recipes/deterministic-ci-checks](../examples/recipes/deterministic-ci-checks/README.md)
293
+
294
+ ### 6.9 `scan` and `verify-safe`
295
+
296
+ Run best-effort local safety verification for supported trace inputs. These commands are local and read-only: they do not rerun agents, call models, upload traces, mutate input files, or certify compliance.
297
+
298
+ ```bash
299
+ agent-inspect scan <trace-path-or-run-id> [options]
300
+ agent-inspect verify-safe <trace-path-or-run-id> [options]
301
+ ```
302
+
303
+ `<trace-path-or-run-id>` may be a trace file, directory, `-` for stdin, or a run id resolved with `--dir`.
304
+
305
+ Statuses:
306
+
307
+ - `SAFE`: no safety findings and no reader warnings.
308
+ - `SAFE WITH WARNINGS`: no safety findings, but the reader reported warnings or unsupported fields.
309
+ - `UNSAFE`: safety findings were detected.
310
+ - `UNKNOWN`: the input could not be read, normalized, or selected conservatively.
311
+
312
+ Options:
313
+
314
+ - `--dir <path>`: trace directory for run-id lookup
315
+ - `--format <agent-inspect-jsonl|openinference-json|otlp-json>`: explicit reader format override
316
+ - `--run <run-id>`: select a run when input contains multiple runs
317
+ - `--json`: print deterministic JSON safety result
318
+ - `--max-string-length <number>`: unsafe threshold for string values
319
+ - `--max-array-length <number>`: unsafe threshold for array values
320
+ - `--max-object-keys <number>`: unsafe threshold for object key counts
321
+ - `--max-serialized-bytes <number>`: unsafe threshold for serialized values
322
+
323
+ The scan looks for raw prompt/output-like capture paths, unredacted sensitive-looking keys, secret-like string patterns, and oversized values. It reports evidence paths rather than raw prompt, output, request/response, header, API key, secret, or full tool payload values. Secret detection is best-effort and should not be treated as exhaustive.
324
+
325
+ Examples:
326
+
327
+ ```bash
328
+ npx agent-inspect scan fixtures/traces-v0.2/manual-basic.jsonl --json
329
+ npx agent-inspect verify-safe minimal-success --dir fixtures/traces
330
+ npx agent-inspect verify-safe trace.jsonl --max-string-length 8192 --json
331
+ ```
332
+
333
+ ### 6.10 `artifacts`
334
+
335
+ Create deterministic local CI artifacts for supported trace inputs. This command is local and read-only for trace inputs: it does not rerun agents, call models, upload files, use GitHub APIs, or mutate repository state. It writes only to `--output-dir` and, when requested, a local step-summary file.
336
+
337
+ ```bash
338
+ agent-inspect artifacts <trace-path-or-run-id> --output-dir <path> [options]
339
+ ```
340
+
341
+ Generated files:
342
+
343
+ - `trace.json`: structural trace summary only
344
+ - `check.json`: safety check result
345
+ - `diff.json`: baseline diff result, or `not_requested`
346
+ - `summary.md`: safe Markdown CI summary
347
+ - `report.html`: safe HTML CI summary
348
+ - `manifest.json`: deterministic file/status manifest
349
+
350
+ Options:
351
+
352
+ - `--output-dir <path>`: required local artifact directory
353
+ - `--dir <path>`: trace directory for run-id lookup
354
+ - `--format <agent-inspect-jsonl|openinference-json|otlp-json>`: explicit reader format override
355
+ - `--run <run-id>`: select a run when input contains multiple runs
356
+ - `--baseline <trace-path-or-run-id>`: optional baseline trace for diff artifacts
357
+ - `--baseline-run <run-id>`: select a run from the baseline trace
358
+ - `--github-summary <path>`: append the safe Markdown summary to this file, such as `$GITHUB_STEP_SUMMARY`
359
+ - `--json`: print deterministic `manifest.json` content
360
+
361
+ The artifact command runs safety checks before rendering and only includes structural counts, statuses, bounded check findings, diagnostics, and evidence paths. Baseline diff artifacts use normalized baseline checks and also avoid raw prompt/output/tool payload values. `--github-summary` is plain local file output; AgentInspect does not call GitHub APIs or upload artifacts.
362
+
363
+ Examples:
364
+
365
+ ```bash
366
+ npx agent-inspect artifacts fixtures/traces-v0.2/manual-basic.jsonl --output-dir ./artifacts --json
367
+ npx agent-inspect artifacts minimal-success --dir fixtures/traces --output-dir ./artifacts --github-summary "$GITHUB_STEP_SUMMARY"
368
+ npx agent-inspect artifacts candidate.jsonl --baseline baseline.jsonl --output-dir ./artifacts
369
+ ```
370
+
371
+ Recipe and sample workflow: [examples/recipes/deterministic-ci-checks](../examples/recipes/deterministic-ci-checks/README.md)
372
+
373
+ ### 6.11 `diff`
226
374
 
227
375
  Compare two manual trace runs. Diff is **local** and **read-only** (does not rerun agents).
228
376
 
@@ -286,7 +434,7 @@ Differences:
286
434
 
287
435
  More examples, including timing-only and structure-only diffs, are in `docs/DIFF.md`.
288
436
 
289
- ### 6.9 `timeline`
437
+ ### 6.12 `timeline`
290
438
 
291
439
  Chronological step list for one manual trace. Read-only; does not mutate JSONL files.
292
440
 
@@ -302,7 +450,7 @@ Options:
302
450
 
303
451
  ![Timeline with slow-step focus](../assets/demos/timeline.gif)
304
452
 
305
- ### 6.10 `stats`
453
+ ### 6.13 `stats`
306
454
 
307
455
  Local aggregate statistics over trace files in a directory. Read-only.
308
456
 
@@ -322,7 +470,7 @@ Options:
322
470
 
323
471
  Use `--correlation-id` or `--group-id` to filter runs by `run_started` metadata (see [API.md](./API.md)).
324
472
 
325
- ### 6.11 `search`
473
+ ### 6.14 `search`
326
474
 
327
475
  Deterministic search over local traces (substring / exact filters). No semantic search.
328
476
 
@@ -352,7 +500,7 @@ npx agent-inspect search --duration ">100ms" --json
352
500
 
353
501
  ![Search traces by status error](../assets/demos/search.gif)
354
502
 
355
- ### 6.12 `what`
503
+ ### 6.15 `what`
356
504
 
357
505
  Concise human-readable summary of one local trace run. Read-only; accepts v0.1 manual JSONL and v0.2 persisted-event JSONL through the shared dual-format normalization path. Vocabulary: [TRACE-VOCABULARY-V1.5.md](./proposals/TRACE-VOCABULARY-V1.5.md).
358
506
 
@@ -381,7 +529,7 @@ Outcome: Completed successfully.
381
529
  Slowest: plan (100ms, logic)
382
530
  ```
383
531
 
384
- ### 6.13 `report`
532
+ ### 6.16 `report`
385
533
 
386
534
  Generate a local inspection report combining **what happened**, **timeline**, and **execution tree** sections. The command reads local v0.1 manual JSONL and v0.2 persisted-event JSONL through the shared dual-format normalization path without mutating them. Distinct from `export` (which targets shareable tree snapshots and standards formats).
387
535
 
package/docs/KNOWN-ISSUES.md CHANGED
@@ -23,7 +23,7 @@ AgentInspect is **local-first** and **CLI-first**. These behaviors are intention
23
23
 
24
24
  - **Vendor sinks** (hosted dashboards, Langfuse/Braintrust/New Relic/Datadog native uploads, OTLP gRPC streaming, etc.) are **not implemented** in the core packages described here.
25
25
  - **AI SDK adapter** (`@agent-inspect/ai-sdk`) is experimental and metadata-first. It depends on explicit AI SDK telemetry configuration and requires `recordInputs: false` / `recordOutputs: false` for the documented safe path.
26
- - **OpenAI Agents JS adapter** (`@agent-inspect/openai-agents`) is scaffold-only in the v1.7 train. Runtime span mapping is not implemented, and the safe future path is `setTraceProcessors()` rather than `addTraceProcessor()`.
26
+ - **OpenAI Agents JS adapter** (`@agent-inspect/openai-agents`) is experimental and remains private/unpublished until the v1.8 first-publication gate. Runtime metadata mapping is local-only; the safe install path is `setTraceProcessors()` rather than `addTraceProcessor()`.
27
27
  - **LangGraph support** is currently a documented boundary through `@agent-inspect/langchain`, not a dedicated package.
28
28
  - **LangChain adapter** captures **metadata-oriented** signals by default; it does not replace full framework observability.
29
29
  - **LangChain `stream: true`** records chunk counts and timing only — not a full token replay. Per-token JSONL events are not emitted.
@@ -93,6 +93,12 @@ pnpm compat:smoke
93
93
  - Fixture pattern: [test/consumer-fixtures/jest-cjs/](../../test/consumer-fixtures/jest-cjs/).
94
94
  - Full Jest runner smoke in CI is a documented follow-up — root package does not ship Jest as a devDependency.
95
95
 
96
+ ## v1.8 pre-release adoption notes
97
+
98
+ - `@agent-inspect/vitest` and `@agent-inspect/jest` are private/unpublished until the v1.8 release-readiness gate completes. The [test reporter artifact recipe](../examples/recipes/test-reporter-artifacts/README.md) documents the intended config shape without requiring those packages.
99
+ - `agent-inspect artifacts --github-summary` writes a local step-summary file only. It does not call GitHub APIs, open PR comments, upload artifacts, or mutate repository state.
100
+ - Baseline checks compare normalized structural facts from explicit candidate and baseline inputs. They are useful for CI regression evidence, not replay or semantic eval scoring.
101
+
96
102
  ### What to include in a bug report
97
103
 
98
104
  - Node.js version (`node -v`)
package/docs/LIMITATIONS.md CHANGED
@@ -31,7 +31,7 @@ This document states what AgentInspect **does not** provide today. It complement
31
31
 
32
32
  - **AI SDK integration is explicit telemetry wiring.** Use `@agent-inspect/ai-sdk` through AI SDK `experimental_telemetry.integrations`; AgentInspect does not wrap providers, patch fetch, or enable telemetry globally.
33
33
  - **AI SDK privacy settings are caller-owned.** Examples set `recordInputs: false` and `recordOutputs: false`; leaving those enabled in user code can cause the AI SDK telemetry layer to include richer data before AgentInspect receives events.
34
- - **OpenAI Agents JS support is scaffold-only.** `@agent-inspect/openai-agents` documents the safe `setTraceProcessors()` boundary but does not map runtime spans yet and is not part of the v1.7 published package set.
34
+ - **OpenAI Agents JS support is experimental and not published yet.** `@agent-inspect/openai-agents` maps metadata-only runtime spans through the safe `setTraceProcessors()` boundary, remains private until the v1.8 first-publication gate, and does not capture raw payloads by default.
35
35
  - **LangGraph support is a boundary decision, not a separate package.** Initial support is expected through `@agent-inspect/langchain` callbacks unless no-network fixtures prove a separate package is needed.
36
36
  - **No root/core adapter dependencies.** AI SDK, OpenAI Agents, LangGraph, OpenTelemetry, and LangChain remain outside the root/core runtime dependency graph.
37
37
 
@@ -54,6 +54,12 @@ This document states what AgentInspect **does not** provide today. It complement
54
54
  - **Metadata truncation** applies to string values and nested structures; very large metadata may be replaced with a truncation marker when `maxEventBytes` is exceeded (default 64 KiB per JSONL line).
55
55
  - **Redaction is not encryption.** Local trace files remain readable on disk; treat `.agent-inspect-runs/` like any developer artifact that may contain operational data.
56
56
 
57
+ ## Checks, artifacts, and test reporters
58
+
59
+ - **Checks are deterministic local rules, not compliance certification.** `check`, `scan`, and `verify-safe` surface bounded findings and diagnostics over supported local inputs; they do not prove a trace is safe for every sharing context.
60
+ - **Safe CI artifacts are structural summaries.** They avoid raw prompt/output/request/response/header/tool payload content by default, but teams should still review generated files before sharing.
61
+ - **Vitest/Jest reporters are optional and unpublished until release readiness in the v1.8 train.** The recipes document config patterns and explicit associations; consumers should install the packages only after publication.
62
+
57
63
  ## Execution semantics
58
64
 
59
65
  - **No replay / fork** of past runs from traces alone.
package/docs/SCHEMA.md CHANGED
@@ -250,6 +250,7 @@ v1.6 adds experimental writer and reader surfaces without changing the stable ma
250
250
  - `agent-inspect/readers` and `agent-inspect open` read local AgentInspect JSONL, OpenInference JSON, and OTLP JSON inputs through compatibility adapters.
251
251
  - OpenInference and OTLP JSON inputs are **not** a third AgentInspect persisted schema. They are local read formats normalized into inspection trees with warnings and unsupported-field reporting.
252
252
  - Reader and writer APIs perform no network upload and do not mutate source files.
253
+ - v1.8 checks, safety verification, baseline comparison, safe CI artifacts, and reporter artifacts are report layers over existing trace inputs. They do not change manual trace writing, introduce a third persisted trace model, or embed raw prompt/output/request/response/header/tool payload content in their default structural outputs.
253
254
 
254
255
  ## 16. Migration notes
255
256
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-inspect",
3
- "version": "1.7.0",
3
+ "version": "1.8.0",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "description": "Local-first execution-tree debugger for TypeScript AI agents",
@@ -95,6 +95,16 @@
95
95
  "types": "./packages/core/dist/readers.d.cts",
96
96
  "default": "./packages/core/dist/readers.cjs"
97
97
  }
98
+ },
99
+ "./checks": {
100
+ "import": {
101
+ "types": "./packages/core/dist/checks.d.ts",
102
+ "default": "./packages/core/dist/checks.mjs"
103
+ },
104
+ "require": {
105
+ "types": "./packages/core/dist/checks.d.cts",
106
+ "default": "./packages/core/dist/checks.cjs"
107
+ }
98
108
  }
99
109
  },
100
110
  "bin": {
@@ -158,7 +168,7 @@
158
168
  },
159
169
  "scripts": {
160
170
  "clean": "pnpm -r exec -- rm -rf dist",
161
- "build": "pnpm exec tsup --config tsup.core.config.ts && pnpm exec tsup --config tsup.cli.config.ts && pnpm exec tsup --config tsup.langchain.config.ts && pnpm exec tsup --config tsup.tui.config.ts && pnpm exec tsup --config tsup.ai-sdk.config.ts && pnpm exec tsup --config tsup.openai-agents.config.ts",
171
+ "build": "pnpm exec tsup --config tsup.core.config.ts && pnpm exec tsup --config tsup.cli.config.ts && pnpm exec tsup --config tsup.langchain.config.ts && pnpm exec tsup --config tsup.tui.config.ts && pnpm exec tsup --config tsup.ai-sdk.config.ts && pnpm exec tsup --config tsup.vitest.config.ts && pnpm exec tsup --config tsup.jest.config.ts && pnpm exec tsup --config tsup.openai-agents.config.ts",
162
172
  "typecheck": "tsc --noEmit",
163
173
  "test": "vitest run",
164
174
  "test:watch": "vitest",