@tangle-network/agent-eval 0.27.2 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/CHANGELOG.md +87 -0
  2. package/dist/{baseline-4R5deP0N.d.ts → baseline-BwdCXUS8.d.ts} +1 -1
  3. package/dist/builder-eval/index.d.ts +3 -3
  4. package/dist/chunk-UW4NOOZI.js +1561 -0
  5. package/dist/chunk-UW4NOOZI.js.map +1 -0
  6. package/dist/{control-BT4qnXiS.d.ts → control-rJhEDdpy.d.ts} +4 -4
  7. package/dist/{control-runtime-BZ_lVLYW.d.ts → control-runtime-BRdQ0wrx.d.ts} +2 -2
  8. package/dist/control.d.ts +5 -5
  9. package/dist/{emitter-DP_cSSiw.d.ts → emitter-BqjeOvJh.d.ts} +1 -1
  10. package/dist/{failure-cluster-Cw65_5FY.d.ts → failure-cluster-D1NZKqYu.d.ts} +1 -1
  11. package/dist/{feedback-trajectory-D1aGKusy.d.ts → feedback-trajectory-j0nJFgC6.d.ts} +1 -1
  12. package/dist/governance/index.d.ts +2 -2
  13. package/dist/{index-BhLlu-qO.d.ts → index-Cgt3DKXr.d.ts} +1 -1
  14. package/dist/index.d.ts +1190 -335
  15. package/dist/index.js +1580 -489
  16. package/dist/index.js.map +1 -1
  17. package/dist/{integrity-DK2EBVZC.d.ts → integrity-BAxLGJ9I.d.ts} +2 -2
  18. package/dist/knowledge/index.d.ts +3 -3
  19. package/dist/meta-eval/index.d.ts +1 -1
  20. package/dist/{multi-layer-verifier-U-c8ge1k.d.ts → multi-layer-verifier-BNi4-8lR.d.ts} +1 -1
  21. package/dist/openapi.json +1 -1
  22. package/dist/optimization.d.ts +8 -8
  23. package/dist/pipelines/index.d.ts +6 -6
  24. package/dist/prm/index.d.ts +4 -4
  25. package/dist/{query-DODUYdPg.d.ts → query-BFDT0kX_.d.ts} +1 -1
  26. package/dist/{release-report-CCQqnK46.d.ts → release-report-PWhGlpfO.d.ts} +1 -1
  27. package/dist/replay-BX5Fm8en.d.ts +529 -0
  28. package/dist/reporting.d.ts +4 -4
  29. package/dist/{researcher-G81CWc0q.d.ts → researcher-ClDX3KZx.d.ts} +5 -5
  30. package/dist/rl.d.ts +8 -8
  31. package/dist/{rubric-D5tjHNJQ.d.ts → rubric-DgSqjqqj.d.ts} +2 -2
  32. package/dist/{store-Db2Bv8Cf.d.ts → store-BP5be6s7.d.ts} +1 -1
  33. package/dist/{summary-report-Dl4akLKX.d.ts → summary-report-jrSGb2xZ.d.ts} +1 -1
  34. package/dist/{test-graded-scenario-B2kWEdh9.d.ts → test-graded-scenario-BJ54PDan.d.ts} +2 -2
  35. package/dist/traces.d.ts +9 -311
  36. package/dist/traces.js +15 -986
  37. package/dist/traces.js.map +1 -1
  38. package/dist/{trajectory-CnoBo-JY.d.ts → trajectory-BFmveYZt.d.ts} +1 -1
  39. package/dist/wire/index.d.ts +4 -4
  40. package/package.json +1 -1
  41. package/dist/chunk-4U4BKCXK.js +0 -569
  42. package/dist/chunk-4U4BKCXK.js.map +0 -1
  43. package/dist/replay-D7z0J43-.d.ts +0 -225
@@ -1,10 +1,10 @@
1
1
  export { R as RubricOutcomePair, a as RubricPredictiveValidityInput, b as RubricPredictiveValidityReport, c as RubricRanking, r as rubricPredictiveValidity } from './rubric-predictive-validity-C0uDYwG6.js';
2
- export { B as BootstrapOptions, a as BootstrapResult, J as JudgeReplayGateArgs, P as PairedBootstrapOptions, b as PairedBootstrapResult, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, g as ReleaseConfidenceScorecard, h as ReleaseConfidenceStatus, i as ReleaseConfidenceThresholds, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, V as Verdict, l as assertReleaseConfidence, m as bhAdjust, n as bootstrapCi, o as evaluateReleaseConfidence, p as judgeReplayGate, q as pairedBootstrap, r as pairedWilcoxon, s as releaseTraceEvidenceFromMultiShotTrials, t as renderReleaseReport } from './release-report-CCQqnK46.js';
2
+ export { B as BootstrapOptions, a as BootstrapResult, J as JudgeReplayGateArgs, P as PairedBootstrapOptions, b as PairedBootstrapResult, R as ReleaseConfidenceAxis, c as ReleaseConfidenceAxisName, d as ReleaseConfidenceInput, e as ReleaseConfidenceIssue, f as ReleaseConfidenceMetrics, g as ReleaseConfidenceScorecard, h as ReleaseConfidenceStatus, i as ReleaseConfidenceThresholds, j as ReleaseTraceEvidence, k as RenderReleaseReportOptions, V as Verdict, l as assertReleaseConfidence, m as bhAdjust, n as bootstrapCi, o as evaluateReleaseConfidence, p as judgeReplayGate, q as pairedBootstrap, r as pairedWilcoxon, s as releaseTraceEvidenceFromMultiShotTrials, t as renderReleaseReport } from './release-report-PWhGlpfO.js';
3
3
  export { I as InterimReleaseConfidence, a as InterimReleaseConfidenceInput, P as PairedEvalueOptions, b as PairedEvalueSequence, c as PairedEvalueStep, S as SequentialDecision, e as evaluateInterimReleaseConfidence, p as pairedEvalueSequence } from './sequential-5iSVfzl2.js';
4
- export { C as GainDistributionBin, F as GainDistributionFigureSpec, H as GainDistributionOptions, J as ParetoFigureSpec, K as ParetoPoint, L as RESEARCH_REPORT_HARD_PAIR_FLOOR, N as ResearchReport, O as ResearchReportCandidate, Q as ResearchReportDecision, U as ResearchReportMethodology, W as ResearchReportOptions, X as ResearchReportRecommendation, Y as SummaryTable, Z as SummaryTableOptions, _ as SummaryTableRow, $ as gainHistogram, a0 as paretoChart, a1 as researchReport, a2 as summaryTable } from './summary-report-Dl4akLKX.js';
4
+ export { C as GainDistributionBin, F as GainDistributionFigureSpec, H as GainDistributionOptions, J as ParetoFigureSpec, K as ParetoPoint, L as RESEARCH_REPORT_HARD_PAIR_FLOOR, N as ResearchReport, O as ResearchReportCandidate, Q as ResearchReportDecision, U as ResearchReportMethodology, W as ResearchReportOptions, X as ResearchReportRecommendation, Y as SummaryTable, Z as SummaryTableOptions, _ as SummaryTableRow, $ as gainHistogram, a0 as paretoChart, a1 as researchReport, a2 as summaryTable } from './summary-report-jrSGb2xZ.js';
5
5
  import './run-record-CqzahIbx.js';
6
6
  import './errors-BZ9sTdz7.js';
7
7
  import './outcome-store-D6KWmYvj.js';
8
8
  import './dataset-CiK_3LDr.js';
9
- import './failure-cluster-Cw65_5FY.js';
10
- import './store-Db2Bv8Cf.js';
9
+ import './failure-cluster-D1NZKqYu.js';
10
+ import './store-BP5be6s7.js';
@@ -1,9 +1,9 @@
1
1
  import { A as AgentEvalError, C as CaptureIntegrityError } from './errors-BZ9sTdz7.js';
2
- import { R as RawProviderSink, P as ProviderRedactor, a as RunIntegrityExpectations, b as RunIntegrityReport } from './integrity-DK2EBVZC.js';
2
+ import { R as RawProviderSink, P as ProviderRedactor, a as RunIntegrityExpectations, b as RunIntegrityReport } from './integrity-BAxLGJ9I.js';
3
3
  import { a as RunSplitTag, b as RunTokenUsage, c as RunJudgeMetadata, R as RunRecord } from './run-record-CqzahIbx.js';
4
- import { W as ResearchReportOptions, N as ResearchReport, a3 as GateDecision } from './summary-report-Dl4akLKX.js';
5
- import { T as TraceEmitter, R as RunCompleteHook } from './emitter-DP_cSSiw.js';
6
- import { T as TraceStore } from './store-Db2Bv8Cf.js';
4
+ import { W as ResearchReportOptions, N as ResearchReport, a3 as GateDecision } from './summary-report-jrSGb2xZ.js';
5
+ import { T as TraceEmitter, R as RunCompleteHook } from './emitter-BqjeOvJh.js';
6
+ import { T as TraceStore } from './store-BP5be6s7.js';
7
7
 
8
8
  /**
9
9
  * LLM client with graceful degrade.
@@ -571,4 +571,4 @@ declare class NoopResearcher implements Researcher {
571
571
  evaluateChange(_plan: ExperimentPlan): Promise<ExperimentResult>;
572
572
  }
573
573
 
574
- export { CallbackResearcher as C, type EvalCampaignOptions as E, type FailedRun as F, type LlmClientOptions as L, NoopResearcher as N, type Researcher as R, type SteeringChange as S, type CallbackResearcherOptions as a, type CampaignFactoryParams as b, type CampaignIntegrityPolicy as c, type CampaignRunContext as d, type CampaignRunOutcome as e, type CampaignRunner as f, type CampaignScenario as g, type CampaignVariant as h, type EvalCampaignResult as i, type ExperimentPlan as j, type ExperimentResult as k, type FailureMode as l, LlmCallError as m, type LlmCallRequest as n, type LlmCallResult as o, LlmClient as p, type LlmMessage as q, runEvalCampaign as r, LlmRouteAssertionError as s, type LlmRouteRequirements as t, type LlmUsage as u, assertLlmRoute as v, callLlm as w, callLlmJson as x, probeLlm as y, stripFencedJson as z };
574
+ export { CallbackResearcher as C, type EvalCampaignOptions as E, type FailedRun as F, type LlmClientOptions as L, NoopResearcher as N, type Researcher as R, type SteeringChange as S, type CallbackResearcherOptions as a, type CampaignFactoryParams as b, type CampaignIntegrityPolicy as c, type CampaignRunContext as d, type CampaignRunOutcome as e, type CampaignRunner as f, type CampaignScenario as g, type CampaignVariant as h, type EvalCampaignResult as i, type ExperimentPlan as j, type ExperimentResult as k, type FailureMode as l, type LlmCallRequest as m, type LlmCallResult as n, LlmCallError as o, LlmClient as p, type LlmMessage as q, runEvalCampaign as r, LlmRouteAssertionError as s, type LlmRouteRequirements as t, type LlmUsage as u, assertLlmRoute as v, callLlm as w, callLlmJson as x, probeLlm as y, stripFencedJson as z };
package/dist/rl.d.ts CHANGED
@@ -1,16 +1,16 @@
1
1
  import { R as RunRecord, a as RunSplitTag } from './run-record-CqzahIbx.js';
2
- import { V as VerificationReport } from './multi-layer-verifier-U-c8ge1k.js';
3
- import { t as TrialResult, V as VariantAggregate, q as PromptEvolutionResult, e as MultiShotOptimizationResult } from './summary-report-Dl4akLKX.js';
2
+ import { V as VerificationReport } from './multi-layer-verifier-BNi4-8lR.js';
3
+ import { t as TrialResult, V as VariantAggregate, q as PromptEvolutionResult, e as MultiShotOptimizationResult } from './summary-report-jrSGb2xZ.js';
4
4
  import { O as OutcomeStore } from './outcome-store-D6KWmYvj.js';
5
5
  import { b as RubricPredictiveValidityReport } from './rubric-predictive-validity-C0uDYwG6.js';
6
6
  import { I as InterimReleaseConfidence } from './sequential-5iSVfzl2.js';
7
- import { S as Span, T as TraceStore } from './store-Db2Bv8Cf.js';
8
- import { R as Researcher, l as FailureMode, S as SteeringChange, j as ExperimentPlan, k as ExperimentResult, i as EvalCampaignResult, E as EvalCampaignOptions } from './researcher-G81CWc0q.js';
9
- export { r as runEvalCampaign } from './researcher-G81CWc0q.js';
7
+ import { S as Span, T as TraceStore } from './store-BP5be6s7.js';
8
+ import { R as Researcher, l as FailureMode, S as SteeringChange, j as ExperimentPlan, k as ExperimentResult, i as EvalCampaignResult, E as EvalCampaignOptions } from './researcher-ClDX3KZx.js';
9
+ export { r as runEvalCampaign } from './researcher-ClDX3KZx.js';
10
10
  import './errors-BZ9sTdz7.js';
11
- import './failure-cluster-Cw65_5FY.js';
12
- import './integrity-DK2EBVZC.js';
13
- import './emitter-DP_cSSiw.js';
11
+ import './failure-cluster-D1NZKqYu.js';
12
+ import './integrity-BAxLGJ9I.js';
13
+ import './emitter-BqjeOvJh.js';
14
14
 
15
15
  /**
16
16
  * Test-time compute scaling curves.
@@ -1,5 +1,5 @@
1
- import { S as Span, T as TraceStore, J as JudgeSpan } from './store-Db2Bv8Cf.js';
2
- import { T as Trajectory, a as TrajectoryStep } from './trajectory-CnoBo-JY.js';
1
+ import { S as Span, T as TraceStore, J as JudgeSpan } from './store-BP5be6s7.js';
2
+ import { T as Trajectory, a as TrajectoryStep } from './trajectory-BFmveYZt.js';
3
3
 
4
4
  /**
5
5
  * Process Reward Modeling — per-step rubric grading.
@@ -294,4 +294,4 @@ declare class FileSystemTraceStore implements TraceStore {
294
294
  artifacts(runId: string): Promise<Artifact[]>;
295
295
  }
296
296
 
297
- export { type Artifact as A, type BudgetLedgerEntry as B, type EventKind as E, type FailureClass as F, type GenericSpan as G, InMemoryTraceStore as I, type JudgeSpan as J, type LlmSpan as L, type Message as M, type Run as R, type Span as S, type TraceStore as T, type ToolSpan as a, type TraceEvent as b, type RunOutcome as c, type SpanKind as d, type RetrievalSpan as e, type SandboxSpan as f, type BudgetSpec as g, type RunFilter as h, type EventFilter as i, FAILURE_CLASSES as j, FileSystemTraceStore as k, type FileSystemTraceStoreOptions as l, type RunLayer as m, type RunStatus as n, type SpanBase as o, type SpanFilter as p, type SpanStatus as q, TRACE_SCHEMA_VERSION as r, isJudgeSpan as s, isLlmSpan as t, isRetrievalSpan as u, isSandboxSpan as v, isToolSpan as w };
297
+ export { type Artifact as A, type BudgetLedgerEntry as B, type EventKind as E, type FailureClass as F, type GenericSpan as G, InMemoryTraceStore as I, type JudgeSpan as J, type LlmSpan as L, type Message as M, type Run as R, type Span as S, type TraceStore as T, type ToolSpan as a, type TraceEvent as b, type RunOutcome as c, type SpanKind as d, type RetrievalSpan as e, type SandboxSpan as f, type BudgetSpec as g, type EventFilter as h, FAILURE_CLASSES as i, FileSystemTraceStore as j, type FileSystemTraceStoreOptions as k, type RunFilter as l, type RunLayer as m, type RunStatus as n, type SpanBase as o, type SpanFilter as p, type SpanStatus as q, TRACE_SCHEMA_VERSION as r, isJudgeSpan as s, isLlmSpan as t, isRetrievalSpan as u, isSandboxSpan as v, isToolSpan as w };
@@ -1,5 +1,5 @@
1
1
  import { R as RunRecord, a as RunSplitTag } from './run-record-CqzahIbx.js';
2
- import { F as FailureClusterReport } from './failure-cluster-Cw65_5FY.js';
2
+ import { F as FailureClusterReport } from './failure-cluster-D1NZKqYu.js';
3
3
 
4
4
  /**
5
5
  * HeldOutGate — first-class held-out paired-delta promotion gate.
@@ -1,5 +1,5 @@
1
- import { T as TraceEmitter } from './emitter-DP_cSSiw.js';
2
- import { R as Run, F as FailureClass, T as TraceStore } from './store-Db2Bv8Cf.js';
1
+ import { T as TraceEmitter } from './emitter-BqjeOvJh.js';
2
+ import { R as Run, F as FailureClass, T as TraceStore } from './store-BP5be6s7.js';
3
3
 
4
4
  /**
5
5
  * SandboxHarness — executes a scenario in an isolated environment and
package/dist/traces.d.ts CHANGED
@@ -1,314 +1,12 @@
1
- export { D as DEFAULT_REDACTION_RULES, O as OTEL_AGENT_EVAL_SCOPE, a as OtlpExport, b as OtlpResourceSpans, c as OtlpSpan, R as REDACTION_VERSION, d as RedactionReport, e as RedactionRule, f as ReplayCache, g as ReplayCacheEntry, h as ReplayCacheMissError, i as ReplayCacheStats, j as ReplayFetchOptions, k as createReplayFetch, l as exportRunAsOtlp, m as iterateRawCalls, r as redactString, n as redactValue } from './replay-D7z0J43-.js';
2
- import { a as RunCompleteHookContext, R as RunCompleteHook } from './emitter-DP_cSSiw.js';
3
- export { S as SpanHandle, T as TraceEmitter, b as TraceEmitterOptions, l as llmSpanFromProvider } from './emitter-DP_cSSiw.js';
4
- export { F as FileSystemRawProviderSink, d as FileSystemRawProviderSinkOptions, I as InMemoryRawProviderSink, e as InMemoryRawProviderSinkOptions, N as NoopRawProviderSink, P as ProviderRedactor, f as RawProviderDirection, c as RawProviderEvent, R as RawProviderSink, g as RawProviderSinkFilter, h as RunIntegrityError, a as RunIntegrityExpectations, i as RunIntegrityIssue, j as RunIntegrityIssueCode, b as RunIntegrityReport, k as assertRunCaptured, l as defaultProviderRedactor, p as providerFromBaseUrl, t as throwIfRunIncomplete } from './integrity-DK2EBVZC.js';
5
- export { a as aggregateLlm, b as argHash, g as groupBy, j as judgeSpans, l as llmSpans, r as runFailureClass, c as runsForScenario, t as toolSpans } from './query-DODUYdPg.js';
6
- export { A as Artifact, B as BudgetLedgerEntry, g as BudgetSpec, i as EventFilter, E as EventKind, j as FAILURE_CLASSES, F as FailureClass, k as FileSystemTraceStore, l as FileSystemTraceStoreOptions, G as GenericSpan, I as InMemoryTraceStore, J as JudgeSpan, L as LlmSpan, M as Message, e as RetrievalSpan, R as Run, h as RunFilter, m as RunLayer, c as RunOutcome, n as RunStatus, f as SandboxSpan, S as Span, o as SpanBase, p as SpanFilter, d as SpanKind, q as SpanStatus, r as TRACE_SCHEMA_VERSION, a as ToolSpan, b as TraceEvent, T as TraceStore, s as isJudgeSpan, t as isLlmSpan, u as isRetrievalSpan, v as isSandboxSpan, w as isToolSpan } from './store-Db2Bv8Cf.js';
7
- import { AxAIService, AxFunction } from '@ax-llm/ax';
1
+ import { A as AnalyzeTracesOptions, a as AnalyzeTracesResult, T as TraceAnalysisStore, b as TraceAnalystFilters, D as DatasetOverview, Q as QueryTracesPage, V as ViewTraceResult, c as ViewSpansResult, S as SearchTraceResult, d as SearchSpanResult } from './replay-BX5Fm8en.js';
2
+ export { e as AnalyzeTracesInput, f as AnalyzeTracesTurnSnapshot, g as DEFAULT_REDACTION_RULES, h as DEFAULT_TRACE_ANALYST_BUDGETS, O as OTEL_AGENT_EVAL_SCOPE, i as OtlpExport, j as OtlpResourceSpans, k as OtlpSpan, R as REDACTION_VERSION, l as RedactionReport, m as RedactionRule, n as ReplayCache, o as ReplayCacheEntry, p as ReplayCacheMissError, q as ReplayCacheStats, r as ReplayFetchOptions, s as SpanMatchRecord, t as TRACE_ANALYST_TRUNCATION_MARKER_PREFIX, u as TraceAnalystByteBudgets, v as TraceAnalystSpan, w as TraceAnalystSpanKind, x as TraceAnalystSpanStatus, y as TraceAnalystTraceSummary, z as ViewTraceOversized, B as analyzeTraces, C as createReplayFetch, E as exportRunAsOtlp, F as iterateRawCalls, G as redactString, H as redactValue } from './replay-BX5Fm8en.js';
3
+ import { a as RunCompleteHookContext, R as RunCompleteHook } from './emitter-BqjeOvJh.js';
4
+ export { S as SpanHandle, T as TraceEmitter, b as TraceEmitterOptions, l as llmSpanFromProvider } from './emitter-BqjeOvJh.js';
5
+ export { F as FileSystemRawProviderSink, c as FileSystemRawProviderSinkOptions, I as InMemoryRawProviderSink, d as InMemoryRawProviderSinkOptions, N as NoopRawProviderSink, P as ProviderRedactor, e as RawProviderDirection, f as RawProviderEvent, R as RawProviderSink, g as RawProviderSinkFilter, h as RunIntegrityError, a as RunIntegrityExpectations, i as RunIntegrityIssue, j as RunIntegrityIssueCode, b as RunIntegrityReport, k as assertRunCaptured, l as defaultProviderRedactor, p as providerFromBaseUrl, t as throwIfRunIncomplete } from './integrity-BAxLGJ9I.js';
6
+ export { a as aggregateLlm, b as argHash, g as groupBy, j as judgeSpans, l as llmSpans, r as runFailureClass, c as runsForScenario, t as toolSpans } from './query-BFDT0kX_.js';
7
+ export { A as Artifact, B as BudgetLedgerEntry, g as BudgetSpec, h as EventFilter, E as EventKind, i as FAILURE_CLASSES, F as FailureClass, j as FileSystemTraceStore, k as FileSystemTraceStoreOptions, G as GenericSpan, I as InMemoryTraceStore, J as JudgeSpan, L as LlmSpan, M as Message, e as RetrievalSpan, R as Run, l as RunFilter, m as RunLayer, c as RunOutcome, n as RunStatus, f as SandboxSpan, S as Span, o as SpanBase, p as SpanFilter, d as SpanKind, q as SpanStatus, r as TRACE_SCHEMA_VERSION, a as ToolSpan, b as TraceEvent, T as TraceStore, s as isJudgeSpan, t as isLlmSpan, u as isRetrievalSpan, v as isSandboxSpan, w as isToolSpan } from './store-BP5be6s7.js';
8
8
  import { N as NotFoundError } from './errors-BZ9sTdz7.js';
9
-
10
- /**
11
- * Shared types for the trace-analyst module.
12
- *
13
- * Wire format. The store interface speaks `OtlpSpanLike` rows — one JSONL
14
- * line per span, OTLP-shaped. We do NOT depend on a specific tracing
15
- * vendor at the type level. Adapter
16
- * layers map upstream shapes onto this interface.
17
- *
18
- * Design constraint. Every read operation that can return arbitrary
19
- * payload must carry a byte budget so the agent's tool result stays
20
- * bounded regardless of input trace size. Oversized responses
21
- * substitute a deterministic summary instead of bytes — see
22
- * `ViewTraceOversized`.
23
- */
24
- /** OTLP span kind (subset we actually use). */
25
- type TraceAnalystSpanKind = 'AGENT' | 'LLM' | 'TOOL' | 'CHAIN' | 'GUARDRAIL' | 'SPAN' | 'UNKNOWN';
26
- type TraceAnalystSpanStatus = 'OK' | 'ERROR' | 'UNSET';
27
- /** Subset of OTLP span fields the analyst exposes to the agent. The
28
- * store's job is to project upstream's full span shape down to this
29
- * view — the analyst never sees vendor extensions directly. */
30
- interface TraceAnalystSpan {
31
- trace_id: string;
32
- span_id: string;
33
- parent_span_id: string | null;
34
- name: string;
35
- kind: TraceAnalystSpanKind;
36
- start_time: string;
37
- end_time: string;
38
- duration_ms: number;
39
- status: TraceAnalystSpanStatus;
40
- status_message?: string;
41
- service_name: string | null;
42
- agent_name: string | null;
43
- model_name: string | null;
44
- tool_name: string | null;
45
- /** Raw JSON-serialisable attribute map. May contain large strings;
46
- * callers must respect the per-attribute byte cap. */
47
- attributes: Record<string, unknown>;
48
- }
49
- interface TraceAnalystTraceSummary {
50
- trace_id: string;
51
- service_name: string | null;
52
- agent_name: string | null;
53
- span_count: number;
54
- has_errors: boolean;
55
- start_time: string;
56
- end_time: string;
57
- duration_ms: number;
58
- raw_jsonl_bytes: number;
59
- models: string[];
60
- tools: string[];
61
- }
62
- interface TraceAnalystFilters {
63
- /** Restrict to traces that contain at least one error span. */
64
- has_errors?: boolean;
65
- /** Match if any span's `service.name` is in this list. */
66
- service_names?: string[];
67
- /** Match if any span's `agent.name` is in this list. */
68
- agent_names?: string[];
69
- /** Match if any LLM span's `llm.model_name` is in this list. */
70
- model_names?: string[];
71
- /** Match if any tool span's `tool.name` is in this list. */
72
- tool_names?: string[];
73
- /** ISO-8601 lower bound on the trace's earliest start time. */
74
- start_time_after?: string;
75
- /** ISO-8601 upper bound on the trace's earliest start time. */
76
- start_time_before?: string;
77
- /** Single regex applied to raw JSONL bytes for the trace. Opt-in;
78
- * expensive on large datasets. Use the indexed filters above first. */
79
- regex_pattern?: string;
80
- }
81
- interface DatasetOverview {
82
- total_traces: number;
83
- raw_jsonl_bytes: number;
84
- services: string[];
85
- agents: string[];
86
- models: string[];
87
- tool_names: string[];
88
- /** Up to 20 real trace ids the agent may pass to view/search tools. */
89
- sample_trace_ids: string[];
90
- errors: {
91
- trace_count: number;
92
- span_count: number;
93
- };
94
- time_range: {
95
- earliest: string;
96
- latest: string;
97
- } | null;
98
- }
99
- interface QueryTracesPage {
100
- traces: TraceAnalystTraceSummary[];
101
- total: number;
102
- has_more: boolean;
103
- }
104
- /** Full-trace view. When the response would exceed the per-call byte
105
- * budget, `oversized` is populated INSTEAD of `spans` so the agent
106
- * knows to switch to `searchTrace` / `viewSpans`. */
107
- interface ViewTraceResult {
108
- trace_id: string;
109
- spans?: TraceAnalystSpan[];
110
- oversized?: ViewTraceOversized;
111
- }
112
- interface ViewTraceOversized {
113
- span_count: number;
114
- /** Names with their counts, sorted desc. Capped at 20 entries. */
115
- top_span_names: Array<[string, number]>;
116
- /** Largest single span body (bytes after attribute-cap projection). */
117
- span_response_bytes_max: number;
118
- error_span_count: number;
119
- }
120
- interface ViewSpansResult {
121
- trace_id: string;
122
- spans: TraceAnalystSpan[];
123
- /** Number of requested span ids that were not found in the trace. */
124
- missing_span_ids: string[];
125
- /** Number of attribute fields truncated to fit the per-attribute cap. */
126
- truncated_attribute_count: number;
127
- }
128
- interface SpanMatchRecord {
129
- trace_id: string;
130
- span_id: string;
131
- span_name: string;
132
- span_kind: TraceAnalystSpanKind;
133
- /** JSON pointer-style path to the matched value, e.g.
134
- * `attributes."llm.input_messages"[2].content`. */
135
- attribute_path: string;
136
- matched_text: string;
137
- context_before: string;
138
- context_after: string;
139
- match_offset: number;
140
- }
141
- interface SearchTraceResult {
142
- trace_id: string;
143
- hits: SpanMatchRecord[];
144
- total_matches: number;
145
- has_more: boolean;
146
- }
147
- interface SearchSpanResult {
148
- trace_id: string;
149
- span_id: string;
150
- hits: SpanMatchRecord[];
151
- total_matches: number;
152
- has_more: boolean;
153
- }
154
- /** Tunable byte budgets for bounded RLM tool output. */
155
- interface TraceAnalystByteBudgets {
156
- /** Max bytes any single tool response may emit. Hard ceiling enforced
157
- * by the store; oversized → summary. Default 150_000. */
158
- perCallByteCeiling: number;
159
- /** Per-attribute string truncation cap on `viewTrace` (discovery scan).
160
- * Default 4096. */
161
- perAttributeViewBudget: number;
162
- /** Per-attribute string truncation cap on `viewSpans` (surgical reads).
163
- * Default 16384. */
164
- perAttributeSpanBudget: number;
165
- /** Per-attribute cap on a single match record's `matched_text` and
166
- * context window. Default 1024. */
167
- perMatchTextBudget: number;
168
- }
169
- declare const DEFAULT_TRACE_ANALYST_BUDGETS: TraceAnalystByteBudgets;
170
- /** Marker substituted in place of truncated string payloads. Callers
171
- * parsing tool output can detect it deterministically. */
172
- declare const TRACE_ANALYST_TRUNCATION_MARKER_PREFIX = "[trace-analyst truncated:";
173
-
174
- /**
175
- * `TraceAnalysisStore` — read-side interface the trace-analyst calls
176
- * through. Six operations, all bounded:
177
- *
178
- * - `getOverview(filters?)` — dataset rollup + sample trace ids.
179
- * - `queryTraces(filters?, limit, offset)` — paginated summaries.
180
- * - `countTraces(filters?)` — cheap count without materialisation.
181
- * - `viewTrace(trace_id, perAttrCap)` — full span list, oversized → summary.
182
- * - `viewSpans(trace_id, span_ids, perAttrCap)` — surgical span fetch.
183
- * - `searchTrace(trace_id, regex, max_matches)` — bounded regex hits.
184
- * - `searchSpan(trace_id, span_id, regex, max_matches)` — single-span search.
185
- *
186
- * Multiple implementations ship in the core (`OtlpFileTraceStore`).
187
- * Downstream callers can supply their own — e.g. a DuckDB-backed
188
- * adapter or an in-memory adapter for tests — by implementing this
189
- * interface.
190
- *
191
- * Filters compose with AND semantics. Empty/undefined fields impose
192
- * no constraint. `regex_pattern` is the only opt-in raw-bytes scan —
193
- * implementations may skip it via `count`/`overview` when not set.
194
- */
195
-
196
- interface TraceAnalysisStore {
197
- getOverview(filters?: TraceAnalystFilters): Promise<DatasetOverview>;
198
- queryTraces(opts: {
199
- filters?: TraceAnalystFilters;
200
- limit: number;
201
- offset?: number;
202
- }): Promise<QueryTracesPage>;
203
- countTraces(filters?: TraceAnalystFilters): Promise<number>;
204
- viewTrace(opts: {
205
- trace_id: string;
206
- /** Override per-attribute byte cap. Defaults to discovery budget. */
207
- per_attribute_byte_cap?: number;
208
- }): Promise<ViewTraceResult>;
209
- viewSpans(opts: {
210
- trace_id: string;
211
- span_ids: readonly string[];
212
- /** Override per-attribute byte cap. Defaults to surgical budget. */
213
- per_attribute_byte_cap?: number;
214
- }): Promise<ViewSpansResult>;
215
- searchTrace(opts: {
216
- trace_id: string;
217
- regex_pattern: string;
218
- /** Hard cap on matches returned. Default 50. */
219
- max_matches?: number;
220
- }): Promise<SearchTraceResult>;
221
- searchSpan(opts: {
222
- trace_id: string;
223
- span_id: string;
224
- regex_pattern: string;
225
- max_matches?: number;
226
- }): Promise<SearchSpanResult>;
227
- }
228
-
229
- interface AnalyzeTracesInput {
230
- /** The user-facing question. Domain framing belongs here, not in the
231
- * actor description. */
232
- question: string;
233
- }
234
- interface AnalyzeTracesResult {
235
- /** The responder's prose answer. */
236
- answer: string;
237
- /** Bulleted findings extracted from the responder's structured output. */
238
- findings: string[];
239
- /** Per-actor-turn snapshots captured via `actorTurnCallback`. */
240
- turns: AnalyzeTracesTurnSnapshot[];
241
- /** Total turns the actor took. */
242
- turnCount: number;
243
- /** Token usage by role. */
244
- usage: TraceAnalystUsage;
245
- /** Full system + assistant + tool message log by role. */
246
- chatLog: TraceAnalystChatLog;
247
- /** Prompt version that produced this run. */
248
- actorPromptVersion: string;
249
- }
250
- interface TraceAnalystUsage {
251
- actor: TraceAnalystUsageEntry[];
252
- responder: TraceAnalystUsageEntry[];
253
- }
254
- interface TraceAnalystUsageEntry {
255
- [key: string]: unknown;
256
- }
257
- interface TraceAnalystChatLog {
258
- actor: TraceAnalystChatMessage[];
259
- responder: TraceAnalystChatMessage[];
260
- }
261
- interface TraceAnalystChatMessage {
262
- [key: string]: unknown;
263
- }
264
- interface AnalyzeTracesTurnSnapshot {
265
- turn: number;
266
- isError: boolean;
267
- /** The JS code the actor produced for this turn. */
268
- code: string;
269
- /** The formatted action-log entry the actor sees on the next turn. */
270
- output: string;
271
- /** Provider thought (when `actorOptions.showThoughts` is true and the
272
- * provider returns it). */
273
- thought?: string;
274
- }
275
- interface AnalyzeTracesOptions {
276
- /** Trace data source. Pass either an OTLP-JSONL path or a custom store. */
277
- source: string | TraceAnalysisStore;
278
- /** Caller-provided AxAIService. */
279
- ai: AxAIService;
280
- /** Model id forwarded to actor + responder. */
281
- model?: string;
282
- /** Recursion depth. 0 = no sub-agent dispatch. Default 1. */
283
- maxDepth?: number;
284
- /** Maximum actor turns. Default 12. */
285
- maxTurns?: number;
286
- /** Maximum parallel sub-agent calls in batched llmQuery. Default 2. */
287
- maxParallelSubagents?: number;
288
- /** Override the actor description. */
289
- actorDescription?: string;
290
- /** Override the subagent description. */
291
- subagentDescription?: string;
292
- /** Per-turn observability hook. */
293
- onTurn?: (turn: AnalyzeTracesTurnSnapshot) => void | Promise<void>;
294
- /** Override max runtime characters per turn. Default 6000. */
295
- maxRuntimeChars?: number;
296
- /** When set, every turn's snapshot is appended to this JSONL file
297
- * immediately. If the analyst crashes mid-loop (provider 503,
298
- * network error, validator reject) the partial reasoning is still
299
- * on disk. Replay the file with the responder afterward to recover
300
- * evidence. */
301
- progressLogPath?: string;
302
- }
303
- /**
304
- * Run the trace analyst.
305
- *
306
- * Throws:
307
- * - `TraceFileMissingError` if `source` is a path and doesn't exist.
308
- * - `AxAgentClarificationError` if the analyst asks for clarification.
309
- * - Provider errors (auth, rate limits) propagate from the AI service.
310
- */
311
- declare function analyzeTraces(input: AnalyzeTracesInput, options: AnalyzeTracesOptions): Promise<AnalyzeTracesResult>;
9
+ import { AxFunction } from '@ax-llm/ax';
312
10
 
313
11
  /**
314
12
  * Trace-analyst auto-execution hook.
@@ -586,4 +284,4 @@ declare function traceAnalystFunctionGroup(opts: BuildTraceAnalystToolsOpts): {
586
284
  functions: AxFunction[];
587
285
  };
588
286
 
589
- export { type AnalyzeTracesInput, type AnalyzeTracesOptions, type AnalyzeTracesResult, type AnalyzeTracesTurnSnapshot, DEFAULT_TRACE_ANALYST_BUDGETS, type DatasetOverview, OtlpFileTraceStore, type OtlpFileTraceStoreOptions, type QueryTracesPage, RunCompleteHook, RunCompleteHookContext, type SearchSpanResult, type SearchTraceResult, type SpanMatchRecord, SpanNotFoundError, TRACE_ANALYST_ACTOR_DESCRIPTION, TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION, TRACE_ANALYST_SUBAGENT_DESCRIPTION, TRACE_ANALYST_TRUNCATION_MARKER_PREFIX, type TraceAnalysisStore, type TraceAnalystByteBudgets, type TraceAnalystFilters, type TraceAnalystHookOptions, type TraceAnalystSpan, type TraceAnalystSpanKind, type TraceAnalystSpanStatus, type TraceAnalystTraceSummary, TraceFileMissingError, type TraceInsightContext, type TraceInsightFinding, type TraceInsightPanelRole, type TraceInsightPromptInput, type TraceInsightQualityGate, type TraceInsightQuestion, type TraceInsightReadiness, type TraceInsightSuite, type TraceInsightTask, TraceNotFoundError, type ViewSpansResult, type ViewTraceOversized, type ViewTraceResult, analyzeTraces, buildTraceAnalystTools, buildTraceInsightContext, buildTraceInsightPrompt, defaultTraceInsightPanel, describeTraceInsightScope, domainEvidencePattern, inferDomainKeywords, planTraceInsightQuestions, scoreTraceInsightReadiness, tokenizeDomainWords, traceAnalystFunctionGroup, traceAnalystOnRunComplete };
287
+ export { AnalyzeTracesOptions, AnalyzeTracesResult, DatasetOverview, OtlpFileTraceStore, type OtlpFileTraceStoreOptions, QueryTracesPage, RunCompleteHook, RunCompleteHookContext, SearchSpanResult, SearchTraceResult, SpanNotFoundError, TRACE_ANALYST_ACTOR_DESCRIPTION, TRACE_ANALYST_ACTOR_DESCRIPTION_VERSION, TRACE_ANALYST_SUBAGENT_DESCRIPTION, TraceAnalysisStore, TraceAnalystFilters, type TraceAnalystHookOptions, TraceFileMissingError, type TraceInsightContext, type TraceInsightFinding, type TraceInsightPanelRole, type TraceInsightPromptInput, type TraceInsightQualityGate, type TraceInsightQuestion, type TraceInsightReadiness, type TraceInsightSuite, type TraceInsightTask, TraceNotFoundError, ViewSpansResult, ViewTraceResult, buildTraceAnalystTools, buildTraceInsightContext, buildTraceInsightPrompt, defaultTraceInsightPanel, describeTraceInsightScope, domainEvidencePattern, inferDomainKeywords, planTraceInsightQuestions, scoreTraceInsightReadiness, tokenizeDomainWords, traceAnalystFunctionGroup, traceAnalystOnRunComplete };