@axlsdk/studio 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,35 +1,164 @@
1
1
  import * as hono_ws from 'hono/ws';
2
+ import { C as ConnectionManager, S as StudioEnv, a as CostData } from '../connection-manager-BMPahDuY.cjs';
3
+ export { B as BroadcastTarget } from '../connection-manager-BMPahDuY.cjs';
2
4
  import * as hono_types from 'hono/types';
3
5
  import { Hono } from 'hono';
4
- import { AxlRuntime } from '@axlsdk/axl';
5
- import { C as ConnectionManager, a as CostData, S as StudioEnv } from '../connection-manager-B7AWpsCD.cjs';
6
- export { B as BroadcastTarget } from '../connection-manager-B7AWpsCD.cjs';
6
+ import { AxlRuntime, TraceEvent, ExecutionInfo, EvalHistoryEntry } from '@axlsdk/axl';
7
7
 
8
8
  /**
9
- * Accumulates cost data from trace events.
10
- * Broadcasts updates to the 'costs' WS channel.
9
+ * Pure reducer functions for each aggregate panel.
10
+ * Each reducer is a pure (state, source) => state function — no I/O, no mutation.
11
11
  */
12
- declare class CostAggregator {
13
- private connMgr;
14
- private data;
15
- constructor(connMgr: ConnectionManager);
16
- /** Process a trace event and update cost data. */
17
- onTrace(event: {
18
- type?: string;
19
- agent?: string;
20
- model?: string;
21
- workflow?: string;
22
- cost?: number;
23
- tokens?: {
24
- input?: number;
25
- output?: number;
26
- reasoning?: number;
27
- };
28
- }): void;
29
- /** Get current aggregated cost data. */
30
- getData(): CostData;
31
- /** Reset all accumulated data. */
32
- reset(): void;
12
+
13
+ type EvalTrendRun = {
14
+ timestamp: number;
15
+ id: string;
16
+ scores: Record<string, number>;
17
+ cost: number;
18
+ /** Primary model for this run (first entry of `metadata.models`). Undefined
19
+ * when the run has no recorded models (e.g., legacy data or test harnesses). */
20
+ model?: string;
21
+ /** Total run duration in ms (from `EvalResult.duration`). */
22
+ duration?: number;
23
+ };
24
+ type EvalTrendEntry = {
25
+ runs: EvalTrendRun[];
26
+ latestScores: Record<string, number>;
27
+ scoreMean: Record<string, number>;
28
+ scoreStd: Record<string, number>;
29
+ costTotal: number;
30
+ runCount: number;
31
+ };
32
+ type EvalTrendData = {
33
+ byEval: Record<string, EvalTrendEntry>;
34
+ totalRuns: number;
35
+ totalCost: number;
36
+ };
37
+ type WorkflowStatsData = {
38
+ byWorkflow: Record<string, {
39
+ total: number;
40
+ completed: number;
41
+ failed: number;
42
+ /** Bounded sorted array of recent durations for p50/p95. Max MAX_DURATIONS entries. */
43
+ durations: number[];
44
+ durationSum: number;
45
+ avgDuration: number;
46
+ }>;
47
+ totalExecutions: number;
48
+ failureRate: number;
49
+ };
50
+ type TraceStatsData = {
51
+ eventTypeCounts: Record<string, number>;
52
+ byTool: Record<string, {
53
+ calls: number;
54
+ denied: number;
55
+ approved: number;
56
+ }>;
57
+ retryByAgent: Record<string, {
58
+ schema: number;
59
+ validate: number;
60
+ guardrail: number;
61
+ }>;
62
+ totalEvents: number;
63
+ };
64
+
65
+ type WindowId = '24h' | '7d' | '30d' | 'all';
66
+ type AggregateBroadcast<State> = {
67
+ snapshots: Record<WindowId, State>;
68
+ updatedAt: number;
69
+ };
70
+
71
+ type TraceReducer<State> = (acc: State, event: TraceEvent) => State;
72
+ type TraceAggregatorOptions<State> = {
73
+ runtime: AxlRuntime;
74
+ connMgr: ConnectionManager;
75
+ channel: string;
76
+ reducer: TraceReducer<State>;
77
+ emptyState: () => State;
78
+ windows: WindowId[];
79
+ /** Max executions to replay on rebuild. Default 2000. */
80
+ executionCap?: number;
81
+ /** Optional transform applied to each window's state before WS broadcast. */
82
+ broadcastTransform?: (state: State) => unknown;
83
+ };
84
+ /**
85
+ * Consumes TraceEvents from execution history and the live trace stream.
86
+ * Maintains per-window aggregate snapshots via a pure reducer.
87
+ */
88
+ declare class TraceAggregator<State> {
89
+ private snaps;
90
+ private interval?;
91
+ private listener?;
92
+ private options;
93
+ constructor(options: TraceAggregatorOptions<State>);
94
+ start(): Promise<void>;
95
+ rebuild(): Promise<void>;
96
+ getSnapshot(window: WindowId): State;
97
+ getAllSnapshots(): Record<WindowId, State>;
98
+ close(): void;
99
+ }
100
+
101
+ type ExecutionReducer<State> = (acc: State, execution: ExecutionInfo) => State;
102
+ type ExecutionAggregatorOptions<State> = {
103
+ runtime: AxlRuntime;
104
+ connMgr: ConnectionManager;
105
+ channel: string;
106
+ reducer: ExecutionReducer<State>;
107
+ emptyState: () => State;
108
+ windows: WindowId[];
109
+ /** Max executions to replay on rebuild. Default 2000. */
110
+ executionCap?: number;
111
+ /** Optional transform applied to each window's state before WS broadcast. */
112
+ broadcastTransform?: (state: State) => unknown;
113
+ };
114
+ /**
115
+ * Consumes ExecutionInfo at the execution granularity (not individual trace events).
116
+ * Live updates arrive via workflow_end trace events — the aggregator fetches the
117
+ * finalized ExecutionInfo and folds it.
118
+ */
119
+ declare class ExecutionAggregator<State> {
120
+ private snaps;
121
+ private interval?;
122
+ private listener?;
123
+ private options;
124
+ /** Generation counter to prevent stale async fold after rebuild. */
125
+ private generation;
126
+ constructor(options: ExecutionAggregatorOptions<State>);
127
+ start(): Promise<void>;
128
+ rebuild(): Promise<void>;
129
+ getSnapshot(window: WindowId): State;
130
+ getAllSnapshots(): Record<WindowId, State>;
131
+ close(): void;
132
+ }
133
+
134
+ type EvalReducer<State> = (acc: State, entry: EvalHistoryEntry) => State;
135
+ type EvalAggregatorOptions<State> = {
136
+ runtime: AxlRuntime;
137
+ connMgr: ConnectionManager;
138
+ channel: string;
139
+ reducer: EvalReducer<State>;
140
+ emptyState: () => State;
141
+ windows: WindowId[];
142
+ /** Max eval entries to replay on rebuild. Default 500. */
143
+ entryCap?: number;
144
+ /** Optional transform applied to each window's state before WS broadcast. */
145
+ broadcastTransform?: (state: State) => unknown;
146
+ };
147
+ /**
148
+ * Consumes EvalHistoryEntry. Rebuilds from runtime.getEvalHistory().
149
+ * Live updates arrive via runtime.on('eval_result', entry).
150
+ */
151
+ declare class EvalAggregator<State> {
152
+ private snaps;
153
+ private interval?;
154
+ private listener?;
155
+ private options;
156
+ constructor(options: EvalAggregatorOptions<State>);
157
+ start(): Promise<void>;
158
+ rebuild(): Promise<void>;
159
+ getSnapshot(window: WindowId): State;
160
+ getAllSnapshots(): Record<WindowId, State>;
161
+ close(): void;
33
162
  }
34
163
 
35
164
  type CreateServerOptions = {
@@ -48,7 +177,11 @@ type CreateServerOptions = {
48
177
  declare function createServer(options: CreateServerOptions): {
49
178
  app: Hono<StudioEnv, hono_types.BlankSchema, "/">;
50
179
  connMgr: ConnectionManager;
51
- costAggregator: CostAggregator;
180
+ costAggregator: TraceAggregator<CostData>;
181
+ workflowStatsAggregator: ExecutionAggregator<WorkflowStatsData>;
182
+ traceStatsAggregator: TraceAggregator<TraceStatsData>;
183
+ evalTrendsAggregator: EvalAggregator<EvalTrendData>;
184
+ aggregatorStartPromise: Promise<void | [void, void, void, void]>;
52
185
  /** Create WS handlers. Call before registering static/SPA routes are reached. */
53
186
  createWsHandlers: () => {
54
187
  onOpen(_event: Event, ws: hono_ws.WSContext): void;
@@ -57,6 +190,10 @@ declare function createServer(options: CreateServerOptions): {
57
190
  onError(_event: Event, ws: hono_ws.WSContext): void;
58
191
  };
59
192
  traceListener: (event: unknown) => void;
193
+ /** Abort all active streaming eval runs. */
194
+ closeActiveRuns: () => void;
195
+ /** Close all aggregators (clear intervals and unsubscribe listeners). */
196
+ closeAggregators: () => void;
60
197
  };
61
198
 
62
- export { ConnectionManager, CostAggregator, type CreateServerOptions, StudioEnv, createServer };
199
+ export { type AggregateBroadcast, ConnectionManager, type CreateServerOptions, EvalAggregator, ExecutionAggregator, StudioEnv, TraceAggregator, type WindowId, createServer };
@@ -1,35 +1,164 @@
1
1
  import * as hono_ws from 'hono/ws';
2
+ import { C as ConnectionManager, S as StudioEnv, a as CostData } from '../connection-manager-BMPahDuY.js';
3
+ export { B as BroadcastTarget } from '../connection-manager-BMPahDuY.js';
2
4
  import * as hono_types from 'hono/types';
3
5
  import { Hono } from 'hono';
4
- import { AxlRuntime } from '@axlsdk/axl';
5
- import { C as ConnectionManager, a as CostData, S as StudioEnv } from '../connection-manager-B7AWpsCD.js';
6
- export { B as BroadcastTarget } from '../connection-manager-B7AWpsCD.js';
6
+ import { AxlRuntime, TraceEvent, ExecutionInfo, EvalHistoryEntry } from '@axlsdk/axl';
7
7
 
8
8
  /**
9
- * Accumulates cost data from trace events.
10
- * Broadcasts updates to the 'costs' WS channel.
9
+ * Pure reducer functions for each aggregate panel.
10
+ * Each reducer is a pure (state, source) => state function — no I/O, no mutation.
11
11
  */
12
- declare class CostAggregator {
13
- private connMgr;
14
- private data;
15
- constructor(connMgr: ConnectionManager);
16
- /** Process a trace event and update cost data. */
17
- onTrace(event: {
18
- type?: string;
19
- agent?: string;
20
- model?: string;
21
- workflow?: string;
22
- cost?: number;
23
- tokens?: {
24
- input?: number;
25
- output?: number;
26
- reasoning?: number;
27
- };
28
- }): void;
29
- /** Get current aggregated cost data. */
30
- getData(): CostData;
31
- /** Reset all accumulated data. */
32
- reset(): void;
12
+
13
+ type EvalTrendRun = {
14
+ timestamp: number;
15
+ id: string;
16
+ scores: Record<string, number>;
17
+ cost: number;
18
+ /** Primary model for this run (first entry of `metadata.models`). Undefined
19
+ * when the run has no recorded models (e.g., legacy data or test harnesses). */
20
+ model?: string;
21
+ /** Total run duration in ms (from `EvalResult.duration`). */
22
+ duration?: number;
23
+ };
24
+ type EvalTrendEntry = {
25
+ runs: EvalTrendRun[];
26
+ latestScores: Record<string, number>;
27
+ scoreMean: Record<string, number>;
28
+ scoreStd: Record<string, number>;
29
+ costTotal: number;
30
+ runCount: number;
31
+ };
32
+ type EvalTrendData = {
33
+ byEval: Record<string, EvalTrendEntry>;
34
+ totalRuns: number;
35
+ totalCost: number;
36
+ };
37
+ type WorkflowStatsData = {
38
+ byWorkflow: Record<string, {
39
+ total: number;
40
+ completed: number;
41
+ failed: number;
42
+ /** Bounded sorted array of recent durations for p50/p95. Max MAX_DURATIONS entries. */
43
+ durations: number[];
44
+ durationSum: number;
45
+ avgDuration: number;
46
+ }>;
47
+ totalExecutions: number;
48
+ failureRate: number;
49
+ };
50
+ type TraceStatsData = {
51
+ eventTypeCounts: Record<string, number>;
52
+ byTool: Record<string, {
53
+ calls: number;
54
+ denied: number;
55
+ approved: number;
56
+ }>;
57
+ retryByAgent: Record<string, {
58
+ schema: number;
59
+ validate: number;
60
+ guardrail: number;
61
+ }>;
62
+ totalEvents: number;
63
+ };
64
+
65
+ type WindowId = '24h' | '7d' | '30d' | 'all';
66
+ type AggregateBroadcast<State> = {
67
+ snapshots: Record<WindowId, State>;
68
+ updatedAt: number;
69
+ };
70
+
71
+ type TraceReducer<State> = (acc: State, event: TraceEvent) => State;
72
+ type TraceAggregatorOptions<State> = {
73
+ runtime: AxlRuntime;
74
+ connMgr: ConnectionManager;
75
+ channel: string;
76
+ reducer: TraceReducer<State>;
77
+ emptyState: () => State;
78
+ windows: WindowId[];
79
+ /** Max executions to replay on rebuild. Default 2000. */
80
+ executionCap?: number;
81
+ /** Optional transform applied to each window's state before WS broadcast. */
82
+ broadcastTransform?: (state: State) => unknown;
83
+ };
84
+ /**
85
+ * Consumes TraceEvents from execution history and the live trace stream.
86
+ * Maintains per-window aggregate snapshots via a pure reducer.
87
+ */
88
+ declare class TraceAggregator<State> {
89
+ private snaps;
90
+ private interval?;
91
+ private listener?;
92
+ private options;
93
+ constructor(options: TraceAggregatorOptions<State>);
94
+ start(): Promise<void>;
95
+ rebuild(): Promise<void>;
96
+ getSnapshot(window: WindowId): State;
97
+ getAllSnapshots(): Record<WindowId, State>;
98
+ close(): void;
99
+ }
100
+
101
+ type ExecutionReducer<State> = (acc: State, execution: ExecutionInfo) => State;
102
+ type ExecutionAggregatorOptions<State> = {
103
+ runtime: AxlRuntime;
104
+ connMgr: ConnectionManager;
105
+ channel: string;
106
+ reducer: ExecutionReducer<State>;
107
+ emptyState: () => State;
108
+ windows: WindowId[];
109
+ /** Max executions to replay on rebuild. Default 2000. */
110
+ executionCap?: number;
111
+ /** Optional transform applied to each window's state before WS broadcast. */
112
+ broadcastTransform?: (state: State) => unknown;
113
+ };
114
+ /**
115
+ * Consumes ExecutionInfo at the execution granularity (not individual trace events).
116
+ * Live updates arrive via workflow_end trace events — the aggregator fetches the
117
+ * finalized ExecutionInfo and folds it.
118
+ */
119
+ declare class ExecutionAggregator<State> {
120
+ private snaps;
121
+ private interval?;
122
+ private listener?;
123
+ private options;
124
+ /** Generation counter to prevent stale async fold after rebuild. */
125
+ private generation;
126
+ constructor(options: ExecutionAggregatorOptions<State>);
127
+ start(): Promise<void>;
128
+ rebuild(): Promise<void>;
129
+ getSnapshot(window: WindowId): State;
130
+ getAllSnapshots(): Record<WindowId, State>;
131
+ close(): void;
132
+ }
133
+
134
+ type EvalReducer<State> = (acc: State, entry: EvalHistoryEntry) => State;
135
+ type EvalAggregatorOptions<State> = {
136
+ runtime: AxlRuntime;
137
+ connMgr: ConnectionManager;
138
+ channel: string;
139
+ reducer: EvalReducer<State>;
140
+ emptyState: () => State;
141
+ windows: WindowId[];
142
+ /** Max eval entries to replay on rebuild. Default 500. */
143
+ entryCap?: number;
144
+ /** Optional transform applied to each window's state before WS broadcast. */
145
+ broadcastTransform?: (state: State) => unknown;
146
+ };
147
+ /**
148
+ * Consumes EvalHistoryEntry. Rebuilds from runtime.getEvalHistory().
149
+ * Live updates arrive via runtime.on('eval_result', entry).
150
+ */
151
+ declare class EvalAggregator<State> {
152
+ private snaps;
153
+ private interval?;
154
+ private listener?;
155
+ private options;
156
+ constructor(options: EvalAggregatorOptions<State>);
157
+ start(): Promise<void>;
158
+ rebuild(): Promise<void>;
159
+ getSnapshot(window: WindowId): State;
160
+ getAllSnapshots(): Record<WindowId, State>;
161
+ close(): void;
33
162
  }
34
163
 
35
164
  type CreateServerOptions = {
@@ -48,7 +177,11 @@ type CreateServerOptions = {
48
177
  declare function createServer(options: CreateServerOptions): {
49
178
  app: Hono<StudioEnv, hono_types.BlankSchema, "/">;
50
179
  connMgr: ConnectionManager;
51
- costAggregator: CostAggregator;
180
+ costAggregator: TraceAggregator<CostData>;
181
+ workflowStatsAggregator: ExecutionAggregator<WorkflowStatsData>;
182
+ traceStatsAggregator: TraceAggregator<TraceStatsData>;
183
+ evalTrendsAggregator: EvalAggregator<EvalTrendData>;
184
+ aggregatorStartPromise: Promise<void | [void, void, void, void]>;
52
185
  /** Create WS handlers. Call before registering static/SPA routes are reached. */
53
186
  createWsHandlers: () => {
54
187
  onOpen(_event: Event, ws: hono_ws.WSContext): void;
@@ -57,6 +190,10 @@ declare function createServer(options: CreateServerOptions): {
57
190
  onError(_event: Event, ws: hono_ws.WSContext): void;
58
191
  };
59
192
  traceListener: (event: unknown) => void;
193
+ /** Abort all active streaming eval runs. */
194
+ closeActiveRuns: () => void;
195
+ /** Close all aggregators (clear intervals and unsubscribe listeners). */
196
+ closeAggregators: () => void;
60
197
  };
61
198
 
62
- export { ConnectionManager, CostAggregator, type CreateServerOptions, StudioEnv, createServer };
199
+ export { type AggregateBroadcast, ConnectionManager, type CreateServerOptions, EvalAggregator, ExecutionAggregator, StudioEnv, TraceAggregator, type WindowId, createServer };
@@ -1,11 +1,15 @@
1
1
  import {
2
2
  ConnectionManager,
3
- CostAggregator,
3
+ EvalAggregator,
4
+ ExecutionAggregator,
5
+ TraceAggregator,
4
6
  createServer
5
- } from "../chunk-HUKUQDYL.js";
7
+ } from "../chunk-IPDMFFTQ.js";
6
8
  export {
7
9
  ConnectionManager,
8
- CostAggregator,
10
+ EvalAggregator,
11
+ ExecutionAggregator,
12
+ TraceAggregator,
9
13
  createServer
10
14
  };
11
15
  //# sourceMappingURL=index.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@axlsdk/studio",
3
- "version": "0.14.0",
3
+ "version": "0.15.0",
4
4
  "description": "Local development UI for debugging, testing, and iterating on Axl agents and workflows",
5
5
  "type": "module",
6
6
  "main": "./dist/server/index.cjs",
@@ -35,22 +35,26 @@
35
35
  "dist"
36
36
  ],
37
37
  "dependencies": {
38
- "hono": "^4.7.0",
39
38
  "@hono/node-server": "^1.13.0",
40
39
  "@hono/node-ws": "^1.1.0",
40
+ "hono": "^4.7.0",
41
41
  "tsx": "^4.19.0",
42
42
  "ws": "^8.0.0",
43
- "@axlsdk/axl": "0.14.0"
43
+ "@axlsdk/axl": "0.15.0"
44
44
  },
45
45
  "devDependencies": {
46
46
  "@tailwindcss/vite": "^4.1.0",
47
47
  "@tanstack/react-query": "^5.64.0",
48
+ "@testing-library/jest-dom": "^6.9.1",
49
+ "@testing-library/react": "^16.3.2",
50
+ "@testing-library/user-event": "^14.6.1",
48
51
  "@types/node": "^25.2.3",
49
- "@types/ws": "^8.0.0",
50
52
  "@types/react": "^19.0.0",
51
53
  "@types/react-dom": "^19.0.0",
54
+ "@types/ws": "^8.0.0",
52
55
  "@vitejs/plugin-react": "^4.3.0",
53
56
  "concurrently": "^9.1.0",
57
+ "jsdom": "^29.0.2",
54
58
  "lucide-react": "^0.474.0",
55
59
  "react": "^19.0.0",
56
60
  "react-dom": "^19.0.0",
@@ -61,10 +65,10 @@
61
65
  "vite": "^6.0.0",
62
66
  "vitest": "^3.0.0",
63
67
  "zod": "^4.0.0",
64
- "@axlsdk/testing": "0.14.0"
68
+ "@axlsdk/testing": "0.15.0"
65
69
  },
66
70
  "peerDependencies": {
67
- "@axlsdk/eval": "0.14.0"
71
+ "@axlsdk/eval": "0.15.0"
68
72
  },
69
73
  "peerDependenciesMeta": {
70
74
  "@axlsdk/eval": {