teleportation-cli 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ import { aggregateBenchmarksByTaskClass, scoreHarnessModelBenchmark } from './benchmark.js';
2
+ import { mineRequestPatterns } from './transcript-mine.js';
3
+
4
/**
 * Count interruption markers and local-command wrapper markers that appear in
 * the prompt-like string fields of each event's `metadata`.
 *
 * Matching is case-insensitive (field text is lower-cased first). Within one
 * field the "for tool use" interruption marker takes precedence over the plain
 * interruption marker, so a field is counted as at most one interruption.
 *
 * @param {Array<object>} events - Events whose `metadata` is inspected.
 * @returns {object} The five raw counters plus `interrupted_total`,
 *   `interruption_rate` (interruptions / events.length, rounded to 4 decimals)
 *   and `wrapper_marker_total`. All values are 0 when `events` is not a
 *   non-empty array.
 */
function collectOperationalTelemetry(events) {
  // Metadata keys that may carry user-authored prompt text.
  const promptKeys = [
    'user_message',
    'user_prompt',
    'prompt',
    'message',
    'instruction',
    'query',
    'task_prompt',
  ];

  const counts = {
    request_interrupted: 0,
    request_interrupted_tool_use: 0,
    local_command_caveat: 0,
    local_command_stdout: 0,
    local_command_stderr: 0,
  };

  const hasEvents = Array.isArray(events) && events.length > 0;

  if (hasEvents) {
    events.forEach((event) => {
      const meta = event?.metadata || {};

      promptKeys.forEach((key) => {
        const raw = meta[key];
        if (typeof raw !== 'string') return;

        const lowered = raw.toLowerCase();

        // The tool-use marker is checked first; a match suppresses the
        // plain-interruption check for this field.
        if (lowered.includes('[request interrupted by user for tool use]')) {
          counts.request_interrupted_tool_use += 1;
        } else if (lowered.includes('[request interrupted by user]')) {
          counts.request_interrupted += 1;
        }

        if (lowered.includes('<local-command-caveat>')) counts.local_command_caveat += 1;
        if (lowered.includes('<local-command-stdout>')) counts.local_command_stdout += 1;
        if (lowered.includes('<local-command-stderr>')) counts.local_command_stderr += 1;
      });
    });
  }

  const interrupted = counts.request_interrupted + counts.request_interrupted_tool_use;
  const wrappers =
    counts.local_command_caveat + counts.local_command_stdout + counts.local_command_stderr;

  return {
    ...counts,
    interrupted_total: interrupted,
    interruption_rate: hasEvents ? Number((interrupted / events.length).toFixed(4)) : 0,
    wrapper_marker_total: wrappers,
  };
}
61
+
62
/**
 * Score every benchmark slice and split the slices into a per-category
 * leaderboard and a "no route" set.
 *
 * Every slice lands in exactly one of the two outputs. A slice is routable only
 * when its benchmark result is eligible, its score is at or above
 * `minRouteScore`, and its escalation rate is at or below `maxEscalationRate`.
 * The checks are phrased positively so that a missing or NaN score/escalation
 * rate counts as "not proven routable" and is reported in `no_route_set`
 * instead of silently disappearing from both outputs.
 *
 * @param {Array} events - Forwarded unchanged to `scoreHarnessModelBenchmark`.
 * @param {Array<object>} slices - Benchmark slices; `task_category`, `provider`,
 *   `model`, `escalation_rate` and `sample_size` are read from each.
 * @param {object} [options]
 * @param {number} [options.minSessionsForRouting=30] - Integer passed to the
 *   scorer as `minSessions`; non-integers fall back to the default.
 * @param {number} [options.minRouteScore=0.45] - Minimum score to be routable.
 * @param {number} [options.maxEscalationRate=0.5] - Maximum escalation rate to
 *   be routable.
 * @returns {{leaderboard: Array<object>, no_route_set: Array<object>}}
 *   `leaderboard` holds the best-scoring routable slice per task category,
 *   sorted by score descending. `no_route_set` holds every non-routable slice
 *   with a `reason`, sorted by task category.
 */
function buildLeaderboardAndNoRouteSet(events, slices, options = {}) {
  const minSessionsForRouting = Number.isInteger(options.minSessionsForRouting)
    ? options.minSessionsForRouting
    : 30;
  const minRouteScore = typeof options.minRouteScore === 'number' ? options.minRouteScore : 0.45;
  const maxEscalationRate =
    typeof options.maxEscalationRate === 'number' ? options.maxEscalationRate : 0.5;

  const noRouteSet = [];
  const bestByTaskCategory = new Map();

  for (const slice of slices) {
    const result = scoreHarnessModelBenchmark(events, {
      harness: slice.provider,
      model: slice.model,
      taskCategory: slice.task_category,
      minSessions: minSessionsForRouting,
    });

    // Comparisons against undefined/NaN are always false, so these flags are
    // only true when the value is a real number inside the allowed range.
    const scoreOk = result.score >= minRouteScore;
    const escalationOk = slice.escalation_rate <= maxEscalationRate;

    if (result.eligible && scoreOk && escalationOk) {
      const current = bestByTaskCategory.get(slice.task_category);
      // Strict ">" keeps the first slice seen when two scores tie.
      if (!current || result.score > current.score) {
        bestByTaskCategory.set(slice.task_category, {
          task_category: slice.task_category,
          provider: slice.provider,
          model: slice.model,
          score: result.score,
          sample_size: result.sample_size,
          confidence_interval: result.confidence_interval,
        });
      }
      continue;
    }

    // Same precedence as before: ineligibility, then score, then escalation.
    let reason;
    if (!result.eligible) {
      reason = result.reason;
    } else if (!scoreOk) {
      reason = 'low_route_score';
    } else {
      reason = 'high_escalation_rate';
    }

    noRouteSet.push({
      task_category: slice.task_category,
      provider: slice.provider,
      model: slice.model,
      reason,
      sample_size: slice.sample_size,
      score: result.score ?? null,
    });
  }

  return {
    leaderboard: Array.from(bestByTaskCategory.values()).sort((a, b) => b.score - a.score),
    // String() keeps the sort from throwing when a slice has no task_category.
    no_route_set: noRouteSet.sort((a, b) =>
      String(a.task_category).localeCompare(String(b.task_category)),
    ),
  };
}
127
+
128
/**
 * Rebuild the policy artifacts (request patterns, benchmark slices,
 * leaderboard, no-route set and operational telemetry) from a list of events.
 *
 * @param {Array} events - Events handed to the pattern miner, the benchmark
 *   aggregator, the leaderboard builder and the telemetry collector.
 * @param {object} [options]
 * @param {number} [options.benchmarkMinSessions=1] - Integer `minSessions` for
 *   benchmark aggregation.
 * @param {number} [options.patternMinOccurrences=2] - Integer `minOccurrences`
 *   for pattern mining.
 * @param {number} [options.topPatterns=25] - Integer `topK` for pattern mining.
 * @param {string} [options.scope='global'] - Label copied into the result.
 *   The whole `options` object is also forwarded to the leaderboard builder.
 * @returns {{generated_at: string, scope: string, summary: object, artifacts: object}}
 */
export function rebuildPolicyArtifacts(events, options = {}) {
  // Non-integer option values are ignored in favour of the fallback.
  const integerOr = (candidate, fallback) => (Number.isInteger(candidate) ? candidate : fallback);

  const requestPatterns = mineRequestPatterns(events, {
    minOccurrences: integerOr(options.patternMinOccurrences, 2),
    topK: integerOr(options.topPatterns, 25),
  });

  const slices = aggregateBenchmarksByTaskClass(events, {
    minSessions: integerOr(options.benchmarkMinSessions, 1),
  });

  const routing = buildLeaderboardAndNoRouteSet(events, slices, options);
  const telemetry = collectOperationalTelemetry(events);

  const summary = {
    events_processed: Array.isArray(events) ? events.length : 0,
    pattern_count: requestPatterns.length,
    benchmark_slice_count: slices.length,
    leaderboard_count: routing.leaderboard.length,
    no_route_count: routing.no_route_set.length,
    interrupted_total: telemetry.interrupted_total,
    interruption_rate: telemetry.interruption_rate,
    wrapper_marker_total: telemetry.wrapper_marker_total,
  };

  const artifacts = {
    request_patterns: requestPatterns,
    benchmark_slices: slices,
    benchmark_leaderboard: routing.leaderboard,
    no_route_set: routing.no_route_set,
    operational_telemetry: telemetry,
  };

  return {
    generated_at: new Date().toISOString(),
    scope: options.scope || 'global',
    summary,
    artifacts,
  };
}
@@ -0,0 +1,259 @@
1
+ /**
2
+ * Transcript Intelligence Schema
3
+ *
4
+ * Canonical event normalization for cross-provider transcript data.
5
+ */
6
+
7
// Task categories accepted on a normalized event. 'unknown' is the value
// normalizeTaskCategory() falls back to for anything unrecognised.
const TASK_CATEGORIES = new Set([
  'codebase-modification', 'api-orchestration', 'data-processing',
  'web-research', 'code-review', 'multi-tool-workflow',
  'unknown',
]);

// Canonical event types produced by mapTimelineTypeToEventType().
const EVENT_TYPES = new Set([
  'task_start', 'task_end',
  'tool_call',
  'approval_request', 'approval_decision',
  'override',
  'route_decision',
]);

// Accepted approval outcomes; 'none' is the fallback used by
// normalizeApprovalDecision().
const APPROVAL_DECISIONS = new Set([
  'approve',
  'reject',
  'escalate',
  'deny',
  'none',
]);

// Accepted origins of an approval decision; 'fallback' is the default used by
// normalizeApprovalSource().
const APPROVAL_SOURCES = new Set([
  'user',
  'policy',
  'fallback',
]);
29
+
30
/**
 * Convert a timestamp-like value into an ISO-8601 string.
 *
 * Accepts epoch milliseconds (number or all-digit string) or anything the
 * `Date` constructor can parse. Whenever the input is missing or does not
 * yield a valid date, the current time is returned instead.
 *
 * @param {number|string|Date|null|undefined} value - Raw timestamp.
 * @returns {string} ISO-8601 timestamp; never throws for number/string input.
 */
function toIsoTimestamp(value) {
  // Falsy input (undefined, null, '', 0, NaN) is treated as "no timestamp".
  if (!value) return new Date().toISOString();

  let parsed;
  if (typeof value === 'number') {
    parsed = new Date(value);
  } else if (typeof value === 'string' && /^\d+$/.test(value)) {
    // All-digit strings are epoch milliseconds, not date strings.
    parsed = new Date(Number(value));
  } else {
    parsed = new Date(value);
  }

  // toISOString() throws RangeError on an invalid Date. Numeric inputs such as
  // Infinity or values beyond the ±8.64e15 ms range produce one, so every
  // branch goes through this validity check before formatting.
  return Number.isNaN(parsed.getTime()) ? new Date().toISOString() : parsed.toISOString();
}
39
+
40
/**
 * Map an arbitrary value onto a known task category.
 *
 * @param {*} value - Candidate category; compared case-insensitively.
 * @returns {string} The lower-cased category when it is a member of
 *   TASK_CATEGORIES, otherwise 'unknown' (also for non-string input).
 */
function normalizeTaskCategory(value) {
  const candidate = typeof value === 'string' ? value.toLowerCase() : null;
  if (candidate !== null && TASK_CATEGORIES.has(candidate)) {
    return candidate;
  }
  return 'unknown';
}
45
+
46
/**
 * Map an arbitrary value onto a known approval decision.
 *
 * @param {*} value - Candidate decision; compared case-insensitively.
 * @returns {string} The lower-cased decision when it is a member of
 *   APPROVAL_DECISIONS, otherwise 'none' (also for non-string input).
 */
function normalizeApprovalDecision(value) {
  const candidate = typeof value === 'string' ? value.toLowerCase() : null;
  if (candidate !== null && APPROVAL_DECISIONS.has(candidate)) {
    return candidate;
  }
  return 'none';
}
51
+
52
/**
 * Map an arbitrary value onto a known approval decision source.
 *
 * @param {*} value - Candidate source; compared case-insensitively.
 * @returns {string} The lower-cased source when it is a member of
 *   APPROVAL_SOURCES, otherwise 'fallback' (also for non-string input).
 */
function normalizeApprovalSource(value) {
  const candidate = typeof value === 'string' ? value.toLowerCase() : null;
  if (candidate !== null && APPROVAL_SOURCES.has(candidate)) {
    return candidate;
  }
  return 'fallback';
}
57
+
58
// Timeline/relay event type -> canonical intelligence event type.
// Relay types with no canonical counterpart ('assistant_response',
// 'session_registered', 'compact_summary') are deliberately absent.
const TIMELINE_TYPE_TO_EVENT_TYPE = new Map([
  ['tool_use', 'tool_call'],
  ['tool_executed', 'tool_call'],
  ['tool_completed', 'tool_call'],
  ['tool_failed', 'tool_call'],
  ['approval_requested', 'approval_request'],
  ['approval_decided', 'approval_decision'],
  ['approval_decision', 'approval_decision'],
  ['route_decision', 'route_decision'],
  ['override', 'override'],
  ['task_started', 'task_start'],
  ['task_completed', 'task_end'],
]);

/**
 * Translate a timeline event type into its canonical event type.
 *
 * @param {*} type - Raw timeline event type.
 * @returns {string|null} The canonical event type, or null for unmapped and
 *   unknown types so callers can explicitly filter or flag them.
 */
function mapTimelineTypeToEventType(type) {
  return TIMELINE_TYPE_TO_EVENT_TYPE.get(type) ?? null;
}
88
+
89
/**
 * Derive the action class of a raw timeline event from its `type`.
 *
 * Both spellings of the approval-decision type ('approval_decided' and
 * 'approval_decision') map to 'approval_gate', matching
 * mapTimelineTypeToEventType(), which treats the two as the same canonical
 * event. Previously 'approval_decided' fell through to 'unknown'.
 *
 * @param {object} event - Raw timeline event; only `event.type` is read.
 * @returns {string} One of 'tool_execute', 'tool_failed', 'tool_completed',
 *   'assistant_response', 'approval_gate' or 'unknown'.
 */
function mapActionClass(event) {
  switch (event?.type) {
    case 'tool_use':
      return 'tool_execute';
    case 'tool_failed':
      return 'tool_failed';
    case 'tool_completed':
      return 'tool_completed';
    case 'assistant_response':
      return 'assistant_response';
    case 'approval_requested':
    case 'approval_decided':
    case 'approval_decision':
      return 'approval_gate';
    default:
      // NOTE(review): 'tool_executed' maps to a tool_call event type elsewhere
      // in this file but has no action class here — confirm whether it should
      // be 'tool_execute' or 'tool_completed' before adding it.
      return 'unknown';
  }
}
98
+
99
/**
 * Coerce a value to a finite number, or null when it does not carry one.
 *
 * Blank strings and booleans are rejected explicitly: `Number('')`,
 * `Number('  ')` and `Number(false)` are 0 and `Number(true)` is 1, which
 * would otherwise report a missing or garbage metric as a real measurement.
 *
 * @param {*} value - Raw metric value (number, numeric string, ...).
 * @returns {number|null} The finite numeric value, or null for null/undefined,
 *   blank strings, booleans, NaN, ±Infinity and non-numeric input.
 */
function toNumberOrNull(value) {
  if (value == null) return null;
  if (typeof value === 'boolean') return null;
  if (typeof value === 'string' && value.trim() === '') return null;

  const numeric = Number(value);
  return Number.isFinite(numeric) ? numeric : null;
}
104
+
105
/**
 * Normalize one raw timeline event into the canonical intelligence event shape.
 *
 * Identity fields (user, project, session, task, provider, model, task
 * category) come from `context`; everything else is derived from `event` and
 * `event.meta`.
 *
 * @param {object} event - Raw timeline event (`id`, `type`, `timestamp` or
 *   `occurred_at`, `source`, optional `meta`).
 * @param {object} [context] - Ambient identifiers applied to the event.
 * @returns {object} Canonical event with `approval`, `execution` and
 *   `metadata` sub-objects. `event_type` is null for unmapped raw types.
 */
export function normalizeTranscriptEvent(event, context = {}) {
  const canonicalType = mapTimelineTypeToEventType(event.type);
  const meta = event.meta;
  const booleanOrNull = (flag) => (typeof flag === 'boolean' ? flag : null);

  const approval = {
    requested: canonicalType === 'approval_request' || canonicalType === 'approval_decision',
    decision: normalizeApprovalDecision(meta?.decision),
    decision_source: normalizeApprovalSource(meta?.decision_source),
    confidence: toNumberOrNull(meta?.confidence),
  };

  const execution = {
    success: booleanOrNull(meta?.success),
    duration_ms: toNumberOrNull(meta?.duration_ms),
    tokens_used: toNumberOrNull(meta?.tokens_used),
    cost_usd: toNumberOrNull(meta?.cost_usd),
    loop_detected: booleanOrNull(meta?.loop_detected),
  };

  // Only safe, non-sensitive fields are copied into metadata.
  // Do NOT spread event.meta here — it may contain tool arguments, file
  // contents, or credentials that must not leave the ingestion layer.
  const metadata = {
    source: event.source ?? null,
    raw_type: event.type ?? null,
  };

  return {
    event_id: event.id || null,
    occurred_at: toIsoTimestamp(event.timestamp ?? event.occurred_at),
    user_id: context.user_id ?? null,
    project_id: context.project_id ?? null,
    session_id: context.session_id ?? null,
    task_id: context.task_id ?? null,
    task_category: normalizeTaskCategory(context.task_category),
    provider: context.provider ?? null,
    model: context.model ?? null,
    event_type: canonicalType,
    action_class: mapActionClass(event),
    tool_name: meta?.tool_name ?? null,
    approval,
    execution,
    metadata,
  };
}
144
+
145
/**
 * Normalize a list of raw timeline events with a shared context.
 *
 * @param {Array<object>} events - Raw timeline events.
 * @param {object} [context] - Context passed to normalizeTranscriptEvent()
 *   for every event.
 * @returns {Array<object>} One normalized event per input event, or an empty
 *   array when `events` is not an array.
 */
export function normalizeTranscriptEvents(events, context = {}) {
  const toCanonical = (rawEvent) => normalizeTranscriptEvent(rawEvent, context);
  return Array.isArray(events) ? events.map(toCanonical) : [];
}
149
+
150
/**
 * Check whether a value has the shape of a valid normalized event.
 *
 * @param {*} event - Candidate normalized event.
 * @returns {boolean} True only when `event` is an object whose `event_type`,
 *   `task_category`, `approval.decision` and `approval.decision_source` are
 *   members of their respective enum sets and whose `occurred_at` is present
 *   and parseable as a date.
 */
export function isValidNormalizedEvent(event) {
  if (event === null || typeof event !== 'object') return false;

  // Checks run in order and stop at the first failure.
  const checks = [
    () => EVENT_TYPES.has(event.event_type),
    () => TASK_CATEGORIES.has(event.task_category),
    () => Boolean(event.occurred_at) && !Number.isNaN(new Date(event.occurred_at).getTime()),
    () => Boolean(event.approval) && APPROVAL_DECISIONS.has(event.approval.decision),
    () => APPROVAL_SOURCES.has(event.approval.decision_source),
  ];

  return checks.every((check) => check());
}
159
+
160
+ // ============================================================================
161
+ // Raw Transcript Entry Normalizer (per-message, pre-timeline-event)
162
+ // ============================================================================
163
+
164
// Maximum number of characters kept in a normalized entry's `text_excerpt`.
const TEXT_EXCERPT_LIMIT = 500;

// Harness recorded on an entry when the caller does not supply one.
const DEFAULT_HARNESS = 'claude-code';

// Value stamped into `schema_version` on every normalized transcript entry.
const ENTRY_SCHEMA_VERSION = 'v1';
167
+
168
/**
 * Convert a timestamp-like value to epoch milliseconds.
 *
 * @param {*} value - Finite number, all-digit string, or a date string
 *   understood by `Date.parse`.
 * @returns {number} The value as epoch milliseconds, or `Date.now()` when the
 *   input cannot be interpreted.
 */
function toTimestampMs(value) {
  switch (typeof value) {
    case 'number':
      if (Number.isFinite(value)) return value;
      break;
    case 'string': {
      // All-digit strings are taken as epoch milliseconds verbatim.
      if (/^\d+$/.test(value)) return Number(value);
      const fromDateString = Date.parse(value);
      if (!Number.isNaN(fromDateString)) return fromDateString;
      break;
    }
    default:
      break;
  }
  return Date.now();
}
177
+
178
/**
 * Resolve the message object of a raw transcript entry.
 *
 * @param {*} entry - Raw transcript entry.
 * @returns {object} `entry.message` when it is an object, otherwise `entry`
 *   itself; an empty object when `entry` is not an object.
 */
function toMessage(entry) {
  const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === 'object';

  if (!isObjectLike(entry)) return {};

  const nested = entry.message;
  return isObjectLike(nested) ? nested : entry;
}
183
+
184
/**
 * Extract the plain text of a message's `content`.
 *
 * @param {*} content - Either a string or an array of content blocks.
 * @returns {string} The trimmed string, or the trimmed, non-empty `text` of
 *   every `type: 'text'` block joined with newlines; '' for any other input.
 */
function extractTextContent(content) {
  if (typeof content === 'string') return content.trim();
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const block of content) {
    if (block?.type !== 'text' || typeof block.text !== 'string') continue;
    const trimmed = block.text.trim();
    // Whitespace-only blocks contribute nothing (no blank lines in output).
    if (trimmed) pieces.push(trimmed);
  }
  return pieces.join('\n').trim();
}
194
+
195
/**
 * Normalize a raw transcript entry (one message) into the stable canonical
 * shape used by the intelligence pipeline. Works on the raw transcript format,
 * not on timeline events.
 *
 * @param {object} entry - Raw transcript entry; may wrap the message under
 *   `entry.message`.
 * @param {object} [options]
 * @param {string} [options.sessionId=''] - Session the entry belongs to.
 * @param {number} [options.messageIndex=0] - Integer position of the entry.
 * @param {string} [options.harness] - Harness name; DEFAULT_HARNESS when
 *   missing or empty.
 * @returns {object} Normalized entry. `message_id` falls back to
 *   `<sessionId>:<messageIndex>` when neither the entry nor the message has a
 *   `uuid`; `text_excerpt` is capped at TEXT_EXCERPT_LIMIT characters.
 */
export function normalizeTranscriptEntry(entry, options = {}) {
  const message = toMessage(entry);
  const { content } = message;
  const text = extractTextContent(content);

  // Only array content carries tool blocks; anything else counts as zero.
  const countBlocksOfType = (blockType) =>
    (Array.isArray(content) ? content : []).filter((block) => block?.type === blockType).length;
  const toolUseCount = countBlocksOfType('tool_use');
  const toolResultCount = countBlocksOfType('tool_result');

  const sessionId = typeof options.sessionId === 'string' ? options.sessionId : '';
  const messageIndex = Number.isInteger(options.messageIndex) ? options.messageIndex : 0;

  let harness = DEFAULT_HARNESS;
  if (typeof options.harness === 'string' && options.harness) {
    harness = options.harness;
  }

  return {
    schema_version: ENTRY_SCHEMA_VERSION,
    session_id: sessionId,
    message_index: messageIndex,
    harness,
    message_id: entry?.uuid || message?.uuid || `${sessionId}:${messageIndex}`,
    role: typeof message.role === 'string' ? message.role : 'unknown',
    model: typeof message.model === 'string' ? message.model : null,
    timestamp_ms: toTimestampMs(entry?.timestamp ?? message?.timestamp),
    has_content: Boolean(content),
    text_excerpt: text ? text.slice(0, TEXT_EXCERPT_LIMIT) : null,
    tool_uses_count: toolUseCount,
    tool_results_count: toolResultCount,
    approval_candidate: toolUseCount > 0,
  };
}
231
+
232
/**
 * Validate the shape of a normalized transcript entry.
 *
 * @param {*} entry - Candidate normalized entry.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true when no rule
 *   failed; `errors` lists one message per failed rule, in rule order.
 */
export function validateNormalizedTranscriptEntry(entry) {
  if (!entry || typeof entry !== 'object') {
    return { valid: false, errors: ['entry must be an object'] };
  }

  const isNonEmptyString = (candidate) => typeof candidate === 'string' && candidate !== '';
  // Written as "not below zero" so it accepts exactly what `x < 0` lets through.
  const isNonNegativeNumber = (candidate) => typeof candidate === 'number' && !(candidate < 0);

  // Each rule pairs a "passes" flag with the message reported when it fails.
  const rules = [
    [entry.schema_version === ENTRY_SCHEMA_VERSION, `schema_version must be ${ENTRY_SCHEMA_VERSION}`],
    [typeof entry.session_id === 'string', 'session_id must be a string'],
    [
      Number.isInteger(entry.message_index) && entry.message_index >= 0,
      'message_index must be a non-negative integer',
    ],
    [isNonEmptyString(entry.harness), 'harness must be a non-empty string'],
    [isNonEmptyString(entry.message_id), 'message_id must be a non-empty string'],
    [isNonEmptyString(entry.role), 'role must be a non-empty string'],
    [
      typeof entry.timestamp_ms === 'number' && Number.isFinite(entry.timestamp_ms),
      'timestamp_ms must be a finite number',
    ],
    [isNonNegativeNumber(entry.tool_uses_count), 'tool_uses_count must be a non-negative number'],
    [isNonNegativeNumber(entry.tool_results_count), 'tool_results_count must be a non-negative number'],
    [typeof entry.approval_candidate === 'boolean', 'approval_candidate must be a boolean'],
  ];

  const errors = rules.filter(([passes]) => !passes).map(([, message]) => message);
  return { valid: errors.length === 0, errors };
}
250
+
251
+ export {
252
+ TASK_CATEGORIES,
253
+ EVENT_TYPES,
254
+ APPROVAL_DECISIONS,
255
+ APPROVAL_SOURCES,
256
+ ENTRY_SCHEMA_VERSION,
257
+ DEFAULT_HARNESS,
258
+ TEXT_EXCERPT_LIMIT,
259
+ };