copilot-metrics 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ingest.js CHANGED
@@ -2,13 +2,23 @@
2
2
 
3
3
  const crypto = require('node:crypto');
4
4
  const fs = require('node:fs');
5
+ const os = require('node:os');
5
6
  const path = require('node:path');
6
7
  const { readJsonl } = require('./jsonl');
7
8
  const { normalizePayload, normalizeHookEvent, normalizeCopilotSessionEvents } = require('./otel');
8
9
  const { estimateCost, PRICING_VERSION } = require('./pricing');
9
- const { existingRawFingerprints, insertImport } = require('./sqlite-store');
10
+ const {
11
+ attachVscodeChatLabelEvidence,
12
+ existingRawFingerprints,
13
+ importedLineHighWater,
14
+ insertImport,
15
+ queryRows,
16
+ updateUsageCostEstimates,
17
+ updateVscodeUsageResponseIds,
18
+ vscodeRawRecordsNeedingResponseBackfill,
19
+ } = require('./sqlite-store');
10
20
  const { attachUsageLabelEvidence, attachHookLabelEvidence } = require('./labels');
11
- const { loadConfiguredExtractors } = require('./label-extractors');
21
+ const { loadConfiguredExtractors, runLabelExtractors } = require('./label-extractors');
12
22
 
13
23
  function enrichCosts(records) {
14
24
  return records.map((record) => {
@@ -42,11 +52,241 @@ function isCopilotSessionUsageRecord(record) {
42
52
  return record.value && record.value.type === 'session.shutdown';
43
53
  }
44
54
 
55
/**
 * Appends `value` to `values` when it is a string with at least one
 * non-whitespace character. The string is stored as-is (not trimmed);
 * every other input is ignored.
 *
 * @param {string[]} values - Accumulator that is mutated in place.
 * @param {*} value - Candidate text.
 * @returns {void}
 */
function pushText(values, value) {
  if (typeof value !== 'string') return;
  if (value.trim().length === 0) return;
  values.push(value);
}
58
+
59
/**
 * Recursively collects prompt-like strings from a chat request structure.
 *
 * Arrays are walked element by element. For a plain object, the direct text
 * fields are offered to `pushText` first (in a fixed order), then the nested
 * containers are descended into (also in a fixed order).
 *
 * @param {string[]} values - Accumulator that is mutated in place.
 * @param {*} value - Object, array, or anything else (non-objects are ignored).
 * @returns {void}
 */
function pushPromptCandidates(values, value) {
  if (value === null || typeof value !== 'object') return;

  if (Array.isArray(value)) {
    value.forEach((item) => pushPromptCandidates(values, item));
    return;
  }

  // Fields that may directly hold prompt text; order determines output order.
  const textFields = [
    'text',
    'value',
    'message',
    'prompt',
    'promptText',
    'renderedUserMessage',
    'userMessage',
  ];
  for (const field of textFields) pushText(values, value[field]);

  // Fields that may hold a nested object/array with more prompt text.
  const nestedFields = ['renderedUserMessage', 'message', 'result', 'metadata'];
  for (const field of nestedFields) {
    const nested = value[field];
    if (nested && typeof nested === 'object') pushPromptCandidates(values, nested);
  }
}
79
+
80
/**
 * Returns the first truthy response identifier found on a chat request-like
 * object, or `null` when there is none.
 *
 * Lookup order: `responseId`, `metadata.responseId`, `result.responseId`,
 * `result.metadata.responseId`, `modelMessageId`, `metadata.modelMessageId`.
 *
 * @param {*} value - Candidate object; non-objects yield `null`.
 * @returns {*} The identifier as stored, or `null`.
 */
function responseId(value) {
  if (value === null || typeof value !== 'object') return null;

  const { metadata, result } = value;
  const candidates = [
    value.responseId,
    metadata?.responseId,
    result?.responseId,
    result?.metadata?.responseId,
    value.modelMessageId,
    metadata?.modelMessageId,
  ];
  return candidates.find(Boolean) ?? null;
}
90
+
91
/**
 * Returns the first truthy session identifier found on a chat request-like
 * object, or `null` when there is none.
 *
 * Lookup order: `sessionId`, `sessionID`, `metadata.sessionId`,
 * `result.sessionId`, `result.metadata.sessionId`.
 *
 * @param {*} value - Candidate object; non-objects yield `null`.
 * @returns {*} The identifier as stored, or `null`.
 */
function chatSessionId(value) {
  if (value === null || typeof value !== 'object') return null;

  const { metadata, result } = value;
  const candidates = [
    value.sessionId,
    value.sessionID,
    metadata?.sessionId,
    result?.sessionId,
    result?.metadata?.sessionId,
  ];
  return candidates.find(Boolean) ?? null;
}
100
+
101
/**
 * Extracts the request index from a chat-session patch record whose key path
 * (`k` or `key`) starts with `['requests', <index>, ...]`.
 *
 * Only a non-negative safe integer, given either as a number or as a string
 * of decimal digits, is accepted. Values such as `null`, `''`, `true` or `[]`
 * would be coerced to 0/1 by a bare `Number()` call and silently alias an
 * unrelated request, so they are rejected here.
 *
 * @param {{k?: Array, key?: Array}} record - Patch record.
 * @returns {number|null} The request index, or `null` when the record does not
 *   address a specific request.
 */
function chatRequestIndex(record) {
  if (!record || typeof record !== 'object') return null;

  let key = [];
  if (Array.isArray(record.k)) key = record.k;
  else if (Array.isArray(record.key)) key = record.key;

  if (key[0] !== 'requests') return null;

  const raw = key[1];
  let index;
  if (typeof raw === 'number') {
    index = raw;
  } else if (typeof raw === 'string' && /^\d+$/.test(raw)) {
    index = Number(raw);
  } else {
    return null;
  }
  return Number.isSafeInteger(index) && index >= 0 ? index : null;
}
107
+
108
/**
 * Reduces the parsed lines of a VS Code chat-session JSONL file to a list of
 * `{ responseId, sessionId, label_evidence }` mappings.
 *
 * Each record's `value` may contribute in three ways:
 *   - a snapshot whose root (`value.v` when it is an object, else `value`)
 *     carries a `requests` array, merged by array position;
 *   - an append patch with key `['requests']` and an array `v`, merged at
 *     positions starting from the current number of known requests;
 *   - a patch addressed to `['requests', <index>, ...]`, merged into that
 *     request (a non-object `v` is treated as candidate prompt text).
 *
 * Requests without a response id, or for which the label extractors return no
 * evidence, are dropped from the result.
 *
 * @param {Array<{value: *}>} records - Parsed JSONL records.
 * @param {Array} [extractors=[]] - Extractors forwarded to `runLabelExtractors`.
 * @returns {Array<{responseId: *, sessionId: *, label_evidence: Array}>}
 */
function normalizeVscodeChatSession(records, extractors = []) {
  const requestsByIndex = new Map();
  let defaultSessionId = null;

  // Returns the accumulator for a request index, creating it on first use.
  const entryFor = (index) => {
    const mapKey = String(index);
    let slot = requestsByIndex.get(mapKey);
    if (!slot) {
      slot = { texts: [] };
      requestsByIndex.set(mapKey, slot);
    }
    return slot;
  };

  // Folds ids and prompt text from `source` into an existing accumulator.
  const absorb = (slot, source, sessionId) => {
    slot.sessionId = chatSessionId(source) || sessionId || slot.sessionId;
    slot.responseId = responseId(source) || slot.responseId;
    pushPromptCandidates(slot.texts, source);
  };

  // Non-object requests must not create an accumulator at all.
  const mergeRequest = (index, request, sessionId) => {
    if (!request || typeof request !== 'object') return;
    absorb(entryFor(index), request, sessionId);
  };

  for (const { value } of records) {
    if (!value || typeof value !== 'object') continue;

    const root = value.v && typeof value.v === 'object' ? value.v : value;
    defaultSessionId = root.sessionId || root.sessionID || defaultSessionId;

    if (Array.isArray(root.requests)) {
      root.requests.forEach((request, position) => {
        mergeRequest(position, request, defaultSessionId);
      });
    }

    let key = [];
    if (Array.isArray(value.k)) key = value.k;
    else if (Array.isArray(value.key)) key = value.key;

    const isAppend = key.length === 1 && key[0] === 'requests' && Array.isArray(value.v);
    if (isAppend) {
      const startIndex = requestsByIndex.size;
      value.v.forEach((request, offset) => {
        mergeRequest(startIndex + offset, request, defaultSessionId);
      });
    }

    const index = chatRequestIndex(value);
    if (index === null) continue;

    // An indexed patch always materializes its request, even for scalar payloads.
    const slot = entryFor(index);
    const patch = value.v;
    if (patch && typeof patch === 'object') {
      absorb(slot, patch, defaultSessionId);
    } else {
      pushText(slot.texts, patch);
    }
  }

  const mappings = [];
  for (const request of requestsByIndex.values()) {
    if (!request.responseId) continue;

    const labelEvidence = runLabelExtractors('usage', { prompt: request.texts }, extractors)
      .map((evidence) => ({
        ...evidence,
        source_type: 'usage',
        source_field: 'vscode_chat_response',
        source_value: request.responseId,
        // Evidence matched through a response id is given a confidence floor of 0.95.
        confidence: Math.max(Number(evidence.confidence || 0), 0.95),
      }));
    if (labelEvidence.length === 0) continue;

    mappings.push({
      responseId: request.responseId,
      sessionId: request.sessionId || defaultSessionId || null,
      label_evidence: labelEvidence,
    });
  }
  return mappings;
}
175
+
176
/**
 * Imports one VS Code chat-session JSONL file: parses it, derives
 * response-id → label-evidence mappings, and hands them to
 * `attachVscodeChatLabelEvidence`.
 *
 * No raw records are imported by this path, so the raw-record counters in the
 * summary are always zero.
 *
 * @param {{dbPath: string, file: string, extractors?: Array}} options
 * @returns {Promise<object>} Import summary for the file.
 */
async function ingestVscodeChatSessionFile(options) {
  const { dbPath, file, extractors } = options;

  const { records, warnings } = readJsonl(path.resolve(file));
  const mappings = normalizeVscodeChatSession(records, extractors || []);
  const {
    matched_usage_records: matchedUsageRecords,
    label_evidence: labelEvidence,
  } = await attachVscodeChatLabelEvidence(dbPath, mappings);

  return {
    source: 'vscode-chat',
    file,
    dbPath,
    raw_records: 0,
    new_raw_records: 0,
    skipped_existing_records: 0,
    usage_records: matchedUsageRecords,
    hook_events: 0,
    label_evidence: labelEvidence,
    warnings,
    estimate_label: `estimate:${PRICING_VERSION}`,
  };
}
196
+
197
/**
 * Re-normalizes raw vscode records returned by
 * `vscodeRawRecordsNeedingResponseBackfill` and forwards every usage entry
 * that has a `span_id` to `updateVscodeUsageResponseIds`.
 *
 * Rows whose stored `payload_json` cannot be parsed are skipped.
 *
 * @param {string} dbPath - Database path.
 * @param {string} sourceFile - Source file the raw records were imported from.
 * @returns {Promise<*>} Whatever `updateVscodeUsageResponseIds` resolves to.
 */
async function backfillVscodeUsageResponseIds(dbPath, sourceFile) {
  // Columns copied from each normalized usage entry into the update.
  const updateFields = [
    'raw_line',
    'span_id',
    'session_id',
    'timestamp',
    'requested_model',
    'resolved_model',
    'input_tokens',
    'output_tokens',
    'cache_read_tokens',
    'cache_creation_tokens',
    'reasoning_tokens',
  ];

  const pending = await vscodeRawRecordsNeedingResponseBackfill(dbPath, sourceFile);
  const updates = [];

  for (const { payload_json: payloadJson, line } of pending) {
    let payload;
    try {
      payload = JSON.parse(payloadJson);
    } catch {
      // Unparseable stored payload: nothing to backfill from this row.
      continue;
    }

    for (const usage of normalizePayload(payload, 'vscode', line)) {
      if (usage.span_id) {
        updates.push(Object.fromEntries(updateFields.map((field) => [field, usage[field]])));
      }
    }
  }

  return updateVscodeUsageResponseIds(dbPath, updates);
}
226
+
227
/**
 * Parses a stored warnings JSON string into an array.
 *
 * A falsy input is treated as `'[]'`. Malformed JSON, or JSON that is not an
 * array, yields an empty array.
 *
 * @param {string|null|undefined} value - JSON text.
 * @returns {Array} The parsed warnings.
 */
function parseWarningsJson(value) {
  let decoded;
  try {
    decoded = JSON.parse(value || '[]');
  } catch {
    return [];
  }
  if (!Array.isArray(decoded)) return [];
  return decoded;
}
234
+ }
235
+
236
/**
 * Recomputes cost estimates for usage rows that have no credits estimate, a
 * zero credits estimate, or a stored model warning.
 *
 * Rows for which `estimateCost` still reports a warning are left untouched.
 * For the others, the `unknown_model:*` / `missing_model` warnings are removed
 * and the new estimate is passed to `updateUsageCostEstimates`.
 *
 * @param {string} dbPath - Database path.
 * @returns {Promise<*>} Whatever `updateUsageCostEstimates` resolves to.
 */
async function repairUsageCostEstimates(dbPath) {
  const candidates = await queryRows(dbPath, `
    SELECT id, requested_model, resolved_model, input_tokens, output_tokens,
    cache_read_tokens, cache_creation_tokens, reasoning_tokens, warnings_json
    FROM usage_records
    WHERE estimated_ai_credits IS NULL
    OR estimated_ai_credits = 0
    OR warnings_json LIKE '%unknown_model:%'
    OR warnings_json LIKE '%missing_model%'
  `);

  // True for the warnings that a successful re-estimate makes obsolete.
  const isModelWarning = (warning) => (
    String(warning).startsWith('unknown_model:') || warning === 'missing_model'
  );

  const updates = [];
  for (const row of candidates) {
    const { warning, estimated_usd: estimatedUsd, estimated_ai_credits: estimatedAiCredits } = estimateCost(row);
    if (warning) continue;

    const remainingWarnings = parseWarningsJson(row.warnings_json)
      .filter((entry) => !isModelWarning(entry));
    updates.push({
      id: row.id,
      estimated_usd: estimatedUsd,
      estimated_ai_credits: estimatedAiCredits,
      warnings: remainingWarnings,
    });
  }

  return updateUsageCostEstimates(dbPath, updates);
}
261
+
45
262
  async function ingestFile(options) {
46
263
  const { dbPath, file, source } = options;
47
- const parsed = readJsonl(file);
48
- const warnings = [...parsed.warnings];
264
+ if (source === 'vscode-chat') return ingestVscodeChatSessionFile(options);
265
+
49
266
  const sourceFile = path.resolve(file);
267
+ const backfilledUsageRecords = source === 'vscode'
268
+ ? await backfillVscodeUsageResponseIds(dbPath, sourceFile)
269
+ : 0;
270
+ const highWaterLine = await importedLineHighWater(dbPath, source, sourceFile);
271
+ if (source === 'copilot-session' && highWaterLine > 0) {
272
+ return {
273
+ source,
274
+ file,
275
+ dbPath,
276
+ raw_records: 0,
277
+ new_raw_records: 0,
278
+ skipped_existing_records: highWaterLine,
279
+ usage_records: 0,
280
+ hook_events: 0,
281
+ label_evidence: 0,
282
+ backfilled_usage_records: backfilledUsageRecords,
283
+ warnings: [],
284
+ estimate_label: `estimate:${PRICING_VERSION}`,
285
+ };
286
+ }
287
+ const needsSessionContext = source === 'copilot-session' && highWaterLine === 0;
288
+ const parsed = readJsonl(file, { afterLine: needsSessionContext ? 0 : highWaterLine });
289
+ const warnings = [...parsed.warnings];
50
290
  const parsedRecords = parsed.records.map((record) => ({
51
291
  ...record,
52
292
  raw_fingerprint: rawFingerprint(source, sourceFile, record),
@@ -91,6 +331,7 @@ async function ingestFile(options) {
91
331
  }
92
332
 
93
333
  await insertImport(dbPath, source, sourceFile, newRecords, enrichedUsage, enrichedHooks, warnings);
334
+ const repairedCostRecords = await repairUsageCostEstimates(dbPath);
94
335
 
95
336
  return {
96
337
  source,
@@ -98,9 +339,11 @@ async function ingestFile(options) {
98
339
  dbPath,
99
340
  raw_records: importableRecords.length,
100
341
  new_raw_records: newRecords.length,
101
- skipped_existing_records: importableRecords.length - newRecords.length,
342
+ skipped_existing_records: highWaterLine,
102
343
  usage_records: enrichedUsage.length,
103
344
  hook_events: enrichedHooks.length,
345
+ backfilled_usage_records: backfilledUsageRecords,
346
+ repaired_cost_records: repairedCostRecords,
104
347
  label_evidence: enrichedUsage.reduce((sum, usage) => sum + (usage.label_evidence || []).length, 0)
105
348
  + enrichedHooks.reduce((sum, event) => sum + (event.label_evidence || []).length, 0),
106
349
  warnings,
@@ -113,6 +356,7 @@ function configuredSourceFiles(paths, config = {}) {
113
356
  const telemetryConfig = config.telemetry || {};
114
357
  const files = [
115
358
  { source: 'vscode', file: sourceConfig.vscode?.telemetry || telemetryConfig.vscode || paths.vscodeOtelJsonl },
359
+ ...discoverVscodeChatSessionFiles(sourceConfig.vscode?.chatSessions),
116
360
  { source: 'hooks', file: sourceConfig.vscode?.hooks || paths.hookEventsJsonl },
117
361
  { source: 'copilot-cli', file: sourceConfig.copilotCli?.telemetry || telemetryConfig.copilotCli || paths.copilotCliOtelJsonl },
118
362
  { source: 'hooks', file: sourceConfig.copilotCli?.hooks || paths.hookEventsJsonl },
@@ -130,6 +374,40 @@ function configuredSourceFiles(paths, config = {}) {
130
374
  });
131
375
  }
132
376
 
377
/**
 * Lists the `.jsonl` files that sit directly inside `dir` (no recursion).
 *
 * A missing path, or a path that is not a directory, yields an empty list
 * instead of throwing: the directory is read in a single step so there is no
 * window between an existence check and the read. Other failures (for example
 * permission errors) still propagate.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths built as `path.join(dir, name)`.
 * @throws {Error} Any `readdirSync` error other than ENOENT / ENOTDIR.
 */
function listJsonlFiles(dir) {
  if (!dir) return [];

  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch (error) {
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') return [];
    throw error;
  }

  return entries
    .filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
    .map((entry) => path.join(dir, entry.name));
}
383
+
384
/**
 * Collects the `.jsonl` files found in the `chatSessions` folder of every
 * sub-directory of `workspaceStorageDir`.
 *
 * @param {string} workspaceStorageDir - Directory whose children are scanned.
 * @returns {string[]} Chat-session file paths; empty when the directory is
 *   missing or not given.
 */
function discoverWorkspaceChatSessions(workspaceStorageDir) {
  const isUsable = Boolean(workspaceStorageDir) && fs.existsSync(workspaceStorageDir);
  if (!isUsable) return [];

  const found = [];
  for (const entry of fs.readdirSync(workspaceStorageDir, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue;
    const chatSessionsDir = path.join(workspaceStorageDir, entry.name, 'chatSessions');
    found.push(...listJsonlFiles(chatSessionsDir));
  }
  return found;
}
390
+
391
/**
 * Resolves the VS Code chat-session files to import.
 *
 * With configured paths (a string or an array of strings): a file is used
 * directly; a directory contributes its own `.jsonl` files plus those found by
 * treating it as a workspaceStorage directory. Entries that are not non-empty
 * strings are ignored, so a stray value cannot crash `path.resolve` and an
 * empty string cannot silently scan the current working directory.
 *
 * Without usable configured paths, the default `Code` and `Code - Insiders`
 * workspaceStorage directories are scanned. `~/.config` is always tried; the
 * per-platform application-data root is added on top of it
 * (`$XDG_CONFIG_HOME`, `~/Library/Application Support` on macOS, `%APPDATA%`
 * on Windows), because `~/.config` alone only matches a default Linux setup.
 *
 * @param {string|string[]|undefined|null} configured - Configured path(s).
 * @returns {Array<{source: 'vscode-chat', file: string}>} De-duplicated entries
 *   sorted by file path.
 */
function discoverVscodeChatSessionFiles(configured) {
  const configuredEntries = (Array.isArray(configured) ? configured : [configured])
    .filter((entry) => typeof entry === 'string' && entry.trim() !== '');

  let files;
  if (configuredEntries.length > 0) {
    files = configuredEntries.flatMap((entry) => {
      const resolved = path.resolve(entry);
      if (!fs.existsSync(resolved)) return [];
      if (fs.statSync(resolved).isFile()) return [resolved];
      return [...listJsonlFiles(resolved), ...discoverWorkspaceChatSessions(resolved)];
    });
  } else {
    const home = os.homedir();
    const dataRoots = [path.join(home, '.config')];
    if (process.env.XDG_CONFIG_HOME) dataRoots.push(process.env.XDG_CONFIG_HOME);
    if (process.platform === 'darwin') {
      dataRoots.push(path.join(home, 'Library', 'Application Support'));
    } else if (process.platform === 'win32') {
      dataRoots.push(process.env.APPDATA || path.join(home, 'AppData', 'Roaming'));
    }

    files = dataRoots
      .flatMap((root) => ['Code', 'Code - Insiders']
        .map((product) => path.join(root, product, 'User', 'workspaceStorage')))
      .flatMap((dir) => discoverWorkspaceChatSessions(dir));
  }

  // Overlapping roots or configured entries can yield the same file twice.
  return [...new Set(files)]
    .sort()
    .map((file) => ({ source: 'vscode-chat', file }));
}
410
+
133
411
  function discoverCopilotSessionFiles(sessionStateDir) {
134
412
  if (!sessionStateDir || !fs.existsSync(sessionStateDir)) return [];
135
413
  return fs.readdirSync(sessionStateDir, { withFileTypes: true })
@@ -167,5 +445,9 @@ module.exports = {
167
445
  autoImportConfiguredSources,
168
446
  configuredSourceFiles,
169
447
  discoverCopilotSessionFiles,
448
+ discoverVscodeChatSessionFiles,
449
+ backfillVscodeUsageResponseIds,
170
450
  ingestFile,
451
+ normalizeVscodeChatSession,
452
+ repairUsageCostEstimates,
171
453
  };
package/src/jsonl.js CHANGED
@@ -2,19 +2,21 @@
2
2
 
3
3
  const fs = require('node:fs');
4
4
 
5
- function readJsonl(file) {
5
+ function readJsonl(file, options = {}) {
6
6
  const text = fs.existsSync(file) ? fs.readFileSync(file, 'utf8') : '';
7
7
  const records = [];
8
8
  const warnings = [];
9
+ const afterLine = Number(options.afterLine || 0);
9
10
 
10
11
  text.split(/\r?\n/).forEach((line, index) => {
11
- if (!line.trim()) return;
12
+ const lineNumber = index + 1;
13
+ if (lineNumber <= afterLine || !line.trim()) return;
12
14
  try {
13
- records.push({ line: index + 1, value: JSON.parse(line) });
15
+ records.push({ line: lineNumber, value: JSON.parse(line) });
14
16
  } catch (error) {
15
17
  warnings.push({
16
18
  code: 'malformed_jsonl',
17
- line: index + 1,
19
+ line: lineNumber,
18
20
  message: error.message,
19
21
  });
20
22
  }
package/src/otel.js CHANGED
@@ -2,9 +2,16 @@
2
2
 
3
3
  function attrsToObject(attrs) {
4
4
  if (!attrs) return {};
5
+ if (attrs && typeof attrs === 'object' && Array.isArray(attrs._rawAttributes)) {
6
+ return Object.fromEntries(attrs._rawAttributes);
7
+ }
5
8
  if (!Array.isArray(attrs)) return attrs;
6
9
  const out = {};
7
10
  for (const attr of attrs) {
11
+ if (Array.isArray(attr) && attr.length >= 2) {
12
+ out[attr[0]] = attr[1];
13
+ continue;
14
+ }
8
15
  const value = attr.value;
9
16
  if (value && typeof value === 'object') {
10
17
  out[attr.key] = value.stringValue ?? value.intValue ?? value.doubleValue ?? value.boolValue ?? value.arrayValue;
@@ -61,8 +68,28 @@ function flattenSpans(payload) {
61
68
  return spans;
62
69
  }
63
70
 
71
/**
 * Normalizes a span timestamp to an ISO-8601 string.
 *
 * Accepted shapes:
 *   - `[seconds, nanoseconds]` pair → ISO string;
 *   - number, or string of decimal digits → treated as nanoseconds when
 *     greater than 1e15, otherwise as milliseconds → ISO string;
 *   - any other truthy non-array value (e.g. an already formatted string) is
 *     returned unchanged.
 *
 * Values that cannot be represented as a valid `Date` (non-finite or out of
 * range) return `null` rather than letting `toISOString()` throw a
 * `RangeError`. Arrays that are not a `[seconds, nanoseconds]` pair also
 * return `null` instead of being passed through as a "timestamp".
 *
 * @param {*} value - Raw timestamp.
 * @returns {string|null|*} ISO string, `null`, or the untouched input.
 */
function timestampValue(value) {
  if (!value) return null;

  // Converts epoch milliseconds to ISO text, or null when not a valid date.
  const toIso = (millis) => {
    if (!Number.isFinite(millis)) return null;
    const date = new Date(millis);
    return Number.isNaN(date.getTime()) ? null : date.toISOString();
  };
  // Above 1e15 the number is interpreted as nanoseconds.
  const toMillis = (numeric) => (numeric > 1e15 ? numeric / 1e6 : numeric);

  if (Array.isArray(value)) {
    if (value.length < 2) return null;
    return toIso((Number(value[0]) * 1000) + (Number(value[1]) / 1e6));
  }
  if (typeof value === 'string' && /^\d+$/.test(value)) {
    return toIso(toMillis(Number(value)));
  }
  if (typeof value === 'number') {
    return toIso(toMillis(value));
  }
  return value;
}
89
+
64
90
  function classifySpan(span) {
65
91
  const attrs = attrsToObject(span.attributes);
92
+ const eventName = String(pick(attrs, ['event.name']) || '').toLowerCase();
66
93
  const operation = String(pick(attrs, ['gen_ai.operation.name', 'llm.operation']) || '').toLowerCase();
67
94
  const name = String(span.name || '').toLowerCase();
68
95
  const hasTokens = number(attrs, [
@@ -72,7 +99,14 @@ function classifySpan(span) {
72
99
  'llm.usage.completion_tokens',
73
100
  ]) > 0;
74
101
 
75
- if (operation.includes('agent') || operation.includes('tool') || name.includes('agent') || name.includes('tool')) {
102
+ if (
103
+ eventName.includes('agent')
104
+ || eventName.includes('tool')
105
+ || operation.includes('agent')
106
+ || operation.includes('tool')
107
+ || name.includes('agent')
108
+ || name.includes('tool')
109
+ ) {
76
110
  return 'non_billable';
77
111
  }
78
112
  if (hasTokens || operation.includes('chat') || operation.includes('completion') || operation.includes('generate')) {
@@ -83,19 +117,19 @@ function classifySpan(span) {
83
117
 
84
118
  function normalizeSpan(span, source, rawLine) {
85
119
  const attrs = attrsToObject(span.attributes);
86
- const resourceAttrs = attrsToObject(span.resourceAttributes);
120
+ const resourceAttrs = attrsToObject(span.resourceAttributes || span.resource);
87
121
  const type = classifySpan(span);
88
122
  if (type !== 'llm') return null;
89
123
 
90
124
  return {
91
125
  raw_line: rawLine,
92
- span_id: span.spanId || span.span_id || null,
126
+ span_id: span.spanId || span.span_id || pick(attrs, ['gen_ai.response.id']) || null,
93
127
  trace_id: span.traceId || span.trace_id || null,
94
128
  parent_span_id: span.parentSpanId || span.parent_span_id || null,
95
- timestamp: span.startTimeUnixNano || span.start_time || attrs['timestamp'] || null,
129
+ timestamp: timestampValue(span.startTimeUnixNano || span.start_time || span.hrTime || attrs.timestamp),
96
130
  surface: source,
97
131
  conversation_id: pick(attrs, ['gen_ai.conversation.id', 'conversation.id', 'copilot.conversation.id']),
98
- session_id: pick(attrs, ['session.id', 'copilot.session.id']),
132
+ session_id: pick(attrs, ['session.id', 'copilot.session.id']) || pick(resourceAttrs, ['session.id', 'copilot.session.id']),
99
133
  requested_model: pick(attrs, ['gen_ai.request.model', 'llm.request.model', 'llm.model_name']),
100
134
  resolved_model: pick(attrs, ['gen_ai.response.model', 'llm.response.model', 'model']),
101
135
  repo: pick(attrs, ['vcs.repository.name', 'git.repository', 'repo']) || pick(resourceAttrs, ['vcs.repository.name', 'service.name']),
@@ -194,6 +228,7 @@ function normalizeHookEvent(payload, source, rawLine) {
194
228
  return {
195
229
  raw_line: rawLine,
196
230
  event: payload.event || null,
231
+ timestamp: payload.captured_at || payload.timestamp || null,
197
232
  session_id: payload.session_id || payload.sessionId || null,
198
233
  cwd: payload.cwd || null,
199
234
  repo: payload.repo || payload.repository || null,
package/src/pricing.js CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  const PRICING_VERSION = 'github-copilot-2026-06-01';
4
4
 
5
- // USD per 1M tokens. Source: GitHub Copilot models and pricing docs, checked 2026-05-30.
5
+ // USD per 1M tokens. Source: GitHub Copilot models and pricing docs, checked 2026-05-31.
6
6
  const MODEL_PRICES = {
7
7
  'gpt-4.1': { input: 2.00, cacheRead: 0.50, cacheWrite: 0, output: 8.00 },
8
8
  'gpt-5 mini': { input: 0.25, cacheRead: 0.025, cacheWrite: 0, output: 2.00 },
@@ -32,11 +32,19 @@ const MODEL_PRICES = {
32
32
  };
33
33
 
34
34
  function normalizeModelName(model) {
35
- return String(model || '').trim().toLowerCase();
35
+ return String(model || '').trim().toLowerCase().replace(/^copilot\//, '');
36
+ }
37
+
38
/**
 * Maps a model name to the key used in `MODEL_PRICES`.
 *
 * The normalized name is used when it is a price-table entry; otherwise a
 * trailing `-YYYY-MM-DD` suffix is stripped and the result is used when that
 * is an entry. If neither matches, the normalized name is returned unchanged.
 *
 * Lookups check own properties only, so a name such as `constructor` cannot
 * match a member inherited from `Object.prototype`.
 *
 * @param {*} model - Raw model name.
 * @returns {string} Price-table key, or the normalized name when unknown.
 */
function modelPriceKey(model) {
  const hasPrice = (name) => Object.prototype.hasOwnProperty.call(MODEL_PRICES, name);

  const normalized = normalizeModelName(model);
  if (hasPrice(normalized)) return normalized;

  const withoutDate = normalized.replace(/-\d{4}-\d{2}-\d{2}$/, '');
  return hasPrice(withoutDate) ? withoutDate : normalized;
}
37
45
 
38
46
  function estimateCost(record) {
39
- const model = normalizeModelName(record.resolved_model || record.requested_model);
47
+ const model = modelPriceKey(record.resolved_model || record.requested_model);
40
48
  const price = MODEL_PRICES[model];
41
49
  if (!model) {
42
50
  return { estimated_usd: null, estimated_ai_credits: null, warning: 'missing_model' };
@@ -63,4 +71,5 @@ module.exports = {
63
71
  PRICING_VERSION,
64
72
  MODEL_PRICES,
65
73
  estimateCost,
74
+ modelPriceKey,
66
75
  };