@jeffreycao/copilot-api 1.9.1 → 1.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,13 @@
1
1
  import { PATHS } from "./paths-Cla6y5eD.js";
2
- import { COMPACT_AUTO_CONTINUE, COMPACT_REQUEST, HTTPError, cacheModels, compactAutoContinuePromptStarts, compactMessageSections, compactSummaryPromptStart, compactSystemPromptStarts, compactTextOnlyGuard, copilotBaseUrl, copilotHeaders, forwardError, generateRequestIdFromPayload, getCopilotUsage, getRootSessionId, getUUID, isNullish, parseUserIdMetadata, prepareForCompact, prepareInteractionHeaders, prepareMessageProxyHeaders, requestContext, resolveTraceId, sleep, state } from "./utils-DG6CB51Z.js";
2
+ import { COMPACT_AUTO_CONTINUE, COMPACT_REQUEST, HTTPError, cacheModels, compactAutoContinuePromptStarts, compactMessageSections, compactSummaryPromptStart, compactSystemPromptStarts, compactTextOnlyGuard, copilotBaseUrl, copilotHeaders, forwardError, generateRequestIdFromPayload, generateTraceId, getCopilotUsage, getRootSessionId, getUUID, isNullish, parseUserIdMetadata, prepareForCompact, prepareInteractionHeaders, prepareMessageProxyHeaders, requestContext, resolveTraceId as resolveTraceId$1, sleep, state } from "./utils-DEJvF68W.js";
3
3
  import { getAnthropicApiKey, getClaudeTokenMultiplier, getConfig, getExtraPromptForModel, getProviderConfig, getReasoningEffortForModel, getSmallModel, isMessagesApiEnabled, isResponsesApiContextManagementModel, isResponsesApiWebSearchEnabled } from "./config-BQvWqYh_.js";
4
4
  import consola from "consola";
5
+ import fs from "node:fs/promises";
5
6
  import path from "node:path";
6
7
  import { Hono } from "hono";
7
8
  import { cors } from "hono/cors";
8
9
  import { logger } from "hono/logger";
9
- import fs, { readFileSync } from "node:fs";
10
+ import fs$1, { readFileSync } from "node:fs";
10
11
  import { streamSSE } from "hono/streaming";
11
12
  import util from "node:util";
12
13
  import { events } from "fetch-event-stream";
@@ -59,7 +60,7 @@ function createAuthMiddleware(options = {}) {
59
60
  //#endregion
60
61
  //#region src/lib/trace.ts
61
62
  const traceIdMiddleware = async (c, next) => {
62
- const traceId = resolveTraceId(c.req.header("x-trace-id"));
63
+ const traceId = resolveTraceId$1(c.req.header("x-trace-id"));
63
64
  c.header("x-trace-id", traceId);
64
65
  const context = {
65
66
  traceId,
@@ -79,6 +80,58 @@ const awaitApproval = async () => {
79
80
  if (!await consola.prompt(`Accept incoming request?`, { type: "confirm" })) throw new HTTPError("Request rejected", Response.json({ message: "Request rejected" }, { status: 403 }));
80
81
  };
81
82
 
83
+ //#endregion
84
+ //#region src/lib/process-cleanup.ts
/**
 * Process-wide cleanup registry.
 *
 * Handlers registered through `registerProcessCleanup` are run once when the
 * process is about to stop (`beforeExit`, `exit`, `SIGINT`, `SIGTERM`).
 */
const cleanupHandlers = /* @__PURE__ */ new Set();
/** Promise of the in-flight (or finished) asynchronous cleanup pass. */
let cleanupPromise = null;
/** One of "idle", "running" or "done". */
let cleanupState = "idle";
let runtimeInitialized$1 = false;

/**
 * Installs the process listeners that trigger cleanup. Safe to call many
 * times; only the first call has an effect.
 */
function initializeProcessCleanupRuntime() {
	if (runtimeInitialized$1) return;
	runtimeInitialized$1 = true;
	process.once("beforeExit", () => {
		// runProcessCleanups() never rejects, so the promise can be dropped.
		void runProcessCleanups();
	});
	process.once("exit", runProcessCleanupsSync);
	process.once("SIGINT", () => {
		void shutdownProcess(0);
	});
	process.once("SIGTERM", () => {
		void shutdownProcess(0);
	});
}

/**
 * Last-chance synchronous pass used from the `exit` event. It only runs when
 * no asynchronous pass has started, and it invokes every handler without
 * awaiting it.
 */
function runProcessCleanupsSync() {
	if (cleanupState !== "idle") return;
	cleanupState = "done";
	for (const handler of Array.from(cleanupHandlers)) {
		try {
			handler();
		} catch {
			// Deliberately best-effort: the process is already exiting and the
			// remaining handlers must still get their chance to run.
		}
	}
}

/**
 * Runs every registered handler in registration order, awaiting each one.
 *
 * A failing handler is reported with `console.warn` and does not prevent the
 * remaining handlers from running. Concurrent and repeated calls share the
 * same pass.
 *
 * @returns {Promise<void>} Resolves once every handler has settled.
 */
async function runProcessCleanups() {
	if (cleanupPromise) return cleanupPromise;
	if (cleanupState === "done") return;
	cleanupState = "running";
	cleanupPromise = (async () => {
		try {
			for (const handler of Array.from(cleanupHandlers)) {
				try {
					await handler();
				} catch (error) {
					console.warn("Process cleanup handler failed", error);
				}
			}
		} finally {
			cleanupState = "done";
		}
	})();
	return cleanupPromise;
}

/**
 * Runs the asynchronous cleanup pass and then terminates the process.
 *
 * @param {number} exitCode Exit code passed to `process.exit`.
 */
async function shutdownProcess(exitCode) {
	try {
		await runProcessCleanups();
	} finally {
		process.exit(exitCode);
	}
}

/**
 * Registers a cleanup handler and makes sure the process listeners exist.
 *
 * @param {() => unknown} handler Sync or async cleanup callback.
 * @returns {() => void} Function that unregisters the handler.
 */
function registerProcessCleanup(handler) {
	initializeProcessCleanupRuntime();
	cleanupHandlers.add(handler);
	return () => {
		cleanupHandlers.delete(handler);
	};
}
134
+
82
135
  //#endregion
83
136
  //#region src/lib/logger.ts
84
137
  const LOG_RETENTION_MS = 10080 * 60 * 1e3;
@@ -92,22 +145,22 @@ let runtimeInitialized = false;
92
145
  let flushInterval;
93
146
  let cleanupInterval;
94
147
  const ensureLogDirectory = () => {
95
- if (!fs.existsSync(LOG_DIR)) fs.mkdirSync(LOG_DIR, { recursive: true });
148
+ if (!fs$1.existsSync(LOG_DIR)) fs$1.mkdirSync(LOG_DIR, { recursive: true });
96
149
  };
97
150
  const cleanupOldLogs = () => {
98
- if (!fs.existsSync(LOG_DIR)) return;
151
+ if (!fs$1.existsSync(LOG_DIR)) return;
99
152
  const now = Date.now();
100
- for (const entry of fs.readdirSync(LOG_DIR)) {
153
+ for (const entry of fs$1.readdirSync(LOG_DIR)) {
101
154
  const filePath = path.join(LOG_DIR, entry);
102
155
  let stats;
103
156
  try {
104
- stats = fs.statSync(filePath);
157
+ stats = fs$1.statSync(filePath);
105
158
  } catch {
106
159
  continue;
107
160
  }
108
161
  if (!stats.isFile()) continue;
109
162
  if (now - stats.mtimeMs > LOG_RETENTION_MS) try {
110
- fs.rmSync(filePath);
163
+ fs$1.rmSync(filePath);
111
164
  } catch {
112
165
  continue;
113
166
  }
@@ -160,21 +213,13 @@ const initializeLoggerRuntime = () => {
160
213
  maybeUnref(flushInterval);
161
214
  cleanupInterval = setInterval(cleanupOldLogs, CLEANUP_INTERVAL_MS);
162
215
  maybeUnref(cleanupInterval);
163
- process.once("exit", cleanup);
164
- process.once("SIGINT", () => {
165
- cleanup();
166
- process.exit(0);
167
- });
168
- process.once("SIGTERM", () => {
169
- cleanup();
170
- process.exit(0);
171
- });
216
+ registerProcessCleanup(cleanup);
172
217
  };
173
218
  const getLogStream = (filePath) => {
174
219
  initializeLoggerRuntime();
175
220
  let stream = logStreams.get(filePath);
176
221
  if (!stream || stream.destroyed) {
177
- stream = fs.createWriteStream(filePath, { flags: "a" });
222
+ stream = fs$1.createWriteStream(filePath, { flags: "a" });
178
223
  logStreams.set(filePath, stream);
179
224
  stream.on("error", (error) => {
180
225
  console.warn("Log stream error", error);
@@ -248,6 +293,550 @@ async function checkRateLimit(state$1) {
248
293
  consola.info("Rate limit wait completed, proceeding with request");
249
294
  }
250
295
 
296
+ //#endregion
297
+ //#region src/lib/event-bus.ts
/**
 * Minimal synchronous publish/subscribe hub keyed by event name.
 */
var EventBus = class {
	/** Map of event name -> Set of handlers. */
	handlers = /* @__PURE__ */ new Map();
	/**
	 * Calls every handler subscribed to `name` with `event`.
	 * Iterates over a snapshot, so handlers may (un)subscribe while running.
	 */
	publish(name, event) {
		const handlers = this.handlers.get(name);
		if (!handlers) return;
		for (const handler of Array.from(handlers)) handler(event);
	}
	/**
	 * Subscribes `handler` to `name`.
	 *
	 * @returns {() => void} Unsubscribe function; calling it more than once is
	 * harmless.
	 */
	subscribe(name, handler) {
		let handlers = this.handlers.get(name);
		if (!handlers) {
			handlers = /* @__PURE__ */ new Set();
			this.handlers.set(name, handlers);
		}
		const registeredHandler = handler;
		handlers.add(registeredHandler);
		return () => {
			handlers.delete(registeredHandler);
			// Only drop the map entry when it still points at *this* Set. A stale
			// unsubscribe must not remove a newer Set created by later subscribers.
			if (handlers.size === 0 && this.handlers.get(name) === handlers) this.handlers.delete(name);
		};
	}
};
319
+
320
+ //#endregion
321
+ //#region src/lib/sqlite.ts
322
+ const MINIMUM_NODE_SQLITE_VERSION = "22.13.0";
/** Reports whether a truthy `Bun` global is present on `globalThis`. */
const isBunRuntime = () => {
	const bunGlobal = globalThis.Bun;
	return bunGlobal ? true : false;
};
/**
 * Splits a dotted version string into up to three non-negative integers.
 *
 * Surrounding whitespace and a leading "v" (the `process.version` form, e.g.
 * "v22.13.0") are ignored. Parts that are not a non-negative integer become 0.
 *
 * @param {string} version Version such as "22.13.0" or "v22.13.0".
 * @returns {number[]} Parsed parts, at most three entries.
 */
function parseNodeVersion(version) {
	const normalized = version.trim().replace(/^v/i, "");
	return normalized.split(".", 3).map((part) => {
		const parsed = Number.parseInt(part, 10);
		return Number.isFinite(parsed) && parsed >= 0 ? parsed : 0;
	});
}
/**
 * Compares `version` part by part against MINIMUM_NODE_SQLITE_VERSION.
 * Missing parts of `version` count as 0.
 *
 * @param {string} version Node.js version string.
 * @returns {boolean} True when `version` is at least the minimum.
 */
function isNodeSqliteSupportedVersion(version) {
	const actual = parseNodeVersion(version);
	const required = parseNodeVersion(MINIMUM_NODE_SQLITE_VERSION);
	for (let index = 0; index < required.length; index += 1) {
		const have = actual[index] ?? 0;
		const need = required[index];
		// The first differing part decides the comparison.
		if (have !== need) return have > need;
	}
	return true;
}
/**
 * Decides whether SQLite can be used: always under Bun, otherwise only when
 * the Node.js version passes `isNodeSqliteSupportedVersion`.
 *
 * @param {{ isBun?: boolean, nodeVersion?: string }} [input] Optional
 * overrides; defaults are detected from the running process.
 * @returns {boolean}
 */
function isSqliteRuntimeSupported(input = {}) {
	const runningOnBun = input.isBun ?? isBunRuntime();
	if (runningOnBun) return true;
	const nodeVersion = input.nodeVersion ?? process.versions.node;
	return isNodeSqliteSupportedVersion(nodeVersion);
}
/**
 * Builds the user-facing explanation shown when the Node.js runtime cannot
 * provide SQLite.
 *
 * @param {string} nodeVersion Detected Node.js version.
 * @returns {string}
 */
function getUnsupportedNodeSqliteMessage(nodeVersion) {
	const requirement = `SQLite-backed token usage requires Bun or Node.js >= ${MINIMUM_NODE_SQLITE_VERSION}.`;
	const detected = `Detected Node.js ${nodeVersion}.`;
	const remedy = "Upgrade Node.js or run the CLI with Bun, for example `bunx --bun @jeffreycao/copilot-api@latest start` or `bun run start start`.";
	return [requirement, detected, remedy].join(" ");
}
/**
 * Error raised when SQLite cannot be loaded on the current Node.js runtime.
 * The message comes from `getUnsupportedNodeSqliteMessage`; `cause` carries
 * the underlying failure when there is one.
 */
var UnsupportedNodeSqliteRuntimeError = class extends Error {
	constructor(nodeVersion, cause) {
		const message = getUnsupportedNodeSqliteMessage(nodeVersion);
		const errorOptions = { cause };
		super(message, errorOptions);
		this.name = "UnsupportedNodeSqliteRuntimeError";
	}
};
/**
 * Opens `dbPath` with the `Database` class of the "bun:sqlite" module.
 * The specifier is assembled at runtime rather than written as a literal.
 */
async function openBunDatabase(dbPath) {
	const specifier = ["bun", "sqlite"].join(":");
	const bunSqlite = await import(specifier);
	return new bunSqlite.Database(dbPath);
}
/**
 * Dynamically imports the "node:sqlite" module.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {UnsupportedNodeSqliteRuntimeError} When the Node.js version is too
 * old, or when the import itself fails (the failure is attached as `cause`).
 */
async function loadNodeSqliteModule() {
	const { node: nodeVersion } = process.versions;
	const supported = isNodeSqliteSupportedVersion(nodeVersion);
	if (!supported) throw new UnsupportedNodeSqliteRuntimeError(nodeVersion);
	const specifier = ["node", "sqlite"].join(":");
	let sqliteModule;
	try {
		sqliteModule = await import(specifier);
	} catch (error) {
		throw new UnsupportedNodeSqliteRuntimeError(nodeVersion, error);
	}
	return sqliteModule;
}
/** Opens `dbPath` with the `DatabaseSync` class from `loadNodeSqliteModule`. */
async function openNodeDatabase(dbPath) {
	const sqliteModule = await loadNodeSqliteModule();
	const { DatabaseSync } = sqliteModule;
	return new DatabaseSync(dbPath);
}
/**
 * Opens a SQLite database with the driver matching the current runtime.
 * The parent directory is created first, unless the path is ":memory:" or
 * its directory part is ".".
 *
 * @param {string} dbPath Database file path or ":memory:".
 */
async function openSqliteDatabase(dbPath) {
	const parentDir = path.dirname(dbPath);
	const needsDirectory = dbPath !== ":memory:" && parentDir !== ".";
	if (needsDirectory) await fs.mkdir(parentDir, { recursive: true });
	if (isBunRuntime()) return openBunDatabase(dbPath);
	return openNodeDatabase(dbPath);
}
/**
 * Lazily opens a single SQLite database and shares it between callers.
 *
 * `options.getPath()` supplies the database path and the optional
 * `options.initialize(db)` runs once right after the database is opened.
 */
var SqliteDbStore = class {
	/** Promise of the shared database, or null when nothing is open. */
	dbPromise = null;
	options;
	constructor(options) {
		this.options = options;
	}
	/**
	 * Returns the shared database promise, opening the database on first use.
	 * A failed open is not cached: the next call tries again.
	 */
	getDb() {
		if (!this.dbPromise) {
			const pending = this.open();
			this.dbPromise = pending;
			// Forget a rejected attempt so one transient failure does not poison
			// every later call. Callers still see the rejection on `pending`.
			pending.catch(() => {
				if (this.dbPromise === pending) this.dbPromise = null;
			});
		}
		return this.dbPromise;
	}
	/**
	 * Closes the database if one was opened.
	 *
	 * @param {{ beforeClose?: (db: object) => void }} [input] Optional hook run
	 * just before the handle is closed. The handle is closed even if the hook
	 * throws.
	 */
	async close(input) {
		const currentDbPromise = this.dbPromise;
		this.dbPromise = null;
		if (!currentDbPromise) return;
		let db;
		try {
			db = await currentDbPromise;
		} catch {
			// The open never succeeded, so there is nothing to close.
			return;
		}
		try {
			input?.beforeClose?.(db);
		} finally {
			db.close?.();
		}
	}
	/** Opens the database and runs the optional initializer. */
	async open() {
		const db = await openSqliteDatabase(this.options.getPath());
		try {
			this.options.initialize?.(db);
		} catch (error) {
			// Do not leak the handle when initialization fails.
			try {
				db.close?.();
			} catch {}
			throw error;
		}
		return db;
	}
};
398
+
399
+ //#endregion
400
+ //#region src/lib/token-usage/store.ts
401
+ const DB_PATH_ENV = "COPILOT_API_SQLITE_DB_PATH";
402
+ const DEFAULT_DB_FILENAME = "copilot-api.sqlite";
403
+ let writeQueue = Promise.resolve();
/**
 * Resolves the SQLite database path.
 *
 * The environment variable named by DB_PATH_ENV wins when it holds a
 * non-blank value; otherwise the default file inside PATHS.APP_DIR is used.
 * A blank override is ignored because SQLite treats an empty filename as a
 * private temporary database, which would silently discard recorded usage.
 *
 * @returns {string}
 */
function getDbPath() {
	const configured = process.env[DB_PATH_ENV];
	if (typeof configured === "string" && configured.trim() !== "") return configured;
	return path.join(PATHS.APP_DIR, DEFAULT_DB_FILENAME);
}
407
+ const tokenUsageDbStore = new SqliteDbStore({
408
+ getPath: getDbPath,
409
+ initialize: initializeTokenUsageDb
410
+ });
/** Returns the promise of the shared token-usage database. */
function getDb() {
	const store = tokenUsageDbStore;
	return store.getDb();
}
/** Token usage is persisted only when the runtime supports SQLite. */
function isTokenUsageStorageEnabled() {
	const supported = isSqliteRuntimeSupported();
	return supported;
}
/**
 * Prepares a freshly opened database: sets the journal mode and busy timeout,
 * creates the `token_usage_events` table if needed, adds columns that may be
 * missing from an existing table, and creates the lookup indexes.
 *
 * @param {object} db Open database handle exposing `exec` and `prepare`.
 */
function initializeTokenUsageDb(db) {
	const run = (sql) => db.exec(sql);
	run("PRAGMA journal_mode = WAL");
	run("PRAGMA busy_timeout = 5000");
	run(`
CREATE TABLE IF NOT EXISTS token_usage_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at_ms INTEGER NOT NULL,
created_at_utc TEXT NOT NULL,
trace_id TEXT NOT NULL,
session_id TEXT NOT NULL DEFAULT '',
user_id TEXT NOT NULL DEFAULT '',
source TEXT NOT NULL,
endpoint TEXT NOT NULL,
provider_name TEXT,
model TEXT NOT NULL,
input_tokens INTEGER NOT NULL DEFAULT 0,
output_tokens INTEGER NOT NULL DEFAULT 0,
cache_read_input_tokens INTEGER NOT NULL DEFAULT 0,
cache_creation_input_tokens INTEGER NOT NULL DEFAULT 0,
total_tokens INTEGER NOT NULL DEFAULT 0
)
`);
	// These must run before the index on user_id is created below.
	ensureColumn(db, "user_id", "TEXT NOT NULL DEFAULT ''");
	ensureColumn(db, "total_tokens", "INTEGER NOT NULL DEFAULT 0");
	const indexStatements = [
		`
CREATE INDEX IF NOT EXISTS idx_token_usage_events_created_at_ms
ON token_usage_events(created_at_ms)
`,
		`
CREATE INDEX IF NOT EXISTS idx_token_usage_events_model
ON token_usage_events(model)
`,
		`
CREATE INDEX IF NOT EXISTS idx_token_usage_events_trace_id
ON token_usage_events(trace_id)
`,
		`
CREATE INDEX IF NOT EXISTS idx_token_usage_events_session_id
ON token_usage_events(session_id)
`,
		`
CREATE INDEX IF NOT EXISTS idx_token_usage_events_user_id
ON token_usage_events(user_id)
`
	];
	for (const statement of indexStatements) run(statement);
}
/**
 * Adds column `name` to `token_usage_events` when `PRAGMA table_info` does
 * not list it yet.
 *
 * `name` and `definition` are interpolated into SQL verbatim.
 */
function ensureColumn(db, name, definition) {
	const columns = db.prepare("PRAGMA table_info(token_usage_events)").all();
	for (const column of columns) {
		if (column.name === name) return;
	}
	db.exec(`ALTER TABLE token_usage_events ADD COLUMN ${name} ${definition}`);
}
/**
 * Coerces a token count to a non-negative integer.
 * Anything that is not a finite number yields 0; fractions are floored.
 */
function normalizeToken(value) {
	const usable = typeof value === "number" && Number.isFinite(value);
	return usable ? Math.max(0, Math.floor(value)) : 0;
}
/**
 * Like `normalizeToken`, but keeps "not reported" distinguishable:
 * null and undefined yield undefined.
 */
function normalizeOptionalToken(value) {
	if (value === null || value === void 0) return void 0;
	return normalizeToken(value);
}
/** True when at least one token counter normalizes to a positive number. */
function hasAnyToken(tokens) {
	if (normalizeToken(tokens.input_tokens) > 0) return true;
	if (normalizeToken(tokens.output_tokens) > 0) return true;
	if (normalizeToken(tokens.cache_read_input_tokens) > 0) return true;
	if (normalizeToken(tokens.cache_creation_input_tokens) > 0) return true;
	return normalizeToken(tokens.total_tokens) > 0;
}
/**
 * Returns the explicit `total_tokens` when one is given, otherwise the sum of
 * the four normalized counters.
 */
function resolveTotalTokens(input) {
	const reportedTotal = normalizeOptionalToken(input.total_tokens);
	if (reportedTotal !== void 0) return reportedTotal;
	const inputTokens = normalizeToken(input.input_tokens);
	const outputTokens = normalizeToken(input.output_tokens);
	const cacheRead = normalizeToken(input.cache_read_input_tokens);
	const cacheCreation = normalizeToken(input.cache_creation_input_tokens);
	return inputTokens + outputTokens + cacheRead + cacheCreation;
}
/**
 * Inserts one usage event into `token_usage_events`.
 * Values are bound positionally, in the column order of the INSERT.
 */
async function writeTokenUsageEvent(event) {
	const db = await getDb();
	const insert = db.prepare(`
INSERT INTO token_usage_events (
created_at_ms,
created_at_utc,
trace_id,
session_id,
user_id,
source,
endpoint,
provider_name,
model,
input_tokens,
output_tokens,
cache_read_input_tokens,
cache_creation_input_tokens,
total_tokens
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
	const values = [
		event.created_at_ms,
		event.created_at_utc,
		event.trace_id,
		event.session_id,
		event.user_id,
		event.source,
		event.endpoint,
		event.provider_name,
		event.model,
		event.input_tokens,
		event.output_tokens,
		event.cache_read_input_tokens,
		event.cache_creation_input_tokens,
		event.total_tokens
	];
	insert.run(...values);
}
/**
 * Appends a write for `event` to the serialized write queue.
 * Does nothing when storage is disabled. A failed write is logged and the
 * queue keeps going.
 */
function enqueueTokenUsageWrite(event) {
	if (!isTokenUsageStorageEnabled()) return;
	const pendingWrite = writeQueue.then(() => writeTokenUsageEvent(event));
	writeQueue = pendingWrite.catch((error) => {
		consola.warn("Failed to record token usage", error);
	});
}
/**
 * Waits until the write queue is drained, including writes that were
 * enqueued while waiting.
 */
async function flushTokenUsageEvents() {
	let awaitedQueue;
	do {
		awaitedQueue = writeQueue;
		await awaitedQueue;
		// If the queue was replaced meanwhile, wait for the new tail too.
	} while (awaitedQueue !== writeQueue);
}
/**
 * Computes the half-open local-time range that contains `now`.
 *
 * "day" starts at local midnight, "week" on Monday at local midnight and
 * "month" on the first of the month. Any other period yields an empty range
 * starting and ending at `now`.
 *
 * @param {string} period "day", "week" or "month".
 * @param {Date} [now] Reference instant; it is not modified.
 * @returns {{ endMs: number, startMs: number }} Epoch milliseconds.
 */
function getPeriodRange(period, now = /* @__PURE__ */ new Date()) {
	const start = new Date(now);
	if (period === "day") {
		start.setHours(0, 0, 0, 0);
	} else if (period === "week") {
		// getDay() is 0 for Sunday; shift so that Monday is offset 0.
		const daysSinceMonday = (start.getDay() + 6) % 7;
		start.setDate(start.getDate() - daysSinceMonday);
		start.setHours(0, 0, 0, 0);
	} else if (period === "month") {
		start.setDate(1);
		start.setHours(0, 0, 0, 0);
	}
	const end = new Date(start);
	if (period === "day") {
		end.setDate(end.getDate() + 1);
	} else if (period === "week") {
		end.setDate(end.getDate() + 7);
	} else if (period === "month") {
		end.setMonth(end.getMonth() + 1);
	}
	return {
		endMs: end.getTime(),
		startMs: start.getTime()
	};
}
/** Returns a fresh totals record with every counter set to 0. */
function createEmptyTotals() {
	const totals = {};
	totals.cache_creation_input_tokens = 0;
	totals.cache_read_input_tokens = 0;
	totals.input_tokens = 0;
	totals.output_tokens = 0;
	totals.request_count = 0;
	totals.total_tokens = 0;
	return totals;
}
/**
 * Builds the summary returned when there is nothing to report: the range of
 * `period` around the current time, no per-model rows and zeroed totals.
 */
function createEmptySummary(period) {
	const { startMs, endMs } = getPeriodRange(period);
	const toUtc = (ms) => new Date(ms).toISOString();
	const range = {
		end_ms: endMs,
		end_utc: toUtc(endMs),
		start_ms: startMs,
		start_utc: toUtc(startMs)
	};
	return {
		byModel: [],
		period,
		range,
		totals: createEmptyTotals()
	};
}
/**
 * Builds an events page without items. `page` is floored and clamped to at
 * least 1; `pageSize` is floored and clamped to the range 1..100.
 *
 * @param {{ period: string, page: number, pageSize: number }} input
 */
function createEmptyEventsPage(input) {
	const { startMs, endMs } = getPeriodRange(input.period);
	const requestedPage = Math.floor(input.page);
	const requestedSize = Math.floor(input.pageSize);
	const page = Math.max(1, requestedPage);
	const pageSize = Math.min(100, Math.max(1, requestedSize));
	const toUtc = (ms) => new Date(ms).toISOString();
	return {
		items: [],
		page,
		page_size: pageSize,
		period: input.period,
		range: {
			end_ms: endMs,
			end_utc: toUtc(endMs),
			start_ms: startMs,
			start_utc: toUtc(startMs)
		},
		total: 0,
		total_pages: 1
	};
}
/**
 * Reads `row[key]` as a finite number. A missing row, a missing key or any
 * other value yields 0.
 */
function numberFromRow(row, key) {
	const candidate = row?.[key];
	if (typeof candidate !== "number") return 0;
	return Number.isFinite(candidate) ? candidate : 0;
}
/** Maps an aggregate query row to a totals record; missing values become 0. */
function totalsFromRow(row) {
	const read = (key) => numberFromRow(row, key);
	return {
		cache_creation_input_tokens: read("cache_creation_input_tokens"),
		cache_read_input_tokens: read("cache_read_input_tokens"),
		input_tokens: read("input_tokens"),
		output_tokens: read("output_tokens"),
		request_count: read("request_count"),
		total_tokens: read("total_tokens")
	};
}
/** Reads `row[key]` as a string; any non-string value yields "". */
function stringFromRow(row, key) {
	const candidate = row[key];
	if (typeof candidate === "string") return candidate;
	return "";
}
/** Reads `row[key]` as a string; any non-string value yields null. */
function nullableStringFromRow(row, key) {
	const candidate = row[key];
	if (typeof candidate === "string") return candidate;
	return null;
}
/**
 * Converts a `token_usage_events` row to the event shape returned to callers.
 * Numbers default to 0, strings to "", `model` to "unknown" and
 * `provider_name` to null.
 */
function usageEventFromRow(row) {
	const num = (key) => numberFromRow(row, key);
	const str = (key) => stringFromRow(row, key);
	const model = str("model");
	return {
		cache_creation_input_tokens: num("cache_creation_input_tokens"),
		cache_read_input_tokens: num("cache_read_input_tokens"),
		created_at_ms: num("created_at_ms"),
		created_at_utc: str("created_at_utc"),
		endpoint: str("endpoint"),
		id: num("id"),
		input_tokens: num("input_tokens"),
		model: model === "" ? "unknown" : model,
		output_tokens: num("output_tokens"),
		provider_name: nullableStringFromRow(row, "provider_name"),
		session_id: str("session_id"),
		source: str("source"),
		total_tokens: num("total_tokens"),
		trace_id: str("trace_id"),
		user_id: str("user_id")
	};
}
/**
 * Aggregates recorded usage for the current `period`: overall totals plus one
 * row per model, ordered by total tokens (descending) and then model name.
 *
 * Pending writes are flushed first. When storage is disabled an empty summary
 * is returned.
 *
 * @param {string} period "day", "week" or "month".
 */
async function getTokenUsageSummary(period) {
	if (!isTokenUsageStorageEnabled()) return createEmptySummary(period);
	await flushTokenUsageEvents();
	const { startMs, endMs } = getPeriodRange(period);
	const db = await getDb();
	const totalsStatement = db.prepare(`
SELECT
COUNT(*) AS request_count,
COALESCE(SUM(input_tokens), 0) AS input_tokens,
COALESCE(SUM(output_tokens), 0) AS output_tokens,
COALESCE(SUM(cache_read_input_tokens), 0) AS cache_read_input_tokens,
COALESCE(SUM(cache_creation_input_tokens), 0) AS cache_creation_input_tokens,
COALESCE(SUM(total_tokens), 0) AS total_tokens
FROM token_usage_events
WHERE created_at_ms >= ? AND created_at_ms < ?
`);
	const totalsRow = totalsStatement.get(startMs, endMs);
	const perModelStatement = db.prepare(`
SELECT
model,
COUNT(*) AS request_count,
COALESCE(SUM(input_tokens), 0) AS input_tokens,
COALESCE(SUM(output_tokens), 0) AS output_tokens,
COALESCE(SUM(cache_read_input_tokens), 0) AS cache_read_input_tokens,
COALESCE(SUM(cache_creation_input_tokens), 0) AS cache_creation_input_tokens,
COALESCE(SUM(total_tokens), 0) AS total_tokens
FROM token_usage_events
WHERE created_at_ms >= ? AND created_at_ms < ?
GROUP BY model
ORDER BY
total_tokens DESC,
model ASC
`);
	const byModel = [];
	for (const row of perModelStatement.all(startMs, endMs)) {
		byModel.push({
			...totalsFromRow(row),
			model: typeof row.model === "string" ? row.model : "unknown"
		});
	}
	return {
		byModel,
		period,
		range: {
			end_ms: endMs,
			end_utc: new Date(endMs).toISOString(),
			start_ms: startMs,
			start_utc: new Date(startMs).toISOString()
		},
		totals: totalsFromRow(totalsRow)
	};
}
/**
 * Returns one page of recorded usage events for the current period, newest
 * first, together with paging metadata.
 *
 * `page` is floored and clamped to at least 1; `pageSize` is floored and
 * clamped to 1..100. Pending writes are flushed first. When storage is
 * disabled an empty page is returned.
 *
 * @param {{ period: string, page: number, pageSize: number }} input
 */
async function getTokenUsageEventsPage(input) {
	if (!isTokenUsageStorageEnabled()) return createEmptyEventsPage(input);
	await flushTokenUsageEvents();
	const { startMs, endMs } = getPeriodRange(input.period);
	const page = Math.max(1, Math.floor(input.page));
	const pageSize = Math.min(100, Math.max(1, Math.floor(input.pageSize)));
	const offset = (page - 1) * pageSize;
	const db = await getDb();
	const countStatement = db.prepare(`
SELECT COUNT(*) AS total
FROM token_usage_events
WHERE created_at_ms >= ? AND created_at_ms < ?
`);
	const totalRow = countStatement.get(startMs, endMs);
	const pageStatement = db.prepare(`
SELECT
id,
created_at_ms,
created_at_utc,
trace_id,
session_id,
user_id,
source,
endpoint,
provider_name,
model,
input_tokens,
output_tokens,
cache_read_input_tokens,
cache_creation_input_tokens,
total_tokens
FROM token_usage_events
WHERE created_at_ms >= ? AND created_at_ms < ?
ORDER BY created_at_ms DESC, id DESC
LIMIT ? OFFSET ?
`);
	const rows = pageStatement.all(startMs, endMs, pageSize, offset);
	const total = numberFromRow(totalRow, "total");
	const items = [];
	for (const row of rows) items.push(usageEventFromRow(row));
	return {
		items,
		page,
		page_size: pageSize,
		period: input.period,
		range: {
			end_ms: endMs,
			end_utc: new Date(endMs).toISOString(),
			start_ms: startMs,
			start_utc: new Date(startMs).toISOString()
		},
		total,
		// Always report at least one page, even when there are no rows.
		total_pages: Math.max(1, Math.ceil(total / pageSize))
	};
}
/**
 * Flushes pending writes, closes the token-usage database and resets the
 * write queue. A WAL checkpoint is attempted just before closing; its
 * failure is ignored.
 */
async function closeUsageStore() {
	await flushTokenUsageEvents();
	const checkpointWal = (db) => {
		try {
			db.exec("PRAGMA wal_checkpoint(TRUNCATE)");
		} catch {}
	};
	await tokenUsageDbStore.close({ beforeClose: checkpointWal });
	writeQueue = Promise.resolve();
}
740
+ registerProcessCleanup(closeUsageStore);
741
+
742
+ //#endregion
743
+ //#region src/lib/token-usage/index.ts
744
+ const tokenUsageEventBus = new EventBus();
/**
 * Picks the trace id for a usage event: the trimmed explicit id, else the
 * trace id of the current request context, else a newly generated one.
 */
function resolveTraceId(traceId) {
	const explicit = traceId?.trim();
	if (explicit) return explicit;
	const ambient = requestContext.getStore()?.traceId;
	if (ambient) return ambient;
	return generateTraceId();
}
/**
 * Picks the session id for a usage event. Precedence: the request context's
 * `sessionAffinity`, then `sessionId`, then `fallbackSessionId`; each is
 * trimmed and skipped when empty. Returns "" when none is usable.
 */
function resolveTokenUsageSessionId(sessionId, fallbackSessionId) {
	const affinity = requestContext.getStore()?.sessionAffinity?.trim();
	if (affinity) return affinity;
	const explicit = sessionId?.trim();
	if (explicit) return explicit;
	const fallback = fallbackSessionId?.trim();
	if (fallback) return fallback;
	return "";
}
/**
 * Picks the user id stored with a usage event: the trimmed provider name for
 * "provider" events, otherwise the trimmed `state.userName`. Returns "" when
 * the chosen value is missing or empty.
 */
function resolveUserId(input) {
	const rawId = input.source === "provider" ? input.providerName : state.userName;
	return rawId?.trim() || "";
}
/**
 * Converts recorder input into the row shape stored in the database.
 *
 * Returns null when no token counter is positive, so empty usage is never
 * persisted. A missing or non-string `model` is stored as "unknown" instead
 * of throwing: usage recording runs inline in request handlers and must not
 * fail them.
 *
 * @param {object} input Usage counters plus endpoint, model, source and
 * optional providerName, sessionId, fallbackSessionId and traceId.
 * @returns {object | null}
 */
function toPersistedEvent(input) {
	if (!hasAnyToken(input)) return null;
	const now = /* @__PURE__ */ new Date();
	const model = typeof input.model === "string" ? input.model.trim() : "";
	return {
		cache_creation_input_tokens: normalizeToken(input.cache_creation_input_tokens),
		cache_read_input_tokens: normalizeToken(input.cache_read_input_tokens),
		created_at_ms: now.getTime(),
		created_at_utc: now.toISOString(),
		endpoint: input.endpoint,
		input_tokens: normalizeToken(input.input_tokens),
		model: model || "unknown",
		output_tokens: normalizeToken(input.output_tokens),
		provider_name: input.providerName?.trim() || null,
		session_id: resolveTokenUsageSessionId(input.sessionId, input.fallbackSessionId),
		source: input.source,
		total_tokens: resolveTotalTokens(input),
		trace_id: resolveTraceId(input.traceId),
		user_id: resolveUserId(input)
	};
}
775
+ tokenUsageEventBus.subscribe("token_usage.recorded", enqueueTokenUsageWrite);
/**
 * Publishes a "token_usage.recorded" event for `input`, unless the input
 * carries no tokens.
 */
function recordTokenUsageEvent(input) {
	const persisted = toPersistedEvent(input);
	if (persisted) tokenUsageEventBus.publish("token_usage.recorded", persisted);
}
/**
 * Creates a recorder bound to fixed `options` (endpoint, model, source, ...).
 * The returned function records a usage object merged with those options;
 * on key collisions the options win.
 */
function createTokenUsageRecorder(options) {
	return (usage) => {
		const merged = {
			...usage,
			...options
		};
		recordTokenUsageEvent(merged);
	};
}
/** Creates a usage recorder whose events are tagged with source "copilot". */
function createCopilotTokenUsageRecorder(options) {
	const copilotOptions = {
		...options,
		source: "copilot"
	};
	return createTokenUsageRecorder(copilotOptions);
}
/** Creates a usage recorder whose events are tagged with source "provider". */
function createProviderTokenUsageRecorder(options) {
	const providerOptions = {
		...options,
		source: "provider"
	};
	return createTokenUsageRecorder(providerOptions);
}
/**
 * Maps a chat-completions `usage` object to the internal counters.
 * Cached prompt tokens are reported separately and subtracted from the
 * prompt tokens, never going below 0.
 */
function normalizeOpenAIUsage(usage) {
	const cached = normalizeToken(usage?.prompt_tokens_details?.cached_tokens);
	const prompt = normalizeToken(usage?.prompt_tokens);
	const uncached = prompt - cached;
	return {
		cache_read_input_tokens: cached,
		input_tokens: uncached > 0 ? uncached : 0,
		output_tokens: normalizeToken(usage?.completion_tokens),
		total_tokens: normalizeOptionalToken(usage?.total_tokens)
	};
}
/**
 * Maps a Responses API `usage` object to the internal counters.
 * Cached input tokens are reported separately and subtracted from the input
 * tokens, never going below 0.
 */
function normalizeResponsesUsage(usage) {
	const cached = normalizeToken(usage?.input_tokens_details?.cached_tokens);
	const rawInput = normalizeToken(usage?.input_tokens);
	const uncached = rawInput - cached;
	return {
		cache_read_input_tokens: cached,
		input_tokens: uncached > 0 ? uncached : 0,
		output_tokens: normalizeToken(usage?.output_tokens),
		total_tokens: normalizeOptionalToken(usage?.total_tokens)
	};
}
/**
 * Maps an Anthropic-style `usage` object to the internal counters.
 * Counters that are absent stay undefined.
 */
function normalizeAnthropicUsage(usage) {
	const pick = (key) => normalizeOptionalToken(usage?.[key]);
	return {
		cache_creation_input_tokens: pick("cache_creation_input_tokens"),
		cache_read_input_tokens: pick("cache_read_input_tokens"),
		input_tokens: pick("input_tokens"),
		output_tokens: pick("output_tokens"),
		total_tokens: pick("total_tokens")
	};
}
/**
 * Merges two usage snapshots counter by counter: a value from `next` wins
 * unless it is null or undefined, in which case the `current` value is kept.
 */
function mergeAnthropicUsage(current, next) {
	const counters = [
		"cache_creation_input_tokens",
		"cache_read_input_tokens",
		"input_tokens",
		"output_tokens",
		"total_tokens"
	];
	const merged = {};
	for (const counter of counters) merged[counter] = next[counter] ?? current[counter];
	return merged;
}
839
+
251
840
  //#endregion
252
841
  //#region src/lib/copilot-rate-limit.ts
253
842
  const copilotRateLimitTypes = ["session", "weekly"];
@@ -352,23 +941,42 @@ async function handleCompletion$1(c) {
352
941
  logger$6.debug("Generated request ID:", requestId);
353
942
  const sessionId = getUUID(requestId);
354
943
  logger$6.debug("Extracted session ID:", sessionId);
944
+ const recordUsage = createCopilotTokenUsageRecorder({
945
+ endpoint: "chat_completions",
946
+ fallbackSessionId: sessionId,
947
+ model: payload.model
948
+ });
355
949
  const response = await createChatCompletions(payload, {
356
950
  requestId,
357
951
  sessionId
358
952
  });
359
953
  if (isNonStreaming$1(response)) {
360
954
  debugJson(logger$6, "Non-streaming response:", response);
955
+ recordUsage(normalizeOpenAIUsage(response.usage));
361
956
  return c.json(response);
362
957
  }
363
958
  logger$6.debug("Streaming response");
364
959
  return streamSSE(c, async (stream) => {
960
+ let usage = {};
365
961
  for await (const chunk of response) {
366
962
  debugJson(logger$6, "Streaming chunk:", chunk);
963
+ const parsedChunk = parseChatCompletionChunk(chunk);
964
+ if (parsedChunk?.usage) usage = normalizeOpenAIUsage(parsedChunk.usage);
367
965
  await stream.writeSSE(chunk);
368
966
  }
967
+ recordUsage(usage);
369
968
  });
370
969
  }
371
970
  const isNonStreaming$1 = (response) => Object.hasOwn(response, "choices");
/**
 * Parses the JSON payload of one streamed chunk.
 * Returns null for an empty payload, the "[DONE]" sentinel, or invalid JSON.
 */
const parseChatCompletionChunk = (chunk) => {
	const { data } = chunk;
	const hasPayload = Boolean(data) && data !== "[DONE]";
	if (!hasPayload) return null;
	let parsed = null;
	try {
		parsed = JSON.parse(data);
	} catch {
		parsed = null;
	}
	return parsed;
};
372
980
 
373
981
  //#endregion
374
982
  //#region src/routes/chat-completions/route.ts
@@ -401,6 +1009,13 @@ embeddingRoutes.post("/", async (c) => {
401
1009
  try {
402
1010
  const paylod = await c.req.json();
403
1011
  const response = await createEmbeddings(paylod);
1012
+ createCopilotTokenUsageRecorder({
1013
+ endpoint: "embeddings",
1014
+ model: paylod.model
1015
+ })({
1016
+ input_tokens: response.usage.prompt_tokens,
1017
+ output_tokens: 0
1018
+ });
404
1019
  return c.json(response);
405
1020
  } catch (error) {
406
1021
  return await forwardError(c, error);
@@ -2492,6 +3107,12 @@ function closeThinkingBlockIfOpen(state$1, events$1) {
2492
3107
  const handleWithChatCompletions = async (c, anthropicPayload, options) => {
2493
3108
  const { logger: logger$7, subagentMarker, requestId, sessionId, compactType } = options;
2494
3109
  const openAIPayload = translateToOpenAI(anthropicPayload);
3110
+ const recordUsage = createCopilotUsageRecorder({
3111
+ endpoint: "chat_completions",
3112
+ fallbackSessionId: sessionId,
3113
+ model: openAIPayload.model,
3114
+ payload: anthropicPayload
3115
+ });
2495
3116
  debugJson(logger$7, "Translated OpenAI request payload:", openAIPayload);
2496
3117
  const response = await createChatCompletions(openAIPayload, {
2497
3118
  subagentMarker,
@@ -2501,12 +3122,14 @@ const handleWithChatCompletions = async (c, anthropicPayload, options) => {
2501
3122
  });
2502
3123
  if (isNonStreaming(response)) {
2503
3124
  debugJson(logger$7, "Non-streaming response from Copilot:", response);
3125
+ recordUsage(normalizeOpenAIUsage(response.usage));
2504
3126
  const anthropicResponse = translateToAnthropic(response);
2505
3127
  debugJson(logger$7, "Translated Anthropic response:", anthropicResponse);
2506
3128
  return c.json(anthropicResponse);
2507
3129
  }
2508
3130
  logger$7.debug("Streaming response from Copilot");
2509
3131
  return streamSSE(c, async (stream) => {
3132
+ let usage = {};
2510
3133
  const streamState = {
2511
3134
  messageStartSent: false,
2512
3135
  contentBlockIndex: 0,
@@ -2519,6 +3142,7 @@ const handleWithChatCompletions = async (c, anthropicPayload, options) => {
2519
3142
  if (rawEvent.data === "[DONE]") break;
2520
3143
  if (!rawEvent.data) continue;
2521
3144
  const chunk = JSON.parse(rawEvent.data);
3145
+ if (chunk.usage) usage = normalizeOpenAIUsage(chunk.usage);
2522
3146
  const events$1 = translateChunkToAnthropicEvents(chunk, streamState);
2523
3147
  for (const event of events$1) {
2524
3148
  const eventData = JSON.stringify(event);
@@ -2529,11 +3153,18 @@ const handleWithChatCompletions = async (c, anthropicPayload, options) => {
2529
3153
  });
2530
3154
  }
2531
3155
  }
3156
+ recordUsage(usage);
2532
3157
  });
2533
3158
  };
2534
3159
  const handleWithResponsesApi = async (c, anthropicPayload, options) => {
2535
- const { logger: logger$7, subagentMarker, selectedModel, requestId, sessionId, compactType } = options;
3160
+ const { logger: logger$7, selectedModel,...requestOptions } = options;
2536
3161
  const responsesPayload = translateAnthropicMessagesToResponsesPayload(anthropicPayload);
3162
+ const recordUsage = createCopilotUsageRecorder({
3163
+ endpoint: "responses",
3164
+ fallbackSessionId: requestOptions.sessionId,
3165
+ model: responsesPayload.model,
3166
+ payload: anthropicPayload
3167
+ });
2537
3168
  applyResponsesApiContextManagement(responsesPayload, selectedModel?.capabilities.limits.max_prompt_tokens);
2538
3169
  compactInputByLatestCompaction(responsesPayload);
2539
3170
  debugJson(logger$7, "Translated Responses payload:", responsesPayload);
@@ -2541,15 +3172,13 @@ const handleWithResponsesApi = async (c, anthropicPayload, options) => {
2541
3172
  const response = await createResponses(responsesPayload, {
2542
3173
  vision,
2543
3174
  initiator,
2544
- subagentMarker,
2545
- requestId,
2546
- sessionId,
2547
- compactType
3175
+ ...requestOptions
2548
3176
  });
2549
3177
  if (responsesPayload.stream && isAsyncIterable$1(response)) {
2550
3178
  logger$7.debug("Streaming response from Copilot (Responses API)");
2551
3179
  return streamSSE(c, async (stream) => {
2552
3180
  const streamState = createResponsesStreamState();
3181
+ let usage = {};
2553
3182
  for await (const chunk of response) {
2554
3183
  if (chunk.event === "ping") {
2555
3184
  await stream.writeSSE({
@@ -2561,7 +3190,9 @@ const handleWithResponsesApi = async (c, anthropicPayload, options) => {
2561
3190
  const data = chunk.data;
2562
3191
  if (!data) continue;
2563
3192
  debugLazy(logger$7, () => ["Responses raw stream event:", data]);
2564
- const events$1 = translateResponsesStreamEvent(JSON.parse(data), streamState);
3193
+ const responseEvent = JSON.parse(data);
3194
+ if (responseEvent.type === "response.completed" || responseEvent.type === "response.failed" || responseEvent.type === "response.incomplete") usage = normalizeResponsesUsage(responseEvent.response.usage);
3195
+ const events$1 = translateResponsesStreamEvent(responseEvent, streamState);
2565
3196
  for (const event of events$1) {
2566
3197
  const eventData = JSON.stringify(event);
2567
3198
  debugLazy(logger$7, () => ["Translated Anthropic event:", eventData]);
@@ -2583,6 +3214,7 @@ const handleWithResponsesApi = async (c, anthropicPayload, options) => {
2583
3214
  data: JSON.stringify(errorEvent)
2584
3215
  });
2585
3216
  }
3217
+ recordUsage(usage);
2586
3218
  });
2587
3219
  }
2588
3220
  debugJsonTail(logger$7, "Non-streaming Responses result:", {
@@ -2590,12 +3222,19 @@ const handleWithResponsesApi = async (c, anthropicPayload, options) => {
2590
3222
  tailLength: 400
2591
3223
  });
2592
3224
  const anthropicResponse = translateResponsesResultToAnthropic(response);
3225
+ recordUsage(normalizeResponsesUsage(response.usage));
2593
3226
  debugJson(logger$7, "Translated Anthropic response:", anthropicResponse);
2594
3227
  return c.json(anthropicResponse);
2595
3228
  };
2596
3229
  const handleWithMessagesApi = async (c, anthropicPayload, options) => {
2597
3230
  const { logger: logger$7, anthropicBetaHeader, subagentMarker, selectedModel, requestId, sessionId, compactType } = options;
2598
3231
  prepareMessagesApiPayload(anthropicPayload, selectedModel);
3232
+ const recordUsage = createCopilotUsageRecorder({
3233
+ endpoint: "messages",
3234
+ fallbackSessionId: sessionId,
3235
+ model: anthropicPayload.model,
3236
+ payload: anthropicPayload
3237
+ });
2599
3238
  debugJson(logger$7, "Translated Messages payload:", anthropicPayload);
2600
3239
  const response = await createMessages(anthropicPayload, anthropicBetaHeader, {
2601
3240
  subagentMarker,
@@ -2606,27 +3245,47 @@ const handleWithMessagesApi = async (c, anthropicPayload, options) => {
2606
3245
  if (isAsyncIterable$1(response)) {
2607
3246
  logger$7.debug("Streaming response from Copilot (Messages API)");
2608
3247
  return streamSSE(c, async (stream) => {
3248
+ let usage = {};
2609
3249
  for await (const event of response) {
2610
3250
  const eventName = event.event;
2611
3251
  const data = event.data ?? "";
2612
3252
  if (data === "[DONE]") break;
2613
3253
  if (!data) continue;
2614
3254
  debugLazy(logger$7, () => ["Messages raw stream event:", data]);
3255
+ const parsedEvent = parseAnthropicStreamEvent(data);
3256
+ if (parsedEvent?.type === "message_start") usage = mergeAnthropicUsage(usage, normalizeAnthropicUsage(parsedEvent.message.usage));
3257
+ else if (parsedEvent?.type === "message_delta") usage = mergeAnthropicUsage(usage, normalizeAnthropicUsage(parsedEvent.usage));
2615
3258
  await stream.writeSSE({
2616
3259
  event: eventName,
2617
3260
  data
2618
3261
  });
2619
3262
  }
3263
+ recordUsage(usage);
2620
3264
  });
2621
3265
  }
2622
3266
  debugJsonTail(logger$7, "Non-streaming Messages result:", {
2623
3267
  value: response,
2624
3268
  tailLength: 400
2625
3269
  });
3270
+ recordUsage(normalizeAnthropicUsage(response.usage));
2626
3271
  return c.json(response);
2627
3272
  };
2628
3273
  const isNonStreaming = (response) => Object.hasOwn(response, "choices");
2629
3274
  const isAsyncIterable$1 = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
3275
/**
 * Creates a usage recorder for a Copilot-backed request by delegating to
 * `createCopilotTokenUsageRecorder`.
 *
 * @param {object} options
 * @param {string} options.endpoint - Endpoint label forwarded unchanged.
 * @param {string} [options.fallbackSessionId] - Forwarded unchanged.
 * @param {string} options.model - Model name forwarded unchanged.
 * @param {object} options.payload - Request payload; its metadata supplies the session id.
 * @returns {*} Whatever `createCopilotTokenUsageRecorder` returns (callers invoke it with a usage object).
 */
const createCopilotUsageRecorder = (options) => {
  const { endpoint, fallbackSessionId, model, payload } = options;
  return createCopilotTokenUsageRecorder({
    endpoint,
    fallbackSessionId,
    model,
    sessionId: getMetadataSessionId(payload)
  });
};
3281
/**
 * Extracts the session id encoded in `payload.metadata.user_id`.
 * A missing `metadata` is tolerated: `parseUserIdMetadata` then receives `undefined`.
 *
 * @param {object} payload - Request payload.
 * @returns {*} The `sessionId` field of the parsed user-id metadata.
 */
const getMetadataSessionId = (payload) => {
  const { sessionId } = parseUserIdMetadata(payload.metadata?.user_id);
  return sessionId;
};
3282
/**
 * Best-effort parse of one SSE `data` payload into a stream event object.
 *
 * Parsing failures are deliberately not raised: the parsed event is only used
 * for bookkeeping, so a malformed payload must not interrupt the caller.
 *
 * @param {string} data - Raw `data` field of a server-sent event.
 * @returns {object | null} The parsed event object, or `null` when `data` is
 *   not valid JSON or decodes to something other than a plain JSON object
 *   (a primitive, `null`, or an array).
 */
const parseAnthropicStreamEvent = (data) => {
  let parsed;
  try {
    parsed = JSON.parse(data);
  } catch {
    return null;
  }
  // Valid JSON is not necessarily an event: "42" or "\"text\"" parse fine.
  // Only hand back objects so callers get either an event or null.
  const isEventObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
  return isEventObject ? parsed : null;
};
2630
3289
 
2631
3290
  //#endregion
2632
3291
  //#region src/lib/subagent.ts
@@ -2916,44 +3575,20 @@ async function handleProviderMessages(c) {
2916
3575
  throw new HTTPError("Failed to create responses", upstreamResponse);
2917
3576
  }
2918
3577
  const contentType = upstreamResponse.headers.get("content-type") ?? "";
2919
- if (Boolean(payload.stream) && contentType.includes("text/event-stream")) {
2920
- logger$3.debug("provider.messages.streaming");
2921
- return streamSSE(c, async (stream) => {
2922
- for await (const chunk of events(upstreamResponse)) {
2923
- logger$3.debug("provider.messages.raw_stream_event:", chunk.data);
2924
- const eventName = chunk.event;
2925
- if (eventName === "ping") {
2926
- await stream.writeSSE({
2927
- event: "ping",
2928
- data: "{\"type\":\"ping\"}"
2929
- });
2930
- continue;
2931
- }
2932
- let data = chunk.data;
2933
- if (!data) continue;
2934
- if (chunk.data === "[DONE]") break;
2935
- try {
2936
- const parsed = JSON.parse(data);
2937
- if (parsed.type === "message_start") adjustInputTokens(providerConfig, parsed.message.usage);
2938
- else if (parsed.type === "message_delta") adjustInputTokens(providerConfig, parsed.usage);
2939
- data = JSON.stringify(parsed);
2940
- } catch (error) {
2941
- logger$3.error("provider.messages.streaming.adjust_tokens_error", {
2942
- error,
2943
- originalData: data
2944
- });
2945
- }
2946
- await stream.writeSSE({
2947
- event: eventName,
2948
- data
2949
- });
2950
- }
2951
- });
2952
- }
3578
+ if (Boolean(payload.stream) && contentType.includes("text/event-stream")) return streamProviderMessages({
3579
+ c,
3580
+ payload,
3581
+ provider,
3582
+ providerConfig,
3583
+ upstreamResponse
3584
+ });
2953
3585
  const jsonBody = await upstreamResponse.json();
2954
- adjustInputTokens(providerConfig, jsonBody.usage);
2955
- debugJson(logger$3, "provider.messages.no_stream result:", jsonBody);
2956
- return c.json(jsonBody);
3586
+ return respondProviderMessagesJson(c, {
3587
+ body: jsonBody,
3588
+ payload,
3589
+ provider,
3590
+ providerConfig
3591
+ });
2957
3592
  } catch (error) {
2958
3593
  logger$3.error("provider.messages.error", {
2959
3594
  provider,
@@ -2962,6 +3597,81 @@ async function handleProviderMessages(c) {
2962
3597
  throw error;
2963
3598
  }
2964
3599
  }
3600
/**
 * Relays an upstream provider SSE response to the client as an SSE stream
 * while accumulating the usage reported by the forwarded events.
 *
 * Per upstream event:
 * - `ping` events are re-emitted with a fixed `{"type":"ping"}` payload;
 * - events without data are skipped;
 * - a `[DONE]` payload ends the relay;
 * - everything else goes through `parseProviderStreamEvent`; if that yields a
 *   result, its usage is merged in and its re-serialized data is forwarded,
 *   otherwise the raw data is forwarded untouched.
 *
 * The usage recorder is invoked once, after the loop finishes.
 *
 * @param {object} args
 * @param {object} args.c - Request context handed to `streamSSE`.
 * @param {object} args.payload - Original request payload.
 * @param {string} args.provider - Provider name.
 * @param {object} args.providerConfig - Provider configuration.
 * @param {Response} args.upstreamResponse - Upstream event-stream response.
 * @returns {*} The result of `streamSSE`.
 */
const streamProviderMessages = ({ c, payload, provider, providerConfig, upstreamResponse }) => {
  logger$3.debug("provider.messages.streaming");
  const recordUsage = createProviderMessagesUsageRecorder(payload, provider);
  return streamSSE(c, async (stream) => {
    let accumulatedUsage = {};
    for await (const upstreamEvent of events(upstreamResponse)) {
      logger$3.debug("provider.messages.raw_stream_event:", upstreamEvent.data);
      const eventName = upstreamEvent.event;
      if (eventName === "ping") {
        await stream.writeSSE({ event: "ping", data: "{\"type\":\"ping\"}" });
        continue;
      }
      const rawData = upstreamEvent.data;
      if (!rawData) continue;
      if (rawData === "[DONE]") break;
      const translated = parseProviderStreamEvent(rawData, providerConfig);
      if (translated) accumulatedUsage = mergeAnthropicUsage(accumulatedUsage, translated.usage);
      await stream.writeSSE({
        event: eventName,
        // Fall back to the untouched payload when the event could not be parsed.
        data: translated ? translated.data : rawData
      });
    }
    recordUsage(accumulatedUsage);
  });
};
3631
/**
 * Parses one provider stream payload, applies `adjustInputTokens` to the
 * usage carried by `message_start` / `message_delta` events, and returns the
 * re-serialized payload together with the normalized usage.
 *
 * @param {string} data - Raw SSE `data` payload.
 * @param {object} providerConfig - Provider configuration passed to `adjustInputTokens`.
 * @returns {{data: string, usage: object, model?: *} | null}
 *   The re-serialized event and its usage (`{}` for event types without usage;
 *   `model` is present only for `message_start`), or `null` when anything in
 *   the process throws. The failure is logged, not propagated.
 */
const parseProviderStreamEvent = (data, providerConfig) => {
  try {
    const event = JSON.parse(data);
    switch (event.type) {
      case "message_start": {
        const { message } = event;
        // Adjust before serializing so the forwarded payload carries the adjusted counts.
        adjustInputTokens(providerConfig, message.usage);
        return {
          data: JSON.stringify(event),
          model: message.model,
          usage: normalizeAnthropicUsage(message.usage)
        };
      }
      case "message_delta": {
        adjustInputTokens(providerConfig, event.usage);
        return {
          data: JSON.stringify(event),
          usage: normalizeAnthropicUsage(event.usage)
        };
      }
      default:
        return {
          data: JSON.stringify(event),
          usage: {}
        };
    }
  } catch (error) {
    logger$3.error("provider.messages.streaming.adjust_tokens_error", {
      error,
      originalData: data
    });
    return null;
  }
};
3661
/**
 * Answers a non-streaming provider request: adjusts the usage on the upstream
 * body in place, records the normalized usage, logs the body, and returns it
 * as JSON.
 *
 * @param {object} c - Request context used to build the JSON response.
 * @param {object} options
 * @param {object} options.body - Parsed upstream JSON body (its `usage` may be mutated).
 * @param {object} options.payload - Original request payload.
 * @param {string} options.provider - Provider name.
 * @param {object} options.providerConfig - Provider configuration.
 * @returns {*} The result of `c.json(body)`.
 */
const respondProviderMessagesJson = (c, options) => {
  const recordUsage = createProviderMessagesUsageRecorder(options.payload, options.provider);
  const responseBody = options.body;
  // Adjustment happens first so the recorded usage matches what the client receives.
  adjustInputTokens(options.providerConfig, responseBody.usage);
  const normalizedUsage = normalizeAnthropicUsage(responseBody.usage);
  recordUsage(normalizedUsage);
  debugJson(logger$3, "provider.messages.no_stream result:", responseBody);
  return c.json(responseBody);
};
3669
/**
 * Creates the usage recorder for the provider messages endpoint by delegating
 * to `createProviderTokenUsageRecorder` with the fixed endpoint label
 * "provider_messages".
 *
 * @param {object} payload - Request payload; supplies the model and the user-id metadata.
 * @param {string} provider - Provider name, passed on as `providerName`.
 * @returns {*} Whatever `createProviderTokenUsageRecorder` returns (callers invoke it with a usage object).
 */
const createProviderMessagesUsageRecorder = (payload, provider) => {
  const { sessionId } = parseUserIdMetadata(payload.metadata?.user_id);
  return createProviderTokenUsageRecorder({
    endpoint: "provider_messages",
    model: payload.model,
    providerName: provider,
    sessionId
  });
};
2965
3675
  const adjustInputTokens = (providerConfig, usage) => {
2966
3676
  if (!providerConfig.adjustInputTokens || !usage) return;
2967
3677
  usage.input_tokens = Math.max(0, (usage.input_tokens ?? 0) - (usage.cache_read_input_tokens ?? 0) - (usage.cache_creation_input_tokens ?? 0));
@@ -3062,6 +3772,11 @@ const handleResponses = async (c) => {
3062
3772
  logger$1.debug("Generated request ID:", requestId);
3063
3773
  const sessionId = getUUID(requestId);
3064
3774
  logger$1.debug("Extracted session ID:", sessionId);
3775
+ const recordUsage = createCopilotTokenUsageRecorder({
3776
+ endpoint: "responses",
3777
+ fallbackSessionId: sessionId,
3778
+ model: payload.model
3779
+ });
3065
3780
  useFunctionApplyPatch(payload);
3066
3781
  removeUnsupportedTools(payload);
3067
3782
  if (!isResponsesApiWebSearchEnabled()) removeWebSearchTool(payload);
@@ -3085,8 +3800,11 @@ const handleResponses = async (c) => {
3085
3800
  logger$1.debug("Forwarding native Responses stream");
3086
3801
  return streamSSE(c, async (stream) => {
3087
3802
  const idTracker = createStreamIdTracker();
3803
+ let usage = {};
3088
3804
  for await (const chunk of response) {
3089
3805
  debugJson(logger$1, "Responses stream chunk:", chunk);
3806
+ const parsedEvent = parseResponsesStreamEvent(chunk);
3807
+ if (parsedEvent?.type === "response.completed" || parsedEvent?.type === "response.failed" || parsedEvent?.type === "response.incomplete") usage = normalizeResponsesUsage(parsedEvent.response.usage);
3090
3808
  const processedData = fixStreamIds(chunk.data ?? "", chunk.event, idTracker);
3091
3809
  await stream.writeSSE({
3092
3810
  id: chunk.id,
@@ -3094,16 +3812,27 @@ const handleResponses = async (c) => {
3094
3812
  data: processedData
3095
3813
  });
3096
3814
  }
3815
+ recordUsage(usage);
3097
3816
  });
3098
3817
  }
3099
3818
  debugJsonTail(logger$1, "Forwarding native Responses result:", {
3100
3819
  value: response,
3101
3820
  tailLength: 400
3102
3821
  });
3822
+ recordUsage(normalizeResponsesUsage(response.usage));
3103
3823
  return c.json(response);
3104
3824
  };
3105
3825
  const isAsyncIterable = (value) => Boolean(value) && typeof value[Symbol.asyncIterator] === "function";
3106
3826
  const isStreamingRequested = (payload) => Boolean(payload.stream);
3827
/**
 * Best-effort parse of a Responses stream chunk into an event object.
 *
 * Parsing failures are deliberately not raised: the parsed event is only used
 * for bookkeeping, and the chunk itself is forwarded by the caller regardless.
 *
 * @param {{data?: string}} chunk - One server-sent event from the stream.
 * @returns {object | null} The parsed event object, or `null` when the chunk
 *   has no data, carries the `[DONE]` sentinel, is not valid JSON, or decodes
 *   to something other than a plain JSON object (a primitive, `null`, or an array).
 */
const parseResponsesStreamEvent = (chunk) => {
  const data = chunk.data;
  if (!data || data === "[DONE]") return null;
  let parsed;
  try {
    parsed = JSON.parse(data);
  } catch {
    return null;
  }
  // Valid JSON is not necessarily an event: "42" or "\"text\"" parse fine.
  // Only hand back objects so callers get either an event or null.
  const isEventObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
  return isEventObject ? parsed : null;
};
3107
3836
  const useFunctionApplyPatch = (payload) => {
3108
3837
  if (getConfig().useFunctionApplyPatch ?? true) {
3109
3838
  logger$1.debug("Using function tool apply_patch for responses");
@@ -3161,6 +3890,39 @@ responsesRoutes.post("/", async (c) => {
3161
3890
  }
3162
3891
  });
3163
3892
 
3893
+ //#endregion
3894
+ //#region src/routes/token-usage/route.ts
3895
// Router for the token-usage endpoints.
const tokenUsageRoute = new Hono();

// Period values accepted by the `period` query parameter.
const periods = new Set(["day", "week", "month"]);

// Page size used by the events listing when `page_size` is absent or invalid.
const DEFAULT_EVENTS_PAGE_SIZE = 20;
3902
/**
 * Validates a `period` query value against the accepted set.
 *
 * @param {string | undefined} value - Raw query value.
 * @returns {string} `value` when it is one of the accepted periods, otherwise "day".
 */
function parsePeriod(value) {
  if (periods.has(value)) return value;
  return "day";
}
3905
/**
 * Parses a query-string value as a strictly positive integer.
 *
 * The value must consist solely of decimal digits (surrounding whitespace is
 * ignored). Partially numeric input such as "12abc", "1.5" or "1e3" is
 * rejected rather than silently truncated, and digit strings that exceed the
 * safe-integer range are rejected because they cannot be represented exactly.
 *
 * @param {string | undefined | null} value - Raw query value.
 * @param {number} fallback - Returned whenever `value` is missing or invalid.
 * @returns {number} The parsed integer (> 0), or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const text = String(value ?? "").trim();
  if (!/^\d+$/.test(text)) return fallback;
  const parsed = Number(text);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}
3909
// GET / returns the usage summary for the requested period.
// Unrecognized periods are coerced by parsePeriod.
tokenUsageRoute.get("/", async (c) => {
  const requestedPeriod = c.req.query("period");
  const summary = await getTokenUsageSummary(parsePeriod(requestedPeriod));
  return c.json(summary);
});

// GET /events returns one page of usage events for the requested period.
// `page` defaults to 1 and `page_size` to DEFAULT_EVENTS_PAGE_SIZE when
// missing or invalid.
tokenUsageRoute.get("/events", async (c) => {
  const readQuery = (name) => c.req.query(name);
  const period = parsePeriod(readQuery("period"));
  const page = parsePositiveInt(readQuery("page"), 1);
  const pageSize = parsePositiveInt(readQuery("page_size"), DEFAULT_EVENTS_PAGE_SIZE);
  const eventsPage = await getTokenUsageEventsPage({ page, pageSize, period });
  return c.json(eventsPage);
});
3925
+
3164
3926
  //#endregion
3165
3927
  //#region src/routes/token/route.ts
3166
3928
  const tokenRoute = new Hono();
@@ -3210,6 +3972,7 @@ server.route("/chat/completions", completionRoutes);
3210
3972
  server.route("/models", modelRoutes);
3211
3973
  server.route("/embeddings", embeddingRoutes);
3212
3974
  server.route("/usage", usageRoute);
3975
+ server.route("/token-usage", tokenUsageRoute);
3213
3976
  server.route("/token", tokenRoute);
3214
3977
  server.route("/responses", responsesRoutes);
3215
3978
  server.route("/v1/chat/completions", completionRoutes);
@@ -3222,4 +3985,4 @@ server.route("/:provider/v1/models", providerModelRoutes);
3222
3985
 
3223
3986
  //#endregion
3224
3987
  export { server };
3225
- //# sourceMappingURL=server-CA39Gy7x.js.map
3988
+ //# sourceMappingURL=server-D1nq9oGf.js.map