@posthog/ai 7.3.1 → 7.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,10 +2,11 @@ import { OpenAI, AzureOpenAI } from 'openai';
  import { Buffer } from 'buffer';
  import * as uuid from 'uuid';
  import { v4 } from 'uuid';
+ import { uuidv7 } from '@posthog/core';
  import AnthropicOriginal from '@anthropic-ai/sdk';
  import { GoogleGenAI } from '@google/genai';

- var version = "7.3.1";
+ var version = "7.4.0";

  // Type guards for safer type checking
  const isString = value => {
@@ -473,6 +474,33 @@ function addDefaults(params) {
  traceId: params.traceId ?? v4()
  };
  }
+ const sendEventWithErrorToPosthog = async ({
+ client,
+ traceId,
+ error,
+ ...args
+ }) => {
+ const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
+ const properties = {
+ client,
+ traceId,
+ httpStatus,
+ error: JSON.stringify(error),
+ ...args
+ };
+ const enrichedError = error;
+ if (client.options?.enableExceptionAutocapture) {
+ // assign a uuid that can be used to link the trace and exception events
+ const exceptionId = uuidv7();
+ client.captureException(error, undefined, {
+ $ai_trace_id: traceId
+ }, exceptionId);
+ enrichedError.__posthog_previously_captured_error = true;
+ properties.exceptionId = exceptionId;
+ }
+ await sendEventToPosthog(properties);
+ return enrichedError;
+ };
  const sendEventToPosthog = async ({
  client,
  eventType = AIEvent.Generation,
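Note: every provider wrapper below now funnels failures through this new helper. It derives `httpStatus` from the thrown error, stringifies the error for the `$ai_generation` event, and, when `enableExceptionAutocapture` is set on the client, also captures a linked exception event keyed by a shared uuidv7 id. A minimal consumer-side sketch (not part of this diff; client construction follows the posthog-node and @posthog/ai docs):

```ts
import { PostHog } from 'posthog-node';
import { OpenAI } from '@posthog/ai';

const phClient = new PostHog('<ph_project_api_key>', {
  host: 'https://us.i.posthog.com',
  // Opt-in: without this flag only $ai_is_error/$ai_error are set on the event.
  enableExceptionAutocapture: true,
});

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY ?? '', posthog: phClient });

try {
  await openai.chat.completions.create({
    model: 'gpt-4.1-mini',
    messages: [{ role: 'user', content: 'hello' }],
  });
} catch (err) {
  // The wrapper rethrows the enriched error; the marker below signals that
  // captureException was already called for it, preventing double capture:
  // (err as any).__posthog_previously_captured_error === true
  throw err;
}
```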
@@ -487,8 +515,8 @@ const sendEventToPosthog = async ({
  params,
  httpStatus = 200,
  usage = {},
- isError = false,
  error,
+ exceptionId,
  tools,
  captureImmediate = false
  }) => {
@@ -500,10 +528,11 @@ const sendEventToPosthog = async ({
  const safeOutput = sanitizeValues(output);
  const safeError = sanitizeValues(error);
  let errorData = {};
- if (isError) {
+ if (error) {
  errorData = {
  $ai_is_error: true,
- $ai_error: safeError
+ $ai_error: safeError,
+ $exception_event_id: exceptionId
  };
  }
  let costOverrideData = {};
@@ -569,6 +598,7 @@ const sendEventToPosthog = async ({
  } else {
  client.capture(event);
  }
+ return Promise.resolve();
  };
  function formatOpenAIResponsesInput(input, instructions) {
  const messages = [];
@@ -976,8 +1006,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  tools: availableTools
  });
  } catch (error) {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -987,14 +1016,13 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error
  });
+ throw enrichedError;
  }
  })();
  // Return the other stream to the user
@@ -1047,7 +1075,6 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -1130,8 +1157,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  tools: availableTools
  });
  } catch (error) {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -1141,14 +1167,13 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
+ throw enrichedError;
  }
  })();
  return stream2;
@@ -1202,7 +1227,6 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -1244,8 +1268,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  });
  return result;
  }, async error => {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -1255,15 +1278,13 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
- throw error;
+ throw enrichedError;
  });
  return wrappedPromise;
  } finally {
@@ -1323,7 +1344,6 @@ let WrappedEmbeddings$1 = class WrappedEmbeddings extends Embeddings {
  usage: {
  inputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -1391,8 +1411,7 @@ class WrappedTranscriptions extends Transcriptions {
  tools: availableTools
  });
  } catch (error) {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -1402,14 +1421,13 @@ class WrappedTranscriptions extends Transcriptions {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
+ throw enrichedError;
  }
  })();
  return stream2;
@@ -1439,8 +1457,7 @@ class WrappedTranscriptions extends Transcriptions {
  return result;
  }
  }, async error => {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -1450,15 +1467,13 @@ class WrappedTranscriptions extends Transcriptions {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
- throw error;
+ throw enrichedError;
  });
  return wrappedPromise;
  }
@@ -1609,8 +1624,7 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
  usage
  });
  } catch (error) {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -1620,14 +1634,13 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
+ throw enrichedError;
  }
  })();
  // Return the other stream to the user
@@ -1676,7 +1689,6 @@ class WrappedCompletions extends AzureOpenAI.Chat.Completions {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -1745,8 +1757,7 @@ class WrappedResponses extends AzureOpenAI.Responses {
  usage
  });
  } catch (error) {
- const httpStatus = error && typeof error === 'object' && 'status' in error ? error.status ?? 500 : 500;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: openAIParams.model,
@@ -1756,14 +1767,13 @@ class WrappedResponses extends AzureOpenAI.Responses {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
+ throw enrichedError;
  }
  })();
  return stream2;
@@ -1811,7 +1821,6 @@ class WrappedResponses extends AzureOpenAI.Responses {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -1863,7 +1872,6 @@ class WrappedResponses extends AzureOpenAI.Responses {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -1921,7 +1929,6 @@ class WrappedEmbeddings extends AzureOpenAI.Embeddings {
  usage: {
  inputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -2249,7 +2256,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  return result;
  } catch (error) {
  const modelId = model.modelId;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: phClient,
  distinctId: mergedOptions.posthogDistinctId,
  traceId: mergedOptions.posthogTraceId ?? v4(),
@@ -2260,17 +2267,15 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  latency: 0,
  baseURL: '',
  params: mergedParams,
- httpStatus: error?.status ? error.status : 500,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: truncate(JSON.stringify(error)),
+ error: error,
  tools: availableTools,
  captureImmediate: mergedOptions.posthogCaptureImmediate
  });
- throw error;
+ throw enrichedError;
  }
  },
  doStream: async params => {
@@ -2407,7 +2412,7 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  ...rest
  };
  } catch (error) {
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: phClient,
  distinctId: mergedOptions.posthogDistinctId,
  traceId: mergedOptions.posthogTraceId ?? v4(),
@@ -2418,17 +2423,15 @@ const wrapVercelLanguageModel = (model, phClient, options) => {
  latency: 0,
  baseURL: '',
  params: mergedParams,
- httpStatus: error?.status ? error.status : 500,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: truncate(JSON.stringify(error)),
+ error: error,
  tools: availableTools,
  captureImmediate: mergedOptions.posthogCaptureImmediate
  });
- throw error;
+ throw enrichedError;
  }
  }
  };
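Note: the two Vercel hunks apply the same refactor to `doGenerate` and `doStream`; the explicit `httpStatus` and `truncate(JSON.stringify(error))` lines disappear because the helper now owns both. A hedged usage sketch, assuming the package's documented `withTracing` export for the Vercel AI SDK (option names taken from the diff above):

```ts
import { PostHog } from 'posthog-node';
import { withTracing } from '@posthog/ai';
import { openai } from '@ai-sdk/openai';
import { generateText } from 'ai';

const phClient = new PostHog('<ph_project_api_key>', { enableExceptionAutocapture: true });

const model = withTracing(openai('gpt-4.1-mini'), phClient, {
  posthogDistinctId: 'user_123',   // optional
  posthogTraceId: 'trace_456',     // optional; defaults to a v4 uuid per the diff
  posthogCaptureImmediate: false,  // optional
});

// A failing call now throws the enriched error only after the error event
// (and the linked exception, when autocapture is enabled) has been sent.
const { text } = await generateText({ model, prompt: 'hello' });
```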
@@ -2585,8 +2588,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
  tools: availableTools
  });
  } catch (error) {
- // error handling
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: anthropicParams.model,
@@ -2596,14 +2598,13 @@ class WrappedMessages extends AnthropicOriginal.Messages {
  latency: 0,
  baseURL: this.baseURL,
  params: body,
- httpStatus: error?.status ? error.status : 500,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
+ throw enrichedError;
  }
  })();
  // Return the other stream to the user
@@ -2654,7 +2655,6 @@ class WrappedMessages extends AnthropicOriginal.Messages {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
  error: JSON.stringify(error)
  });
  throw error;
@@ -2714,7 +2714,7 @@ class WrappedModels {
  return response;
  } catch (error) {
  const latency = (Date.now() - startTime) / 1000;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: geminiParams.model,
@@ -2724,15 +2724,13 @@ class WrappedModels {
  latency,
  baseURL: 'https://generativelanguage.googleapis.com',
  params: params,
- httpStatus: error?.status ?? 500,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
- throw error;
+ throw enrichedError;
  }
  }
  async *generateContentStream(params) {
@@ -2834,7 +2832,7 @@ class WrappedModels {
  });
  } catch (error) {
  const latency = (Date.now() - startTime) / 1000;
- await sendEventToPosthog({
+ const enrichedError = await sendEventWithErrorToPosthog({
  client: this.phClient,
  ...posthogParams,
  model: geminiParams.model,
@@ -2844,15 +2842,13 @@ class WrappedModels {
  latency,
  baseURL: 'https://generativelanguage.googleapis.com',
  params: params,
- httpStatus: error?.status ?? 500,
  usage: {
  inputTokens: 0,
  outputTokens: 0
  },
- isError: true,
- error: JSON.stringify(error)
+ error: error
  });
- throw error;
+ throw enrichedError;
  }
  }
  formatPartsAsContentBlocks(parts) {
@@ -3232,6 +3228,64 @@ function mapKeys(fields, mapper, map) {
  return mapped;
  }

+ //#region src/load/validation.ts
+ /**
+ * Sentinel key used to mark escaped user objects during serialization.
+ *
+ * When a plain object contains 'lc' key (which could be confused with LC objects),
+ * we wrap it as `{"__lc_escaped__": {...original...}}`.
+ */
+ const LC_ESCAPED_KEY = "__lc_escaped__";
+ /**
+ * Check if an object needs escaping to prevent confusion with LC objects.
+ *
+ * An object needs escaping if:
+ * 1. It has an `'lc'` key (could be confused with LC serialization format)
+ * 2. It has only the escape key (would be mistaken for an escaped object)
+ */
+ function needsEscaping(obj) {
+ return "lc" in obj || Object.keys(obj).length === 1 && LC_ESCAPED_KEY in obj;
+ }
+ /**
+ * Wrap an object in the escape marker.
+ *
+ * @example
+ * ```typescript
+ * {"key": "value"} // becomes {"__lc_escaped__": {"key": "value"}}
+ * ```
+ */
+ function escapeObject(obj) {
+ return { [LC_ESCAPED_KEY]: obj };
+ }
+ /**
+ * Check if an object looks like a Serializable instance (duck typing).
+ */
+ function isSerializableLike(obj) {
+ return obj !== null && typeof obj === "object" && "lc_serializable" in obj && typeof obj.toJSON === "function";
+ }
+ /**
+ * Escape a value if it needs escaping (contains `lc` key).
+ *
+ * This is a simpler version of `serializeValue` that doesn't handle Serializable
+ * objects - it's meant to be called on kwargs values that have already been
+ * processed by `toJSON()`.
+ *
+ * @param value - The value to potentially escape.
+ * @returns The value with any `lc`-containing objects wrapped in escape markers.
+ */
+ function escapeIfNeeded(value) {
+ if (value !== null && typeof value === "object" && !Array.isArray(value)) {
+ if (isSerializableLike(value)) return value;
+ const record = value;
+ if (needsEscaping(record)) return escapeObject(record);
+ const result = {};
+ for (const [key, val] of Object.entries(record)) result[key] = escapeIfNeeded(val);
+ return result;
+ }
+ if (Array.isArray(value)) return value.map((item) => escapeIfNeeded(item));
+ return value;
+ }
+
  //#region src/load/serializable.ts
  var serializable_exports = {};
  __export(serializable_exports, {
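Note: this block ports LangChain-style escaping into the bundle so user payloads that happen to contain an `lc` key cannot be mistaken for LC-serialized objects on load. Expected behavior, inferred from the functions above (not from running the package):

```ts
escapeIfNeeded({ lc: 1, foo: "bar" });
// => { __lc_escaped__: { lc: 1, foo: "bar" } }

escapeIfNeeded({ nested: { lc: 1 } });
// => { nested: { __lc_escaped__: { lc: 1 } } }   (recurses into plain objects)

escapeIfNeeded([{ lc: 1 }, { plain: true }]);
// => [{ __lc_escaped__: { lc: 1 } }, { plain: true }]   (maps over arrays)

// An object whose only key is the sentinel is escaped too, so a literal
// user value of { __lc_escaped__: ... } still round-trips unambiguously:
escapeIfNeeded({ __lc_escaped__: { x: 1 } });
// => { __lc_escaped__: { __lc_escaped__: { x: 1 } } }
```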
@@ -3353,11 +3407,15 @@ var Serializable = class Serializable {
  }
  if (last in read && read[last] !== void 0) write[last] = write[last] || read[last];
  });
+ const escapedKwargs = {};
+ for (const [key, value] of Object.entries(kwargs)) escapedKwargs[key] = escapeIfNeeded(value);
+ const kwargsWithSecrets = Object.keys(secrets).length ? replaceSecrets(escapedKwargs, secrets) : escapedKwargs;
+ const processedKwargs = mapKeys(kwargsWithSecrets, keyToJson, aliases);
  return {
  lc: 1,
  type: "constructor",
  id: this.lc_id,
- kwargs: mapKeys(Object.keys(secrets).length ? replaceSecrets(kwargs, secrets) : kwargs, keyToJson, aliases)
+ kwargs: processedKwargs
  };
  }
  toJSONNotImplemented() {
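Note: the previous one-liner is unrolled so escaping can run first. Order of the new kwargs pipeline, as read from the hunk above:

```ts
// 1. escapeIfNeeded on each kwarg value  -> wraps `lc`-bearing plain objects
// 2. replaceSecrets (only when secrets)  -> operates on already-escaped kwargs
// 3. mapKeys(..., keyToJson, aliases)    -> key aliasing happens last
//
// e.g. a kwarg { metadata: { lc: 1 } } becomes
//      { metadata: { "__lc_escaped__": { lc: 1 } } }
// before secret replacement and alias mapping are applied.
```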
@@ -3818,7 +3876,10 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
  eventProperties['$ai_output_tokens'] = outputTokens;
  // Add additional token data to properties
  if (additionalTokenData.cacheReadInputTokens) {
- eventProperties['$ai_cache_read_tokens'] = additionalTokenData.cacheReadInputTokens;
+ eventProperties['$ai_cache_read_input_tokens'] = additionalTokenData.cacheReadInputTokens;
+ }
+ if (additionalTokenData.cacheWriteInputTokens) {
+ eventProperties['$ai_cache_creation_input_tokens'] = additionalTokenData.cacheWriteInputTokens;
  }
  if (additionalTokenData.reasoningTokens) {
  eventProperties['$ai_reasoning_tokens'] = additionalTokenData.reasoningTokens;
@@ -3982,6 +4043,14 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
  additionalTokenData.cacheReadInputTokens = usage.input_token_details.cache_read;
  } else if (usage.cachedPromptTokens != null) {
  additionalTokenData.cacheReadInputTokens = usage.cachedPromptTokens;
+ } else if (usage.cache_read_input_tokens != null) {
+ additionalTokenData.cacheReadInputTokens = usage.cache_read_input_tokens;
+ }
+ // Check for cache write/creation tokens in various formats
+ if (usage.cache_creation_input_tokens != null) {
+ additionalTokenData.cacheWriteInputTokens = usage.cache_creation_input_tokens;
+ } else if (usage.input_token_details?.cache_creation != null) {
+ additionalTokenData.cacheWriteInputTokens = usage.input_token_details.cache_creation;
  }
  // Check for reasoning tokens in various formats
  if (usage.completion_tokens_details?.reasoning_tokens != null) {
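Note: cache-read extraction gains an Anthropic raw-API fallback, and cache write/creation tokens are extracted for the first time (this also fixes the event property name, `$ai_cache_read_tokens` → `$ai_cache_read_input_tokens`). Usage payload shapes the new conditions match, per the hunk above:

```ts
// Anthropic raw-API style:
// { input_tokens: 50, cache_read_input_tokens: 300, cache_creation_input_tokens: 100 }
//   -> cacheReadInputTokens = 300, cacheWriteInputTokens = 100

// LangChain input_token_details style:
// { input_token_details: { cache_read: 300, cache_creation: 100 } }
//   -> cacheReadInputTokens = 300, cacheWriteInputTokens = 100
```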
@@ -4027,8 +4096,10 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
  if (webSearchCount !== undefined) {
  additionalTokenData.webSearchCount = webSearchCount;
  }
- // For Anthropic providers, LangChain reports input_tokens as the sum of input and cache read tokens.
+ // For Anthropic providers, LangChain reports input_tokens as the sum of all input tokens.
  // Our cost calculation expects them to be separate for Anthropic, so we subtract cache tokens.
+ // Both cache_read and cache_write tokens should be subtracted since Anthropic's raw API
+ // reports input_tokens as tokens NOT read from or used to create a cache.
  // For other providers (OpenAI, etc.), input_tokens already excludes cache tokens as expected.
  // Match logic consistent with plugin-server: exact match on provider OR substring match on model
  let isAnthropic = false;
@@ -4037,8 +4108,11 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
  } else if (model && model.toLowerCase().includes('anthropic')) {
  isAnthropic = true;
  }
- if (isAnthropic && parsedUsage.input && additionalTokenData.cacheReadInputTokens) {
- parsedUsage.input = Math.max(parsedUsage.input - additionalTokenData.cacheReadInputTokens, 0);
+ if (isAnthropic && parsedUsage.input) {
+ const cacheTokens = (additionalTokenData.cacheReadInputTokens || 0) + (additionalTokenData.cacheWriteInputTokens || 0);
+ if (cacheTokens > 0) {
+ parsedUsage.input = Math.max(parsedUsage.input - cacheTokens, 0);
+ }
  }
  return [parsedUsage.input, parsedUsage.output, additionalTokenData];
  }
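Note: the Anthropic subtraction now covers cache writes as well as cache reads. A worked example with hypothetical numbers:

```ts
// LangChain reports input_tokens = 450 for an Anthropic model, where
// cacheReadInputTokens = 300 and cacheWriteInputTokens = 100.
// 7.3.1: 450 - 300         = 150 uncached input tokens (cache writes double-counted)
// 7.4.0: 450 - (300 + 100) = 50 uncached input tokens
// Math.max(..., 0) guards against a negative count if a provider's
// input_tokens already excludes cache tokens.
```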