@posthog/ai 6.0.1 → 6.1.1

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -370,6 +370,204 @@ const sendEventToPosthog = async ({
 }
 };
 
+// Type guards for safer type checking
+const isString = value => {
+  return typeof value === 'string';
+};
+const isObject = value => {
+  return value !== null && typeof value === 'object' && !Array.isArray(value);
+};
+
+const REDACTED_IMAGE_PLACEHOLDER = '[base64 image redacted]';
+// ============================================
+// Base64 Detection Helpers
+// ============================================
+const isBase64DataUrl = str => {
+  return /^data:([^;]+);base64,/.test(str);
+};
+const isValidUrl = str => {
+  try {
+    new URL(str);
+    return true;
+  } catch {
+    // Not an absolute URL, check if it's a relative URL or path
+    return str.startsWith('/') || str.startsWith('./') || str.startsWith('../');
+  }
+};
+const isRawBase64 = str => {
+  // Skip if it's a valid URL or path
+  if (isValidUrl(str)) {
+    return false;
+  }
+  // Check if it's a valid base64 string
+  // Base64 images are typically at least a few hundred chars, but we'll be conservative
+  return str.length > 20 && /^[A-Za-z0-9+/]+=*$/.test(str);
+};
+function redactBase64DataUrl(str) {
+  if (!isString(str)) return str;
+  // Check for data URL format
+  if (isBase64DataUrl(str)) {
+    return REDACTED_IMAGE_PLACEHOLDER;
+  }
+  // Check for raw base64 (Vercel sends raw base64 for inline images)
+  if (isRawBase64(str)) {
+    return REDACTED_IMAGE_PLACEHOLDER;
+  }
+  return str;
+}
+const processMessages = (messages, transformContent) => {
+  if (!messages) return messages;
+  const processContent = content => {
+    if (typeof content === 'string') return content;
+    if (!content) return content;
+    if (Array.isArray(content)) {
+      return content.map(transformContent);
+    }
+    // Handle single object content
+    return transformContent(content);
+  };
+  const processMessage = msg => {
+    if (!isObject(msg) || !('content' in msg)) return msg;
+    return {
+      ...msg,
+      content: processContent(msg.content)
+    };
+  };
+  // Handle both arrays and single messages
+  if (Array.isArray(messages)) {
+    return messages.map(processMessage);
+  }
+  return processMessage(messages);
+};
+// ============================================
+// Provider-Specific Image Sanitizers
+// ============================================
+const sanitizeOpenAIImage = item => {
+  if (!isObject(item)) return item;
+  // Handle image_url format
+  if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {
+    return {
+      ...item,
+      image_url: {
+        ...item.image_url,
+        url: redactBase64DataUrl(item.image_url.url)
+      }
+    };
+  }
+  return item;
+};
+const sanitizeOpenAIResponseImage = item => {
+  if (!isObject(item)) return item;
+  // Handle input_image format
+  if (item.type === 'input_image' && 'image_url' in item) {
+    return {
+      ...item,
+      image_url: redactBase64DataUrl(item.image_url)
+    };
+  }
+  return item;
+};
+const sanitizeAnthropicImage = item => {
+  if (!isObject(item)) return item;
+  // Handle Anthropic's image format
+  if (item.type === 'image' && 'source' in item && isObject(item.source) && item.source.type === 'base64' && 'data' in item.source) {
+    return {
+      ...item,
+      source: {
+        ...item.source,
+        data: REDACTED_IMAGE_PLACEHOLDER
+      }
+    };
+  }
+  return item;
+};
+const sanitizeGeminiPart = part => {
+  if (!isObject(part)) return part;
+  // Handle Gemini's inline data format
+  if ('inlineData' in part && isObject(part.inlineData) && 'data' in part.inlineData) {
+    return {
+      ...part,
+      inlineData: {
+        ...part.inlineData,
+        data: REDACTED_IMAGE_PLACEHOLDER
+      }
+    };
+  }
+  return part;
+};
+const processGeminiItem = item => {
+  if (!isObject(item)) return item;
+  // If it has parts, process them
+  if ('parts' in item && item.parts) {
+    const parts = Array.isArray(item.parts) ? item.parts.map(sanitizeGeminiPart) : sanitizeGeminiPart(item.parts);
+    return {
+      ...item,
+      parts
+    };
+  }
+  return item;
+};
+const sanitizeLangChainImage = item => {
+  if (!isObject(item)) return item;
+  // OpenAI style
+  if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {
+    return {
+      ...item,
+      image_url: {
+        ...item.image_url,
+        url: redactBase64DataUrl(item.image_url.url)
+      }
+    };
+  }
+  // Direct image with data field
+  if (item.type === 'image' && 'data' in item) {
+    return {
+      ...item,
+      data: redactBase64DataUrl(item.data)
+    };
+  }
+  // Anthropic style
+  if (item.type === 'image' && 'source' in item && isObject(item.source) && 'data' in item.source) {
+    return {
+      ...item,
+      source: {
+        ...item.source,
+        data: redactBase64DataUrl(item.source.data)
+      }
+    };
+  }
+  // Google style
+  if (item.type === 'media' && 'data' in item) {
+    return {
+      ...item,
+      data: redactBase64DataUrl(item.data)
+    };
+  }
+  return item;
+};
+// Export individual sanitizers for tree-shaking
+const sanitizeOpenAI = data => {
+  return processMessages(data, sanitizeOpenAIImage);
+};
+const sanitizeOpenAIResponse = data => {
+  return processMessages(data, sanitizeOpenAIResponseImage);
+};
+const sanitizeAnthropic = data => {
+  return processMessages(data, sanitizeAnthropicImage);
+};
+const sanitizeGemini = data => {
+  // Gemini has a different structure with 'parts' directly on items instead of 'content'
+  // So we need custom processing instead of using processMessages
+  if (!data) return data;
+  if (Array.isArray(data)) {
+    return data.map(processGeminiItem);
+  }
+  return processGeminiItem(data);
+};
+const sanitizeLangChain = data => {
+  return processMessages(data, sanitizeLangChainImage);
+};
+
 const Chat = openai.OpenAI.Chat;
 const Completions = Chat.Completions;
 const Responses = openai.OpenAI.Responses;
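
The sanitizer block above is pure and side-effect free, so its behavior can be checked in isolation. A minimal sketch of the OpenAI-style path (the message shape is an illustrative example, not taken from this diff):

    // Hypothetical chat message carrying an inline base64 image:
    const messages = [{
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        { type: 'image_url', image_url: { url: 'data:image/png;base64,iVBORw0KGgo...' } }
      ]
    }];
    const clean = sanitizeOpenAI(messages);
    // clean[0].content[1].image_url.url === '[base64 image redacted]'
    // Text parts and plain string contents pass through unchanged.
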
@@ -442,7 +640,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
   traceId,
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.messages,
+  input: sanitizeOpenAI(openAIParams.messages),
   output: [{
     content: accumulatedContent,
     role: 'assistant'
@@ -462,7 +660,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
   traceId,
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.messages,
+  input: sanitizeOpenAI(openAIParams.messages),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -494,7 +692,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
   traceId,
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.messages,
+  input: sanitizeOpenAI(openAIParams.messages),
   output: formatResponseOpenAI(result),
   latency,
   baseURL: this.baseURL ?? '',
@@ -518,7 +716,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
   traceId,
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.messages,
+  input: sanitizeOpenAI(openAIParams.messages),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -591,7 +789,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
   //@ts-expect-error
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.input,
+  input: sanitizeOpenAIResponse(openAIParams.input),
   output: finalContent,
   latency,
   baseURL: this.baseURL ?? '',
@@ -609,7 +807,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
   //@ts-expect-error
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.input,
+  input: sanitizeOpenAIResponse(openAIParams.input),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -641,7 +839,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
   //@ts-expect-error
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.input,
+  input: sanitizeOpenAIResponse(openAIParams.input),
   output: formatResponseOpenAI({
     output: result.output
   }),
@@ -668,7 +866,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
   //@ts-expect-error
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.input,
+  input: sanitizeOpenAIResponse(openAIParams.input),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -716,7 +914,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
   //@ts-expect-error
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.input,
+  input: sanitizeOpenAIResponse(openAIParams.input),
   output: result.output,
   latency,
   baseURL: this.baseURL ?? '',
@@ -739,7 +937,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
   //@ts-expect-error
   model: openAIParams.model,
   provider: 'openai',
-  input: openAIParams.input,
+  input: sanitizeOpenAIResponse(openAIParams.input),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -1163,9 +1361,20 @@ const mapVercelPrompt = messages => {
           text: truncate(c.text)
         };
       } else if (c.type === 'file') {
+        // For file type, check if it's a data URL and redact if needed
+        let fileData;
+        const contentData = c.data;
+        if (contentData instanceof URL) {
+          fileData = contentData.toString();
+        } else if (isString(contentData)) {
+          // Redact base64 data URLs and raw base64 to prevent oversized events
+          fileData = redactBase64DataUrl(contentData);
+        } else {
+          fileData = 'raw files not supported';
+        }
         return {
           type: 'file',
-          file: c.data instanceof URL ? c.data.toString() : 'raw files not supported',
+          file: fileData,
           mediaType: c.mediaType
         };
       } else if (c.type === 'reasoning') {
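
The prompt-side file branch now distinguishes three cases instead of two. A hypothetical helper mirroring that branch, for illustration only:

    const mapVercelFileData = data =>
      data instanceof URL ? data.toString()
      : isString(data) ? redactBase64DataUrl(data)
      : 'raw files not supported';
    mapVercelFileData(new URL('https://example.com/cat.png')); // 'https://example.com/cat.png'
    mapVercelFileData('data:image/png;base64,iVBOR...');       // '[base64 image redacted]'
    mapVercelFileData(new Uint8Array([1, 2, 3]));              // 'raw files not supported'
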
@@ -1264,11 +1473,10 @@ const mapVercelOutput = result => {
         if (item.data instanceof URL) {
           fileData = item.data.toString();
         } else if (typeof item.data === 'string') {
-          // Check if it's base64 data and potentially large
-          if (item.data.startsWith('data:') || item.data.length > 1000) {
+          fileData = redactBase64DataUrl(item.data);
+          // If not redacted and still large, replace with size indicator
+          if (fileData === item.data && item.data.length > 1000) {
             fileData = `[${item.mediaType} file - ${item.data.length} bytes]`;
-          } else {
-            fileData = item.data;
           }
         } else {
           fileData = `[binary ${item.mediaType} file]`;
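
On the output side, redaction is now tried first and the byte-count placeholder becomes a fallback for long strings that were not base64. Illustrative outcomes, assuming the helpers above:

    // 'data:image/png;base64,iVBOR...'  -> '[base64 image redacted]'
    // raw base64 longer than 20 chars   -> '[base64 image redacted]'
    // 2000 chars of plain prose         -> '[text/plain file - 2000 bytes]' (with mediaType 'text/plain')
    // a short ordinary string           -> kept as-is
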
@@ -1339,17 +1547,17 @@ const createInstrumentationMiddleware = (phClient, model, options) => {
       const latency = (Date.now() - startTime) / 1000;
       const providerMetadata = result.providerMetadata;
       const additionalTokenValues = {
-        ...(providerMetadata?.openai?.reasoningTokens ? {
-          reasoningTokens: providerMetadata.openai.reasoningTokens
-        } : {}),
-        ...(providerMetadata?.openai?.cachedPromptTokens ? {
-          cacheReadInputTokens: providerMetadata.openai.cachedPromptTokens
-        } : {}),
         ...(providerMetadata?.anthropic ? {
-          cacheReadInputTokens: providerMetadata.anthropic.cacheReadInputTokens,
           cacheCreationInputTokens: providerMetadata.anthropic.cacheCreationInputTokens
         } : {})
       };
+      const usage = {
+        inputTokens: result.usage.inputTokens,
+        outputTokens: result.usage.outputTokens,
+        reasoningTokens: result.usage.reasoningTokens,
+        cacheReadInputTokens: result.usage.cachedInputTokens,
+        ...additionalTokenValues
+      };
       await sendEventToPosthog({
         client: phClient,
         distinctId: options.posthogDistinctId,
@@ -1362,11 +1570,7 @@ const createInstrumentationMiddleware = (phClient, model, options) => {
         baseURL,
         params: mergedParams,
         httpStatus: 200,
-        usage: {
-          inputTokens: result.usage.inputTokens,
-          outputTokens: result.usage.outputTokens,
-          ...additionalTokenValues
-        },
+        usage,
         tools: availableTools,
         captureImmediate: options.posthogCaptureImmediate
       });
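
Token accounting here now reads the normalized AI SDK v5 usage fields directly; only Anthropic's cache-creation count still comes from providerMetadata. A sketch of the mapping with a made-up usage object:

    // Hypothetical result.usage from the AI SDK:
    const resultUsage = { inputTokens: 1200, outputTokens: 350, reasoningTokens: 80, cachedInputTokens: 1000 };
    const usage = {
      inputTokens: resultUsage.inputTokens,
      outputTokens: resultUsage.outputTokens,
      reasoningTokens: resultUsage.reasoningTokens,
      // Note the rename: the SDK's cachedInputTokens maps to PostHog's cacheReadInputTokens.
      cacheReadInputTokens: resultUsage.cachedInputTokens
    };
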
@@ -1428,22 +1632,19 @@ const createInstrumentationMiddleware = (phClient, model, options) => {
   reasoningText += chunk.delta; // New in v5
 }
 if (chunk.type === 'finish') {
+  const providerMetadata = chunk.providerMetadata;
+  const additionalTokenValues = {
+    ...(providerMetadata?.anthropic ? {
+      cacheCreationInputTokens: providerMetadata.anthropic.cacheCreationInputTokens
+    } : {})
+  };
   usage = {
     inputTokens: chunk.usage?.inputTokens,
-    outputTokens: chunk.usage?.outputTokens
+    outputTokens: chunk.usage?.outputTokens,
+    reasoningTokens: chunk.usage?.reasoningTokens,
+    cacheReadInputTokens: chunk.usage?.cachedInputTokens,
+    ...additionalTokenValues
   };
-  if (chunk.providerMetadata?.openai?.reasoningTokens) {
-    usage.reasoningTokens = chunk.providerMetadata.openai.reasoningTokens;
-  }
-  if (chunk.providerMetadata?.openai?.cachedPromptTokens) {
-    usage.cacheReadInputTokens = chunk.providerMetadata.openai.cachedPromptTokens;
-  }
-  if (chunk.providerMetadata?.anthropic?.cacheReadInputTokens) {
-    usage.cacheReadInputTokens = chunk.providerMetadata.anthropic.cacheReadInputTokens;
-  }
-  if (chunk.providerMetadata?.anthropic?.cacheCreationInputTokens) {
-    usage.cacheCreationInputTokens = chunk.providerMetadata.anthropic.cacheCreationInputTokens;
-  }
 }
 controller.enqueue(chunk);
 },
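
The streaming path applies the same normalization once the 'finish' chunk arrives, using optional chaining because some providers omit counts. For example, with an illustrative chunk:

    const finishChunk = {
      type: 'finish',
      usage: { inputTokens: 500, outputTokens: 120, reasoningTokens: 30, cachedInputTokens: 0 },
      providerMetadata: { anthropic: { cacheCreationInputTokens: 256 } }
    };
    // -> usage = { inputTokens: 500, outputTokens: 120, reasoningTokens: 30,
    //              cacheReadInputTokens: 0, cacheCreationInputTokens: 256 }
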
@@ -1599,7 +1800,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
   traceId,
   model: anthropicParams.model,
   provider: 'anthropic',
-  input: mergeSystemPrompt(anthropicParams, 'anthropic'),
+  input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),
   output: [{
     content: accumulatedContent,
     role: 'assistant'
@@ -1620,7 +1821,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
   traceId,
   model: anthropicParams.model,
   provider: 'anthropic',
-  input: mergeSystemPrompt(anthropicParams),
+  input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams)),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -1652,7 +1853,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
   traceId,
   model: anthropicParams.model,
   provider: 'anthropic',
-  input: mergeSystemPrompt(anthropicParams),
+  input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams)),
   output: formatResponseAnthropic(result),
   latency,
   baseURL: this.baseURL ?? '',
@@ -1676,7 +1877,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
   traceId,
   model: anthropicParams.model,
   provider: 'anthropic',
-  input: mergeSystemPrompt(anthropicParams),
+  input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams)),
   output: [],
   latency: 0,
   baseURL: this.baseURL ?? '',
@@ -1734,7 +1935,7 @@ class WrappedModels {
   traceId,
   model: geminiParams.model,
   provider: 'gemini',
-  input: this.formatInput(geminiParams.contents),
+  input: this.formatInputForPostHog(geminiParams.contents),
   output: formatResponseGemini(response),
   latency,
   baseURL: 'https://generativelanguage.googleapis.com',
@@ -1742,7 +1943,9 @@ class WrappedModels {
   httpStatus: 200,
   usage: {
     inputTokens: response.usageMetadata?.promptTokenCount ?? 0,
-    outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0
+    outputTokens: response.usageMetadata?.candidatesTokenCount ?? 0,
+    reasoningTokens: response.usageMetadata?.thoughtsTokenCount ?? 0,
+    cacheReadInputTokens: response.usageMetadata?.cachedContentTokenCount ?? 0
   },
   tools: availableTools,
   captureImmediate: posthogCaptureImmediate
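
Gemini reports counts on response.usageMetadata, and the wrapper now also forwards thinking and cached-content tokens. A sketch with made-up numbers:

    // Hypothetical usageMetadata on a Gemini response:
    const usageMetadata = { promptTokenCount: 900, candidatesTokenCount: 240, thoughtsTokenCount: 60, cachedContentTokenCount: 512 };
    const usage = {
      inputTokens: usageMetadata.promptTokenCount ?? 0,
      outputTokens: usageMetadata.candidatesTokenCount ?? 0,
      reasoningTokens: usageMetadata.thoughtsTokenCount ?? 0,
      cacheReadInputTokens: usageMetadata.cachedContentTokenCount ?? 0
    };
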
@@ -1756,7 +1959,7 @@ class WrappedModels {
   traceId,
   model: geminiParams.model,
   provider: 'gemini',
-  input: this.formatInput(geminiParams.contents),
+  input: this.formatInputForPostHog(geminiParams.contents),
   output: [],
   latency,
   baseURL: 'https://generativelanguage.googleapis.com',
@@ -1798,7 +2001,9 @@ class WrappedModels {
 if (chunk.usageMetadata) {
   usage = {
     inputTokens: chunk.usageMetadata.promptTokenCount ?? 0,
-    outputTokens: chunk.usageMetadata.candidatesTokenCount ?? 0
+    outputTokens: chunk.usageMetadata.candidatesTokenCount ?? 0,
+    reasoningTokens: chunk.usageMetadata.thoughtsTokenCount ?? 0,
+    cacheReadInputTokens: chunk.usageMetadata.cachedContentTokenCount ?? 0
   };
 }
 yield chunk;
@@ -1811,7 +2016,7 @@ class WrappedModels {
   traceId,
   model: geminiParams.model,
   provider: 'gemini',
-  input: this.formatInput(geminiParams.contents),
+  input: this.formatInputForPostHog(geminiParams.contents),
   output: [{
     content: accumulatedContent,
     role: 'assistant'
@@ -1832,7 +2037,7 @@ class WrappedModels {
   traceId,
   model: geminiParams.model,
   provider: 'gemini',
-  input: this.formatInput(geminiParams.contents),
+  input: this.formatInputForPostHog(geminiParams.contents),
   output: [],
   latency,
   baseURL: 'https://generativelanguage.googleapis.com',
@@ -1877,6 +2082,12 @@ class WrappedModels {
       content: item.content
     };
   }
+  if (item.parts) {
+    return {
+      role: item.role || 'user',
+      content: item.parts.map(part => part.text ? part.text : part)
+    };
+  }
 }
 return {
   role: 'user',
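
The new branch lets formatInput accept raw Gemini history items that carry parts instead of a content field, collapsing text parts to plain strings. For instance:

    const item = { role: 'model', parts: [{ text: 'hi' }, { inlineData: { data: '[base64 image redacted]' } }] };
    // The branch above yields:
    // { role: 'model', content: ['hi', { inlineData: { data: '[base64 image redacted]' } }] }
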
@@ -1903,6 +2114,10 @@ class WrappedModels {
       content: String(contents)
     }];
   }
+  formatInputForPostHog(contents) {
+    const sanitized = sanitizeGemini(contents);
+    return this.formatInput(sanitized);
+  }
 }
 
 function getDefaultExportFromCjs (x) {
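
formatInputForPostHog is a thin composition: redact inline image bytes first, then reuse the existing formatter, so only the captured event sees sanitized data while the actual API call is untouched. A sketch of the flow (the contents shape is illustrative):

    const contents = [{
      role: 'user',
      parts: [
        { text: 'Describe this photo' },
        { inlineData: { mimeType: 'image/jpeg', data: 'iVBORw0KGgoAAAANSUhEUgAA...' } }
      ]
    }];
    // sanitizeGemini(contents) redacts parts[1].inlineData.data, and formatInput then
    // flattens to [{ role: 'user', content: ['Describe this photo', { inlineData: {...} }] }]
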
@@ -2594,7 +2809,7 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
   }) || 'generation';
   const generation = {
     name: runNameFound,
-    input: messages,
+    input: sanitizeLangChain(messages),
     startTime: Date.now()
   };
   if (extraParams) {
@@ -2857,7 +3072,8 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
       ...message.additional_kwargs
     };
   }
-  return messageDict;
+  // Sanitize the message content to redact base64 images
+  return sanitizeLangChain(messageDict);
 }
 _parseUsageModel(usage) {
   const conversionList = [['promptTokens', 'input'], ['completionTokens', 'output'], ['input_tokens', 'input'], ['output_tokens', 'output'], ['prompt_token_count', 'input'], ['candidates_token_count', 'output'], ['inputTokenCount', 'input'], ['outputTokenCount', 'output'], ['input_token_count', 'input'], ['generated_token_count', 'output']];
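
Because LangChain messages may carry OpenAI-, Anthropic-, or Google-style image parts, sanitizeLangChainImage checks all four shapes. One example of the Anthropic-style case:

    const msg = {
      role: 'user',
      content: [{ type: 'image', source: { type: 'base64', media_type: 'image/png', data: 'iVBORw0KGgoAAAANSUhEUgAA' } }]
    };
    // sanitizeLangChain(msg).content[0].source.data === '[base64 image redacted]'
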