@posthog/ai 6.1.0 → 6.1.1

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -370,6 +370,204 @@ const sendEventToPosthog = async ({
370
370
  }
371
371
  };
372
372
 
373
+ // Type guards for safer type checking
374
+ const isString = value => {
375
+ return typeof value === 'string';
376
+ };
377
+ const isObject = value => {
378
+ return value !== null && typeof value === 'object' && !Array.isArray(value);
379
+ };
380
+
381
+ const REDACTED_IMAGE_PLACEHOLDER = '[base64 image redacted]';
382
+ // ============================================
383
+ // Base64 Detection Helpers
384
+ // ============================================
385
+ const isBase64DataUrl = str => {
386
+ return /^data:([^;]+);base64,/.test(str);
387
+ };
388
+ const isValidUrl = str => {
389
+ try {
390
+ new URL(str);
391
+ return true;
392
+ } catch {
393
+ // Not an absolute URL, check if it's a relative URL or path
394
+ return str.startsWith('/') || str.startsWith('./') || str.startsWith('../');
395
+ }
396
+ };
397
+ const isRawBase64 = str => {
398
+ // Skip if it's a valid URL or path
399
+ if (isValidUrl(str)) {
400
+ return false;
401
+ }
402
+ // Check if it's a valid base64 string
403
+ // Base64 images are typically at least a few hundred chars, but we'll be conservative
404
+ return str.length > 20 && /^[A-Za-z0-9+/]+=*$/.test(str);
405
+ };
406
+ function redactBase64DataUrl(str) {
407
+ if (!isString(str)) return str;
408
+ // Check for data URL format
409
+ if (isBase64DataUrl(str)) {
410
+ return REDACTED_IMAGE_PLACEHOLDER;
411
+ }
412
+ // Check for raw base64 (Vercel sends raw base64 for inline images)
413
+ if (isRawBase64(str)) {
414
+ return REDACTED_IMAGE_PLACEHOLDER;
415
+ }
416
+ return str;
417
+ }
418
+ const processMessages = (messages, transformContent) => {
419
+ if (!messages) return messages;
420
+ const processContent = content => {
421
+ if (typeof content === 'string') return content;
422
+ if (!content) return content;
423
+ if (Array.isArray(content)) {
424
+ return content.map(transformContent);
425
+ }
426
+ // Handle single object content
427
+ return transformContent(content);
428
+ };
429
+ const processMessage = msg => {
430
+ if (!isObject(msg) || !('content' in msg)) return msg;
431
+ return {
432
+ ...msg,
433
+ content: processContent(msg.content)
434
+ };
435
+ };
436
+ // Handle both arrays and single messages
437
+ if (Array.isArray(messages)) {
438
+ return messages.map(processMessage);
439
+ }
440
+ return processMessage(messages);
441
+ };
442
+ // ============================================
443
+ // Provider-Specific Image Sanitizers
444
+ // ============================================
445
+ const sanitizeOpenAIImage = item => {
446
+ if (!isObject(item)) return item;
447
+ // Handle image_url format
448
+ if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {
449
+ return {
450
+ ...item,
451
+ image_url: {
452
+ ...item.image_url,
453
+ url: redactBase64DataUrl(item.image_url.url)
454
+ }
455
+ };
456
+ }
457
+ return item;
458
+ };
459
+ const sanitizeOpenAIResponseImage = item => {
460
+ if (!isObject(item)) return item;
461
+ // Handle input_image format
462
+ if (item.type === 'input_image' && 'image_url' in item) {
463
+ return {
464
+ ...item,
465
+ image_url: redactBase64DataUrl(item.image_url)
466
+ };
467
+ }
468
+ return item;
469
+ };
470
+ const sanitizeAnthropicImage = item => {
471
+ if (!isObject(item)) return item;
472
+ // Handle Anthropic's image format
473
+ if (item.type === 'image' && 'source' in item && isObject(item.source) && item.source.type === 'base64' && 'data' in item.source) {
474
+ return {
475
+ ...item,
476
+ source: {
477
+ ...item.source,
478
+ data: REDACTED_IMAGE_PLACEHOLDER
479
+ }
480
+ };
481
+ }
482
+ return item;
483
+ };
484
+ const sanitizeGeminiPart = part => {
485
+ if (!isObject(part)) return part;
486
+ // Handle Gemini's inline data format
487
+ if ('inlineData' in part && isObject(part.inlineData) && 'data' in part.inlineData) {
488
+ return {
489
+ ...part,
490
+ inlineData: {
491
+ ...part.inlineData,
492
+ data: REDACTED_IMAGE_PLACEHOLDER
493
+ }
494
+ };
495
+ }
496
+ return part;
497
+ };
498
+ const processGeminiItem = item => {
499
+ if (!isObject(item)) return item;
500
+ // If it has parts, process them
501
+ if ('parts' in item && item.parts) {
502
+ const parts = Array.isArray(item.parts) ? item.parts.map(sanitizeGeminiPart) : sanitizeGeminiPart(item.parts);
503
+ return {
504
+ ...item,
505
+ parts
506
+ };
507
+ }
508
+ return item;
509
+ };
510
+ const sanitizeLangChainImage = item => {
511
+ if (!isObject(item)) return item;
512
+ // OpenAI style
513
+ if (item.type === 'image_url' && 'image_url' in item && isObject(item.image_url) && 'url' in item.image_url) {
514
+ return {
515
+ ...item,
516
+ image_url: {
517
+ ...item.image_url,
518
+ url: redactBase64DataUrl(item.image_url.url)
519
+ }
520
+ };
521
+ }
522
+ // Direct image with data field
523
+ if (item.type === 'image' && 'data' in item) {
524
+ return {
525
+ ...item,
526
+ data: redactBase64DataUrl(item.data)
527
+ };
528
+ }
529
+ // Anthropic style
530
+ if (item.type === 'image' && 'source' in item && isObject(item.source) && 'data' in item.source) {
531
+ return {
532
+ ...item,
533
+ source: {
534
+ ...item.source,
535
+ data: redactBase64DataUrl(item.source.data)
536
+ }
537
+ };
538
+ }
539
+ // Google style
540
+ if (item.type === 'media' && 'data' in item) {
541
+ return {
542
+ ...item,
543
+ data: redactBase64DataUrl(item.data)
544
+ };
545
+ }
546
+ return item;
547
+ };
548
+ // Export individual sanitizers for tree-shaking
549
+ const sanitizeOpenAI = data => {
550
+ return processMessages(data, sanitizeOpenAIImage);
551
+ };
552
+ const sanitizeOpenAIResponse = data => {
553
+ return processMessages(data, sanitizeOpenAIResponseImage);
554
+ };
555
+ const sanitizeAnthropic = data => {
556
+ return processMessages(data, sanitizeAnthropicImage);
557
+ };
558
+ const sanitizeGemini = data => {
559
+ // Gemini has a different structure with 'parts' directly on items instead of 'content'
560
+ // So we need custom processing instead of using processMessages
561
+ if (!data) return data;
562
+ if (Array.isArray(data)) {
563
+ return data.map(processGeminiItem);
564
+ }
565
+ return processGeminiItem(data);
566
+ };
567
+ const sanitizeLangChain = data => {
568
+ return processMessages(data, sanitizeLangChainImage);
569
+ };
570
+
373
571
  const Chat = openai.OpenAI.Chat;
374
572
  const Completions = Chat.Completions;
375
573
  const Responses = openai.OpenAI.Responses;
@@ -442,7 +640,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
442
640
  traceId,
443
641
  model: openAIParams.model,
444
642
  provider: 'openai',
445
- input: openAIParams.messages,
643
+ input: sanitizeOpenAI(openAIParams.messages),
446
644
  output: [{
447
645
  content: accumulatedContent,
448
646
  role: 'assistant'
@@ -462,7 +660,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
462
660
  traceId,
463
661
  model: openAIParams.model,
464
662
  provider: 'openai',
465
- input: openAIParams.messages,
663
+ input: sanitizeOpenAI(openAIParams.messages),
466
664
  output: [],
467
665
  latency: 0,
468
666
  baseURL: this.baseURL ?? '',
@@ -494,7 +692,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
494
692
  traceId,
495
693
  model: openAIParams.model,
496
694
  provider: 'openai',
497
- input: openAIParams.messages,
695
+ input: sanitizeOpenAI(openAIParams.messages),
498
696
  output: formatResponseOpenAI(result),
499
697
  latency,
500
698
  baseURL: this.baseURL ?? '',
@@ -518,7 +716,7 @@ let WrappedCompletions$1 = class WrappedCompletions extends Completions {
518
716
  traceId,
519
717
  model: openAIParams.model,
520
718
  provider: 'openai',
521
- input: openAIParams.messages,
719
+ input: sanitizeOpenAI(openAIParams.messages),
522
720
  output: [],
523
721
  latency: 0,
524
722
  baseURL: this.baseURL ?? '',
@@ -591,7 +789,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
591
789
  //@ts-expect-error
592
790
  model: openAIParams.model,
593
791
  provider: 'openai',
594
- input: openAIParams.input,
792
+ input: sanitizeOpenAIResponse(openAIParams.input),
595
793
  output: finalContent,
596
794
  latency,
597
795
  baseURL: this.baseURL ?? '',
@@ -609,7 +807,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
609
807
  //@ts-expect-error
610
808
  model: openAIParams.model,
611
809
  provider: 'openai',
612
- input: openAIParams.input,
810
+ input: sanitizeOpenAIResponse(openAIParams.input),
613
811
  output: [],
614
812
  latency: 0,
615
813
  baseURL: this.baseURL ?? '',
@@ -641,7 +839,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
641
839
  //@ts-expect-error
642
840
  model: openAIParams.model,
643
841
  provider: 'openai',
644
- input: openAIParams.input,
842
+ input: sanitizeOpenAIResponse(openAIParams.input),
645
843
  output: formatResponseOpenAI({
646
844
  output: result.output
647
845
  }),
@@ -668,7 +866,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
668
866
  //@ts-expect-error
669
867
  model: openAIParams.model,
670
868
  provider: 'openai',
671
- input: openAIParams.input,
869
+ input: sanitizeOpenAIResponse(openAIParams.input),
672
870
  output: [],
673
871
  latency: 0,
674
872
  baseURL: this.baseURL ?? '',
@@ -716,7 +914,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
716
914
  //@ts-expect-error
717
915
  model: openAIParams.model,
718
916
  provider: 'openai',
719
- input: openAIParams.input,
917
+ input: sanitizeOpenAIResponse(openAIParams.input),
720
918
  output: result.output,
721
919
  latency,
722
920
  baseURL: this.baseURL ?? '',
@@ -739,7 +937,7 @@ let WrappedResponses$1 = class WrappedResponses extends Responses {
739
937
  //@ts-expect-error
740
938
  model: openAIParams.model,
741
939
  provider: 'openai',
742
- input: openAIParams.input,
940
+ input: sanitizeOpenAIResponse(openAIParams.input),
743
941
  output: [],
744
942
  latency: 0,
745
943
  baseURL: this.baseURL ?? '',
@@ -1163,9 +1361,20 @@ const mapVercelPrompt = messages => {
1163
1361
  text: truncate(c.text)
1164
1362
  };
1165
1363
  } else if (c.type === 'file') {
1364
+ // For file type, check if it's a data URL and redact if needed
1365
+ let fileData;
1366
+ const contentData = c.data;
1367
+ if (contentData instanceof URL) {
1368
+ fileData = contentData.toString();
1369
+ } else if (isString(contentData)) {
1370
+ // Redact base64 data URLs and raw base64 to prevent oversized events
1371
+ fileData = redactBase64DataUrl(contentData);
1372
+ } else {
1373
+ fileData = 'raw files not supported';
1374
+ }
1166
1375
  return {
1167
1376
  type: 'file',
1168
- file: c.data instanceof URL ? c.data.toString() : 'raw files not supported',
1377
+ file: fileData,
1169
1378
  mediaType: c.mediaType
1170
1379
  };
1171
1380
  } else if (c.type === 'reasoning') {
@@ -1264,11 +1473,10 @@ const mapVercelOutput = result => {
1264
1473
  if (item.data instanceof URL) {
1265
1474
  fileData = item.data.toString();
1266
1475
  } else if (typeof item.data === 'string') {
1267
- // Check if it's base64 data and potentially large
1268
- if (item.data.startsWith('data:') || item.data.length > 1000) {
1476
+ fileData = redactBase64DataUrl(item.data);
1477
+ // If not redacted and still large, replace with size indicator
1478
+ if (fileData === item.data && item.data.length > 1000) {
1269
1479
  fileData = `[${item.mediaType} file - ${item.data.length} bytes]`;
1270
- } else {
1271
- fileData = item.data;
1272
1480
  }
1273
1481
  } else {
1274
1482
  fileData = `[binary ${item.mediaType} file]`;
@@ -1592,7 +1800,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
1592
1800
  traceId,
1593
1801
  model: anthropicParams.model,
1594
1802
  provider: 'anthropic',
1595
- input: mergeSystemPrompt(anthropicParams, 'anthropic'),
1803
+ input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams, 'anthropic')),
1596
1804
  output: [{
1597
1805
  content: accumulatedContent,
1598
1806
  role: 'assistant'
@@ -1613,7 +1821,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
1613
1821
  traceId,
1614
1822
  model: anthropicParams.model,
1615
1823
  provider: 'anthropic',
1616
- input: mergeSystemPrompt(anthropicParams),
1824
+ input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams)),
1617
1825
  output: [],
1618
1826
  latency: 0,
1619
1827
  baseURL: this.baseURL ?? '',
@@ -1645,7 +1853,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
1645
1853
  traceId,
1646
1854
  model: anthropicParams.model,
1647
1855
  provider: 'anthropic',
1648
- input: mergeSystemPrompt(anthropicParams),
1856
+ input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams)),
1649
1857
  output: formatResponseAnthropic(result),
1650
1858
  latency,
1651
1859
  baseURL: this.baseURL ?? '',
@@ -1669,7 +1877,7 @@ class WrappedMessages extends AnthropicOriginal.Messages {
1669
1877
  traceId,
1670
1878
  model: anthropicParams.model,
1671
1879
  provider: 'anthropic',
1672
- input: mergeSystemPrompt(anthropicParams),
1880
+ input: sanitizeAnthropic(mergeSystemPrompt(anthropicParams)),
1673
1881
  output: [],
1674
1882
  latency: 0,
1675
1883
  baseURL: this.baseURL ?? '',
@@ -1727,7 +1935,7 @@ class WrappedModels {
1727
1935
  traceId,
1728
1936
  model: geminiParams.model,
1729
1937
  provider: 'gemini',
1730
- input: this.formatInput(geminiParams.contents),
1938
+ input: this.formatInputForPostHog(geminiParams.contents),
1731
1939
  output: formatResponseGemini(response),
1732
1940
  latency,
1733
1941
  baseURL: 'https://generativelanguage.googleapis.com',
@@ -1751,7 +1959,7 @@ class WrappedModels {
1751
1959
  traceId,
1752
1960
  model: geminiParams.model,
1753
1961
  provider: 'gemini',
1754
- input: this.formatInput(geminiParams.contents),
1962
+ input: this.formatInputForPostHog(geminiParams.contents),
1755
1963
  output: [],
1756
1964
  latency,
1757
1965
  baseURL: 'https://generativelanguage.googleapis.com',
@@ -1808,7 +2016,7 @@ class WrappedModels {
1808
2016
  traceId,
1809
2017
  model: geminiParams.model,
1810
2018
  provider: 'gemini',
1811
- input: this.formatInput(geminiParams.contents),
2019
+ input: this.formatInputForPostHog(geminiParams.contents),
1812
2020
  output: [{
1813
2021
  content: accumulatedContent,
1814
2022
  role: 'assistant'
@@ -1829,7 +2037,7 @@ class WrappedModels {
1829
2037
  traceId,
1830
2038
  model: geminiParams.model,
1831
2039
  provider: 'gemini',
1832
- input: this.formatInput(geminiParams.contents),
2040
+ input: this.formatInputForPostHog(geminiParams.contents),
1833
2041
  output: [],
1834
2042
  latency,
1835
2043
  baseURL: 'https://generativelanguage.googleapis.com',
@@ -1874,6 +2082,12 @@ class WrappedModels {
1874
2082
  content: item.content
1875
2083
  };
1876
2084
  }
2085
+ if (item.parts) {
2086
+ return {
2087
+ role: item.role || 'user',
2088
+ content: item.parts.map(part => part.text ? part.text : part)
2089
+ };
2090
+ }
1877
2091
  }
1878
2092
  return {
1879
2093
  role: 'user',
@@ -1900,6 +2114,10 @@ class WrappedModels {
1900
2114
  content: String(contents)
1901
2115
  }];
1902
2116
  }
2117
+ formatInputForPostHog(contents) {
2118
+ const sanitized = sanitizeGemini(contents);
2119
+ return this.formatInput(sanitized);
2120
+ }
1903
2121
  }
1904
2122
 
1905
2123
  function getDefaultExportFromCjs (x) {
@@ -2591,7 +2809,7 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
2591
2809
  }) || 'generation';
2592
2810
  const generation = {
2593
2811
  name: runNameFound,
2594
- input: messages,
2812
+ input: sanitizeLangChain(messages),
2595
2813
  startTime: Date.now()
2596
2814
  };
2597
2815
  if (extraParams) {
@@ -2854,7 +3072,8 @@ class LangChainCallbackHandler extends BaseCallbackHandler {
2854
3072
  ...message.additional_kwargs
2855
3073
  };
2856
3074
  }
2857
- return messageDict;
3075
+ // Sanitize the message content to redact base64 images
3076
+ return sanitizeLangChain(messageDict);
2858
3077
  }
2859
3078
  _parseUsageModel(usage) {
2860
3079
  const conversionList = [['promptTokens', 'input'], ['completionTokens', 'output'], ['input_tokens', 'input'], ['output_tokens', 'output'], ['prompt_token_count', 'input'], ['candidates_token_count', 'output'], ['inputTokenCount', 'input'], ['outputTokenCount', 'output'], ['input_token_count', 'input'], ['generated_token_count', 'output']];