maidr 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/maidr.js CHANGED
@@ -78,9 +78,13 @@ class Constants {
78
78
 
79
79
  // LLM settings
80
80
  LLMDebugMode = 0; // 0 = use real data, 1 = all fake, 2 = real data but no image
81
- authKey = null; // OpenAI authentication key, set in menu
81
+ openAIAuthKey = null; // OpenAI authentication key, set in menu
82
+ geminiAuthKey = null; // Gemini authentication key, set in menu
82
83
  LLMmaxResponseTokens = 1000; // max tokens to send to LLM, 20 for testing, 1000 ish for real
83
84
  LLMDetail = 'high'; // low (default for testing, like 100 tokens) / high (default for real, like 1000 tokens)
85
+ LLMModel = 'openai'; // openai (default) / gemini
86
+ LLMSystemMessage =
87
+ 'You are a helpful assistant describing the chart to a blind person';
84
88
  skillLevel = 'basic'; // basic / intermediate / expert
85
89
  skillLevelOther = ''; // custom skill level
86
90
 
@@ -406,7 +410,15 @@ class Menu {
406
410
  }><label for="aria_mode_polite">Polite</label></p>
407
411
  </fieldset></div>
408
412
  <h5 class="modal-title">LLM Settings</h5>
409
- <p><input type="password" id="chatLLM_auth_key"> <label for="chatLLM_auth_key">OpenAI Authentication Key</label></p>
413
+ <p>
414
+ <select id="LLM_model">
415
+ <option value="openai">OpenAI Vision</option>
416
+ <option value="gemini">Gemini Pro Vision</option>
417
+ </select>
418
+ <label for="LLM_model">LLM Model</label>
419
+ </p>
420
+ <p id="openai_auth_key_container" class="hidden"><input type="password" id="openai_auth_key"> <label for="openai_auth_key">OpenAI Authentication Key</label></p>
421
+ <p id="gemini_auth_key_container" class="hidden"><input type="password" id="gemini_auth_key"> <label for="gemini_auth_key">Gemini Authentication Key</label></p>
410
422
  <p>
411
423
  <select id="skill_level">
412
424
  <option value="basic">Basic</option>
@@ -483,6 +495,29 @@ class Menu {
483
495
  },
484
496
  ]);
485
497
 
498
+ // toggle auth key fields
499
+ constants.events.push([
500
+ document.getElementById('LLM_model'),
501
+ 'change',
502
+ function (e) {
503
+ if (e.target.value == 'openai') {
504
+ document
505
+ .getElementById('openai_auth_key_container')
506
+ .classList.remove('hidden');
507
+ document
508
+ .getElementById('gemini_auth_key_container')
509
+ .classList.add('hidden');
510
+ } else if (e.target.value == 'gemini') {
511
+ document
512
+ .getElementById('openai_auth_key_container')
513
+ .classList.add('hidden');
514
+ document
515
+ .getElementById('gemini_auth_key_container')
516
+ .classList.remove('hidden');
517
+ }
518
+ },
519
+ ]);
520
+
486
521
  // Skill level other events
487
522
  constants.events.push([
488
523
  document.getElementById('skill_level'),
@@ -569,14 +604,21 @@ class Menu {
569
604
  document.getElementById('max_freq').value = constants.MAX_FREQUENCY;
570
605
  document.getElementById('keypress_interval').value =
571
606
  constants.keypressInterval;
572
- if (typeof constants.authKey == 'string') {
573
- document.getElementById('chatLLM_auth_key').value = constants.authKey;
607
+ if (typeof constants.openAIAuthKey == 'string') {
608
+ document.getElementById('openai_auth_key').value =
609
+ constants.openAIAuthKey;
610
+ }
611
+ if (typeof constants.geminiAuthKey == 'string') {
612
+ document.getElementById('gemini_auth_key').value =
613
+ constants.geminiAuthKey;
574
614
  }
575
615
  document.getElementById('skill_level').value = constants.skillLevel;
576
616
  if (constants.skillLevelOther) {
577
617
  document.getElementById('skill_level_other').value =
578
618
  constants.skillLevelOther;
579
619
  }
620
+ document.getElementById('LLM_model').value = constants.LLMModel;
621
+
580
622
 
581
623
  // aria mode
582
624
  if (constants.ariaMode == 'assertive') {
@@ -586,6 +628,22 @@ class Menu {
586
628
  document.getElementById('aria_mode_polite').checked = true;
587
629
  document.getElementById('aria_mode_assertive').checked = false;
588
630
  }
631
+ // hide either openai or gemini auth key field
632
+ if (constants.LLMModel == 'openai') {
633
+ document
634
+ .getElementById('openai_auth_key_container')
635
+ .classList.remove('hidden');
636
+ document
637
+ .getElementById('gemini_auth_key_container')
638
+ .classList.add('hidden');
639
+ } else if (constants.LLMModel == 'gemini') {
640
+ document
641
+ .getElementById('openai_auth_key_container')
642
+ .classList.add('hidden');
643
+ document
644
+ .getElementById('gemini_auth_key_container')
645
+ .classList.remove('hidden');
646
+ }
589
647
  // skill level other
590
648
  if (constants.skillLevel == 'other') {
591
649
  document
@@ -609,10 +667,12 @@ class Menu {
609
667
  constants.MAX_FREQUENCY = document.getElementById('max_freq').value;
610
668
  constants.keypressInterval =
611
669
  document.getElementById('keypress_interval').value;
612
- constants.authKey = document.getElementById('chatLLM_auth_key').value;
670
+ constants.openAIAuthKey = document.getElementById('openai_auth_key').value;
671
+ constants.geminiAuthKey = document.getElementById('gemini_auth_key').value;
613
672
  constants.skillLevel = document.getElementById('skill_level').value;
614
673
  constants.skillLevelOther =
615
674
  document.getElementById('skill_level_other').value;
675
+ constants.LLMModel = document.getElementById('LLM_model').value;
616
676
 
617
677
  // aria
618
678
  if (document.getElementById('aria_mode_assertive').checked) {
@@ -658,9 +718,11 @@ class Menu {
658
718
  data.MAX_FREQUENCY = constants.MAX_FREQUENCY;
659
719
  data.keypressInterval = constants.keypressInterval;
660
720
  data.ariaMode = constants.ariaMode;
661
- data.authKey = constants.authKey;
721
+ data.openAIAuthKey = constants.openAIAuthKey;
722
+ data.geminiAuthKey = constants.geminiAuthKey;
662
723
  data.skillLevel = constants.skillLevel;
663
724
  data.skillLevelOther = constants.skillLevelOther;
725
+ data.LLMModel = constants.LLMModel;
664
726
  localStorage.setItem('settings_data', JSON.stringify(data));
665
727
  }
666
728
  /**
@@ -678,9 +740,11 @@ class Menu {
678
740
  constants.MAX_FREQUENCY = data.MAX_FREQUENCY;
679
741
  constants.keypressInterval = data.keypressInterval;
680
742
  constants.ariaMode = data.ariaMode;
681
- constants.authKey = data.authKey;
743
+ constants.openAIAuthKey = data.openAIAuthKey;
744
+ constants.geminiAuthKey = data.geminiAuthKey;
682
745
  constants.skillLevel = data.skillLevel;
683
746
  constants.skillLevelOther = data.skillLevelOther;
747
+ constants.LLMModel = data.LLMModel ? data.LLMModel : constants.LLMModel;
684
748
  }
685
749
  this.PopulateData();
686
750
  this.UpdateHtml();
@@ -824,15 +888,6 @@ class ChatLLM {
824
888
  * @returns {void}
825
889
  */
826
890
  Submit(text, img = null) {
827
- // send text to LLM
828
- let url = 'https://api.openai.com/v1/chat/completions';
829
- //let url = 'temp';
830
-
831
- let requestJson = this.GetLLMJRequestJson(text, img);
832
- console.log(requestJson);
833
-
834
- let xhr = new XMLHttpRequest();
835
-
836
891
  // start waiting sound
837
892
  if (constants.playLLMWaitingSound) {
838
893
  chatLLM.WaitingSound(true);
@@ -843,24 +898,11 @@ class ChatLLM {
843
898
  setTimeout(function () {
844
899
  chatLLM.ProcessLLMResponse(chatLLM.fakeLLMResponseData());
845
900
  }, 5000);
846
- } else {
847
- fetch(url, {
848
- method: 'POST',
849
- headers: {
850
- 'Content-Type': 'application/json',
851
- Authorization: 'Bearer ' + constants.authKey,
852
- },
853
- body: JSON.stringify(requestJson),
854
- })
855
- .then((response) => response.json())
856
- .then((data) => {
857
- chatLLM.ProcessLLMResponse(data);
858
- })
859
- .catch((error) => {
860
- chatLLM.WaitingSound(false);
861
- console.error('Error:', error);
862
- // also todo: handle errors somehow
863
- });
901
+ return;
902
+ } else if (constants.LLMModel == 'gemini') {
903
+ chatLLM.GeminiPrompt(text, img);
904
+ } else if (constants.LLMModel == 'openai') {
905
+ chatLLM.OpenAIPrompt(text, img);
864
906
  }
865
907
  }
866
908
 
@@ -910,8 +952,38 @@ class ChatLLM {
910
952
  ProcessLLMResponse(data) {
911
953
  chatLLM.WaitingSound(false);
912
954
  console.log('LLM response: ', data);
913
- let text = data.choices[0].message.content;
914
- chatLLM.DisplayChatMessage('LLM', text);
955
+ let text = '';
956
+ let LLMName = '';
957
+
958
+ if (constants.LLMModel == 'openai') {
959
+ LLMName = 'OpenAI';
960
+ text = data.choices[0].message.content;
961
+ let i = this.requestJson.messages.length;
962
+ this.requestJson.messages[i] = {};
963
+ this.requestJson.messages[i].role = 'assistant';
964
+ this.requestJson.messages[i].content = text;
965
+
966
+ if (data.error) {
967
+ chatLLM.DisplayChatMessage(LLMName, 'Error processing request.');
968
+ } else {
969
+ chatLLM.DisplayChatMessage(LLMName, text);
970
+ }
971
+ } else if (constants.LLMModel == 'gemini') {
972
+ LLMName = 'Gemini';
973
+ if (data.text()) {
974
+ text = data.text();
975
+ chatLLM.DisplayChatMessage(LLMName, text);
976
+ } else {
977
+ if (!data.error) {
978
+ data.error = 'Error processing request.';
979
+ }
980
+ }
981
+ if (data.error) {
982
+ chatLLM.DisplayChatMessage(LLMName, 'Error processing request.');
983
+ } else {
984
+ // todo: display actual response
985
+ }
986
+ }
915
987
  }
916
988
 
917
989
  /**
@@ -976,32 +1048,63 @@ class ChatLLM {
976
1048
  /**
977
1049
  * Gets running prompt info, appends the latest request, and packages it into a JSON object for the LLM.
978
1050
  * @function
979
- * @name GetLLMJRequestJson
1051
+ * @name OpenAIPrompt
980
1052
  * @memberof module:constants
981
1053
  * @returns {json}
982
1054
  */
983
- GetLLMJRequestJson(text, img) {
1055
+ OpenAIPrompt(text, img) {
1056
+ // request init
1057
+ let url = 'https://api.openai.com/v1/chat/completions';
1058
+ let auth = constants.openAIAuthKey;
1059
+ let requestJson = chatLLM.OpenAIJson(text, img);
1060
+ console.log('LLM request: ', requestJson);
1061
+
1062
+ fetch(url, {
1063
+ method: 'POST',
1064
+ headers: {
1065
+ 'Content-Type': 'application/json',
1066
+ Authorization: 'Bearer ' + auth,
1067
+ },
1068
+ body: JSON.stringify(requestJson),
1069
+ })
1070
+ .then((response) => response.json())
1071
+ .then((data) => {
1072
+ chatLLM.ProcessLLMResponse(data);
1073
+ })
1074
+ .catch((error) => {
1075
+ chatLLM.WaitingSound(false);
1076
+ console.error('Error:', error);
1077
+ chatLLM.DisplayChatMessage('LLM', 'Error processing request.');
1078
+ // also todo: handle errors somehow
1079
+ });
1080
+ }
1081
+ OpenAIJson(text, img) {
1082
+ let sysMessage = constants.LLMSystemMessage;
1083
+ let backupMessage =
1084
+ 'Describe ' + singleMaidr.type + ' charts to a blind person';
1085
+ // headers and sys message
984
1086
  if (!this.requestJson) {
985
1087
  this.requestJson = {};
986
1088
  this.requestJson.model = 'gpt-4-vision-preview';
987
1089
  this.requestJson.max_tokens = constants.LLMmaxResponseTokens; // note: if this is too short (tested with less than 200), the response gets cut off
988
- //this.requestJson.detail = constants.LLMDetail;
1090
+
1091
+ // sys message
989
1092
  this.requestJson.messages = [];
990
1093
  this.requestJson.messages[0] = {};
991
1094
  this.requestJson.messages[0].role = 'system';
992
- this.requestJson.messages[0].content =
993
- 'You are a helpful assistant describing the chart to a blind person';
1095
+ this.requestJson.messages[0].content = sysMessage;
994
1096
  }
995
1097
 
1098
+ // user message
1099
+ // if we have an image (first time only), send the image and the text, otherwise just the text
996
1100
  let i = this.requestJson.messages.length;
997
1101
  this.requestJson.messages[i] = {};
998
1102
  this.requestJson.messages[i].role = 'user';
999
1103
  if (constants.LLMDebugMode == 2) {
1000
- // test message only, no image
1001
- this.requestJson.messages[i].content =
1002
- 'Describe bar charts to a blind person';
1104
+ // backup message only, no image
1105
+ this.requestJson.messages[i].content = backupMessage;
1003
1106
  } else if (img) {
1004
- let image_url = img;
1107
+ // first message, include the img
1005
1108
  this.requestJson.messages[i].content = [
1006
1109
  {
1007
1110
  type: 'text',
@@ -1009,7 +1112,7 @@ class ChatLLM {
1009
1112
  },
1010
1113
  {
1011
1114
  type: 'image_url',
1012
- image_url: { url: image_url },
1115
+ image_url: { url: img },
1013
1116
  },
1014
1117
  ];
1015
1118
  } else {
@@ -1020,6 +1123,47 @@ class ChatLLM {
1020
1123
  return this.requestJson;
1021
1124
  }
1022
1125
 
1126
+ // Assuming this function is part of your existing JavaScript file
1127
+ async GeminiPrompt(text, imgBase64 = null) {
1128
+ try {
1129
+ // Save the image for next time
1130
+ if (imgBase64 == null) {
1131
+ imgBase64 = constants.LLMImage;
1132
+ } else {
1133
+ constants.LLMImage = imgBase64;
1134
+ }
1135
+ constants.LLMImage = imgBase64;
1136
+
1137
+ // Import the module
1138
+ const { GoogleGenerativeAI } = await import(
1139
+ 'https://esm.run/@google/generative-ai'
1140
+ );
1141
+ const API_KEY = constants.geminiAuthKey;
1142
+ const genAI = new GoogleGenerativeAI(API_KEY);
1143
+ const model = genAI.getGenerativeModel({ model: 'gemini-pro-vision' });
1144
+
1145
+ // Create the prompt
1146
+ const prompt = constants.LLMSystemMessage + '\n\n' + text; // Use the text parameter as the prompt
1147
+ const image = {
1148
+ inlineData: {
1149
+ data: imgBase64, // Use the base64 image string
1150
+ mimeType: 'image/png', // Or the appropriate mime type of your image
1151
+ },
1152
+ };
1153
+
1154
+ // Generate the content
1155
+ console.log('LLM request: ', prompt, image);
1156
+ const result = await model.generateContent([prompt, image]);
1157
+ console.log(result.response.text());
1158
+
1159
+ // Process the response
1160
+ chatLLM.ProcessLLMResponse(result.response);
1161
+ } catch (error) {
1162
+ console.error('Error in GeminiPrompt:', error);
1163
+ throw error; // Rethrow the error for further handling if necessary
1164
+ }
1165
+ }
1166
+
1023
1167
  /**
1024
1168
  * Displays chat message from the user and LLM in a chat history window
1025
1169
  * @function
@@ -1089,8 +1233,9 @@ class ChatLLM {
1089
1233
 
1090
1234
  // first time, send default query
1091
1235
  if (this.firstTime) {
1236
+ let LLMName = constants.LLMModel == 'openai' ? 'OpenAI' : 'Gemini';
1092
1237
  this.firstTime = false;
1093
- this.DisplayChatMessage('LLM', 'Processing Chart...');
1238
+ this.DisplayChatMessage(LLMName, 'Processing Chart...');
1094
1239
  this.RunDefaultPrompt();
1095
1240
  }
1096
1241
  } else {
@@ -1109,36 +1254,30 @@ class ChatLLM {
1109
1254
  async ConvertSVGtoJPG(id) {
1110
1255
  let svgElement = document.getElementById(id);
1111
1256
  return new Promise((resolve, reject) => {
1112
- // Create a canvas
1113
1257
  var canvas = document.createElement('canvas');
1114
1258
  var ctx = canvas.getContext('2d');
1115
1259
 
1116
- // Get dimensions from the SVG element
1117
- var svgRect = svgElement.getBoundingClientRect();
1118
- canvas.width = svgRect.width;
1119
- canvas.height = svgRect.height;
1120
-
1121
- // Create an image to draw the SVG
1122
- var img = new Image();
1123
-
1124
- // Convert SVG element to a data URL
1125
1260
  var svgData = new XMLSerializer().serializeToString(svgElement);
1126
- var svgBlob = new Blob([svgData], {
1127
- type: 'image/svg+xml;charset=utf-8',
1128
- });
1129
- var url = URL.createObjectURL(svgBlob);
1130
-
1131
- img.onload = function () {
1132
- // Draw the SVG on the canvas
1133
- ctx.drawImage(img, 0, 0, svgRect.width, svgRect.height);
1134
-
1135
- // Convert the canvas to JPEG
1136
- var jpegData = canvas.toDataURL('image/jpeg');
1261
+ if (!svgData.startsWith('<svg xmlns')) {
1262
+ svgData = `<svg xmlns="http://www.w3.org/2000/svg" ${svgData.slice(4)}`;
1263
+ }
1137
1264
 
1138
- // Resolve the promise with the Base64 JPEG data
1139
- resolve(jpegData);
1265
+ var svgSize =
1266
+ svgElement.viewBox.baseVal || svgElement.getBoundingClientRect();
1267
+ canvas.width = svgSize.width;
1268
+ canvas.height = svgSize.height;
1140
1269
 
1141
- // Clean up
1270
+ var img = new Image();
1271
+ img.onload = function () {
1272
+ ctx.drawImage(img, 0, 0, svgSize.width, svgSize.height);
1273
+ var jpegData = canvas.toDataURL('image/jpeg', 0.9); // 0.9 is the quality parameter
1274
+ if (constants.LLMModel == 'openai') {
1275
+ resolve(jpegData);
1276
+ } else if (constants.LLMModel == 'gemini') {
1277
+ let base64Data = jpegData.split(',')[1];
1278
+ resolve(base64Data);
1279
+ //resolve(jpegData);
1280
+ }
1142
1281
  URL.revokeObjectURL(url);
1143
1282
  };
1144
1283
 
@@ -1146,30 +1285,14 @@ class ChatLLM {
1146
1285
  reject(new Error('Error loading SVG'));
1147
1286
  };
1148
1287
 
1288
+ var svgBlob = new Blob([svgData], {
1289
+ type: 'image/svg+xml;charset=utf-8',
1290
+ });
1291
+ var url = URL.createObjectURL(svgBlob);
1149
1292
  img.src = url;
1150
1293
  });
1151
1294
  }
1152
1295
 
1153
- downloadJPEG(base64Data, filename) {
1154
- // Create a link element
1155
- var link = document.createElement('a');
1156
-
1157
- // Set the download attribute with a filename
1158
- link.download = filename;
1159
-
1160
- // Convert Base64 data to a data URL and set it as the href
1161
- link.href = base64Data;
1162
-
1163
- // Append the link to the body (required for Firefox)
1164
- document.body.appendChild(link);
1165
-
1166
- // Trigger the download
1167
- link.click();
1168
-
1169
- // Clean up
1170
- document.body.removeChild(link);
1171
- }
1172
-
1173
1296
  /**
1174
1297
  * RunDefaultPrompt is an asynchronous function that generates a prompt for describing a chart to a blind person.
1175
1298
  * It converts the chart to a JPG image using the ConvertSVGtoJPG method and then submits the prompt to the chatLLM function.
@@ -1178,7 +1301,6 @@ class ChatLLM {
1178
1301
  async RunDefaultPrompt() {
1179
1302
  //let img = await this.ConvertSVGtoImg(singleMaidr.id);
1180
1303
  let img = await this.ConvertSVGtoJPG(singleMaidr.id);
1181
- //this.downloadJPEG(img, 'test.jpg'); // test download
1182
1304
  let text = 'Describe this chart to a blind person';
1183
1305
  if (constants.skillLevel) {
1184
1306
  if (constants.skillLevel == 'other' && constants.skillLevelOther) {
@@ -4899,8 +5021,10 @@ class HeatMap {
4899
5021
  if (this.plots[i] instanceof SVGPathElement) {
4900
5022
  // Assuming the path data is in the format "M x y L x y L x y L x y"
4901
5023
  const path_d = this.plots[i].getAttribute('d');
4902
- const coords = path_d.match(/[\d\.]+/g).map(Number);
5024
+ const regex = /[ML]\s*(-?\d+(\.\d+)?)\s+(-?\d+(\.\d+)?)/g;
5025
+ const match = regex.exec(path_d);
4903
5026
 
5027
+ const coords = [Number(match[1]), Number(match[3])];
4904
5028
  const x = coords[0];
4905
5029
  const y = coords[1];
4906
5030