maidr 2.26.1 → 2.27.0

package/README.md CHANGED
@@ -367,7 +367,7 @@ maidr is available via a restful API. Learn more about the usage at [maidr-api](
 
  We currently provide the following binders, all of which can be found at each separate repo:
 
- - [x] Python binder for matplotlib and seaborn: [py_maidr](https://github.com/xability/py_maidr).
+ - [x] Python binder for matplotlib and seaborn: [py_maidr](https://github.com/xability/py-maidr).
 
  - [ ] R binder for ggplot2: [r_maidr](https://github.com/xability/r_maidr).
 
@@ -413,6 +413,28 @@ To learn more about the theoretical background and user study results, we recomm
  }
  ```
 
+ 3. [MAIDR Meets AI: Exploring Multimodal LLM-Based Data Visualization Interpretation by and with Blind and Low-Vision Users](https://dl.acm.org/doi/10.1145/3663548.3675660):
+
+ ```tex
+ @inproceedings{seoMAIDRMeetsAI2024,
+ title = {{{MAIDR Meets AI}}: {{Exploring Multimodal LLM-Based Data Visualization Interpretation}} by and with {{Blind}} and {{Low-Vision Users}}},
+ shorttitle = {{{MAIDR Meets AI}}},
+ booktitle = {Proceedings of the 26th {{International ACM SIGACCESS Conference}} on {{Computers}} and {{Accessibility}}},
+ author = {Seo, JooYoung and Kamath, Sanchita S. and Zeidieh, Aziz and Venkatesh, Saairam and McCurry, Sean},
+ date = {2024-10-27},
+ series = {{{ASSETS}} '24},
+ pages = {1--31},
+ publisher = {Association for Computing Machinery},
+ location = {New York, NY, USA},
+ doi = {10.1145/3663548.3675660},
+ url = {https://dl.acm.org/doi/10.1145/3663548.3675660},
+ urldate = {2024-12-19},
+ abstract = {This paper investigates how blind and low-vision (BLV) users interact with multimodal large language models (LLMs) to interpret data visualizations. Building upon our previous work on the multimodal access and interactive data representation (MAIDR) framework, our mixed-visual-ability team co-designed maidrAI, an LLM extension providing multiple AI responses to users’ visual queries. To explore generative AI-based data representation, we conducted user studies with 8 BLV participants, tasking them with interpreting box plots using our system. We examined how participants personalize LLMs through prompt engineering, their preferences for data visualization descriptions, and strategies for verifying LLM responses. Our findings highlight three dimensions affecting BLV users’ decision-making process: modal preference, LLM customization, and multimodal data representation. This research contributes to designing more accessible data visualization tools for BLV users and advances the understanding of inclusive generative AI applications.},
+ isbn = {979-8-4007-0677-6}
+ }
+ ```
+
+
  ## License
 
  This project is licensed under the GPL 3 License.
package/dist/maidr.js CHANGED
@@ -654,6 +654,13 @@ class Constants {
 
  clientToken = null;
 
+ /**
+  * Mark and recall vars. Used to store the current mark and recall state of the chart.
+  * @type {Array<number>}
+  * @default Array(10).fill(null)
+  */
+ mark = Array(10).fill(null);
+
  /**
   * Stops the autoplay if it is currently running.
   *
@@ -2179,9 +2186,9 @@ class ChatLLM {
  img = await constants.ConvertSVGtoJPG(singleMaidr.id, 'gemini');
  }
  if (constants.emailAuthKey) {
- chatLLM.GeminiPromptAPI(text, img);
+ chatLLM.GeminiPromptRemote(text, img);
  } else {
- chatLLM.GeminiPrompt(text, img);
+ chatLLM.GeminiPromptLocal(text, img);
  }
  }
 
@@ -2304,10 +2311,10 @@ class ChatLLM {
 
  if (model == 'openai') {
  text = data.choices[0].message.content;
- let i = this.requestJson.messages.length;
- this.requestJson.messages[i] = {};
- this.requestJson.messages[i].role = 'assistant';
- this.requestJson.messages[i].content = text;
+ let i = this.requestJsonOpenAI.messages.length;
+ this.requestJsonOpenAI.messages[i] = {};
+ this.requestJsonOpenAI.messages[i].role = 'assistant';
+ this.requestJsonOpenAI.messages[i].content = text;
 
  if (data.error) {
  chatLLM.DisplayChatMessage(LLMName, 'Error processing request.', true);
@@ -2318,6 +2325,12 @@ class ChatLLM {
  } else if (model == 'gemini') {
  if (data.text()) {
  text = data.text();
+ if (this.requestJsonGemini.contents.length > 2) {
+ let i = this.requestJsonGemini.contents.length;
+ this.requestJsonGemini.contents[i] = {};
+ this.requestJsonGemini.contents[i].role = 'model';
+ this.requestJsonGemini.contents[i].content = text;
+ }
  chatLLM.DisplayChatMessage(LLMName, text);
  } else {
  if (!data.error) {
@@ -2360,7 +2373,7 @@ class ChatLLM {
  */
  fakeLLMResponseData() {
  let responseText = {};
- if (this.requestJson.messages.length > 2) {
+ if (this.requestJsonOpenAI.messages.length > 2) {
  // subsequent responses
  responseText = {
  id: 'chatcmpl-8Y44iRCRrohYbAqm8rfBbJqTUADC7',
@@ -2567,32 +2580,32 @@ class ChatLLM {
  let backupMessage =
  'Describe ' + singleMaidr.type + ' charts to a blind person';
  // headers and sys message
- if (!this.requestJson) {
- this.requestJson = {};
- //this.requestJson.model = 'gpt-4-vision-preview';
- this.requestJson.model = 'gpt-4o-2024-11-20';
- this.requestJson.max_tokens = constants.LLMmaxResponseTokens; // note: if this is too short (tested with less than 200), the response gets cut off
+ if (!this.requestJsonOpenAI) {
+ this.requestJsonOpenAI = {};
+ //this.requestJsonOpenAI.model = 'gpt-4-vision-preview';
+ this.requestJsonOpenAI.model = 'gpt-4o-2024-11-20';
+ this.requestJsonOpenAI.max_tokens = constants.LLMmaxResponseTokens; // note: if this is too short (tested with less than 200), the response gets cut off
 
  // sys message
- this.requestJson.messages = [];
- this.requestJson.messages[0] = {};
- this.requestJson.messages[0].role = 'system';
- this.requestJson.messages[0].content = sysMessage;
+ this.requestJsonOpenAI.messages = [];
+ this.requestJsonOpenAI.messages[0] = {};
+ this.requestJsonOpenAI.messages[0].role = 'system';
+ this.requestJsonOpenAI.messages[0].content = sysMessage;
  if (constants.LLMPreferences) {
- this.requestJson.messages[1] = {};
- this.requestJson.messages[1].role = 'system';
- this.requestJson.messages[1].content = constants.LLMPreferences;
+ this.requestJsonOpenAI.messages[1] = {};
+ this.requestJsonOpenAI.messages[1].role = 'system';
+ this.requestJsonOpenAI.messages[1].content = constants.LLMPreferences;
  }
  }
 
  // user message
  // if we have an image (first time only), send the image and the text, otherwise just the text
- let i = this.requestJson.messages.length;
- this.requestJson.messages[i] = {};
- this.requestJson.messages[i].role = 'user';
+ let i = this.requestJsonOpenAI.messages.length;
+ this.requestJsonOpenAI.messages[i] = {};
+ this.requestJsonOpenAI.messages[i].role = 'user';
  if (img) {
  // first message, include the img
- this.requestJson.messages[i].content = [
+ this.requestJsonOpenAI.messages[i].content = [
  {
  type: 'text',
  text: text,
@@ -2604,10 +2617,10 @@ class ChatLLM {
  ];
  } else {
  // just the text
- this.requestJson.messages[i].content = text;
+ this.requestJsonOpenAI.messages[i].content = text;
  }
 
- return this.requestJson;
+ return this.requestJsonOpenAI;
  }
 
  GeminiJson(text, img = null) {
@@ -2672,7 +2685,7 @@ class ChatLLM {
  return payload;
  }
 
- async GeminiPromptAPI(text, imgBase64 = null) {
+ async GeminiPromptRemote(text, imgBase64 = null) {
  let url = constants.baseURL + 'gemini' + constants.code;
 
  // Create the prompt
@@ -2689,7 +2702,20 @@ class ChatLLM {
  }
  constants.LLMImage = imgBase64;
 
- let requestJson = chatLLM.GeminiJson(prompt, imgBase64);
+ if (!this.requestJsonGemini) {
+ // this is our first message, do the full construction
+ this.requestJsonGemini = chatLLM.GeminiJson(prompt, imgBase64);
+ } else {
+ // subsequent messages, just add the new user message
+ let i = this.requestJsonGemini.contents.length;
+ this.requestJsonGemini.contents[i] = {};
+ this.requestJsonGemini.contents[i].role = 'user';
+ this.requestJsonGemini.contents[i].parts = [
+ {
+ text: text,
+ },
+ ];
+ }
 
  const response = await fetch(url, {
  method: 'POST',
@@ -2697,7 +2723,7 @@ class ChatLLM {
  'Content-Type': 'application/json',
  Authentication: constants.emailAuthKey + ' ' + constants.clientToken,
  },
- body: JSON.stringify(requestJson),
+ body: JSON.stringify(this.requestJsonGemini),
  });
  if (response.ok) {
  const responseJson = await response.json();
@@ -2713,7 +2739,7 @@ class ChatLLM {
  }
  }
 
- async GeminiPrompt(text, imgBase64 = null) {
+ async GeminiPromptLocal(text, imgBase64 = null) {
  // https://ai.google.dev/docs/gemini_api_overview#node.js
  try {
  // Save the image for next time
@@ -2735,21 +2761,24 @@ class ChatLLM {
  }); // old model was 'gemini-pro-vision'
 
  // Create the prompt
- let prompt = constants.LLMSystemMessage;
- if (constants.LLMPreferences) {
- prompt += constants.LLMPreferences;
+ if (!this.requestJsonGemini) {
+ // this is our first message, do the full construction
+ this.requestJsonGemini = chatLLM.GeminiJson(prompt, imgBase64);
+ } else {
+ // subsequent messages, just add the new user message
+ let i = this.requestJsonGemini.contents.length;
+ this.requestJsonGemini.contents[i] = {};
+ this.requestJsonGemini.contents[i].role = 'user';
+ this.requestJsonGemini.contents[i].parts = [
+ {
+ text: text,
+ },
+ ];
  }
- prompt += '\n\n' + text; // Use the text parameter as the prompt
- const image = {
- inlineData: {
- data: imgBase64, // Use the base64 image string
- mimeType: 'image/png', // Or the appropriate mime type of your image
- },
- };
 
  // Generate the content
  //console.log('LLM request: ', prompt, image);
- const result = await model.generateContent([prompt, image]);
+ const result = await model.generateContent(this.requestJsonGemini);
  //console.log(result.response.text());
 
  // Process the response
@@ -2757,7 +2786,7 @@ class ChatLLM {
  } catch (error) {
  chatLLM.WaitingSound(false);
  chatLLM.DisplayChatMessage('Gemini', 'Error processing request.', true);
- console.error('Error in GeminiPrompt:', error);
+ console.error('Error in GeminiPromptLocal:', error);
  throw error; // Rethrow the error for further handling if necessary
  }
  }
@@ -2822,7 +2851,7 @@ class ChatLLM {
  document.getElementById('chatLLM_chat_history').innerHTML = '';
 
  // reset the data
- this.requestJson = null;
+ this.requestJsonOpenAI = null;
  this.firstTime = true;
 
  // and start over, if enabled, or window is open
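
Taken together, the ChatLLM hunks above split the old shared `requestJson` into per-provider history objects, `requestJsonOpenAI` and `requestJsonGemini`, so each back end keeps its own multi-turn conversation. The sketch below is a rough illustration of how those two objects look after a couple of turns; it is inferred from the hunks themselves, not from any maidr documentation, and the literal values are placeholders.

```js
// Illustrative shapes only, inferred from the hunks above.
const requestJsonOpenAI = {
  model: 'gpt-4o-2024-11-20',
  max_tokens: 1000, // constants.LLMmaxResponseTokens in maidr
  messages: [
    { role: 'system', content: 'system prompt' },
    { role: 'user', content: [{ type: 'text', text: 'first question' } /* + image part on the first turn */] },
    { role: 'assistant', content: 'first answer' },
    { role: 'user', content: 'follow-up question' },
  ],
};

const requestJsonGemini = {
  contents: [
    { role: 'user', parts: [{ text: 'first question' } /* + inline image on the first turn */] },
    { role: 'user', parts: [{ text: 'follow-up question' }] },
  ],
};
```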
@@ -9085,8 +9114,10 @@ class Control {
  * @returns {void}
  */
  async SetKeyControls() {
+ // home / end: first / last element
+ // not available in review mode
  constants.events.push([
- document,
+ [constants.chart, constants.brailleInput],
  'keydown',
  function (e) {
  // ctrl/cmd: stop autoplay
@@ -9134,6 +9165,44 @@ class Control {
  },
  ]);
 
+ // mark and recall
+ // mark with M + # (0-9), recall with m + # (0-9)
+ // available in chart and braille, not review
+ let lastKeytime = 0;
+ let lastKey = null;
+ constants.events.push([
+ [constants.chart, constants.brailleInput],
+ 'keydown',
+ function (e) {
+ // setup
+ const now = new Date().getTime();
+ const key = e.key;
+
+ // check for keypress within threshold
+ if (now - lastKeytime < constants.keypressInterval) {
+ // mark with M
+ if (lastKey == 'M' && /[0-9]/.test(key)) {
+ const markIndex = parseInt(key, 10);
+ constants.mark[markIndex] = JSON.parse(JSON.stringify(position)); // deep copy
+ display.announceText('Marked position ' + markIndex);
+ }
+
+ // recall with m
+ if (lastKey == 'm' && /[0-9]/.test(key)) {
+ const recallIndex = parseInt(key, 10);
+ if (constants.mark[recallIndex]) {
+ position = constants.mark[recallIndex];
+ control.UpdateAll();
+ }
+ }
+ }
+
+ // update last key and time
+ lastKey = key;
+ lastKeytime = now;
+ },
+ ]);
+
  // Init a few things
  let lastPlayed = '';
  if ([].concat(singleMaidr.type).includes('bar')) {
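
The last hunk adds a two-key "mark and recall" chord: pressing `M` then a digit within `constants.keypressInterval` stores a deep copy of the current `position`, and `m` then a digit jumps back to it. Below is a minimal standalone sketch of the same pattern; the names (`keypressInterval`, `marks`, `handleKeydown`) and the `console.log` feedback are illustrative stand-ins, not maidr's actual constants or Control wiring.

```js
// Minimal sketch of the two-key mark/recall chord (illustrative names only).
const keypressInterval = 2000; // ms window between the prefix key and the digit
const marks = Array(10).fill(null); // one slot per digit 0-9
let position = { x: 0, y: 0 }; // current cursor position

let lastKey = null;
let lastKeytime = 0;

function handleKeydown(e) {
  const now = Date.now();
  const key = e.key;

  if (now - lastKeytime < keypressInterval && /^[0-9]$/.test(key)) {
    const slot = parseInt(key, 10);
    if (lastKey === 'M') {
      // Shift+M then digit: store a deep copy so later moves don't mutate the mark
      marks[slot] = JSON.parse(JSON.stringify(position));
      console.log('Marked position ' + slot);
    } else if (lastKey === 'm' && marks[slot]) {
      // m then digit: jump back to the stored position
      position = marks[slot];
      console.log('Recalled position ' + slot);
    }
  }

  lastKey = key;
  lastKeytime = now;
}

// Usage: handleKeydown({ key: 'M' }); handleKeydown({ key: '3' }); // marks slot 3
```

Storing a JSON deep copy rather than a reference keeps later cursor movement from mutating the saved mark.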