@willwade/aac-processors 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +46 -44
  2. package/dist/core/baseProcessor.d.ts +41 -0
  3. package/dist/core/baseProcessor.js +41 -0
  4. package/dist/core/treeStructure.d.ts +35 -2
  5. package/dist/core/treeStructure.js +18 -3
  6. package/dist/index.d.ts +2 -2
  7. package/dist/index.js +2 -2
  8. package/dist/processors/astericsGridProcessor.d.ts +15 -0
  9. package/dist/processors/astericsGridProcessor.js +17 -0
  10. package/dist/processors/gridset/helpers.d.ts +4 -1
  11. package/dist/processors/gridset/helpers.js +4 -0
  12. package/dist/processors/gridset/pluginTypes.js +51 -50
  13. package/dist/processors/gridset/symbolAlignment.d.ts +125 -0
  14. package/dist/processors/gridset/symbolAlignment.js +283 -0
  15. package/dist/processors/gridset/symbolExtractor.js +3 -2
  16. package/dist/processors/gridset/symbolSearch.js +9 -7
  17. package/dist/processors/gridsetProcessor.d.ts +26 -0
  18. package/dist/processors/gridsetProcessor.js +178 -25
  19. package/dist/processors/obfProcessor.d.ts +26 -0
  20. package/dist/processors/obfProcessor.js +94 -1
  21. package/dist/processors/snap/helpers.d.ts +5 -1
  22. package/dist/processors/snap/helpers.js +5 -0
  23. package/dist/processors/snapProcessor.d.ts +2 -0
  24. package/dist/processors/snapProcessor.js +156 -5
  25. package/dist/processors/touchchatProcessor.d.ts +26 -0
  26. package/dist/processors/touchchatProcessor.js +106 -6
  27. package/dist/types/aac.d.ts +63 -0
  28. package/dist/types/aac.js +33 -0
  29. package/dist/{optional → utilities}/analytics/history.d.ts +12 -1
  30. package/dist/{optional → utilities}/analytics/index.d.ts +2 -0
  31. package/dist/{optional → utilities}/analytics/index.js +6 -1
  32. package/dist/{optional → utilities}/analytics/metrics/comparison.js +8 -4
  33. package/dist/{optional → utilities}/analytics/metrics/core.d.ts +9 -0
  34. package/dist/{optional → utilities}/analytics/metrics/core.js +190 -37
  35. package/dist/{optional → utilities}/analytics/metrics/effort.d.ts +10 -0
  36. package/dist/{optional → utilities}/analytics/metrics/effort.js +13 -0
  37. package/dist/utilities/analytics/metrics/obl-types.d.ts +93 -0
  38. package/dist/utilities/analytics/metrics/obl-types.js +7 -0
  39. package/dist/utilities/analytics/metrics/obl.d.ts +40 -0
  40. package/dist/utilities/analytics/metrics/obl.js +287 -0
  41. package/dist/{optional → utilities}/analytics/metrics/vocabulary.js +6 -4
  42. package/dist/{optional → utilities}/symbolTools.js +13 -16
  43. package/dist/utilities/translation/translationProcessor.d.ts +119 -0
  44. package/dist/utilities/translation/translationProcessor.js +204 -0
  45. package/dist/validation/gridsetValidator.js +10 -0
  46. package/package.json +1 -1
  47. /package/dist/{optional → utilities}/analytics/history.js +0 -0
  48. /package/dist/{optional → utilities}/analytics/metrics/comparison.d.ts +0 -0
  49. /package/dist/{optional → utilities}/analytics/metrics/index.d.ts +0 -0
  50. /package/dist/{optional → utilities}/analytics/metrics/index.js +0 -0
  51. /package/dist/{optional → utilities}/analytics/metrics/sentence.d.ts +0 -0
  52. /package/dist/{optional → utilities}/analytics/metrics/sentence.js +0 -0
  53. /package/dist/{optional → utilities}/analytics/metrics/types.d.ts +0 -0
  54. /package/dist/{optional → utilities}/analytics/metrics/types.js +0 -0
  55. /package/dist/{optional → utilities}/analytics/metrics/vocabulary.d.ts +0 -0
  56. /package/dist/{optional → utilities}/analytics/reference/index.d.ts +0 -0
  57. /package/dist/{optional → utilities}/analytics/reference/index.js +0 -0
  58. /package/dist/{optional → utilities}/analytics/utils/idGenerator.d.ts +0 -0
  59. /package/dist/{optional → utilities}/analytics/utils/idGenerator.js +0 -0
  60. /package/dist/{optional → utilities}/symbolTools.d.ts +0 -0
@@ -0,0 +1,287 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.OblAnonymizer = exports.OblUtil = void 0;
4
+ const treeStructure_1 = require("../../../core/treeStructure");
5
/**
 * .obl (Open Board Logging) Utility
 *
 * Provides parsing and generation support for the .obl format, plus
 * conversion to and from the internal HistoryEntry structure.
 */
class OblUtil {
    /**
     * Parse an OBL JSON string.
     *
     * If the trimmed input starts with a block comment (the optional notice),
     * everything up to and including the first comment terminator is dropped
     * before the remainder is handed to JSON.parse.
     *
     * @param {string} json - Raw .obl file contents.
     * @returns {object} The parsed OBL file object.
     * @throws {SyntaxError} If the remaining text is not valid JSON.
     */
    static parse(json) {
        let cleanJson = json.trim();
        if (cleanJson.startsWith('/*')) {
            const endComment = cleanJson.indexOf('*/');
            // An unterminated comment is left in place so JSON.parse reports the problem.
            if (endComment !== -1) {
                cleanJson = cleanJson.substring(endComment + 2).trim();
            }
        }
        return JSON.parse(cleanJson);
    }

    /**
     * Stringify an OBL file object.
     *
     * @param {object} obl - OBL file object to serialize.
     * @param {boolean} [includeNotice=true] - Prefix the output with the recommended notice comment.
     * @returns {string} Pretty-printed JSON (2-space indent), optionally preceded by the notice.
     */
    static stringify(obl, includeNotice = true) {
        const json = JSON.stringify(obl, null, 2);
        if (includeNotice) {
            return `/* NOTICE: The following information represents an individual's communication and should be treated respectfully and securely. */\n${json}`;
        }
        return json;
    }

    /**
     * Convert an OBL file to internal HistoryEntry format.
     *
     * OBL is session-based and event-based, whereas HistoryEntry is
     * content-based with occurrences, so events are grouped by their content
     * (vocalization/label, text or action). Each group's occurrences are
     * sorted by timestamp.
     *
     * @param {object} obl - Parsed OBL file object.
     * @returns {Array<object>} One entry per distinct content string, in first-seen order.
     */
    static toHistoryEntries(obl) {
        const source = obl.source || 'OBL';
        const contentMap = new Map();
        // A file without sessions, or a session without events, yields no entries.
        for (const session of obl.sessions ?? []) {
            for (const event of session.events ?? []) {
                const occurrence = {
                    timestamp: new Date(event.timestamp),
                    modeling: event.modeling,
                    pageId: event.board_id || null,
                    // `??` rather than `||`: a coordinate of exactly 0 is valid and must survive.
                    latitude: event.geo?.[0] ?? null,
                    longitude: event.geo?.[1] ?? null,
                    type: event.type,
                    // Remaining OBL fields are carried on the occurrence so that
                    // fromHistoryEntries can restore them.
                    buttonId: event.button_id || null,
                    boardId: event.board_id || null,
                    spoken: event.spoken,
                    vocalization: event.vocalization,
                    imageUrl: event.image_url,
                    actions: event.actions,
                };
                let content;
                switch (event.type) {
                    case 'button':
                        content = event.vocalization || event.label;
                        break;
                    case 'utterance':
                    case 'note':
                        content = event.text;
                        break;
                    case 'action':
                        content = event.action;
                        break;
                    default:
                        content = event.label || event.text || event.action || 'unknown';
                }
                const occurrences = contentMap.get(content) || [];
                occurrences.push(occurrence);
                contentMap.set(content, occurrences);
            }
        }
        const entries = [];
        contentMap.forEach((occurrences, content) => {
            entries.push({
                id: `obl:${content}`,
                source,
                content,
                occurrences: occurrences.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime()),
            });
        });
        return entries;
    }

    /**
     * Convert HistoryEntries to an OBL file object.
     *
     * All occurrences are flattened into one session ('session-1') whose
     * events are ordered by timestamp. Known semantic intents are mapped to
     * OBL action strings; anything that is not an utterance, action or note
     * is written as a button event.
     *
     * @param {Array<object>} entries - History entries with `content` and `occurrences`.
     * @param {string} userId - Value written to `user_id`.
     * @param {string} [source] - Value written to `source`; defaults to 'aac-processors'.
     * @returns {object} OBL file object in 'open-board-log-0.1' format.
     */
    static fromHistoryEntries(entries, userId, source) {
        const Intent = treeStructure_1.AACSemanticIntent;
        // Intents that always become an OBL action, checked in this order.
        const intentActions = [
            [Intent.CLEAR_TEXT, ':clear'],
            [Intent.GO_HOME, ':home'],
            [Intent.NAVIGATE_TO, ':open_board'],
            [Intent.GO_BACK, ':back'],
            [Intent.DELETE_CHARACTER, ':backspace'],
        ];
        const events = [];
        for (const entry of entries) {
            for (const occ of entry.occurrences) {
                // Accept Date instances as well as ISO strings (e.g. after a JSON round trip).
                const timestamp = new Date(occ.timestamp).toISOString();
                const intent = occ.intent;
                let oblType = occ.type || 'button';
                let actionStr;
                const mapped = intentActions.find(([candidate]) => candidate === intent);
                if (mapped) {
                    oblType = 'action';
                    actionStr = mapped[1];
                }
                else if (intent === Intent.SPEAK_IMMEDIATE || intent === Intent.SPEAK_TEXT) {
                    // Speak may legitimately be a button or an utterance; only
                    // other types are rewritten as a ':speak' action.
                    if (oblType !== 'utterance' && oblType !== 'button') {
                        oblType = 'action';
                        actionStr = ':speak';
                    }
                }
                const common = {
                    id: Math.random().toString(36).substring(2, 11),
                    timestamp,
                    modeling: occ.modeling,
                    type: oblType,
                };
                if (occ.latitude !== null &&
                    occ.latitude !== undefined &&
                    occ.longitude !== null &&
                    occ.longitude !== undefined) {
                    common.geo = [occ.latitude, occ.longitude];
                }
                if (oblType === 'utterance' || oblType === 'note') {
                    events.push({
                        ...common,
                        text: entry.content,
                    });
                }
                else if (oblType === 'action') {
                    events.push({
                        ...common,
                        action: actionStr || entry.content,
                        destination_board_id: occ.boardId || undefined,
                        text: intent === Intent.SPEAK_TEXT ? entry.content : undefined,
                    });
                }
                else {
                    // Default to button
                    events.push({
                        ...common,
                        type: 'button',
                        label: entry.content,
                        // When `spoken` was not recorded, infer it from the semantic category.
                        spoken: occ.spoken ??
                            (occ.category === treeStructure_1.AACSemanticCategory.COMMUNICATION),
                        button_id: occ.buttonId || undefined,
                        board_id: occ.boardId || occ.pageId || undefined,
                        vocalization: occ.vocalization || undefined,
                        image_url: occ.imageUrl || undefined,
                        actions: occ.actions || undefined,
                    });
                }
            }
        }
        // ISO-8601 UTC strings order chronologically when compared as text.
        events.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
        const now = new Date().toISOString();
        const started = events.length > 0 ? events[0].timestamp : now;
        const ended = events.length > 0 ? events[events.length - 1].timestamp : now;
        const session = {
            id: 'session-1',
            type: 'log',
            started,
            ended,
            events,
        };
        return {
            format: 'open-board-log-0.1',
            user_id: userId,
            source: source || 'aac-processors',
            sessions: [session],
        };
    }
}
209
+ exports.OblUtil = OblUtil;
210
/**
 * .obl Anonymization Utility
 *
 * Works on a deep copy of the OBL object; the caller's object is never modified.
 */
class OblAnonymizer {
    /**
     * Apply anonymization to an OBL file.
     *
     * Supported type names: 'timestamp_shift', 'geolocation_masking',
     * 'url_stripping' and 'name_masking'. Each applied type is recorded once
     * in the session's `anonymizations` list, and the copy is flagged with
     * `anonymized = true`.
     *
     * @param {object} obl - OBL file object (must be JSON-serializable).
     * @param {string[]} types - Anonymization types to apply.
     * @returns {object} Anonymized deep copy of `obl`.
     */
    static anonymize(obl, types) {
        // OBL is plain JSON data, so a JSON round trip is a sufficient deep clone.
        const newObl = JSON.parse(JSON.stringify(obl));
        newObl.anonymized = true;
        // Applied in this fixed order regardless of the order of `types`.
        const steps = [
            ['timestamp_shift', (session) => this.applyTimestampShift(session)],
            ['geolocation_masking', (session) => this.applyGeolocationMasking(session)],
            ['url_stripping', (session) => this.applyUrlStripping(session)],
            ['name_masking', (session) => this.applyNameMasking(newObl, session)],
        ];
        for (const session of newObl.sessions ?? []) {
            session.anonymizations = session.anonymizations || [];
            for (const [name, apply] of steps) {
                if (!types.includes(name)) {
                    continue;
                }
                apply(session);
                if (!session.anonymizations.includes(name)) {
                    session.anonymizations.push(name);
                }
            }
        }
        return newObl;
    }

    /**
     * Shift every timestamp in the session by one common offset so that the
     * earliest known instant (session start or any event) lands on
     * 2000-01-01T00:00:00.000Z. Relative timing is preserved.
     *
     * Missing or unparseable timestamps are left untouched instead of
     * raising a RangeError; a session with no usable timestamp is a no-op.
     *
     * @param {object} session - Session object, modified in place.
     */
    static applyTimestampShift(session) {
        const events = session.events ?? [];
        const toMillis = (value) => value === undefined || value === null ? NaN : new Date(value).getTime();
        const earliest = [session.started, ...events.map((event) => event.timestamp)]
            .map(toMillis)
            .filter((ms) => Number.isFinite(ms))
            .reduce((min, ms) => Math.min(min, ms), Infinity);
        if (earliest === Infinity) {
            return;
        }
        const offset = Date.UTC(2000, 0, 1) - earliest;
        const shift = (value) => {
            const ms = toMillis(value);
            return Number.isFinite(ms) ? new Date(ms + offset).toISOString() : value;
        };
        for (const key of ['started', 'ended']) {
            // Do not create the property when the session never had it.
            if (session[key] !== undefined && session[key] !== null) {
                session[key] = shift(session[key]);
            }
        }
        for (const event of events) {
            if (event.timestamp !== undefined && event.timestamp !== null) {
                event.timestamp = shift(event.timestamp);
            }
        }
    }

    /**
     * Remove `geo` and `location_id` from every event of the session.
     *
     * @param {object} session - Session object, modified in place.
     */
    static applyGeolocationMasking(session) {
        for (const event of session.events ?? []) {
            delete event.geo;
            delete event.location_id;
        }
    }

    /**
     * Remove `image_url` from button events and `author_url` /
     * `author_email` from note events.
     *
     * @param {object} session - Session object, modified in place.
     */
    static applyUrlStripping(session) {
        for (const event of session.events ?? []) {
            if (event.type === 'button') {
                delete event.image_url;
            }
            if (event.type === 'note') {
                delete event.author_url;
                delete event.author_email;
            }
        }
    }

    /**
     * Remove the file-level `user_name` and the `author_name` of note events.
     *
     * @param {object} obl - OBL file object, modified in place.
     * @param {object} session - Session object, modified in place.
     */
    static applyNameMasking(obl, session) {
        delete obl.user_name;
        for (const event of session.events ?? []) {
            if (event.type === 'note') {
                delete event.author_name;
            }
        }
    }
}
287
+ exports.OblAnonymizer = OblAnonymizer;
@@ -22,12 +22,13 @@ class VocabularyAnalyzer {
22
22
  const lowEffortThreshold = options?.lowEffortThreshold || 2.0;
23
23
  // Load reference data
24
24
  const coreLists = this.referenceLoader.loadCoreLists();
25
- // Create word to effort map
25
+ // Create word to effort map (using lowercase keys for matching)
26
26
  const wordEffortMap = new Map();
27
27
  metrics.buttons.forEach((btn) => {
28
- const existing = wordEffortMap.get(btn.label);
28
+ const word = btn.label.toLowerCase();
29
+ const existing = wordEffortMap.get(word);
29
30
  if (!existing || btn.effort < existing) {
30
- wordEffortMap.set(btn.label, btn.effort);
31
+ wordEffortMap.set(word, btn.effort);
31
32
  }
32
33
  });
33
34
  // Analyze each core list
@@ -79,7 +80,8 @@ class VocabularyAnalyzer {
79
80
  const missing = [];
80
81
  let totalEffort = 0;
81
82
  list.words.forEach((word) => {
82
- const effort = wordEffortMap.get(word);
83
+ const lowerWord = word.toLowerCase();
84
+ const effort = wordEffortMap.get(lowerWord);
83
85
  if (effort !== undefined) {
84
86
  covered.push(word);
85
87
  totalEffort += effort;
@@ -7,7 +7,7 @@ exports.TouchChatSymbolResolver = exports.TouchChatSymbolExtractor = exports.Gri
7
7
  exports.resolveSymbol = resolveSymbol;
8
8
  const path_1 = __importDefault(require("path"));
9
9
  const fs_1 = __importDefault(require("fs"));
10
- const password_1 = require("../processors/gridset/password");
10
+ const symbols_1 = require("../processors/gridset/symbols");
11
11
  // --- Base Classes ---
12
12
  class SymbolExtractor {
13
13
  }
@@ -61,10 +61,13 @@ exports.SnapSymbolResolver = SnapSymbolResolver;
61
61
  let AdmZip = null;
62
62
  let XMLParser = null;
63
63
  try {
64
+ // Dynamic requires for optional dependencies
64
65
  // eslint-disable-next-line @typescript-eslint/no-var-requires
65
- AdmZip = require('adm-zip');
66
+ const admZipModule = require('adm-zip');
66
67
  // eslint-disable-next-line @typescript-eslint/no-var-requires
67
- XMLParser = require('fast-xml-parser').XMLParser;
68
+ const fxpModule = require('fast-xml-parser');
69
+ AdmZip = admZipModule;
70
+ XMLParser = fxpModule.XMLParser;
68
71
  }
69
72
  catch {
70
73
  AdmZip = null;
@@ -74,19 +77,13 @@ class Grid3SymbolExtractor extends SymbolExtractor {
74
77
  getSymbolReferences(filePath) {
75
78
  if (!AdmZip || !XMLParser)
76
79
  throw new Error('adm-zip or fast-xml-parser not installed');
77
- const zip = new AdmZip(filePath);
78
- const parser = new XMLParser();
79
- const refs = new Set();
80
- const entries = (0, password_1.getZipEntriesWithPassword)(zip, (0, password_1.resolveGridsetPasswordFromEnv)());
81
- entries.forEach((entry) => {
82
- if (entry.entryName.endsWith('.gridset') || entry.entryName.endsWith('.gridsetx')) {
83
- const xmlBuffer = entry.getData();
84
- // Parse to validate XML structure (future: extract refs)
85
- parser.parse(xmlBuffer.toString('utf8'));
86
- // TODO: Extract symbol references from Grid 3 XML structure when needed
87
- }
88
- });
89
- return Array.from(refs);
80
+ // Import GridsetProcessor dynamically to avoid circular dependencies
81
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
82
+ const { GridsetProcessor } = require('../processors/gridsetProcessor');
83
+ const proc = new GridsetProcessor();
84
+ const tree = proc.loadIntoTree(filePath);
85
+ // Use the existing extractSymbolReferences function from gridset/symbols.ts
86
+ return (0, symbols_1.extractSymbolReferences)(tree);
90
87
  }
91
88
  }
92
89
  exports.Grid3SymbolExtractor = Grid3SymbolExtractor;
@@ -0,0 +1,119 @@
1
+ /**
2
+ * LLM-Based Translation with Symbol Preservation
3
+ *
4
+ * This module provides utilities for translating AAC files while preserving
5
+ * symbol-to-word associations across different formats (gridset, OBF, Snap, etc.).
6
+ *
7
+ * The key insight: Different AAC formats have different internal structures,
8
+ * but they all share common concepts:
9
+ * - Buttons with labels and messages
10
+ * - Symbols attached to specific words
11
+ * - Need to preserve symbol positions during translation
12
+ *
13
+ * This module provides a format-agnostic way to:
14
+ * 1. Extract symbol information for LLM processing
15
+ * 2. Apply LLM translations with preserved symbols
16
+ *
17
+ * Usage:
18
+ * 1. Processor extracts buttons and calls extractSymbolsForLLM()
19
+ * 2. LLM translates and returns aligned symbols
20
+ * 3. Processor calls processLLMTranslations() to apply results
21
+ */
22
+ /**
23
+ * Represents a symbol attached to text in a format-agnostic way
24
+ */
25
+ export interface SymbolInfo {
26
+ text: string; // Word or phrase the symbol is attached to
27
+ image?: string; // Symbol reference, e.g. "[library]path"
28
+ symbolLibrary?: string; // Library part of a "[library]path" reference
29
+ symbolPath?: string; // Path part of a "[library]path" reference
30
+ }
31
+ /**
32
+ * Button data extracted for translation (format-agnostic)
33
+ */
34
+ export interface ButtonForTranslation {
35
+ buttonId: string; // Unique identifier for the button
36
+ pageId?: string; // Optional page context
37
+ pageName?: string; // Optional page context
38
+ label: string; // Button label text
39
+ message: string; // Button message/speak text
40
+ textToTranslate: string; // The message when present, otherwise the label
41
+ symbols: SymbolInfo[]; // Symbols attached to the button's text
42
+ }
43
+ /**
44
+ * LLM translation result with symbol mappings
45
+ */
46
+ export interface LLMLTranslationResult {
47
+ buttonId: string; // Required; validateTranslationResults rejects entries without it
48
+ translatedLabel?: string; // At least one of translatedLabel / translatedMessage must be set to pass validateTranslationResults
49
+ translatedMessage?: string; // See translatedLabel
50
+ symbols?: Array<{
51
+ text: string; // Translated word the symbol is attached to
52
+ image?: string; // Symbol reference, e.g. "[library]path"
53
+ }>;
54
+ }
55
+ /**
56
+ * Normalize a button's label, message and symbols into the common shape used for LLM-based translation.
57
+ *
58
+ * This is a format-agnostic helper that processors can use to normalize
59
+ * their button data into a common format for LLM processing.
60
+ *
61
+ * @param buttonId - Unique identifier for the button
62
+ * @param label - Button label text
63
+ * @param message - Button message/speak text
64
+ * @param symbols - Array of symbols from the button
65
+ * @param context - Optional page context
66
+ * @returns Normalized button data for translation
67
+ */
68
+ export declare function normalizeButtonForTranslation(buttonId: string, label: string, message: string, symbols: SymbolInfo[], context?: {
69
+ pageId?: string;
70
+ pageName?: string;
71
+ }): ButtonForTranslation;
72
+ /**
73
+ * Extract symbols from various button formats.
74
+ *
75
+ * This helper handles different ways symbols might be stored in button data:
76
+ * - semanticAction.richText.symbols (gridset format)
77
+ * - symbolLibrary + symbolPath fields
78
+ * - image field with [library]path format
79
+ *
80
+ * @param button - Button object from any AAC format
81
+ * @returns Array of symbol info, or undefined if no symbols
82
+ */
83
+ export declare function extractSymbolsFromButton(button: any): SymbolInfo[] | undefined;
84
+ /**
85
+ * Extract all buttons from a file for LLM translation.
86
+ *
87
+ * This is a convenience method that processors can use to extract all
88
+ * translatable buttons with their symbols in a format-agnostic way.
89
+ *
90
+ * @param buttons - Array of button objects from any AAC format
91
+ * @param contextFn - Optional function to provide page context for each button
92
+ * @returns Array of normalized button data ready for LLM translation
93
+ */
94
+ export declare function extractAllButtonsForTranslation(buttons: any[], contextFn?: (button: any) => {
95
+ pageId?: string;
96
+ pageName?: string;
97
+ }): ButtonForTranslation[];
98
+ /**
99
+ * Create a prompt for LLM translation with symbol preservation.
100
+ *
101
+ * This generates a structured prompt that instructs the LLM to translate
102
+ * while preserving symbol-to-word associations.
103
+ *
104
+ * @param buttons - Buttons to translate
105
+ * @param targetLanguage - Target language for translation
106
+ * @returns Prompt string for LLM
107
+ */
108
+ export declare function createTranslationPrompt(buttons: ButtonForTranslation[], targetLanguage: string): string;
109
+ /**
110
+ * Validate LLM translation results before applying.
111
+ *
112
+ * @param translations - LLM translation results
113
+ * @param originalButtonIds - Expected button IDs (optional, for validation)
114
+ * @param options - Validation options
115
+ * @throws Error if validation fails
116
+ */
117
+ export declare function validateTranslationResults(translations: LLMLTranslationResult[], originalButtonIds?: string[], options?: {
118
+ allowPartial?: boolean;
119
+ }): void;
@@ -0,0 +1,204 @@
1
+ "use strict";
2
+ /**
3
+ * LLM-Based Translation with Symbol Preservation
4
+ *
5
+ * This module provides utilities for translating AAC files while preserving
6
+ * symbol-to-word associations across different formats (gridset, OBF, Snap, etc.).
7
+ *
8
+ * The key insight: Different AAC formats have different internal structures,
9
+ * but they all share common concepts:
10
+ * - Buttons with labels and messages
11
+ * - Symbols attached to specific words
12
+ * - Need to preserve symbol positions during translation
13
+ *
14
+ * This module provides a format-agnostic way to:
15
+ * 1. Extract symbol information for LLM processing
16
+ * 2. Apply LLM translations with preserved symbols
17
+ *
18
+ * Usage:
19
+ * 1. Processor extracts buttons and calls extractSymbolsForLLM()
20
+ * 2. LLM translates and returns aligned symbols
21
+ * 3. Processor calls processLLMTranslations() to apply results
22
+ */
23
+ Object.defineProperty(exports, "__esModule", { value: true });
24
+ exports.normalizeButtonForTranslation = normalizeButtonForTranslation;
25
+ exports.extractSymbolsFromButton = extractSymbolsFromButton;
26
+ exports.extractAllButtonsForTranslation = extractAllButtonsForTranslation;
27
+ exports.createTranslationPrompt = createTranslationPrompt;
28
+ exports.validateTranslationResults = validateTranslationResults;
29
/**
 * Normalize a button's data into the common shape used for LLM translation.
 *
 * Format-agnostic helper: processors pass in whatever they extracted from
 * their own button representation and get back one uniform record.
 *
 * @param buttonId - Unique identifier for the button
 * @param label - Button label text
 * @param message - Button message/speak text
 * @param symbols - Array of symbols from the button
 * @param context - Optional page context; its properties are copied onto the result last
 * @returns Normalized button data for translation
 */
function normalizeButtonForTranslation(buttonId, label, message, symbols, context) {
    // The spoken message is what gets translated; the label is only a fallback.
    const textToTranslate = message || label;
    const normalized = { buttonId, label, message, textToTranslate, symbols };
    return { ...normalized, ...context };
}
52
/**
 * Extract symbols from various button formats.
 *
 * Sources are tried in this order and the first match wins:
 * 1. semanticAction.richText.symbols (gridset format), returned as a copy
 * 2. symbolLibrary + symbolPath fields, combined as "[library]path"
 * 3. an image string that starts with "[" (a "[library]path" reference)
 *
 * For sources 2 and 3 the symbol is attached to the button's label, or its
 * message when there is no label; a button with neither yields undefined.
 *
 * @param button - Button object from any AAC format (null/undefined is tolerated)
 * @returns Array of symbol info, or undefined if no symbols
 */
function extractSymbolsFromButton(button) {
    // A missing button simply has no symbols; do not crash on property access.
    if (button === null || button === undefined) {
        return undefined;
    }
    // Method 1: rich-text symbols already carry their own text associations.
    const richTextSymbols = button.semanticAction?.richText?.symbols;
    if (Array.isArray(richTextSymbols) && richTextSymbols.length > 0) {
        return [...richTextSymbols];
    }
    // Remaining methods need some text to attach the symbol to.
    const text = button.label || button.message || '';
    if (!text) {
        return undefined;
    }
    // Method 2: separate library and path fields.
    if (button.symbolLibrary && button.symbolPath) {
        return [
            {
                text,
                image: `[${button.symbolLibrary}]${button.symbolPath}`,
                symbolLibrary: button.symbolLibrary,
                symbolPath: button.symbolPath,
            },
        ];
    }
    // Method 3: image field that already holds a symbol reference.
    if (typeof button.image === 'string' && button.image.startsWith('[')) {
        return [{ text, image: button.image }];
    }
    // No symbols found
    return undefined;
}
99
/**
 * Collect every translatable button, with its symbols, in normalized form.
 *
 * Falsy items and buttons with neither a label nor a message are skipped.
 * A button without `id` or `buttonId` receives a generated id of the form
 * `button_<n>`, where n is the number of buttons collected so far.
 *
 * @param buttons - Array of button objects from any AAC format
 * @param contextFn - Optional function to provide page context for each button
 * @returns Array of normalized button data ready for LLM translation
 */
function extractAllButtonsForTranslation(buttons, contextFn) {
    const collected = [];
    for (const candidate of buttons) {
        if (!candidate) {
            continue;
        }
        const fallbackId = `button_${collected.length}`;
        const id = candidate.id || candidate.buttonId || fallbackId;
        const labelText = candidate.label || '';
        const messageText = candidate.message || '';
        const foundSymbols = extractSymbolsFromButton(candidate);
        // Nothing to translate on this button.
        const hasText = Boolean(labelText || messageText);
        if (hasText) {
            const pageContext = contextFn ? contextFn(candidate) : undefined;
            collected.push(normalizeButtonForTranslation(id, labelText, messageText, foundSymbols || [], pageContext));
        }
    }
    return collected;
}
126
/**
 * Build the LLM prompt for translating buttons while keeping symbols
 * attached to the right words.
 *
 * The buttons are embedded as pretty-printed JSON, and the prompt asks for a
 * JSON array of { buttonId, translatedLabel, translatedMessage, symbols }.
 *
 * @param buttons - Buttons to translate
 * @param targetLanguage - Target language for translation
 * @returns Prompt string for LLM
 */
function createTranslationPrompt(buttons, targetLanguage) {
    const serializedButtons = JSON.stringify(buttons, null, 2);
    // NOTE(review): the JSON example below is emitted without indentation, exactly
    // as it appears in the reviewed listing - confirm against the compiled file.
    const promptLines = [
        'You are a translation assistant for AAC (Augmentative and Alternative Communication) systems.',
        '',
        `Your task is to translate the following buttons to ${targetLanguage} while preserving symbol associations.`,
        '',
        'Each button has:',
        '- label: The text shown on the button',
        '- message: The text spoken when the button is activated',
        '- textToTranslate: The actual text to translate (usually the message)',
        '- symbols: Visual symbols attached to specific words',
        '',
        'IMPORTANT: After translation, you MUST reattach symbols to the correct translated words based on MEANING, not position.',
        '',
        'Example:',
        '- Original: "I want apple" with apple symbol on "apple"',
        '- Spanish: "Yo quiero manzana" with apple symbol on "manzana" (NOT "Yo" or "quiero")',
        '- French: "Je veux une pomme" with apple symbol on "pomme"',
        '',
        'The symbols array should contain the translated word that each symbol should be attached to.',
        '',
        'Buttons to translate:',
        // Interpolated (not passed raw) so an undefined value prints as "undefined".
        `${serializedButtons}`,
        '',
        'Return ONLY a JSON array with this exact structure:',
        '[',
        '{',
        '"buttonId": "...",',
        '"translatedLabel": "...",',
        '"translatedMessage": "...",',
        '"symbols": [',
        '{"text": "translated_word", "image": "[library]path"}',
        ']',
        '}',
        ']',
        '',
        'Ensure all symbol image references are preserved exactly as provided.',
    ];
    return promptLines.join('\n');
}
174
/**
 * Validate LLM translation results before applying.
 *
 * Checks, in order:
 * 1. `translations` is an array;
 * 2. unless `options.allowPartial` is set, every id in `originalButtonIds`
 *    has a translation;
 * 3. every entry has a `buttonId` and at least one of `translatedMessage`
 *    or `translatedLabel`.
 *
 * Entries that are null or undefined (easy to get from malformed LLM
 * output) are reported as missing their buttonId rather than crashing.
 *
 * @param translations - LLM translation results
 * @param originalButtonIds - Expected button IDs (optional, for validation)
 * @param options - Validation options
 * @throws Error if validation fails
 */
function validateTranslationResults(translations, originalButtonIds, options) {
    if (!Array.isArray(translations)) {
        throw new Error('Translation results must be an array');
    }
    // Optional chaining keeps a null/undefined entry from aborting the scan.
    const translatedIds = new Set(translations.map((t) => t?.buttonId));
    // Check that all original buttons have translations (unless partial is allowed)
    if (originalButtonIds && !options?.allowPartial) {
        for (const id of originalButtonIds) {
            if (!translatedIds.has(id)) {
                throw new Error(`Missing translation for button: ${id}`);
            }
        }
    }
    // Check each translation has required fields
    for (const trans of translations) {
        if (!trans?.buttonId) {
            throw new Error('Translation missing buttonId');
        }
        if (!trans.translatedMessage && !trans.translatedLabel) {
            throw new Error(`Translation for ${trans.buttonId} has no translated text`);
        }
    }
}