@willwade/aac-processors 0.0.12 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -44
- package/dist/core/baseProcessor.d.ts +41 -0
- package/dist/core/baseProcessor.js +41 -0
- package/dist/core/treeStructure.d.ts +35 -2
- package/dist/core/treeStructure.js +18 -3
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/processors/astericsGridProcessor.d.ts +15 -0
- package/dist/processors/astericsGridProcessor.js +17 -0
- package/dist/processors/gridset/helpers.d.ts +4 -1
- package/dist/processors/gridset/helpers.js +4 -0
- package/dist/processors/gridset/pluginTypes.js +51 -50
- package/dist/processors/gridset/symbolAlignment.d.ts +125 -0
- package/dist/processors/gridset/symbolAlignment.js +283 -0
- package/dist/processors/gridset/symbolExtractor.js +3 -2
- package/dist/processors/gridset/symbolSearch.js +9 -7
- package/dist/processors/gridsetProcessor.d.ts +26 -0
- package/dist/processors/gridsetProcessor.js +178 -25
- package/dist/processors/obfProcessor.d.ts +26 -0
- package/dist/processors/obfProcessor.js +94 -1
- package/dist/processors/snap/helpers.d.ts +5 -1
- package/dist/processors/snap/helpers.js +5 -0
- package/dist/processors/snapProcessor.d.ts +2 -0
- package/dist/processors/snapProcessor.js +156 -5
- package/dist/processors/touchchatProcessor.d.ts +26 -0
- package/dist/processors/touchchatProcessor.js +106 -6
- package/dist/types/aac.d.ts +63 -0
- package/dist/types/aac.js +33 -0
- package/dist/{optional → utilities}/analytics/history.d.ts +12 -1
- package/dist/{optional → utilities}/analytics/index.d.ts +2 -0
- package/dist/{optional → utilities}/analytics/index.js +6 -1
- package/dist/{optional → utilities}/analytics/metrics/comparison.js +8 -4
- package/dist/{optional → utilities}/analytics/metrics/core.d.ts +9 -0
- package/dist/{optional → utilities}/analytics/metrics/core.js +190 -37
- package/dist/{optional → utilities}/analytics/metrics/effort.d.ts +10 -0
- package/dist/{optional → utilities}/analytics/metrics/effort.js +13 -0
- package/dist/utilities/analytics/metrics/obl-types.d.ts +93 -0
- package/dist/utilities/analytics/metrics/obl-types.js +7 -0
- package/dist/utilities/analytics/metrics/obl.d.ts +40 -0
- package/dist/utilities/analytics/metrics/obl.js +287 -0
- package/dist/{optional → utilities}/analytics/metrics/vocabulary.js +6 -4
- package/dist/{optional → utilities}/symbolTools.js +13 -16
- package/dist/utilities/translation/translationProcessor.d.ts +119 -0
- package/dist/utilities/translation/translationProcessor.js +204 -0
- package/dist/validation/gridsetValidator.js +10 -0
- package/package.json +1 -1
- /package/dist/{optional → utilities}/analytics/history.js +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/comparison.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/index.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/index.js +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/sentence.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/sentence.js +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/types.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/types.js +0 -0
- /package/dist/{optional → utilities}/analytics/metrics/vocabulary.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/reference/index.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/reference/index.js +0 -0
- /package/dist/{optional → utilities}/analytics/utils/idGenerator.d.ts +0 -0
- /package/dist/{optional → utilities}/analytics/utils/idGenerator.js +0 -0
- /package/dist/{optional → utilities}/symbolTools.d.ts +0 -0
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.OblAnonymizer = exports.OblUtil = void 0;
|
|
4
|
+
const treeStructure_1 = require("../../../core/treeStructure");
|
|
5
|
+
/**
 * .obl (Open Board Logging) utility.
 *
 * Parses and serializes .obl documents and converts between OBL's
 * session/event layout and the content-keyed HistoryEntry layout.
 */
class OblUtil {
    /**
     * Parse an OBL JSON string.
     *
     * A leading block comment (the optional notice) is stripped before
     * parsing. The search for the comment terminator starts after the
     * two-character opener so the opener's own asterisk is never mistaken
     * for the start of the terminator.
     *
     * @param {string} json - Raw .obl file contents.
     * @returns {object} The parsed OBL object.
     * @throws {SyntaxError} When what remains is not valid JSON.
     */
    static parse(json) {
        let cleanJson = json.trim();
        if (cleanJson.startsWith('/*')) {
            const endComment = cleanJson.indexOf('*/', 2);
            if (endComment !== -1) {
                cleanJson = cleanJson.substring(endComment + 2).trim();
            }
        }
        return JSON.parse(cleanJson);
    }

    /**
     * Stringify an OBL file object with two-space indentation.
     *
     * @param {object} obl - OBL object to serialize.
     * @param {boolean} [includeNotice=true] - Prepend the notice comment.
     * @returns {string} The serialized document.
     */
    static stringify(obl, includeNotice = true) {
        const json = JSON.stringify(obl, null, 2);
        if (includeNotice) {
            return `/* NOTICE: The following information represents an individual's communication and should be treated respectfully and securely. */\n${json}`;
        }
        return json;
    }

    /**
     * Convert an OBL file to the HistoryEntry format.
     *
     * OBL is session- and event-based while HistoryEntry is content-based,
     * so events are grouped by their content string and each event becomes
     * one occurrence. Occurrences are sorted by timestamp within an entry.
     *
     * @param {object} obl - Parsed OBL object.
     * @returns {Array<object>} One entry per distinct content string, in
     *   first-seen order. Missing `sessions`/`events` arrays yield no entries.
     */
    static toHistoryEntries(obl) {
        const source = obl.source || 'OBL';
        const contentMap = new Map();
        for (const session of obl.sessions ?? []) {
            for (const event of session.events ?? []) {
                const occurrence = {
                    timestamp: new Date(event.timestamp),
                    modeling: event.modeling,
                    pageId: event.board_id || null,
                    // `??` rather than `||`: a coordinate of 0 is valid and must survive.
                    latitude: event.geo?.[0] ?? null,
                    longitude: event.geo?.[1] ?? null,
                    type: event.type,
                    // Remaining OBL fields are carried on the occurrence.
                    buttonId: event.button_id || null,
                    boardId: event.board_id || null,
                    spoken: event.spoken,
                    vocalization: event.vocalization,
                    imageUrl: event.image_url,
                    actions: event.actions,
                };
                let content;
                switch (event.type) {
                    case 'button':
                        content = event.vocalization || event.label;
                        break;
                    case 'utterance':
                    case 'note':
                        content = event.text;
                        break;
                    case 'action':
                        content = event.action;
                        break;
                    default:
                        content = event.label || event.text || event.action || 'unknown';
                }
                const occurrences = contentMap.get(content) || [];
                occurrences.push(occurrence);
                contentMap.set(content, occurrences);
            }
        }
        return Array.from(contentMap, ([content, occurrences]) => ({
            id: `obl:${content}`,
            source,
            content,
            occurrences: occurrences.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime()),
        }));
    }

    /**
     * Convert HistoryEntries to an OBL file object containing one session.
     *
     * @param {Array<object>} entries - History entries with `content` and `occurrences`.
     * @param {string} userId - Stored as `user_id`.
     * @param {string} [source] - Stored as `source`; defaults to 'aac-processors'.
     * @returns {object} An 'open-board-log-0.1' object whose events are sorted by timestamp.
     * @throws {RangeError} When an occurrence timestamp cannot be converted to a valid date.
     */
    static fromHistoryEntries(entries, userId, source) {
        const Intent = treeStructure_1.AACSemanticIntent;
        const events = [];
        for (const entry of entries) {
            for (const occ of entry.occurrences) {
                // Accept Date instances as well as anything Date can parse, so
                // entries revived from JSON (ISO strings) still convert.
                const when = occ.timestamp instanceof Date ? occ.timestamp : new Date(occ.timestamp);
                const timestamp = when.toISOString();
                const intent = occ.intent;
                let oblType = occ.type || 'button';
                let actionStr;
                // Map semantic intents onto OBL action strings.
                if (intent === Intent.CLEAR_TEXT) {
                    oblType = 'action';
                    actionStr = ':clear';
                }
                else if (intent === Intent.GO_HOME) {
                    oblType = 'action';
                    actionStr = ':home';
                }
                else if (intent === Intent.NAVIGATE_TO) {
                    oblType = 'action';
                    actionStr = ':open_board';
                }
                else if (intent === Intent.GO_BACK) {
                    oblType = 'action';
                    actionStr = ':back';
                }
                else if (intent === Intent.DELETE_CHARACTER) {
                    oblType = 'action';
                    actionStr = ':backspace';
                }
                else if (intent === Intent.SPEAK_IMMEDIATE || intent === Intent.SPEAK_TEXT) {
                    // A speak intent keeps an explicit utterance/button type;
                    // any other type is recorded as a ':speak' action.
                    if (oblType !== 'utterance' && oblType !== 'button') {
                        oblType = 'action';
                        actionStr = ':speak';
                    }
                }
                const common = {
                    id: Math.random().toString(36).substring(2, 11),
                    timestamp,
                    modeling: occ.modeling,
                    type: oblType,
                };
                // `!= null` matches both null and undefined but keeps 0.
                if (occ.latitude != null && occ.longitude != null) {
                    common.geo = [occ.latitude, occ.longitude];
                }
                if (oblType === 'utterance' || oblType === 'note') {
                    events.push({
                        ...common,
                        text: entry.content,
                    });
                }
                else if (oblType === 'action') {
                    events.push({
                        ...common,
                        action: actionStr || entry.content,
                        destination_board_id: occ.boardId || undefined,
                        text: intent === Intent.SPEAK_TEXT ? entry.content : undefined,
                    });
                }
                else {
                    // Anything else is written as a button event.
                    events.push({
                        ...common,
                        type: 'button',
                        label: entry.content,
                        spoken: occ.spoken ??
                            occ.category === treeStructure_1.AACSemanticCategory.COMMUNICATION,
                        button_id: occ.buttonId || undefined,
                        board_id: occ.boardId || occ.pageId || undefined,
                        vocalization: occ.vocalization || undefined,
                        image_url: occ.imageUrl || undefined,
                        actions: occ.actions || undefined,
                    });
                }
            }
        }
        events.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
        // With no events, both session bounds fall back to the same instant.
        const now = new Date().toISOString();
        const started = events.length > 0 ? events[0].timestamp : now;
        const ended = events.length > 0 ? events[events.length - 1].timestamp : now;
        const session = {
            id: 'session-1',
            type: 'log',
            started,
            ended,
            events,
        };
        return {
            format: 'open-board-log-0.1',
            user_id: userId,
            source: source || 'aac-processors',
            sessions: [session],
        };
    }
}
|
|
209
|
+
exports.OblUtil = OblUtil;
|
|
210
|
+
/**
 * .obl anonymization utility.
 *
 * Works on a deep copy of the log; the caller's object is never modified
 * by `anonymize`. The `apply*` helpers mutate the objects they are given.
 */
class OblAnonymizer {
    /**
     * Apply the requested anonymizations to a copy of an OBL file.
     *
     * @param {object} obl - OBL object (must be JSON-serializable).
     * @param {string[]} types - Any of 'timestamp_shift',
     *   'geolocation_masking', 'url_stripping', 'name_masking'.
     * @returns {object} The anonymized copy, with `anonymized: true` and each
     *   session's `anonymizations` list extended with the applied types.
     */
    static anonymize(obl, types) {
        const newObl = JSON.parse(JSON.stringify(obl));
        newObl.anonymized = true;
        // user_name lives on the document, not on a session, so it must be
        // removed even when the log contains no sessions to iterate over.
        if (types.includes('name_masking')) {
            delete newObl.user_name;
        }
        const steps = [
            ['timestamp_shift', (session) => this.applyTimestampShift(session)],
            ['geolocation_masking', (session) => this.applyGeolocationMasking(session)],
            ['url_stripping', (session) => this.applyUrlStripping(session)],
            ['name_masking', (session) => this.applyNameMasking(newObl, session)],
        ];
        for (const session of newObl.sessions ?? []) {
            session.anonymizations = session.anonymizations || [];
            for (const [type, apply] of steps) {
                if (!types.includes(type)) {
                    continue;
                }
                apply(session);
                if (!session.anonymizations.includes(type)) {
                    session.anonymizations.push(type);
                }
            }
        }
        return newObl;
    }

    /**
     * Shift every timestamp in the session by one common offset so that the
     * earliest of them lands on 2000-01-01T00:00:00.000Z.
     *
     * The session bounds are shifted even when the session has no events.
     * Absent or unparseable values are left untouched rather than causing
     * `toISOString` to throw on an invalid date.
     *
     * @param {object} session - Session to modify in place.
     */
    static applyTimestampShift(session) {
        const toMillis = (value) => (value == null ? NaN : new Date(value).getTime());
        const events = session.events ?? [];
        let earliest = toMillis(session.started);
        for (const event of events) {
            const ms = toMillis(event.timestamp);
            if (Number.isFinite(ms) && !(ms >= earliest)) {
                // `!(ms >= earliest)` also replaces an initial NaN.
                earliest = ms;
            }
        }
        if (!Number.isFinite(earliest)) {
            return;
        }
        const targetStart = new Date('2000-01-01T00:00:00.000Z').getTime();
        const offset = targetStart - earliest;
        const shift = (value) => {
            const ms = toMillis(value);
            return Number.isFinite(ms) ? new Date(ms + offset).toISOString() : value;
        };
        if (session.started != null) {
            session.started = shift(session.started);
        }
        if (session.ended != null) {
            session.ended = shift(session.ended);
        }
        for (const event of events) {
            if (event.timestamp != null) {
                event.timestamp = shift(event.timestamp);
            }
        }
    }

    /**
     * Remove `geo` and `location_id` from every event of the session.
     *
     * @param {object} session - Session to modify in place.
     */
    static applyGeolocationMasking(session) {
        for (const event of session.events ?? []) {
            delete event.geo;
            delete event.location_id;
        }
    }

    /**
     * Remove `image_url` from button events and `author_url` /
     * `author_email` from note events.
     *
     * @param {object} session - Session to modify in place.
     */
    static applyUrlStripping(session) {
        for (const event of session.events ?? []) {
            if (event.type === 'button') {
                delete event.image_url;
            }
            if (event.type === 'note') {
                delete event.author_url;
                delete event.author_email;
            }
        }
    }

    /**
     * Remove the log's `user_name` and the `author_name` of note events.
     *
     * @param {object} obl - Log object to modify in place.
     * @param {object} session - Session to modify in place.
     */
    static applyNameMasking(obl, session) {
        delete obl.user_name;
        for (const event of session.events ?? []) {
            if (event.type === 'note') {
                delete event.author_name;
            }
        }
    }
}
|
|
287
|
+
exports.OblAnonymizer = OblAnonymizer;
|
|
@@ -22,12 +22,13 @@ class VocabularyAnalyzer {
|
|
|
22
22
|
const lowEffortThreshold = options?.lowEffortThreshold || 2.0;
|
|
23
23
|
// Load reference data
|
|
24
24
|
const coreLists = this.referenceLoader.loadCoreLists();
|
|
25
|
-
// Create word to effort map
|
|
25
|
+
// Create word to effort map (using lowercase keys for matching)
|
|
26
26
|
const wordEffortMap = new Map();
|
|
27
27
|
metrics.buttons.forEach((btn) => {
|
|
28
|
-
const
|
|
28
|
+
const word = btn.label.toLowerCase();
|
|
29
|
+
const existing = wordEffortMap.get(word);
|
|
29
30
|
if (!existing || btn.effort < existing) {
|
|
30
|
-
wordEffortMap.set(
|
|
31
|
+
wordEffortMap.set(word, btn.effort);
|
|
31
32
|
}
|
|
32
33
|
});
|
|
33
34
|
// Analyze each core list
|
|
@@ -79,7 +80,8 @@ class VocabularyAnalyzer {
|
|
|
79
80
|
const missing = [];
|
|
80
81
|
let totalEffort = 0;
|
|
81
82
|
list.words.forEach((word) => {
|
|
82
|
-
const
|
|
83
|
+
const lowerWord = word.toLowerCase();
|
|
84
|
+
const effort = wordEffortMap.get(lowerWord);
|
|
83
85
|
if (effort !== undefined) {
|
|
84
86
|
covered.push(word);
|
|
85
87
|
totalEffort += effort;
|
|
@@ -7,7 +7,7 @@ exports.TouchChatSymbolResolver = exports.TouchChatSymbolExtractor = exports.Gri
|
|
|
7
7
|
exports.resolveSymbol = resolveSymbol;
|
|
8
8
|
const path_1 = __importDefault(require("path"));
|
|
9
9
|
const fs_1 = __importDefault(require("fs"));
|
|
10
|
-
const
|
|
10
|
+
const symbols_1 = require("../processors/gridset/symbols");
|
|
11
11
|
// --- Base Classes ---
|
|
12
12
|
class SymbolExtractor {
|
|
13
13
|
}
|
|
@@ -61,10 +61,13 @@ exports.SnapSymbolResolver = SnapSymbolResolver;
|
|
|
61
61
|
let AdmZip = null;
|
|
62
62
|
let XMLParser = null;
|
|
63
63
|
try {
|
|
64
|
+
// Dynamic requires for optional dependencies
|
|
64
65
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
65
|
-
|
|
66
|
+
const admZipModule = require('adm-zip');
|
|
66
67
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
67
|
-
|
|
68
|
+
const fxpModule = require('fast-xml-parser');
|
|
69
|
+
AdmZip = admZipModule;
|
|
70
|
+
XMLParser = fxpModule.XMLParser;
|
|
68
71
|
}
|
|
69
72
|
catch {
|
|
70
73
|
AdmZip = null;
|
|
@@ -74,19 +77,13 @@ class Grid3SymbolExtractor extends SymbolExtractor {
|
|
|
74
77
|
getSymbolReferences(filePath) {
|
|
75
78
|
if (!AdmZip || !XMLParser)
|
|
76
79
|
throw new Error('adm-zip or fast-xml-parser not installed');
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
const
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
// Parse to validate XML structure (future: extract refs)
|
|
85
|
-
parser.parse(xmlBuffer.toString('utf8'));
|
|
86
|
-
// TODO: Extract symbol references from Grid 3 XML structure when needed
|
|
87
|
-
}
|
|
88
|
-
});
|
|
89
|
-
return Array.from(refs);
|
|
80
|
+
// Import GridsetProcessor dynamically to avoid circular dependencies
|
|
81
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
82
|
+
const { GridsetProcessor } = require('../processors/gridsetProcessor');
|
|
83
|
+
const proc = new GridsetProcessor();
|
|
84
|
+
const tree = proc.loadIntoTree(filePath);
|
|
85
|
+
// Use the existing extractSymbolReferences function from gridset/symbols.ts
|
|
86
|
+
return (0, symbols_1.extractSymbolReferences)(tree);
|
|
90
87
|
}
|
|
91
88
|
}
|
|
92
89
|
exports.Grid3SymbolExtractor = Grid3SymbolExtractor;
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-Based Translation with Symbol Preservation
|
|
3
|
+
*
|
|
4
|
+
* This module provides utilities for translating AAC files while preserving
|
|
5
|
+
* symbol-to-word associations across different formats (gridset, OBF, Snap, etc.).
|
|
6
|
+
*
|
|
7
|
+
* The key insight: Different AAC formats have different internal structures,
|
|
8
|
+
* but they all share common concepts:
|
|
9
|
+
* - Buttons with labels and messages
|
|
10
|
+
* - Symbols attached to specific words
|
|
11
|
+
* - Need to preserve symbol positions during translation
|
|
12
|
+
*
|
|
13
|
+
* This module provides a format-agnostic way to:
|
|
14
|
+
* 1. Extract symbol information for LLM processing
|
|
15
|
+
* 2. Apply LLM translations with preserved symbols
|
|
16
|
+
*
|
|
17
|
+
* Usage:
|
|
18
|
+
* 1. Processor extracts buttons and calls extractSymbolsForLLM()
|
|
19
|
+
* 2. LLM translates and returns aligned symbols
|
|
20
|
+
* 3. Processor calls processLLMTranslations() to apply results
|
|
21
|
+
*/
|
|
22
|
+
/**
|
|
23
|
+
* Represents a symbol attached to text in a format-agnostic way
|
|
24
|
+
*/
|
|
25
|
+
export interface SymbolInfo {
|
|
26
|
+
text: string; // Text the symbol is attached to; extractSymbolsFromButton uses the button's label, else its message.
|
|
27
|
+
image?: string; // Symbol reference string; built as `[symbolLibrary]symbolPath` or copied from a button image that starts with '['.
|
|
28
|
+
symbolLibrary?: string; // Copied from the button's symbolLibrary field when both library and path are present.
|
|
29
|
+
symbolPath?: string; // Copied from the button's symbolPath field when both library and path are present.
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Button data extracted for translation (format-agnostic)
|
|
33
|
+
*/
|
|
34
|
+
export interface ButtonForTranslation {
|
|
35
|
+
buttonId: string; // extractAllButtonsForTranslation uses button.id, else button.buttonId, else a generated `button_<n>`.
|
|
36
|
+
pageId?: string; // Present only when supplied through the optional context object.
|
|
37
|
+
pageName?: string; // Present only when supplied through the optional context object.
|
|
38
|
+
label: string; // Empty string when the source button has no label (extractAllButtonsForTranslation).
|
|
39
|
+
message: string; // Empty string when the source button has no message (extractAllButtonsForTranslation).
|
|
40
|
+
textToTranslate: string; // The message when it is non-empty, otherwise the label.
|
|
41
|
+
symbols: SymbolInfo[]; // Empty array when no symbols were found for the button.
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* LLM translation result with symbol mappings
|
|
45
|
+
*/
|
|
46
|
+
export interface LLMLTranslationResult {
|
|
47
|
+
buttonId: string; // validateTranslationResults rejects entries where this is missing or empty.
|
|
48
|
+
translatedLabel?: string; // validateTranslationResults requires at least one of translatedLabel / translatedMessage.
|
|
49
|
+
translatedMessage?: string; // See translatedLabel: one of the two must be non-empty.
|
|
50
|
+
symbols?: Array<{
|
|
51
|
+
text: string; // Per the generated prompt: the translated word the symbol should be attached to.
|
|
52
|
+
image?: string; // Per the generated prompt: the symbol reference, to be returned exactly as provided.
|
|
53
|
+
}>;
|
|
54
|
+
}
|
|
55
|
+
/**
|
|
56
|
+
* Extract symbols from a button for LLM-based translation.
|
|
57
|
+
*
|
|
58
|
+
* This is a format-agnostic helper that processors can use to normalize
|
|
59
|
+
* their button data into a common format for LLM processing.
|
|
60
|
+
*
|
|
61
|
+
* @param buttonId - Unique identifier for the button
|
|
62
|
+
* @param label - Button label text
|
|
63
|
+
* @param message - Button message/speak text
|
|
64
|
+
* @param symbols - Array of symbols from the button
|
|
65
|
+
* @param context - Optional page context
|
|
66
|
+
* @returns Normalized button data for translation
|
|
67
|
+
*/
|
|
68
|
+
export declare function normalizeButtonForTranslation(buttonId: string, label: string, message: string, symbols: SymbolInfo[], context?: { // context properties are spread onto the returned object
|
|
69
|
+
pageId?: string;
|
|
70
|
+
pageName?: string;
|
|
71
|
+
}): ButtonForTranslation; // textToTranslate is the message when non-empty, otherwise the label
|
|
72
|
+
/**
|
|
73
|
+
* Extract symbols from various button formats.
|
|
74
|
+
*
|
|
75
|
+
* This helper handles different ways symbols might be stored in button data:
|
|
76
|
+
* - semanticAction.richText.symbols (gridset format)
|
|
77
|
+
* - symbolLibrary + symbolPath fields
|
|
78
|
+
* - image field with [library]path format
|
|
79
|
+
*
|
|
80
|
+
* @param button - Button object from any AAC format
|
|
81
|
+
* @returns Array of symbol info, or undefined if no symbols
|
|
82
|
+
*/
|
|
83
|
+
export declare function extractSymbolsFromButton(button: any): SymbolInfo[] | undefined; // first matching source wins; without richText symbols, a button with no label/message yields undefined
|
|
84
|
+
/**
|
|
85
|
+
* Extract all buttons from a file for LLM translation.
|
|
86
|
+
*
|
|
87
|
+
* This is a convenience method that processors can use to extract all
|
|
88
|
+
* translatable buttons with their symbols in a format-agnostic way.
|
|
89
|
+
*
|
|
90
|
+
* @param buttons - Array of button objects from any AAC format
|
|
91
|
+
* @param contextFn - Optional function to provide page context for each button
|
|
92
|
+
* @returns Array of normalized button data ready for LLM translation
|
|
93
|
+
*/
|
|
94
|
+
export declare function extractAllButtonsForTranslation(buttons: any[], contextFn?: (button: any) => { // contextFn is called only for buttons that are kept
|
|
95
|
+
pageId?: string;
|
|
96
|
+
pageName?: string;
|
|
97
|
+
}): ButtonForTranslation[]; // falsy entries and buttons with neither label nor message are skipped
|
|
98
|
+
/**
|
|
99
|
+
* Create a prompt for LLM translation with symbol preservation.
|
|
100
|
+
*
|
|
101
|
+
* This generates a structured prompt that instructs the LLM to translate
|
|
102
|
+
* while preserving symbol-to-word associations.
|
|
103
|
+
*
|
|
104
|
+
* @param buttons - Buttons to translate
|
|
105
|
+
* @param targetLanguage - Target language for translation
|
|
106
|
+
* @returns Prompt string for LLM
|
|
107
|
+
*/
|
|
108
|
+
export declare function createTranslationPrompt(buttons: ButtonForTranslation[], targetLanguage: string): string; // the prompt embeds JSON.stringify(buttons, null, 2) and the target language verbatim
|
|
109
|
+
/**
|
|
110
|
+
* Validate LLM translation results before applying.
|
|
111
|
+
*
|
|
112
|
+
* @param translations - LLM translation results
|
|
113
|
+
* @param originalButtonIds - Expected button IDs (optional, for validation)
|
|
114
|
+
* @param options - Validation options
|
|
115
|
+
* @throws Error if validation fails
|
|
116
|
+
*/
|
|
117
|
+
export declare function validateTranslationResults(translations: LLMLTranslationResult[], originalButtonIds?: string[], options?: {
|
|
118
|
+
allowPartial?: boolean; // when true, the check that every originalButtonIds entry has a translation is skipped
|
|
119
|
+
}): void; // returns nothing on success; every failure is reported by throwing
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LLM-Based Translation with Symbol Preservation
|
|
4
|
+
*
|
|
5
|
+
* This module provides utilities for translating AAC files while preserving
|
|
6
|
+
* symbol-to-word associations across different formats (gridset, OBF, Snap, etc.).
|
|
7
|
+
*
|
|
8
|
+
* The key insight: Different AAC formats have different internal structures,
|
|
9
|
+
* but they all share common concepts:
|
|
10
|
+
* - Buttons with labels and messages
|
|
11
|
+
* - Symbols attached to specific words
|
|
12
|
+
* - Need to preserve symbol positions during translation
|
|
13
|
+
*
|
|
14
|
+
* This module provides a format-agnostic way to:
|
|
15
|
+
* 1. Extract symbol information for LLM processing
|
|
16
|
+
* 2. Apply LLM translations with preserved symbols
|
|
17
|
+
*
|
|
18
|
+
* Usage:
|
|
19
|
+
* 1. Processor extracts buttons and calls extractSymbolsForLLM()
|
|
20
|
+
* 2. LLM translates and returns aligned symbols
|
|
21
|
+
* 3. Processor calls processLLMTranslations() to apply results
|
|
22
|
+
*/
|
|
23
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
24
|
+
exports.normalizeButtonForTranslation = normalizeButtonForTranslation;
|
|
25
|
+
exports.extractSymbolsFromButton = extractSymbolsFromButton;
|
|
26
|
+
exports.extractAllButtonsForTranslation = extractAllButtonsForTranslation;
|
|
27
|
+
exports.createTranslationPrompt = createTranslationPrompt;
|
|
28
|
+
exports.validateTranslationResults = validateTranslationResults;
|
|
29
|
+
/**
 * Build the format-agnostic record used for LLM translation of one button.
 *
 * Processors call this to normalize their own button data. The text to
 * translate is the message when it is non-empty, otherwise the label. Any
 * properties of `context` are copied onto the result after the core fields.
 *
 * @param buttonId - Unique identifier for the button
 * @param label - Button label text
 * @param message - Button message/speak text
 * @param symbols - Array of symbols from the button
 * @param context - Optional page context
 * @returns Normalized button data for translation
 */
function normalizeButtonForTranslation(buttonId, label, message, symbols, context) {
    const textToTranslate = message ? message : label;
    const core = { buttonId, label, message, textToTranslate, symbols };
    return { ...core, ...context };
}
|
|
52
|
+
/**
 * Extract symbols from various button formats.
 *
 * Sources are tried in this order and the first match wins:
 * 1. semanticAction.richText.symbols (gridset format), returned as a copy
 * 2. symbolLibrary + symbolPath fields
 * 3. an image string that starts with '[' (the [library]path format)
 *
 * Sources 2 and 3 attach the symbol to the button's label, falling back to
 * its message; when both are empty no symbol is produced.
 *
 * @param button - Button object from any AAC format; null/undefined is
 *   treated as a button without symbols
 * @returns Array of symbol info, or undefined if no symbols
 */
function extractSymbolsFromButton(button) {
    // A missing button has no symbols; do not fail on the property reads below.
    if (button == null) {
        return undefined;
    }
    // Method 1: rich-text symbols (gridset format)
    const richTextSymbols = button.semanticAction?.richText?.symbols;
    if (Array.isArray(richTextSymbols) && richTextSymbols.length > 0) {
        return [...richTextSymbols];
    }
    // Methods 2 and 3 need a text to attach the symbol to.
    const text = button.label || button.message || '';
    if (!text) {
        return undefined;
    }
    // Method 2: separate symbolLibrary + symbolPath fields
    if (button.symbolLibrary && button.symbolPath) {
        return [
            {
                text,
                image: `[${button.symbolLibrary}]${button.symbolPath}`,
                symbolLibrary: button.symbolLibrary,
                symbolPath: button.symbolPath,
            },
        ];
    }
    // Method 3: image field holding a symbol reference
    if (typeof button.image === 'string' && button.image.startsWith('[')) {
        return [{ text, image: button.image }];
    }
    return undefined;
}
|
|
99
|
+
/**
 * Collect every translatable button in normalized form.
 *
 * Convenience wrapper for processors: each truthy button that has a label
 * or a message is passed through extractSymbolsFromButton and
 * normalizeButtonForTranslation. A button without an id receives a
 * generated `button_<n>` id, where n is the number of buttons collected so
 * far.
 *
 * @param buttons - Array of button objects from any AAC format
 * @param contextFn - Optional function to provide page context for each button
 * @returns Array of normalized button data ready for LLM translation
 */
function extractAllButtonsForTranslation(buttons, contextFn) {
    const normalized = [];
    for (const candidate of buttons) {
        if (candidate) {
            const fallbackId = `button_${normalized.length}`;
            const id = candidate.id || candidate.buttonId || fallbackId;
            const labelText = candidate.label || '';
            const messageText = candidate.message || '';
            const found = extractSymbolsFromButton(candidate);
            // Buttons with nothing to translate are left out.
            const hasText = Boolean(labelText || messageText);
            if (hasText) {
                const pageContext = contextFn ? contextFn(candidate) : undefined;
                normalized.push(normalizeButtonForTranslation(id, labelText, messageText, found || [], pageContext));
            }
        }
    }
    return normalized;
}
|
|
126
|
+
/**
 * Build the LLM prompt for translating buttons while keeping symbols.
 *
 * The prompt explains the button fields, instructs the model to reattach
 * each symbol to the translated word by meaning, embeds the buttons as
 * pretty-printed JSON and describes the JSON array expected in return.
 *
 * @param buttons - Buttons to translate
 * @param targetLanguage - Target language for translation
 * @returns Prompt string for LLM
 */
function createTranslationPrompt(buttons, targetLanguage) {
    const serializedButtons = JSON.stringify(buttons, null, 2);
    const promptLines = [
        'You are a translation assistant for AAC (Augmentative and Alternative Communication) systems.',
        '',
        `Your task is to translate the following buttons to ${targetLanguage} while preserving symbol associations.`,
        '',
        'Each button has:',
        '- label: The text shown on the button',
        '- message: The text spoken when the button is activated',
        '- textToTranslate: The actual text to translate (usually the message)',
        '- symbols: Visual symbols attached to specific words',
        '',
        'IMPORTANT: After translation, you MUST reattach symbols to the correct translated words based on MEANING, not position.',
        '',
        'Example:',
        '- Original: "I want apple" with apple symbol on "apple"',
        '- Spanish: "Yo quiero manzana" with apple symbol on "manzana" (NOT "Yo" or "quiero")',
        '- French: "Je veux une pomme" with apple symbol on "pomme"',
        '',
        'The symbols array should contain the translated word that each symbol should be attached to.',
        '',
        'Buttons to translate:',
        // Interpolated rather than passed bare so an undefined value is
        // rendered the same way a template literal would render it.
        `${serializedButtons}`,
        '',
        'Return ONLY a JSON array with this exact structure:',
        '[',
        '{',
        '"buttonId": "...",',
        '"translatedLabel": "...",',
        '"translatedMessage": "...",',
        '"symbols": [',
        '{"text": "translated_word", "image": "[library]path"}',
        ']',
        '}',
        ']',
        '',
        'Ensure all symbol image references are preserved exactly as provided.',
    ];
    return promptLines.join('\n');
}
|
|
174
|
+
/**
 * Validate LLM translation results before applying.
 *
 * Checks, in order: the results are an array; every entry is an object
 * (LLM output is untrusted, so a stray null or string is reported with its
 * index); every expected button id has a translation unless
 * options.allowPartial is set; every entry has a buttonId and at least one
 * of translatedMessage / translatedLabel.
 *
 * @param translations - LLM translation results
 * @param originalButtonIds - Expected button IDs (optional, for validation)
 * @param options - Validation options
 * @throws Error if validation fails
 */
function validateTranslationResults(translations, originalButtonIds, options) {
    if (!Array.isArray(translations)) {
        throw new Error('Translation results must be an array');
    }
    for (const [index, trans] of translations.entries()) {
        if (trans === null || typeof trans !== 'object') {
            throw new Error(`Translation at index ${index} is not an object`);
        }
    }
    const translatedIds = new Set(translations.map((t) => t.buttonId));
    // Check that all original buttons have translations (unless partial is allowed)
    if (originalButtonIds && !options?.allowPartial) {
        for (const id of originalButtonIds) {
            if (!translatedIds.has(id)) {
                throw new Error(`Missing translation for button: ${id}`);
            }
        }
    }
    // Check each translation has required fields
    for (const trans of translations) {
        if (!trans.buttonId) {
            throw new Error('Translation missing buttonId');
        }
        if (!trans.translatedMessage && !trans.translatedLabel) {
            throw new Error(`Translation for ${trans.buttonId} has no translated text`);
        }
    }
}
|