npm - @willwade/aac-processors - Versions diffs - 0.2.17 → 0.2.18 - Mend

@willwade/aac-processors 0.2.17 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/dist/utilities/analytics/morphology/tdsnapLexiconParser.js ADDED Viewed

@@ -0,0 +1,186 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.TDSnapLexiconParser = void 0;
+class TDSnapLexiconParser {
+    parseDb(dbPath, locale) {
+        const detectedLocale = locale || this.inferLocale(dbPath);
+        // eslint-disable-next-line @typescript-eslint/no-var-requires
+        const Database = require('better-sqlite3');
+        // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+        const db = new Database(dbPath, { readonly: true });
+        try {
+            return this.extractAll(db, detectedLocale);
+        }
+        finally {
+            db.close();
+        }
+    }
+    inferLocale(dbPath) {
+        const match = dbPath.match(/lang_([a-z]{2}_[A-Z]{2})/i);
+        return match ? match[1] : 'unknown';
+    }
+    extractAll(db, locale) {
+        const words = new Map();
+        const subclassCache = new Map();
+        const getSubclass = (id) => {
+            let name = subclassCache.get(id);
+            if (name !== undefined)
+                return name;
+            const row = db.prepare('SELECT Name FROM PosSubclass WHERE Id = ?').get(id);
+            name = row?.Name;
+            if (name) {
+                subclassCache.set(id, name);
+                return name;
+            }
+            return undefined;
+        };
+        const allWords = db
+            .prepare(`SELECT w.Id as wordId, w.Text as text,
+                i.Id as inflectionId, i.LexemeId as lexemeId, i.PosSubclassId as posSubclassId
+         FROM Word w
+         JOIN Spelling s ON s.WordId = w.Id
+         JOIN Inflection i ON i.Id = s.InflectionId
+         WHERE i.PosSubclassId != 0
+         ORDER BY w.Text`)
+            .all();
+        const lexemeForms = new Map();
+        for (const row of allWords) {
+            const tag = getSubclass(row.posSubclassId);
+            if (!tag)
+                continue;
+            let formsByTag = lexemeForms.get(row.lexemeId);
+            if (!formsByTag) {
+                formsByTag = new Map();
+                lexemeForms.set(row.lexemeId, formsByTag);
+            }
+            const existing = formsByTag.get(tag);
+            if (existing) {
+                if (!existing.includes(row.text))
+                    existing.push(row.text);
+            }
+            else {
+                formsByTag.set(tag, [row.text]);
+            }
+        }
+        const wordToLexeme = new Map();
+        for (const row of allWords) {
+            if (!wordToLexeme.has(row.text.toLowerCase())) {
+                wordToLexeme.set(row.text.toLowerCase(), row.lexemeId);
+            }
+        }
+        for (const [text, lexemeId] of wordToLexeme) {
+            const formsByTag = lexemeForms.get(lexemeId);
+            if (!formsByTag || formsByTag.size === 0)
+                continue;
+            const forms = [];
+            for (const [tag, formTexts] of formsByTag) {
+                for (const formText of formTexts) {
+                    if (formText.toLowerCase() !== text) {
+                        forms.push({ tag, form: formText });
+                    }
+                }
+            }
+            if (forms.length > 0) {
+                words.set(text, { lexemeId, forms });
+            }
+        }
+        return { locale, words };
+    }
+    lookupWord(data, word) {
+        const entry = data.words.get(word.toLowerCase());
+        if (!entry)
+            return [];
+        return entry.forms.map((f) => f.form);
+    }
+    lookupWordByTag(data, word, tag) {
+        const entry = data.words.get(word.toLowerCase());
+        if (!entry)
+            return [];
+        return entry.forms.filter((f) => f.tag === tag).map((f) => f.form);
+    }
+    static parseContentTypeHandler(handler) {
+        if (!handler)
+            return null;
+        const colonIdx = handler.indexOf(':');
+        if (colonIdx === -1) {
+            const parts = handler.split(',');
+            return { category: parts[0], subtype: '', params: parts.slice(1) };
+        }
+        const category = handler.substring(0, colonIdx);
+        const rest = handler.substring(colonIdx + 1);
+        const commaIdx = rest.indexOf(',');
+        if (commaIdx === -1) {
+            return { category, subtype: rest, params: [] };
+        }
+        const subtype = rest.substring(0, commaIdx);
+        const paramsStr = rest.substring(commaIdx + 1);
+        const params = paramsStr.split(',').map((p) => p.trim());
+        return { category, subtype, params };
+    }
+    static tagToPos(tag) {
+        return TDSnapLexiconParser.TAG_TO_POS[tag] || 'Unknown';
+    }
+    static handlerToPos(handler) {
+        const parsed = TDSnapLexiconParser.parseContentTypeHandler(handler);
+        if (!parsed)
+            return 'Unknown';
+        if (parsed.category === 'RESET' || parsed.category === 'SPECIAL')
+            return 'Ignore';
+        const key = `${parsed.category}:${parsed.subtype}`;
+        const tag = TDSnapLexiconParser.HANDLER_TAG_MAP[key];
+        if (tag)
+            return TDSnapLexiconParser.TAG_TO_POS[tag] || 'Unknown';
+        return TDSnapLexiconParser.TAG_TO_POS[parsed.subtype] || 'Unknown';
+    }
+}
+exports.TDSnapLexiconParser = TDSnapLexiconParser;
+TDSnapLexiconParser.TAG_TO_POS = {
+    V0: 'Verb',
+    VZ: 'Verb',
+    VG: 'Verb',
+    VD: 'Verb',
+    VN: 'Verb',
+    SNG: 'Noun',
+    PLU: 'Noun',
+    ADJ: 'Adjective',
+    ADJR: 'Adjective',
+    ADJT: 'Adjective',
+    ADV: 'Adjective',
+    SUB: 'Pronoun',
+    OBJ: 'Pronoun',
+    POS: 'Pronoun',
+    NPOS: 'Pronoun',
+    REF: 'Pronoun',
+    B0: 'Verb',
+    BZ: 'Verb',
+    BM: 'Verb',
+    BR: 'Verb',
+    BDZ: 'Verb',
+    BDR: 'Verb',
+    BG: 'Verb',
+    BN: 'Verb',
+};
+TDSnapLexiconParser.HANDLER_TAG_MAP = {
+    'NOUN:PLU': 'PLU',
+    'DESCRIBE:ADJR': 'ADJR',
+    'DESCRIBE:ADJT': 'ADJT',
+    'DESCRIBE:ADV': 'ADV',
+    'VERB:V0': 'V0',
+    'VERB:VZ': 'VZ',
+    'VERB:VG': 'VG',
+    'VERB:VD': 'VD',
+    'VERB:VN': 'VN',
+    'PRONOUN:SUB': 'SUB',
+    'PRONOUN:OBJ': 'OBJ',
+    'PRONOUN:POS': 'POS',
+    'PRONOUN:NPOS': 'NPOS',
+    'PRONOUN:REF': 'REF',
+    'BE:B0': 'B0',
+    'BE:BZ': 'BZ',
+    'BE:BM': 'BM',
+    'BE:BR': 'BR',
+    'BE:BDZ': 'BDZ',
+    'BE:BDR': 'BDR',
+    'BE:BG': 'BG',
+    'BE:BN': 'BN',
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@willwade/aac-processors",
-  "version": "0.2.17",
+  "version": "0.2.18",
   "description": "A comprehensive TypeScript library for processing AAC (Augmentative and Alternative Communication) file formats with translation support",
   "main": "dist/index.js",
   "browser": "dist/browser/index.browser.js",