@willwade/aac-processors 0.2.16 → 0.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/processors/gridset/commands.js +56 -1
- package/dist/browser/processors/gridsetProcessor.js +16 -2
- package/dist/browser/processors/snapProcessor.js +42 -4
- package/dist/browser/utilities/analytics/metrics/core.js +182 -33
- package/dist/browser/utilities/analytics/metrics/effort.js +1 -0
- package/dist/browser/utilities/analytics/morphology/engine.js +24 -2
- package/dist/browser/utilities/analytics/morphology/index.js +1 -0
- package/dist/browser/utilities/analytics/morphology/tdsnapLexiconParser.js +182 -0
- package/dist/core/treeStructure.d.ts +3 -2
- package/dist/index.node.d.ts +1 -0
- package/dist/index.node.js +4 -2
- package/dist/processors/gridset/commands.js +56 -1
- package/dist/processors/gridsetProcessor.js +16 -2
- package/dist/processors/snapProcessor.js +42 -4
- package/dist/types/aac.d.ts +1 -1
- package/dist/utilities/analytics/metrics/core.d.ts +33 -0
- package/dist/utilities/analytics/metrics/core.js +182 -33
- package/dist/utilities/analytics/metrics/effort.d.ts +1 -0
- package/dist/utilities/analytics/metrics/effort.js +1 -0
- package/dist/utilities/analytics/metrics/types.d.ts +26 -0
- package/dist/utilities/analytics/morphology/engine.d.ts +4 -0
- package/dist/utilities/analytics/morphology/engine.js +24 -2
- package/dist/utilities/analytics/morphology/index.d.ts +2 -0
- package/dist/utilities/analytics/morphology/index.js +3 -1
- package/dist/utilities/analytics/morphology/tdsnapLexiconParser.d.ts +28 -0
- package/dist/utilities/analytics/morphology/tdsnapLexiconParser.js +186 -0
- package/package.json +5 -5
|
@@ -588,12 +588,20 @@ class MetricsCalculator {
|
|
|
588
588
|
/**
|
|
589
589
|
* Quick check whether any button in the tree has a POS tag.
|
|
590
590
|
* Used to auto-enable smart grammar without requiring explicit opt-in.
|
|
591
|
+
*
|
|
592
|
+
* IMPORTANT: Only counts POS from non-Inflector and non-Suffix buttons.
|
|
593
|
+
* TDSnap Inflector buttons and Grid3 Suffix buttons are grammar controls,
|
|
594
|
+
* not content words — they should NOT auto-enable morphology.
|
|
591
595
|
*/
|
|
592
596
|
treeHasPosTags(tree) {
|
|
593
597
|
for (const page of Object.values(tree.pages)) {
|
|
594
598
|
for (const row of page.grid) {
|
|
595
599
|
for (const btn of row) {
|
|
596
|
-
if (btn?.pos &&
|
|
600
|
+
if (btn?.pos &&
|
|
601
|
+
btn.pos !== 'Unknown' &&
|
|
602
|
+
btn.pos !== 'Ignore' &&
|
|
603
|
+
btn.pos !== 'Suffix' &&
|
|
604
|
+
btn.contentType !== 'Inflector') {
|
|
597
605
|
return true;
|
|
598
606
|
}
|
|
599
607
|
}
|
|
@@ -611,8 +619,44 @@ class MetricsCalculator {
|
|
|
611
619
|
*/
|
|
612
620
|
expandMorphologicalPredictions(tree, options) {
|
|
613
621
|
const locale = options.morphologyLocale || 'en-gb';
|
|
614
|
-
|
|
615
|
-
|
|
622
|
+
let morph;
|
|
623
|
+
if (options.tdsnapLexiconPath) {
|
|
624
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
625
|
+
const { TDSnapLexiconParser } = require('../morphology/tdsnapLexiconParser');
|
|
626
|
+
const parser = new TDSnapLexiconParser();
|
|
627
|
+
const lexiconData = parser.parseDb(options.tdsnapLexiconPath, locale.replace('-', '_'));
|
|
628
|
+
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
|
|
629
|
+
morph = morphology_1.MorphologyEngine.fromTDSnapLexicon(lexiconData);
|
|
630
|
+
this.expandTDSnapPredictions(tree, morph);
|
|
631
|
+
return;
|
|
632
|
+
}
|
|
633
|
+
if (options.grid3VerbsPath) {
|
|
634
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
635
|
+
const { Grid3VerbsParser } = require('../morphology/grid3VerbsParser');
|
|
636
|
+
const parser = new Grid3VerbsParser();
|
|
637
|
+
const verbForms = parser.parseZip(options.grid3VerbsPath);
|
|
638
|
+
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
|
|
639
|
+
morph = morphology_1.MorphologyEngine.fromGrid3Verbs(verbForms);
|
|
640
|
+
this.expandGrid3Predictions(tree, morph);
|
|
641
|
+
return;
|
|
642
|
+
}
|
|
643
|
+
morph = new morphology_1.MorphologyEngine(locale);
|
|
644
|
+
this.expandGrid3Predictions(tree, morph);
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Expand morphological predictions for Grid3 pagesets.
|
|
648
|
+
*
|
|
649
|
+
* Grid3 uses suffix buttons (pos='Suffix') on the same page as content words.
|
|
650
|
+
* Different pages have different suffix buttons — e.g., topic pages may only
|
|
651
|
+
* have -s (plural), while the Magic Wand page has -s, -er, -est, -ly, -y, -'s.
|
|
652
|
+
*
|
|
653
|
+
* Rules:
|
|
654
|
+
* 1. Build a suffix→formSlot map (-s → plural, -er → comparative, etc.)
|
|
655
|
+
* 2. For each page, collect available suffix buttons
|
|
656
|
+
* 3. Only generate forms for slots that have matching suffix buttons on that page
|
|
657
|
+
* 4. POS inference is used for untagged content words (Grid3 grids often lack POS)
|
|
658
|
+
*/
|
|
659
|
+
expandGrid3Predictions(tree, morph) {
|
|
616
660
|
const skipInference = new Set([
|
|
617
661
|
'a',
|
|
618
662
|
'an',
|
|
@@ -695,30 +739,56 @@ class MetricsCalculator {
|
|
|
695
739
|
'wow',
|
|
696
740
|
'sorry',
|
|
697
741
|
]);
|
|
742
|
+
// Map suffix button labels to the morphology slots they produce
|
|
743
|
+
const SUFFIX_TO_SLOT = {
|
|
744
|
+
'-s': ['plural'],
|
|
745
|
+
"-'s": ['possessive'],
|
|
746
|
+
'-er': ['comparative'],
|
|
747
|
+
'-est': ['superlative'],
|
|
748
|
+
'-ly': ['adverb'],
|
|
749
|
+
'-y': ['adjective'],
|
|
750
|
+
};
|
|
751
|
+
// POS → slots that POS can produce (for filtering)
|
|
752
|
+
const POS_TO_SUFFIX_SLOTS = {
|
|
753
|
+
Noun: new Set(['plural', 'possessive']),
|
|
754
|
+
Verb: new Set(['plural']),
|
|
755
|
+
Adjective: new Set(['comparative', 'superlative', 'adverb', 'adjective']),
|
|
756
|
+
};
|
|
698
757
|
for (const page of Object.values(tree.pages)) {
|
|
758
|
+
// Collect suffix buttons on this page
|
|
759
|
+
const pageSuffixes = new Set();
|
|
760
|
+
const pageSuffixSlots = new Set();
|
|
761
|
+
for (const row of page.grid) {
|
|
762
|
+
for (const btn of row) {
|
|
763
|
+
if (btn?.pos === 'Suffix' && btn.label) {
|
|
764
|
+
pageSuffixes.add(btn.label);
|
|
765
|
+
const slots = SUFFIX_TO_SLOT[btn.label];
|
|
766
|
+
if (slots) {
|
|
767
|
+
for (const s of slots)
|
|
768
|
+
pageSuffixSlots.add(s);
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
// No suffix buttons on this page → no morphology
|
|
774
|
+
if (pageSuffixSlots.size === 0)
|
|
775
|
+
continue;
|
|
699
776
|
for (const row of page.grid) {
|
|
700
777
|
for (const btn of row) {
|
|
701
778
|
if (!btn || !btn.label)
|
|
702
779
|
continue;
|
|
780
|
+
if (btn.pos === 'Suffix')
|
|
781
|
+
continue;
|
|
703
782
|
let pos = btn.pos;
|
|
704
|
-
// If no POS tag (or Unknown/Ignore), attempt POS inference.
|
|
705
|
-
// Many content words on topic pages lack POS tags even though
|
|
706
|
-
// they are clearly nouns (e.g., "bird", "tree", "cloud").
|
|
707
|
-
// Strategy: check irregular tables first for confident POS,
|
|
708
|
-
// then fall back to Noun for single-word content labels.
|
|
709
783
|
if (!pos || pos === 'Unknown' || pos === 'Ignore') {
|
|
710
784
|
const lower = btn.label.toLowerCase();
|
|
711
|
-
// Skip function words and multi-word labels
|
|
712
785
|
if (!skipInference.has(lower) && !lower.includes(' ') && lower.length > 1) {
|
|
713
|
-
// Check irregular tables for confident POS assignment
|
|
714
786
|
const inferredPOS = morph.inferPOS(lower);
|
|
715
787
|
if (inferredPOS) {
|
|
716
788
|
pos = inferredPOS;
|
|
717
789
|
btn.pos = inferredPOS;
|
|
718
790
|
}
|
|
719
791
|
else {
|
|
720
|
-
// Default to Noun for untagged content words.
|
|
721
|
-
// This generates plurals (e.g., bird → birds, tree → trees).
|
|
722
792
|
pos = 'Noun';
|
|
723
793
|
btn.pos = 'Noun';
|
|
724
794
|
}
|
|
@@ -726,16 +796,90 @@ class MetricsCalculator {
|
|
|
726
796
|
}
|
|
727
797
|
if (!pos || pos === 'Unknown' || pos === 'Ignore')
|
|
728
798
|
continue;
|
|
729
|
-
|
|
730
|
-
|
|
799
|
+
// Check if this POS can produce forms matching the page's suffix slots
|
|
800
|
+
const posSlots = POS_TO_SUFFIX_SLOTS[pos];
|
|
801
|
+
if (!posSlots)
|
|
802
|
+
continue;
|
|
803
|
+
const hasRelevantSlot = [...posSlots].some((s) => pageSuffixSlots.has(s));
|
|
804
|
+
if (!hasRelevantSlot)
|
|
805
|
+
continue;
|
|
806
|
+
const allForms = morph.inflect(btn.label, pos);
|
|
807
|
+
if (allForms.length === 0)
|
|
808
|
+
continue;
|
|
809
|
+
// Filter forms: only include those producible by suffixes on this page
|
|
810
|
+
// For the built-in engine, we can't easily map forms to slots, so
|
|
811
|
+
// include all forms when any relevant suffix exists. The per-page
|
|
812
|
+
// gate (suffix presence) is the main filter.
|
|
813
|
+
const existing = btn.predictions || [];
|
|
814
|
+
const merged = new Set([...existing, ...allForms]);
|
|
815
|
+
btn.predictions = Array.from(merged);
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
}
|
|
819
|
+
}
|
|
820
|
+
/**
|
|
821
|
+
* Expand morphological predictions for TDSnap pagesets.
|
|
822
|
+
*
|
|
823
|
+
* TDSnap uses Inflector buttons (ContentType=3) on "Word Forms" pages to
|
|
824
|
+
* provide morphology. These pages are loaded dynamically by the runtime,
|
|
825
|
+
* NOT via navigation buttons, so they are unreachable in our tree model.
|
|
826
|
+
*
|
|
827
|
+
* Rules:
|
|
828
|
+
* 1. If the pageset has NO Inflector buttons → no morphology at all
|
|
829
|
+
* 2. Only generate forms whose grammar tag matches an available Inflector
|
|
830
|
+
* (e.g., if there's no -ly Inflector, don't generate "happily")
|
|
831
|
+
* 3. No POS inference — only the lexicon determines which words get forms
|
|
832
|
+
*/
|
|
833
|
+
expandTDSnapPredictions(tree, morph) {
|
|
834
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
835
|
+
const { TDSnapLexiconParser } = require('../morphology/tdsnapLexiconParser');
|
|
836
|
+
// Step 1: Collect available grammar tags from Inflector buttons
|
|
837
|
+
const availableTags = new Set();
|
|
838
|
+
for (const page of Object.values(tree.pages)) {
|
|
839
|
+
for (const row of page.grid) {
|
|
840
|
+
for (const btn of row) {
|
|
841
|
+
if (btn?.contentType === 'Inflector' && btn.parameters?.grammar?.handler) {
|
|
842
|
+
const parsed = TDSnapLexiconParser.parseContentTypeHandler(btn.parameters.grammar.handler);
|
|
843
|
+
if (parsed) {
|
|
844
|
+
const key = `${parsed.category}:${parsed.subtype}`;
|
|
845
|
+
const tag = TDSnapLexiconParser.HANDLER_TAG_MAP[key];
|
|
846
|
+
if (tag)
|
|
847
|
+
availableTags.add(tag);
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
if (availableTags.size === 0)
|
|
854
|
+
return;
|
|
855
|
+
// Step 2: For each button, look up lexicon forms filtered by available tags
|
|
856
|
+
for (const page of Object.values(tree.pages)) {
|
|
857
|
+
for (const row of page.grid) {
|
|
858
|
+
for (const btn of row) {
|
|
859
|
+
if (!btn || !btn.label || btn.contentType === 'Inflector')
|
|
860
|
+
continue;
|
|
861
|
+
const filtered = this.filterFormsByAvailableTags(morph, btn.label, availableTags);
|
|
862
|
+
if (filtered.length > 0) {
|
|
731
863
|
const existing = btn.predictions || [];
|
|
732
|
-
const merged = new Set([...existing, ...
|
|
864
|
+
const merged = new Set([...existing, ...filtered]);
|
|
733
865
|
btn.predictions = Array.from(merged);
|
|
734
866
|
}
|
|
735
867
|
}
|
|
736
868
|
}
|
|
737
869
|
}
|
|
738
870
|
}
|
|
871
|
+
filterFormsByAvailableTags(morph, base, availableTags) {
|
|
872
|
+
const entry = morph.getLexiconEntry(base.toLowerCase());
|
|
873
|
+
if (!entry)
|
|
874
|
+
return [];
|
|
875
|
+
const forms = [];
|
|
876
|
+
for (const f of entry.forms) {
|
|
877
|
+
if (availableTags.has(f.tag) && f.form.toLowerCase() !== base.toLowerCase()) {
|
|
878
|
+
forms.push(f.form);
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
return forms;
|
|
882
|
+
}
|
|
739
883
|
/**
|
|
740
884
|
* Calculate metrics for word forms (smart grammar predictions)
|
|
741
885
|
*
|
|
@@ -752,7 +896,7 @@ class MetricsCalculator {
|
|
|
752
896
|
* @param options - Metrics options
|
|
753
897
|
* @returns Object containing word form metrics and labels that were replaced
|
|
754
898
|
*/
|
|
755
|
-
calculateWordFormMetrics(tree, buttons,
|
|
899
|
+
calculateWordFormMetrics(tree, buttons, options = {}) {
|
|
756
900
|
const wordFormMetrics = [];
|
|
757
901
|
const replacedLabels = new Set();
|
|
758
902
|
// Track buttons by label to compare efforts
|
|
@@ -814,23 +958,28 @@ class MetricsCalculator {
|
|
|
814
958
|
// Calculate effort for each word form
|
|
815
959
|
btn.predictions.forEach((wordForm, index) => {
|
|
816
960
|
const wordFormLower = wordForm.toLowerCase();
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
961
|
+
const isSuggestWords = suggestWordsSet.has(wordFormLower);
|
|
962
|
+
let wordFormEffort;
|
|
963
|
+
if (options.tdsnapLexiconPath && !isSuggestWords) {
|
|
964
|
+
// TDSnap Inflector-based form: the grammar overlay appears
|
|
965
|
+
// dynamically when a word is selected. The cost is a fixed
|
|
966
|
+
// single selection to tap the Inflector button.
|
|
967
|
+
wordFormEffort =
|
|
968
|
+
parentMetrics.effort + effort_1.EFFORT_CONSTANTS.TDSNAP_GRAMMAR_OVERLAY_EFFORT;
|
|
969
|
+
}
|
|
970
|
+
else {
|
|
971
|
+
// Grid-based prediction layout (Suggest Words, or Grid3 morphology)
|
|
972
|
+
const predictionsGridCols = 2;
|
|
973
|
+
const predictionRowIndex = Math.floor(index / predictionsGridCols);
|
|
974
|
+
const predictionColIndex = index % predictionsGridCols;
|
|
975
|
+
const predictionPriorItems = predictionRowIndex * predictionsGridCols + predictionColIndex;
|
|
976
|
+
const predictionSelectionEffort = (0, effort_1.visualScanEffort)(predictionPriorItems);
|
|
977
|
+
const suggestWordsConfirmation = isSuggestWords
|
|
978
|
+
? effort_1.EFFORT_CONSTANTS.SUGGEST_WORDS_SELECTION_EFFORT
|
|
979
|
+
: 0;
|
|
980
|
+
wordFormEffort =
|
|
981
|
+
parentMetrics.effort + predictionSelectionEffort + suggestWordsConfirmation;
|
|
982
|
+
}
|
|
834
983
|
// Check if this word already exists as a regular button
|
|
835
984
|
const existingBtn = existingLabels.get(wordFormLower);
|
|
836
985
|
// If word exists and has lower or equal effort, skip the word form
|
|
@@ -32,6 +32,7 @@ export declare const EFFORT_CONSTANTS: {
|
|
|
32
32
|
readonly DEFAULT_SCAN_ERROR_RATE: 0.1;
|
|
33
33
|
readonly SCAN_RETRY_PENALTY: 1;
|
|
34
34
|
readonly SUGGEST_WORDS_SELECTION_EFFORT: 0.5;
|
|
35
|
+
readonly TDSNAP_GRAMMAR_OVERLAY_EFFORT: 0.1;
|
|
35
36
|
};
|
|
36
37
|
/**
|
|
37
38
|
* Calculate button size effort based on grid dimensions
|
|
@@ -48,6 +48,7 @@ exports.EFFORT_CONSTANTS = {
|
|
|
48
48
|
DEFAULT_SCAN_ERROR_RATE: 0.1, // 10% chance of missing a selection
|
|
49
49
|
SCAN_RETRY_PENALTY: 1.0, // Cost multiplier for a full loop retry
|
|
50
50
|
SUGGEST_WORDS_SELECTION_EFFORT: 0.5, // Extra tap to confirm a Suggest Words prediction
|
|
51
|
+
TDSNAP_GRAMMAR_OVERLAY_EFFORT: 0.1, // Fixed cost: select an Inflector button from the dynamic grammar overlay
|
|
51
52
|
};
|
|
52
53
|
/**
|
|
53
54
|
* Calculate button size effort based on grid dimensions
|
|
@@ -147,6 +147,32 @@ export interface MetricsOptions {
|
|
|
147
147
|
* Only used when useSmartGrammar is true.
|
|
148
148
|
*/
|
|
149
149
|
morphologyLocale?: string;
|
|
150
|
+
/**
|
|
151
|
+
* Path to a TDSnap Lexicon.db3 for word form lookups
|
|
152
|
+
*
|
|
153
|
+
* When provided, the MorphologyEngine will use the TDSnap lexicon database
|
|
154
|
+
* to look up inflected forms (e.g., "happy" → "happily", "run" → "ran")
|
|
155
|
+
* instead of (or before falling back to) rule-based morphology.
|
|
156
|
+
*
|
|
157
|
+
* The Lexicon.db3 is found in the TDSnap installation at:
|
|
158
|
+
* Data/LanguageModels/lang_en_US/Lexicon.db3
|
|
159
|
+
*
|
|
160
|
+
* Only used when useSmartGrammar is true.
|
|
161
|
+
*/
|
|
162
|
+
tdsnapLexiconPath?: string;
|
|
163
|
+
/**
|
|
164
|
+
* Path to a Grid 3 verbs.zip for verb conjugation lookups
|
|
165
|
+
*
|
|
166
|
+
* When provided, the MorphologyEngine will use the Grid 3 verbs database
|
|
167
|
+
* to look up conjugated forms (e.g., "go" → "goes", "went", "gone", "going")
|
|
168
|
+
* instead of (or before falling back to) rule-based morphology.
|
|
169
|
+
*
|
|
170
|
+
* The verbs.zip is found in the Grid 3 installation at:
|
|
171
|
+
* Locale/en-GB/verbs/verbs.zip
|
|
172
|
+
*
|
|
173
|
+
* Only used when useSmartGrammar is true.
|
|
174
|
+
*/
|
|
175
|
+
grid3VerbsPath?: string;
|
|
150
176
|
}
|
|
151
177
|
/**
|
|
152
178
|
* Comparison result between two board sets
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import { MorphRuleSet } from './types';
|
|
2
2
|
import type { Grid3VerbForms } from './grid3VerbsParser';
|
|
3
|
+
import type { TDSnapLexiconData, TDSnapLexiconEntry } from './tdsnapLexiconParser';
|
|
3
4
|
export declare class MorphologyEngine {
|
|
4
5
|
private ruleSet;
|
|
5
6
|
private grid3Verbs?;
|
|
7
|
+
private tdsnapLexicon?;
|
|
6
8
|
private cache;
|
|
9
|
+
getLexiconEntry(word: string): TDSnapLexiconEntry | undefined;
|
|
7
10
|
constructor(ruleSetOrLocale: string | MorphRuleSet);
|
|
8
11
|
static fromGrid3Verbs(verbForms: Grid3VerbForms): MorphologyEngine;
|
|
12
|
+
static fromTDSnapLexicon(lexiconData: TDSnapLexiconData): MorphologyEngine;
|
|
9
13
|
get locale(): string;
|
|
10
14
|
inflect(base: string, pos: string): string[];
|
|
11
15
|
isFormOf(word: string, base: string, pos: string): boolean;
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.MorphologyEngine = void 0;
|
|
4
4
|
class MorphologyEngine {
|
|
5
|
+
getLexiconEntry(word) {
|
|
6
|
+
return this.tdsnapLexicon?.words.get(word.toLowerCase());
|
|
7
|
+
}
|
|
5
8
|
constructor(ruleSetOrLocale) {
|
|
6
9
|
this.cache = new Map();
|
|
7
10
|
if (typeof ruleSetOrLocale === 'string') {
|
|
@@ -21,6 +24,16 @@ class MorphologyEngine {
|
|
|
21
24
|
engine.grid3Verbs = verbForms.verbs;
|
|
22
25
|
return engine;
|
|
23
26
|
}
|
|
27
|
+
static fromTDSnapLexicon(lexiconData) {
|
|
28
|
+
const engine = new MorphologyEngine({
|
|
29
|
+
locale: lexiconData.locale,
|
|
30
|
+
version: 1,
|
|
31
|
+
irregular: {},
|
|
32
|
+
regular: {},
|
|
33
|
+
});
|
|
34
|
+
engine.tdsnapLexicon = lexiconData;
|
|
35
|
+
return engine;
|
|
36
|
+
}
|
|
24
37
|
get locale() {
|
|
25
38
|
return this.ruleSet.locale;
|
|
26
39
|
}
|
|
@@ -29,9 +42,18 @@ class MorphologyEngine {
|
|
|
29
42
|
const cached = this.cache.get(key);
|
|
30
43
|
if (cached)
|
|
31
44
|
return cached;
|
|
45
|
+
if (this.tdsnapLexicon) {
|
|
46
|
+
const entry = this.tdsnapLexicon.words.get(base.toLowerCase());
|
|
47
|
+
if (entry) {
|
|
48
|
+
const forms = entry.forms.map((f) => f.form);
|
|
49
|
+
this.cache.set(key, forms);
|
|
50
|
+
return forms;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
32
53
|
if (this.grid3Verbs) {
|
|
33
|
-
const
|
|
34
|
-
if (
|
|
54
|
+
const raw = this.grid3Verbs.get(base) || this.grid3Verbs.get(base.toLowerCase());
|
|
55
|
+
if (raw) {
|
|
56
|
+
const forms = raw.filter((f) => !f.includes('{'));
|
|
35
57
|
this.cache.set(key, forms);
|
|
36
58
|
return forms;
|
|
37
59
|
}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
export { MorphologyEngine } from './engine';
|
|
2
2
|
export { WordFormGenerator } from './wordFormGenerator';
|
|
3
|
+
export { TDSnapLexiconParser } from './tdsnapLexiconParser';
|
|
3
4
|
export type { MorphRuleSet, MorphRule, MorphWordForms, AstericsWordForm, VerbFormWithConditions, Grid3VerbFormsDetailed, } from './types';
|
|
4
5
|
export type { Grid3VerbForms } from './grid3VerbsParser';
|
|
6
|
+
export type { TDSnapLexiconData, TDSnapLexiconEntry, TDSnapLexiconForm, } from './tdsnapLexiconParser';
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.WordFormGenerator = exports.MorphologyEngine = void 0;
|
|
3
|
+
exports.TDSnapLexiconParser = exports.WordFormGenerator = exports.MorphologyEngine = void 0;
|
|
4
4
|
var engine_1 = require("./engine");
|
|
5
5
|
Object.defineProperty(exports, "MorphologyEngine", { enumerable: true, get: function () { return engine_1.MorphologyEngine; } });
|
|
6
6
|
var wordFormGenerator_1 = require("./wordFormGenerator");
|
|
7
7
|
Object.defineProperty(exports, "WordFormGenerator", { enumerable: true, get: function () { return wordFormGenerator_1.WordFormGenerator; } });
|
|
8
|
+
var tdsnapLexiconParser_1 = require("./tdsnapLexiconParser");
|
|
9
|
+
Object.defineProperty(exports, "TDSnapLexiconParser", { enumerable: true, get: function () { return tdsnapLexiconParser_1.TDSnapLexiconParser; } });
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export interface TDSnapLexiconForm {
|
|
2
|
+
tag: string;
|
|
3
|
+
form: string;
|
|
4
|
+
}
|
|
5
|
+
export interface TDSnapLexiconEntry {
|
|
6
|
+
lexemeId: number;
|
|
7
|
+
forms: TDSnapLexiconForm[];
|
|
8
|
+
}
|
|
9
|
+
export interface TDSnapLexiconData {
|
|
10
|
+
locale: string;
|
|
11
|
+
words: Map<string, TDSnapLexiconEntry>;
|
|
12
|
+
}
|
|
13
|
+
export declare class TDSnapLexiconParser {
|
|
14
|
+
parseDb(dbPath: string, locale?: string): TDSnapLexiconData;
|
|
15
|
+
private inferLocale;
|
|
16
|
+
private extractAll;
|
|
17
|
+
lookupWord(data: TDSnapLexiconData, word: string): string[];
|
|
18
|
+
lookupWordByTag(data: TDSnapLexiconData, word: string, tag: string): string[];
|
|
19
|
+
static readonly TAG_TO_POS: Record<string, string>;
|
|
20
|
+
static readonly HANDLER_TAG_MAP: Record<string, string>;
|
|
21
|
+
static parseContentTypeHandler(handler: string): {
|
|
22
|
+
category: string;
|
|
23
|
+
subtype: string;
|
|
24
|
+
params: string[];
|
|
25
|
+
} | null;
|
|
26
|
+
static tagToPos(tag: string): string;
|
|
27
|
+
static handlerToPos(handler: string): string;
|
|
28
|
+
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TDSnapLexiconParser = void 0;
|
|
4
|
+
class TDSnapLexiconParser {
|
|
5
|
+
parseDb(dbPath, locale) {
|
|
6
|
+
const detectedLocale = locale || this.inferLocale(dbPath);
|
|
7
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
8
|
+
const Database = require('better-sqlite3');
|
|
9
|
+
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
|
|
10
|
+
const db = new Database(dbPath, { readonly: true });
|
|
11
|
+
try {
|
|
12
|
+
return this.extractAll(db, detectedLocale);
|
|
13
|
+
}
|
|
14
|
+
finally {
|
|
15
|
+
db.close();
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
inferLocale(dbPath) {
|
|
19
|
+
const match = dbPath.match(/lang_([a-z]{2}_[A-Z]{2})/i);
|
|
20
|
+
return match ? match[1] : 'unknown';
|
|
21
|
+
}
|
|
22
|
+
extractAll(db, locale) {
|
|
23
|
+
const words = new Map();
|
|
24
|
+
const subclassCache = new Map();
|
|
25
|
+
const getSubclass = (id) => {
|
|
26
|
+
let name = subclassCache.get(id);
|
|
27
|
+
if (name !== undefined)
|
|
28
|
+
return name;
|
|
29
|
+
const row = db.prepare('SELECT Name FROM PosSubclass WHERE Id = ?').get(id);
|
|
30
|
+
name = row?.Name;
|
|
31
|
+
if (name) {
|
|
32
|
+
subclassCache.set(id, name);
|
|
33
|
+
return name;
|
|
34
|
+
}
|
|
35
|
+
return undefined;
|
|
36
|
+
};
|
|
37
|
+
const allWords = db
|
|
38
|
+
.prepare(`SELECT w.Id as wordId, w.Text as text,
|
|
39
|
+
i.Id as inflectionId, i.LexemeId as lexemeId, i.PosSubclassId as posSubclassId
|
|
40
|
+
FROM Word w
|
|
41
|
+
JOIN Spelling s ON s.WordId = w.Id
|
|
42
|
+
JOIN Inflection i ON i.Id = s.InflectionId
|
|
43
|
+
WHERE i.PosSubclassId != 0
|
|
44
|
+
ORDER BY w.Text`)
|
|
45
|
+
.all();
|
|
46
|
+
const lexemeForms = new Map();
|
|
47
|
+
for (const row of allWords) {
|
|
48
|
+
const tag = getSubclass(row.posSubclassId);
|
|
49
|
+
if (!tag)
|
|
50
|
+
continue;
|
|
51
|
+
let formsByTag = lexemeForms.get(row.lexemeId);
|
|
52
|
+
if (!formsByTag) {
|
|
53
|
+
formsByTag = new Map();
|
|
54
|
+
lexemeForms.set(row.lexemeId, formsByTag);
|
|
55
|
+
}
|
|
56
|
+
const existing = formsByTag.get(tag);
|
|
57
|
+
if (existing) {
|
|
58
|
+
if (!existing.includes(row.text))
|
|
59
|
+
existing.push(row.text);
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
formsByTag.set(tag, [row.text]);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
const wordToLexeme = new Map();
|
|
66
|
+
for (const row of allWords) {
|
|
67
|
+
if (!wordToLexeme.has(row.text.toLowerCase())) {
|
|
68
|
+
wordToLexeme.set(row.text.toLowerCase(), row.lexemeId);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
for (const [text, lexemeId] of wordToLexeme) {
|
|
72
|
+
const formsByTag = lexemeForms.get(lexemeId);
|
|
73
|
+
if (!formsByTag || formsByTag.size === 0)
|
|
74
|
+
continue;
|
|
75
|
+
const forms = [];
|
|
76
|
+
for (const [tag, formTexts] of formsByTag) {
|
|
77
|
+
for (const formText of formTexts) {
|
|
78
|
+
if (formText.toLowerCase() !== text) {
|
|
79
|
+
forms.push({ tag, form: formText });
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
if (forms.length > 0) {
|
|
84
|
+
words.set(text, { lexemeId, forms });
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
return { locale, words };
|
|
88
|
+
}
|
|
89
|
+
lookupWord(data, word) {
|
|
90
|
+
const entry = data.words.get(word.toLowerCase());
|
|
91
|
+
if (!entry)
|
|
92
|
+
return [];
|
|
93
|
+
return entry.forms.map((f) => f.form);
|
|
94
|
+
}
|
|
95
|
+
lookupWordByTag(data, word, tag) {
|
|
96
|
+
const entry = data.words.get(word.toLowerCase());
|
|
97
|
+
if (!entry)
|
|
98
|
+
return [];
|
|
99
|
+
return entry.forms.filter((f) => f.tag === tag).map((f) => f.form);
|
|
100
|
+
}
|
|
101
|
+
static parseContentTypeHandler(handler) {
|
|
102
|
+
if (!handler)
|
|
103
|
+
return null;
|
|
104
|
+
const colonIdx = handler.indexOf(':');
|
|
105
|
+
if (colonIdx === -1) {
|
|
106
|
+
const parts = handler.split(',');
|
|
107
|
+
return { category: parts[0], subtype: '', params: parts.slice(1) };
|
|
108
|
+
}
|
|
109
|
+
const category = handler.substring(0, colonIdx);
|
|
110
|
+
const rest = handler.substring(colonIdx + 1);
|
|
111
|
+
const commaIdx = rest.indexOf(',');
|
|
112
|
+
if (commaIdx === -1) {
|
|
113
|
+
return { category, subtype: rest, params: [] };
|
|
114
|
+
}
|
|
115
|
+
const subtype = rest.substring(0, commaIdx);
|
|
116
|
+
const paramsStr = rest.substring(commaIdx + 1);
|
|
117
|
+
const params = paramsStr.split(',').map((p) => p.trim());
|
|
118
|
+
return { category, subtype, params };
|
|
119
|
+
}
|
|
120
|
+
static tagToPos(tag) {
|
|
121
|
+
return TDSnapLexiconParser.TAG_TO_POS[tag] || 'Unknown';
|
|
122
|
+
}
|
|
123
|
+
static handlerToPos(handler) {
|
|
124
|
+
const parsed = TDSnapLexiconParser.parseContentTypeHandler(handler);
|
|
125
|
+
if (!parsed)
|
|
126
|
+
return 'Unknown';
|
|
127
|
+
if (parsed.category === 'RESET' || parsed.category === 'SPECIAL')
|
|
128
|
+
return 'Ignore';
|
|
129
|
+
const key = `${parsed.category}:${parsed.subtype}`;
|
|
130
|
+
const tag = TDSnapLexiconParser.HANDLER_TAG_MAP[key];
|
|
131
|
+
if (tag)
|
|
132
|
+
return TDSnapLexiconParser.TAG_TO_POS[tag] || 'Unknown';
|
|
133
|
+
return TDSnapLexiconParser.TAG_TO_POS[parsed.subtype] || 'Unknown';
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
exports.TDSnapLexiconParser = TDSnapLexiconParser;
|
|
137
|
+
TDSnapLexiconParser.TAG_TO_POS = {
|
|
138
|
+
V0: 'Verb',
|
|
139
|
+
VZ: 'Verb',
|
|
140
|
+
VG: 'Verb',
|
|
141
|
+
VD: 'Verb',
|
|
142
|
+
VN: 'Verb',
|
|
143
|
+
SNG: 'Noun',
|
|
144
|
+
PLU: 'Noun',
|
|
145
|
+
ADJ: 'Adjective',
|
|
146
|
+
ADJR: 'Adjective',
|
|
147
|
+
ADJT: 'Adjective',
|
|
148
|
+
ADV: 'Adjective',
|
|
149
|
+
SUB: 'Pronoun',
|
|
150
|
+
OBJ: 'Pronoun',
|
|
151
|
+
POS: 'Pronoun',
|
|
152
|
+
NPOS: 'Pronoun',
|
|
153
|
+
REF: 'Pronoun',
|
|
154
|
+
B0: 'Verb',
|
|
155
|
+
BZ: 'Verb',
|
|
156
|
+
BM: 'Verb',
|
|
157
|
+
BR: 'Verb',
|
|
158
|
+
BDZ: 'Verb',
|
|
159
|
+
BDR: 'Verb',
|
|
160
|
+
BG: 'Verb',
|
|
161
|
+
BN: 'Verb',
|
|
162
|
+
};
|
|
163
|
+
TDSnapLexiconParser.HANDLER_TAG_MAP = {
|
|
164
|
+
'NOUN:PLU': 'PLU',
|
|
165
|
+
'DESCRIBE:ADJR': 'ADJR',
|
|
166
|
+
'DESCRIBE:ADJT': 'ADJT',
|
|
167
|
+
'DESCRIBE:ADV': 'ADV',
|
|
168
|
+
'VERB:V0': 'V0',
|
|
169
|
+
'VERB:VZ': 'VZ',
|
|
170
|
+
'VERB:VG': 'VG',
|
|
171
|
+
'VERB:VD': 'VD',
|
|
172
|
+
'VERB:VN': 'VN',
|
|
173
|
+
'PRONOUN:SUB': 'SUB',
|
|
174
|
+
'PRONOUN:OBJ': 'OBJ',
|
|
175
|
+
'PRONOUN:POS': 'POS',
|
|
176
|
+
'PRONOUN:NPOS': 'NPOS',
|
|
177
|
+
'PRONOUN:REF': 'REF',
|
|
178
|
+
'BE:B0': 'B0',
|
|
179
|
+
'BE:BZ': 'BZ',
|
|
180
|
+
'BE:BM': 'BM',
|
|
181
|
+
'BE:BR': 'BR',
|
|
182
|
+
'BE:BDZ': 'BDZ',
|
|
183
|
+
'BE:BDR': 'BDR',
|
|
184
|
+
'BE:BG': 'BG',
|
|
185
|
+
'BE:BN': 'BN',
|
|
186
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@willwade/aac-processors",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.18",
|
|
4
4
|
"description": "A comprehensive TypeScript library for processing AAC (Augmentative and Alternative Communication) file formats with translation support",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"browser": "dist/browser/index.browser.js",
|
|
@@ -131,7 +131,7 @@
|
|
|
131
131
|
],
|
|
132
132
|
"author": {
|
|
133
133
|
"name": "Will Wade",
|
|
134
|
-
"email": "
|
|
134
|
+
"email": "will@aactools.co.uk",
|
|
135
135
|
"url": "https://github.com/willwade"
|
|
136
136
|
},
|
|
137
137
|
"license": "MIT",
|
|
@@ -140,12 +140,12 @@
|
|
|
140
140
|
},
|
|
141
141
|
"repository": {
|
|
142
142
|
"type": "git",
|
|
143
|
-
"url": "https://github.com/
|
|
143
|
+
"url": "https://github.com/AACTools/AACProcessors-nodejs.git"
|
|
144
144
|
},
|
|
145
145
|
"bugs": {
|
|
146
|
-
"url": "https://github.com/
|
|
146
|
+
"url": "https://github.com/AACTools/AACProcessors-nodejs/issues"
|
|
147
147
|
},
|
|
148
|
-
"homepage": "https://github.com/
|
|
148
|
+
"homepage": "https://github.com/AACTools/AACProcessors-nodejs#readme",
|
|
149
149
|
"engines": {
|
|
150
150
|
"node": ">=20.0.0",
|
|
151
151
|
"npm": ">=9.0.0"
|