@willwade/aac-processors 0.2.16 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/dist/browser/processors/gridset/commands.js +56 -1
  2. package/dist/browser/processors/gridsetProcessor.js +16 -2
  3. package/dist/browser/processors/snapProcessor.js +42 -4
  4. package/dist/browser/utilities/analytics/metrics/core.js +182 -33
  5. package/dist/browser/utilities/analytics/metrics/effort.js +1 -0
  6. package/dist/browser/utilities/analytics/morphology/engine.js +24 -2
  7. package/dist/browser/utilities/analytics/morphology/index.js +1 -0
  8. package/dist/browser/utilities/analytics/morphology/tdsnapLexiconParser.js +182 -0
  9. package/dist/core/treeStructure.d.ts +3 -2
  10. package/dist/index.node.d.ts +1 -0
  11. package/dist/index.node.js +4 -2
  12. package/dist/processors/gridset/commands.js +56 -1
  13. package/dist/processors/gridsetProcessor.js +16 -2
  14. package/dist/processors/snapProcessor.js +42 -4
  15. package/dist/types/aac.d.ts +1 -1
  16. package/dist/utilities/analytics/metrics/core.d.ts +33 -0
  17. package/dist/utilities/analytics/metrics/core.js +182 -33
  18. package/dist/utilities/analytics/metrics/effort.d.ts +1 -0
  19. package/dist/utilities/analytics/metrics/effort.js +1 -0
  20. package/dist/utilities/analytics/metrics/types.d.ts +26 -0
  21. package/dist/utilities/analytics/morphology/engine.d.ts +4 -0
  22. package/dist/utilities/analytics/morphology/engine.js +24 -2
  23. package/dist/utilities/analytics/morphology/index.d.ts +2 -0
  24. package/dist/utilities/analytics/morphology/index.js +3 -1
  25. package/dist/utilities/analytics/morphology/tdsnapLexiconParser.d.ts +28 -0
  26. package/dist/utilities/analytics/morphology/tdsnapLexiconParser.js +186 -0
  27. package/package.json +5 -5
@@ -588,12 +588,20 @@ class MetricsCalculator {
588
588
  /**
589
589
  * Quick check whether any button in the tree has a POS tag.
590
590
  * Used to auto-enable smart grammar without requiring explicit opt-in.
591
+ *
592
+ * IMPORTANT: Only counts POS from non-Inflector and non-Suffix buttons.
593
+ * TDSnap Inflector buttons and Grid3 Suffix buttons are grammar controls,
594
+ * not content words — they should NOT auto-enable morphology.
591
595
  */
592
596
  treeHasPosTags(tree) {
593
597
  for (const page of Object.values(tree.pages)) {
594
598
  for (const row of page.grid) {
595
599
  for (const btn of row) {
596
- if (btn?.pos && btn.pos !== 'Unknown' && btn.pos !== 'Ignore') {
600
+ if (btn?.pos &&
601
+ btn.pos !== 'Unknown' &&
602
+ btn.pos !== 'Ignore' &&
603
+ btn.pos !== 'Suffix' &&
604
+ btn.contentType !== 'Inflector') {
597
605
  return true;
598
606
  }
599
607
  }
@@ -611,8 +619,44 @@ class MetricsCalculator {
611
619
  */
612
620
  expandMorphologicalPredictions(tree, options) {
613
621
  const locale = options.morphologyLocale || 'en-gb';
614
- const morph = new morphology_1.MorphologyEngine(locale);
615
- // Words that should never be POS-inferred (function words, determiners, etc.)
622
+ let morph;
623
+ if (options.tdsnapLexiconPath) {
624
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
625
+ const { TDSnapLexiconParser } = require('../morphology/tdsnapLexiconParser');
626
+ const parser = new TDSnapLexiconParser();
627
+ const lexiconData = parser.parseDb(options.tdsnapLexiconPath, locale.replace('-', '_'));
628
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
629
+ morph = morphology_1.MorphologyEngine.fromTDSnapLexicon(lexiconData);
630
+ this.expandTDSnapPredictions(tree, morph);
631
+ return;
632
+ }
633
+ if (options.grid3VerbsPath) {
634
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
635
+ const { Grid3VerbsParser } = require('../morphology/grid3VerbsParser');
636
+ const parser = new Grid3VerbsParser();
637
+ const verbForms = parser.parseZip(options.grid3VerbsPath);
638
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
639
+ morph = morphology_1.MorphologyEngine.fromGrid3Verbs(verbForms);
640
+ this.expandGrid3Predictions(tree, morph);
641
+ return;
642
+ }
643
+ morph = new morphology_1.MorphologyEngine(locale);
644
+ this.expandGrid3Predictions(tree, morph);
645
+ }
646
+ /**
647
+ * Expand morphological predictions for Grid3 pagesets.
648
+ *
649
+ * Grid3 uses suffix buttons (pos='Suffix') on the same page as content words.
650
+ * Different pages have different suffix buttons — e.g., topic pages may only
651
+ * have -s (plural), while the Magic Wand page has -s, -er, -est, -ly, -y, -'s.
652
+ *
653
+ * Rules:
654
+ * 1. Build a suffix→formSlot map (-s → plural, -er → comparative, etc.)
655
+ * 2. For each page, collect available suffix buttons
656
+ * 3. Only generate forms for slots that have matching suffix buttons on that page
657
+ * 4. POS inference is used for untagged content words (Grid3 grids often lack POS)
658
+ */
659
+ expandGrid3Predictions(tree, morph) {
616
660
  const skipInference = new Set([
617
661
  'a',
618
662
  'an',
@@ -695,30 +739,56 @@ class MetricsCalculator {
695
739
  'wow',
696
740
  'sorry',
697
741
  ]);
742
+ // Map suffix button labels to the morphology slots they produce
743
+ const SUFFIX_TO_SLOT = {
744
+ '-s': ['plural'],
745
+ "-'s": ['possessive'],
746
+ '-er': ['comparative'],
747
+ '-est': ['superlative'],
748
+ '-ly': ['adverb'],
749
+ '-y': ['adjective'],
750
+ };
751
+ // POS → slots that POS can produce (for filtering)
752
+ const POS_TO_SUFFIX_SLOTS = {
753
+ Noun: new Set(['plural', 'possessive']),
754
+ Verb: new Set(['plural']),
755
+ Adjective: new Set(['comparative', 'superlative', 'adverb', 'adjective']),
756
+ };
698
757
  for (const page of Object.values(tree.pages)) {
758
+ // Collect suffix buttons on this page
759
+ const pageSuffixes = new Set();
760
+ const pageSuffixSlots = new Set();
761
+ for (const row of page.grid) {
762
+ for (const btn of row) {
763
+ if (btn?.pos === 'Suffix' && btn.label) {
764
+ pageSuffixes.add(btn.label);
765
+ const slots = SUFFIX_TO_SLOT[btn.label];
766
+ if (slots) {
767
+ for (const s of slots)
768
+ pageSuffixSlots.add(s);
769
+ }
770
+ }
771
+ }
772
+ }
773
+ // No suffix buttons on this page → no morphology
774
+ if (pageSuffixSlots.size === 0)
775
+ continue;
699
776
  for (const row of page.grid) {
700
777
  for (const btn of row) {
701
778
  if (!btn || !btn.label)
702
779
  continue;
780
+ if (btn.pos === 'Suffix')
781
+ continue;
703
782
  let pos = btn.pos;
704
- // If no POS tag (or Unknown/Ignore), attempt POS inference.
705
- // Many content words on topic pages lack POS tags even though
706
- // they are clearly nouns (e.g., "bird", "tree", "cloud").
707
- // Strategy: check irregular tables first for confident POS,
708
- // then fall back to Noun for single-word content labels.
709
783
  if (!pos || pos === 'Unknown' || pos === 'Ignore') {
710
784
  const lower = btn.label.toLowerCase();
711
- // Skip function words and multi-word labels
712
785
  if (!skipInference.has(lower) && !lower.includes(' ') && lower.length > 1) {
713
- // Check irregular tables for confident POS assignment
714
786
  const inferredPOS = morph.inferPOS(lower);
715
787
  if (inferredPOS) {
716
788
  pos = inferredPOS;
717
789
  btn.pos = inferredPOS;
718
790
  }
719
791
  else {
720
- // Default to Noun for untagged content words.
721
- // This generates plurals (e.g., bird → birds, tree → trees).
722
792
  pos = 'Noun';
723
793
  btn.pos = 'Noun';
724
794
  }
@@ -726,16 +796,90 @@ class MetricsCalculator {
726
796
  }
727
797
  if (!pos || pos === 'Unknown' || pos === 'Ignore')
728
798
  continue;
729
- const forms = morph.inflect(btn.label, pos);
730
- if (forms.length > 0) {
799
+ // Check if this POS can produce forms matching the page's suffix slots
800
+ const posSlots = POS_TO_SUFFIX_SLOTS[pos];
801
+ if (!posSlots)
802
+ continue;
803
+ const hasRelevantSlot = [...posSlots].some((s) => pageSuffixSlots.has(s));
804
+ if (!hasRelevantSlot)
805
+ continue;
806
+ const allForms = morph.inflect(btn.label, pos);
807
+ if (allForms.length === 0)
808
+ continue;
809
+ // Filter forms: only include those producible by suffixes on this page
810
+ // For the built-in engine, we can't easily map forms to slots, so
811
+ // include all forms when any relevant suffix exists. The per-page
812
+ // gate (suffix presence) is the main filter.
813
+ const existing = btn.predictions || [];
814
+ const merged = new Set([...existing, ...allForms]);
815
+ btn.predictions = Array.from(merged);
816
+ }
817
+ }
818
+ }
819
+ }
820
+ /**
821
+ * Expand morphological predictions for TDSnap pagesets.
822
+ *
823
+ * TDSnap uses Inflector buttons (ContentType=3) on "Word Forms" pages to
824
+ * provide morphology. These pages are loaded dynamically by the runtime,
825
+ * NOT via navigation buttons, so they are unreachable in our tree model.
826
+ *
827
+ * Rules:
828
+ * 1. If the pageset has NO Inflector buttons → no morphology at all
829
+ * 2. Only generate forms whose grammar tag matches an available Inflector
830
+ * (e.g., if there's no -ly Inflector, don't generate "happily")
831
+ * 3. No POS inference — only the lexicon determines which words get forms
832
+ */
833
+ expandTDSnapPredictions(tree, morph) {
834
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
835
+ const { TDSnapLexiconParser } = require('../morphology/tdsnapLexiconParser');
836
+ // Step 1: Collect available grammar tags from Inflector buttons
837
+ const availableTags = new Set();
838
+ for (const page of Object.values(tree.pages)) {
839
+ for (const row of page.grid) {
840
+ for (const btn of row) {
841
+ if (btn?.contentType === 'Inflector' && btn.parameters?.grammar?.handler) {
842
+ const parsed = TDSnapLexiconParser.parseContentTypeHandler(btn.parameters.grammar.handler);
843
+ if (parsed) {
844
+ const key = `${parsed.category}:${parsed.subtype}`;
845
+ const tag = TDSnapLexiconParser.HANDLER_TAG_MAP[key];
846
+ if (tag)
847
+ availableTags.add(tag);
848
+ }
849
+ }
850
+ }
851
+ }
852
+ }
853
+ if (availableTags.size === 0)
854
+ return;
855
+ // Step 2: For each button, look up lexicon forms filtered by available tags
856
+ for (const page of Object.values(tree.pages)) {
857
+ for (const row of page.grid) {
858
+ for (const btn of row) {
859
+ if (!btn || !btn.label || btn.contentType === 'Inflector')
860
+ continue;
861
+ const filtered = this.filterFormsByAvailableTags(morph, btn.label, availableTags);
862
+ if (filtered.length > 0) {
731
863
  const existing = btn.predictions || [];
732
- const merged = new Set([...existing, ...forms]);
864
+ const merged = new Set([...existing, ...filtered]);
733
865
  btn.predictions = Array.from(merged);
734
866
  }
735
867
  }
736
868
  }
737
869
  }
738
870
  }
871
+ filterFormsByAvailableTags(morph, base, availableTags) {
872
+ const entry = morph.getLexiconEntry(base.toLowerCase());
873
+ if (!entry)
874
+ return [];
875
+ const forms = [];
876
+ for (const f of entry.forms) {
877
+ if (availableTags.has(f.tag) && f.form.toLowerCase() !== base.toLowerCase()) {
878
+ forms.push(f.form);
879
+ }
880
+ }
881
+ return forms;
882
+ }
739
883
  /**
740
884
  * Calculate metrics for word forms (smart grammar predictions)
741
885
  *
@@ -752,7 +896,7 @@ class MetricsCalculator {
752
896
  * @param options - Metrics options
753
897
  * @returns Object containing word form metrics and labels that were replaced
754
898
  */
755
- calculateWordFormMetrics(tree, buttons, _options = {}) {
899
+ calculateWordFormMetrics(tree, buttons, options = {}) {
756
900
  const wordFormMetrics = [];
757
901
  const replacedLabels = new Set();
758
902
  // Track buttons by label to compare efforts
@@ -814,23 +958,28 @@ class MetricsCalculator {
814
958
  // Calculate effort for each word form
815
959
  btn.predictions.forEach((wordForm, index) => {
816
960
  const wordFormLower = wordForm.toLowerCase();
817
- // Calculate effort based on position in predictions array
818
- // Assume predictions are displayed in a grid layout (e.g., 2 columns)
819
- const predictionsGridCols = 2; // Typical predictions layout
820
- const predictionRowIndex = Math.floor(index / predictionsGridCols);
821
- const predictionColIndex = index % predictionsGridCols;
822
- // Calculate visual scan effort to reach this word form position
823
- // Using similar logic to button scanning effort
824
- const predictionPriorItems = predictionRowIndex * predictionsGridCols + predictionColIndex;
825
- const predictionSelectionEffort = (0, effort_1.visualScanEffort)(predictionPriorItems);
826
- // Add confirmation cost for Suggest Words outcomes only.
827
- // Suggest Words requires an explicit tap on the prediction bar,
828
- // while smart grammar morphology forms are auto-generated (no extra tap).
829
- const suggestWordsConfirmation = suggestWordsSet.has(wordFormLower)
830
- ? effort_1.EFFORT_CONSTANTS.SUGGEST_WORDS_SELECTION_EFFORT
831
- : 0;
832
- // Word form effort = parent button's cumulative effort + selection effort + confirmation
833
- const wordFormEffort = parentMetrics.effort + predictionSelectionEffort + suggestWordsConfirmation;
961
+ const isSuggestWords = suggestWordsSet.has(wordFormLower);
962
+ let wordFormEffort;
963
+ if (options.tdsnapLexiconPath && !isSuggestWords) {
964
+ // TDSnap Inflector-based form: the grammar overlay appears
965
+ // dynamically when a word is selected. The cost is a fixed
966
+ // single selection to tap the Inflector button.
967
+ wordFormEffort =
968
+ parentMetrics.effort + effort_1.EFFORT_CONSTANTS.TDSNAP_GRAMMAR_OVERLAY_EFFORT;
969
+ }
970
+ else {
971
+ // Grid-based prediction layout (Suggest Words, or Grid3 morphology)
972
+ const predictionsGridCols = 2;
973
+ const predictionRowIndex = Math.floor(index / predictionsGridCols);
974
+ const predictionColIndex = index % predictionsGridCols;
975
+ const predictionPriorItems = predictionRowIndex * predictionsGridCols + predictionColIndex;
976
+ const predictionSelectionEffort = (0, effort_1.visualScanEffort)(predictionPriorItems);
977
+ const suggestWordsConfirmation = isSuggestWords
978
+ ? effort_1.EFFORT_CONSTANTS.SUGGEST_WORDS_SELECTION_EFFORT
979
+ : 0;
980
+ wordFormEffort =
981
+ parentMetrics.effort + predictionSelectionEffort + suggestWordsConfirmation;
982
+ }
834
983
  // Check if this word already exists as a regular button
835
984
  const existingBtn = existingLabels.get(wordFormLower);
836
985
  // If word exists and has lower or equal effort, skip the word form
@@ -32,6 +32,7 @@ export declare const EFFORT_CONSTANTS: {
32
32
  readonly DEFAULT_SCAN_ERROR_RATE: 0.1;
33
33
  readonly SCAN_RETRY_PENALTY: 1;
34
34
  readonly SUGGEST_WORDS_SELECTION_EFFORT: 0.5;
35
+ readonly TDSNAP_GRAMMAR_OVERLAY_EFFORT: 0.1;
35
36
  };
36
37
  /**
37
38
  * Calculate button size effort based on grid dimensions
@@ -48,6 +48,7 @@ exports.EFFORT_CONSTANTS = {
48
48
  DEFAULT_SCAN_ERROR_RATE: 0.1, // 10% chance of missing a selection
49
49
  SCAN_RETRY_PENALTY: 1.0, // Cost multiplier for a full loop retry
50
50
  SUGGEST_WORDS_SELECTION_EFFORT: 0.5, // Extra tap to confirm a Suggest Words prediction
51
+ TDSNAP_GRAMMAR_OVERLAY_EFFORT: 0.1, // Fixed cost: select an Inflector button from the dynamic grammar overlay
51
52
  };
52
53
  /**
53
54
  * Calculate button size effort based on grid dimensions
@@ -147,6 +147,32 @@ export interface MetricsOptions {
147
147
  * Only used when useSmartGrammar is true.
148
148
  */
149
149
  morphologyLocale?: string;
150
+ /**
151
+ * Path to a TDSnap Lexicon.db3 for word form lookups
152
+ *
153
+ * When provided, the MorphologyEngine will use the TDSnap lexicon database
154
+ * to look up inflected forms (e.g., "happy" → "happily", "run" → "ran")
155
+ * instead of (or before falling back to) rule-based morphology.
156
+ *
157
+ * The Lexicon.db3 is found in the TDSnap installation at:
158
+ * Data/LanguageModels/lang_en_US/Lexicon.db3
159
+ *
160
+ * Only used when useSmartGrammar is true.
161
+ */
162
+ tdsnapLexiconPath?: string;
163
+ /**
164
+ * Path to a Grid 3 verbs.zip for verb conjugation lookups
165
+ *
166
+ * When provided, the MorphologyEngine will use the Grid 3 verbs database
167
+ * to look up conjugated forms (e.g., "go" → "goes", "went", "gone", "going")
168
+ * instead of (or before falling back to) rule-based morphology.
169
+ *
170
+ * The verbs.zip is found in the Grid 3 installation at:
171
+ * Locale/en-GB/verbs/verbs.zip
172
+ *
173
+ * Only used when useSmartGrammar is true.
174
+ */
175
+ grid3VerbsPath?: string;
150
176
  }
151
177
  /**
152
178
  * Comparison result between two board sets
@@ -1,11 +1,15 @@
1
1
  import { MorphRuleSet } from './types';
2
2
  import type { Grid3VerbForms } from './grid3VerbsParser';
3
+ import type { TDSnapLexiconData, TDSnapLexiconEntry } from './tdsnapLexiconParser';
3
4
  export declare class MorphologyEngine {
4
5
  private ruleSet;
5
6
  private grid3Verbs?;
7
+ private tdsnapLexicon?;
6
8
  private cache;
9
+ getLexiconEntry(word: string): TDSnapLexiconEntry | undefined;
7
10
  constructor(ruleSetOrLocale: string | MorphRuleSet);
8
11
  static fromGrid3Verbs(verbForms: Grid3VerbForms): MorphologyEngine;
12
+ static fromTDSnapLexicon(lexiconData: TDSnapLexiconData): MorphologyEngine;
9
13
  get locale(): string;
10
14
  inflect(base: string, pos: string): string[];
11
15
  isFormOf(word: string, base: string, pos: string): boolean;
@@ -2,6 +2,9 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.MorphologyEngine = void 0;
4
4
  class MorphologyEngine {
5
+ getLexiconEntry(word) {
6
+ return this.tdsnapLexicon?.words.get(word.toLowerCase());
7
+ }
5
8
  constructor(ruleSetOrLocale) {
6
9
  this.cache = new Map();
7
10
  if (typeof ruleSetOrLocale === 'string') {
@@ -21,6 +24,16 @@ class MorphologyEngine {
21
24
  engine.grid3Verbs = verbForms.verbs;
22
25
  return engine;
23
26
  }
27
+ static fromTDSnapLexicon(lexiconData) {
28
+ const engine = new MorphologyEngine({
29
+ locale: lexiconData.locale,
30
+ version: 1,
31
+ irregular: {},
32
+ regular: {},
33
+ });
34
+ engine.tdsnapLexicon = lexiconData;
35
+ return engine;
36
+ }
24
37
  get locale() {
25
38
  return this.ruleSet.locale;
26
39
  }
@@ -29,9 +42,18 @@ class MorphologyEngine {
29
42
  const cached = this.cache.get(key);
30
43
  if (cached)
31
44
  return cached;
45
+ if (this.tdsnapLexicon) {
46
+ const entry = this.tdsnapLexicon.words.get(base.toLowerCase());
47
+ if (entry) {
48
+ const forms = entry.forms.map((f) => f.form);
49
+ this.cache.set(key, forms);
50
+ return forms;
51
+ }
52
+ }
32
53
  if (this.grid3Verbs) {
33
- const forms = this.grid3Verbs.get(base) || this.grid3Verbs.get(base.toLowerCase());
34
- if (forms) {
54
+ const raw = this.grid3Verbs.get(base) || this.grid3Verbs.get(base.toLowerCase());
55
+ if (raw) {
56
+ const forms = raw.filter((f) => !f.includes('{'));
35
57
  this.cache.set(key, forms);
36
58
  return forms;
37
59
  }
@@ -1,4 +1,6 @@
1
1
  export { MorphologyEngine } from './engine';
2
2
  export { WordFormGenerator } from './wordFormGenerator';
3
+ export { TDSnapLexiconParser } from './tdsnapLexiconParser';
3
4
  export type { MorphRuleSet, MorphRule, MorphWordForms, AstericsWordForm, VerbFormWithConditions, Grid3VerbFormsDetailed, } from './types';
4
5
  export type { Grid3VerbForms } from './grid3VerbsParser';
6
+ export type { TDSnapLexiconData, TDSnapLexiconEntry, TDSnapLexiconForm, } from './tdsnapLexiconParser';
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.WordFormGenerator = exports.MorphologyEngine = void 0;
3
+ exports.TDSnapLexiconParser = exports.WordFormGenerator = exports.MorphologyEngine = void 0;
4
4
  var engine_1 = require("./engine");
5
5
  Object.defineProperty(exports, "MorphologyEngine", { enumerable: true, get: function () { return engine_1.MorphologyEngine; } });
6
6
  var wordFormGenerator_1 = require("./wordFormGenerator");
7
7
  Object.defineProperty(exports, "WordFormGenerator", { enumerable: true, get: function () { return wordFormGenerator_1.WordFormGenerator; } });
8
+ var tdsnapLexiconParser_1 = require("./tdsnapLexiconParser");
9
+ Object.defineProperty(exports, "TDSnapLexiconParser", { enumerable: true, get: function () { return tdsnapLexiconParser_1.TDSnapLexiconParser; } });
@@ -0,0 +1,28 @@
1
+ export interface TDSnapLexiconForm {
2
+ tag: string;
3
+ form: string;
4
+ }
5
+ export interface TDSnapLexiconEntry {
6
+ lexemeId: number;
7
+ forms: TDSnapLexiconForm[];
8
+ }
9
+ export interface TDSnapLexiconData {
10
+ locale: string;
11
+ words: Map<string, TDSnapLexiconEntry>;
12
+ }
13
+ export declare class TDSnapLexiconParser {
14
+ parseDb(dbPath: string, locale?: string): TDSnapLexiconData;
15
+ private inferLocale;
16
+ private extractAll;
17
+ lookupWord(data: TDSnapLexiconData, word: string): string[];
18
+ lookupWordByTag(data: TDSnapLexiconData, word: string, tag: string): string[];
19
+ static readonly TAG_TO_POS: Record<string, string>;
20
+ static readonly HANDLER_TAG_MAP: Record<string, string>;
21
+ static parseContentTypeHandler(handler: string): {
22
+ category: string;
23
+ subtype: string;
24
+ params: string[];
25
+ } | null;
26
+ static tagToPos(tag: string): string;
27
+ static handlerToPos(handler: string): string;
28
+ }
@@ -0,0 +1,186 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TDSnapLexiconParser = void 0;
4
/**
 * Parser for a TDSnap lexicon SQLite database, plus static helpers for
 * interpreting Inflector "content type handler" strings.
 *
 * The static lookup tables `TAG_TO_POS` and `HANDLER_TAG_MAP` are assigned to
 * the class after its definition.
 */
class TDSnapLexiconParser {
    /**
     * Open the lexicon database read-only and extract all words and their forms.
     *
     * @param dbPath - Path to the SQLite database file.
     * @param locale - Optional locale label; when falsy it is inferred from `dbPath`.
     * @returns `{ locale, words }` where `words` maps a lower-cased word to
     *          `{ lexemeId, forms: [{ tag, form }] }`.
     */
    parseDb(dbPath, locale) {
        const detectedLocale = locale || this.inferLocale(dbPath);
        // Required inside the method so the module is only loaded when a
        // database is actually parsed.
        // eslint-disable-next-line @typescript-eslint/no-var-requires
        const Database = require('better-sqlite3');
        // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
        const db = new Database(dbPath, { readonly: true });
        try {
            return this.extractAll(db, detectedLocale);
        }
        finally {
            // Always release the database handle, even if extraction throws.
            db.close();
        }
    }
    /**
     * Derive a locale such as `en_US` from a `lang_xx_YY` segment in the path.
     *
     * @param dbPath - Path to inspect.
     * @returns The matched locale text, or `'unknown'` when the path has no such segment.
     */
    inferLocale(dbPath) {
        const match = dbPath.match(/lang_([a-z]{2}_[A-Z]{2})/i);
        return match ? match[1] : 'unknown';
    }
    /**
     * Build the word → forms map from an open database handle.
     *
     * @param db - Handle exposing `prepare(sql)` whose statements offer `get(id)` / `all()`.
     * @param locale - Locale label copied into the result.
     * @returns `{ locale, words }`; a word is only included when it has at least
     *          one form that differs from the word itself (ignoring case).
     */
    extractAll(db, locale) {
        // The subclass-name statement is prepared once, on first use, and every
        // lookup result (including "no name") is cached so each id is queried
        // at most once.
        let subclassStmt;
        const subclassCache = new Map();
        const getSubclass = (id) => {
            if (subclassCache.has(id))
                return subclassCache.get(id);
            if (!subclassStmt) {
                subclassStmt = db.prepare('SELECT Name FROM PosSubclass WHERE Id = ?');
            }
            const name = subclassStmt.get(id)?.Name || undefined;
            subclassCache.set(id, name);
            return name;
        };
        const allWords = db
            .prepare(`SELECT w.Id as wordId, w.Text as text,
                i.Id as inflectionId, i.LexemeId as lexemeId, i.PosSubclassId as posSubclassId
            FROM Word w
            JOIN Spelling s ON s.WordId = w.Id
            JOIN Inflection i ON i.Id = s.InflectionId
            WHERE i.PosSubclassId != 0
            ORDER BY w.Text`)
            .all();
        // lexemeId -> (tag -> Set of spellings); Sets keep first-seen order and
        // drop duplicate spellings without a linear scan.
        const lexemeForms = new Map();
        // lower-cased word -> lexemeId of the first row in which the word appears.
        const wordToLexeme = new Map();
        for (const row of allWords) {
            const lowered = row.text.toLowerCase();
            if (!wordToLexeme.has(lowered)) {
                wordToLexeme.set(lowered, row.lexemeId);
            }
            const tag = getSubclass(row.posSubclassId);
            if (!tag)
                continue;
            let formsByTag = lexemeForms.get(row.lexemeId);
            if (!formsByTag) {
                formsByTag = new Map();
                lexemeForms.set(row.lexemeId, formsByTag);
            }
            let spellings = formsByTag.get(tag);
            if (!spellings) {
                spellings = new Set();
                formsByTag.set(tag, spellings);
            }
            spellings.add(row.text);
        }
        const words = new Map();
        for (const [text, lexemeId] of wordToLexeme) {
            const formsByTag = lexemeForms.get(lexemeId);
            if (!formsByTag || formsByTag.size === 0)
                continue;
            const forms = [];
            for (const [tag, spellings] of formsByTag) {
                for (const formText of spellings) {
                    // A word is never listed as a form of itself.
                    if (formText.toLowerCase() !== text) {
                        forms.push({ tag, form: formText });
                    }
                }
            }
            if (forms.length > 0) {
                words.set(text, { lexemeId, forms });
            }
        }
        return { locale, words };
    }
    /**
     * List every form recorded for a word.
     *
     * @param data - Result of `parseDb` / `extractAll`.
     * @param word - Word to look up (case-insensitive).
     * @returns Form strings, or an empty array when the word is unknown.
     */
    lookupWord(data, word) {
        const entry = data.words.get(word.toLowerCase());
        if (!entry)
            return [];
        return entry.forms.map((f) => f.form);
    }
    /**
     * List the forms of a word that carry one specific tag.
     *
     * @param data - Result of `parseDb` / `extractAll`.
     * @param word - Word to look up (case-insensitive).
     * @param tag - Tag that a form must carry exactly.
     * @returns Matching form strings, or an empty array when none match.
     */
    lookupWordByTag(data, word, tag) {
        const entry = data.words.get(word.toLowerCase());
        if (!entry)
            return [];
        return entry.forms.filter((f) => f.tag === tag).map((f) => f.form);
    }
    /**
     * Split a handler string of the shape `CATEGORY:SUBTYPE,param,...` (or
     * `CATEGORY,param,...` when there is no colon) into its parts.
     *
     * Parameters are whitespace-trimmed in both shapes.
     *
     * @param handler - Raw handler string.
     * @returns `{ category, subtype, params }`, with `subtype` set to `''` when
     *          the handler has no colon; `null` when `handler` is empty/falsy.
     */
    static parseContentTypeHandler(handler) {
        if (!handler)
            return null;
        const trimAll = (items) => items.map((p) => p.trim());
        const colonIdx = handler.indexOf(':');
        if (colonIdx === -1) {
            const [category, ...rawParams] = handler.split(',');
            return { category, subtype: '', params: trimAll(rawParams) };
        }
        const category = handler.substring(0, colonIdx);
        const rest = handler.substring(colonIdx + 1);
        const commaIdx = rest.indexOf(',');
        if (commaIdx === -1) {
            return { category, subtype: rest, params: [] };
        }
        const subtype = rest.substring(0, commaIdx);
        const params = trimAll(rest.substring(commaIdx + 1).split(','));
        return { category, subtype, params };
    }
    /**
     * Map a tag to a part-of-speech name through `TAG_TO_POS`.
     *
     * Only the table's own keys are consulted, so names inherited from
     * `Object.prototype` (e.g. `constructor`) resolve to `'Unknown'`.
     *
     * @param tag - Tag to translate.
     * @returns The part-of-speech name, or `'Unknown'` when the tag is not in the table.
     */
    static tagToPos(tag) {
        const table = TDSnapLexiconParser.TAG_TO_POS;
        return (Object.hasOwn(table, tag) && table[tag]) || 'Unknown';
    }
    /**
     * Map a handler string to a part-of-speech name.
     *
     * `RESET` and `SPECIAL` categories yield `'Ignore'`. Otherwise the
     * `category:subtype` pair is translated to a tag through `HANDLER_TAG_MAP`
     * and then to a part of speech; when the pair is not in that map, the
     * subtype itself is tried as a tag.
     *
     * @param handler - Raw handler string.
     * @returns The part-of-speech name, `'Ignore'`, or `'Unknown'`.
     */
    static handlerToPos(handler) {
        const parsed = TDSnapLexiconParser.parseContentTypeHandler(handler);
        if (!parsed)
            return 'Unknown';
        if (parsed.category === 'RESET' || parsed.category === 'SPECIAL')
            return 'Ignore';
        const key = `${parsed.category}:${parsed.subtype}`;
        const handlerTags = TDSnapLexiconParser.HANDLER_TAG_MAP;
        const tag = Object.hasOwn(handlerTags, key) ? handlerTags[key] : undefined;
        if (tag)
            return TDSnapLexiconParser.tagToPos(tag);
        return TDSnapLexiconParser.tagToPos(parsed.subtype);
    }
}
136
+ exports.TDSnapLexiconParser = TDSnapLexiconParser;
137
+ TDSnapLexiconParser.TAG_TO_POS = {
138
+ V0: 'Verb',
139
+ VZ: 'Verb',
140
+ VG: 'Verb',
141
+ VD: 'Verb',
142
+ VN: 'Verb',
143
+ SNG: 'Noun',
144
+ PLU: 'Noun',
145
+ ADJ: 'Adjective',
146
+ ADJR: 'Adjective',
147
+ ADJT: 'Adjective',
148
+ ADV: 'Adjective',
149
+ SUB: 'Pronoun',
150
+ OBJ: 'Pronoun',
151
+ POS: 'Pronoun',
152
+ NPOS: 'Pronoun',
153
+ REF: 'Pronoun',
154
+ B0: 'Verb',
155
+ BZ: 'Verb',
156
+ BM: 'Verb',
157
+ BR: 'Verb',
158
+ BDZ: 'Verb',
159
+ BDR: 'Verb',
160
+ BG: 'Verb',
161
+ BN: 'Verb',
162
+ };
163
+ TDSnapLexiconParser.HANDLER_TAG_MAP = {
164
+ 'NOUN:PLU': 'PLU',
165
+ 'DESCRIBE:ADJR': 'ADJR',
166
+ 'DESCRIBE:ADJT': 'ADJT',
167
+ 'DESCRIBE:ADV': 'ADV',
168
+ 'VERB:V0': 'V0',
169
+ 'VERB:VZ': 'VZ',
170
+ 'VERB:VG': 'VG',
171
+ 'VERB:VD': 'VD',
172
+ 'VERB:VN': 'VN',
173
+ 'PRONOUN:SUB': 'SUB',
174
+ 'PRONOUN:OBJ': 'OBJ',
175
+ 'PRONOUN:POS': 'POS',
176
+ 'PRONOUN:NPOS': 'NPOS',
177
+ 'PRONOUN:REF': 'REF',
178
+ 'BE:B0': 'B0',
179
+ 'BE:BZ': 'BZ',
180
+ 'BE:BM': 'BM',
181
+ 'BE:BR': 'BR',
182
+ 'BE:BDZ': 'BDZ',
183
+ 'BE:BDR': 'BDR',
184
+ 'BE:BG': 'BG',
185
+ 'BE:BN': 'BN',
186
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@willwade/aac-processors",
3
- "version": "0.2.16",
3
+ "version": "0.2.18",
4
4
  "description": "A comprehensive TypeScript library for processing AAC (Augmentative and Alternative Communication) file formats with translation support",
5
5
  "main": "dist/index.js",
6
6
  "browser": "dist/browser/index.browser.js",
@@ -131,7 +131,7 @@
131
131
  ],
132
132
  "author": {
133
133
  "name": "Will Wade",
134
- "email": "wwade@acecentre.org.uk",
134
+ "email": "will@aactools.co.uk",
135
135
  "url": "https://github.com/willwade"
136
136
  },
137
137
  "license": "MIT",
@@ -140,12 +140,12 @@
140
140
  },
141
141
  "repository": {
142
142
  "type": "git",
143
- "url": "https://github.com/willwade/AACProcessors-nodejs.git"
143
+ "url": "https://github.com/AACTools/AACProcessors-nodejs.git"
144
144
  },
145
145
  "bugs": {
146
- "url": "https://github.com/willwade/AACProcessors-nodejs/issues"
146
+ "url": "https://github.com/AACTools/AACProcessors-nodejs/issues"
147
147
  },
148
- "homepage": "https://github.com/willwade/AACProcessors-nodejs#readme",
148
+ "homepage": "https://github.com/AACTools/AACProcessors-nodejs#readme",
149
149
  "engines": {
150
150
  "node": ">=20.0.0",
151
151
  "npm": ">=9.0.0"