twl-generator 1.3.2 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +250 -58
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.3.2",
3
+ "version": "1.3.3",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -289,17 +289,15 @@ function prioritizeArticles(glq, strongId, strongPivot) {
289
289
  return slugMatched.concat(restSorted);
290
290
  }
291
291
 
292
- function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {}) {
292
+ // Helper function to find matching articles from a given list of articles
293
+ function findMatchingArticles(glq, articlesList, termMap, opts = {}) {
293
294
  const useCompromise = !!opts.useCompromise;
294
295
  const nlp = opts.nlp;
295
- const prioritized = prioritizeArticles(glq, strongId, strongPivot);
296
- if (!prioritized.length) return null;
297
296
  const textOrig = String(glq || '');
298
297
  const textLower = textOrig.toLowerCase();
299
298
  const escapeRegExp = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
300
299
 
301
- // Utility: split a term into head (all but last word) and last word.
302
- // head has no trailing space, last has no leading space. Rejoin with (head ? head+" " : "") + last
300
+ // Same helper functions as in chooseArticleByGlQuote
303
301
  const splitHeadLast = (term) => {
304
302
  const parts = String(term || '').trim().split(/\s+/);
305
303
  if (parts.length <= 1) return { head: '', last: parts[0] || '' };
@@ -307,7 +305,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
307
305
  return { head: parts.join(' '), last };
308
306
  };
309
307
 
310
- // Basic pluralization helper for English terms. Handles common endings and a few irregulars.
311
308
  const pluralizeTerm = (term) => {
312
309
  const out = new Set();
313
310
  const add = (s) => { const v = s.trim(); if (v) out.add(v); };
@@ -318,7 +315,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
318
315
  const pluralizeWord = (w) => {
319
316
  const lw = w.toLowerCase();
320
317
  if (irregular[lw]) return irregular[lw];
321
- // endings
322
318
  if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ies');
323
319
  if (/(s|x|z|ch|sh)$/i.test(w)) return w + 'es';
324
320
  if (/f$/i.test(w) && !/(roof|belief|chief|proof)$/i.test(w)) return w.replace(/f$/i, 'ves');
@@ -334,33 +330,30 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
334
330
  const pl = pluralizeWord(last);
335
331
  add([...parts, pl].join(' '));
336
332
  }
337
- // also the simple +s as fallback
338
333
  add(term + 's');
339
334
  return Array.from(out);
340
335
  };
341
336
 
342
- // Helpers to form -ing and -ed variants for a single word
343
337
  const isVowel = (ch) => /[aeiou]/i.test(ch);
344
338
  const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
345
339
  const endsWithCVC = (w) => {
346
340
  if (w.length < 3) return false;
347
341
  const a = w[w.length - 3], b = w[w.length - 2], c = w[w.length - 1];
348
342
  if (!isConsonant(a) || !isVowel(b) || !isConsonant(c)) return false;
349
- // don't double for w, x, y
350
343
  if (/[wxy]/i.test(c)) return false;
351
344
  return true;
352
345
  };
353
346
  const presentParticipleWord = (w) => {
354
- if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying'); // tie -> tying
355
- if (/ee$/i.test(w)) return w + 'ing'; // see -> seeing
356
- if (/e$/i.test(w)) return w.replace(/e$/i, 'ing'); // make -> making
357
- if (endsWithCVC(w)) return w + w[w.length - 1] + 'ing'; // run -> running
347
+ if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
348
+ if (/ee$/i.test(w)) return w + 'ing';
349
+ if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
350
+ if (endsWithCVC(w)) return w + w[w.length - 1] + 'ing';
358
351
  return w + 'ing';
359
352
  };
360
353
  const pastTenseWord = (w) => {
361
- if (/e$/i.test(w)) return w + 'd'; // move -> moved
362
- if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied'); // carry -> carried
363
- if (endsWithCVC(w)) return w + w[w.length - 1] + 'ed'; // stop -> stopped
354
+ if (/e$/i.test(w)) return w + 'd';
355
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
356
+ if (endsWithCVC(w)) return w + w[w.length - 1] + 'ed';
364
357
  return w + 'ed';
365
358
  };
366
359
  const ingEdFormsForTerm = (term) => {
@@ -378,7 +371,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
378
371
  return Array.from(forms);
379
372
  };
380
373
 
381
- // Irregular verb support: small curated map plus reverse lookup
382
374
  const irregularVerbMap = {
383
375
  be: ['am', 'is', 'are', 'was', 'were', 'been', 'being', 'be'],
384
376
  do: ['did', 'done', 'doing', 'does'],
@@ -444,7 +436,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
444
436
  }
445
437
  return m;
446
438
  })();
447
- // Return full-term variants where only the last word is replaced by its irregular forms set
448
439
  const irregularFormsForTerm = (term) => {
449
440
  const { head, last } = splitHeadLast(term);
450
441
  const baseKey = irregularReverse.get(String(last).toLowerCase());
@@ -457,9 +448,7 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
457
448
  return Array.from(acc);
458
449
  };
459
450
 
460
- // Use compromise to get conjugations for potential verbs
461
451
  const conjugationsForTerm = (term) => {
462
- // mutate only the last word; return full-term variants
463
452
  const { head, last } = splitHeadLast(term);
464
453
  const forms = new Set();
465
454
  if (!useCompromise || !nlp) return Array.from(forms);
@@ -478,10 +467,10 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
478
467
  return Array.from(forms);
479
468
  };
480
469
 
481
- // Compute earliest stage match per article, then choose best stage overall with priority tie-breaker
470
+ // Find matching articles
482
471
  const perArticleMatches = [];
483
472
 
484
- for (const art of prioritized) {
473
+ for (const art of articlesList) {
485
474
  const terms = termMap.get(art) || [];
486
475
  let stage = 0;
487
476
  let termHit = '';
@@ -493,7 +482,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
493
482
  const termOrig = tobj.orig;
494
483
  const alts = new Set([termOrig]);
495
484
  for (const a of pluralizeTerm(termOrig)) alts.add(a);
496
- // add irregular forms for last word; and conjugations when enabled
497
485
  for (const a of irregularFormsForTerm(termOrig)) alts.add(a);
498
486
  for (const a of conjugationsForTerm(termOrig)) alts.add(a);
499
487
  for (const alt of alts) {
@@ -518,67 +506,100 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
518
506
  if (stage === 2) break;
519
507
  }
520
508
  }
521
- // Stage 3: case-sensitive, substring (no word-boundary)
509
+ // Stage 3: case-sensitive, substring matching at word boundaries or after dashes
522
510
  if (stage === 0) {
523
511
  for (const tobj of terms) {
524
512
  const termOrig = tobj.orig;
525
- if (termOrig && textOrig.includes(termOrig)) { stage = 3; termHit = termOrig; break; }
513
+ if (termOrig) {
514
+ // Match if the term appears:
515
+ // - At word boundary (beginning or end of string, or after/before whitespace or punctuation)
516
+ // - After any type of dash (—, –, -)
517
+ // This regex ensures we don't match inside other words like "fromever" matching "Rome"
518
+ const re3 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(termOrig)}(?=\\b|$|[—–-])`, '');
519
+ if (re3.test(textOrig)) { stage = 3; termHit = termOrig; break; }
520
+ }
526
521
  }
527
522
  }
528
- // Stage 4: case-insensitive, substring on derived stripped forms (no iterative truncation),
529
- // mutating only the last word for multi-word terms
523
+ // Stage 4: case-insensitive, substring on derived stripped forms
530
524
  if (stage === 0) {
531
525
  const strippedForms = (base) => {
532
526
  const { head, last } = splitHeadLast(base);
533
527
  const prefix = head ? head + ' ' : '';
534
- const forms = new Set();
535
- const addIf = (s) => {
536
- const v = String(s || '').trim().toLowerCase();
537
- if (v && v.length >= 3) forms.add(v);
528
+ const results = [];
529
+
530
+ const addIf = (form, isStripped = false) => {
531
+ const v = String(form || '').trim().toLowerCase();
532
+ if (v && v.length >= 3) {
533
+ results.push({ form: v, isStripped });
534
+ }
538
535
  };
536
+
539
537
  const addFromLast = (w) => {
540
538
  const lw = String(w || '').toLowerCase();
541
539
  if (!lw) return;
542
540
  const full = prefix + lw;
543
- addIf(full);
544
- const addVar = (x) => addIf(prefix + x);
545
- if (/y$/i.test(lw)) addVar(lw.slice(0, -1));
546
- if (/e$/i.test(lw)) addVar(lw.slice(0, -1));
547
- if (/ing$/i.test(lw)) addVar(lw.slice(0, -3));
548
- if (/ed$/i.test(lw)) addVar(lw.slice(0, -2));
549
- if (/es$/i.test(lw)) addVar(lw.slice(0, -2));
550
- if (/s$/i.test(lw) && !/ss$/i.test(lw)) addVar(lw.slice(0, -1));
541
+ addIf(full, false); // Always add the full form
542
+
543
+ // Add stripped variants, marking them as stripped
544
+ if (/y$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
545
+ if (/e$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
546
+ if (/ing$/i.test(lw)) addIf(prefix + lw.slice(0, -3), true);
547
+ if (/ed$/i.test(lw)) addIf(prefix + lw.slice(0, -2), true);
548
+ if (/es$/i.test(lw)) addIf(prefix + lw.slice(0, -2), true);
549
+ if (/s$/i.test(lw) && !/ss$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
551
550
  };
551
+
552
552
  const addYEOnlyFromLast = (w) => {
553
553
  const lw = String(w || '').toLowerCase();
554
554
  if (!lw) return;
555
555
  const full = prefix + lw;
556
- addIf(full);
557
- const addVar = (x) => addIf(prefix + x);
558
- if (/y$/i.test(lw)) addVar(lw.slice(0, -1));
559
- if (/e$/i.test(lw)) addVar(lw.slice(0, -1));
556
+ addIf(full, false); // Always add the full form
557
+
558
+ // Add Y/E stripped variants, marking them as stripped
559
+ if (/y$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
560
+ if (/e$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
560
561
  };
561
- // base last word and its stripped variants
562
+
562
563
  addFromLast(last);
563
- // For conjugations/irregulars of the last word, only drop final y/e
564
564
  for (const x of conjugationsForTerm(base)) {
565
565
  const { head: h2, last: l2 } = splitHeadLast(x);
566
- // ensure we only consider variants that kept the same head
567
566
  if ((h2 || '') === (head || '')) addYEOnlyFromLast(l2);
568
567
  }
569
568
  for (const x of irregularFormsForTerm(base)) {
570
569
  const { head: h2, last: l2 } = splitHeadLast(x);
571
570
  if ((h2 || '') === (head || '')) addYEOnlyFromLast(l2);
572
571
  }
573
- return Array.from(forms);
572
+ return results;
574
573
  };
574
+
575
575
  outerStrip:
576
576
  for (const tobj of terms) {
577
577
  const termOrig = tobj.orig;
578
- const forms = strippedForms(termOrig);
579
- for (const f of forms) {
580
- if (!f) continue;
581
- if (textLower.includes(f)) { stage = 4; termHit = termOrig; truncated = false; break outerStrip; }
578
+ const formResults = strippedForms(termOrig);
579
+
580
+ for (const { form, isStripped } of formResults) {
581
+ if (!form) continue;
582
+
583
+ if (isStripped) {
584
+ // For stripped forms, we need to be more careful about matching
585
+ // Only match if the stripped form is followed by a grammatical ending
586
+ const regex = new RegExp(escapeRegExp(form) + '(ed|ing|er|est|es|ies|s|d|n|t)\\b', 'i');
587
+ if (regex.test(textLower)) {
588
+ stage = 4;
589
+ termHit = termOrig;
590
+ truncated = false;
591
+ break outerStrip;
592
+ }
593
+ } else {
594
+ // For non-stripped forms, match at word boundaries or after dashes (case-insensitive)
595
+ const regex4 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(form)}(?=\\b|$|[—–-])`, 'i');
596
+ if (regex4.test(textOrig)) {
597
+ stage = 4;
598
+ termHit = termOrig;
599
+ truncated = false;
600
+ break outerStrip;
601
+ }
602
+ }
582
603
  }
583
604
  }
584
605
  }
@@ -588,18 +609,37 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
588
609
  }
589
610
  }
590
611
 
591
- if (!perArticleMatches.length) return null;
612
+ return perArticleMatches;
613
+ }
592
614
 
593
- // Determine best stage among all matches
594
- const bestStage = Math.min(...perArticleMatches.map(m => m.stage));
595
- const bestMatches = perArticleMatches.filter(m => m.stage === bestStage);
615
+ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {}) {
616
+ const useCompromise = !!opts.useCompromise;
617
+ const nlp = opts.nlp;
618
+ const prioritized = prioritizeArticles(glq, strongId, strongPivot);
619
+ if (!prioritized.length) return null;
620
+ const textOrig = String(glq || '');
621
+ const textLower = textOrig.toLowerCase();
622
+ const escapeRegExp = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
623
+
624
+ // Find matches among prioritized articles (those with matching Strong's numbers) for TWLink selection
625
+ const prioritizedMatches = findMatchingArticles(glq, prioritized, termMap, { useCompromise, nlp });
626
+
627
+ if (!prioritizedMatches.length) return null;
628
+
629
+ // Determine best stage among prioritized matches for TWLink selection
630
+ const bestStage = Math.min(...prioritizedMatches.map(m => m.stage));
631
+ const bestMatches = prioritizedMatches.filter(m => m.stage === bestStage);
596
632
  // Among best matches, pick the one that appears earliest in prioritized list
597
633
  const artIndex = new Map(prioritized.map((a, i) => [a, i]));
598
634
  bestMatches.sort((a, b) => artIndex.get(a.art) - artIndex.get(b.art));
599
635
  const chosenMatch = bestMatches[0];
600
636
 
601
- // Disambiguation: list all matched articles
602
- const matchesList = perArticleMatches.map(m => m.art);
637
+ // For disambiguation, search ALL articles in termMap, not just those with matching Strong's
638
+ const allArticles = Array.from(termMap.keys());
639
+ const allMatches = findMatchingArticles(glq, allArticles, termMap, { useCompromise, nlp });
640
+
641
+ // Disambiguation: list all matched articles (from all articles, not just Strong's filtered)
642
+ const matchesList = allMatches.map(m => m.art);
603
643
  const disamb = matchesList.length > 1 ? `(${matchesList.join(', ')})` : '';
604
644
 
605
645
  const isVariant = (chosenMatch.stage >= 3) || chosenMatch.truncated;
@@ -608,6 +648,158 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
608
648
  // article matches on word-boundaries case-insensitively, then do NOT mark as variant.
609
649
  if (variantTerm) {
610
650
  const termObjs = termMap.get(chosenMatch.art) || [];
651
+
652
+ // Helper functions needed for variant checking
653
+ const pluralizeTerm = (term) => {
654
+ const out = new Set();
655
+ const add = (s) => { const v = s.trim(); if (v) out.add(v); };
656
+ const irregular = {
657
+ man: 'men', woman: 'women', person: 'people', child: 'children',
658
+ foot: 'feet', tooth: 'teeth', goose: 'geese', mouse: 'mice', ox: 'oxen',
659
+ };
660
+ const pluralizeWord = (w) => {
661
+ const lw = w.toLowerCase();
662
+ if (irregular[lw]) return irregular[lw];
663
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ies');
664
+ if (/(s|x|z|ch|sh)$/i.test(w)) return w + 'es';
665
+ if (/f$/i.test(w) && !/(roof|belief|chief|proof)$/i.test(w)) return w.replace(/f$/i, 'ves');
666
+ if (/fe$/i.test(w)) return w.replace(/fe$/i, 'ves');
667
+ if (/o$/i.test(w)) return w + 'es';
668
+ return w + 's';
669
+ };
670
+ const parts = term.split(/\s+/);
671
+ if (parts.length === 1) {
672
+ add(pluralizeWord(term));
673
+ } else {
674
+ const last = parts.pop();
675
+ const pl = pluralizeWord(last);
676
+ add([...parts, pl].join(' '));
677
+ }
678
+ add(term + 's');
679
+ return Array.from(out);
680
+ };
681
+
682
+ const splitHeadLast = (term) => {
683
+ const parts = String(term || '').trim().split(/\s+/);
684
+ if (parts.length <= 1) return { head: '', last: parts[0] || '' };
685
+ const last = parts.pop();
686
+ return { head: parts.join(' '), last };
687
+ };
688
+
689
+ const isVowel = (ch) => /[aeiou]/i.test(ch);
690
+ const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
691
+ const endsWithCVC = (w) => {
692
+ if (w.length < 3) return false;
693
+ const a = w[w.length - 3], b = w[w.length - 2], c = w[w.length - 1];
694
+ if (!isConsonant(a) || !isVowel(b) || !isConsonant(c)) return false;
695
+ if (/[wxy]/i.test(c)) return false;
696
+ return true;
697
+ };
698
+ const presentParticipleWord = (w) => {
699
+ if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
700
+ if (/ee$/i.test(w)) return w + 'ing';
701
+ if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
702
+ if (endsWithCVC(w)) return w + w[w.length - 1] + 'ing';
703
+ return w + 'ing';
704
+ };
705
+ const pastTenseWord = (w) => {
706
+ if (/e$/i.test(w)) return w + 'd';
707
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
708
+ if (endsWithCVC(w)) return w + w[w.length - 1] + 'ed';
709
+ return w + 'ed';
710
+ };
711
+ const ingEdFormsForTerm = (term) => {
712
+ const forms = new Set();
713
+ const parts = term.split(/\s+/);
714
+ if (parts.length === 1) {
715
+ forms.add(presentParticipleWord(term));
716
+ forms.add(pastTenseWord(term));
717
+ } else {
718
+ const last = parts.pop();
719
+ const base = parts.join(' ');
720
+ forms.add((base ? base + ' ' : '') + presentParticipleWord(last));
721
+ forms.add((base ? base + ' ' : '') + pastTenseWord(last));
722
+ }
723
+ return Array.from(forms);
724
+ };
725
+
726
+ const irregularVerbMap = {
727
+ be: ['am', 'is', 'are', 'was', 'were', 'been', 'being', 'be'],
728
+ do: ['did', 'done', 'doing', 'does'],
729
+ go: ['went', 'gone', 'going', 'goes'],
730
+ have: ['had', 'having', 'has'],
731
+ say: ['said', 'saying', 'says'],
732
+ see: ['saw', 'seen', 'seeing', 'sees'],
733
+ get: ['got', 'gotten', 'getting', 'gets'],
734
+ make: ['made', 'making', 'makes'],
735
+ take: ['took', 'taken', 'taking', 'takes'],
736
+ come: ['came', 'coming', 'comes'],
737
+ know: ['knew', 'known', 'knowing', 'knows'],
738
+ give: ['gave', 'given', 'giving', 'gives'],
739
+ find: ['found', 'finding', 'finds'],
740
+ think: ['thought', 'thinking', 'thinks'],
741
+ tell: ['told', 'telling', 'tells'],
742
+ become: ['became', 'become', 'becoming', 'becomes'],
743
+ show: ['showed', 'shown', 'showing', 'shows'],
744
+ leave: ['left', 'leaving', 'leaves'],
745
+ feel: ['felt', 'feeling', 'feels'],
746
+ put: ['put', 'putting', 'puts'],
747
+ bring: ['brought', 'bringing', 'brings'],
748
+ begin: ['began', 'begun', 'beginning', 'begins'],
749
+ keep: ['kept', 'keeping', 'keeps'],
750
+ hold: ['held', 'holding', 'holds'],
751
+ write: ['wrote', 'written', 'writing', 'writes'],
752
+ stand: ['stood', 'standing', 'stands'],
753
+ hear: ['heard', 'hearing', 'hears'],
754
+ let: ['let', 'letting', 'lets'],
755
+ mean: ['meant', 'meaning', 'means'],
756
+ set: ['set', 'setting', 'sets'],
757
+ meet: ['met', 'meeting', 'meets'],
758
+ run: ['ran', 'running', 'runs'],
759
+ pay: ['paid', 'paying', 'pays'],
760
+ sit: ['sat', 'sitting', 'sits'],
761
+ speak: ['spoke', 'spoken', 'speaking', 'speaks'],
762
+ lie: ['lay', 'lain', 'lying', 'lies'],
763
+ lead: ['led', 'leading', 'leads'],
764
+ read: ['read', 'reading', 'reads'],
765
+ grow: ['grew', 'grown', 'growing', 'grows'],
766
+ fall: ['fell', 'fallen', 'falling', 'falls'],
767
+ send: ['sent', 'sending', 'sends'],
768
+ build: ['built', 'building', 'builds'],
769
+ understand: ['understood', 'understanding', 'understands'],
770
+ draw: ['drew', 'drawn', 'drawing', 'draws'],
771
+ break: ['broke', 'broken', 'breaking', 'breaks'],
772
+ spend: ['spent', 'spending', 'spends'],
773
+ cut: ['cut', 'cutting', 'cuts'],
774
+ rise: ['rose', 'risen', 'rising', 'rises'],
775
+ drive: ['drove', 'driven', 'driving', 'drives'],
776
+ buy: ['bought', 'buying', 'buys'],
777
+ wear: ['wore', 'worn', 'wearing', 'wears'],
778
+ swear: ['swore', 'sworn', 'swearing', 'swears'],
779
+ drink: ['drank', 'drunk', 'drinking', 'drinks'],
780
+ eat: ['ate', 'eaten', 'eating', 'eats'],
781
+ choose: ['chose', 'chosen', 'choosing', 'chooses'],
782
+ };
783
+ const irregularReverse = (() => {
784
+ const m = new Map();
785
+ for (const [base, forms] of Object.entries(irregularVerbMap)) {
786
+ m.set(base.toLowerCase(), base);
787
+ for (const f of forms) m.set(String(f).toLowerCase(), base);
788
+ }
789
+ return m;
790
+ })();
791
+ const irregularFormsForTerm = (term) => {
792
+ const { head, last } = splitHeadLast(term);
793
+ const baseKey = irregularReverse.get(String(last).toLowerCase());
794
+ const acc = new Set();
795
+ if (baseKey) {
796
+ const prefix = head ? head + ' ' : '';
797
+ acc.add(prefix + baseKey);
798
+ for (const f of irregularVerbMap[baseKey] || []) acc.add(prefix + f);
799
+ }
800
+ return Array.from(acc);
801
+ };
802
+
611
803
  const hasWordBoundMatch = termObjs.some(tobj => {
612
804
  const termOrig = tobj.orig;
613
805
  if (!termOrig) return false;