twl-generator 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +250 -58
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.3.2",
|
|
3
|
+
"version": "1.3.3",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/index.js
CHANGED
|
@@ -289,17 +289,15 @@ function prioritizeArticles(glq, strongId, strongPivot) {
|
|
|
289
289
|
return slugMatched.concat(restSorted);
|
|
290
290
|
}
|
|
291
291
|
|
|
292
|
-
function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {}) {
|
|
292
|
+
// Helper function to find matching articles from a given list of articles
|
|
293
|
+
function findMatchingArticles(glq, articlesList, termMap, opts = {}) {
|
|
293
294
|
const useCompromise = !!opts.useCompromise;
|
|
294
295
|
const nlp = opts.nlp;
|
|
295
|
-
const prioritized = prioritizeArticles(glq, strongId, strongPivot);
|
|
296
|
-
if (!prioritized.length) return null;
|
|
297
296
|
const textOrig = String(glq || '');
|
|
298
297
|
const textLower = textOrig.toLowerCase();
|
|
299
298
|
const escapeRegExp = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
300
299
|
|
|
301
|
-
//
|
|
302
|
-
// head has no trailing space, last has no leading space. Rejoin with (head ? head+" " : "") + last
|
|
300
|
+
// Same helper functions as in chooseArticleByGlQuote
|
|
303
301
|
const splitHeadLast = (term) => {
|
|
304
302
|
const parts = String(term || '').trim().split(/\s+/);
|
|
305
303
|
if (parts.length <= 1) return { head: '', last: parts[0] || '' };
|
|
@@ -307,7 +305,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
307
305
|
return { head: parts.join(' '), last };
|
|
308
306
|
};
|
|
309
307
|
|
|
310
|
-
// Basic pluralization helper for English terms. Handles common endings and a few irregulars.
|
|
311
308
|
const pluralizeTerm = (term) => {
|
|
312
309
|
const out = new Set();
|
|
313
310
|
const add = (s) => { const v = s.trim(); if (v) out.add(v); };
|
|
@@ -318,7 +315,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
318
315
|
const pluralizeWord = (w) => {
|
|
319
316
|
const lw = w.toLowerCase();
|
|
320
317
|
if (irregular[lw]) return irregular[lw];
|
|
321
|
-
// endings
|
|
322
318
|
if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ies');
|
|
323
319
|
if (/(s|x|z|ch|sh)$/i.test(w)) return w + 'es';
|
|
324
320
|
if (/f$/i.test(w) && !/(roof|belief|chief|proof)$/i.test(w)) return w.replace(/f$/i, 'ves');
|
|
@@ -334,33 +330,30 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
334
330
|
const pl = pluralizeWord(last);
|
|
335
331
|
add([...parts, pl].join(' '));
|
|
336
332
|
}
|
|
337
|
-
// also the simple +s as fallback
|
|
338
333
|
add(term + 's');
|
|
339
334
|
return Array.from(out);
|
|
340
335
|
};
|
|
341
336
|
|
|
342
|
-
// Helpers to form -ing and -ed variants for a single word
|
|
343
337
|
const isVowel = (ch) => /[aeiou]/i.test(ch);
|
|
344
338
|
const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
|
|
345
339
|
const endsWithCVC = (w) => {
|
|
346
340
|
if (w.length < 3) return false;
|
|
347
341
|
const a = w[w.length - 3], b = w[w.length - 2], c = w[w.length - 1];
|
|
348
342
|
if (!isConsonant(a) || !isVowel(b) || !isConsonant(c)) return false;
|
|
349
|
-
// don't double for w, x, y
|
|
350
343
|
if (/[wxy]/i.test(c)) return false;
|
|
351
344
|
return true;
|
|
352
345
|
};
|
|
353
346
|
const presentParticipleWord = (w) => {
|
|
354
|
-
if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
|
|
355
|
-
if (/ee$/i.test(w)) return w + 'ing';
|
|
356
|
-
if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
|
|
357
|
-
if (endsWithCVC(w)) return w + w[w.length - 1] + 'ing';
|
|
347
|
+
if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
|
|
348
|
+
if (/ee$/i.test(w)) return w + 'ing';
|
|
349
|
+
if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
|
|
350
|
+
if (endsWithCVC(w)) return w + w[w.length - 1] + 'ing';
|
|
358
351
|
return w + 'ing';
|
|
359
352
|
};
|
|
360
353
|
const pastTenseWord = (w) => {
|
|
361
|
-
if (/e$/i.test(w)) return w + 'd';
|
|
362
|
-
if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
|
|
363
|
-
if (endsWithCVC(w)) return w + w[w.length - 1] + 'ed';
|
|
354
|
+
if (/e$/i.test(w)) return w + 'd';
|
|
355
|
+
if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
|
|
356
|
+
if (endsWithCVC(w)) return w + w[w.length - 1] + 'ed';
|
|
364
357
|
return w + 'ed';
|
|
365
358
|
};
|
|
366
359
|
const ingEdFormsForTerm = (term) => {
|
|
@@ -378,7 +371,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
378
371
|
return Array.from(forms);
|
|
379
372
|
};
|
|
380
373
|
|
|
381
|
-
// Irregular verb support: small curated map plus reverse lookup
|
|
382
374
|
const irregularVerbMap = {
|
|
383
375
|
be: ['am', 'is', 'are', 'was', 'were', 'been', 'being', 'be'],
|
|
384
376
|
do: ['did', 'done', 'doing', 'does'],
|
|
@@ -444,7 +436,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
444
436
|
}
|
|
445
437
|
return m;
|
|
446
438
|
})();
|
|
447
|
-
// Return full-term variants where only the last word is replaced by its irregular forms set
|
|
448
439
|
const irregularFormsForTerm = (term) => {
|
|
449
440
|
const { head, last } = splitHeadLast(term);
|
|
450
441
|
const baseKey = irregularReverse.get(String(last).toLowerCase());
|
|
@@ -457,9 +448,7 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
457
448
|
return Array.from(acc);
|
|
458
449
|
};
|
|
459
450
|
|
|
460
|
-
// Use compromise to get conjugations for potential verbs
|
|
461
451
|
const conjugationsForTerm = (term) => {
|
|
462
|
-
// mutate only the last word; return full-term variants
|
|
463
452
|
const { head, last } = splitHeadLast(term);
|
|
464
453
|
const forms = new Set();
|
|
465
454
|
if (!useCompromise || !nlp) return Array.from(forms);
|
|
@@ -478,10 +467,10 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
478
467
|
return Array.from(forms);
|
|
479
468
|
};
|
|
480
469
|
|
|
481
|
-
//
|
|
470
|
+
// Find matching articles
|
|
482
471
|
const perArticleMatches = [];
|
|
483
472
|
|
|
484
|
-
for (const art of prioritized) {
|
|
473
|
+
for (const art of articlesList) {
|
|
485
474
|
const terms = termMap.get(art) || [];
|
|
486
475
|
let stage = 0;
|
|
487
476
|
let termHit = '';
|
|
@@ -493,7 +482,6 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
493
482
|
const termOrig = tobj.orig;
|
|
494
483
|
const alts = new Set([termOrig]);
|
|
495
484
|
for (const a of pluralizeTerm(termOrig)) alts.add(a);
|
|
496
|
-
// add irregular forms for last word; and conjugations when enabled
|
|
497
485
|
for (const a of irregularFormsForTerm(termOrig)) alts.add(a);
|
|
498
486
|
for (const a of conjugationsForTerm(termOrig)) alts.add(a);
|
|
499
487
|
for (const alt of alts) {
|
|
@@ -518,67 +506,100 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
518
506
|
if (stage === 2) break;
|
|
519
507
|
}
|
|
520
508
|
}
|
|
521
|
-
// Stage 3: case-sensitive, substring
|
|
509
|
+
// Stage 3: case-sensitive, substring matching at word boundaries or after dashes
|
|
522
510
|
if (stage === 0) {
|
|
523
511
|
for (const tobj of terms) {
|
|
524
512
|
const termOrig = tobj.orig;
|
|
525
|
-
if (termOrig
|
|
513
|
+
if (termOrig) {
|
|
514
|
+
// Match if the term appears:
|
|
515
|
+
// - At word boundary (beginning or end of string, or after/before whitespace or punctuation)
|
|
516
|
+
// - After any type of dash (—, –, -)
|
|
517
|
+
// This regex ensures we don't match inside other words like "fromever" matching "Rome"
|
|
518
|
+
const re3 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(termOrig)}(?=\\b|$|[—–-])`, '');
|
|
519
|
+
if (re3.test(textOrig)) { stage = 3; termHit = termOrig; break; }
|
|
520
|
+
}
|
|
526
521
|
}
|
|
527
522
|
}
|
|
528
|
-
// Stage 4: case-insensitive, substring on derived stripped forms
|
|
529
|
-
// mutating only the last word for multi-word terms
|
|
523
|
+
// Stage 4: case-insensitive, substring on derived stripped forms
|
|
530
524
|
if (stage === 0) {
|
|
531
525
|
const strippedForms = (base) => {
|
|
532
526
|
const { head, last } = splitHeadLast(base);
|
|
533
527
|
const prefix = head ? head + ' ' : '';
|
|
534
|
-
const
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
528
|
+
const results = [];
|
|
529
|
+
|
|
530
|
+
const addIf = (form, isStripped = false) => {
|
|
531
|
+
const v = String(form || '').trim().toLowerCase();
|
|
532
|
+
if (v && v.length >= 3) {
|
|
533
|
+
results.push({ form: v, isStripped });
|
|
534
|
+
}
|
|
538
535
|
};
|
|
536
|
+
|
|
539
537
|
const addFromLast = (w) => {
|
|
540
538
|
const lw = String(w || '').toLowerCase();
|
|
541
539
|
if (!lw) return;
|
|
542
540
|
const full = prefix + lw;
|
|
543
|
-
addIf(full);
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
if (/
|
|
547
|
-
if (/
|
|
548
|
-
if (/
|
|
549
|
-
if (/
|
|
550
|
-
if (/
|
|
541
|
+
addIf(full, false); // Always add the full form
|
|
542
|
+
|
|
543
|
+
// Add stripped variants, marking them as stripped
|
|
544
|
+
if (/y$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
|
|
545
|
+
if (/e$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
|
|
546
|
+
if (/ing$/i.test(lw)) addIf(prefix + lw.slice(0, -3), true);
|
|
547
|
+
if (/ed$/i.test(lw)) addIf(prefix + lw.slice(0, -2), true);
|
|
548
|
+
if (/es$/i.test(lw)) addIf(prefix + lw.slice(0, -2), true);
|
|
549
|
+
if (/s$/i.test(lw) && !/ss$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
|
|
551
550
|
};
|
|
551
|
+
|
|
552
552
|
const addYEOnlyFromLast = (w) => {
|
|
553
553
|
const lw = String(w || '').toLowerCase();
|
|
554
554
|
if (!lw) return;
|
|
555
555
|
const full = prefix + lw;
|
|
556
|
-
addIf(full);
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
if (/
|
|
556
|
+
addIf(full, false); // Always add the full form
|
|
557
|
+
|
|
558
|
+
// Add Y/E stripped variants, marking them as stripped
|
|
559
|
+
if (/y$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
|
|
560
|
+
if (/e$/i.test(lw)) addIf(prefix + lw.slice(0, -1), true);
|
|
560
561
|
};
|
|
561
|
-
|
|
562
|
+
|
|
562
563
|
addFromLast(last);
|
|
563
|
-
// For conjugations/irregulars of the last word, only drop final y/e
|
|
564
564
|
for (const x of conjugationsForTerm(base)) {
|
|
565
565
|
const { head: h2, last: l2 } = splitHeadLast(x);
|
|
566
|
-
// ensure we only consider variants that kept the same head
|
|
567
566
|
if ((h2 || '') === (head || '')) addYEOnlyFromLast(l2);
|
|
568
567
|
}
|
|
569
568
|
for (const x of irregularFormsForTerm(base)) {
|
|
570
569
|
const { head: h2, last: l2 } = splitHeadLast(x);
|
|
571
570
|
if ((h2 || '') === (head || '')) addYEOnlyFromLast(l2);
|
|
572
571
|
}
|
|
573
|
-
return
|
|
572
|
+
return results;
|
|
574
573
|
};
|
|
574
|
+
|
|
575
575
|
outerStrip:
|
|
576
576
|
for (const tobj of terms) {
|
|
577
577
|
const termOrig = tobj.orig;
|
|
578
|
-
const
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
if (
|
|
578
|
+
const formResults = strippedForms(termOrig);
|
|
579
|
+
|
|
580
|
+
for (const { form, isStripped } of formResults) {
|
|
581
|
+
if (!form) continue;
|
|
582
|
+
|
|
583
|
+
if (isStripped) {
|
|
584
|
+
// For stripped forms, we need to be more careful about matching
|
|
585
|
+
// Only match if the stripped form is followed by a grammatical ending
|
|
586
|
+
const regex = new RegExp(escapeRegExp(form) + '(ed|ing|er|est|es|ies|s|d|n|t)\\b', 'i');
|
|
587
|
+
if (regex.test(textLower)) {
|
|
588
|
+
stage = 4;
|
|
589
|
+
termHit = termOrig;
|
|
590
|
+
truncated = false;
|
|
591
|
+
break outerStrip;
|
|
592
|
+
}
|
|
593
|
+
} else {
|
|
594
|
+
// For non-stripped forms, match at word boundaries or after dashes (case-insensitive)
|
|
595
|
+
const regex4 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(form)}(?=\\b|$|[—–-])`, 'i');
|
|
596
|
+
if (regex4.test(textOrig)) {
|
|
597
|
+
stage = 4;
|
|
598
|
+
termHit = termOrig;
|
|
599
|
+
truncated = false;
|
|
600
|
+
break outerStrip;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
582
603
|
}
|
|
583
604
|
}
|
|
584
605
|
}
|
|
@@ -588,18 +609,37 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
588
609
|
}
|
|
589
610
|
}
|
|
590
611
|
|
|
591
|
-
|
|
612
|
+
return perArticleMatches;
|
|
613
|
+
}
|
|
592
614
|
|
|
593
|
-
|
|
594
|
-
const
|
|
595
|
-
const
|
|
615
|
+
function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {}) {
|
|
616
|
+
const useCompromise = !!opts.useCompromise;
|
|
617
|
+
const nlp = opts.nlp;
|
|
618
|
+
const prioritized = prioritizeArticles(glq, strongId, strongPivot);
|
|
619
|
+
if (!prioritized.length) return null;
|
|
620
|
+
const textOrig = String(glq || '');
|
|
621
|
+
const textLower = textOrig.toLowerCase();
|
|
622
|
+
const escapeRegExp = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
623
|
+
|
|
624
|
+
// Find matches among prioritized articles (those with matching Strong's numbers) for TWLink selection
|
|
625
|
+
const prioritizedMatches = findMatchingArticles(glq, prioritized, termMap, { useCompromise, nlp });
|
|
626
|
+
|
|
627
|
+
if (!prioritizedMatches.length) return null;
|
|
628
|
+
|
|
629
|
+
// Determine best stage among prioritized matches for TWLink selection
|
|
630
|
+
const bestStage = Math.min(...prioritizedMatches.map(m => m.stage));
|
|
631
|
+
const bestMatches = prioritizedMatches.filter(m => m.stage === bestStage);
|
|
596
632
|
// Among best matches, pick the one that appears earliest in prioritized list
|
|
597
633
|
const artIndex = new Map(prioritized.map((a, i) => [a, i]));
|
|
598
634
|
bestMatches.sort((a, b) => artIndex.get(a.art) - artIndex.get(b.art));
|
|
599
635
|
const chosenMatch = bestMatches[0];
|
|
600
636
|
|
|
601
|
-
//
|
|
602
|
-
const
|
|
637
|
+
// For disambiguation, search ALL articles in termMap, not just those with matching Strong's
|
|
638
|
+
const allArticles = Array.from(termMap.keys());
|
|
639
|
+
const allMatches = findMatchingArticles(glq, allArticles, termMap, { useCompromise, nlp });
|
|
640
|
+
|
|
641
|
+
// Disambiguation: list all matched articles (from all articles, not just Strong's filtered)
|
|
642
|
+
const matchesList = allMatches.map(m => m.art);
|
|
603
643
|
const disamb = matchesList.length > 1 ? `(${matchesList.join(', ')})` : '';
|
|
604
644
|
|
|
605
645
|
const isVariant = (chosenMatch.stage >= 3) || chosenMatch.truncated;
|
|
@@ -608,6 +648,158 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
|
|
|
608
648
|
// article matches on word-boundaries case-insensitively, then do NOT mark as variant.
|
|
609
649
|
if (variantTerm) {
|
|
610
650
|
const termObjs = termMap.get(chosenMatch.art) || [];
|
|
651
|
+
|
|
652
|
+
// Helper functions needed for variant checking
|
|
653
|
+
const pluralizeTerm = (term) => {
|
|
654
|
+
const out = new Set();
|
|
655
|
+
const add = (s) => { const v = s.trim(); if (v) out.add(v); };
|
|
656
|
+
const irregular = {
|
|
657
|
+
man: 'men', woman: 'women', person: 'people', child: 'children',
|
|
658
|
+
foot: 'feet', tooth: 'teeth', goose: 'geese', mouse: 'mice', ox: 'oxen',
|
|
659
|
+
};
|
|
660
|
+
const pluralizeWord = (w) => {
|
|
661
|
+
const lw = w.toLowerCase();
|
|
662
|
+
if (irregular[lw]) return irregular[lw];
|
|
663
|
+
if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ies');
|
|
664
|
+
if (/(s|x|z|ch|sh)$/i.test(w)) return w + 'es';
|
|
665
|
+
if (/f$/i.test(w) && !/(roof|belief|chief|proof)$/i.test(w)) return w.replace(/f$/i, 'ves');
|
|
666
|
+
if (/fe$/i.test(w)) return w.replace(/fe$/i, 'ves');
|
|
667
|
+
if (/o$/i.test(w)) return w + 'es';
|
|
668
|
+
return w + 's';
|
|
669
|
+
};
|
|
670
|
+
const parts = term.split(/\s+/);
|
|
671
|
+
if (parts.length === 1) {
|
|
672
|
+
add(pluralizeWord(term));
|
|
673
|
+
} else {
|
|
674
|
+
const last = parts.pop();
|
|
675
|
+
const pl = pluralizeWord(last);
|
|
676
|
+
add([...parts, pl].join(' '));
|
|
677
|
+
}
|
|
678
|
+
add(term + 's');
|
|
679
|
+
return Array.from(out);
|
|
680
|
+
};
|
|
681
|
+
|
|
682
|
+
const splitHeadLast = (term) => {
|
|
683
|
+
const parts = String(term || '').trim().split(/\s+/);
|
|
684
|
+
if (parts.length <= 1) return { head: '', last: parts[0] || '' };
|
|
685
|
+
const last = parts.pop();
|
|
686
|
+
return { head: parts.join(' '), last };
|
|
687
|
+
};
|
|
688
|
+
|
|
689
|
+
const isVowel = (ch) => /[aeiou]/i.test(ch);
|
|
690
|
+
const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
|
|
691
|
+
const endsWithCVC = (w) => {
|
|
692
|
+
if (w.length < 3) return false;
|
|
693
|
+
const a = w[w.length - 3], b = w[w.length - 2], c = w[w.length - 1];
|
|
694
|
+
if (!isConsonant(a) || !isVowel(b) || !isConsonant(c)) return false;
|
|
695
|
+
if (/[wxy]/i.test(c)) return false;
|
|
696
|
+
return true;
|
|
697
|
+
};
|
|
698
|
+
const presentParticipleWord = (w) => {
|
|
699
|
+
if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
|
|
700
|
+
if (/ee$/i.test(w)) return w + 'ing';
|
|
701
|
+
if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
|
|
702
|
+
if (endsWithCVC(w)) return w + w[w.length - 1] + 'ing';
|
|
703
|
+
return w + 'ing';
|
|
704
|
+
};
|
|
705
|
+
const pastTenseWord = (w) => {
|
|
706
|
+
if (/e$/i.test(w)) return w + 'd';
|
|
707
|
+
if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
|
|
708
|
+
if (endsWithCVC(w)) return w + w[w.length - 1] + 'ed';
|
|
709
|
+
return w + 'ed';
|
|
710
|
+
};
|
|
711
|
+
const ingEdFormsForTerm = (term) => {
|
|
712
|
+
const forms = new Set();
|
|
713
|
+
const parts = term.split(/\s+/);
|
|
714
|
+
if (parts.length === 1) {
|
|
715
|
+
forms.add(presentParticipleWord(term));
|
|
716
|
+
forms.add(pastTenseWord(term));
|
|
717
|
+
} else {
|
|
718
|
+
const last = parts.pop();
|
|
719
|
+
const base = parts.join(' ');
|
|
720
|
+
forms.add((base ? base + ' ' : '') + presentParticipleWord(last));
|
|
721
|
+
forms.add((base ? base + ' ' : '') + pastTenseWord(last));
|
|
722
|
+
}
|
|
723
|
+
return Array.from(forms);
|
|
724
|
+
};
|
|
725
|
+
|
|
726
|
+
const irregularVerbMap = {
|
|
727
|
+
be: ['am', 'is', 'are', 'was', 'were', 'been', 'being', 'be'],
|
|
728
|
+
do: ['did', 'done', 'doing', 'does'],
|
|
729
|
+
go: ['went', 'gone', 'going', 'goes'],
|
|
730
|
+
have: ['had', 'having', 'has'],
|
|
731
|
+
say: ['said', 'saying', 'says'],
|
|
732
|
+
see: ['saw', 'seen', 'seeing', 'sees'],
|
|
733
|
+
get: ['got', 'gotten', 'getting', 'gets'],
|
|
734
|
+
make: ['made', 'making', 'makes'],
|
|
735
|
+
take: ['took', 'taken', 'taking', 'takes'],
|
|
736
|
+
come: ['came', 'coming', 'comes'],
|
|
737
|
+
know: ['knew', 'known', 'knowing', 'knows'],
|
|
738
|
+
give: ['gave', 'given', 'giving', 'gives'],
|
|
739
|
+
find: ['found', 'finding', 'finds'],
|
|
740
|
+
think: ['thought', 'thinking', 'thinks'],
|
|
741
|
+
tell: ['told', 'telling', 'tells'],
|
|
742
|
+
become: ['became', 'become', 'becoming', 'becomes'],
|
|
743
|
+
show: ['showed', 'shown', 'showing', 'shows'],
|
|
744
|
+
leave: ['left', 'leaving', 'leaves'],
|
|
745
|
+
feel: ['felt', 'feeling', 'feels'],
|
|
746
|
+
put: ['put', 'putting', 'puts'],
|
|
747
|
+
bring: ['brought', 'bringing', 'brings'],
|
|
748
|
+
begin: ['began', 'begun', 'beginning', 'begins'],
|
|
749
|
+
keep: ['kept', 'keeping', 'keeps'],
|
|
750
|
+
hold: ['held', 'holding', 'holds'],
|
|
751
|
+
write: ['wrote', 'written', 'writing', 'writes'],
|
|
752
|
+
stand: ['stood', 'standing', 'stands'],
|
|
753
|
+
hear: ['heard', 'hearing', 'hears'],
|
|
754
|
+
let: ['let', 'letting', 'lets'],
|
|
755
|
+
mean: ['meant', 'meaning', 'means'],
|
|
756
|
+
set: ['set', 'setting', 'sets'],
|
|
757
|
+
meet: ['met', 'meeting', 'meets'],
|
|
758
|
+
run: ['ran', 'running', 'runs'],
|
|
759
|
+
pay: ['paid', 'paying', 'pays'],
|
|
760
|
+
sit: ['sat', 'sitting', 'sits'],
|
|
761
|
+
speak: ['spoke', 'spoken', 'speaking', 'speaks'],
|
|
762
|
+
lie: ['lay', 'lain', 'lying', 'lies'],
|
|
763
|
+
lead: ['led', 'leading', 'leads'],
|
|
764
|
+
read: ['read', 'reading', 'reads'],
|
|
765
|
+
grow: ['grew', 'grown', 'growing', 'grows'],
|
|
766
|
+
fall: ['fell', 'fallen', 'falling', 'falls'],
|
|
767
|
+
send: ['sent', 'sending', 'sends'],
|
|
768
|
+
build: ['built', 'building', 'builds'],
|
|
769
|
+
understand: ['understood', 'understanding', 'understands'],
|
|
770
|
+
draw: ['drew', 'drawn', 'drawing', 'draws'],
|
|
771
|
+
break: ['broke', 'broken', 'breaking', 'breaks'],
|
|
772
|
+
spend: ['spent', 'spending', 'spends'],
|
|
773
|
+
cut: ['cut', 'cutting', 'cuts'],
|
|
774
|
+
rise: ['rose', 'risen', 'rising', 'rises'],
|
|
775
|
+
drive: ['drove', 'driven', 'driving', 'drives'],
|
|
776
|
+
buy: ['bought', 'buying', 'buys'],
|
|
777
|
+
wear: ['wore', 'worn', 'wearing', 'wears'],
|
|
778
|
+
swear: ['swore', 'sworn', 'swearing', 'swears'],
|
|
779
|
+
drink: ['drank', 'drunk', 'drinking', 'drinks'],
|
|
780
|
+
eat: ['ate', 'eaten', 'eating', 'eats'],
|
|
781
|
+
choose: ['chose', 'chosen', 'choosing', 'chooses'],
|
|
782
|
+
};
|
|
783
|
+
const irregularReverse = (() => {
|
|
784
|
+
const m = new Map();
|
|
785
|
+
for (const [base, forms] of Object.entries(irregularVerbMap)) {
|
|
786
|
+
m.set(base.toLowerCase(), base);
|
|
787
|
+
for (const f of forms) m.set(String(f).toLowerCase(), base);
|
|
788
|
+
}
|
|
789
|
+
return m;
|
|
790
|
+
})();
|
|
791
|
+
const irregularFormsForTerm = (term) => {
|
|
792
|
+
const { head, last } = splitHeadLast(term);
|
|
793
|
+
const baseKey = irregularReverse.get(String(last).toLowerCase());
|
|
794
|
+
const acc = new Set();
|
|
795
|
+
if (baseKey) {
|
|
796
|
+
const prefix = head ? head + ' ' : '';
|
|
797
|
+
acc.add(prefix + baseKey);
|
|
798
|
+
for (const f of irregularVerbMap[baseKey] || []) acc.add(prefix + f);
|
|
799
|
+
}
|
|
800
|
+
return Array.from(acc);
|
|
801
|
+
};
|
|
802
|
+
|
|
611
803
|
const hasWordBoundMatch = termObjs.some(tobj => {
|
|
612
804
|
const termOrig = tobj.orig;
|
|
613
805
|
if (!termOrig) return false;
|