twl-generator 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -2
  2. package/src/index.js +41 -11
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.3.3",
3
+ "version": "1.3.5",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -59,4 +59,4 @@
59
59
  "optional": true
60
60
  }
61
61
  }
62
- }
62
+ }
package/src/index.js CHANGED
@@ -512,10 +512,9 @@ function findMatchingArticles(glq, articlesList, termMap, opts = {}) {
512
512
  const termOrig = tobj.orig;
513
513
  if (termOrig) {
514
514
  // Match if the term appears:
515
- // - At word boundary (beginning or end of string, or after/before whitespace or punctuation)
516
- // - After any type of dash (—, –, -)
517
- // This regex ensures we don't match inside other words like "fromever" matching "Rome"
518
- const re3 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(termOrig)}(?=\\b|$|[—–-])`, '');
515
+ // - At word boundary (beginning of word or after dash)
516
+ // - Allow substring matching (e.g., "reap" matches "reapers")
517
+ const re3 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(termOrig)}`, '');
519
518
  if (re3.test(textOrig)) { stage = 3; termHit = termOrig; break; }
520
519
  }
521
520
  }
@@ -592,7 +591,8 @@ function findMatchingArticles(glq, articlesList, termMap, opts = {}) {
592
591
  }
593
592
  } else {
594
593
  // For non-stripped forms, match at word boundaries or after dashes (case-insensitive)
595
- const regex4 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(form)}(?=\\b|$|[—–-])`, 'i');
594
+ // Allow substring matching (e.g., "reap" matches "reapers")
595
+ const regex4 = new RegExp(`(?:^|\\b|[—–-])${escapeRegExp(form)}`, 'i');
596
596
  if (regex4.test(textOrig)) {
597
597
  stage = 4;
598
598
  termHit = termOrig;
@@ -612,7 +612,37 @@ function findMatchingArticles(glq, articlesList, termMap, opts = {}) {
612
612
  return perArticleMatches;
613
613
  }
614
614
 
615
- function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {}) {
615
/**
 * Collect the candidate articles used for disambiguation: every article
 * listed in `strongPivot` for the given Strong's id, followed by every
 * article in `twMap` that carries no Strong's numbers at all ("orphaned").
 *
 * @param {string} strongId - Strong's id to look up. When it has no entry and
 *   ends in a letter suffix a-f (e.g. "H1234a"), the id without the suffix is
 *   looked up instead.
 * @param {Object<string, string[]>} strongPivot - Strong's id -> article list.
 *   A missing pivot is treated as empty.
 * @param {*} termMap - Not read by this function; kept so the positional
 *   signature stays unchanged for existing callers.
 * @param {Object<string, {strongs?: Array}>} twMap - Article -> article data.
 *   A missing map is treated as empty.
 *   NOTE(review): `strongs` is presumably an array of Strong's-id arrays;
 *   confirm against the en_tw data this is built from.
 * @returns {string[]} De-duplicated articles: pivot matches first (in pivot
 *   order), then orphaned articles in `twMap` key order.
 */
function getDisambiguationArticles(strongId, strongPivot, termMap, twMap) {
  const pivot = strongPivot || {};

  let matching = pivot[strongId] || [];
  if (!matching.length && /^(H|G)\d+[a-f]$/.test(strongId)) {
    // Fall back to the base id, e.g. "H1234a" -> "H1234".
    matching = pivot[strongId.slice(0, -1)] || [];
  }

  // The Set keeps insertion order and drops duplicates; the pivot list itself
  // is never mutated.
  const result = new Set(matching);

  // An entry carries Strong's data when it is a non-empty list, or (for a flat
  // `strongs` list) a non-blank string id. Anything else counts as empty.
  const entryHasStrongs = (entry) =>
    (Array.isArray(entry)
      ? entry.length > 0
      : typeof entry === 'string' && entry.trim() !== '');

  // Guard against a missing twMap: Object.entries(undefined) would throw.
  for (const [article, val] of Object.entries(twMap || {})) {
    const strongs = val ? val.strongs : undefined;

    // Orphaned: no usable `strongs` array, or no entry in it carries data.
    const isOrphaned = !Array.isArray(strongs) || !strongs.some(entryHasStrongs);
    if (isOrphaned) {
      result.add(article);
    }
  }

  return Array.from(result);
}
644
+
645
+ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, twMap, opts = {}) {
616
646
  const useCompromise = !!opts.useCompromise;
617
647
  const nlp = opts.nlp;
618
648
  const prioritized = prioritizeArticles(glq, strongId, strongPivot);
@@ -634,11 +664,11 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, opts = {})
634
664
  bestMatches.sort((a, b) => artIndex.get(a.art) - artIndex.get(b.art));
635
665
  const chosenMatch = bestMatches[0];
636
666
 
637
- // For disambiguation, search ALL articles in termMap, not just those with matching Strong's
638
- const allArticles = Array.from(termMap.keys());
639
- const allMatches = findMatchingArticles(glq, allArticles, termMap, { useCompromise, nlp });
667
+ // For disambiguation, search articles with matching Strong's OR articles with empty Strong's lists
668
+ const disambiguationArticles = getDisambiguationArticles(strongId, strongPivot, termMap, twMap);
669
+ const allMatches = findMatchingArticles(glq, disambiguationArticles, termMap, { useCompromise, nlp });
640
670
 
641
- // Disambiguation: list all matched articles (from all articles, not just Strong's filtered)
671
+ // Disambiguation: list all matched articles (from Strong's + empty Strong's filtered articles)
642
672
  const matchesList = allMatches.map(m => m.art);
643
673
  const disamb = matchesList.length > 1 ? `(${matchesList.join(', ')})` : '';
644
674
 
@@ -966,7 +996,7 @@ export async function generateTwlByBook(bookCode, options = {}) {
966
996
  totalRows++;
967
997
  const strongId = cols[H.Strongs];
968
998
  const glq = cols[H.GLQuote] || '';
969
- const result = chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, { useCompromise, nlp });
999
+ const result = chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, twJson, { useCompromise, nlp });
970
1000
  if (!result) {
971
1001
  droppedRows++;
972
1002
  if (noMatchSamples.length < 8) {