twl-generator 1.4.7 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.4.7",
3
+ "version": "1.4.9",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -866,17 +866,32 @@ export async function generateTwlByBook(bookCode, options = {}) {
866
866
 
867
867
  // Helpers for Variant of decision (allow only plural/-ed/-ing without marking variant)
868
868
  const pluralizeWord = (w) => {
869
- return new Inflectors(w).toPlural();
869
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ies');
870
+ if (/(s|x|z|ch|sh)$/i.test(w)) return w + 'es';
871
+ if (/f$/i.test(w) && !/(roof|belief|chief|proof)$/i.test(w)) return w.replace(/f$/i, 'ves');
872
+ if (/fe$/i.test(w)) return w.replace(/fe$/i, 'ves');
873
+ if (/o$/i.test(w)) return w + 'es';
874
+ return w + 's';
870
875
  };
876
+ const isVowel = (ch) => /[aeiou]/i.test(ch);
877
+ const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
878
+ const endsWithCVC = (w) => w.length >= 3 && isConsonant(w[w.length - 3]) && isVowel(w[w.length - 2]) && isConsonant(w[w.length - 1]) && !/[wxy]/i.test(w[w.length - 1]);
871
879
  const edForm = (w) => {
872
- return new Inflectors(w).toPast()
880
+ if (/e$/i.test(w)) return w + 'd';
881
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
882
+ // Do not double the final consonant for words ending in "er", "en", "or", "on", or "al" (e.g., gather -> gathered)
883
+ const lastCh = w[w.length - 1];
884
+ if (endsWithCVC(w) && !/(?:er|en|or|on|al)$/i.test(w)) return w + lastCh + 'ed';
885
+ return w + 'ed';
873
886
  };
874
887
  const ingForm = (w) => {
875
- return new Inflectors(w).toGerund()
888
+ if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
889
+ if (/ee$/i.test(w)) return w + 'ing';
890
+ if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
891
+ const lastCh = w[w.length - 1];
892
+ if (endsWithCVC(w) && !/(?:er|en|or|on|al)$/i.test(w)) return w + lastCh + 'ing';
893
+ return w + 'ing';
876
894
  };
877
- const singularForm = (w) => {
878
- return new Inflectors(w).toSingular();
879
- }
880
895
 
881
896
  const allowNoVariant = (base, match) => {
882
897
  const b = String(base || '');
@@ -888,9 +903,12 @@ export async function generateTwlByBook(bookCode, options = {}) {
888
903
  const last = parts[parts.length - 1];
889
904
  const allowed = new Set([
890
905
  head + pluralizeWord(last),
906
+ head + new Inflectors(last).toPlural(),
907
+ head + new Inflectors(last).toSingular(),
891
908
  head + edForm(last),
909
+ head + new Inflectors(last).toPast(),
892
910
  head + ingForm(last),
893
- head + singularForm(last),
911
+ head + new Inflectors(last).toGerund(),
894
912
  ].map(x => x.toLowerCase()));
895
913
  return allowed.has(m.toLowerCase());
896
914
  };
@@ -43,12 +43,21 @@ async function processZipBuffer(zipBuffer) {
43
43
  for (const term of terms) {
44
44
  // Normalize terms by removing a trailing parenthetical and the whitespace before it
45
45
  // e.g., "Joseph (OT)" -> "Joseph", "Mary (sister of Martha)" -> "Mary"
46
- const normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();
46
+ let normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();
47
+ // Strip leading articles, demonstratives, and possessive pronouns (allow repeated prefixes)
48
+ const prefixRegex = /^(?:(?:a|an|the|this|that|these|those|my|your|his|her|its|our|their)\s+)+/i;
49
+ let cleaned = normalizedTerm.trim();
50
+ while (prefixRegex.test(cleaned)) {
51
+ cleaned = cleaned.replace(prefixRegex, '').trim();
52
+ }
53
+ normalizedTerm = cleaned;
47
54
 
48
55
  if (!termMap[normalizedTerm]) {
49
56
  termMap[normalizedTerm] = [];
50
57
  }
51
- termMap[normalizedTerm].push(truncated);
58
+ if (!termMap[normalizedTerm].includes(truncated)) {
59
+ termMap[normalizedTerm].push(truncated);
60
+ }
52
61
  }
53
62
  }
54
63