twl-generator 1.2.9 → 1.2.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.2.9",
3
+ "version": "1.2.11",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -56,4 +56,4 @@
56
56
  "optional": true
57
57
  }
58
58
  }
59
- }
59
+ }
@@ -1,37 +1,37 @@
1
1
  /**
2
2
  * Generate morphological variants of a term
3
3
  */
4
- function generateVariants(term) {
4
+ function generateVariants(term, isName = false) {
5
5
  const variants = new Set([term]);
6
6
 
7
- const nouns = ['doe', 'deer', 'father', 'Father'];
8
- const do_not_pluralize = ['doe'];
9
- const do_not_depluralize = [];
7
+ const isNoun = ['horn', 'mare', 'steed', 'horse', 'doe', 'deer', 'father', 'Father'].includes(term) || isName;
8
+ const doNotPluralize = ['doe'].includes(term);
9
+ const doNotDepluralize = ['kids'].includes(term) || isName;
10
10
 
11
11
  // Handle pluralization - simple 's' removal (but not for words ending in 'ss')
12
- if (term.endsWith('s') && term.length > 2 && !term.endsWith('ss') && !term.endsWith('es') && !do_not_depluralize.includes(term)) {
12
+ if (term.endsWith('s') && term.length > 2 && !term.endsWith('ss') && !term.endsWith('es') && !doNotDepluralize) {
13
13
  variants.add(term.slice(0, -1)); // dogs -> dog (but not does -> doe)
14
- } else if (!do_not_pluralize.includes(term)) {
14
+ } else if (!doNotPluralize) {
15
15
  variants.add(term + 's'); // dog -> dogs
16
16
  }
17
17
 
18
18
  // Handle 'es' endings - but only for legitimate plural patterns
19
- if (term.endsWith('es') && term.length > 4 && !do_not_depluralize.includes(term)) {
19
+ if (term.endsWith('es') && term.length > 4 && !doNotDepluralize) {
20
20
  const base = term.slice(0, -2);
21
21
  // Only if the base word would naturally take 'es' plural
22
22
  if (/[sxz]$|[cs]h$/.test(base)) {
23
23
  variants.add(base); // horses -> horse, churches -> church
24
24
  }
25
- } else if (term.endsWith('e') && !do_not_pluralize.includes(term)) {
25
+ } else if (term.endsWith('e') && !doNotPluralize) {
26
26
  variants.add(term + 's'); // horse -> horses
27
- } else if (/[sxz]$|[cs]h$/.test(term) && !do_not_pluralize.includes(term)) {
27
+ } else if (/[sxz]$|[cs]h$/.test(term) && !doNotPluralize) {
28
28
  variants.add(term + 'es'); // church -> churches
29
29
  }
30
30
 
31
31
  // Handle 'ies' endings for words ending in 'y'
32
- if (term.endsWith('ies') && term.length > 4 && !do_not_depluralize.includes(term)) {
32
+ if (term.endsWith('ies') && term.length > 4 && !doNotDepluralize) {
33
33
  variants.add(term.slice(0, -3) + 'y'); // cities -> city
34
- } else if (term.endsWith('y') && term.length > 2 && !/[aeiou]y$/.test(term) && !do_not_pluralize.includes(term)) {
34
+ } else if (term.endsWith('y') && term.length > 2 && !/[aeiou]y$/.test(term) && !doNotPluralize) {
35
35
  variants.add(term.slice(0, -1) + 'ies'); // city -> cities
36
36
  }
37
37
 
@@ -42,24 +42,26 @@ function generateVariants(term) {
42
42
  // variants.add(term + "'");
43
43
  // }
44
44
 
45
- if (!nouns.includes(term)) {
46
- // Handle -ed forms - but only for legitimate verb patterns
47
- if (term.endsWith('ed') && term.length > 4) {
48
- const base = term.slice(0, -2);
49
- // Only create base form if it looks like a legitimate verb stem
50
- if (base.length > 2) {
51
- variants.add(base); // walked -> walk
52
- }
53
- }
45
+ // if (!isNoun) {
46
+ // // Handle -ed forms - but only for legitimate verb patterns
47
+ // if (term.endsWith('ed') && term.length > 4) {
48
+ // const base = term.slice(0, -2);
49
+ // // Only create base form if it looks like a legitimate verb stem
50
+ // if (base.length > 2) {
51
+ // variants.add(base); // walked -> walk
52
+ // }
53
+ // }
54
+
55
+ // // Handle -ing forms
56
+ // if (term.endsWith('ing') && term.length > 5) {
57
+ // const base = term.slice(0, -3);
58
+ // if (base.length > 2) {
59
+ // variants.add(base); // walking -> walk
60
+ // }
61
+ // }
54
62
 
55
- // Handle -ing forms
56
- if (term.endsWith('ing') && term.length > 5) {
57
- const base = term.slice(0, -3);
58
- if (base.length > 2) {
59
- variants.add(base); // walking -> walk
60
- }
61
- }
62
63
 
64
+ if (!isNoun) {
63
65
  // Double consonant handling for -ed/-ing
64
66
  if (/[bcdfghjklmnpqrstvwxyz][aeiou][bcdfghjklmnpqrstvwxyz]$/.test(term)) {
65
67
  variants.add(term + term.slice(-1) + 'ed'); // stop -> stopped
@@ -76,6 +78,12 @@ function generateVariants(term) {
76
78
  }
77
79
  }
78
80
 
81
+ for (const variant of Array.from(variants)) {
82
+ if (variant.length > 0 && variant[0] === variant[0].toLowerCase() && /[a-z]/.test(variant[0])) {
83
+ variants.add(variant[0].toUpperCase() + variant.slice(1));
84
+ }
85
+ }
86
+
79
87
  return Array.from(variants);
80
88
  }
81
89
 
@@ -214,9 +222,8 @@ function createOptimizedTermMap(twTerms) {
214
222
  // Generate and add variants for single words only to avoid exponential explosion
215
223
  if (!originalTerm.includes(' ')) {
216
224
  let variants = new Set([originalTerm]);
217
- if (!articles[0].startsWith('names/') && !articles[1]?.startsWith('names/')) {
218
- variants = generateVariants(originalTerm);
219
- }
225
+ const isName = articles[0].startsWith('names/') || articles[1]?.startsWith('names/')
226
+ variants = generateVariants(originalTerm, isName);
220
227
  console.log(variants)
221
228
  for (const variant of variants) {
222
229
  if (variant !== originalTerm) {
@@ -302,7 +309,7 @@ function generateId() {
302
309
  for (let i = 0; i < 3; i++) {
303
310
  id += lettersAndDigits[Math.floor(Math.random() * lettersAndDigits.length)];
304
311
  }
305
- return 'abcd' || id;
312
+ return id;
306
313
  }
307
314
 
308
315
  /**