twl-generator 1.2.5 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/twl-matcher.js +27 -19
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.2.5",
|
|
3
|
+
"version": "1.2.7",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/utils/twl-matcher.js
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
function generateVariants(term) {
|
|
5
5
|
const variants = new Set([term]);
|
|
6
6
|
|
|
7
|
+
const nouns = ['doe', 'deer', 'father'];
|
|
8
|
+
|
|
7
9
|
// Handle pluralization - simple 's' removal (but not for words ending in 'ss')
|
|
8
10
|
if (term.endsWith('s') && term.length > 2 && !term.endsWith('ss') && !term.endsWith('es')) {
|
|
9
11
|
variants.add(term.slice(0, -1)); // dogs -> dog (but not does -> doe)
|
|
@@ -31,12 +33,12 @@ function generateVariants(term) {
|
|
|
31
33
|
variants.add(term.slice(0, -1) + 'ies'); // city -> cities
|
|
32
34
|
}
|
|
33
35
|
|
|
34
|
-
// Handle possessive forms
|
|
35
|
-
variants.add(term + "'s");
|
|
36
|
-
variants.add(term + "'");
|
|
37
|
-
if (term.endsWith('s')) {
|
|
38
|
-
|
|
39
|
-
}
|
|
36
|
+
// // Handle possessive forms
|
|
37
|
+
// variants.add(term + "'s");
|
|
38
|
+
// variants.add(term + "'");
|
|
39
|
+
// if (term.endsWith('s')) {
|
|
40
|
+
// variants.add(term + "'");
|
|
41
|
+
// }
|
|
40
42
|
|
|
41
43
|
// Handle -ed forms - but only for legitimate verb patterns
|
|
42
44
|
if (term.endsWith('ed') && term.length > 4) {
|
|
@@ -55,19 +57,21 @@ function generateVariants(term) {
|
|
|
55
57
|
}
|
|
56
58
|
}
|
|
57
59
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
60
|
+
if (!nouns.includes(term)) {
|
|
61
|
+
// Double consonant handling for -ed/-ing
|
|
62
|
+
if (/[bcdfghjklmnpqrstvwxyz][aeiou][bcdfghjklmnpqrstvwxyz]$/.test(term)) {
|
|
63
|
+
variants.add(term + term.slice(-1) + 'ed'); // stop -> stopped
|
|
64
|
+
variants.add(term + term.slice(-1) + 'ing'); // stop -> stopping
|
|
65
|
+
}
|
|
63
66
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
67
|
+
// Regular -ed/-ing addition
|
|
68
|
+
if (!term.endsWith('e')) {
|
|
69
|
+
variants.add(term + 'ed');
|
|
70
|
+
variants.add(term + 'ing');
|
|
71
|
+
} else {
|
|
72
|
+
variants.add(term.slice(0, -1) + 'ed'); // love -> loved
|
|
73
|
+
variants.add(term.slice(0, -1) + 'ing'); // love -> loving
|
|
74
|
+
}
|
|
71
75
|
}
|
|
72
76
|
|
|
73
77
|
return Array.from(variants);
|
|
@@ -207,7 +211,11 @@ function createOptimizedTermMap(twTerms) {
|
|
|
207
211
|
|
|
208
212
|
// Generate and add variants for single words only to avoid exponential explosion
|
|
209
213
|
if (!originalTerm.includes(' ')) {
|
|
210
|
-
|
|
214
|
+
let variants = new Set([originalTerm]);
|
|
215
|
+
if (!articles[0].startsWith('names/') && !articles[1]?.startsWith('names/')) {
|
|
216
|
+
variants = generateVariants(originalTerm);
|
|
217
|
+
}
|
|
218
|
+
console.log(variants)
|
|
211
219
|
for (const variant of variants) {
|
|
212
220
|
if (variant !== originalTerm) {
|
|
213
221
|
trie.insert(variant, originalTerm, articles, false);
|