twl-generator 1.4.4 → 1.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/index.js +83 -9
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.4.4",
|
|
3
|
+
"version": "1.4.5",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"csv-parse": "^5.5.6",
|
|
51
51
|
"csv-stringify": "^6.5.0",
|
|
52
52
|
"jszip": "^3.10.1",
|
|
53
|
-
"tsv-quote-converters": "^1.1.
|
|
53
|
+
"tsv-quote-converters": "^1.1.14"
|
|
54
54
|
},
|
|
55
55
|
"peerDependencies": {
|
|
56
56
|
"react": ">=16.8.0"
|
package/src/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { BibleBookData } from './common/books.js';
|
|
2
|
+
import { addGLQuoteCols, convertGLQuotes2OLQuotes } from 'tsv-quote-converters';
|
|
2
3
|
|
|
3
4
|
const isBrowser = typeof window !== 'undefined';
|
|
4
5
|
|
|
@@ -829,6 +830,7 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, twMap, opts
|
|
|
829
830
|
export async function generateTwlByBook(bookCode, options = {}) {
|
|
830
831
|
// Extract dcsHost option with default
|
|
831
832
|
const dcsHost = options.dcsHost || 'https://git.door43.org';
|
|
833
|
+
const quiet = !!options.quiet;
|
|
832
834
|
|
|
833
835
|
// Load terms from en_tw zip file instead of local tw_strongs_list.json
|
|
834
836
|
const termToArticles = await loadTermsFromEnTw(dcsHost);
|
|
@@ -844,8 +846,7 @@ export async function generateTwlByBook(bookCode, options = {}) {
|
|
|
844
846
|
if (!meta) throw new Error(`Unknown book code: ${bookCode}`);
|
|
845
847
|
const versesByChapter = await processUsfmForBook(meta.key, dcsHost);
|
|
846
848
|
|
|
847
|
-
|
|
848
|
-
const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'];
|
|
849
|
+
const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'Variant of', 'Disambiguation'];
|
|
849
850
|
const outRows = [header.join('\t')];
|
|
850
851
|
|
|
851
852
|
// ID generator
|
|
@@ -874,8 +875,22 @@ export async function generateTwlByBook(bookCode, options = {}) {
|
|
|
874
875
|
const isVowel = (ch) => /[aeiou]/i.test(ch);
|
|
875
876
|
const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
|
|
876
877
|
const endsWithCVC = (w) => w.length >= 3 && isConsonant(w[w.length - 3]) && isVowel(w[w.length - 2]) && isConsonant(w[w.length - 1]) && !/[wxy]/i.test(w[w.length - 1]);
|
|
877
|
-
const edForm = (w) =>
|
|
878
|
-
|
|
878
|
+
const edForm = (w) => {
|
|
879
|
+
if (/e$/i.test(w)) return w + 'd';
|
|
880
|
+
if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
|
|
881
|
+
// Do not double the final consonant for words ending in "er" or "en" (e.g., gather -> gathered, open -> opened)
|
|
882
|
+
const lastCh = w[w.length - 1];
|
|
883
|
+
if (endsWithCVC(w) && !/(?:er|en)$/i.test(w)) return w + lastCh + 'ed';
|
|
884
|
+
return w + 'ed';
|
|
885
|
+
};
|
|
886
|
+
const ingForm = (w) => {
|
|
887
|
+
if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
|
|
888
|
+
if (/ee$/i.test(w)) return w + 'ing';
|
|
889
|
+
if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
|
|
890
|
+
const lastCh = w[w.length - 1];
|
|
891
|
+
if (endsWithCVC(w) && !/(?:er|en)$/i.test(w)) return w + lastCh + 'ing';
|
|
892
|
+
return w + 'ing';
|
|
893
|
+
};
|
|
879
894
|
|
|
880
895
|
const allowNoVariant = (base, match) => {
|
|
881
896
|
const b = String(base || '');
|
|
@@ -929,8 +944,6 @@ export async function generateTwlByBook(bookCode, options = {}) {
|
|
|
929
944
|
glq,
|
|
930
945
|
String(occ),
|
|
931
946
|
twLink,
|
|
932
|
-
glq,
|
|
933
|
-
String(occ),
|
|
934
947
|
variantOf,
|
|
935
948
|
disamb,
|
|
936
949
|
].join('\t'));
|
|
@@ -941,13 +954,12 @@ export async function generateTwlByBook(bookCode, options = {}) {
|
|
|
941
954
|
// Build TSV and convert GL OrigWords back to OL using tsv-quote-converters
|
|
942
955
|
let matchedTsv = outRows.join('\n');
|
|
943
956
|
try {
|
|
944
|
-
const { convertGLQuotes2OLQuotes } = await import('tsv-quote-converters');
|
|
945
957
|
const conv = await convertGLQuotes2OLQuotes({
|
|
946
|
-
|
|
958
|
+
bibleLink: 'unfoldingWord/en_ult/master',
|
|
947
959
|
bookCode: String(meta.key || bookCode).toLowerCase(),
|
|
948
960
|
tsvContent: matchedTsv,
|
|
949
961
|
trySeparatorsAndOccurrences: true,
|
|
950
|
-
quiet
|
|
962
|
+
quiet,
|
|
951
963
|
});
|
|
952
964
|
if (conv && typeof conv.output === 'string' && conv.output.length) {
|
|
953
965
|
matchedTsv = conv.output;
|
|
@@ -955,6 +967,68 @@ export async function generateTwlByBook(bookCode, options = {}) {
|
|
|
955
967
|
} catch (e) {
|
|
956
968
|
// If conversion fails (e.g., no network), fall back to unconverted TSV
|
|
957
969
|
}
|
|
970
|
+
|
|
971
|
+
// Now add the actual GLQuote/GLOccurrence by calling addGLQuoteCols
|
|
972
|
+
try {
|
|
973
|
+
const result = await addGLQuoteCols({
|
|
974
|
+
bibleLinks: ['unfoldingWord/en_ult/master'],
|
|
975
|
+
bookCode: String(meta.key || bookCode).toLowerCase(),
|
|
976
|
+
tsvContent: matchedTsv,
|
|
977
|
+
trySeparatorsAndOccurrences: true,
|
|
978
|
+
usePreviousGLQuotes: true,
|
|
979
|
+
quiet,
|
|
980
|
+
});
|
|
981
|
+
if (result && typeof result.output === 'string' && result.output.length) {
|
|
982
|
+
matchedTsv = result.output;
|
|
983
|
+
// Reorder columns: move cols[5] and cols[6] to after cols[7] for every line
|
|
984
|
+
try {
|
|
985
|
+
const lines = String(matchedTsv || '').split('\n');
|
|
986
|
+
for (let i = 0; i < lines.length; i++) {
|
|
987
|
+
const cols = lines[i].split('\t');
|
|
988
|
+
// require at least 8 columns so cols[7] exists
|
|
989
|
+
if (cols.length >= 8) {
|
|
990
|
+
const removed = cols.splice(5, 2); // remove cols[5] and cols[6]
|
|
991
|
+
// after removal, original cols[7] is at index 5, so insert after it at index 6
|
|
992
|
+
const insertIndex = Math.min(6, cols.length);
|
|
993
|
+
cols.splice(insertIndex, 0, ...removed);
|
|
994
|
+
lines[i] = cols.join('\t');
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
matchedTsv = lines.join('\n');
|
|
998
|
+
} catch (err) {
|
|
999
|
+
// leave matchedTsv unchanged on error
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
} catch (e) {
|
|
1003
|
+
try {
|
|
1004
|
+
const lines = String(matchedTsv || '').split('\n');
|
|
1005
|
+
if (lines.length > 0) {
|
|
1006
|
+
lines[0] = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'].join('\t');
|
|
1007
|
+
const out = [lines[0]];
|
|
1008
|
+
for (let i = 1; i < lines.length; i++) {
|
|
1009
|
+
const cols = lines[i].split('\t');
|
|
1010
|
+
const g = (idx) => (cols[idx] !== undefined ? cols[idx] : '');
|
|
1011
|
+
const newRow = [
|
|
1012
|
+
g(0), // Reference
|
|
1013
|
+
g(1), // ID
|
|
1014
|
+
g(2), // Tags
|
|
1015
|
+
g(3), // OrigWords
|
|
1016
|
+
g(4), // Occurrence
|
|
1017
|
+
g(5), // TWLink
|
|
1018
|
+
g(3), // GLQuote (copy of OrigWords)
|
|
1019
|
+
g(4), // GLOccurrence (copy of Occurrence)
|
|
1020
|
+
g(6), // Variant of
|
|
1021
|
+
g(7), // Disambiguation
|
|
1022
|
+
].join('\t');
|
|
1023
|
+
out.push(newRow);
|
|
1024
|
+
}
|
|
1025
|
+
matchedTsv = out.join('\n');
|
|
1026
|
+
}
|
|
1027
|
+
} catch (err) {
|
|
1028
|
+
// leave matchedTsv unchanged on any transformation error
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1031
|
+
|
|
958
1032
|
const noMatchHeader = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Disambiguation'];
|
|
959
1033
|
const noMatchTsv = [noMatchHeader.join('\t')].join('\n');
|
|
960
1034
|
return { matchedTsv, noMatchTsv };
|