twl-generator 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -3
- package/src/cli.js +8 -6
- package/src/index.js +11 -56
- package/src/utils/twl-matcher.js +85 -13
- package/src/utils/usfm-alignment-remover.js +2 -2
- package/src/utils/zipProcessor.js +14 -210
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.4.2",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -46,9 +46,10 @@
|
|
|
46
46
|
"node": ">=18.0.0"
|
|
47
47
|
},
|
|
48
48
|
"dependencies": {
|
|
49
|
+
"compromise": "^14.14.2",
|
|
49
50
|
"csv-parse": "^5.5.6",
|
|
50
51
|
"csv-stringify": "^6.5.0",
|
|
51
|
-
"
|
|
52
|
+
"jszip": "^3.10.1",
|
|
52
53
|
"tsv-quote-converters": "^1.1.13"
|
|
53
54
|
},
|
|
54
55
|
"peerDependencies": {
|
|
@@ -59,4 +60,4 @@
|
|
|
59
60
|
"optional": true
|
|
60
61
|
}
|
|
61
62
|
}
|
|
62
|
-
}
|
|
63
|
+
}
|
package/src/cli.js
CHANGED
|
@@ -15,7 +15,7 @@ async function readBooksJs() {
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
function parseArgs(argv) {
|
|
18
|
-
const args = { book: '', out: '', outDir: '', all: false, useCompromise: false };
|
|
18
|
+
const args = { book: '', out: '', outDir: '', all: false, useCompromise: false, dcsHost: 'https://git.door43.org' };
|
|
19
19
|
for (let i = 2; i < argv.length; i++) {
|
|
20
20
|
const a = argv[i];
|
|
21
21
|
if (a === '--book' || a === '-b') { args.book = argv[++i] || ''; }
|
|
@@ -23,21 +23,22 @@ function parseArgs(argv) {
|
|
|
23
23
|
else if (a === '--out-dir' || a === '-O') { args.outDir = argv[++i] || ''; }
|
|
24
24
|
else if (a === '--all' || a === '-A') { args.all = true; }
|
|
25
25
|
else if (a === '--use-compromise') { args.useCompromise = true; }
|
|
26
|
+
else if (a === '--dcs') { args.dcsHost = argv[++i] || 'https://git.door43.org'; }
|
|
26
27
|
}
|
|
27
28
|
return args;
|
|
28
29
|
}
|
|
29
30
|
|
|
30
31
|
async function main() {
|
|
31
|
-
const { book, out, outDir, all, useCompromise } = parseArgs(process.argv);
|
|
32
|
+
const { book, out, outDir, all, useCompromise, dcsHost } = parseArgs(process.argv);
|
|
32
33
|
if (all || (book && book.toLowerCase() === 'all')) {
|
|
33
34
|
const books = await readBooksJs();
|
|
34
35
|
const codes = Object.keys(books);
|
|
35
36
|
const destDir = outDir ? path.resolve(outDir) : path.resolve(THIS_DIR, '..'); // default to twl-generator dir
|
|
36
37
|
await fs.mkdir(destDir, { recursive: true });
|
|
37
|
-
console.error(`Generating TWL for ${codes.length} books to ${destDir} (useCompromise=${useCompromise})`);
|
|
38
|
+
console.error(`Generating TWL for ${codes.length} books to ${destDir} (useCompromise=${useCompromise}, dcsHost=${dcsHost})`);
|
|
38
39
|
for (const code of codes) {
|
|
39
40
|
try {
|
|
40
|
-
const { matchedTsv, noMatchTsv } = await generateTwlByBook(code, { useCompromise });
|
|
41
|
+
const { matchedTsv, noMatchTsv } = await generateTwlByBook(code, { useCompromise, dcsHost });
|
|
41
42
|
const fname = `${code.toLowerCase()}.twl.tsv`;
|
|
42
43
|
const outPath = path.join(destDir, fname);
|
|
43
44
|
await fs.writeFile(outPath, matchedTsv, 'utf8');
|
|
@@ -52,11 +53,12 @@ async function main() {
|
|
|
52
53
|
}
|
|
53
54
|
|
|
54
55
|
if (!book) {
|
|
55
|
-
console.error('Usage: generate-twl --book <code>|all [--out <file.tsv> | --out-dir <dir>] [--use-compromise]');
|
|
56
|
+
console.error('Usage: generate-twl --book <code>|all [--out <file.tsv> | --out-dir <dir>] [--use-compromise] [--dcs <host>]');
|
|
57
|
+
console.error(' --dcs defaults to https://git.door43.org');
|
|
56
58
|
process.exit(1);
|
|
57
59
|
}
|
|
58
60
|
|
|
59
|
-
const { matchedTsv, noMatchTsv } = await generateTwlByBook(book, { useCompromise });
|
|
61
|
+
const { matchedTsv, noMatchTsv } = await generateTwlByBook(book, { useCompromise, dcsHost });
|
|
60
62
|
if (out) {
|
|
61
63
|
const outPath = path.resolve(out);
|
|
62
64
|
await fs.writeFile(outPath, matchedTsv, 'utf8');
|
package/src/index.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { BibleBookData } from './common/books.js';
|
|
2
2
|
|
|
3
3
|
const isBrowser = typeof window !== 'undefined';
|
|
4
|
-
const TW_JSON_URL = new URL('../tw_strongs_list.json', import.meta.url);
|
|
5
4
|
|
|
6
5
|
async function readBooks() {
|
|
7
6
|
// Build a simple CODE -> { usfm, testament } map from the local BibleBookData
|
|
@@ -20,29 +19,11 @@ function findBookMeta(bookMap, code) {
|
|
|
20
19
|
return { key, ...meta };
|
|
21
20
|
}
|
|
22
21
|
|
|
23
|
-
async function
|
|
24
|
-
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
const json = await res.json();
|
|
29
|
-
const b64 = json.content || '';
|
|
30
|
-
|
|
31
|
-
if (isBrowser) {
|
|
32
|
-
// Browser: use atob and TextDecoder
|
|
33
|
-
const binary = atob(b64);
|
|
34
|
-
const bytes = Uint8Array.from(binary, c => c.charCodeAt(0));
|
|
35
|
-
const decoder = new TextDecoder('utf-8');
|
|
36
|
-
return decoder.decode(bytes);
|
|
37
|
-
} else {
|
|
38
|
-
// Node.js: use Buffer
|
|
39
|
-
const { Buffer } = await import('node:buffer');
|
|
40
|
-
const buf = Buffer.from(b64, 'base64');
|
|
41
|
-
return buf.toString('utf8');
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
function pivotByStrong(twMap) {
|
|
22
|
+
async function loadTermsFromEnTw(dcsHost = 'https://git.door43.org') {
|
|
23
|
+
// Use the updated zipProcessor that accepts dcsHost
|
|
24
|
+
const { generateTWTerms } = await import('./utils/zipProcessor.js');
|
|
25
|
+
return await generateTWTerms(dcsHost);
|
|
26
|
+
} function pivotByStrong(twMap) {
|
|
46
27
|
// Build two structures:
|
|
47
28
|
// 1) singles: strong -> Set(articles) including base (strip letter suffix)
|
|
48
29
|
// 2) seqFirst: base-first-strong -> [{ article, seqBase, len }] preserving order in twMap
|
|
@@ -210,23 +191,6 @@ function buildInitialTsv(usfm, strongPivot, bookCode) {
|
|
|
210
191
|
return tsv;
|
|
211
192
|
}
|
|
212
193
|
|
|
213
|
-
async function loadTwJsonLocal() {
|
|
214
|
-
if (isBrowser) {
|
|
215
|
-
// In browser, try to fetch from public path
|
|
216
|
-
const url = '/tw_strongs_list.json';
|
|
217
|
-
const res = await fetch(url);
|
|
218
|
-
if (!res.ok) throw new Error(`Failed to fetch tw_strongs_list.json: ${res.status}`);
|
|
219
|
-
return await res.json();
|
|
220
|
-
} else {
|
|
221
|
-
// In Node.js, read from file system
|
|
222
|
-
const fs = await import('node:fs/promises');
|
|
223
|
-
const { fileURLToPath } = await import('node:url');
|
|
224
|
-
const filePath = fileURLToPath(TW_JSON_URL);
|
|
225
|
-
const raw = await fs.readFile(filePath, 'utf8');
|
|
226
|
-
return JSON.parse(raw);
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
|
|
230
194
|
function buildArticleTermMap(twMap) {
|
|
231
195
|
// Normalize helper: remove only trailing parenthetical notes and collapse whitespace
|
|
232
196
|
const stripParensTrim = (s) => String(s || '').replace(/\s*\([^)]*\)\s*$/, '').replace(/\s+/g, ' ').trim();
|
|
@@ -863,20 +827,11 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, twMap, opts
|
|
|
863
827
|
}
|
|
864
828
|
|
|
865
829
|
export async function generateTwlByBook(bookCode, options = {}) {
|
|
866
|
-
//
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
const terms = (val && val.article && Array.isArray(val.article.terms)) ? val.article.terms : [];
|
|
872
|
-
for (const raw of terms) {
|
|
873
|
-
const term = String(raw || '').replace(/\s*\([^)]*\)\s*$/, '').trim();
|
|
874
|
-
if (!term) continue;
|
|
875
|
-
if (!termToArticles[term]) termToArticles[term] = [];
|
|
876
|
-
// Use slug as-is (e.g., kt/grace)
|
|
877
|
-
termToArticles[term].push(article);
|
|
878
|
-
}
|
|
879
|
-
}
|
|
830
|
+
// Extract dcsHost option with default
|
|
831
|
+
const dcsHost = options.dcsHost || 'https://git.door43.org';
|
|
832
|
+
|
|
833
|
+
// Load terms from en_tw zip file instead of local tw_strongs_list.json
|
|
834
|
+
const termToArticles = await loadTermsFromEnTw(dcsHost);
|
|
880
835
|
|
|
881
836
|
// Build trie for fast scanning
|
|
882
837
|
const { buildTermTrie, scanVerseMatches } = await import('./utils/twl-matcher.js');
|
|
@@ -887,7 +842,7 @@ export async function generateTwlByBook(bookCode, options = {}) {
|
|
|
887
842
|
const bibleData = await readBooks();
|
|
888
843
|
const meta = findBookMeta(bibleData, bookCode);
|
|
889
844
|
if (!meta) throw new Error(`Unknown book code: ${bookCode}`);
|
|
890
|
-
const versesByChapter = await processUsfmForBook(meta.key);
|
|
845
|
+
const versesByChapter = await processUsfmForBook(meta.key, dcsHost);
|
|
891
846
|
|
|
892
847
|
// Header without Strongs; keep GLQuote/GLOccurrence and add Variant of, Disambiguation
|
|
893
848
|
const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'];
|
package/src/utils/twl-matcher.js
CHANGED
|
@@ -160,16 +160,86 @@ class PrefixTrie {
|
|
|
160
160
|
if (node._terms) {
|
|
161
161
|
const matchLength = currentPos - startPos;
|
|
162
162
|
// Always extract from the original text to preserve case
|
|
163
|
-
|
|
163
|
+
let originalMatchedText = originalText.substring(startPos, currentPos);
|
|
164
|
+
|
|
165
|
+
// Extend match backwards to include dash-connected words and possessive forms
|
|
166
|
+
let extendedStartPos = startPos;
|
|
167
|
+
|
|
168
|
+
// Check backwards for dash preceded by word characters (no space between)
|
|
169
|
+
if (extendedStartPos > 0 && originalText[extendedStartPos - 1] === '-') {
|
|
170
|
+
let dashPos = extendedStartPos - 1;
|
|
171
|
+
dashPos--; // Move before the dash
|
|
172
|
+
// Check if there are word characters immediately before the dash
|
|
173
|
+
if (dashPos >= 0 && /[\w]/.test(originalText[dashPos])) {
|
|
174
|
+
// Find the start of the word before the dash
|
|
175
|
+
while (dashPos >= 0 && /[\w]/.test(originalText[dashPos])) {
|
|
176
|
+
dashPos--;
|
|
177
|
+
}
|
|
178
|
+
extendedStartPos = dashPos + 1;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// Check backwards for apostrophe (straight or curly) preceded by text
|
|
183
|
+
if (extendedStartPos > 0 && /['']/.test(originalText[extendedStartPos - 1])) {
|
|
184
|
+
let apostrophePos = extendedStartPos - 1;
|
|
185
|
+
apostrophePos--; // Move before the apostrophe
|
|
186
|
+
// Check if there are word characters immediately before the apostrophe
|
|
187
|
+
if (apostrophePos >= 0 && /[\w]/.test(originalText[apostrophePos])) {
|
|
188
|
+
// Find the start of the text before the apostrophe
|
|
189
|
+
while (apostrophePos >= 0 && /[\w]/.test(originalText[apostrophePos])) {
|
|
190
|
+
apostrophePos--;
|
|
191
|
+
}
|
|
192
|
+
extendedStartPos = apostrophePos + 1;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Extend match forwards to include dash-connected words and possessive forms
|
|
197
|
+
let extendedEndPos = currentPos;
|
|
198
|
+
|
|
199
|
+
// Check for dash followed by word characters (no space between)
|
|
200
|
+
if (extendedEndPos < originalText.length && originalText[extendedEndPos] === '-') {
|
|
201
|
+
let dashPos = extendedEndPos;
|
|
202
|
+
dashPos++; // Move past the dash
|
|
203
|
+
// Check if there are word characters immediately after the dash
|
|
204
|
+
if (dashPos < originalText.length && /[\w]/.test(originalText[dashPos])) {
|
|
205
|
+
// Find the end of the word after the dash
|
|
206
|
+
while (dashPos < originalText.length && /[\w]/.test(originalText[dashPos])) {
|
|
207
|
+
dashPos++;
|
|
208
|
+
}
|
|
209
|
+
extendedEndPos = dashPos;
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// Check for apostrophe (straight or curly) followed by text
|
|
214
|
+
if (extendedEndPos < originalText.length && /['']/.test(originalText[extendedEndPos])) {
|
|
215
|
+
let apostrophePos = extendedEndPos;
|
|
216
|
+
apostrophePos++; // Move past the apostrophe
|
|
217
|
+
// Check if there are word characters immediately after the apostrophe
|
|
218
|
+
if (apostrophePos < originalText.length && /[\w]/.test(originalText[apostrophePos])) {
|
|
219
|
+
// Find the end of the text after the apostrophe
|
|
220
|
+
while (apostrophePos < originalText.length && /[\w]/.test(originalText[apostrophePos])) {
|
|
221
|
+
apostrophePos++;
|
|
222
|
+
}
|
|
223
|
+
extendedEndPos = apostrophePos;
|
|
224
|
+
} else {
|
|
225
|
+
// Include the apostrophe even if no text follows (for possessives ending in s)
|
|
226
|
+
extendedEndPos = apostrophePos;
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// Update the matched text if we extended it
|
|
231
|
+
if (extendedStartPos < startPos || extendedEndPos > currentPos) {
|
|
232
|
+
originalMatchedText = originalText.substring(extendedStartPos, extendedEndPos);
|
|
233
|
+
}
|
|
164
234
|
|
|
165
235
|
// Check if this is a valid word boundary match (both start and end)
|
|
166
|
-
const isStartBoundary =
|
|
167
|
-
/[\s\p{P}]/.test(originalText[
|
|
168
|
-
!/[\w]/.test(originalText[
|
|
236
|
+
const isStartBoundary = extendedStartPos === 0 ||
|
|
237
|
+
/[\s\p{P}]/.test(originalText[extendedStartPos - 1]) ||
|
|
238
|
+
!/[\w]/.test(originalText[extendedStartPos - 1]);
|
|
169
239
|
|
|
170
|
-
const isEndBoundary =
|
|
171
|
-
/[\s\p{P}]/.test(originalText[
|
|
172
|
-
!/[\w]/.test(originalText[
|
|
240
|
+
const isEndBoundary = extendedEndPos >= originalText.length ||
|
|
241
|
+
/[\s\p{P}]/.test(originalText[extendedEndPos]) ||
|
|
242
|
+
!/[\w]/.test(originalText[extendedEndPos]);
|
|
173
243
|
|
|
174
244
|
const isWordBoundary = isStartBoundary && isEndBoundary;
|
|
175
245
|
|
|
@@ -178,8 +248,9 @@ class PrefixTrie {
|
|
|
178
248
|
matches.push({
|
|
179
249
|
term: termData.term,
|
|
180
250
|
articles: termData.articles,
|
|
181
|
-
matchedText: originalMatchedText, // Use the
|
|
182
|
-
length:
|
|
251
|
+
matchedText: originalMatchedText, // Use the extended matched text
|
|
252
|
+
length: originalMatchedText.length, // Use extended length
|
|
253
|
+
originalLength: matchLength, // Keep track of original match length for advancement
|
|
183
254
|
priority: termData.priority,
|
|
184
255
|
isExactCase: isExactCase
|
|
185
256
|
});
|
|
@@ -224,7 +295,6 @@ function createOptimizedTermMap(twTerms) {
|
|
|
224
295
|
let variants = new Set([originalTerm]);
|
|
225
296
|
const isName = articles[0].startsWith('names/') || articles[1]?.startsWith('names/')
|
|
226
297
|
variants = generateVariants(originalTerm, isName);
|
|
227
|
-
console.log(variants)
|
|
228
298
|
for (const variant of variants) {
|
|
229
299
|
if (variant !== originalTerm) {
|
|
230
300
|
trie.insert(variant, originalTerm, articles, false);
|
|
@@ -283,9 +353,11 @@ function findMatches(verseText, termTrie) {
|
|
|
283
353
|
priority: bestMatch.priority
|
|
284
354
|
});
|
|
285
355
|
|
|
286
|
-
// Move past the matched text
|
|
287
|
-
|
|
288
|
-
|
|
356
|
+
// Move past only the original matched text (not the extended part)
|
|
357
|
+
// This allows finding additional matches within the extended portion
|
|
358
|
+
const advanceBy = bestMatch.originalLength || bestMatch.length;
|
|
359
|
+
processedText += normalizedText.substring(currentPos, currentPos + advanceBy);
|
|
360
|
+
currentPos += advanceBy;
|
|
289
361
|
} else {
|
|
290
362
|
// No match found, move to next character/word boundary
|
|
291
363
|
const nextWordBoundary = normalizedText.substring(currentPos).search(/[\s\p{P}]/u);
|
package/src/utils/usfm-alignment-remover.js
CHANGED
|
@@ -68,13 +68,13 @@ export const removeAllTagsExceptChapterVerse = (usfmContent) => {
|
|
|
68
68
|
* @param {string} book - The book identifier
|
|
69
69
|
* @return {Promise<Object>} - Object with chapters and verses
|
|
70
70
|
*/
|
|
71
|
-
export async function processUsfmForBook(book) {
|
|
71
|
+
export async function processUsfmForBook(book, dcsHost = 'https://git.door43.org') {
|
|
72
72
|
// Normalize book key to lowercase to match BibleBookData keys
|
|
73
73
|
const key = String(book || '').toLowerCase();
|
|
74
74
|
if (!BibleBookData[key]) throw new Error(`Unknown book: ${book}`);
|
|
75
75
|
|
|
76
76
|
const fetch = await getFetch();
|
|
77
|
-
const usfmUrl =
|
|
77
|
+
const usfmUrl = `${dcsHost}/api/v1/repos/unfoldingWord/en_ult/contents/${BibleBookData[key].usfm}.usfm?ref=master`;
|
|
78
78
|
const usfmRes = await fetch(usfmUrl);
|
|
79
79
|
if (!usfmRes.ok) throw new Error(`Failed to download USFM file for ${book}`);
|
|
80
80
|
const usfmData = await usfmRes.json();
|
package/src/utils/zipProcessor.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Universal TWL zipProcessor - Works in both Node.js and Browser environments
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Downloads and processes en_tw ZIP files on-demand (no caching per user request)
|
|
5
5
|
*
|
|
6
6
|
* Usage in React.js:
|
|
7
7
|
* import { generateTWTerms } from './utils/zipProcessor.js';
|
|
8
|
-
* const terms = await generateTWTerms();
|
|
8
|
+
* const terms = await generateTWTerms('https://git.door43.org');
|
|
9
9
|
*/
|
|
10
10
|
import JSZip from "jszip";
|
|
11
11
|
|
|
@@ -13,141 +13,6 @@ import JSZip from "jszip";
|
|
|
13
13
|
const isNode = typeof process !== 'undefined' && process.versions?.node;
|
|
14
14
|
const isBrowser = typeof window !== 'undefined';
|
|
15
15
|
|
|
16
|
-
const ZIP_URL = 'https://git.door43.org/unfoldingWord/en_tw/archive/master.zip';
|
|
17
|
-
const CACHE_KEY = 'twl_zip_cache';
|
|
18
|
-
const CACHE_VERSION = '1.0';
|
|
19
|
-
|
|
20
|
-
// In-memory cache for processed terms (per session)
|
|
21
|
-
let processedTermsCache = null;
|
|
22
|
-
|
|
23
|
-
async function getCachedZip() {
|
|
24
|
-
if (isBrowser) {
|
|
25
|
-
// Browser: Use localStorage for ZIP cache
|
|
26
|
-
try {
|
|
27
|
-
const cached = localStorage.getItem(CACHE_KEY);
|
|
28
|
-
if (cached) {
|
|
29
|
-
const data = JSON.parse(cached);
|
|
30
|
-
// Only use cache if version matches and cache is less than 5 minutes old
|
|
31
|
-
const FIVE_MINUTES = 5 * 60 * 1000;
|
|
32
|
-
if (
|
|
33
|
-
data.version === CACHE_VERSION &&
|
|
34
|
-
data.timestamp &&
|
|
35
|
-
(Date.now() - data.timestamp) < FIVE_MINUTES
|
|
36
|
-
) {
|
|
37
|
-
console.log('Using cached ZIP from browser storage');
|
|
38
|
-
return new Uint8Array(data.zipData);
|
|
39
|
-
} else {
|
|
40
|
-
localStorage.removeItem(CACHE_KEY);
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
} catch (error) {
|
|
44
|
-
console.log('Browser ZIP cache corrupted, re-downloading...');
|
|
45
|
-
try { localStorage.removeItem(CACHE_KEY); } catch (e) { }
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
// Note: In Node.js we could cache to filesystem, but fresh download is fine for CLI usage
|
|
49
|
-
|
|
50
|
-
return null;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
/**
|
|
54
|
-
* Cache ZIP data in appropriate storage
|
|
55
|
-
*/
|
|
56
|
-
async function cacheZip(zipBuffer) {
|
|
57
|
-
if (isBrowser) {
|
|
58
|
-
try {
|
|
59
|
-
const cacheData = {
|
|
60
|
-
version: CACHE_VERSION,
|
|
61
|
-
timestamp: Date.now(),
|
|
62
|
-
zipData: Array.from(new Uint8Array(zipBuffer))
|
|
63
|
-
};
|
|
64
|
-
localStorage.setItem(CACHE_KEY, JSON.stringify(cacheData));
|
|
65
|
-
console.log('ZIP cached in browser storage');
|
|
66
|
-
} catch (error) {
|
|
67
|
-
console.warn('Failed to cache ZIP in browser:', error.message);
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* Get browser storage (localStorage or sessionStorage)
|
|
74
|
-
*/
|
|
75
|
-
function getBrowserStorage() {
|
|
76
|
-
if (!isBrowser) return null;
|
|
77
|
-
|
|
78
|
-
try {
|
|
79
|
-
return localStorage || sessionStorage || null;
|
|
80
|
-
} catch (e) {
|
|
81
|
-
console.warn('Browser storage not available:', e.message);
|
|
82
|
-
return null;
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
/**
|
|
87
|
-
* Get cached terms from appropriate storage
|
|
88
|
-
*/
|
|
89
|
-
async function getCachedTerms() {
|
|
90
|
-
// Check in-memory cache first (fastest)
|
|
91
|
-
if (memoryCache) {
|
|
92
|
-
console.log('Using in-memory cached article terms');
|
|
93
|
-
return memoryCache;
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
if (isBrowser) {
|
|
97
|
-
// Browser caching with localStorage/sessionStorage
|
|
98
|
-
const storage = getBrowserStorage();
|
|
99
|
-
if (storage) {
|
|
100
|
-
try {
|
|
101
|
-
const cached = storage.getItem(CACHE_KEY);
|
|
102
|
-
if (cached) {
|
|
103
|
-
const data = JSON.parse(cached);
|
|
104
|
-
if (data.version === CACHE_VERSION) {
|
|
105
|
-
console.log('Using browser cached article terms');
|
|
106
|
-
memoryCache = data.terms;
|
|
107
|
-
return data.terms;
|
|
108
|
-
} else {
|
|
109
|
-
console.log('Browser cache version mismatch, regenerating...');
|
|
110
|
-
storage.removeItem(CACHE_KEY);
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
} catch (error) {
|
|
114
|
-
console.log('Browser cache corrupted, regenerating...');
|
|
115
|
-
try {
|
|
116
|
-
storage.removeItem(CACHE_KEY);
|
|
117
|
-
} catch (e) { /* ignore cleanup errors */ }
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
return null;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
* Cache terms in appropriate storage
|
|
127
|
-
*/
|
|
128
|
-
async function cacheTerms(termMap) {
|
|
129
|
-
// Always cache in memory for this session
|
|
130
|
-
memoryCache = termMap;
|
|
131
|
-
|
|
132
|
-
if (isBrowser) {
|
|
133
|
-
// Browser caching
|
|
134
|
-
const storage = getBrowserStorage();
|
|
135
|
-
if (storage) {
|
|
136
|
-
try {
|
|
137
|
-
const cacheData = {
|
|
138
|
-
version: CACHE_VERSION,
|
|
139
|
-
timestamp: Date.now(),
|
|
140
|
-
terms: termMap
|
|
141
|
-
};
|
|
142
|
-
storage.setItem(CACHE_KEY, JSON.stringify(cacheData));
|
|
143
|
-
console.log('Article terms cached in browser storage');
|
|
144
|
-
} catch (error) {
|
|
145
|
-
console.warn('Failed to cache in browser storage:', error.message);
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
|
|
151
16
|
/**
|
|
152
17
|
* Process ZIP buffer and extract term mappings
|
|
153
18
|
*/
|
|
@@ -195,28 +60,15 @@ async function processZipBuffer(zipBuffer) {
|
|
|
195
60
|
return termMap;
|
|
196
61
|
}
|
|
197
62
|
|
|
198
|
-
export async function generateTWTerms() {
|
|
199
|
-
//
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
return processedTermsCache;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// Try to get cached ZIP first
|
|
206
|
-
let zipBuffer = await getCachedZip();
|
|
63
|
+
export async function generateTWTerms(dcsHost = 'https://git.door43.org') {
|
|
64
|
+
// Always download fresh ZIP (no caching per user request)
|
|
65
|
+
const zipUrl = `${dcsHost}/unfoldingWord/en_tw/archive/master.zip`;
|
|
66
|
+
console.log(`Downloading TW archive from ${zipUrl}...`);
|
|
207
67
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
console.log('Downloading TW archive...');
|
|
68
|
+
const res = await fetch(zipUrl);
|
|
69
|
+
if (!res.ok) throw new Error(`Failed to download ZIP: ${res.status} ${res.statusText}`);
|
|
211
70
|
|
|
212
|
-
|
|
213
|
-
if (!res.ok) throw new Error(`Failed to download ZIP: ${res.status} ${res.statusText}`);
|
|
214
|
-
|
|
215
|
-
zipBuffer = await res.arrayBuffer();
|
|
216
|
-
|
|
217
|
-
// Cache the ZIP for next time
|
|
218
|
-
await cacheZip(zipBuffer);
|
|
219
|
-
}
|
|
71
|
+
const zipBuffer = await res.arrayBuffer();
|
|
220
72
|
|
|
221
73
|
// Process ZIP to extract terms
|
|
222
74
|
console.log('Processing TW articles...');
|
|
@@ -224,64 +76,16 @@ export async function generateTWTerms() {
|
|
|
224
76
|
|
|
225
77
|
console.log(`Generated ${Object.keys(termMap).length} terms from TW archive`);
|
|
226
78
|
|
|
227
|
-
// Cache processed terms for this session
|
|
228
|
-
processedTermsCache = termMap;
|
|
229
|
-
|
|
230
79
|
return termMap;
|
|
231
80
|
}
|
|
232
81
|
|
|
233
82
|
/**
|
|
234
|
-
*
|
|
83
|
+
* Get information about the current environment for debugging
|
|
235
84
|
*/
|
|
236
|
-
export
|
|
237
|
-
|
|
238
|
-
processedTermsCache = null;
|
|
239
|
-
|
|
240
|
-
if (isBrowser) {
|
|
241
|
-
try {
|
|
242
|
-
localStorage.removeItem(CACHE_KEY);
|
|
243
|
-
console.log('Browser ZIP cache cleared');
|
|
244
|
-
return true;
|
|
245
|
-
} catch (error) {
|
|
246
|
-
console.warn('Failed to clear browser cache:', error.message);
|
|
247
|
-
return false;
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
console.log('Memory cache cleared');
|
|
252
|
-
return true;
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
/**
|
|
256
|
-
* Get cache information for debugging
|
|
257
|
-
*/
|
|
258
|
-
export function getCacheInfo() {
|
|
259
|
-
const info = {
|
|
85
|
+
export function getEnvironmentInfo() {
|
|
86
|
+
return {
|
|
260
87
|
environment: isNode ? 'Node.js' : (isBrowser ? 'Browser' : 'Unknown'),
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
termCount: 0,
|
|
264
|
-
cacheVersion: CACHE_VERSION
|
|
88
|
+
hasFetch: typeof fetch !== 'undefined',
|
|
89
|
+
hasJSZip: typeof JSZip !== 'undefined'
|
|
265
90
|
};
|
|
266
|
-
|
|
267
|
-
// Check processed terms
|
|
268
|
-
if (processedTermsCache) {
|
|
269
|
-
info.termCount = Object.keys(processedTermsCache).length;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
// Check ZIP cache in browser
|
|
273
|
-
if (isBrowser) {
|
|
274
|
-
try {
|
|
275
|
-
const cached = localStorage.getItem(CACHE_KEY);
|
|
276
|
-
if (cached) {
|
|
277
|
-
const data = JSON.parse(cached);
|
|
278
|
-
info.hasZipCache = true;
|
|
279
|
-
info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
|
|
280
|
-
}
|
|
281
|
-
} catch (error) {
|
|
282
|
-
// Ignore parse errors
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
return info;
|
|
287
91
|
}
|