twl-generator 1.0.4 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/utils/zipProcessor.js +158 -151
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.2",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
"node": ">=16.0.0"
|
|
47
47
|
},
|
|
48
48
|
"dependencies": {
|
|
49
|
-
"
|
|
49
|
+
"jszip": "^3.10.1",
|
|
50
50
|
"node-fetch": "^3.3.2"
|
|
51
51
|
},
|
|
52
52
|
"peerDependencies": {
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Universal TWL zipProcessor - Works in both Node.js and Browser environments
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* For React.js/Browser: Uses localStorage/sessionStorage for persistent caching
|
|
4
|
+
* Caches the raw ZIP file and processes term headers on-demand
|
|
6
5
|
*
|
|
7
6
|
* Usage in React.js:
|
|
8
7
|
* import { generateTWTerms } from './utils/zipProcessor.js';
|
|
@@ -16,40 +15,78 @@ const isNode = typeof process !== 'undefined' && process.versions?.node;
|
|
|
16
15
|
const isBrowser = typeof window !== 'undefined';
|
|
17
16
|
|
|
18
17
|
const ZIP_URL = 'https://git.door43.org/unfoldingWord/en_tw/archive/master.zip';
|
|
19
|
-
const CACHE_KEY = '
|
|
18
|
+
const CACHE_KEY = 'twl_zip_cache';
|
|
20
19
|
const CACHE_VERSION = '1.0';
|
|
21
20
|
|
|
22
|
-
// In-memory cache for
|
|
23
|
-
let
|
|
21
|
+
// In-memory cache for processed terms (per session)
|
|
22
|
+
let processedTermsCache = null;
|
|
24
23
|
|
|
25
24
|
/**
|
|
26
|
-
* Get
|
|
25
|
+
* Get dependencies dynamically (JSZip works in both environments)
|
|
27
26
|
*/
|
|
28
|
-
async function
|
|
29
|
-
if (!isNode) return null;
|
|
30
|
-
|
|
27
|
+
async function getDeps() {
|
|
31
28
|
try {
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
import('path'),
|
|
36
|
-
import('url'),
|
|
37
|
-
import('adm-zip')
|
|
38
|
-
]);
|
|
39
|
-
|
|
40
|
-
return {
|
|
41
|
-
fetch: nodeModule.default,
|
|
42
|
-
fs: fsModule.default,
|
|
43
|
-
path: pathModule.default,
|
|
44
|
-
fileURLToPath: urlModule.fileURLToPath,
|
|
45
|
-
AdmZip: admZipModule.default
|
|
29
|
+
const jsZipModule = await import('jszip');
|
|
30
|
+
const deps = {
|
|
31
|
+
JSZip: jsZipModule.default
|
|
46
32
|
};
|
|
33
|
+
|
|
34
|
+
// Add Node.js-specific fetch if needed
|
|
35
|
+
if (isNode) {
|
|
36
|
+
const nodeModule = await import('node-fetch');
|
|
37
|
+
deps.fetch = nodeModule.default;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return deps;
|
|
47
41
|
} catch (error) {
|
|
48
|
-
console.error('Failed to load
|
|
42
|
+
console.error('Failed to load dependencies:', error);
|
|
49
43
|
return null;
|
|
50
44
|
}
|
|
51
45
|
}
|
|
52
46
|
|
|
47
|
+
async function getCachedZip() {
|
|
48
|
+
if (isBrowser) {
|
|
49
|
+
// Browser: Use localStorage for ZIP cache
|
|
50
|
+
try {
|
|
51
|
+
const cached = localStorage.getItem(CACHE_KEY);
|
|
52
|
+
if (cached) {
|
|
53
|
+
const data = JSON.parse(cached);
|
|
54
|
+
if (data.version === CACHE_VERSION) {
|
|
55
|
+
console.log('Using cached ZIP from browser storage');
|
|
56
|
+
return new Uint8Array(data.zipData);
|
|
57
|
+
} else {
|
|
58
|
+
localStorage.removeItem(CACHE_KEY);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
} catch (error) {
|
|
62
|
+
console.log('Browser ZIP cache corrupted, re-downloading...');
|
|
63
|
+
try { localStorage.removeItem(CACHE_KEY); } catch (e) { }
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
// Note: In Node.js we could cache to filesystem, but fresh download is fine for CLI usage
|
|
67
|
+
|
|
68
|
+
return null;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Cache ZIP data in appropriate storage
|
|
73
|
+
*/
|
|
74
|
+
async function cacheZip(zipBuffer) {
|
|
75
|
+
if (isBrowser) {
|
|
76
|
+
try {
|
|
77
|
+
const cacheData = {
|
|
78
|
+
version: CACHE_VERSION,
|
|
79
|
+
timestamp: Date.now(),
|
|
80
|
+
zipData: Array.from(new Uint8Array(zipBuffer))
|
|
81
|
+
};
|
|
82
|
+
localStorage.setItem(CACHE_KEY, JSON.stringify(cacheData));
|
|
83
|
+
console.log('ZIP cached in browser storage');
|
|
84
|
+
} catch (error) {
|
|
85
|
+
console.warn('Failed to cache ZIP in browser:', error.message);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
53
90
|
/**
|
|
54
91
|
* Get browser storage (localStorage or sessionStorage)
|
|
55
92
|
*/
|
|
@@ -166,134 +203,116 @@ async function cacheTerms(termMap) {
|
|
|
166
203
|
}
|
|
167
204
|
|
|
168
205
|
/**
|
|
169
|
-
* Process
|
|
206
|
+
* Process ZIP buffer and extract term mappings
|
|
170
207
|
*/
|
|
171
|
-
function
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
208
|
+
async function processZipBuffer(zipBuffer) {
|
|
209
|
+
// Use JSZip universally for both Node.js and Browser
|
|
210
|
+
const deps = await getDeps();
|
|
211
|
+
if (!deps) throw new Error('Failed to load dependencies');
|
|
212
|
+
const { JSZip } = deps;
|
|
213
|
+
|
|
214
|
+
const zip = new JSZip();
|
|
215
|
+
const zipData = await zip.loadAsync(zipBuffer);
|
|
216
|
+
|
|
217
|
+
const entries = [];
|
|
218
|
+
zipData.forEach((relativePath, file) => {
|
|
219
|
+
if (relativePath.match(/^en_tw\/bible\/.*\/.*\.md$/) && !file.dir) {
|
|
220
|
+
entries.push({
|
|
221
|
+
entryName: relativePath,
|
|
222
|
+
getData: () => file.async('string') // Return promise for string content
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
});
|
|
175
226
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
227
|
+
entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
|
|
228
|
+
|
|
229
|
+
const termMap = {};
|
|
230
|
+
|
|
231
|
+
for (const entry of entries) {
|
|
232
|
+
const content = await entry.getData(); // Await the async string content
|
|
233
|
+
const firstLine = content.split('\n')[0];
|
|
234
|
+
const terms = firstLine.replace(/^#/, '').trim().split(',').map(t => t.trim()).filter(Boolean);
|
|
235
|
+
const truncated = entry.entryName.replace('en_tw/bible/', '');
|
|
236
|
+
|
|
237
|
+
for (const term of terms) {
|
|
238
|
+
// Normalize terms by removing parentheses and spaces before them
|
|
239
|
+
// e.g., "Joseph (OT)" -> "Joseph", "Mary (sister of Martha)" -> "Mary"
|
|
240
|
+
const normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();
|
|
241
|
+
|
|
242
|
+
if (!termMap[normalizedTerm]) {
|
|
243
|
+
termMap[normalizedTerm] = [];
|
|
244
|
+
}
|
|
245
|
+
termMap[normalizedTerm].push(truncated);
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
// Sort article arrays for consistent output
|
|
250
|
+
for (const term in termMap) {
|
|
251
|
+
termMap[term].sort();
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
return termMap;
|
|
183
255
|
}
|
|
184
256
|
|
|
185
257
|
export async function generateTWTerms() {
|
|
186
|
-
//
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
return
|
|
258
|
+
// Check if we already processed terms this session
|
|
259
|
+
if (processedTermsCache) {
|
|
260
|
+
console.log('Using in-memory processed terms');
|
|
261
|
+
return processedTermsCache;
|
|
190
262
|
}
|
|
191
263
|
|
|
192
|
-
|
|
264
|
+
// Try to get cached ZIP first
|
|
265
|
+
let zipBuffer = await getCachedZip();
|
|
193
266
|
|
|
194
|
-
|
|
195
|
-
//
|
|
196
|
-
|
|
267
|
+
if (!zipBuffer) {
|
|
268
|
+
// Download fresh ZIP
|
|
269
|
+
console.log('Downloading TW archive...');
|
|
197
270
|
|
|
271
|
+
let fetchFn;
|
|
198
272
|
if (isBrowser) {
|
|
199
273
|
fetchFn = window.fetch;
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
const deps = await getNodeDeps();
|
|
204
|
-
if (!deps) throw new Error('Failed to load Node.js dependencies');
|
|
205
|
-
fetchFn = deps.fetch;
|
|
206
|
-
const AdmZip = deps.AdmZip;
|
|
207
|
-
const Buffer = (await import('buffer')).Buffer;
|
|
208
|
-
processZip = (buffer) => processNodeZip(AdmZip, Buffer, buffer);
|
|
274
|
+
} else {
|
|
275
|
+
const deps = await getDeps();
|
|
276
|
+
fetchFn = deps?.fetch;
|
|
209
277
|
}
|
|
210
278
|
|
|
211
|
-
|
|
212
|
-
if (!res.ok) throw new Error(`Failed to download zip: ${res.status} ${res.statusText}`);
|
|
213
|
-
|
|
214
|
-
const buffer = await res.arrayBuffer();
|
|
215
|
-
const entries = await processZip(buffer);
|
|
216
|
-
|
|
217
|
-
console.log('Processing TW articles...');
|
|
218
|
-
entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
|
|
279
|
+
if (!fetchFn) throw new Error('Fetch not available');
|
|
219
280
|
|
|
220
|
-
const
|
|
221
|
-
|
|
222
|
-
for (const entry of entries) {
|
|
223
|
-
const content = entry.getData().toString('utf8');
|
|
224
|
-
const firstLine = content.split('\n')[0];
|
|
225
|
-
const terms = firstLine.replace(/^#/, '').trim().split(',').map(t => t.trim()).filter(Boolean);
|
|
226
|
-
const truncated = entry.entryName.replace('en_tw/bible/', '');
|
|
281
|
+
const res = await fetchFn(ZIP_URL);
|
|
282
|
+
if (!res.ok) throw new Error(`Failed to download ZIP: ${res.status} ${res.statusText}`);
|
|
227
283
|
|
|
228
|
-
|
|
229
|
-
// Normalize terms by removing parentheses and spaces before them
|
|
230
|
-
// e.g., "Joseph (OT)" -> "Joseph", "Mary (sister of Martha)" -> "Mary"
|
|
231
|
-
const normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();
|
|
284
|
+
zipBuffer = await res.arrayBuffer();
|
|
232
285
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
termMap[normalizedTerm].push(truncated);
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
// Sort article arrays for consistent output
|
|
241
|
-
for (const term in termMap) {
|
|
242
|
-
termMap[term].sort();
|
|
243
|
-
}
|
|
286
|
+
// Cache the ZIP for next time
|
|
287
|
+
await cacheZip(zipBuffer);
|
|
288
|
+
}
|
|
244
289
|
|
|
245
|
-
|
|
290
|
+
// Process ZIP to extract terms
|
|
291
|
+
console.log('Processing TW articles...');
|
|
292
|
+
const termMap = await processZipBuffer(zipBuffer);
|
|
246
293
|
|
|
247
|
-
|
|
248
|
-
await cacheTerms(termMap);
|
|
294
|
+
console.log(`Generated ${Object.keys(termMap).length} terms from TW archive`);
|
|
249
295
|
|
|
250
|
-
|
|
296
|
+
// Cache processed terms for this session
|
|
297
|
+
processedTermsCache = termMap;
|
|
251
298
|
|
|
252
|
-
|
|
253
|
-
console.error('Error generating TW terms:', error);
|
|
254
|
-
throw error;
|
|
255
|
-
}
|
|
299
|
+
return termMap;
|
|
256
300
|
}
|
|
257
301
|
|
|
258
302
|
/**
|
|
259
|
-
* Clear cache - useful for forcing refresh
|
|
260
|
-
* @returns {Promise<boolean>} - true if cache was cleared successfully
|
|
303
|
+
* Clear cache - useful for forcing refresh
|
|
261
304
|
*/
|
|
262
305
|
export async function clearCache() {
|
|
263
306
|
// Clear in-memory cache
|
|
264
|
-
|
|
307
|
+
processedTermsCache = null;
|
|
265
308
|
|
|
266
309
|
if (isBrowser) {
|
|
267
|
-
// Clear browser storage
|
|
268
|
-
const storage = getBrowserStorage();
|
|
269
|
-
if (storage) {
|
|
270
|
-
try {
|
|
271
|
-
storage.removeItem(CACHE_KEY);
|
|
272
|
-
console.log('Browser cache cleared');
|
|
273
|
-
return true;
|
|
274
|
-
} catch (error) {
|
|
275
|
-
console.warn('Failed to clear browser cache:', error.message);
|
|
276
|
-
return false;
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
} else if (isNode) {
|
|
280
|
-
// Clear Node.js file cache
|
|
281
310
|
try {
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
286
|
-
const __dirname = path.dirname(__filename);
|
|
287
|
-
const CACHE_FILE = path.join(__dirname, '../../article_terms.json');
|
|
288
|
-
|
|
289
|
-
if (fs.existsSync(CACHE_FILE)) {
|
|
290
|
-
fs.unlinkSync(CACHE_FILE);
|
|
291
|
-
console.log('File cache cleared');
|
|
292
|
-
return true;
|
|
293
|
-
}
|
|
294
|
-
}
|
|
311
|
+
localStorage.removeItem(CACHE_KEY);
|
|
312
|
+
console.log('Browser ZIP cache cleared');
|
|
313
|
+
return true;
|
|
295
314
|
} catch (error) {
|
|
296
|
-
console.warn('Failed to clear
|
|
315
|
+
console.warn('Failed to clear browser cache:', error.message);
|
|
297
316
|
return false;
|
|
298
317
|
}
|
|
299
318
|
}
|
|
@@ -303,45 +322,33 @@ export async function clearCache() {
|
|
|
303
322
|
}
|
|
304
323
|
|
|
305
324
|
/**
|
|
306
|
-
* Get cache information for debugging
|
|
307
|
-
* @returns {Object} - cache status and info
|
|
325
|
+
* Get cache information for debugging
|
|
308
326
|
*/
|
|
309
327
|
export function getCacheInfo() {
|
|
310
328
|
const info = {
|
|
311
329
|
environment: isNode ? 'Node.js' : (isBrowser ? 'Browser' : 'Unknown'),
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
timestamp: null,
|
|
317
|
-
termCount: 0
|
|
330
|
+
hasProcessedTerms: !!processedTermsCache,
|
|
331
|
+
hasZipCache: false,
|
|
332
|
+
termCount: 0,
|
|
333
|
+
cacheVersion: CACHE_VERSION
|
|
318
334
|
};
|
|
319
335
|
|
|
320
|
-
//
|
|
321
|
-
if (
|
|
322
|
-
info.termCount = Object.keys(
|
|
336
|
+
// Check processed terms
|
|
337
|
+
if (processedTermsCache) {
|
|
338
|
+
info.termCount = Object.keys(processedTermsCache).length;
|
|
323
339
|
}
|
|
324
340
|
|
|
341
|
+
// Check ZIP cache in browser
|
|
325
342
|
if (isBrowser) {
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
const data = JSON.parse(cached);
|
|
333
|
-
info.hasPersistentCache = true;
|
|
334
|
-
info.cacheType = storage === localStorage ? 'localStorage' : 'sessionStorage';
|
|
335
|
-
info.version = data.version;
|
|
336
|
-
info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
|
|
337
|
-
|
|
338
|
-
if (!info.termCount && data.terms) {
|
|
339
|
-
info.termCount = Object.keys(data.terms).length;
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
} catch (error) {
|
|
343
|
-
// Ignore parse errors
|
|
343
|
+
try {
|
|
344
|
+
const cached = localStorage.getItem(CACHE_KEY);
|
|
345
|
+
if (cached) {
|
|
346
|
+
const data = JSON.parse(cached);
|
|
347
|
+
info.hasZipCache = true;
|
|
348
|
+
info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
|
|
344
349
|
}
|
|
350
|
+
} catch (error) {
|
|
351
|
+
// Ignore parse errors
|
|
345
352
|
}
|
|
346
353
|
}
|
|
347
354
|
|