twl-generator 1.0.4 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.0.4",
3
+ "version": "1.1.2",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -46,7 +46,7 @@
46
46
  "node": ">=16.0.0"
47
47
  },
48
48
  "dependencies": {
49
- "adm-zip": "^0.5.10",
49
+ "jszip": "^3.10.1",
50
50
  "node-fetch": "^3.3.2"
51
51
  },
52
52
  "peerDependencies": {
@@ -1,8 +1,7 @@
1
1
  /**
2
2
  * Universal TWL zipProcessor - Works in both Node.js and Browser environments
3
3
  *
4
- * For Node.js (CLI): Uses file system caching with article_terms.json
5
- * For React.js/Browser: Uses localStorage/sessionStorage for persistent caching
4
+ * Caches the raw ZIP file and processes term headers on-demand
6
5
  *
7
6
  * Usage in React.js:
8
7
  * import { generateTWTerms } from './utils/zipProcessor.js';
@@ -16,40 +15,78 @@ const isNode = typeof process !== 'undefined' && process.versions?.node;
16
15
  const isBrowser = typeof window !== 'undefined';
17
16
 
18
17
  const ZIP_URL = 'https://git.door43.org/unfoldingWord/en_tw/archive/master.zip';
19
- const CACHE_KEY = 'twl_article_terms';
18
+ const CACHE_KEY = 'twl_zip_cache';
20
19
  const CACHE_VERSION = '1.0';
21
20
 
22
- // In-memory cache for current session
23
- let memoryCache = null;
21
+ // In-memory cache for processed terms (per session)
22
+ let processedTermsCache = null;
24
23
 
25
24
  /**
26
- * Get Node.js dependencies dynamically
25
+ * Get dependencies dynamically (JSZip works in both environments)
27
26
  */
28
- async function getNodeDeps() {
29
- if (!isNode) return null;
30
-
27
+ async function getDeps() {
31
28
  try {
32
- const [nodeModule, fsModule, pathModule, urlModule, admZipModule] = await Promise.all([
33
- import('node-fetch'),
34
- import('fs'),
35
- import('path'),
36
- import('url'),
37
- import('adm-zip')
38
- ]);
39
-
40
- return {
41
- fetch: nodeModule.default,
42
- fs: fsModule.default,
43
- path: pathModule.default,
44
- fileURLToPath: urlModule.fileURLToPath,
45
- AdmZip: admZipModule.default
29
+ const jsZipModule = await import('jszip');
30
+ const deps = {
31
+ JSZip: jsZipModule.default
46
32
  };
33
+
34
+ // Add Node.js-specific fetch if needed
35
+ if (isNode) {
36
+ const nodeModule = await import('node-fetch');
37
+ deps.fetch = nodeModule.default;
38
+ }
39
+
40
+ return deps;
47
41
  } catch (error) {
48
- console.error('Failed to load Node.js dependencies:', error);
42
+ console.error('Failed to load dependencies:', error);
49
43
  return null;
50
44
  }
51
45
  }
52
46
 
47
+ async function getCachedZip() {
48
+ if (isBrowser) {
49
+ // Browser: Use localStorage for ZIP cache
50
+ try {
51
+ const cached = localStorage.getItem(CACHE_KEY);
52
+ if (cached) {
53
+ const data = JSON.parse(cached);
54
+ if (data.version === CACHE_VERSION) {
55
+ console.log('Using cached ZIP from browser storage');
56
+ return new Uint8Array(data.zipData);
57
+ } else {
58
+ localStorage.removeItem(CACHE_KEY);
59
+ }
60
+ }
61
+ } catch (error) {
62
+ console.log('Browser ZIP cache corrupted, re-downloading...');
63
+ try { localStorage.removeItem(CACHE_KEY); } catch (e) { }
64
+ }
65
+ }
66
+ // Note: In Node.js we could cache to filesystem, but fresh download is fine for CLI usage
67
+
68
+ return null;
69
+ }
70
+
71
+ /**
72
+ * Cache ZIP data in appropriate storage
73
+ */
74
+ async function cacheZip(zipBuffer) {
75
+ if (isBrowser) {
76
+ try {
77
+ const cacheData = {
78
+ version: CACHE_VERSION,
79
+ timestamp: Date.now(),
80
+ zipData: Array.from(new Uint8Array(zipBuffer))
81
+ };
82
+ localStorage.setItem(CACHE_KEY, JSON.stringify(cacheData));
83
+ console.log('ZIP cached in browser storage');
84
+ } catch (error) {
85
+ console.warn('Failed to cache ZIP in browser:', error.message);
86
+ }
87
+ }
88
+ }
89
+
53
90
  /**
54
91
  * Get browser storage (localStorage or sessionStorage)
55
92
  */
@@ -166,134 +203,116 @@ async function cacheTerms(termMap) {
166
203
  }
167
204
 
168
205
  /**
169
- * Process zip in Node.js environment using AdmZip
206
+ * Process ZIP buffer and extract term mappings
170
207
  */
171
- function processNodeZip(AdmZip, Buffer, buffer) {
172
- const zip = new AdmZip(Buffer.from(buffer));
173
- return zip.getEntries().filter(e => e.entryName.match(/^en_tw\/bible\/.*\/.*\.md$/));
174
- }
208
+ async function processZipBuffer(zipBuffer) {
209
+ // Use JSZip universally for both Node.js and Browser
210
+ const deps = await getDeps();
211
+ if (!deps) throw new Error('Failed to load dependencies');
212
+ const { JSZip } = deps;
213
+
214
+ const zip = new JSZip();
215
+ const zipData = await zip.loadAsync(zipBuffer);
216
+
217
+ const entries = [];
218
+ zipData.forEach((relativePath, file) => {
219
+ if (relativePath.match(/^en_tw\/bible\/.*\/.*\.md$/) && !file.dir) {
220
+ entries.push({
221
+ entryName: relativePath,
222
+ getData: () => file.async('string') // Return promise for string content
223
+ });
224
+ }
225
+ });
175
226
 
176
- /**
177
- * Process zip in browser environment using native APIs
178
- */
179
- async function processBrowserZip(buffer) {
180
- // For browser compatibility, we'll throw an error for now
181
- // This would require a browser-compatible zip library or different approach
182
- throw new Error('ZIP processing in browser requires cached terms. Please ensure article_terms.json is available or use Node.js environment for initial processing.');
227
+ entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
228
+
229
+ const termMap = {};
230
+
231
+ for (const entry of entries) {
232
+ const content = await entry.getData(); // Await the async string content
233
+ const firstLine = content.split('\n')[0];
234
+ const terms = firstLine.replace(/^#/, '').trim().split(',').map(t => t.trim()).filter(Boolean);
235
+ const truncated = entry.entryName.replace('en_tw/bible/', '');
236
+
237
+ for (const term of terms) {
238
+ // Normalize terms by removing parentheses and spaces before them
239
+ // e.g., "Joseph (OT)" -> "Joseph", "Mary (sister of Martha)" -> "Mary"
240
+ const normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();
241
+
242
+ if (!termMap[normalizedTerm]) {
243
+ termMap[normalizedTerm] = [];
244
+ }
245
+ termMap[normalizedTerm].push(truncated);
246
+ }
247
+ }
248
+
249
+ // Sort article arrays for consistent output
250
+ for (const term in termMap) {
251
+ termMap[term].sort();
252
+ }
253
+
254
+ return termMap;
183
255
  }
184
256
 
185
257
  export async function generateTWTerms() {
186
- // Try to get cached terms first
187
- const cachedTerms = await getCachedTerms();
188
- if (cachedTerms) {
189
- return cachedTerms;
258
+ // Check if we already processed terms this session
259
+ if (processedTermsCache) {
260
+ console.log('Using in-memory processed terms');
261
+ return processedTermsCache;
190
262
  }
191
263
 
192
- console.log('Downloading TW archive...');
264
+ // Try to get cached ZIP first
265
+ let zipBuffer = await getCachedZip();
193
266
 
194
- try {
195
- // Get appropriate fetch function and zip processing
196
- let fetchFn, processZip;
267
+ if (!zipBuffer) {
268
+ // Download fresh ZIP
269
+ console.log('Downloading TW archive...');
197
270
 
271
+ let fetchFn;
198
272
  if (isBrowser) {
199
273
  fetchFn = window.fetch;
200
- // Use browser-compatible zip processing
201
- processZip = (buffer) => processBrowserZip(buffer);
202
- } else if (isNode) {
203
- const deps = await getNodeDeps();
204
- if (!deps) throw new Error('Failed to load Node.js dependencies');
205
- fetchFn = deps.fetch;
206
- const AdmZip = deps.AdmZip;
207
- const Buffer = (await import('buffer')).Buffer;
208
- processZip = (buffer) => processNodeZip(AdmZip, Buffer, buffer);
274
+ } else {
275
+ const deps = await getDeps();
276
+ fetchFn = deps?.fetch;
209
277
  }
210
278
 
211
- const res = await fetchFn(ZIP_URL);
212
- if (!res.ok) throw new Error(`Failed to download zip: ${res.status} ${res.statusText}`);
213
-
214
- const buffer = await res.arrayBuffer();
215
- const entries = await processZip(buffer);
216
-
217
- console.log('Processing TW articles...');
218
- entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
279
+ if (!fetchFn) throw new Error('Fetch not available');
219
280
 
220
- const termMap = {};
221
-
222
- for (const entry of entries) {
223
- const content = entry.getData().toString('utf8');
224
- const firstLine = content.split('\n')[0];
225
- const terms = firstLine.replace(/^#/, '').trim().split(',').map(t => t.trim()).filter(Boolean);
226
- const truncated = entry.entryName.replace('en_tw/bible/', '');
281
+ const res = await fetchFn(ZIP_URL);
282
+ if (!res.ok) throw new Error(`Failed to download ZIP: ${res.status} ${res.statusText}`);
227
283
 
228
- for (const term of terms) {
229
- // Normalize terms by removing parentheses and spaces before them
230
- // e.g., "Joseph (OT)" -> "Joseph", "Mary (sister of Martha)" -> "Mary"
231
- const normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();
284
+ zipBuffer = await res.arrayBuffer();
232
285
 
233
- if (!termMap[normalizedTerm]) {
234
- termMap[normalizedTerm] = [];
235
- }
236
- termMap[normalizedTerm].push(truncated);
237
- }
238
- }
239
-
240
- // Sort article arrays for consistent output
241
- for (const term in termMap) {
242
- termMap[term].sort();
243
- }
286
+ // Cache the ZIP for next time
287
+ await cacheZip(zipBuffer);
288
+ }
244
289
 
245
- console.log(`Generated ${Object.keys(termMap).length} terms from TW archive`);
290
+ // Process ZIP to extract terms
291
+ console.log('Processing TW articles...');
292
+ const termMap = await processZipBuffer(zipBuffer);
246
293
 
247
- // Cache the results
248
- await cacheTerms(termMap);
294
+ console.log(`Generated ${Object.keys(termMap).length} terms from TW archive`);
249
295
 
250
- return termMap;
296
+ // Cache processed terms for this session
297
+ processedTermsCache = termMap;
251
298
 
252
- } catch (error) {
253
- console.error('Error generating TW terms:', error);
254
- throw error;
255
- }
299
+ return termMap;
256
300
  }
257
301
 
258
302
  /**
259
- * Clear cache - useful for forcing refresh in React.js apps
260
- * @returns {Promise<boolean>} - true if cache was cleared successfully
303
+ * Clear cache - useful for forcing refresh
261
304
  */
262
305
  export async function clearCache() {
263
306
  // Clear in-memory cache
264
- memoryCache = null;
307
+ processedTermsCache = null;
265
308
 
266
309
  if (isBrowser) {
267
- // Clear browser storage
268
- const storage = getBrowserStorage();
269
- if (storage) {
270
- try {
271
- storage.removeItem(CACHE_KEY);
272
- console.log('Browser cache cleared');
273
- return true;
274
- } catch (error) {
275
- console.warn('Failed to clear browser cache:', error.message);
276
- return false;
277
- }
278
- }
279
- } else if (isNode) {
280
- // Clear Node.js file cache
281
310
  try {
282
- const deps = await getNodeDeps();
283
- if (deps) {
284
- const { fs, path, fileURLToPath } = deps;
285
- const __filename = fileURLToPath(import.meta.url);
286
- const __dirname = path.dirname(__filename);
287
- const CACHE_FILE = path.join(__dirname, '../../article_terms.json');
288
-
289
- if (fs.existsSync(CACHE_FILE)) {
290
- fs.unlinkSync(CACHE_FILE);
291
- console.log('File cache cleared');
292
- return true;
293
- }
294
- }
311
+ localStorage.removeItem(CACHE_KEY);
312
+ console.log('Browser ZIP cache cleared');
313
+ return true;
295
314
  } catch (error) {
296
- console.warn('Failed to clear file cache:', error.message);
315
+ console.warn('Failed to clear browser cache:', error.message);
297
316
  return false;
298
317
  }
299
318
  }
@@ -303,45 +322,33 @@ export async function clearCache() {
303
322
  }
304
323
 
305
324
  /**
306
- * Get cache information for debugging - useful in React.js development
307
- * @returns {Object} - cache status and info
325
+ * Get cache information for debugging
308
326
  */
309
327
  export function getCacheInfo() {
310
328
  const info = {
311
329
  environment: isNode ? 'Node.js' : (isBrowser ? 'Browser' : 'Unknown'),
312
- hasMemoryCache: !!memoryCache,
313
- hasPersistentCache: false,
314
- cacheType: null,
315
- version: null,
316
- timestamp: null,
317
- termCount: 0
330
+ hasProcessedTerms: !!processedTermsCache,
331
+ hasZipCache: false,
332
+ termCount: 0,
333
+ cacheVersion: CACHE_VERSION
318
334
  };
319
335
 
320
- // Memory cache info
321
- if (memoryCache) {
322
- info.termCount = Object.keys(memoryCache).length;
336
+ // Check processed terms
337
+ if (processedTermsCache) {
338
+ info.termCount = Object.keys(processedTermsCache).length;
323
339
  }
324
340
 
341
+ // Check ZIP cache in browser
325
342
  if (isBrowser) {
326
- // Browser cache info
327
- const storage = getBrowserStorage();
328
- if (storage) {
329
- try {
330
- const cached = storage.getItem(CACHE_KEY);
331
- if (cached) {
332
- const data = JSON.parse(cached);
333
- info.hasPersistentCache = true;
334
- info.cacheType = storage === localStorage ? 'localStorage' : 'sessionStorage';
335
- info.version = data.version;
336
- info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
337
-
338
- if (!info.termCount && data.terms) {
339
- info.termCount = Object.keys(data.terms).length;
340
- }
341
- }
342
- } catch (error) {
343
- // Ignore parse errors
343
+ try {
344
+ const cached = localStorage.getItem(CACHE_KEY);
345
+ if (cached) {
346
+ const data = JSON.parse(cached);
347
+ info.hasZipCache = true;
348
+ info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
344
349
  }
350
+ } catch (error) {
351
+ // Ignore parse errors
345
352
  }
346
353
  }
347
354