twl-generator 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.0.4",
3
+ "version": "1.1.0",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,8 +1,7 @@
1
1
  /**
2
2
  * Universal TWL zipProcessor - Works in both Node.js and Browser environments
3
3
  *
4
- * For Node.js (CLI): Uses file system caching with article_terms.json
5
- * For React.js/Browser: Uses localStorage/sessionStorage for persistent caching
4
+ * Caches the raw ZIP file and processes term headers on-demand
6
5
  *
7
6
  * Usage in React.js:
8
7
  * import { generateTWTerms } from './utils/zipProcessor.js';
@@ -16,11 +15,11 @@ const isNode = typeof process !== 'undefined' && process.versions?.node;
16
15
  const isBrowser = typeof window !== 'undefined';
17
16
 
18
17
  const ZIP_URL = 'https://git.door43.org/unfoldingWord/en_tw/archive/master.zip';
19
- const CACHE_KEY = 'twl_article_terms';
18
+ const CACHE_KEY = 'twl_zip_cache';
20
19
  const CACHE_VERSION = '1.0';
21
20
 
22
- // In-memory cache for current session
23
- let memoryCache = null;
21
+ // In-memory cache for processed terms (per session)
22
+ let processedTermsCache = null;
24
23
 
25
24
  /**
26
25
  * Get Node.js dependencies dynamically
@@ -29,19 +28,13 @@ async function getNodeDeps() {
29
28
  if (!isNode) return null;
30
29
 
31
30
  try {
32
- const [nodeModule, fsModule, pathModule, urlModule, admZipModule] = await Promise.all([
31
+ const [nodeModule, admZipModule] = await Promise.all([
33
32
  import('node-fetch'),
34
- import('fs'),
35
- import('path'),
36
- import('url'),
37
33
  import('adm-zip')
38
34
  ]);
39
35
 
40
36
  return {
41
37
  fetch: nodeModule.default,
42
- fs: fsModule.default,
43
- path: pathModule.default,
44
- fileURLToPath: urlModule.fileURLToPath,
45
38
  AdmZip: admZipModule.default
46
39
  };
47
40
  } catch (error) {
@@ -50,6 +43,52 @@ async function getNodeDeps() {
50
43
  }
51
44
  }
52
45
 
/**
 * Load the raw TW ZIP archive from the persistent cache, if a usable copy exists.
 *
 * Only the browser branch reads a cache (localStorage under CACHE_KEY); in any
 * other environment this resolves to null so the caller downloads the archive.
 *
 * A stored entry is used only when it parses as JSON, its `version` equals
 * CACHE_VERSION and its `zipData` is a non-empty array of byte values. Any
 * other entry is removed so a bad payload cannot be handed out as a ZIP again.
 *
 * @returns {Promise<Uint8Array|null>} The cached archive bytes, or null when
 *   there is nothing usable in the cache.
 */
async function getCachedZip() {
  if (!isBrowser) {
    // Note: In Node.js we could cache to filesystem, but fresh download is fine for CLI usage
    return null;
  }

  try {
    const cached = localStorage.getItem(CACHE_KEY);
    if (!cached) {
      return null;
    }

    const data = JSON.parse(cached);
    // `new Uint8Array(undefined)` and `new Uint8Array([])` both yield an empty
    // (but truthy) array, so the payload shape has to be checked explicitly.
    const isUsable =
      data !== null &&
      typeof data === 'object' &&
      data.version === CACHE_VERSION &&
      Array.isArray(data.zipData) &&
      data.zipData.length > 0;

    if (isUsable) {
      console.log('Using cached ZIP from browser storage');
      return new Uint8Array(data.zipData);
    }

    // Wrong version or unusable payload: evict it so the next download replaces it.
    localStorage.removeItem(CACHE_KEY);
  } catch (error) {
    console.log('Browser ZIP cache corrupted, re-downloading...');
    try {
      localStorage.removeItem(CACHE_KEY);
    } catch (removeError) {
      // Best effort only: storage access itself may be what is failing.
    }
  }

  return null;
}
72
+
/**
 * Persist the raw TW ZIP archive so a later session can skip the download.
 *
 * Only the browser branch writes a cache (localStorage under CACHE_KEY); in any
 * other environment this is a no-op. The entry is a JSON object holding
 * `version` (CACHE_VERSION), `timestamp` (Date.now()) and `zipData` (the
 * archive as a plain array of byte values).
 *
 * Caching is best effort: storage failures are logged with console.warn and
 * never thrown. Missing or empty input is skipped rather than stored, because
 * an empty entry would be read back as a zero-byte archive.
 *
 * @param {ArrayBuffer|ArrayBufferView} zipBuffer - Raw bytes of the ZIP archive.
 * @returns {Promise<void>}
 */
async function cacheZip(zipBuffer) {
  if (!isBrowser) {
    return;
  }

  try {
    if (zipBuffer == null) {
      console.warn('Failed to cache ZIP in browser:', 'no ZIP data to cache');
      return;
    }

    // Read a view through its own byte window so the stored values are always
    // raw bytes, whatever the element type or offset of the view is.
    const bytes = ArrayBuffer.isView(zipBuffer)
      ? new Uint8Array(zipBuffer.buffer, zipBuffer.byteOffset, zipBuffer.byteLength)
      : new Uint8Array(zipBuffer);

    if (bytes.length === 0) {
      console.warn('Failed to cache ZIP in browser:', 'no ZIP data to cache');
      return;
    }

    const cacheData = {
      version: CACHE_VERSION,
      timestamp: Date.now(),
      zipData: Array.from(bytes),
    };
    localStorage.setItem(CACHE_KEY, JSON.stringify(cacheData));
    console.log('ZIP cached in browser storage');
  } catch (error) {
    // Typically a storage quota error; the caller still has the buffer in hand.
    console.warn('Failed to cache ZIP in browser:', error.message);
  }
}
91
+
53
92
  /**
54
93
  * Get browser storage (localStorage or sessionStorage)
55
94
  */
@@ -166,134 +205,107 @@ async function cacheTerms(termMap) {
166
205
  }
167
206
 
/**
 * Build the term -> articles mapping from TW article entries.
 *
 * The first line of each article is its heading: a comma-separated list of
 * terms after the leading `#`. A trailing parenthesised qualifier is dropped
 * from each term, e.g. "Joseph (OT)" -> "Joseph",
 * "Mary (sister of Martha)" -> "Mary".
 *
 * A Map/Set pair is used while collecting so that terms named like
 * Object.prototype members ("constructor", "toString") are ordinary keys and
 * an article is listed at most once per term.
 *
 * @param {Array<{entryName: string, getData: Function}>} entries - Article
 *   entries; `getData()` must return something whose `toString('utf8')` is the
 *   article text.
 * @returns {Object<string, string[]>} Term -> sorted article paths relative to
 *   `en_tw/bible/`.
 */
function buildTermMap(entries) {
  const articlesByTerm = new Map();

  for (const entry of entries) {
    const content = entry.getData().toString('utf8');
    // Drop a leading BOM so the heading marker is really at position 0.
    const [firstLine] = content.replace(/^\uFEFF/, '').split('\n');
    const terms = firstLine
      .replace(/^#+/, '')
      .split(',')
      .map((t) => t.trim())
      .filter(Boolean);
    const article = entry.entryName.replace('en_tw/bible/', '');

    for (const term of terms) {
      const normalizedTerm = term.replace(/\s+\([^)]*\)$/, '').trim();

      let articles = articlesByTerm.get(normalizedTerm);
      if (!articles) {
        articles = new Set();
        articlesByTerm.set(normalizedTerm, articles);
      }
      articles.add(article);
    }
  }

  // Sort article arrays for consistent output
  return Object.fromEntries(
    [...articlesByTerm].map(([term, articles]) => [term, [...articles].sort()]),
  );
}

/**
 * Process ZIP buffer and extract term mappings.
 *
 * In Node.js the archive is opened with adm-zip (loaded through getNodeDeps)
 * and every entry matching `en_tw/bible/<dir>/<file>.md` is read. Outside
 * Node.js no ZIP reader is available, so the call rejects.
 *
 * @param {ArrayBuffer|Uint8Array} zipBuffer - Raw bytes of the TW archive.
 * @returns {Promise<Object<string, string[]>>} Term -> sorted article paths
 *   relative to `en_tw/bible/`.
 * @throws {Error} When not running in Node.js, or when the Node.js
 *   dependencies cannot be loaded.
 */
async function processZipBuffer(zipBuffer) {
  if (!isNode) {
    // Browser: Use browser-compatible zip processing
    // For now, we'll use a simple approach that works with the TW archive structure
    throw new Error('Browser ZIP processing not yet implemented. Use cached data or run initial processing in Node.js.');
  }

  // Node.js: Use adm-zip
  const deps = await getNodeDeps();
  if (!deps) throw new Error('Failed to load Node.js dependencies');
  const { AdmZip } = deps;
  const { Buffer } = await import('buffer');
  const zip = new AdmZip(Buffer.from(zipBuffer));

  const articlePath = /^en_tw\/bible\/.*\/.*\.md$/;
  const entries = zip.getEntries().filter((e) => articlePath.test(e.entryName));
  // Process in path order so the key order of the result is reproducible.
  entries.sort((a, b) => a.entryName.localeCompare(b.entryName));

  return buildTermMap(entries);
}
216
256
 
217
- console.log('Processing TW articles...');
218
- entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
/**
 * Produce the term -> articles mapping for the TW archive.
 *
 * Lookup order:
 *   1. the in-memory result of an earlier call (processedTermsCache),
 *   2. a cached copy of the ZIP (getCachedZip),
 *   3. a fresh download of ZIP_URL, which is then handed to cacheZip.
 * The ZIP is processed with processZipBuffer and the result is remembered in
 * processedTermsCache for later calls.
 *
 * An empty download is rejected before it is cached or processed, so a bad
 * response cannot be stored and replayed as the archive.
 *
 * @returns {Promise<Object<string, string[]>>} Term -> article paths.
 * @throws {Error} When no fetch implementation is available, the download
 *   responds with a non-OK status or an empty body, or processing fails.
 */
export async function generateTWTerms() {
  // Check if we already processed terms this session
  if (processedTermsCache) {
    console.log('Using in-memory processed terms');
    return processedTermsCache;
  }

  // Try to get cached ZIP first
  let zipBuffer = await getCachedZip();

  if (!zipBuffer) {
    // Download fresh ZIP
    console.log('Downloading TW archive...');

    const fetchFn = isBrowser ? window.fetch : (await getNodeDeps())?.fetch;
    if (!fetchFn) throw new Error('Fetch not available');

    const res = await fetchFn(ZIP_URL);
    if (!res.ok) throw new Error(`Failed to download ZIP: ${res.status} ${res.statusText}`);

    zipBuffer = await res.arrayBuffer();
    if (zipBuffer == null || zipBuffer.byteLength === 0) {
      throw new Error('Failed to download ZIP: response body is empty');
    }

    // Cache the ZIP for next time
    await cacheZip(zipBuffer);
  }

  // Process ZIP to extract terms
  console.log('Processing TW articles...');
  const termMap = await processZipBuffer(zipBuffer);

  console.log(`Generated ${Object.keys(termMap).length} terms from TW archive`);

  // Cache processed terms for this session
  processedTermsCache = termMap;

  return termMap;
}
257
294
 
258
295
  /**
259
- * Clear cache - useful for forcing refresh in React.js apps
260
- * @returns {Promise<boolean>} - true if cache was cleared successfully
296
+ * Clear cache - useful for forcing refresh
261
297
  */
262
298
  export async function clearCache() {
263
299
  // Clear in-memory cache
264
- memoryCache = null;
300
+ processedTermsCache = null;
265
301
 
266
302
  if (isBrowser) {
267
- // Clear browser storage
268
- const storage = getBrowserStorage();
269
- if (storage) {
270
- try {
271
- storage.removeItem(CACHE_KEY);
272
- console.log('Browser cache cleared');
273
- return true;
274
- } catch (error) {
275
- console.warn('Failed to clear browser cache:', error.message);
276
- return false;
277
- }
278
- }
279
- } else if (isNode) {
280
- // Clear Node.js file cache
281
303
  try {
282
- const deps = await getNodeDeps();
283
- if (deps) {
284
- const { fs, path, fileURLToPath } = deps;
285
- const __filename = fileURLToPath(import.meta.url);
286
- const __dirname = path.dirname(__filename);
287
- const CACHE_FILE = path.join(__dirname, '../../article_terms.json');
288
-
289
- if (fs.existsSync(CACHE_FILE)) {
290
- fs.unlinkSync(CACHE_FILE);
291
- console.log('File cache cleared');
292
- return true;
293
- }
294
- }
304
+ localStorage.removeItem(CACHE_KEY);
305
+ console.log('Browser ZIP cache cleared');
306
+ return true;
295
307
  } catch (error) {
296
- console.warn('Failed to clear file cache:', error.message);
308
+ console.warn('Failed to clear browser cache:', error.message);
297
309
  return false;
298
310
  }
299
311
  }
@@ -303,45 +315,33 @@ export async function clearCache() {
303
315
  }
304
316
 
305
317
  /**
306
- * Get cache information for debugging - useful in React.js development
307
- * @returns {Object} - cache status and info
318
+ * Get cache information for debugging
308
319
  */
309
320
  export function getCacheInfo() {
310
321
  const info = {
311
322
  environment: isNode ? 'Node.js' : (isBrowser ? 'Browser' : 'Unknown'),
312
- hasMemoryCache: !!memoryCache,
313
- hasPersistentCache: false,
314
- cacheType: null,
315
- version: null,
316
- timestamp: null,
317
- termCount: 0
323
+ hasProcessedTerms: !!processedTermsCache,
324
+ hasZipCache: false,
325
+ termCount: 0,
326
+ cacheVersion: CACHE_VERSION
318
327
  };
319
328
 
320
- // Memory cache info
321
- if (memoryCache) {
322
- info.termCount = Object.keys(memoryCache).length;
329
+ // Check processed terms
330
+ if (processedTermsCache) {
331
+ info.termCount = Object.keys(processedTermsCache).length;
323
332
  }
324
333
 
334
+ // Check ZIP cache in browser
325
335
  if (isBrowser) {
326
- // Browser cache info
327
- const storage = getBrowserStorage();
328
- if (storage) {
329
- try {
330
- const cached = storage.getItem(CACHE_KEY);
331
- if (cached) {
332
- const data = JSON.parse(cached);
333
- info.hasPersistentCache = true;
334
- info.cacheType = storage === localStorage ? 'localStorage' : 'sessionStorage';
335
- info.version = data.version;
336
- info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
337
-
338
- if (!info.termCount && data.terms) {
339
- info.termCount = Object.keys(data.terms).length;
340
- }
341
- }
342
- } catch (error) {
343
- // Ignore parse errors
336
+ try {
337
+ const cached = localStorage.getItem(CACHE_KEY);
338
+ if (cached) {
339
+ const data = JSON.parse(cached);
340
+ info.hasZipCache = true;
341
+ info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
344
342
  }
343
+ } catch (error) {
344
+ // Ignore parse errors
345
345
  }
346
346
  }
347
347