twl-generator 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,15 +1,13 @@
1
1
  /**
2
2
  * Universal TWL zipProcessor - Works in both Node.js and Browser environments
3
3
  *
4
- * For Node.js (CLI): Uses file system caching with article_terms.json
5
- * For React.js/Browser: Uses localStorage/sessionStorage for persistent caching
4
+ * Caches the raw ZIP file and processes term headers on-demand
6
5
  *
7
6
  * Usage in React.js:
8
7
  * import { generateTWTerms } from './utils/zipProcessor.js';
9
8
  * const terms = await generateTWTerms();
10
9
  */
11
10
 
12
- import AdmZip from 'adm-zip';
13
11
  import { BibleBookData } from '../common/books.js';
14
12
 
15
13
  // Environment detection
@@ -17,11 +15,11 @@ const isNode = typeof process !== 'undefined' && process.versions?.node;
17
15
  const isBrowser = typeof window !== 'undefined';
18
16
 
19
17
  const ZIP_URL = 'https://git.door43.org/unfoldingWord/en_tw/archive/master.zip';
20
- const CACHE_KEY = 'twl_article_terms';
18
+ const CACHE_KEY = 'twl_zip_cache';
21
19
  const CACHE_VERSION = '1.0';
22
20
 
23
- // In-memory cache for current session
24
- let memoryCache = null;
21
+ // In-memory cache for processed terms (per session)
22
+ let processedTermsCache = null;
25
23
 
26
24
  /**
27
25
  * Get Node.js dependencies dynamically
@@ -30,18 +28,14 @@ async function getNodeDeps() {
30
28
  if (!isNode) return null;
31
29
 
32
30
  try {
33
- const [nodeModule, fsModule, pathModule, urlModule] = await Promise.all([
31
+ const [nodeModule, admZipModule] = await Promise.all([
34
32
  import('node-fetch'),
35
- import('fs'),
36
- import('path'),
37
- import('url')
33
+ import('adm-zip')
38
34
  ]);
39
35
 
40
36
  return {
41
37
  fetch: nodeModule.default,
42
- fs: fsModule.default,
43
- path: pathModule.default,
44
- fileURLToPath: urlModule.fileURLToPath
38
+ AdmZip: admZipModule.default
45
39
  };
46
40
  } catch (error) {
47
41
  console.error('Failed to load Node.js dependencies:', error);
@@ -49,6 +43,52 @@ async function getNodeDeps() {
49
43
  }
50
44
  }
51
45
 
/**
 * Encode raw bytes as a base64 string.
 *
 * The bytes are converted in fixed-size chunks so that String.fromCharCode
 * is never called with more arguments than the engine allows.
 *
 * @param {Uint8Array} bytes - Raw ZIP bytes.
 * @returns {string} Base64 representation of `bytes`.
 */
function zipBytesToBase64(bytes) {
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  return btoa(binary);
}

/**
 * Decode the `zipData` field of a cache record.
 *
 * Accepts the current format (base64 string) and the legacy format
 * (plain array of byte values) so caches written by older builds stay usable.
 *
 * @param {unknown} zipData - Value read from the cache record.
 * @returns {Uint8Array|null} Decoded bytes, or null when the payload is
 *   missing, empty, or of an unexpected type.
 * @throws {Error} If a string payload is not valid base64 (thrown by atob).
 */
function decodeCachedZipData(zipData) {
  if (typeof zipData === 'string' && zipData.length > 0) {
    const binary = atob(zipData);
    const bytes = new Uint8Array(binary.length);
    for (let i = 0; i < binary.length; i += 1) {
      bytes[i] = binary.charCodeAt(i);
    }
    return bytes;
  }
  if (Array.isArray(zipData) && zipData.length > 0) {
    return new Uint8Array(zipData);
  }
  return null;
}

/**
 * Get cached ZIP data from appropriate storage.
 *
 * Browser: reads the record stored under CACHE_KEY in localStorage. A record
 * whose version differs from CACHE_VERSION, or whose payload cannot be decoded
 * into a non-empty byte array, is removed and treated as a cache miss.
 * Node.js: there is no persistent ZIP cache, so this always resolves to null.
 *
 * @returns {Promise<Uint8Array|null>} Cached ZIP bytes, or null on a miss.
 */
async function getCachedZip() {
  if (!isBrowser) {
    // Note: In Node.js we could cache to filesystem, but fresh download is fine for CLI usage
    return null;
  }

  try {
    const cached = localStorage.getItem(CACHE_KEY);
    if (!cached) {
      return null;
    }

    const data = JSON.parse(cached);
    if (data?.version !== CACHE_VERSION) {
      localStorage.removeItem(CACHE_KEY);
      return null;
    }

    const bytes = decodeCachedZipData(data.zipData);
    if (bytes) {
      console.log('Using cached ZIP from browser storage');
      return bytes;
    }

    // Right version but no usable payload: an empty array here would be
    // truthy for the caller and suppress the re-download, so drop the record.
    console.log('Browser ZIP cache corrupted, re-downloading...');
    localStorage.removeItem(CACHE_KEY);
  } catch (error) {
    console.log('Browser ZIP cache corrupted, re-downloading...');
    try {
      localStorage.removeItem(CACHE_KEY);
    } catch (removeError) {
      console.warn('Failed to remove corrupted ZIP cache:', removeError.message);
    }
  }

  return null;
}

/**
 * Cache ZIP data in appropriate storage.
 *
 * Browser: stores the bytes base64-encoded under CACHE_KEY together with
 * CACHE_VERSION and a timestamp. Base64 needs about 1.33 characters per byte,
 * versus several characters per byte for a JSON array of numbers, which
 * matters because localStorage capacity is limited (limit is browser-specific).
 * Failures (for example a quota error) are logged and otherwise ignored:
 * caching is best-effort.
 * Node.js: no-op.
 *
 * @param {ArrayBuffer|Uint8Array} zipBuffer - Raw ZIP bytes to cache.
 * @returns {Promise<void>}
 */
async function cacheZip(zipBuffer) {
  if (!isBrowser) {
    return;
  }

  try {
    const bytes = new Uint8Array(zipBuffer);
    if (bytes.length === 0) {
      // An empty archive can never be processed; do not persist it.
      return;
    }

    const cacheData = {
      version: CACHE_VERSION,
      timestamp: Date.now(),
      encoding: 'base64',
      zipData: zipBytesToBase64(bytes),
    };
    localStorage.setItem(CACHE_KEY, JSON.stringify(cacheData));
    console.log('ZIP cached in browser storage');
  } catch (error) {
    console.warn('Failed to cache ZIP in browser:', error.message);
  }
}
91
+
52
92
  /**
53
93
  * Get browser storage (localStorage or sessionStorage)
54
94
  */
@@ -164,114 +204,108 @@ async function cacheTerms(termMap) {
164
204
  }
165
205
  }
166
206
 
167
- export async function generateTWTerms() {
168
- // Try to get cached terms first
169
- const cachedTerms = await getCachedTerms();
170
- if (cachedTerms) {
171
- return cachedTerms;
/**
 * Extract the normalized terms from the first line of a TW article.
 *
 * The first line is expected to look like "# term1, term2 (qualifier)".
 * Leading whitespace (including a byte-order mark) before the "#" is ignored,
 * and a trailing parenthesised qualifier is removed from each term,
 * e.g. "Joseph (OT)" -> "Joseph", "Mary (sister of Martha)" -> "Mary".
 *
 * @param {string} content - Full markdown text of the article.
 * @returns {string[]} Non-empty normalized terms in header order.
 */
function extractHeaderTerms(content) {
  const firstLine = content.split('\n')[0];
  return firstLine
    .replace(/^\s*#/, '')
    .split(',')
    .map((rawTerm) => rawTerm.trim())
    .filter(Boolean)
    .map((term) => term.replace(/\s+\([^)]*\)$/, '').trim());
}

/**
 * Build the term -> article-path mapping from ZIP entries.
 *
 * A Map/Set pair is used while collecting so that terms which collide with
 * Object.prototype members ("constructor", "toString", ...) are handled like
 * any other term, and so that an article is listed only once per term even
 * when several header terms normalize to the same key.
 *
 * @param {Array<{entryName: string, getData: Function}>} entries - Article entries.
 * @returns {Object<string, string[]>} Plain object mapping each term to its
 *   sorted list of article paths relative to "en_tw/bible/".
 */
function buildTermMap(entries) {
  const articlesByTerm = new Map();

  for (const entry of entries) {
    const content = entry.getData().toString('utf8');
    const articlePath = entry.entryName.replace('en_tw/bible/', '');

    for (const term of extractHeaderTerms(content)) {
      let articles = articlesByTerm.get(term);
      if (!articles) {
        articles = new Set();
        articlesByTerm.set(term, articles);
      }
      articles.add(articlePath);
    }
  }

  // Object.fromEntries defines own data properties, so keys such as
  // "constructor" or "__proto__" end up as ordinary entries.
  // Article arrays are sorted for consistent output.
  return Object.fromEntries(
    [...articlesByTerm].map(([term, articles]) => [term, [...articles].sort()])
  );
}

/**
 * Process ZIP buffer and extract term mappings.
 *
 * Only supported under Node.js, where adm-zip (loaded through getNodeDeps)
 * reads the archive. Entries matching "en_tw/bible/<dir>/<file>.md" are
 * visited in entryName order.
 *
 * @param {ArrayBuffer|Uint8Array} zipBuffer - Raw bytes of the TW archive.
 * @returns {Promise<Object<string, string[]>>} Term -> sorted article paths.
 * @throws {Error} When not running under Node.js, or when the Node.js
 *   dependencies cannot be loaded.
 */
async function processZipBuffer(zipBuffer) {
  if (!isNode) {
    // Browser: Use browser-compatible zip processing
    // For now, we'll use a simple approach that works with the TW archive structure
    throw new Error('Browser ZIP processing not yet implemented. Use cached data or run initial processing in Node.js.');
  }

  // Node.js: Use adm-zip
  const deps = await getNodeDeps();
  if (!deps) throw new Error('Failed to load Node.js dependencies');
  const { AdmZip } = deps;
  const { Buffer } = await import('buffer');
  const zip = new AdmZip(Buffer.from(zipBuffer));

  const articlePattern = /^en_tw\/bible\/.*\/.*\.md$/;
  const entries = zip
    .getEntries()
    .filter((entry) => articlePattern.test(entry.entryName));
  entries.sort((a, b) => a.entryName.localeCompare(b.entryName));

  return buildTermMap(entries);
}
192
256
 
193
- console.log('Processing TW articles...');
/**
 * Produce the term -> article list for the TW archive.
 *
 * Resolution order:
 *   1. terms already processed during this session (module-level cache);
 *   2. a ZIP returned by getCachedZip();
 *   3. a fresh download from ZIP_URL, which is then handed to cacheZip().
 * The resulting map is remembered in the module-level cache before returning.
 *
 * @returns {Promise<Object>} The term map produced by processZipBuffer().
 * @throws {Error} If no fetch implementation is available, the download
 *   response is not OK, or processZipBuffer() fails.
 */
export async function generateTWTerms() {
  // Fast path: terms were already built earlier in this session
  if (processedTermsCache) {
    console.log('Using in-memory processed terms');
    return processedTermsCache;
  }

  // Prefer a previously stored archive over the network
  let archive = await getCachedZip();

  if (!archive) {
    console.log('Downloading TW archive...');

    // Pick the fetch implementation for the current environment
    let fetchFn;
    if (isBrowser) {
      fetchFn = window.fetch;
    } else {
      const nodeDeps = await getNodeDeps();
      fetchFn = nodeDeps?.fetch;
    }
    if (!fetchFn) {
      throw new Error('Fetch not available');
    }

    const response = await fetchFn(ZIP_URL);
    if (!response.ok) {
      throw new Error(`Failed to download ZIP: ${response.status} ${response.statusText}`);
    }

    archive = await response.arrayBuffer();

    // Keep the raw archive around for the next call
    await cacheZip(archive);
  }

  console.log('Processing TW articles...');
  const terms = await processZipBuffer(archive);

  console.log(`Generated ${Object.keys(terms).length} terms from TW archive`);

  // Remember the processed result for the remainder of the session
  processedTermsCache = terms;

  return terms;
}
235
294
 
236
295
  /**
237
- * Clear cache - useful for forcing refresh in React.js apps
238
- * @returns {Promise<boolean>} - true if cache was cleared successfully
296
+ * Clear cache - useful for forcing refresh
239
297
  */
240
298
  export async function clearCache() {
241
299
  // Clear in-memory cache
242
- memoryCache = null;
300
+ processedTermsCache = null;
243
301
 
244
302
  if (isBrowser) {
245
- // Clear browser storage
246
- const storage = getBrowserStorage();
247
- if (storage) {
248
- try {
249
- storage.removeItem(CACHE_KEY);
250
- console.log('Browser cache cleared');
251
- return true;
252
- } catch (error) {
253
- console.warn('Failed to clear browser cache:', error.message);
254
- return false;
255
- }
256
- }
257
- } else if (isNode) {
258
- // Clear Node.js file cache
259
303
  try {
260
- const deps = await getNodeDeps();
261
- if (deps) {
262
- const { fs, path, fileURLToPath } = deps;
263
- const __filename = fileURLToPath(import.meta.url);
264
- const __dirname = path.dirname(__filename);
265
- const CACHE_FILE = path.join(__dirname, '../../article_terms.json');
266
-
267
- if (fs.existsSync(CACHE_FILE)) {
268
- fs.unlinkSync(CACHE_FILE);
269
- console.log('File cache cleared');
270
- return true;
271
- }
272
- }
304
+ localStorage.removeItem(CACHE_KEY);
305
+ console.log('Browser ZIP cache cleared');
306
+ return true;
273
307
  } catch (error) {
274
- console.warn('Failed to clear file cache:', error.message);
308
+ console.warn('Failed to clear browser cache:', error.message);
275
309
  return false;
276
310
  }
277
311
  }
@@ -281,45 +315,33 @@ export async function clearCache() {
281
315
  }
282
316
 
283
317
  /**
284
- * Get cache information for debugging - useful in React.js development
285
- * @returns {Object} - cache status and info
318
+ * Get cache information for debugging
286
319
  */
287
320
  export function getCacheInfo() {
288
321
  const info = {
289
322
  environment: isNode ? 'Node.js' : (isBrowser ? 'Browser' : 'Unknown'),
290
- hasMemoryCache: !!memoryCache,
291
- hasPersistentCache: false,
292
- cacheType: null,
293
- version: null,
294
- timestamp: null,
295
- termCount: 0
323
+ hasProcessedTerms: !!processedTermsCache,
324
+ hasZipCache: false,
325
+ termCount: 0,
326
+ cacheVersion: CACHE_VERSION
296
327
  };
297
328
 
298
- // Memory cache info
299
- if (memoryCache) {
300
- info.termCount = Object.keys(memoryCache).length;
329
+ // Check processed terms
330
+ if (processedTermsCache) {
331
+ info.termCount = Object.keys(processedTermsCache).length;
301
332
  }
302
333
 
334
+ // Check ZIP cache in browser
303
335
  if (isBrowser) {
304
- // Browser cache info
305
- const storage = getBrowserStorage();
306
- if (storage) {
307
- try {
308
- const cached = storage.getItem(CACHE_KEY);
309
- if (cached) {
310
- const data = JSON.parse(cached);
311
- info.hasPersistentCache = true;
312
- info.cacheType = storage === localStorage ? 'localStorage' : 'sessionStorage';
313
- info.version = data.version;
314
- info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
315
-
316
- if (!info.termCount && data.terms) {
317
- info.termCount = Object.keys(data.terms).length;
318
- }
319
- }
320
- } catch (error) {
321
- // Ignore parse errors
336
+ try {
337
+ const cached = localStorage.getItem(CACHE_KEY);
338
+ if (cached) {
339
+ const data = JSON.parse(cached);
340
+ info.hasZipCache = true;
341
+ info.timestamp = data.timestamp ? new Date(data.timestamp) : null;
322
342
  }
343
+ } catch (error) {
344
+ // Ignore parse errors
323
345
  }
324
346
  }
325
347