twl-generator 1.1.0 → 1.1.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.1.0",
3
+ "version": "1.1.3",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -46,7 +46,7 @@
46
46
  "node": ">=16.0.0"
47
47
  },
48
48
  "dependencies": {
49
- "adm-zip": "^0.5.10",
49
+ "jszip": "^3.10.1",
50
50
  "node-fetch": "^3.3.2"
51
51
  },
52
52
  "peerDependencies": {
package/src/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  // Main module for twl-generator
2
2
  import { generateTWTerms } from './utils/zipProcessor.js';
3
- import { processUsfmForBook, parseUsfmToVerses } from './utils/usfm-alignment-remover.js';
3
+ import { processUsfmForBook, parseUsfmToVerses, removeAllTagsExceptChapterVerse } from './utils/usfm-alignment-remover.js';
4
4
  import { generateTWLMatches } from './utils/twl-matcher.js';
5
5
 
6
6
  export { generateTWTerms, processUsfmForBook };
@@ -17,8 +17,9 @@ export async function generateTWLWithUsfm(book, usfmContent = null) {
17
17
 
18
18
  let verses;
19
19
  if (usfmContent) {
20
- // Parse provided USFM content
21
- verses = parseUsfmToVerses(usfmContent);
20
+ // Parse provided USFM content (clean it first)
21
+ const cleanUsfm = removeAllTagsExceptChapterVerse(usfmContent);
22
+ verses = parseUsfmToVerses(cleanUsfm);
22
23
  } else {
23
24
  // Fetch USFM from git.door43.org
24
25
  if (!book) throw new Error('Book parameter required when no USFM content provided');
package/src/utils/zipProcessor.js CHANGED
@@ -22,30 +22,28 @@ const CACHE_VERSION = '1.0';
22
22
  let processedTermsCache = null;
23
23
 
24
24
  /**
25
- * Get Node.js dependencies dynamically
25
+ * Get dependencies dynamically (JSZip works in both environments)
26
26
  */
27
- async function getNodeDeps() {
28
- if (!isNode) return null;
29
-
27
+ async function getDeps() {
30
28
  try {
31
- const [nodeModule, admZipModule] = await Promise.all([
32
- import('node-fetch'),
33
- import('adm-zip')
34
- ]);
35
-
36
- return {
37
- fetch: nodeModule.default,
38
- AdmZip: admZipModule.default
29
+ const jsZipModule = await import('jszip');
30
+ const deps = {
31
+ JSZip: jsZipModule.default
39
32
  };
33
+
34
+ // Add Node.js-specific fetch if needed
35
+ if (isNode) {
36
+ const nodeModule = await import('node-fetch');
37
+ deps.fetch = nodeModule.default;
38
+ }
39
+
40
+ return deps;
40
41
  } catch (error) {
41
- console.error('Failed to load Node.js dependencies:', error);
42
+ console.error('Failed to load dependencies:', error);
42
43
  return null;
43
44
  }
44
45
  }
45
46
 
46
- /**
47
- * Get cached ZIP data from appropriate storage
48
- */
49
47
  async function getCachedZip() {
50
48
  if (isBrowser) {
51
49
  // Browser: Use localStorage for ZIP cache
@@ -208,28 +206,30 @@ async function cacheTerms(termMap) {
208
206
  * Process ZIP buffer and extract term mappings
209
207
  */
210
208
  async function processZipBuffer(zipBuffer) {
211
- let zip;
212
-
213
- if (isNode) {
214
- // Node.js: Use adm-zip
215
- const deps = await getNodeDeps();
216
- if (!deps) throw new Error('Failed to load Node.js dependencies');
217
- const { AdmZip } = deps;
218
- const Buffer = (await import('buffer')).Buffer;
219
- zip = new AdmZip(Buffer.from(zipBuffer));
220
- } else {
221
- // Browser: Use browser-compatible zip processing
222
- // For now, we'll use a simple approach that works with the TW archive structure
223
- throw new Error('Browser ZIP processing not yet implemented. Use cached data or run initial processing in Node.js.');
224
- }
209
+ // Use JSZip universally for both Node.js and Browser
210
+ const deps = await getDeps();
211
+ if (!deps) throw new Error('Failed to load dependencies');
212
+ const { JSZip } = deps;
213
+
214
+ const zip = new JSZip();
215
+ const zipData = await zip.loadAsync(zipBuffer);
216
+
217
+ const entries = [];
218
+ zipData.forEach((relativePath, file) => {
219
+ if (relativePath.match(/^en_tw\/bible\/.*\/.*\.md$/) && !file.dir) {
220
+ entries.push({
221
+ entryName: relativePath,
222
+ getData: () => file.async('string') // Return promise for string content
223
+ });
224
+ }
225
+ });
225
226
 
226
- const entries = zip.getEntries().filter(e => e.entryName.match(/^en_tw\/bible\/.*\/.*\.md$/));
227
227
  entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
228
228
 
229
229
  const termMap = {};
230
230
 
231
231
  for (const entry of entries) {
232
- const content = entry.getData().toString('utf8');
232
+ const content = await entry.getData(); // Await the async string content
233
233
  const firstLine = content.split('\n')[0];
234
234
  const terms = firstLine.replace(/^#/, '').trim().split(',').map(t => t.trim()).filter(Boolean);
235
235
  const truncated = entry.entryName.replace('en_tw/bible/', '');
@@ -268,7 +268,14 @@ export async function generateTWTerms() {
268
268
  // Download fresh ZIP
269
269
  console.log('Downloading TW archive...');
270
270
 
271
- const fetchFn = isBrowser ? window.fetch : (await getNodeDeps())?.fetch;
271
+ let fetchFn;
272
+ if (isBrowser) {
273
+ fetchFn = window.fetch;
274
+ } else {
275
+ const deps = await getDeps();
276
+ fetchFn = deps?.fetch;
277
+ }
278
+
272
279
  if (!fetchFn) throw new Error('Fetch not available');
273
280
 
274
281
  const res = await fetchFn(ZIP_URL);