twl-generator 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -1,8 +1,28 @@
|
|
|
1
1
|
/* eslint-disable no-async-promise-executor, no-throw-literal */
|
|
2
2
|
|
|
3
|
-
import fetch from 'node-fetch';
|
|
4
3
|
import { BibleBookData } from '../common/books.js';
|
|
5
4
|
|
|
5
|
+
// Environment detection.
// True only when there is no `window` global and `process.versions.node` is set.
// The result is coerced with Boolean() so the flag is a real boolean rather than
// the Node version string that the bare `&&` chain would otherwise yield.
const isNode = typeof window === 'undefined' && typeof process !== 'undefined' && Boolean(process.versions?.node);
|
|
7
|
+
|
|
8
|
+
/**
 * Resolve the `fetch` implementation to use in the current runtime.
 *
 * When `isNode` is set, the `node-fetch` package is loaded lazily and its
 * default export is returned. If that package cannot be loaded (or does not
 * export a function), the runtime's global `fetch` is used instead. Outside
 * Node.js the global `fetch` is returned directly.
 *
 * @returns {Promise<Function>} A fetch-compatible function.
 * @throws {Error} If neither `node-fetch` nor a global `fetch` is available.
 */
async function getFetch() {
  let loadError;

  if (isNode) {
    try {
      const { default: nodeFetch } = await import('node-fetch');
      if (typeof nodeFetch === 'function') {
        return nodeFetch;
      }
    } catch (error) {
      // Keep the reason node-fetch failed, then fall through to the global fetch.
      loadError = error;
    }
  }

  if (typeof globalThis.fetch === 'function') {
    return globalThis.fetch;
  }

  // Fail here with a clear message instead of handing `undefined` to the caller.
  throw new Error('No fetch implementation available in this environment', { cause: loadError });
}
|
|
16
|
+
|
|
17
|
+
/**
 * Decode a base64 string into UTF-8 text.
 *
 * When `isNode` is set, decoding goes through `Buffer`. Otherwise `atob` is
 * used to obtain the raw bytes, which are then decoded as UTF-8 so that both
 * branches return the same text for non-ASCII content.
 *
 * @param {string} base64String - Base64-encoded UTF-8 data.
 * @returns {string} The decoded text.
 */
function decodeBase64(base64String) {
  if (isNode) {
    return Buffer.from(base64String, 'base64').toString('utf-8');
  }

  // atob() returns a "binary string" with one character per decoded byte.
  // Returning it directly would corrupt every multi-byte UTF-8 sequence, so
  // the bytes are rebuilt and decoded explicitly.
  const binary = atob(base64String);
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));

  // ignoreBOM: true keeps a leading BOM in the output, matching Buffer#toString.
  return new TextDecoder('utf-8', { ignoreBOM: true }).decode(bytes);
}
|
|
25
|
+
|
|
6
26
|
// Note: This version doesn't use usfm-js to avoid external dependencies
|
|
7
27
|
// It implements a simple USFM alignment remover for the specific case
|
|
8
28
|
|
|
@@ -53,11 +73,12 @@ export const removeAllTagsExceptChapterVerse = (usfmContent) => {
|
|
|
53
73
|
export async function processUsfmForBook(book) {
|
|
54
74
|
if (!BibleBookData[book]) throw new Error(`Unknown book: ${book}`);
|
|
55
75
|
|
|
76
|
+
const fetch = await getFetch();
|
|
56
77
|
const usfmUrl = `https://git.door43.org/api/v1/repos/unfoldingWord/en_ult/contents/${BibleBookData[book].usfm}.usfm?ref=master`;
|
|
57
78
|
const usfmRes = await fetch(usfmUrl);
|
|
58
79
|
if (!usfmRes.ok) throw new Error(`Failed to download USFM file for ${book}`);
|
|
59
80
|
const usfmData = await usfmRes.json();
|
|
60
|
-
const usfmContent =
|
|
81
|
+
const usfmContent = decodeBase64(usfmData.content);
|
|
61
82
|
|
|
62
83
|
// Remove alignments from USFM
|
|
63
84
|
const cleanUsfm = removeAllTagsExceptChapterVerse(usfmContent);
|
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
* const terms = await generateTWTerms();
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import AdmZip from 'adm-zip';
|
|
13
12
|
import { BibleBookData } from '../common/books.js';
|
|
14
13
|
|
|
15
14
|
// Environment detection
|
|
@@ -30,18 +29,20 @@ async function getNodeDeps() {
|
|
|
30
29
|
if (!isNode) return null;
|
|
31
30
|
|
|
32
31
|
try {
|
|
33
|
-
const [nodeModule, fsModule, pathModule, urlModule] = await Promise.all([
|
|
32
|
+
const [nodeModule, fsModule, pathModule, urlModule, admZipModule] = await Promise.all([
|
|
34
33
|
import('node-fetch'),
|
|
35
34
|
import('fs'),
|
|
36
35
|
import('path'),
|
|
37
|
-
import('url')
|
|
36
|
+
import('url'),
|
|
37
|
+
import('adm-zip')
|
|
38
38
|
]);
|
|
39
39
|
|
|
40
40
|
return {
|
|
41
41
|
fetch: nodeModule.default,
|
|
42
42
|
fs: fsModule.default,
|
|
43
43
|
path: pathModule.default,
|
|
44
|
-
fileURLToPath: urlModule.fileURLToPath
|
|
44
|
+
fileURLToPath: urlModule.fileURLToPath,
|
|
45
|
+
AdmZip: admZipModule.default
|
|
45
46
|
};
|
|
46
47
|
} catch (error) {
|
|
47
48
|
console.error('Failed to load Node.js dependencies:', error);
|
|
@@ -164,6 +165,23 @@ async function cacheTerms(termMap) {
|
|
|
164
165
|
}
|
|
165
166
|
}
|
|
166
167
|
|
|
168
|
+
/**
 * Process zip in Node.js environment using AdmZip.
 *
 * Opens the archive from the given bytes and returns only the entries whose
 * name matches `en_tw/bible/<dir>/<file>.md`.
 *
 * @param {Function} AdmZip - Zip reader constructor; called with a Buffer.
 * @param {Function} Buffer - Buffer implementation used to wrap `buffer`.
 * @param {ArrayBuffer} buffer - Raw bytes of the downloaded archive.
 * @returns {Array<Object>} Entries from `getEntries()` that match the pattern.
 */
function processNodeZip(AdmZip, Buffer, buffer) {
  const articlePath = /^en_tw\/bible\/.*\/.*\.md$/;
  const zip = new AdmZip(Buffer.from(buffer));

  // test() answers the yes/no question directly, without building a match array per entry.
  return zip.getEntries().filter((entry) => articlePath.test(entry.entryName));
}
|
|
175
|
+
|
|
176
|
+
/**
 * Browser counterpart of the zip processing step.
 *
 * No zip reader is wired in for the browser, so this always rejects with an
 * error telling the caller to rely on cached terms or to run the initial
 * processing under Node.js.
 *
 * @param {ArrayBuffer} buffer - Archive bytes (currently unused).
 * @returns {Promise<never>} Always rejects.
 * @throws {Error} On every call.
 */
async function processBrowserZip(buffer) {
  // Supporting this would need a browser-compatible zip library or a different approach.
  const message = 'ZIP processing in browser requires cached terms. Please ensure article_terms.json is available or use Node.js environment for initial processing.';
  throw new Error(message);
}
|
|
184
|
+
|
|
167
185
|
export async function generateTWTerms() {
|
|
168
186
|
// Try to get cached terms first
|
|
169
187
|
const cachedTerms = await getCachedTerms();
|
|
@@ -174,25 +192,29 @@ export async function generateTWTerms() {
|
|
|
174
192
|
console.log('Downloading TW archive...');
|
|
175
193
|
|
|
176
194
|
try {
|
|
177
|
-
// Get appropriate fetch function
|
|
178
|
-
let fetchFn;
|
|
195
|
+
// Get appropriate fetch function and zip processing
|
|
196
|
+
let fetchFn, processZip;
|
|
197
|
+
|
|
179
198
|
if (isBrowser) {
|
|
180
199
|
fetchFn = window.fetch;
|
|
200
|
+
// Use browser-compatible zip processing
|
|
201
|
+
processZip = (buffer) => processBrowserZip(buffer);
|
|
181
202
|
} else if (isNode) {
|
|
182
203
|
const deps = await getNodeDeps();
|
|
183
|
-
|
|
184
|
-
|
|
204
|
+
if (!deps) throw new Error('Failed to load Node.js dependencies');
|
|
205
|
+
fetchFn = deps.fetch;
|
|
206
|
+
const AdmZip = deps.AdmZip;
|
|
207
|
+
const Buffer = (await import('buffer')).Buffer;
|
|
208
|
+
processZip = (buffer) => processNodeZip(AdmZip, Buffer, buffer);
|
|
185
209
|
}
|
|
186
210
|
|
|
187
211
|
const res = await fetchFn(ZIP_URL);
|
|
188
212
|
if (!res.ok) throw new Error(`Failed to download zip: ${res.status} ${res.statusText}`);
|
|
189
213
|
|
|
190
214
|
const buffer = await res.arrayBuffer();
|
|
191
|
-
const
|
|
215
|
+
const entries = await processZip(buffer);
|
|
192
216
|
|
|
193
217
|
console.log('Processing TW articles...');
|
|
194
|
-
|
|
195
|
-
const entries = zip.getEntries().filter(e => e.entryName.match(/^en_tw\/bible\/.*\/.*\.md$/));
|
|
196
218
|
entries.sort((a, b) => a.entryName.localeCompare(b.entryName));
|
|
197
219
|
|
|
198
220
|
const termMap = {};
|