twl-generator 1.4.3 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.4.3",
3
+ "version": "1.4.5",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -50,7 +50,7 @@
50
50
  "csv-parse": "^5.5.6",
51
51
  "csv-stringify": "^6.5.0",
52
52
  "jszip": "^3.10.1",
53
- "tsv-quote-converters": "^1.1.13"
53
+ "tsv-quote-converters": "^1.1.14"
54
54
  },
55
55
  "peerDependencies": {
56
56
  "react": ">=16.8.0"
@@ -60,4 +60,4 @@
60
60
  "optional": true
61
61
  }
62
62
  }
63
- }
63
+ }
package/src/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { BibleBookData } from './common/books.js';
2
+ import { addGLQuoteCols, convertGLQuotes2OLQuotes } from 'tsv-quote-converters';
2
3
 
3
4
  const isBrowser = typeof window !== 'undefined';
4
5
 
@@ -829,6 +830,7 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, twMap, opts
829
830
  export async function generateTwlByBook(bookCode, options = {}) {
830
831
  // Extract dcsHost option with default
831
832
  const dcsHost = options.dcsHost || 'https://git.door43.org';
833
+ const quiet = !!options.quiet;
832
834
 
833
835
  // Load terms from en_tw zip file instead of local tw_strongs_list.json
834
836
  const termToArticles = await loadTermsFromEnTw(dcsHost);
@@ -844,8 +846,7 @@ export async function generateTwlByBook(bookCode, options = {}) {
844
846
  if (!meta) throw new Error(`Unknown book code: ${bookCode}`);
845
847
  const versesByChapter = await processUsfmForBook(meta.key, dcsHost);
846
848
 
847
- // Header without Strongs; keep GLQuote/GLOccurrence and add Variant of, Disambiguation
848
- const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'];
849
+ const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'Variant of', 'Disambiguation'];
849
850
  const outRows = [header.join('\t')];
850
851
 
851
852
  // ID generator
@@ -874,8 +875,22 @@ export async function generateTwlByBook(bookCode, options = {}) {
874
875
  const isVowel = (ch) => /[aeiou]/i.test(ch);
875
876
  const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
876
877
  const endsWithCVC = (w) => w.length >= 3 && isConsonant(w[w.length - 3]) && isVowel(w[w.length - 2]) && isConsonant(w[w.length - 1]) && !/[wxy]/i.test(w[w.length - 1]);
877
- const edForm = (w) => (/e$/i.test(w) ? w + 'd' : (/[^aeiou]y$/i.test(w) ? w.replace(/y$/i, 'ied') : (endsWithCVC(w) ? w + w[w.length - 1] + 'ed' : w + 'ed')));
878
- const ingForm = (w) => (/ie$/i.test(w) ? w.replace(/ie$/i, 'ying') : (/ee$/i.test(w) ? w + 'ing' : (/e$/i.test(w) ? w.replace(/e$/i, 'ing') : (endsWithCVC(w) ? w + w[w.length - 1] + 'ing' : w + 'ing'))));
878
+ const edForm = (w) => {
879
+ if (/e$/i.test(w)) return w + 'd';
880
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
881
+ // Do not double the final consonant for words ending in "er" or "en" (e.g., gather -> gathered, open -> opened)
882
+ const lastCh = w[w.length - 1];
883
+ if (endsWithCVC(w) && !/(?:er|en)$/i.test(w)) return w + lastCh + 'ed';
884
+ return w + 'ed';
885
+ };
886
+ const ingForm = (w) => {
887
+ if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
888
+ if (/ee$/i.test(w)) return w + 'ing';
889
+ if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
890
+ const lastCh = w[w.length - 1];
891
+ if (endsWithCVC(w) && !/(?:er|en)$/i.test(w)) return w + lastCh + 'ing';
892
+ return w + 'ing';
893
+ };
879
894
 
880
895
  const allowNoVariant = (base, match) => {
881
896
  const b = String(base || '');
@@ -929,8 +944,6 @@ export async function generateTwlByBook(bookCode, options = {}) {
929
944
  glq,
930
945
  String(occ),
931
946
  twLink,
932
- glq,
933
- String(occ),
934
947
  variantOf,
935
948
  disamb,
936
949
  ].join('\t'));
@@ -941,13 +954,12 @@ export async function generateTwlByBook(bookCode, options = {}) {
941
954
  // Build TSV and convert GL OrigWords back to OL using tsv-quote-converters
942
955
  let matchedTsv = outRows.join('\n');
943
956
  try {
944
- const { convertGLQuotes2OLQuotes } = await import('tsv-quote-converters');
945
957
  const conv = await convertGLQuotes2OLQuotes({
946
- bibleLinks: ['unfoldingWord/en_ult/master'],
958
+ bibleLink: 'unfoldingWord/en_ult/master',
947
959
  bookCode: String(meta.key || bookCode).toLowerCase(),
948
960
  tsvContent: matchedTsv,
949
961
  trySeparatorsAndOccurrences: true,
950
- quiet: true,
962
+ quiet,
951
963
  });
952
964
  if (conv && typeof conv.output === 'string' && conv.output.length) {
953
965
  matchedTsv = conv.output;
@@ -955,6 +967,68 @@ export async function generateTwlByBook(bookCode, options = {}) {
955
967
  } catch (e) {
956
968
  // If conversion fails (e.g., no network), fall back to unconverted TSV
957
969
  }
970
+
971
+ // Now add the actual GLQuote/GLOccurrence by calling addGLQuoteCols
972
+ try {
973
+ const result = await addGLQuoteCols({
974
+ bibleLinks: ['unfoldingWord/en_ult/master'],
975
+ bookCode: String(meta.key || bookCode).toLowerCase(),
976
+ tsvContent: matchedTsv,
977
+ trySeparatorsAndOccurrences: true,
978
+ usePreviousGLQuotes: true,
979
+ quiet,
980
+ });
981
+ if (result && typeof result.output === 'string' && result.output.length) {
982
+ matchedTsv = result.output;
983
+ // Reorder columns: move cols[5] and cols[6] to after cols[7] for every line
984
+ try {
985
+ const lines = String(matchedTsv || '').split('\n');
986
+ for (let i = 0; i < lines.length; i++) {
987
+ const cols = lines[i].split('\t');
988
+ // require at least 8 columns so cols[7] exists
989
+ if (cols.length >= 8) {
990
+ const removed = cols.splice(5, 2); // remove cols[5] and cols[6]
991
+ // after removal, original cols[7] is at index 5, so insert after it at index 6
992
+ const insertIndex = Math.min(6, cols.length);
993
+ cols.splice(insertIndex, 0, ...removed);
994
+ lines[i] = cols.join('\t');
995
+ }
996
+ }
997
+ matchedTsv = lines.join('\n');
998
+ } catch (err) {
999
+ // leave matchedTsv unchanged on error
1000
+ }
1001
+ }
1002
+ } catch (e) {
1003
+ try {
1004
+ const lines = String(matchedTsv || '').split('\n');
1005
+ if (lines.length > 0) {
1006
+ lines[0] = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'].join('\t');
1007
+ const out = [lines[0]];
1008
+ for (let i = 1; i < lines.length; i++) {
1009
+ const cols = lines[i].split('\t');
1010
+ const g = (idx) => (cols[idx] !== undefined ? cols[idx] : '');
1011
+ const newRow = [
1012
+ g(0), // Reference
1013
+ g(1), // ID
1014
+ g(2), // Tags
1015
+ g(3), // OrigWords
1016
+ g(4), // Occurrence
1017
+ g(5), // TWLink
1018
+ g(3), // GLQuote (copy of OrigWords)
1019
+ g(4), // GLOccurrence (copy of Occurrence)
1020
+ g(6), // Variant of
1021
+ g(7), // Disambiguation
1022
+ ].join('\t');
1023
+ out.push(newRow);
1024
+ }
1025
+ matchedTsv = out.join('\n');
1026
+ }
1027
+ } catch (err) {
1028
+ // leave matchedTsv unchanged on any transformation error
1029
+ }
1030
+ }
1031
+
958
1032
  const noMatchHeader = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Disambiguation'];
959
1033
  const noMatchTsv = [noMatchHeader.join('\t')].join('\n');
960
1034
  return { matchedTsv, noMatchTsv };
@@ -162,23 +162,9 @@ class PrefixTrie {
162
162
  // Always extract from the original text to preserve case
163
163
  let originalMatchedText = originalText.substring(startPos, currentPos);
164
164
 
165
- // Extend match backwards to include dash-connected words and possessive forms
165
+ // Extend match backwards to include possessive forms (but not dash-connected words)
166
166
  let extendedStartPos = startPos;
167
167
 
168
- // Check backwards for dash preceded by word characters (no space between)
169
- if (extendedStartPos > 0 && originalText[extendedStartPos - 1] === '-') {
170
- let dashPos = extendedStartPos - 1;
171
- dashPos--; // Move before the dash
172
- // Check if there are word characters immediately before the dash
173
- if (dashPos >= 0 && /[\w]/.test(originalText[dashPos])) {
174
- // Find the start of the word before the dash
175
- while (dashPos >= 0 && /[\w]/.test(originalText[dashPos])) {
176
- dashPos--;
177
- }
178
- extendedStartPos = dashPos + 1;
179
- }
180
- }
181
-
182
168
  // Check backwards for apostrophe (straight or curly) preceded by text
183
169
  if (extendedStartPos > 0 && /['']/.test(originalText[extendedStartPos - 1])) {
184
170
  let apostrophePos = extendedStartPos - 1;
@@ -193,23 +179,9 @@ class PrefixTrie {
193
179
  }
194
180
  }
195
181
 
196
- // Extend match forwards to include dash-connected words and possessive forms
182
+ // Extend match forwards to include possessive forms (but not dash-connected words)
197
183
  let extendedEndPos = currentPos;
198
184
 
199
- // Check for dash followed by word characters (no space between)
200
- if (extendedEndPos < originalText.length && originalText[extendedEndPos] === '-') {
201
- let dashPos = extendedEndPos;
202
- dashPos++; // Move past the dash
203
- // Check if there are word characters immediately after the dash
204
- if (dashPos < originalText.length && /[\w]/.test(originalText[dashPos])) {
205
- // Find the end of the word after the dash
206
- while (dashPos < originalText.length && /[\w]/.test(originalText[dashPos])) {
207
- dashPos++;
208
- }
209
- extendedEndPos = dashPos;
210
- }
211
- }
212
-
213
185
  // Check for apostrophe (straight or curly) followed by text
214
186
  if (extendedEndPos < originalText.length && /['']/.test(originalText[extendedEndPos])) {
215
187
  let apostrophePos = extendedEndPos;