twl-generator 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +3 -3
  2. package/src/index.js +83 -9
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.4.4",
3
+ "version": "1.4.6",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -50,7 +50,7 @@
50
50
  "csv-parse": "^5.5.6",
51
51
  "csv-stringify": "^6.5.0",
52
52
  "jszip": "^3.10.1",
53
- "tsv-quote-converters": "^1.1.13"
53
+ "tsv-quote-converters": "^1.1.14"
54
54
  },
55
55
  "peerDependencies": {
56
56
  "react": ">=16.8.0"
@@ -60,4 +60,4 @@
60
60
  "optional": true
61
61
  }
62
62
  }
63
- }
63
+ }
package/src/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { BibleBookData } from './common/books.js';
2
+ import { addGLQuoteCols, convertGLQuotes2OLQuotes } from 'tsv-quote-converters';
2
3
 
3
4
  const isBrowser = typeof window !== 'undefined';
4
5
 
@@ -829,6 +830,7 @@ function chooseArticleByGlQuote(glq, strongId, strongPivot, termMap, twMap, opts
829
830
  export async function generateTwlByBook(bookCode, options = {}) {
830
831
  // Extract dcsHost option with default
831
832
  const dcsHost = options.dcsHost || 'https://git.door43.org';
833
+ const quiet = !!options.quiet;
832
834
 
833
835
  // Load terms from en_tw zip file instead of local tw_strongs_list.json
834
836
  const termToArticles = await loadTermsFromEnTw(dcsHost);
@@ -844,8 +846,7 @@ export async function generateTwlByBook(bookCode, options = {}) {
844
846
  if (!meta) throw new Error(`Unknown book code: ${bookCode}`);
845
847
  const versesByChapter = await processUsfmForBook(meta.key, dcsHost);
846
848
 
847
- // Header without Strongs; keep GLQuote/GLOccurrence and add Variant of, Disambiguation
848
- const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'];
849
+ const header = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'Variant of', 'Disambiguation'];
849
850
  const outRows = [header.join('\t')];
850
851
 
851
852
  // ID generator
@@ -874,8 +875,22 @@ export async function generateTwlByBook(bookCode, options = {}) {
874
875
  const isVowel = (ch) => /[aeiou]/i.test(ch);
875
876
  const isConsonant = (ch) => /[a-z]/i.test(ch) && !isVowel(ch);
876
877
  const endsWithCVC = (w) => w.length >= 3 && isConsonant(w[w.length - 3]) && isVowel(w[w.length - 2]) && isConsonant(w[w.length - 1]) && !/[wxy]/i.test(w[w.length - 1]);
877
- const edForm = (w) => (/e$/i.test(w) ? w + 'd' : (/[^aeiou]y$/i.test(w) ? w.replace(/y$/i, 'ied') : (endsWithCVC(w) ? w + w[w.length - 1] + 'ed' : w + 'ed')));
878
- const ingForm = (w) => (/ie$/i.test(w) ? w.replace(/ie$/i, 'ying') : (/ee$/i.test(w) ? w + 'ing' : (/e$/i.test(w) ? w.replace(/e$/i, 'ing') : (endsWithCVC(w) ? w + w[w.length - 1] + 'ing' : w + 'ing'))));
878
+ const edForm = (w) => {
879
+ if (/e$/i.test(w)) return w + 'd';
880
+ if (/[^aeiou]y$/i.test(w)) return w.replace(/y$/i, 'ied');
881
+ // Do not double the final consonant for words ending in "er", "en", "or", "on", "al", or "el" (e.g., gather -> gathered)
882
+ const lastCh = w[w.length - 1];
883
+ if (endsWithCVC(w) && !/(?:er|en|or|on|al|el)$/i.test(w)) return w + lastCh + 'ed';
884
+ return w + 'ed';
885
+ };
886
+ const ingForm = (w) => {
887
+ if (/ie$/i.test(w)) return w.replace(/ie$/i, 'ying');
888
+ if (/ee$/i.test(w)) return w + 'ing';
889
+ if (/e$/i.test(w)) return w.replace(/e$/i, 'ing');
890
+ const lastCh = w[w.length - 1];
891
+ if (endsWithCVC(w) && !/(?:er|en|or|on|al|el)$/i.test(w)) return w + lastCh + 'ing';
892
+ return w + 'ing';
893
+ };
879
894
 
880
895
  const allowNoVariant = (base, match) => {
881
896
  const b = String(base || '');
@@ -929,8 +944,6 @@ export async function generateTwlByBook(bookCode, options = {}) {
929
944
  glq,
930
945
  String(occ),
931
946
  twLink,
932
- glq,
933
- String(occ),
934
947
  variantOf,
935
948
  disamb,
936
949
  ].join('\t'));
@@ -941,13 +954,12 @@ export async function generateTwlByBook(bookCode, options = {}) {
941
954
  // Build TSV and convert GL OrigWords back to OL using tsv-quote-converters
942
955
  let matchedTsv = outRows.join('\n');
943
956
  try {
944
- const { convertGLQuotes2OLQuotes } = await import('tsv-quote-converters');
945
957
  const conv = await convertGLQuotes2OLQuotes({
946
- bibleLinks: ['unfoldingWord/en_ult/master'],
958
+ bibleLink: 'unfoldingWord/en_ult/master',
947
959
  bookCode: String(meta.key || bookCode).toLowerCase(),
948
960
  tsvContent: matchedTsv,
949
961
  trySeparatorsAndOccurrences: true,
950
- quiet: true,
962
+ quiet,
951
963
  });
952
964
  if (conv && typeof conv.output === 'string' && conv.output.length) {
953
965
  matchedTsv = conv.output;
@@ -955,6 +967,68 @@ export async function generateTwlByBook(bookCode, options = {}) {
955
967
  } catch (e) {
956
968
  // If conversion fails (e.g., no network), fall back to unconverted TSV
957
969
  }
970
+
971
+ // Now add the actual GLQuote/GLOccurrence by calling addGLQuoteCols
972
+ try {
973
+ const result = await addGLQuoteCols({
974
+ bibleLinks: ['unfoldingWord/en_ult/master'],
975
+ bookCode: String(meta.key || bookCode).toLowerCase(),
976
+ tsvContent: matchedTsv,
977
+ trySeparatorsAndOccurrences: true,
978
+ usePreviousGLQuotes: true,
979
+ quiet,
980
+ });
981
+ if (result && typeof result.output === 'string' && result.output.length) {
982
+ matchedTsv = result.output;
983
+ // Reorder columns: move cols[5] and cols[6] to after cols[7] for every line
984
+ try {
985
+ const lines = String(matchedTsv || '').split('\n');
986
+ for (let i = 0; i < lines.length; i++) {
987
+ const cols = lines[i].split('\t');
988
+ // require at least 8 columns so cols[7] exists
989
+ if (cols.length >= 8) {
990
+ const removed = cols.splice(5, 2); // remove cols[5] and cols[6]
991
+ // after removal, original cols[7] is at index 5, so insert after it at index 6
992
+ const insertIndex = Math.min(6, cols.length);
993
+ cols.splice(insertIndex, 0, ...removed);
994
+ lines[i] = cols.join('\t');
995
+ }
996
+ }
997
+ matchedTsv = lines.join('\n');
998
+ } catch (err) {
999
+ // leave matchedTsv unchanged on error
1000
+ }
1001
+ }
1002
+ } catch (e) {
1003
+ try {
1004
+ const lines = String(matchedTsv || '').split('\n');
1005
+ if (lines.length > 0) {
1006
+ lines[0] = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Variant of', 'Disambiguation'].join('\t');
1007
+ const out = [lines[0]];
1008
+ for (let i = 1; i < lines.length; i++) {
1009
+ const cols = lines[i].split('\t');
1010
+ const g = (idx) => (cols[idx] !== undefined ? cols[idx] : '');
1011
+ const newRow = [
1012
+ g(0), // Reference
1013
+ g(1), // ID
1014
+ g(2), // Tags
1015
+ g(3), // OrigWords
1016
+ g(4), // Occurrence
1017
+ g(5), // TWLink
1018
+ g(3), // GLQuote (copy of OrigWords)
1019
+ g(4), // GLOccurrence (copy of Occurrence)
1020
+ g(6), // Variant of
1021
+ g(7), // Disambiguation
1022
+ ].join('\t');
1023
+ out.push(newRow);
1024
+ }
1025
+ matchedTsv = out.join('\n');
1026
+ }
1027
+ } catch (err) {
1028
+ // leave matchedTsv unchanged on any transformation error
1029
+ }
1030
+ }
1031
+
958
1032
  const noMatchHeader = ['Reference', 'ID', 'Tags', 'OrigWords', 'Occurrence', 'TWLink', 'GLQuote', 'GLOccurrence', 'Disambiguation'];
959
1033
  const noMatchTsv = [noMatchHeader.join('\t')].join('\n');
960
1034
  return { matchedTsv, noMatchTsv };