twl-generator 1.4.13 → 1.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.4.13",
3
+ "version": "1.4.15",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -51,7 +51,7 @@
51
51
  "csv-stringify": "^6.5.0",
52
52
  "en-inflectors": "^1.0.12",
53
53
  "jszip": "^3.10.1",
54
- "tsv-quote-converters": "^1.1.18",
54
+ "tsv-quote-converters": "^1.1.21",
55
55
  "usfm-alignment-remover": "^0.1.6"
56
56
  },
57
57
  "peerDependencies": {
package/src/index.js CHANGED
@@ -903,8 +903,10 @@ export async function generateTwlByBook(bookCode, options = {}) {
903
903
  const chapterNums = Object.keys(versesByChapter).map(n => parseInt(n, 10)).sort((a, b) => a - b);
904
904
  for (const c of chapterNums) {
905
905
  const verses = versesByChapter[c] || {};
906
- const verseNums = Object.keys(verses).map(n => parseInt(n, 10)).sort((a, b) => a - b);
907
- for (const v of verseNums) {
906
+ const verseNums = Object.keys(verses).filter(k => k !== 'front').map(n => parseInt(n, 10)).sort((a, b) => a - b);
907
+ // Chapter front matter (\d) is emitted as `${c}:front`, ordered before verse 1.
908
+ const orderedKeys = verses.front ? ['front', ...verseNums] : verseNums;
909
+ for (const v of orderedKeys) {
908
910
  const text = verses[v] || '';
909
911
  const matches = scanVerseMatches(text, trie);
910
912
  // Count occurrences per exact matchedText (case-sensitive)
@@ -194,11 +194,11 @@ class PrefixTrie {
194
194
 
195
195
  // Check if this is a valid word boundary match (both start and end)
196
196
  const isStartBoundary = extendedStartPos === 0 ||
197
- /[\s\p{P}]/.test(originalText[extendedStartPos - 1]) ||
197
+ /[\s\p{P}]/u.test(originalText[extendedStartPos - 1]) ||
198
198
  !/[\w]/.test(originalText[extendedStartPos - 1]);
199
199
 
200
200
  const isEndBoundary = extendedEndPos >= originalText.length ||
201
- /[\s\p{P}]/.test(originalText[extendedEndPos]) ||
201
+ /[\s\p{P}]/u.test(originalText[extendedEndPos]) ||
202
202
  !/[\w]/.test(originalText[extendedEndPos]);
203
203
 
204
204
  const isWordBoundary = isStartBoundary && isEndBoundary;
@@ -45,7 +45,9 @@ export const removeAllTagsExceptChapterVerse = (usfmContent) => {
45
45
  cleanContent = cleanContent.replace(/ +\\v +/g, '\n\\v ');
46
46
  cleanContent = cleanContent.replace(/ +\\c +/g, '\n\\c ');
47
47
  cleanContent = cleanContent.replace(/ *(\\q\d*|\\p|\\ts\\\*) */g, ' ');
48
- cleanContent = cleanContent.replace(/\\[ds].*?(\\|\n)/g, '$1');
48
+ // Strip section headings (\s, \s1, \sr, \sp, etc.) but preserve \d (chapter
49
+ // descriptions / psalm superscriptions) so front-matter TWLs can be generated.
50
+ cleanContent = cleanContent.replace(/\\s.*?(\\|\n)/g, '$1');
49
51
  cleanContent = cleanContent.replace(/ +/g, ' ');
50
52
  cleanContent = cleanContent.replace(/^ +$/g, '');
51
53
  cleanContent = cleanContent.replace(/\\f .*?\\f\*/g, ' ');
@@ -107,6 +109,16 @@ export function parseUsfmToVerses(usfm) {
107
109
  if (!versesObj[currentChapter]) {
108
110
  versesObj[currentChapter] = {};
109
111
  }
112
+ // Capture chapter front matter (\d description / psalm superscription) so it
113
+ // can produce `<chapter>:front` TWL rows. Other pre-verse markers (\s, \q, \p)
114
+ // have already been stripped, leaving the \d text in the chapter head.
115
+ const frontMatch = text.match(/\\d\s+([^\\]*)/);
116
+ if (frontMatch) {
117
+ const frontText = frontMatch[1].replace(/\s+/g, ' ').trim();
118
+ if (frontText) {
119
+ versesObj[currentChapter].front = frontText;
120
+ }
121
+ }
110
122
  } else if (tag === 'v') {
111
123
  if (!versesObj[currentChapter]) {
112
124
  versesObj[currentChapter] = {};