twl-generator 1.4.0 → 1.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "twl-generator",
3
- "version": "1.4.0",
3
+ "version": "1.4.1",
4
4
  "description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -160,16 +160,86 @@ class PrefixTrie {
160
160
  if (node._terms) {
161
161
  const matchLength = currentPos - startPos;
162
162
  // Always extract from the original text to preserve case
163
- const originalMatchedText = originalText.substring(startPos, currentPos);
163
+ let originalMatchedText = originalText.substring(startPos, currentPos);
164
+
165
+ // Extend match backwards to include dash-connected words and possessive forms
166
+ let extendedStartPos = startPos;
167
+
168
+ // Check backwards for dash preceded by word characters (no space between)
169
+ if (extendedStartPos > 0 && originalText[extendedStartPos - 1] === '-') {
170
+ let dashPos = extendedStartPos - 1;
171
+ dashPos--; // Move before the dash
172
+ // Check if there are word characters immediately before the dash
173
+ if (dashPos >= 0 && /[\w]/.test(originalText[dashPos])) {
174
+ // Find the start of the word before the dash
175
+ while (dashPos >= 0 && /[\w]/.test(originalText[dashPos])) {
176
+ dashPos--;
177
+ }
178
+ extendedStartPos = dashPos + 1;
179
+ }
180
+ }
181
+
182
+ // Check backwards for apostrophe (straight or curly) preceded by text
183
+ if (extendedStartPos > 0 && /['']/.test(originalText[extendedStartPos - 1])) {
184
+ let apostrophePos = extendedStartPos - 1;
185
+ apostrophePos--; // Move before the apostrophe
186
+ // Check if there are word characters immediately before the apostrophe
187
+ if (apostrophePos >= 0 && /[\w]/.test(originalText[apostrophePos])) {
188
+ // Find the start of the text before the apostrophe
189
+ while (apostrophePos >= 0 && /[\w]/.test(originalText[apostrophePos])) {
190
+ apostrophePos--;
191
+ }
192
+ extendedStartPos = apostrophePos + 1;
193
+ }
194
+ }
195
+
196
+ // Extend match forwards to include dash-connected words and possessive forms
197
+ let extendedEndPos = currentPos;
198
+
199
+ // Check for dash followed by word characters (no space between)
200
+ if (extendedEndPos < originalText.length && originalText[extendedEndPos] === '-') {
201
+ let dashPos = extendedEndPos;
202
+ dashPos++; // Move past the dash
203
+ // Check if there are word characters immediately after the dash
204
+ if (dashPos < originalText.length && /[\w]/.test(originalText[dashPos])) {
205
+ // Find the end of the word after the dash
206
+ while (dashPos < originalText.length && /[\w]/.test(originalText[dashPos])) {
207
+ dashPos++;
208
+ }
209
+ extendedEndPos = dashPos;
210
+ }
211
+ }
212
+
213
+ // Check for apostrophe (straight or curly) followed by text
214
+ if (extendedEndPos < originalText.length && /['']/.test(originalText[extendedEndPos])) {
215
+ let apostrophePos = extendedEndPos;
216
+ apostrophePos++; // Move past the apostrophe
217
+ // Check if there are word characters immediately after the apostrophe
218
+ if (apostrophePos < originalText.length && /[\w]/.test(originalText[apostrophePos])) {
219
+ // Find the end of the text after the apostrophe
220
+ while (apostrophePos < originalText.length && /[\w]/.test(originalText[apostrophePos])) {
221
+ apostrophePos++;
222
+ }
223
+ extendedEndPos = apostrophePos;
224
+ } else {
225
+ // Include the apostrophe even if no text follows (for possessives ending in s)
226
+ extendedEndPos = apostrophePos;
227
+ }
228
+ }
229
+
230
+ // Update the matched text if we extended it
231
+ if (extendedStartPos < startPos || extendedEndPos > currentPos) {
232
+ originalMatchedText = originalText.substring(extendedStartPos, extendedEndPos);
233
+ }
164
234
 
165
235
  // Check if this is a valid word boundary match (both start and end)
166
- const isStartBoundary = startPos === 0 ||
167
- /[\s\p{P}]/.test(originalText[startPos - 1]) ||
168
- !/[\w]/.test(originalText[startPos - 1]);
236
+ const isStartBoundary = extendedStartPos === 0 ||
237
+ /[\s\p{P}]/.test(originalText[extendedStartPos - 1]) ||
238
+ !/[\w]/.test(originalText[extendedStartPos - 1]);
169
239
 
170
- const isEndBoundary = currentPos >= originalText.length ||
171
- /[\s\p{P}]/.test(originalText[currentPos]) ||
172
- !/[\w]/.test(originalText[currentPos]);
240
+ const isEndBoundary = extendedEndPos >= originalText.length ||
241
+ /[\s\p{P}]/.test(originalText[extendedEndPos]) ||
242
+ !/[\w]/.test(originalText[extendedEndPos]);
173
243
 
174
244
  const isWordBoundary = isStartBoundary && isEndBoundary;
175
245
 
@@ -178,8 +248,9 @@ class PrefixTrie {
178
248
  matches.push({
179
249
  term: termData.term,
180
250
  articles: termData.articles,
181
- matchedText: originalMatchedText, // Use the original text, not the normalized version
182
- length: matchLength,
251
+ matchedText: originalMatchedText, // Use the extended matched text
252
+ length: originalMatchedText.length, // Use extended length
253
+ originalLength: matchLength, // Keep track of original match length for advancement
183
254
  priority: termData.priority,
184
255
  isExactCase: isExactCase
185
256
  });
@@ -283,9 +354,11 @@ function findMatches(verseText, termTrie) {
283
354
  priority: bestMatch.priority
284
355
  });
285
356
 
286
- // Move past the matched text
287
- processedText += matchedText;
288
- currentPos += bestMatch.length;
357
+ // Move past only the original matched text (not the extended part)
358
+ // This allows finding additional matches within the extended portion
359
+ const advanceBy = bestMatch.originalLength || bestMatch.length;
360
+ processedText += normalizedText.substring(currentPos, currentPos + advanceBy);
361
+ currentPos += advanceBy;
289
362
  } else {
290
363
  // No match found, move to next character/word boundary
291
364
  const nextWordBoundary = normalizedText.substring(currentPos).search(/[\s\p{P}]/u);