twl-generator 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/twl-matcher.js +12 -11
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "twl-generator",
|
|
3
|
-
"version": "1.2.1",
|
|
3
|
+
"version": "1.2.2",
|
|
4
4
|
"description": "Generate term-to-article lists from unfoldingWord en_tw archive for Bible books. Works in both Node.js (CLI) and React.js (browser) environments.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"bin": {
|
package/src/utils/twl-matcher.js
CHANGED
|
@@ -116,24 +116,24 @@ class PrefixTrie {
|
|
|
116
116
|
|
|
117
117
|
findMatches(text, startPos) {
|
|
118
118
|
// First try exact case matches
|
|
119
|
-
let matches = this._findMatchesInTree(this.exactCaseRoot, text, startPos, true);
|
|
119
|
+
let matches = this._findMatchesInTree(this.exactCaseRoot, text, startPos, true, text);
|
|
120
120
|
|
|
121
121
|
// If no exact case matches, try case-insensitive
|
|
122
122
|
if (matches.length === 0) {
|
|
123
|
-
matches = this._findMatchesInTree(this.lowerCaseRoot, text.toLowerCase(), startPos, false);
|
|
123
|
+
matches = this._findMatchesInTree(this.lowerCaseRoot, text.toLowerCase(), startPos, false, text);
|
|
124
124
|
}
|
|
125
125
|
|
|
126
126
|
return matches;
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
-
_findMatchesInTree(root,
|
|
129
|
+
_findMatchesInTree(root, searchText, startPos, isExactCase, originalText) {
|
|
130
130
|
const matches = [];
|
|
131
131
|
let node = root;
|
|
132
132
|
let currentPos = startPos;
|
|
133
133
|
|
|
134
134
|
// Try to match as long as possible
|
|
135
|
-
while (currentPos <
|
|
136
|
-
const char =
|
|
135
|
+
while (currentPos < searchText.length) {
|
|
136
|
+
const char = searchText[currentPos];
|
|
137
137
|
|
|
138
138
|
if (!node[char]) {
|
|
139
139
|
break; // No more matches possible
|
|
@@ -145,16 +145,17 @@ class PrefixTrie {
|
|
|
145
145
|
// If we found terms at this position, collect them
|
|
146
146
|
if (node._terms) {
|
|
147
147
|
const matchLength = currentPos - startPos;
|
|
148
|
-
|
|
148
|
+
// Always extract from the original text to preserve case
|
|
149
|
+
const originalMatchedText = originalText.substring(startPos, currentPos);
|
|
149
150
|
|
|
150
151
|
// Check if this is a valid word boundary match (both start and end)
|
|
151
152
|
const isStartBoundary = startPos === 0 ||
|
|
152
|
-
/[\s\p{P}]/.test(
|
|
153
|
-
!/[\w]/.test(
|
|
153
|
+
/[\s\p{P}]/.test(originalText[startPos - 1]) ||
|
|
154
|
+
!/[\w]/.test(originalText[startPos - 1]);
|
|
154
155
|
|
|
155
|
-
const isEndBoundary = currentPos >=
|
|
156
|
-
/[\s\p{P}]/.test(
|
|
157
|
-
!/[\w]/.test(
|
|
156
|
+
const isEndBoundary = currentPos >= originalText.length ||
|
|
157
|
+
/[\s\p{P}]/.test(originalText[currentPos]) ||
|
|
158
|
+
!/[\w]/.test(originalText[currentPos]);
|
|
158
159
|
|
|
159
160
|
const isWordBoundary = isStartBoundary && isEndBoundary;
|
|
160
161
|
|