codexparser 0.1.62 → 0.1.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +21 -11
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.62",
|
|
3
|
+
"version": "0.1.64",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,16 +74,14 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
.replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
80
|
-
.replace(/\s+/g, " ")
|
|
77
|
+
// Minimal normalization: fix periods before numbers, remove trailing periods
|
|
78
|
+
let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
81
79
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
82
80
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
83
81
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
84
82
|
let i = 0
|
|
85
83
|
|
|
86
|
-
const isValidChapterVerseChar = (char) => /[
|
|
84
|
+
const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
|
|
87
85
|
const isNextBibleBook = (startIndex) => {
|
|
88
86
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
89
87
|
return (
|
|
@@ -101,6 +99,12 @@ class CodexParser {
|
|
|
101
99
|
let startIndex = -1
|
|
102
100
|
let matchedLength = 0
|
|
103
101
|
|
|
102
|
+
// Skip whitespace and special characters before checking for book
|
|
103
|
+
while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
|
|
104
|
+
i++
|
|
105
|
+
}
|
|
106
|
+
if (i >= lowerCaseText.length) break
|
|
107
|
+
|
|
104
108
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
105
109
|
const book = lowercaseBibleFullNames[j]
|
|
106
110
|
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
@@ -145,12 +149,18 @@ class CodexParser {
|
|
|
145
149
|
if (formattedReference) references.push(formattedReference)
|
|
146
150
|
}
|
|
147
151
|
|
|
152
|
+
// Set endIndex to the current position
|
|
153
|
+
let endIndex = i
|
|
148
154
|
const suffixData = detectSuffix(i)
|
|
149
155
|
const suffix = suffixData ? suffixData.suffix : null
|
|
150
|
-
let endIndex = i // Set endIndex before suffix
|
|
151
156
|
if (suffixData) {
|
|
152
|
-
endIndex += suffixData.length
|
|
153
|
-
i += suffixData.length
|
|
157
|
+
endIndex += suffixData.length
|
|
158
|
+
i += suffixData.length
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// Trim endIndex to exclude trailing whitespace or non-reference characters
|
|
162
|
+
while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
|
|
163
|
+
endIndex--
|
|
154
164
|
}
|
|
155
165
|
|
|
156
166
|
references.forEach((ref) => {
|
|
@@ -180,11 +190,11 @@ class CodexParser {
|
|
|
180
190
|
this.found.push({
|
|
181
191
|
book: foundBook,
|
|
182
192
|
reference: ref,
|
|
183
|
-
startIndex: startOfReference,
|
|
184
|
-
endIndex: endIndex,
|
|
193
|
+
startIndex: startOfReference + 1,
|
|
194
|
+
endIndex: endIndex + 1,
|
|
185
195
|
version: suffix || null,
|
|
186
196
|
type,
|
|
187
|
-
originalText:
|
|
197
|
+
originalText: text.slice(startOfReference, endIndex), // Use original text
|
|
188
198
|
})
|
|
189
199
|
})
|
|
190
200
|
} else {
|