codexparser 0.1.62 → 0.1.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +35 -21
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.62",
|
|
3
|
+
"version": "0.1.63",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,16 +74,14 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
.replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
80
|
-
.replace(/\s+/g, " ")
|
|
77
|
+
// Minimal normalization: fix periods before numbers, remove trailing periods
|
|
78
|
+
let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
81
79
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
82
80
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
83
81
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
84
82
|
let i = 0
|
|
85
83
|
|
|
86
|
-
const isValidChapterVerseChar = (char) => /[
|
|
84
|
+
const isValidChapterVerseChar = (char) => /[\d:,\-;]/.test(char)
|
|
87
85
|
const isNextBibleBook = (startIndex) => {
|
|
88
86
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
89
87
|
return (
|
|
@@ -101,6 +99,12 @@ class CodexParser {
|
|
|
101
99
|
let startIndex = -1
|
|
102
100
|
let matchedLength = 0
|
|
103
101
|
|
|
102
|
+
// Skip non-alphabetic characters (e.g., \n, —, spaces) before book
|
|
103
|
+
while (i < lowerCaseText.length && !/[A-Za-z]/.test(lowerCaseText[i])) {
|
|
104
|
+
i++
|
|
105
|
+
}
|
|
106
|
+
if (i >= lowerCaseText.length) break
|
|
107
|
+
|
|
104
108
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
105
109
|
const book = lowercaseBibleFullNames[j]
|
|
106
110
|
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
@@ -127,10 +131,14 @@ class CodexParser {
|
|
|
127
131
|
const references = []
|
|
128
132
|
const startOfReference = startIndex
|
|
129
133
|
|
|
130
|
-
|
|
134
|
+
// Capture chapter/verse, allowing spaces between book and reference
|
|
135
|
+
while (
|
|
136
|
+
i < normalizedText.length &&
|
|
137
|
+
(isValidChapterVerseChar(normalizedText[i]) || normalizedText[i] === " ")
|
|
138
|
+
) {
|
|
131
139
|
if (isNextBibleBook(i)) break
|
|
132
140
|
if (normalizedText[i] === ";") {
|
|
133
|
-
const formattedReference = chapterVerse.trim()
|
|
141
|
+
const formattedReference = chapterVerse.trim()
|
|
134
142
|
if (formattedReference) references.push(formattedReference)
|
|
135
143
|
chapterVerse = ""
|
|
136
144
|
i++
|
|
@@ -141,23 +149,29 @@ class CodexParser {
|
|
|
141
149
|
}
|
|
142
150
|
|
|
143
151
|
if (chapterVerse.trim().length > 0) {
|
|
144
|
-
const formattedReference = chapterVerse.trim()
|
|
152
|
+
const formattedReference = chapterVerse.trim()
|
|
145
153
|
if (formattedReference) references.push(formattedReference)
|
|
146
154
|
}
|
|
147
155
|
|
|
156
|
+
// Set endIndex to the current position
|
|
157
|
+
let endIndex = i
|
|
148
158
|
const suffixData = detectSuffix(i)
|
|
149
159
|
const suffix = suffixData ? suffixData.suffix : null
|
|
150
|
-
let endIndex = i // Set endIndex before suffix
|
|
151
160
|
if (suffixData) {
|
|
152
|
-
endIndex += suffixData.length
|
|
153
|
-
i += suffixData.length
|
|
161
|
+
endIndex += suffixData.length
|
|
162
|
+
i += suffixData.length
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Trim endIndex to exclude trailing non-reference characters
|
|
166
|
+
while (endIndex > startOfReference && /[^A-Za-z0-9]/.test(normalizedText[endIndex - 1])) {
|
|
167
|
+
endIndex--
|
|
154
168
|
}
|
|
155
169
|
|
|
156
|
-
references.forEach((
|
|
170
|
+
references.forEach((reference) => {
|
|
157
171
|
let type
|
|
158
|
-
if (
|
|
159
|
-
if (
|
|
160
|
-
const [start, end] =
|
|
172
|
+
if (reference.includes(":")) {
|
|
173
|
+
if (reference.includes("-")) {
|
|
174
|
+
const [start, end] = reference.split("-")
|
|
161
175
|
const startParts = start.split(":")
|
|
162
176
|
const endParts = end.split(":")
|
|
163
177
|
type =
|
|
@@ -166,12 +180,12 @@ class CodexParser {
|
|
|
166
180
|
startParts[0].trim() !== endParts[0].trim()
|
|
167
181
|
? "multi_chapter_verse_range"
|
|
168
182
|
: "chapter_verse_range"
|
|
169
|
-
} else if (
|
|
183
|
+
} else if (reference.includes(",")) {
|
|
170
184
|
type = "comma_separated_verses"
|
|
171
185
|
} else {
|
|
172
186
|
type = "chapter_verse"
|
|
173
187
|
}
|
|
174
|
-
} else if (
|
|
188
|
+
} else if (reference.includes("-")) {
|
|
175
189
|
type = "chapter_range"
|
|
176
190
|
} else {
|
|
177
191
|
type = "single_chapter"
|
|
@@ -179,12 +193,12 @@ class CodexParser {
|
|
|
179
193
|
|
|
180
194
|
this.found.push({
|
|
181
195
|
book: foundBook,
|
|
182
|
-
reference:
|
|
183
|
-
startIndex: startOfReference,
|
|
184
|
-
endIndex: endIndex,
|
|
196
|
+
reference: reference,
|
|
197
|
+
startIndex: startOfReference + 1,
|
|
198
|
+
endIndex: endIndex + 1,
|
|
185
199
|
version: suffix || null,
|
|
186
200
|
type,
|
|
187
|
-
originalText:
|
|
201
|
+
originalText: text.slice(startOfReference, endIndex),
|
|
188
202
|
})
|
|
189
203
|
})
|
|
190
204
|
} else {
|