codexparser 0.1.82 → 0.1.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +60 -121
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.82",
|
|
3
|
+
"version": "0.1.83",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -85,6 +85,11 @@ class CodexParser {
|
|
|
85
85
|
return singleChapterBook ? singleChapterBook[book][chapter] || [] : this.chapterVerses[book]?.[chapter] || []
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Scans text for scripture references and stores them in `this.found`.
|
|
90
|
+
* @param {string} text - The text to scan.
|
|
91
|
+
* @returns {CodexParser} The parser instance for method chaining.
|
|
92
|
+
*/
|
|
88
93
|
/**
|
|
89
94
|
* Scans text for scripture references and stores them in `this.found`.
|
|
90
95
|
* @param {string} text - The text to scan.
|
|
@@ -94,74 +99,53 @@ class CodexParser {
|
|
|
94
99
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
95
100
|
const abbreviations = Object.keys(this.abbreviations)
|
|
96
101
|
this.found = []
|
|
97
|
-
//
|
|
98
|
-
let normalizedText = text
|
|
99
|
-
.replace(/[“”]/g, "") // Remove curly quotes
|
|
100
|
-
.replace(/\.(?=\d)/g, ":") // Replace periods before digits with colons (e.g., "Re13.8" -> "Re13:8")
|
|
101
|
-
.replace(/\s+/g, " ") // Normalize multiple spaces to single
|
|
102
|
+
// Minimal normalization: fix periods before numbers, remove trailing periods
|
|
103
|
+
let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
102
104
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
103
105
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
104
106
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
105
107
|
let i = 0
|
|
106
108
|
|
|
107
|
-
const isValidChapterVerseChar = (char) => /[
|
|
109
|
+
const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
|
|
108
110
|
const isNextBibleBook = (startIndex) => {
|
|
109
111
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
110
|
-
// Check if the text starts with a book name or abbreviation followed by a digit
|
|
111
112
|
return (
|
|
112
|
-
lowercaseBibleFullNames.some((book) =>
|
|
113
|
-
|
|
114
|
-
const nextIndex = startIndex + book.length
|
|
115
|
-
const nextChar = lowerCaseText[nextIndex]
|
|
116
|
-
return nextChar && /\d/.test(nextChar)
|
|
117
|
-
}
|
|
118
|
-
return false
|
|
119
|
-
}) ||
|
|
120
|
-
lowercaseBibleAbbreviations.some((abbr) => {
|
|
121
|
-
if (textAfterCurrentPosition.startsWith(abbr)) {
|
|
122
|
-
const nextIndex = startIndex + abbr.length
|
|
123
|
-
const nextChar = lowerCaseText[nextIndex]
|
|
124
|
-
return nextChar && (/\d/.test(nextChar) || /\./.test(nextChar))
|
|
125
|
-
}
|
|
126
|
-
return false
|
|
127
|
-
})
|
|
113
|
+
lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
|
|
114
|
+
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
128
115
|
)
|
|
129
116
|
}
|
|
130
|
-
const detectSuffix = (startIndex
|
|
131
|
-
const suffixMatch =
|
|
132
|
-
return suffixMatch ? {
|
|
117
|
+
const detectSuffix = (startIndex) => {
|
|
118
|
+
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
119
|
+
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
133
120
|
}
|
|
134
121
|
|
|
135
122
|
while (i < lowerCaseText.length) {
|
|
136
123
|
let foundBook = null
|
|
124
|
+
let startIndex = -1
|
|
137
125
|
let matchedLength = 0
|
|
138
|
-
let originalBookText = ""
|
|
139
|
-
let startIndex = i
|
|
140
126
|
|
|
141
|
-
//
|
|
127
|
+
// Skip whitespace and special characters before checking for book
|
|
128
|
+
while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
|
|
129
|
+
i++
|
|
130
|
+
}
|
|
131
|
+
if (i >= lowerCaseText.length) break
|
|
132
|
+
|
|
142
133
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
143
134
|
const book = lowercaseBibleFullNames[j]
|
|
144
|
-
if (
|
|
145
|
-
lowerCaseText.startsWith(book, i) &&
|
|
146
|
-
(i + book.length >= lowerCaseText.length || /\d/.test(lowerCaseText[i + book.length]))
|
|
147
|
-
) {
|
|
135
|
+
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
148
136
|
foundBook = fullNames[j]
|
|
137
|
+
startIndex = i
|
|
149
138
|
matchedLength = book.length
|
|
150
|
-
originalBookText = text.slice(i, i + book.length)
|
|
151
139
|
}
|
|
152
140
|
}
|
|
153
141
|
|
|
154
|
-
// Check abbreviations
|
|
155
142
|
if (!foundBook) {
|
|
156
143
|
for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
|
|
157
144
|
const abbreviation = lowercaseBibleAbbreviations[k]
|
|
158
|
-
|
|
159
|
-
const regex = new RegExp(`^${abbrPattern}(\\.?\\s*\\d)`, "i")
|
|
160
|
-
const match = lowerCaseText.slice(i).match(regex)
|
|
161
|
-
if (match) {
|
|
145
|
+
if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
|
|
162
146
|
foundBook = this.abbreviations[abbreviations[k]]
|
|
163
|
-
|
|
164
|
-
|
|
147
|
+
startIndex = i
|
|
148
|
+
matchedLength = abbreviation.length
|
|
165
149
|
}
|
|
166
150
|
}
|
|
167
151
|
}
|
|
@@ -169,90 +153,52 @@ class CodexParser {
|
|
|
169
153
|
if (foundBook) {
|
|
170
154
|
i += matchedLength
|
|
171
155
|
let chapterVerse = ""
|
|
172
|
-
let originalChapterVerseText = ""
|
|
173
156
|
const references = []
|
|
157
|
+
const startOfReference = startIndex
|
|
174
158
|
|
|
175
|
-
// Capture chapter-verse until a letter (potential new book) or semicolon
|
|
176
159
|
while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
|
|
177
|
-
if (isNextBibleBook(i)) {
|
|
178
|
-
break
|
|
179
|
-
}
|
|
160
|
+
if (isNextBibleBook(i)) break
|
|
180
161
|
if (normalizedText[i] === ";") {
|
|
181
|
-
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9
|
|
182
|
-
if (formattedReference) {
|
|
183
|
-
// Find the last digit in the reference
|
|
184
|
-
const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
|
|
185
|
-
let endIndex = i - 1 // Default to position before semicolon
|
|
186
|
-
if (lastDigitMatch) {
|
|
187
|
-
const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
|
|
188
|
-
endIndex = startIndex + matchedLength + lastDigitIndex
|
|
189
|
-
}
|
|
190
|
-
references.push({
|
|
191
|
-
reference: formattedReference,
|
|
192
|
-
originalText: (originalBookText + originalChapterVerseText).trim(),
|
|
193
|
-
startIndex,
|
|
194
|
-
endIndex,
|
|
195
|
-
})
|
|
196
|
-
}
|
|
162
|
+
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
163
|
+
if (formattedReference) references.push(formattedReference)
|
|
197
164
|
chapterVerse = ""
|
|
198
|
-
originalChapterVerseText = ""
|
|
199
|
-
originalBookText = foundBook // Reuse book for semicolon-separated references
|
|
200
|
-
startIndex = i + 1 // Start of next reference
|
|
201
165
|
i++
|
|
202
166
|
continue
|
|
203
167
|
}
|
|
204
168
|
chapterVerse += normalizedText[i]
|
|
205
|
-
originalChapterVerseText += text[i]
|
|
206
169
|
i++
|
|
207
170
|
}
|
|
208
171
|
|
|
209
|
-
// Add any remaining reference
|
|
210
172
|
if (chapterVerse.trim().length > 0) {
|
|
211
|
-
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9
|
|
212
|
-
if (formattedReference) {
|
|
213
|
-
// Find the last digit in the reference
|
|
214
|
-
const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
|
|
215
|
-
let endIndex = i - 1 // Default to last character
|
|
216
|
-
if (lastDigitMatch) {
|
|
217
|
-
const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
|
|
218
|
-
endIndex = startIndex + matchedLength + lastDigitIndex
|
|
219
|
-
}
|
|
220
|
-
references.push({
|
|
221
|
-
reference: formattedReference,
|
|
222
|
-
originalText: (originalBookText + originalChapterVerseText).trim(),
|
|
223
|
-
startIndex,
|
|
224
|
-
endIndex,
|
|
225
|
-
})
|
|
226
|
-
}
|
|
173
|
+
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
174
|
+
if (formattedReference) references.push(formattedReference)
|
|
227
175
|
}
|
|
228
176
|
|
|
229
|
-
//
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
originalText += ` ${version}`
|
|
238
|
-
i += suffix.length
|
|
239
|
-
// Update endIndex if version suffix follows a digit
|
|
240
|
-
if (refObj.endIndex === i - suffix.length - 1) {
|
|
241
|
-
refObj.endIndex = i - 1
|
|
242
|
-
}
|
|
243
|
-
}
|
|
177
|
+
// Set endIndex to the current position
|
|
178
|
+
let endIndex = i
|
|
179
|
+
const suffixData = detectSuffix(i)
|
|
180
|
+
const suffix = suffixData ? suffixData.suffix : null
|
|
181
|
+
if (suffixData) {
|
|
182
|
+
endIndex += suffixData.length
|
|
183
|
+
i += suffixData.length
|
|
184
|
+
}
|
|
244
185
|
|
|
186
|
+
// Trim endIndex to exclude trailing whitespace or non-reference characters
|
|
187
|
+
while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
|
|
188
|
+
endIndex--
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
references.forEach((ref) => {
|
|
245
192
|
let type
|
|
246
|
-
|
|
247
|
-
if (this.config.booksOnly && !ref) {
|
|
248
|
-
type = "book_only"
|
|
249
|
-
} else if (ref.includes(":")) {
|
|
193
|
+
if (ref.includes(":")) {
|
|
250
194
|
if (ref.includes("-")) {
|
|
251
|
-
const [start, end] = ref.split("-")
|
|
252
|
-
const startParts = start.split(":")
|
|
253
|
-
const endParts = end.split(":")
|
|
195
|
+
const [start, end] = ref.split("-")
|
|
196
|
+
const startParts = start.split(":")
|
|
197
|
+
const endParts = end.split(":")
|
|
254
198
|
type =
|
|
255
|
-
startParts.length > 1 &&
|
|
199
|
+
startParts.length > 1 &&
|
|
200
|
+
endParts.length > 1 &&
|
|
201
|
+
startParts[0].trim() !== endParts[0].trim()
|
|
256
202
|
? "multi_chapter_verse_range"
|
|
257
203
|
: "chapter_verse_range"
|
|
258
204
|
} else if (ref.includes(",")) {
|
|
@@ -262,32 +208,25 @@ class CodexParser {
|
|
|
262
208
|
}
|
|
263
209
|
} else if (ref.includes("-")) {
|
|
264
210
|
type = "chapter_range"
|
|
265
|
-
} else if (/\d/.test(ref)) {
|
|
266
|
-
type = "single_chapter"
|
|
267
211
|
} else {
|
|
268
|
-
type = "
|
|
212
|
+
type = "single_chapter"
|
|
269
213
|
}
|
|
270
214
|
|
|
271
|
-
|
|
215
|
+
this.found.push({
|
|
272
216
|
book: foundBook,
|
|
273
217
|
reference: ref,
|
|
274
|
-
|
|
218
|
+
startIndex: startOfReference + 1,
|
|
219
|
+
endIndex: endIndex + 1,
|
|
220
|
+
version: suffix || null,
|
|
275
221
|
type,
|
|
276
|
-
originalText,
|
|
277
|
-
|
|
278
|
-
endIndex: refObj.endIndex,
|
|
279
|
-
}
|
|
280
|
-
this.found.push(referenceObj)
|
|
222
|
+
originalText: text.slice(startOfReference, endIndex), // Use original text
|
|
223
|
+
})
|
|
281
224
|
})
|
|
282
|
-
|
|
283
|
-
// Skip any trailing spaces after the reference
|
|
284
|
-
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
285
|
-
i++
|
|
286
|
-
}
|
|
287
225
|
} else {
|
|
288
226
|
i++
|
|
289
227
|
}
|
|
290
228
|
}
|
|
229
|
+
|
|
291
230
|
return this
|
|
292
231
|
}
|
|
293
232
|
|