codexparser 0.1.81 → 0.1.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +59 -131
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.81",
|
|
3
|
+
"version": "0.1.83",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -85,6 +85,11 @@ class CodexParser {
|
|
|
85
85
|
return singleChapterBook ? singleChapterBook[book][chapter] || [] : this.chapterVerses[book]?.[chapter] || []
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Scans text for scripture references and stores them in `this.found`.
|
|
90
|
+
* @param {string} text - The text to scan.
|
|
91
|
+
* @returns {CodexParser} The parser instance for method chaining.
|
|
92
|
+
*/
|
|
88
93
|
/**
|
|
89
94
|
* Scans text for scripture references and stores them in `this.found`.
|
|
90
95
|
* @param {string} text - The text to scan.
|
|
@@ -94,81 +99,53 @@ class CodexParser {
|
|
|
94
99
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
95
100
|
const abbreviations = Object.keys(this.abbreviations)
|
|
96
101
|
this.found = []
|
|
97
|
-
//
|
|
98
|
-
let normalizedText = text
|
|
99
|
-
.replace(/[“”]/g, "") // Remove curly quotes
|
|
100
|
-
.replace(/\.(?=\d)/g, ":") // Replace periods before digits with colons (e.g., "Re13.8" -> "Re13:8")
|
|
101
|
-
.replace(/\s+/g, " ") // Normalize multiple spaces to single
|
|
102
|
+
// Minimal normalization: fix periods before numbers, remove trailing periods
|
|
103
|
+
let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
102
104
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
103
105
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
104
106
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
105
107
|
let i = 0
|
|
106
108
|
|
|
107
|
-
|
|
108
|
-
console.log("[Scan] Normalized text:", normalizedText)
|
|
109
|
-
|
|
110
|
-
const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char) // Non-letter characters
|
|
109
|
+
const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
|
|
111
110
|
const isNextBibleBook = (startIndex) => {
|
|
112
111
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
113
|
-
// Check if the text starts with a book name or abbreviation followed by a digit
|
|
114
112
|
return (
|
|
115
|
-
lowercaseBibleFullNames.some((book) =>
|
|
116
|
-
|
|
117
|
-
const nextIndex = startIndex + book.length
|
|
118
|
-
const nextChar = lowerCaseText[nextIndex]
|
|
119
|
-
return nextChar && /\d/.test(nextChar)
|
|
120
|
-
}
|
|
121
|
-
return false
|
|
122
|
-
}) ||
|
|
123
|
-
lowercaseBibleAbbreviations.some((abbr) => {
|
|
124
|
-
if (textAfterCurrentPosition.startsWith(abbr)) {
|
|
125
|
-
const nextIndex = startIndex + abbr.length
|
|
126
|
-
const nextChar = lowerCaseText[nextIndex]
|
|
127
|
-
return nextChar && (/\d/.test(nextChar) || /\./.test(nextChar))
|
|
128
|
-
}
|
|
129
|
-
return false
|
|
130
|
-
})
|
|
113
|
+
lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
|
|
114
|
+
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
131
115
|
)
|
|
132
116
|
}
|
|
133
|
-
const detectSuffix = (startIndex
|
|
134
|
-
const suffixMatch =
|
|
135
|
-
return suffixMatch ? {
|
|
117
|
+
const detectSuffix = (startIndex) => {
|
|
118
|
+
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
119
|
+
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
136
120
|
}
|
|
137
121
|
|
|
138
122
|
while (i < lowerCaseText.length) {
|
|
139
123
|
let foundBook = null
|
|
124
|
+
let startIndex = -1
|
|
140
125
|
let matchedLength = 0
|
|
141
|
-
let originalBookText = ""
|
|
142
|
-
let startIndex = i
|
|
143
126
|
|
|
144
|
-
//
|
|
127
|
+
// Skip whitespace and special characters before checking for book
|
|
128
|
+
while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
|
|
129
|
+
i++
|
|
130
|
+
}
|
|
131
|
+
if (i >= lowerCaseText.length) break
|
|
132
|
+
|
|
145
133
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
146
134
|
const book = lowercaseBibleFullNames[j]
|
|
147
|
-
if (
|
|
148
|
-
lowerCaseText.startsWith(book, i) &&
|
|
149
|
-
(i + book.length >= lowerCaseText.length || /\d/.test(lowerCaseText[i + book.length]))
|
|
150
|
-
) {
|
|
135
|
+
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
151
136
|
foundBook = fullNames[j]
|
|
137
|
+
startIndex = i
|
|
152
138
|
matchedLength = book.length
|
|
153
|
-
originalBookText = text.slice(i, i + book.length)
|
|
154
|
-
console.log(`[Scan] Matched full book name: "${foundBook}" at index ${i}`)
|
|
155
139
|
}
|
|
156
140
|
}
|
|
157
141
|
|
|
158
|
-
// Check abbreviations
|
|
159
142
|
if (!foundBook) {
|
|
160
143
|
for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
|
|
161
144
|
const abbreviation = lowercaseBibleAbbreviations[k]
|
|
162
|
-
|
|
163
|
-
const regex = new RegExp(`^${abbrPattern}(\\.?\\s*\\d)`, "i")
|
|
164
|
-
const match = lowerCaseText.slice(i).match(regex)
|
|
165
|
-
if (match) {
|
|
145
|
+
if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
|
|
166
146
|
foundBook = this.abbreviations[abbreviations[k]]
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
console.log(
|
|
170
|
-
`[Scan] Matched abbreviation: "${abbreviations[k]}" -> "${foundBook}" at index ${i}`
|
|
171
|
-
)
|
|
147
|
+
startIndex = i
|
|
148
|
+
matchedLength = abbreviation.length
|
|
172
149
|
}
|
|
173
150
|
}
|
|
174
151
|
}
|
|
@@ -176,91 +153,52 @@ class CodexParser {
|
|
|
176
153
|
if (foundBook) {
|
|
177
154
|
i += matchedLength
|
|
178
155
|
let chapterVerse = ""
|
|
179
|
-
let originalChapterVerseText = ""
|
|
180
156
|
const references = []
|
|
157
|
+
const startOfReference = startIndex
|
|
181
158
|
|
|
182
|
-
// Capture chapter-verse until a letter (potential new book) or semicolon
|
|
183
159
|
while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
|
|
184
|
-
if (isNextBibleBook(i))
|
|
185
|
-
console.log(`[Scan] Detected next book at index ${i}, breaking`)
|
|
186
|
-
break
|
|
187
|
-
}
|
|
160
|
+
if (isNextBibleBook(i)) break
|
|
188
161
|
if (normalizedText[i] === ";") {
|
|
189
|
-
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9
|
|
190
|
-
if (formattedReference)
|
|
191
|
-
// Find the last digit in the reference
|
|
192
|
-
const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
|
|
193
|
-
let endIndex = i - 1 // Default to position before semicolon
|
|
194
|
-
if (lastDigitMatch) {
|
|
195
|
-
const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
|
|
196
|
-
endIndex = startIndex + matchedLength + lastDigitIndex
|
|
197
|
-
}
|
|
198
|
-
references.push({
|
|
199
|
-
reference: formattedReference,
|
|
200
|
-
originalText: (originalBookText + originalChapterVerseText).trim(),
|
|
201
|
-
startIndex,
|
|
202
|
-
endIndex,
|
|
203
|
-
})
|
|
204
|
-
}
|
|
162
|
+
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
163
|
+
if (formattedReference) references.push(formattedReference)
|
|
205
164
|
chapterVerse = ""
|
|
206
|
-
originalChapterVerseText = ""
|
|
207
|
-
originalBookText = foundBook // Reuse book for semicolon-separated references
|
|
208
|
-
startIndex = i + 1 // Start of next reference
|
|
209
165
|
i++
|
|
210
166
|
continue
|
|
211
167
|
}
|
|
212
168
|
chapterVerse += normalizedText[i]
|
|
213
|
-
originalChapterVerseText += text[i]
|
|
214
169
|
i++
|
|
215
170
|
}
|
|
216
171
|
|
|
217
|
-
// Add any remaining reference
|
|
218
172
|
if (chapterVerse.trim().length > 0) {
|
|
219
|
-
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9
|
|
220
|
-
if (formattedReference)
|
|
221
|
-
// Find the last digit in the reference
|
|
222
|
-
const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
|
|
223
|
-
let endIndex = i - 1 // Default to last character
|
|
224
|
-
if (lastDigitMatch) {
|
|
225
|
-
const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
|
|
226
|
-
endIndex = startIndex + matchedLength + lastDigitIndex
|
|
227
|
-
}
|
|
228
|
-
references.push({
|
|
229
|
-
reference: formattedReference,
|
|
230
|
-
originalText: (originalBookText + originalChapterVerseText).trim(),
|
|
231
|
-
startIndex,
|
|
232
|
-
endIndex,
|
|
233
|
-
})
|
|
234
|
-
}
|
|
173
|
+
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
174
|
+
if (formattedReference) references.push(formattedReference)
|
|
235
175
|
}
|
|
236
176
|
|
|
237
|
-
//
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
}
|
|
251
|
-
}
|
|
177
|
+
// Set endIndex to the current position
|
|
178
|
+
let endIndex = i
|
|
179
|
+
const suffixData = detectSuffix(i)
|
|
180
|
+
const suffix = suffixData ? suffixData.suffix : null
|
|
181
|
+
if (suffixData) {
|
|
182
|
+
endIndex += suffixData.length
|
|
183
|
+
i += suffixData.length
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Trim endIndex to exclude trailing whitespace or non-reference characters
|
|
187
|
+
while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
|
|
188
|
+
endIndex--
|
|
189
|
+
}
|
|
252
190
|
|
|
191
|
+
references.forEach((ref) => {
|
|
253
192
|
let type
|
|
254
|
-
|
|
255
|
-
if (this.config.booksOnly && !ref) {
|
|
256
|
-
type = "book_only"
|
|
257
|
-
} else if (ref.includes(":")) {
|
|
193
|
+
if (ref.includes(":")) {
|
|
258
194
|
if (ref.includes("-")) {
|
|
259
|
-
const [start, end] = ref.split("-")
|
|
260
|
-
const startParts = start.split(":")
|
|
261
|
-
const endParts = end.split(":")
|
|
195
|
+
const [start, end] = ref.split("-")
|
|
196
|
+
const startParts = start.split(":")
|
|
197
|
+
const endParts = end.split(":")
|
|
262
198
|
type =
|
|
263
|
-
startParts.length > 1 &&
|
|
199
|
+
startParts.length > 1 &&
|
|
200
|
+
endParts.length > 1 &&
|
|
201
|
+
startParts[0].trim() !== endParts[0].trim()
|
|
264
202
|
? "multi_chapter_verse_range"
|
|
265
203
|
: "chapter_verse_range"
|
|
266
204
|
} else if (ref.includes(",")) {
|
|
@@ -270,35 +208,25 @@ class CodexParser {
|
|
|
270
208
|
}
|
|
271
209
|
} else if (ref.includes("-")) {
|
|
272
210
|
type = "chapter_range"
|
|
273
|
-
} else if (/\d/.test(ref)) {
|
|
274
|
-
type = "single_chapter"
|
|
275
211
|
} else {
|
|
276
|
-
type = "
|
|
212
|
+
type = "single_chapter"
|
|
277
213
|
}
|
|
278
214
|
|
|
279
|
-
|
|
215
|
+
this.found.push({
|
|
280
216
|
book: foundBook,
|
|
281
217
|
reference: ref,
|
|
282
|
-
|
|
218
|
+
startIndex: startOfReference + 1,
|
|
219
|
+
endIndex: endIndex + 1,
|
|
220
|
+
version: suffix || null,
|
|
283
221
|
type,
|
|
284
|
-
originalText,
|
|
285
|
-
|
|
286
|
-
endIndex: refObj.endIndex,
|
|
287
|
-
}
|
|
288
|
-
this.found.push(referenceObj)
|
|
289
|
-
console.log(`[Scan] Stored reference: ${JSON.stringify(referenceObj)}`)
|
|
222
|
+
originalText: text.slice(startOfReference, endIndex), // Use original text
|
|
223
|
+
})
|
|
290
224
|
})
|
|
291
|
-
|
|
292
|
-
// Skip any trailing spaces after the reference
|
|
293
|
-
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
294
|
-
i++
|
|
295
|
-
}
|
|
296
225
|
} else {
|
|
297
226
|
i++
|
|
298
227
|
}
|
|
299
228
|
}
|
|
300
229
|
|
|
301
|
-
console.log("[Scan] Final found references:", JSON.stringify(this.found, null, 2))
|
|
302
230
|
return this
|
|
303
231
|
}
|
|
304
232
|
|