codexparser 0.1.61 → 0.1.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +35 -29
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.61",
|
|
3
|
+
"version": "0.1.63",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,16 +74,14 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
.replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
80
|
-
.replace(/\s+/g, " ")
|
|
77
|
+
// Minimal normalization: fix periods before numbers, remove trailing periods
|
|
78
|
+
let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
81
79
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
82
80
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
83
81
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
84
82
|
let i = 0
|
|
85
83
|
|
|
86
|
-
const isValidChapterVerseChar = (char) => /[
|
|
84
|
+
const isValidChapterVerseChar = (char) => /[\d:,\-;]/.test(char)
|
|
87
85
|
const isNextBibleBook = (startIndex) => {
|
|
88
86
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
89
87
|
return (
|
|
@@ -93,13 +91,7 @@ class CodexParser {
|
|
|
93
91
|
}
|
|
94
92
|
const detectSuffix = (startIndex) => {
|
|
95
93
|
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
96
|
-
return suffixMatch
|
|
97
|
-
? {
|
|
98
|
-
suffix: suffixMatch[0].toUpperCase(),
|
|
99
|
-
length:
|
|
100
|
-
suffixMatch[0].length + (normalizedText[startIndex + suffixMatch[0].length] === " " ? 1 : 0),
|
|
101
|
-
}
|
|
102
|
-
: null
|
|
94
|
+
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
103
95
|
}
|
|
104
96
|
|
|
105
97
|
while (i < lowerCaseText.length) {
|
|
@@ -107,6 +99,12 @@ class CodexParser {
|
|
|
107
99
|
let startIndex = -1
|
|
108
100
|
let matchedLength = 0
|
|
109
101
|
|
|
102
|
+
// Skip non-alphabetic characters (e.g., \n, —, spaces) before book
|
|
103
|
+
while (i < lowerCaseText.length && !/[A-Za-z]/.test(lowerCaseText[i])) {
|
|
104
|
+
i++
|
|
105
|
+
}
|
|
106
|
+
if (i >= lowerCaseText.length) break
|
|
107
|
+
|
|
110
108
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
111
109
|
const book = lowercaseBibleFullNames[j]
|
|
112
110
|
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
@@ -133,10 +131,14 @@ class CodexParser {
|
|
|
133
131
|
const references = []
|
|
134
132
|
const startOfReference = startIndex
|
|
135
133
|
|
|
136
|
-
|
|
134
|
+
// Capture chapter/verse, allowing spaces between book and reference
|
|
135
|
+
while (
|
|
136
|
+
i < normalizedText.length &&
|
|
137
|
+
(isValidChapterVerseChar(normalizedText[i]) || normalizedText[i] === " ")
|
|
138
|
+
) {
|
|
137
139
|
if (isNextBibleBook(i)) break
|
|
138
140
|
if (normalizedText[i] === ";") {
|
|
139
|
-
const formattedReference = chapterVerse.trim()
|
|
141
|
+
const formattedReference = chapterVerse.trim()
|
|
140
142
|
if (formattedReference) references.push(formattedReference)
|
|
141
143
|
chapterVerse = ""
|
|
142
144
|
i++
|
|
@@ -147,25 +149,29 @@ class CodexParser {
|
|
|
147
149
|
}
|
|
148
150
|
|
|
149
151
|
if (chapterVerse.trim().length > 0) {
|
|
150
|
-
const formattedReference = chapterVerse.trim()
|
|
152
|
+
const formattedReference = chapterVerse.trim()
|
|
151
153
|
if (formattedReference) references.push(formattedReference)
|
|
152
154
|
}
|
|
153
155
|
|
|
156
|
+
// Set endIndex to the current position
|
|
157
|
+
let endIndex = i
|
|
154
158
|
const suffixData = detectSuffix(i)
|
|
155
159
|
const suffix = suffixData ? suffixData.suffix : null
|
|
156
|
-
if (suffixData)
|
|
160
|
+
if (suffixData) {
|
|
161
|
+
endIndex += suffixData.length
|
|
162
|
+
i += suffixData.length
|
|
163
|
+
}
|
|
157
164
|
|
|
158
|
-
//
|
|
159
|
-
|
|
160
|
-
if (endIndex > 0 && normalizedText[endIndex - 1] === " ") {
|
|
165
|
+
// Trim endIndex to exclude trailing non-reference characters
|
|
166
|
+
while (endIndex > startOfReference && /[^A-Za-z0-9]/.test(normalizedText[endIndex - 1])) {
|
|
161
167
|
endIndex--
|
|
162
168
|
}
|
|
163
169
|
|
|
164
|
-
references.forEach((
|
|
170
|
+
references.forEach((reference) => {
|
|
165
171
|
let type
|
|
166
|
-
if (
|
|
167
|
-
if (
|
|
168
|
-
const [start, end] =
|
|
172
|
+
if (reference.includes(":")) {
|
|
173
|
+
if (reference.includes("-")) {
|
|
174
|
+
const [start, end] = reference.split("-")
|
|
169
175
|
const startParts = start.split(":")
|
|
170
176
|
const endParts = end.split(":")
|
|
171
177
|
type =
|
|
@@ -174,12 +180,12 @@ class CodexParser {
|
|
|
174
180
|
startParts[0].trim() !== endParts[0].trim()
|
|
175
181
|
? "multi_chapter_verse_range"
|
|
176
182
|
: "chapter_verse_range"
|
|
177
|
-
} else if (
|
|
183
|
+
} else if (reference.includes(",")) {
|
|
178
184
|
type = "comma_separated_verses"
|
|
179
185
|
} else {
|
|
180
186
|
type = "chapter_verse"
|
|
181
187
|
}
|
|
182
|
-
} else if (
|
|
188
|
+
} else if (reference.includes("-")) {
|
|
183
189
|
type = "chapter_range"
|
|
184
190
|
} else {
|
|
185
191
|
type = "single_chapter"
|
|
@@ -187,12 +193,12 @@ class CodexParser {
|
|
|
187
193
|
|
|
188
194
|
this.found.push({
|
|
189
195
|
book: foundBook,
|
|
190
|
-
reference:
|
|
191
|
-
startIndex: startOfReference,
|
|
192
|
-
endIndex: endIndex,
|
|
196
|
+
reference: reference,
|
|
197
|
+
startIndex: startOfReference + 1,
|
|
198
|
+
endIndex: endIndex + 1,
|
|
193
199
|
version: suffix || null,
|
|
194
200
|
type,
|
|
195
|
-
originalText:
|
|
201
|
+
originalText: text.slice(startOfReference, endIndex),
|
|
196
202
|
})
|
|
197
203
|
})
|
|
198
204
|
} else {
|