codexparser 0.1.78 → 0.1.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +99 -112
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.78",
|
|
3
|
+
"version": "0.1.79",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -87,44 +87,24 @@ class CodexParser {
|
|
|
87
87
|
*/
|
|
88
88
|
scan(text) {
|
|
89
89
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
90
|
-
const abbreviations = Object.keys(this.abbreviations)
|
|
90
|
+
const abbreviations = Object.keys(this.abbreviations).filter((abbr) => abbr.length >= 3)
|
|
91
91
|
this.found = []
|
|
92
92
|
|
|
93
93
|
// Normalize text: remove curly quotes, replace periods before numbers with colons
|
|
94
|
-
let normalizedText = text
|
|
95
|
-
.replace(/[“”]/g, "") // Remove curly quotes
|
|
96
|
-
.replace(/\.(?=\d)/g, ":")
|
|
94
|
+
let normalizedText = text.replace(/[“”]/g, "").replace(/\.(?=\d)/g, ":")
|
|
97
95
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
98
96
|
let i = 0
|
|
99
97
|
|
|
100
98
|
while (i < lowerCaseText.length) {
|
|
101
99
|
let foundBook = null
|
|
102
|
-
let startIndex =
|
|
100
|
+
let startIndex = i
|
|
103
101
|
let matchedLength = 0
|
|
104
|
-
let hasOpeningParen = false
|
|
105
|
-
let parenStartIndex = -1
|
|
106
|
-
|
|
107
|
-
// Skip whitespace
|
|
108
|
-
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
109
|
-
i++
|
|
110
|
-
}
|
|
111
|
-
if (i >= lowerCaseText.length) break
|
|
112
|
-
|
|
113
|
-
// Check for opening parenthesis
|
|
114
|
-
if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
|
|
115
|
-
hasOpeningParen = true
|
|
116
|
-
parenStartIndex = i
|
|
117
|
-
i++
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
// Record potential start of reference
|
|
121
|
-
startIndex = i
|
|
122
102
|
|
|
123
103
|
// Check for book names or abbreviations
|
|
124
104
|
for (let book of fullNames) {
|
|
125
105
|
if (
|
|
126
106
|
lowerCaseText.startsWith(book.toLowerCase(), i) &&
|
|
127
|
-
(i + book.length >= lowerCaseText.length || /[\s
|
|
107
|
+
(i + book.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + book.length]))
|
|
128
108
|
) {
|
|
129
109
|
foundBook = book
|
|
130
110
|
matchedLength = book.length
|
|
@@ -135,7 +115,7 @@ class CodexParser {
|
|
|
135
115
|
for (let abbr of abbreviations) {
|
|
136
116
|
if (
|
|
137
117
|
lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
|
|
138
|
-
(i + abbr.length >= lowerCaseText.length || /[\s
|
|
118
|
+
(i + abbr.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + abbr.length]))
|
|
139
119
|
) {
|
|
140
120
|
foundBook = this.abbreviations[abbr]
|
|
141
121
|
matchedLength = abbr.length
|
|
@@ -145,109 +125,116 @@ class CodexParser {
|
|
|
145
125
|
}
|
|
146
126
|
|
|
147
127
|
if (foundBook) {
|
|
148
|
-
// Check if book is followed by a valid reference or version when booksOnly is false
|
|
149
|
-
let isFollowedByReference = false
|
|
150
128
|
let j = i + matchedLength
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
j++
|
|
154
|
-
}
|
|
155
|
-
// Check for digit (chapter number) or version suffix (LXX/MT)
|
|
156
|
-
if (
|
|
157
|
-
j < lowerCaseText.length &&
|
|
158
|
-
(/\d/.test(lowerCaseText[j]) || lowerCaseText.substring(j).match(/^(lxx|mt)\b/i))
|
|
159
|
-
) {
|
|
160
|
-
isFollowedByReference = true
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
if (!this.config.booksOnly && !hasOpeningParen && !isFollowedByReference) {
|
|
164
|
-
i++
|
|
165
|
-
continue
|
|
166
|
-
}
|
|
129
|
+
let currentBook = foundBook
|
|
130
|
+
let currentStartIndex = startIndex
|
|
167
131
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
chapterVerse += " "
|
|
175
|
-
i++
|
|
176
|
-
}
|
|
132
|
+
// Process multiple references for the same book
|
|
133
|
+
while (j < lowerCaseText.length) {
|
|
134
|
+
let chapterVerse = ""
|
|
135
|
+
let hasColon = false
|
|
136
|
+
let version = null
|
|
137
|
+
let refStart = j
|
|
177
138
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
139
|
+
// Skip spaces
|
|
140
|
+
while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
|
|
141
|
+
chapterVerse += normalizedText[j]
|
|
142
|
+
j++
|
|
143
|
+
}
|
|
144
|
+
refStart = j // Update start after spaces
|
|
145
|
+
|
|
146
|
+
// Next character must be a digit or version suffix
|
|
147
|
+
if (j < lowerCaseText.length) {
|
|
148
|
+
const nextChar = lowerCaseText[j]
|
|
149
|
+
const isVersion = lowerCaseText.substring(j).match(/^(lxx|mt)\b/i)
|
|
150
|
+
if (!/\d/.test(nextChar) && !isVersion && !this.config.booksOnly) {
|
|
151
|
+
break
|
|
152
|
+
}
|
|
153
|
+
} else if (!this.config.booksOnly) {
|
|
154
|
+
break
|
|
155
|
+
}
|
|
184
156
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
157
|
+
// Capture chapter-verse
|
|
158
|
+
while (j < lowerCaseText.length && /\d/.test(lowerCaseText[j])) {
|
|
159
|
+
chapterVerse += normalizedText[j]
|
|
160
|
+
j++
|
|
161
|
+
}
|
|
162
|
+
while (
|
|
163
|
+
j < lowerCaseText.length &&
|
|
164
|
+
(/[\d:,\-;]/.test(normalizedText[j]) || normalizedText[j] === " ")
|
|
165
|
+
) {
|
|
166
|
+
if (normalizedText[j] === ":") hasColon = true
|
|
167
|
+
chapterVerse += normalizedText[j]
|
|
168
|
+
if (normalizedText[j] === ";") break
|
|
169
|
+
j++
|
|
170
|
+
}
|
|
192
171
|
|
|
193
|
-
//
|
|
194
|
-
|
|
172
|
+
// Check for version suffix
|
|
173
|
+
let endIndex = j
|
|
174
|
+
const suffixMatch = normalizedText.substring(j).match(/\b(LXX|MT)\b/i)
|
|
195
175
|
if (suffixMatch) {
|
|
196
176
|
version = suffixMatch[0].toUpperCase()
|
|
197
177
|
endIndex += suffixMatch[0].length
|
|
198
|
-
|
|
178
|
+
j += suffixMatch[0].length
|
|
199
179
|
}
|
|
200
180
|
|
|
201
|
-
//
|
|
202
|
-
if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
|
|
203
|
-
endIndex = i + 1
|
|
204
|
-
i++
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Use original text for reference only (exclude parentheses)
|
|
208
|
-
const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
|
|
209
|
-
|
|
210
|
-
// Determine type
|
|
211
|
-
let type
|
|
181
|
+
// Store the reference
|
|
212
182
|
const ref = chapterVerse.trim()
|
|
213
|
-
if (this.config.booksOnly
|
|
214
|
-
type
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
183
|
+
if (ref.length > 0 || version || this.config.booksOnly) {
|
|
184
|
+
let type
|
|
185
|
+
if (this.config.booksOnly && !ref) {
|
|
186
|
+
type = "book_only"
|
|
187
|
+
} else if (ref.includes(":")) {
|
|
188
|
+
if (ref.includes("-")) {
|
|
189
|
+
const [start, end] = ref.split("-")
|
|
190
|
+
const startParts = start.split(":")
|
|
191
|
+
const endParts = end.split(":")
|
|
192
|
+
type =
|
|
193
|
+
startParts.length > 1 &&
|
|
194
|
+
endParts.length > 1 &&
|
|
195
|
+
startParts[0].trim() !== endParts[0].trim()
|
|
196
|
+
? "multi_chapter_verse_range"
|
|
197
|
+
: "chapter_verse_range"
|
|
198
|
+
} else if (ref.includes(",")) {
|
|
199
|
+
type = "comma_separated_verses"
|
|
200
|
+
} else {
|
|
201
|
+
type = "chapter_verse"
|
|
202
|
+
}
|
|
203
|
+
} else if (ref.includes("-")) {
|
|
204
|
+
type = "chapter_range"
|
|
205
|
+
} else if (/\d/.test(ref)) {
|
|
206
|
+
type = "single_chapter"
|
|
228
207
|
} else {
|
|
229
|
-
type = "
|
|
208
|
+
type = "book_only"
|
|
230
209
|
}
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
210
|
+
|
|
211
|
+
this.found.push({
|
|
212
|
+
book: currentBook,
|
|
213
|
+
reference: ref,
|
|
214
|
+
startIndex: currentStartIndex,
|
|
215
|
+
endIndex,
|
|
216
|
+
version,
|
|
217
|
+
type,
|
|
218
|
+
originalText: normalizedText.slice(currentStartIndex, endIndex),
|
|
219
|
+
})
|
|
237
220
|
}
|
|
238
221
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
222
|
+
// Handle semicolon for next reference
|
|
223
|
+
if (j < lowerCaseText.length && lowerCaseText[j] === ";") {
|
|
224
|
+
j++ // Move past semicolon
|
|
225
|
+
currentStartIndex = j // Reset start for next reference
|
|
226
|
+
// Skip spaces after semicolon
|
|
227
|
+
while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
|
|
228
|
+
j++
|
|
229
|
+
}
|
|
230
|
+
continue // Process next reference
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
// Exit if no semicolon or end of reference
|
|
234
|
+
break
|
|
250
235
|
}
|
|
236
|
+
|
|
237
|
+
i = j
|
|
251
238
|
} else {
|
|
252
239
|
i++
|
|
253
240
|
}
|