codexparser 0.1.77 → 0.1.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +140 -113
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.77",
|
|
3
|
+
"version": "0.1.79",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -87,44 +87,24 @@ class CodexParser {
|
|
|
87
87
|
*/
|
|
88
88
|
scan(text) {
|
|
89
89
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
90
|
-
const abbreviations = Object.keys(this.abbreviations)
|
|
90
|
+
const abbreviations = Object.keys(this.abbreviations).filter((abbr) => abbr.length >= 3)
|
|
91
91
|
this.found = []
|
|
92
92
|
|
|
93
93
|
// Normalize text: remove curly quotes, replace periods before numbers with colons
|
|
94
|
-
let normalizedText = text
|
|
95
|
-
.replace(/[“”]/g, "") // Remove curly quotes
|
|
96
|
-
.replace(/\.(?=\d)/g, ":")
|
|
94
|
+
let normalizedText = text.replace(/[“”]/g, "").replace(/\.(?=\d)/g, ":")
|
|
97
95
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
98
96
|
let i = 0
|
|
99
97
|
|
|
100
98
|
while (i < lowerCaseText.length) {
|
|
101
99
|
let foundBook = null
|
|
102
|
-
let startIndex =
|
|
100
|
+
let startIndex = i
|
|
103
101
|
let matchedLength = 0
|
|
104
|
-
let hasOpeningParen = false
|
|
105
|
-
let parenStartIndex = -1
|
|
106
|
-
|
|
107
|
-
// Skip whitespace
|
|
108
|
-
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
109
|
-
i++
|
|
110
|
-
}
|
|
111
|
-
if (i >= lowerCaseText.length) break
|
|
112
|
-
|
|
113
|
-
// Check for opening parenthesis
|
|
114
|
-
if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
|
|
115
|
-
hasOpeningParen = true
|
|
116
|
-
parenStartIndex = i
|
|
117
|
-
i++
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
// Record potential start of reference
|
|
121
|
-
startIndex = i
|
|
122
102
|
|
|
123
103
|
// Check for book names or abbreviations
|
|
124
104
|
for (let book of fullNames) {
|
|
125
105
|
if (
|
|
126
106
|
lowerCaseText.startsWith(book.toLowerCase(), i) &&
|
|
127
|
-
(i + book.length >= lowerCaseText.length || /[\s
|
|
107
|
+
(i + book.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + book.length]))
|
|
128
108
|
) {
|
|
129
109
|
foundBook = book
|
|
130
110
|
matchedLength = book.length
|
|
@@ -135,7 +115,7 @@ class CodexParser {
|
|
|
135
115
|
for (let abbr of abbreviations) {
|
|
136
116
|
if (
|
|
137
117
|
lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
|
|
138
|
-
(i + abbr.length >= lowerCaseText.length || /[\s
|
|
118
|
+
(i + abbr.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + abbr.length]))
|
|
139
119
|
) {
|
|
140
120
|
foundBook = this.abbreviations[abbr]
|
|
141
121
|
matchedLength = abbr.length
|
|
@@ -145,105 +125,116 @@ class CodexParser {
|
|
|
145
125
|
}
|
|
146
126
|
|
|
147
127
|
if (foundBook) {
|
|
148
|
-
|
|
149
|
-
let
|
|
150
|
-
|
|
151
|
-
|
|
128
|
+
let j = i + matchedLength
|
|
129
|
+
let currentBook = foundBook
|
|
130
|
+
let currentStartIndex = startIndex
|
|
131
|
+
|
|
132
|
+
// Process multiple references for the same book
|
|
133
|
+
while (j < lowerCaseText.length) {
|
|
134
|
+
let chapterVerse = ""
|
|
135
|
+
let hasColon = false
|
|
136
|
+
let version = null
|
|
137
|
+
let refStart = j
|
|
138
|
+
|
|
152
139
|
// Skip spaces
|
|
153
140
|
while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
|
|
141
|
+
chapterVerse += normalizedText[j]
|
|
154
142
|
j++
|
|
155
143
|
}
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
144
|
+
refStart = j // Update start after spaces
|
|
145
|
+
|
|
146
|
+
// Next character must be a digit or version suffix
|
|
147
|
+
if (j < lowerCaseText.length) {
|
|
148
|
+
const nextChar = lowerCaseText[j]
|
|
149
|
+
const isVersion = lowerCaseText.substring(j).match(/^(lxx|mt)\b/i)
|
|
150
|
+
if (!/\d/.test(nextChar) && !isVersion && !this.config.booksOnly) {
|
|
151
|
+
break
|
|
152
|
+
}
|
|
153
|
+
} else if (!this.config.booksOnly) {
|
|
154
|
+
break
|
|
159
155
|
}
|
|
160
|
-
} else {
|
|
161
|
-
isFollowedByReference = true // Allow if booksOnly or in parentheses
|
|
162
|
-
}
|
|
163
156
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
// Capture chapter-verse (allow digits, colons, commas, dashes, spaces)
|
|
180
|
-
while (i < lowerCaseText.length && (/[\d:,\-]/.test(normalizedText[i]) || normalizedText[i] === " ")) {
|
|
181
|
-
if (normalizedText[i] === ":") hasColon = true
|
|
182
|
-
chapterVerse += normalizedText[i]
|
|
183
|
-
i++
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
// Only proceed if valid reference or booksOnly is true
|
|
187
|
-
if ((hasColon && chapterVerse.trim().length > 0) || (this.config.booksOnly && !chapterVerse.trim())) {
|
|
188
|
-
let endIndex = i
|
|
189
|
-
let version = null
|
|
157
|
+
// Capture chapter-verse
|
|
158
|
+
while (j < lowerCaseText.length && /\d/.test(lowerCaseText[j])) {
|
|
159
|
+
chapterVerse += normalizedText[j]
|
|
160
|
+
j++
|
|
161
|
+
}
|
|
162
|
+
while (
|
|
163
|
+
j < lowerCaseText.length &&
|
|
164
|
+
(/[\d:,\-;]/.test(normalizedText[j]) || normalizedText[j] === " ")
|
|
165
|
+
) {
|
|
166
|
+
if (normalizedText[j] === ":") hasColon = true
|
|
167
|
+
chapterVerse += normalizedText[j]
|
|
168
|
+
if (normalizedText[j] === ";") break
|
|
169
|
+
j++
|
|
170
|
+
}
|
|
190
171
|
|
|
191
|
-
//
|
|
192
|
-
|
|
172
|
+
// Check for version suffix
|
|
173
|
+
let endIndex = j
|
|
174
|
+
const suffixMatch = normalizedText.substring(j).match(/\b(LXX|MT)\b/i)
|
|
193
175
|
if (suffixMatch) {
|
|
194
176
|
version = suffixMatch[0].toUpperCase()
|
|
195
177
|
endIndex += suffixMatch[0].length
|
|
196
|
-
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
// Handle closing parenthesis
|
|
200
|
-
if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
|
|
201
|
-
endIndex = i + 1
|
|
202
|
-
i++
|
|
178
|
+
j += suffixMatch[0].length
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
//
|
|
206
|
-
const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
|
|
207
|
-
|
|
208
|
-
// Determine type
|
|
209
|
-
let type
|
|
181
|
+
// Store the reference
|
|
210
182
|
const ref = chapterVerse.trim()
|
|
211
|
-
if (this.config.booksOnly
|
|
212
|
-
type
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
183
|
+
if (ref.length > 0 || version || this.config.booksOnly) {
|
|
184
|
+
let type
|
|
185
|
+
if (this.config.booksOnly && !ref) {
|
|
186
|
+
type = "book_only"
|
|
187
|
+
} else if (ref.includes(":")) {
|
|
188
|
+
if (ref.includes("-")) {
|
|
189
|
+
const [start, end] = ref.split("-")
|
|
190
|
+
const startParts = start.split(":")
|
|
191
|
+
const endParts = end.split(":")
|
|
192
|
+
type =
|
|
193
|
+
startParts.length > 1 &&
|
|
194
|
+
endParts.length > 1 &&
|
|
195
|
+
startParts[0].trim() !== endParts[0].trim()
|
|
196
|
+
? "multi_chapter_verse_range"
|
|
197
|
+
: "chapter_verse_range"
|
|
198
|
+
} else if (ref.includes(",")) {
|
|
199
|
+
type = "comma_separated_verses"
|
|
200
|
+
} else {
|
|
201
|
+
type = "chapter_verse"
|
|
202
|
+
}
|
|
203
|
+
} else if (ref.includes("-")) {
|
|
204
|
+
type = "chapter_range"
|
|
205
|
+
} else if (/\d/.test(ref)) {
|
|
206
|
+
type = "single_chapter"
|
|
226
207
|
} else {
|
|
227
|
-
type = "
|
|
208
|
+
type = "book_only"
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
this.found.push({
|
|
212
|
+
book: currentBook,
|
|
213
|
+
reference: ref,
|
|
214
|
+
startIndex: currentStartIndex,
|
|
215
|
+
endIndex,
|
|
216
|
+
version,
|
|
217
|
+
type,
|
|
218
|
+
originalText: normalizedText.slice(currentStartIndex, endIndex),
|
|
219
|
+
})
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// Handle semicolon for next reference
|
|
223
|
+
if (j < lowerCaseText.length && lowerCaseText[j] === ";") {
|
|
224
|
+
j++ // Move past semicolon
|
|
225
|
+
currentStartIndex = j // Reset start for next reference
|
|
226
|
+
// Skip spaces after semicolon
|
|
227
|
+
while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
|
|
228
|
+
j++
|
|
228
229
|
}
|
|
229
|
-
|
|
230
|
-
type = "chapter_range"
|
|
231
|
-
} else {
|
|
232
|
-
type = "single_chapter"
|
|
230
|
+
continue // Process next reference
|
|
233
231
|
}
|
|
234
232
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
reference: ref,
|
|
238
|
-
startIndex: hasOpeningParen ? parenStartIndex : startIndex,
|
|
239
|
-
endIndex,
|
|
240
|
-
version,
|
|
241
|
-
type,
|
|
242
|
-
originalText,
|
|
243
|
-
})
|
|
244
|
-
} else {
|
|
245
|
-
i = startIndex + 1
|
|
233
|
+
// Exit if no semicolon or end of reference
|
|
234
|
+
break
|
|
246
235
|
}
|
|
236
|
+
|
|
237
|
+
i = j
|
|
247
238
|
} else {
|
|
248
239
|
i++
|
|
249
240
|
}
|
|
@@ -296,10 +287,31 @@ class CodexParser {
|
|
|
296
287
|
abbr: null,
|
|
297
288
|
}
|
|
298
289
|
|
|
299
|
-
|
|
290
|
+
// Clean reference for parsing, removing version suffix
|
|
291
|
+
let cleanReference = passage.reference
|
|
292
|
+
if (passage.version) {
|
|
293
|
+
cleanReference = cleanReference.replace(/\s*(LXX|MT)$/i, "").trim()
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Handle chapter-only references (e.g., "113 :" or "113")
|
|
297
|
+
if (!cleanReference || cleanReference.match(/^\d+\s*[:;]?\s*$/)) {
|
|
298
|
+
const chapterMatch = cleanReference.match(/\d+/) || ["1"]
|
|
299
|
+
const chapter = Number(chapterMatch[0])
|
|
300
|
+
parsedPassage.chapter = chapter
|
|
301
|
+
parsedPassage.type = this.SINGLE_CHAPTER
|
|
302
|
+
const chapterVerses = this.getChapterVerses(book, chapter)
|
|
303
|
+
if (chapterVerses.length) {
|
|
304
|
+
const startVerse = chapterVerses[0]
|
|
305
|
+
const endVerse = chapterVerses[chapterVerses.length - 1]
|
|
306
|
+
parsedPassage.verses = [`${startVerse}-${endVerse}`]
|
|
307
|
+
}
|
|
308
|
+
} else {
|
|
309
|
+
this.parseReferenceParts(parsedPassage, cleanReference.split(","))
|
|
310
|
+
}
|
|
311
|
+
|
|
300
312
|
parsedPassage.passages = this.populate(parsedPassage)
|
|
301
313
|
parsedPassage.scripture = this.scripturize(parsedPassage)
|
|
302
|
-
parsedPassage.valid = this._isValid(parsedPassage,
|
|
314
|
+
parsedPassage.valid = this._isValid(parsedPassage, cleanReference)
|
|
303
315
|
|
|
304
316
|
// Set abbr property using SBL-style abbreviations
|
|
305
317
|
const sblEntry = Object.entries(this.sblAbbreviations).find(
|
|
@@ -307,16 +319,16 @@ class CodexParser {
|
|
|
307
319
|
)
|
|
308
320
|
if (sblEntry) {
|
|
309
321
|
const { value, abbr } = sblEntry[1]
|
|
322
|
+
const ref = passage.reference.replace(/\s*(LXX|MT)$/i, "").trim()
|
|
310
323
|
parsedPassage.abbr = abbr
|
|
311
|
-
? `${value}. ${
|
|
312
|
-
: `${value} ${
|
|
324
|
+
? `${value}. ${ref}${passage.version ? " " + passage.version : ""}`
|
|
325
|
+
: `${value} ${ref}${passage.version ? " " + passage.version : ""}`
|
|
313
326
|
} else {
|
|
314
|
-
// Fallback to original
|
|
315
327
|
parsedPassage.abbr = parsedPassage.original
|
|
316
328
|
}
|
|
317
329
|
|
|
318
330
|
if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
|
|
319
|
-
this.handleMultiChapterRange(parsedPassage,
|
|
331
|
+
this.handleMultiChapterRange(parsedPassage, cleanReference)
|
|
320
332
|
} else {
|
|
321
333
|
delete parsedPassage.to
|
|
322
334
|
}
|
|
@@ -370,6 +382,22 @@ class CodexParser {
|
|
|
370
382
|
if (!part) return // Skip empty parts from trailing commas
|
|
371
383
|
const isFirstPart = index === 0
|
|
372
384
|
|
|
385
|
+
// Handle chapter-only references (e.g., "113 :" or "113")
|
|
386
|
+
if (!part.includes(":") && !part.includes("-") && !singleChapterBook) {
|
|
387
|
+
const chapter = Number(part.replace(/[^0-9]/g, "")) // Extract number, remove trailing colon
|
|
388
|
+
if (chapter > 0) {
|
|
389
|
+
passage.chapter = chapter
|
|
390
|
+
passage.type = this.SINGLE_CHAPTER
|
|
391
|
+
const chapterVerses = this.getChapterVerses(passage.book, chapter)
|
|
392
|
+
if (chapterVerses.length) {
|
|
393
|
+
const startVerse = chapterVerses[0]
|
|
394
|
+
const endVerse = chapterVerses[chapterVerses.length - 1]
|
|
395
|
+
passage.verses = [`${startVerse}-${endVerse}`]
|
|
396
|
+
}
|
|
397
|
+
return
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
|
|
373
401
|
if (part.includes(":")) {
|
|
374
402
|
this.parseChapterVerse(passage, part, isFirstPart)
|
|
375
403
|
} else if (singleChapterBook) {
|
|
@@ -381,7 +409,6 @@ class CodexParser {
|
|
|
381
409
|
}
|
|
382
410
|
})
|
|
383
411
|
}
|
|
384
|
-
|
|
385
412
|
/**
|
|
386
413
|
* Parses chapter-verse references (e.g., "3:16").
|
|
387
414
|
* @param {Object} passage - The passage object.
|