codexparser 0.1.38 → 0.1.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +147 -103
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.38",
|
|
3
|
+
"version": "0.1.40",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -56,144 +56,163 @@ class CodexParser {
|
|
|
56
56
|
* @return {CodexParser} - Returns the instance itself, enabling method chaining.
|
|
57
57
|
*/
|
|
58
58
|
scan(text) {
|
|
59
|
-
//
|
|
60
|
-
this.found = []
|
|
61
|
-
|
|
62
|
-
// Retrieve the full names of Bible books (both Old and New Testament).
|
|
59
|
+
// Combine Old and New Testament book names into a single array
|
|
63
60
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
64
61
|
|
|
65
|
-
// Retrieve
|
|
62
|
+
// Retrieve all abbreviation keys from the abbreviations object
|
|
66
63
|
const abbreviations = Object.keys(this.abbreviations)
|
|
67
64
|
|
|
68
|
-
//
|
|
65
|
+
// Initialize the `found` array to store the results
|
|
66
|
+
this.found = []
|
|
67
|
+
|
|
68
|
+
// Convert Bible book names, abbreviations, and input text to lowercase for case-insensitive matching
|
|
69
69
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
70
70
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
71
|
-
|
|
72
|
-
// Convert the input text to lowercase for consistent comparison.
|
|
73
71
|
const lowerCaseText = text.toLowerCase()
|
|
74
72
|
|
|
75
|
-
|
|
76
|
-
let i = 0
|
|
73
|
+
let i = 0 // Index pointer to iterate through the input text
|
|
77
74
|
|
|
78
|
-
|
|
75
|
+
/**
|
|
76
|
+
* Helper function to check if a character is part of a chapter or verse reference.
|
|
77
|
+
* Non-word characters (anything not A-Z or a-z) are considered valid.
|
|
78
|
+
*/
|
|
79
79
|
const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
|
|
80
80
|
|
|
81
|
-
|
|
81
|
+
/**
|
|
82
|
+
* Helper function to determine if the text starting at a given index contains
|
|
83
|
+
* the name of a new Bible book.
|
|
84
|
+
*/
|
|
82
85
|
const isNextBibleBook = (startIndex) => {
|
|
83
86
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
84
87
|
|
|
85
|
-
// Check
|
|
88
|
+
// Check for full Bible book names
|
|
86
89
|
for (const book of lowercaseBibleFullNames) {
|
|
87
90
|
if (textAfterCurrentPosition.startsWith(book)) return true
|
|
88
91
|
}
|
|
89
92
|
|
|
90
|
-
// Check
|
|
93
|
+
// Check for Bible book abbreviations
|
|
91
94
|
for (const abbr of lowercaseBibleAbbreviations) {
|
|
92
95
|
if (textAfterCurrentPosition.startsWith(abbr)) return true
|
|
93
96
|
}
|
|
94
97
|
|
|
95
|
-
//
|
|
96
|
-
return false
|
|
98
|
+
return false // No match found
|
|
97
99
|
}
|
|
98
100
|
|
|
99
|
-
|
|
101
|
+
/**
|
|
102
|
+
* Helper function to detect suffixes like "LXX" or "MT" in the text after a given index.
|
|
103
|
+
* These suffixes are case-insensitive and indicate the version of the Bible reference.
|
|
104
|
+
*/
|
|
100
105
|
const detectSuffix = (startIndex) => {
|
|
101
106
|
const suffixMatch = text.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
102
107
|
return suffixMatch ? suffixMatch[0].toUpperCase() : null
|
|
103
108
|
}
|
|
104
109
|
|
|
105
|
-
//
|
|
110
|
+
// Iterate through the input text to detect and process Bible references
|
|
106
111
|
while (i < lowerCaseText.length) {
|
|
107
|
-
let foundBook = null //
|
|
108
|
-
let foundIndex = -1 //
|
|
109
|
-
let matchedLength = 0 //
|
|
112
|
+
let foundBook = null // Placeholder for the detected book name
|
|
113
|
+
let foundIndex = -1 // Index in the text where the book name starts
|
|
114
|
+
let matchedLength = 0 // Length of the matched book name or abbreviation
|
|
110
115
|
|
|
111
|
-
//
|
|
116
|
+
// Search for full Bible book names in the text
|
|
112
117
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
113
118
|
const book = lowercaseBibleFullNames[j]
|
|
114
|
-
|
|
115
|
-
// If the text at the current index matches a book name and is longer than any previously matched name.
|
|
116
119
|
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
117
|
-
foundBook = fullNames[j] // Store the original case-sensitive
|
|
118
|
-
foundIndex = i
|
|
119
|
-
matchedLength = book.length // Update the length
|
|
120
|
+
foundBook = fullNames[j] // Store the original book name (case-sensitive)
|
|
121
|
+
foundIndex = i
|
|
122
|
+
matchedLength = book.length // Update the match length
|
|
120
123
|
}
|
|
121
124
|
}
|
|
122
125
|
|
|
123
|
-
// If no
|
|
126
|
+
// If no full book name is found, search for abbreviations
|
|
124
127
|
if (!foundBook) {
|
|
125
128
|
for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
|
|
126
129
|
const abbreviation = lowercaseBibleAbbreviations[k]
|
|
127
|
-
|
|
128
|
-
// If the text at the current index matches an abbreviation.
|
|
129
130
|
if (lowerCaseText.startsWith(abbreviation, i)) {
|
|
130
|
-
foundBook = abbreviations[k]
|
|
131
|
-
foundIndex = i
|
|
132
|
-
matchedLength = abbreviation.length
|
|
131
|
+
foundBook = abbreviations[k]
|
|
132
|
+
foundIndex = i
|
|
133
|
+
matchedLength = abbreviation.length
|
|
133
134
|
}
|
|
134
135
|
}
|
|
135
136
|
}
|
|
136
137
|
|
|
137
|
-
// If a Bible book is found
|
|
138
|
+
// If a Bible book is found
|
|
138
139
|
if (foundBook) {
|
|
139
|
-
i += matchedLength // Move the pointer
|
|
140
|
-
let chapterVerse = "" //
|
|
141
|
-
const references = [] // Array to store
|
|
140
|
+
i += matchedLength // Move the index pointer forward by the length of the book name
|
|
141
|
+
let chapterVerse = "" // Placeholder for chapter and verse data
|
|
142
|
+
const references = [] // Array to store multiple chapter/verse references for the same book
|
|
142
143
|
|
|
143
|
-
//
|
|
144
|
+
// Extract chapter and verse references
|
|
144
145
|
while (i < text.length && isValidChapterVerseChar(text[i])) {
|
|
145
|
-
//
|
|
146
|
-
if (isNextBibleBook(i)) break
|
|
146
|
+
if (isNextBibleBook(i)) break // Stop if a new Bible book is detected
|
|
147
147
|
|
|
148
|
-
// Handle semicolon-
|
|
148
|
+
// Handle semicolon-separated references (indicates a new reference)
|
|
149
149
|
if (text[i] === ";") {
|
|
150
150
|
const formattedReference = chapterVerse
|
|
151
151
|
.trim()
|
|
152
|
-
.replace(
|
|
153
|
-
.replace(/[^a-zA-Z0-9
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
i++ // Move past the semicolon.
|
|
152
|
+
.replace(/\./g, ":")
|
|
153
|
+
.replace(/[^a-zA-Z0-9]+$/, "")
|
|
154
|
+
if (formattedReference) references.push(formattedReference)
|
|
155
|
+
chapterVerse = "" // Reset for the next reference
|
|
156
|
+
i++
|
|
158
157
|
continue
|
|
159
158
|
}
|
|
160
159
|
|
|
161
|
-
// Accumulate valid characters for the chapterVerse.
|
|
162
160
|
chapterVerse += text[i]
|
|
163
161
|
i++
|
|
164
162
|
}
|
|
165
163
|
|
|
166
|
-
//
|
|
164
|
+
// Process the last detected chapter/verse reference
|
|
167
165
|
if (chapterVerse.trim().length > 0) {
|
|
168
166
|
const formattedReference = chapterVerse
|
|
169
167
|
.trim()
|
|
170
|
-
.replace(
|
|
171
|
-
.replace(/[^a-zA-Z0-9
|
|
172
|
-
|
|
173
|
-
if (formattedReference) references.push(formattedReference) // Add the formatted reference to the list.
|
|
168
|
+
.replace(/\./g, ":")
|
|
169
|
+
.replace(/[^a-zA-Z0-9]+$/, "")
|
|
170
|
+
if (formattedReference) references.push(formattedReference)
|
|
174
171
|
}
|
|
175
172
|
|
|
176
|
-
// Detect any suffix (e.g., "LXX" or "MT") after the chapter/verse reference
|
|
173
|
+
// Detect any suffix (e.g., "LXX" or "MT") after the chapter/verse reference
|
|
177
174
|
const suffix = detectSuffix(i)
|
|
178
175
|
|
|
179
|
-
//
|
|
176
|
+
// Add each reference as a separate object to the `found` array with type recognition
|
|
180
177
|
references.forEach((ref) => {
|
|
178
|
+
let type
|
|
179
|
+
|
|
180
|
+
if (ref.includes(":")) {
|
|
181
|
+
if (ref.includes("-")) {
|
|
182
|
+
const [start, end] = ref.split("-")
|
|
183
|
+
const startParts = start.split(":")
|
|
184
|
+
const endParts = end.split(":")
|
|
185
|
+
|
|
186
|
+
if (startParts.length > 1 && endParts.length > 1 && startParts[0] !== endParts[0]) {
|
|
187
|
+
type = "multi_chapter_verse_range" // Example: "8:23-9:1"
|
|
188
|
+
} else {
|
|
189
|
+
type = "chapter_verse_range" // Example: "8:23-25"
|
|
190
|
+
}
|
|
191
|
+
} else if (ref.includes(",")) {
|
|
192
|
+
type = "comma_separated_verses" // Example: "8:23,24"
|
|
193
|
+
} else {
|
|
194
|
+
type = "chapter_verse" // Example: "8:23"
|
|
195
|
+
}
|
|
196
|
+
} else if (ref.includes("-")) {
|
|
197
|
+
type = "chapter_range" // Example: "8-9"
|
|
198
|
+
} else {
|
|
199
|
+
type = "single_chapter" // Example: "8"
|
|
200
|
+
}
|
|
201
|
+
|
|
181
202
|
this.found.push({
|
|
182
|
-
book: foundBook,
|
|
183
|
-
reference: ref
|
|
184
|
-
index: foundIndex,
|
|
185
|
-
|
|
186
|
-
|
|
203
|
+
book: foundBook,
|
|
204
|
+
reference: ref,
|
|
205
|
+
index: foundIndex,
|
|
206
|
+
version: suffix || null,
|
|
207
|
+
type,
|
|
187
208
|
})
|
|
188
209
|
})
|
|
189
210
|
} else {
|
|
190
|
-
//
|
|
191
|
-
i++
|
|
211
|
+
i++ // Move to the next character if no book is found
|
|
192
212
|
}
|
|
193
213
|
}
|
|
194
214
|
|
|
195
|
-
// Return the current instance for method chaining
|
|
196
|
-
return this
|
|
215
|
+
return this // Return the current instance for method chaining
|
|
197
216
|
}
|
|
198
217
|
|
|
199
218
|
bibleVersion(version) {
|
|
@@ -229,7 +248,6 @@ class CodexParser {
|
|
|
229
248
|
index: passage.index,
|
|
230
249
|
version: this._handleVersion(passage.version, testament),
|
|
231
250
|
}
|
|
232
|
-
|
|
233
251
|
const parts = passage.reference.split(",")
|
|
234
252
|
const isSingleChapter = this.singleChapterBook.some((singleChapterBook) => singleChapterBook[book])
|
|
235
253
|
|
|
@@ -297,10 +315,8 @@ class CodexParser {
|
|
|
297
315
|
parsedPassage.passages = this.populate(parsedPassage)
|
|
298
316
|
parsedPassage.scripture = this.scripturize(parsedPassage)
|
|
299
317
|
parsedPassage.valid = this._isValid(parsedPassage, passage.reference)
|
|
300
|
-
|
|
301
318
|
return parsedPassage
|
|
302
319
|
})
|
|
303
|
-
|
|
304
320
|
this.versification()
|
|
305
321
|
return this
|
|
306
322
|
}
|
|
@@ -397,6 +413,7 @@ class CodexParser {
|
|
|
397
413
|
populate(parsedPassage) {
|
|
398
414
|
const passages = []
|
|
399
415
|
const { book, chapter, verses, type } = parsedPassage
|
|
416
|
+
this._setVersion(parsedPassage)
|
|
400
417
|
if (type === "single_chapter") {
|
|
401
418
|
// Handle single chapter references
|
|
402
419
|
if (this.chapterVerses[book] && this.chapterVerses[book][chapter]) {
|
|
@@ -426,7 +443,6 @@ class CodexParser {
|
|
|
426
443
|
}
|
|
427
444
|
} else if (type === "multi_chapter_verse_range") {
|
|
428
445
|
const { to } = parsedPassage
|
|
429
|
-
|
|
430
446
|
// Create an array of reference objects for the start and end of the range
|
|
431
447
|
const refs = [
|
|
432
448
|
{
|
|
@@ -439,16 +455,30 @@ class CodexParser {
|
|
|
439
455
|
},
|
|
440
456
|
]
|
|
441
457
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
458
|
+
const startChapter = refs[0].chapter
|
|
459
|
+
const startVerse = refs[0].verse
|
|
460
|
+
const endChapter = refs[refs.length - 1].chapter
|
|
461
|
+
const endVerse = refs[refs.length - 1].verse
|
|
462
|
+
|
|
463
|
+
// Loop through the range of chapters
|
|
464
|
+
for (let chapter = startChapter; chapter <= endChapter; chapter++) {
|
|
465
|
+
// Determine the starting verse for the current chapter
|
|
466
|
+
const chapterStartVerse = chapter === startChapter ? startVerse : 1
|
|
467
|
+
// Determine the ending verse for the current chapter
|
|
468
|
+
const chapterEndVerse = chapter === endChapter ? endVerse : this.chapterVerses[book][chapter].length
|
|
469
|
+
|
|
470
|
+
// Get the array of verses for the current chapter
|
|
471
|
+
const verses = this.chapterVerses[book][chapter].slice(chapterStartVerse - 1, chapterEndVerse)
|
|
446
472
|
|
|
447
|
-
|
|
473
|
+
// Loop through the verses in the current chapter
|
|
474
|
+
for (let j = 0; j < verses.length; j++) {
|
|
475
|
+
const currentVerse = chapterStartVerse + j
|
|
476
|
+
|
|
477
|
+
// Add the verse to the passages array
|
|
448
478
|
passages.push({
|
|
449
479
|
book,
|
|
450
|
-
chapter
|
|
451
|
-
verse:
|
|
480
|
+
chapter,
|
|
481
|
+
verse: currentVerse,
|
|
452
482
|
})
|
|
453
483
|
}
|
|
454
484
|
}
|
|
@@ -527,49 +557,63 @@ class CodexParser {
|
|
|
527
557
|
* @return {object} The object with the human-readable name, chapter and verses and a hash.
|
|
528
558
|
*/
|
|
529
559
|
scripturize(passage) {
|
|
530
|
-
const { book, chapter, passages } = passage
|
|
560
|
+
const { book, chapter, passages, to } = passage
|
|
531
561
|
|
|
532
562
|
// Extract verses from the passages array
|
|
533
|
-
const verses = passages.map((p) => p.verse)
|
|
563
|
+
const verses = passages.map((p) => ({ chapter: p.chapter, verse: p.verse }))
|
|
534
564
|
let formattedVerses = ""
|
|
535
565
|
|
|
536
|
-
if (
|
|
537
|
-
//
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
566
|
+
if (to && to.chapter && to.chapter !== chapter) {
|
|
567
|
+
// Handle multi-chapter range
|
|
568
|
+
const startChapter = chapter
|
|
569
|
+
const startVerses = verses.filter((v) => v.chapter === startChapter).map((v) => v.verse)
|
|
570
|
+
|
|
571
|
+
const endChapter = to.chapter
|
|
572
|
+
const endVerses = verses.filter((v) => v.chapter === endChapter).map((v) => v.verse)
|
|
573
|
+
|
|
574
|
+
const startFormatted =
|
|
575
|
+
startVerses.length > 1 ? `${startVerses[0]}-${startVerses[startVerses.length - 1]}` : startVerses[0]
|
|
576
|
+
|
|
577
|
+
const endFormatted =
|
|
578
|
+
endVerses.length > 1 ? `${endVerses[0]}-${endVerses[endVerses.length - 1]}` : endVerses[0]
|
|
579
|
+
|
|
580
|
+
formattedVerses = `${startChapter}:${startFormatted}-${endChapter}:${endFormatted}`
|
|
542
581
|
} else {
|
|
543
|
-
//
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
582
|
+
// Handle single-chapter range
|
|
583
|
+
const startVerses = verses.map((v) => v.verse)
|
|
584
|
+
|
|
585
|
+
if (startVerses.length === 1) {
|
|
586
|
+
formattedVerses = startVerses[0].toString()
|
|
587
|
+
} else {
|
|
588
|
+
// Group consecutive verses into ranges
|
|
589
|
+
let ranges = []
|
|
590
|
+
let tempRange = [startVerses[0]]
|
|
591
|
+
|
|
592
|
+
for (let i = 1; i < startVerses.length; i++) {
|
|
593
|
+
if (startVerses[i] === startVerses[i - 1] + 1) {
|
|
594
|
+
tempRange.push(startVerses[i])
|
|
595
|
+
} else {
|
|
596
|
+
ranges.push(tempRange)
|
|
597
|
+
tempRange = [startVerses[i]]
|
|
598
|
+
}
|
|
555
599
|
}
|
|
600
|
+
ranges.push(tempRange)
|
|
601
|
+
|
|
602
|
+
formattedVerses = ranges
|
|
603
|
+
.map((range) => (range.length > 1 ? `${range[0]}-${range[range.length - 1]}` : range[0]))
|
|
604
|
+
.join(",")
|
|
556
605
|
}
|
|
557
|
-
ranges.push(tempRange) // Push the last range
|
|
558
606
|
|
|
559
|
-
|
|
560
|
-
formattedVerses = ranges
|
|
561
|
-
.map((range) => (range.length > 1 ? `${range[0]}-${range[range.length - 1]}` : range[0]))
|
|
562
|
-
.join(",")
|
|
607
|
+
formattedVerses = `${chapter}:${formattedVerses}`
|
|
563
608
|
}
|
|
564
609
|
|
|
565
610
|
// Format the final passage
|
|
566
|
-
const
|
|
567
|
-
const full = `${book} ${chapter}${colon}${formattedVerses}`.trim()
|
|
611
|
+
const full = `${book} ${formattedVerses}`.trim()
|
|
568
612
|
const hash = full.toLowerCase().replace(/ /g, "_").replace(/:/g, ".").replace(/-/g, ".").replace(/,/g, ".")
|
|
569
613
|
|
|
570
614
|
return {
|
|
571
615
|
passage: full,
|
|
572
|
-
cv:
|
|
616
|
+
cv: formattedVerses,
|
|
573
617
|
hash,
|
|
574
618
|
}
|
|
575
619
|
}
|