codexparser 0.1.60 → 0.1.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +57 -49
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.60",
|
|
3
|
+
"version": "0.1.62",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,7 +74,6 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
// Normalize text: replace periods before digits with colons, remove trailing periods, collapse spaces
|
|
78
77
|
let normalizedText = text
|
|
79
78
|
.replace(/\.(?=\d)/g, ":")
|
|
80
79
|
.replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
@@ -84,9 +83,7 @@ class CodexParser {
|
|
|
84
83
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
85
84
|
let i = 0
|
|
86
85
|
|
|
87
|
-
// Check if a character is valid for chapter/verse (non-letter)
|
|
88
86
|
const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
|
|
89
|
-
// Check if the next segment starts with a Bible book
|
|
90
87
|
const isNextBibleBook = (startIndex) => {
|
|
91
88
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
92
89
|
return (
|
|
@@ -94,35 +91,31 @@ class CodexParser {
|
|
|
94
91
|
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
95
92
|
)
|
|
96
93
|
}
|
|
97
|
-
// Detect version suffix (LXX or MT)
|
|
98
94
|
const detectSuffix = (startIndex) => {
|
|
99
95
|
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
100
|
-
return suffixMatch ? suffixMatch[0].toUpperCase() : null
|
|
96
|
+
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
101
97
|
}
|
|
102
98
|
|
|
103
|
-
// Iterate through text to find book names or abbreviations
|
|
104
99
|
while (i < lowerCaseText.length) {
|
|
105
100
|
let foundBook = null
|
|
106
|
-
let
|
|
101
|
+
let startIndex = -1
|
|
107
102
|
let matchedLength = 0
|
|
108
103
|
|
|
109
|
-
// Check for full book names
|
|
110
104
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
111
105
|
const book = lowercaseBibleFullNames[j]
|
|
112
106
|
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
113
107
|
foundBook = fullNames[j]
|
|
114
|
-
|
|
108
|
+
startIndex = i
|
|
115
109
|
matchedLength = book.length
|
|
116
110
|
}
|
|
117
111
|
}
|
|
118
112
|
|
|
119
|
-
// Check for abbreviations if no full name found
|
|
120
113
|
if (!foundBook) {
|
|
121
114
|
for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
|
|
122
115
|
const abbreviation = lowercaseBibleAbbreviations[k]
|
|
123
116
|
if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
|
|
124
117
|
foundBook = this.abbreviations[abbreviations[k]]
|
|
125
|
-
|
|
118
|
+
startIndex = i
|
|
126
119
|
matchedLength = abbreviation.length
|
|
127
120
|
}
|
|
128
121
|
}
|
|
@@ -132,8 +125,8 @@ class CodexParser {
|
|
|
132
125
|
i += matchedLength
|
|
133
126
|
let chapterVerse = ""
|
|
134
127
|
const references = []
|
|
128
|
+
const startOfReference = startIndex
|
|
135
129
|
|
|
136
|
-
// Collect chapter-verse reference until next book or invalid character
|
|
137
130
|
while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
|
|
138
131
|
if (isNextBibleBook(i)) break
|
|
139
132
|
if (normalizedText[i] === ";") {
|
|
@@ -147,15 +140,19 @@ class CodexParser {
|
|
|
147
140
|
i++
|
|
148
141
|
}
|
|
149
142
|
|
|
150
|
-
// Add final reference if present
|
|
151
143
|
if (chapterVerse.trim().length > 0) {
|
|
152
144
|
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
153
145
|
if (formattedReference) references.push(formattedReference)
|
|
154
146
|
}
|
|
155
147
|
|
|
156
|
-
const
|
|
148
|
+
const suffixData = detectSuffix(i)
|
|
149
|
+
const suffix = suffixData ? suffixData.suffix : null
|
|
150
|
+
let endIndex = i // Set endIndex before suffix
|
|
151
|
+
if (suffixData) {
|
|
152
|
+
endIndex += suffixData.length // Include suffix in endIndex
|
|
153
|
+
i += suffixData.length // Advance i
|
|
154
|
+
}
|
|
157
155
|
|
|
158
|
-
// Process each reference and determine its type
|
|
159
156
|
references.forEach((ref) => {
|
|
160
157
|
let type
|
|
161
158
|
if (ref.includes(":")) {
|
|
@@ -183,9 +180,11 @@ class CodexParser {
|
|
|
183
180
|
this.found.push({
|
|
184
181
|
book: foundBook,
|
|
185
182
|
reference: ref,
|
|
186
|
-
|
|
183
|
+
startIndex: startOfReference,
|
|
184
|
+
endIndex: endIndex,
|
|
187
185
|
version: suffix || null,
|
|
188
186
|
type,
|
|
187
|
+
originalText: normalizedText.slice(startOfReference, endIndex),
|
|
189
188
|
})
|
|
190
189
|
})
|
|
191
190
|
} else {
|
|
@@ -228,39 +227,38 @@ class CodexParser {
|
|
|
228
227
|
verses: [],
|
|
229
228
|
type: passage.type,
|
|
230
229
|
testament,
|
|
231
|
-
|
|
230
|
+
startIndex: passage.startIndex,
|
|
231
|
+
endIndex: passage.endIndex,
|
|
232
|
+
originalText: passage.originalText,
|
|
232
233
|
version: this._handleVersion(passage.version, testament),
|
|
233
234
|
passages: [],
|
|
234
235
|
scripture: null,
|
|
235
236
|
valid: true,
|
|
236
237
|
start: null,
|
|
237
238
|
end: null,
|
|
239
|
+
abbr: null,
|
|
238
240
|
}
|
|
239
241
|
|
|
240
|
-
// Parse reference parts (chapter, verses, ranges)
|
|
241
242
|
this.parseReferenceParts(parsedPassage, passage.reference.split(","))
|
|
242
243
|
parsedPassage.passages = this.populate(parsedPassage)
|
|
243
244
|
parsedPassage.scripture = this.scripturize(parsedPassage)
|
|
244
245
|
parsedPassage.valid = this._isValid(parsedPassage, passage.reference)
|
|
245
246
|
|
|
246
|
-
//
|
|
247
|
-
const
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
}
|
|
254
|
-
parsedPassage.abbr = abbr
|
|
247
|
+
// Set abbr property using SBL-style abbreviation
|
|
248
|
+
const abbrKey = Object.keys(this.abbreviations).find(
|
|
249
|
+
(abbr) => this.abbreviations[abbr].toLowerCase() === book.toLowerCase()
|
|
250
|
+
)
|
|
251
|
+
parsedPassage.abbr = abbrKey
|
|
252
|
+
? `${abbrKey}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
|
|
253
|
+
: parsedPassage.original
|
|
255
254
|
|
|
256
|
-
// Handle multi-chapter ranges
|
|
257
255
|
if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
|
|
258
256
|
this.handleMultiChapterRange(parsedPassage, passage.reference)
|
|
259
257
|
} else {
|
|
260
258
|
delete parsedPassage.to
|
|
261
259
|
}
|
|
262
260
|
|
|
263
|
-
//
|
|
261
|
+
// Calculate start and end based on passages array
|
|
264
262
|
if (parsedPassage.passages.length > 0) {
|
|
265
263
|
const sortedPassages = parsedPassage.passages.slice().sort((a, b) => {
|
|
266
264
|
if (a.chapter !== b.chapter) return a.chapter - b.chapter
|
|
@@ -280,7 +278,6 @@ class CodexParser {
|
|
|
280
278
|
}
|
|
281
279
|
}
|
|
282
280
|
|
|
283
|
-
// Default to English version if none specified
|
|
284
281
|
if (!parsedPassage.version) {
|
|
285
282
|
parsedPassage.version = {
|
|
286
283
|
name: "English",
|
|
@@ -289,11 +286,6 @@ class CodexParser {
|
|
|
289
286
|
}
|
|
290
287
|
}
|
|
291
288
|
|
|
292
|
-
// Attach reference method to passage
|
|
293
|
-
parsedPassage.reference = function () {
|
|
294
|
-
return this.scripture.passage
|
|
295
|
-
}
|
|
296
|
-
|
|
297
289
|
return parsedPassage
|
|
298
290
|
})
|
|
299
291
|
|
|
@@ -632,21 +624,16 @@ class CodexParser {
|
|
|
632
624
|
if (typeof book !== "string") {
|
|
633
625
|
book = book[0]
|
|
634
626
|
}
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
bookified
|
|
639
|
-
|
|
640
|
-
bookified = this.bible.new.find(
|
|
641
|
-
(b) => b.toLowerCase() === book.toLowerCase() && b.toLowerCase().includes(book.toLowerCase())
|
|
642
|
-
)
|
|
643
|
-
if (!bookified) {
|
|
644
|
-
bookified = this.bible.old.find(
|
|
645
|
-
(b) => b.toLowerCase() === book.toLowerCase() && b.toLowerCase().includes(book.toLowerCase())
|
|
646
|
-
)
|
|
647
|
-
}
|
|
627
|
+
book = book.toLowerCase()
|
|
628
|
+
// Check if book is an abbreviation
|
|
629
|
+
let bookified = this.abbreviations[Object.keys(this.abbreviations).find((abbr) => abbr.toLowerCase() === book)]
|
|
630
|
+
if (bookified) {
|
|
631
|
+
return bookified
|
|
648
632
|
}
|
|
649
|
-
|
|
633
|
+
// Check if book is a full name
|
|
634
|
+
bookified =
|
|
635
|
+
this.bible.new.find((b) => b.toLowerCase() === book) || this.bible.old.find((b) => b.toLowerCase() === book)
|
|
636
|
+
return bookified || book // Fallback to input if not found
|
|
650
637
|
}
|
|
651
638
|
|
|
652
639
|
/**
|
|
@@ -1066,6 +1053,27 @@ class CodexParser {
|
|
|
1066
1053
|
}
|
|
1067
1054
|
return { name: "English", value: "ENG", abbreviation: "eng" }
|
|
1068
1055
|
}
|
|
1056
|
+
|
|
1057
|
+
replace(text, useAbbreviations = true) {
|
|
1058
|
+
if (!this.passages.length) {
|
|
1059
|
+
console.log("No parsed passages to replace")
|
|
1060
|
+
return text
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1063
|
+
let result = text
|
|
1064
|
+
// Process replacements in reverse order to avoid index shifting
|
|
1065
|
+
for (let i = this.passages.length - 1; i >= 0; i--) {
|
|
1066
|
+
const passage = this.passages[i]
|
|
1067
|
+
const { startIndex, endIndex, originalText, abbr, original } = passage
|
|
1068
|
+
|
|
1069
|
+
// Use abbreviated or full reference
|
|
1070
|
+
const newReference = useAbbreviations ? abbr : original
|
|
1071
|
+
console.log(`Replacing "${originalText}" with "${newReference}" at [${startIndex}, ${endIndex}]`) // Debug
|
|
1072
|
+
result = result.slice(0, startIndex) + newReference + result.slice(endIndex)
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
return result
|
|
1076
|
+
}
|
|
1069
1077
|
}
|
|
1070
1078
|
|
|
1071
1079
|
module.exports = CodexParser
|