codexparser 0.1.60 → 0.1.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +65 -49
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.60",
|
|
3
|
+
"version": "0.1.61",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,7 +74,6 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
// Normalize text: replace periods before digits with colons, remove trailing periods, collapse spaces
|
|
78
77
|
let normalizedText = text
|
|
79
78
|
.replace(/\.(?=\d)/g, ":")
|
|
80
79
|
.replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
@@ -84,9 +83,7 @@ class CodexParser {
|
|
|
84
83
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
85
84
|
let i = 0
|
|
86
85
|
|
|
87
|
-
// Check if a character is valid for chapter/verse (non-letter)
|
|
88
86
|
const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
|
|
89
|
-
// Check if the next segment starts with a Bible book
|
|
90
87
|
const isNextBibleBook = (startIndex) => {
|
|
91
88
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
92
89
|
return (
|
|
@@ -94,35 +91,37 @@ class CodexParser {
|
|
|
94
91
|
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
95
92
|
)
|
|
96
93
|
}
|
|
97
|
-
// Detect version suffix (LXX or MT)
|
|
98
94
|
const detectSuffix = (startIndex) => {
|
|
99
95
|
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
100
|
-
return suffixMatch
|
|
96
|
+
return suffixMatch
|
|
97
|
+
? {
|
|
98
|
+
suffix: suffixMatch[0].toUpperCase(),
|
|
99
|
+
length:
|
|
100
|
+
suffixMatch[0].length + (normalizedText[startIndex + suffixMatch[0].length] === " " ? 1 : 0),
|
|
101
|
+
}
|
|
102
|
+
: null
|
|
101
103
|
}
|
|
102
104
|
|
|
103
|
-
// Iterate through text to find book names or abbreviations
|
|
104
105
|
while (i < lowerCaseText.length) {
|
|
105
106
|
let foundBook = null
|
|
106
|
-
let
|
|
107
|
+
let startIndex = -1
|
|
107
108
|
let matchedLength = 0
|
|
108
109
|
|
|
109
|
-
// Check for full book names
|
|
110
110
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
111
111
|
const book = lowercaseBibleFullNames[j]
|
|
112
112
|
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
113
113
|
foundBook = fullNames[j]
|
|
114
|
-
|
|
114
|
+
startIndex = i
|
|
115
115
|
matchedLength = book.length
|
|
116
116
|
}
|
|
117
117
|
}
|
|
118
118
|
|
|
119
|
-
// Check for abbreviations if no full name found
|
|
120
119
|
if (!foundBook) {
|
|
121
120
|
for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
|
|
122
121
|
const abbreviation = lowercaseBibleAbbreviations[k]
|
|
123
122
|
if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
|
|
124
123
|
foundBook = this.abbreviations[abbreviations[k]]
|
|
125
|
-
|
|
124
|
+
startIndex = i
|
|
126
125
|
matchedLength = abbreviation.length
|
|
127
126
|
}
|
|
128
127
|
}
|
|
@@ -132,8 +131,8 @@ class CodexParser {
|
|
|
132
131
|
i += matchedLength
|
|
133
132
|
let chapterVerse = ""
|
|
134
133
|
const references = []
|
|
134
|
+
const startOfReference = startIndex
|
|
135
135
|
|
|
136
|
-
// Collect chapter-verse reference until next book or invalid character
|
|
137
136
|
while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
|
|
138
137
|
if (isNextBibleBook(i)) break
|
|
139
138
|
if (normalizedText[i] === ";") {
|
|
@@ -147,15 +146,21 @@ class CodexParser {
|
|
|
147
146
|
i++
|
|
148
147
|
}
|
|
149
148
|
|
|
150
|
-
// Add final reference if present
|
|
151
149
|
if (chapterVerse.trim().length > 0) {
|
|
152
150
|
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
153
151
|
if (formattedReference) references.push(formattedReference)
|
|
154
152
|
}
|
|
155
153
|
|
|
156
|
-
const
|
|
154
|
+
const suffixData = detectSuffix(i)
|
|
155
|
+
const suffix = suffixData ? suffixData.suffix : null
|
|
156
|
+
if (suffixData) i += suffixData.length
|
|
157
|
+
|
|
158
|
+
// Adjust endIndex to exclude trailing space if present
|
|
159
|
+
let endIndex = i
|
|
160
|
+
if (endIndex > 0 && normalizedText[endIndex - 1] === " ") {
|
|
161
|
+
endIndex--
|
|
162
|
+
}
|
|
157
163
|
|
|
158
|
-
// Process each reference and determine its type
|
|
159
164
|
references.forEach((ref) => {
|
|
160
165
|
let type
|
|
161
166
|
if (ref.includes(":")) {
|
|
@@ -183,9 +188,11 @@ class CodexParser {
|
|
|
183
188
|
this.found.push({
|
|
184
189
|
book: foundBook,
|
|
185
190
|
reference: ref,
|
|
186
|
-
|
|
191
|
+
startIndex: startOfReference,
|
|
192
|
+
endIndex: endIndex,
|
|
187
193
|
version: suffix || null,
|
|
188
194
|
type,
|
|
195
|
+
originalText: normalizedText.slice(startOfReference, endIndex),
|
|
189
196
|
})
|
|
190
197
|
})
|
|
191
198
|
} else {
|
|
@@ -228,39 +235,38 @@ class CodexParser {
|
|
|
228
235
|
verses: [],
|
|
229
236
|
type: passage.type,
|
|
230
237
|
testament,
|
|
231
|
-
|
|
238
|
+
startIndex: passage.startIndex,
|
|
239
|
+
endIndex: passage.endIndex,
|
|
240
|
+
originalText: passage.originalText,
|
|
232
241
|
version: this._handleVersion(passage.version, testament),
|
|
233
242
|
passages: [],
|
|
234
243
|
scripture: null,
|
|
235
244
|
valid: true,
|
|
236
245
|
start: null,
|
|
237
246
|
end: null,
|
|
247
|
+
abbr: null,
|
|
238
248
|
}
|
|
239
249
|
|
|
240
|
-
// Parse reference parts (chapter, verses, ranges)
|
|
241
250
|
this.parseReferenceParts(parsedPassage, passage.reference.split(","))
|
|
242
251
|
parsedPassage.passages = this.populate(parsedPassage)
|
|
243
252
|
parsedPassage.scripture = this.scripturize(parsedPassage)
|
|
244
253
|
parsedPassage.valid = this._isValid(parsedPassage, passage.reference)
|
|
245
254
|
|
|
246
|
-
//
|
|
247
|
-
const
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
}
|
|
254
|
-
parsedPassage.abbr = abbr
|
|
255
|
+
// Set abbr property using SBL-style abbreviation
|
|
256
|
+
const abbrKey = Object.keys(this.abbreviations).find(
|
|
257
|
+
(abbr) => this.abbreviations[abbr].toLowerCase() === book.toLowerCase()
|
|
258
|
+
)
|
|
259
|
+
parsedPassage.abbr = abbrKey
|
|
260
|
+
? `${abbrKey}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
|
|
261
|
+
: parsedPassage.original
|
|
255
262
|
|
|
256
|
-
// Handle multi-chapter ranges
|
|
257
263
|
if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
|
|
258
264
|
this.handleMultiChapterRange(parsedPassage, passage.reference)
|
|
259
265
|
} else {
|
|
260
266
|
delete parsedPassage.to
|
|
261
267
|
}
|
|
262
268
|
|
|
263
|
-
//
|
|
269
|
+
// Calculate start and end based on passages array
|
|
264
270
|
if (parsedPassage.passages.length > 0) {
|
|
265
271
|
const sortedPassages = parsedPassage.passages.slice().sort((a, b) => {
|
|
266
272
|
if (a.chapter !== b.chapter) return a.chapter - b.chapter
|
|
@@ -280,7 +286,6 @@ class CodexParser {
|
|
|
280
286
|
}
|
|
281
287
|
}
|
|
282
288
|
|
|
283
|
-
// Default to English version if none specified
|
|
284
289
|
if (!parsedPassage.version) {
|
|
285
290
|
parsedPassage.version = {
|
|
286
291
|
name: "English",
|
|
@@ -289,11 +294,6 @@ class CodexParser {
|
|
|
289
294
|
}
|
|
290
295
|
}
|
|
291
296
|
|
|
292
|
-
// Attach reference method to passage
|
|
293
|
-
parsedPassage.reference = function () {
|
|
294
|
-
return this.scripture.passage
|
|
295
|
-
}
|
|
296
|
-
|
|
297
297
|
return parsedPassage
|
|
298
298
|
})
|
|
299
299
|
|
|
@@ -632,21 +632,16 @@ class CodexParser {
|
|
|
632
632
|
if (typeof book !== "string") {
|
|
633
633
|
book = book[0]
|
|
634
634
|
}
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
bookified
|
|
639
|
-
|
|
640
|
-
bookified = this.bible.new.find(
|
|
641
|
-
(b) => b.toLowerCase() === book.toLowerCase() && b.toLowerCase().includes(book.toLowerCase())
|
|
642
|
-
)
|
|
643
|
-
if (!bookified) {
|
|
644
|
-
bookified = this.bible.old.find(
|
|
645
|
-
(b) => b.toLowerCase() === book.toLowerCase() && b.toLowerCase().includes(book.toLowerCase())
|
|
646
|
-
)
|
|
647
|
-
}
|
|
635
|
+
book = book.toLowerCase()
|
|
636
|
+
// Check if book is an abbreviation
|
|
637
|
+
let bookified = this.abbreviations[Object.keys(this.abbreviations).find((abbr) => abbr.toLowerCase() === book)]
|
|
638
|
+
if (bookified) {
|
|
639
|
+
return bookified
|
|
648
640
|
}
|
|
649
|
-
|
|
641
|
+
// Check if book is a full name
|
|
642
|
+
bookified =
|
|
643
|
+
this.bible.new.find((b) => b.toLowerCase() === book) || this.bible.old.find((b) => b.toLowerCase() === book)
|
|
644
|
+
return bookified || book // Fallback to input if not found
|
|
650
645
|
}
|
|
651
646
|
|
|
652
647
|
/**
|
|
@@ -1066,6 +1061,27 @@ class CodexParser {
|
|
|
1066
1061
|
}
|
|
1067
1062
|
return { name: "English", value: "ENG", abbreviation: "eng" }
|
|
1068
1063
|
}
|
|
1064
|
+
|
|
1065
|
+
replace(text, useAbbreviations = true) {
|
|
1066
|
+
if (!this.passages.length) {
|
|
1067
|
+
console.log("No parsed passages to replace")
|
|
1068
|
+
return text
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
let result = text
|
|
1072
|
+
// Process replacements in reverse order to avoid index shifting
|
|
1073
|
+
for (let i = this.passages.length - 1; i >= 0; i--) {
|
|
1074
|
+
const passage = this.passages[i]
|
|
1075
|
+
const { startIndex, endIndex, originalText, abbr, original } = passage
|
|
1076
|
+
|
|
1077
|
+
// Use abbreviated or full reference
|
|
1078
|
+
const newReference = useAbbreviations ? abbr : original
|
|
1079
|
+
console.log(`Replacing "${originalText}" with "${newReference}" at [${startIndex}, ${endIndex}]`) // Debug
|
|
1080
|
+
result = result.slice(0, startIndex) + newReference + result.slice(endIndex)
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
return result
|
|
1084
|
+
}
|
|
1069
1085
|
}
|
|
1070
1086
|
|
|
1071
1087
|
module.exports = CodexParser
|