codexparser 0.1.64 → 0.1.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +124 -75
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.64",
|
|
3
|
+
"version": "0.1.66",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,53 +74,57 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
77
|
+
|
|
78
|
+
// Normalize text: remove curly quotes, replace periods before numbers with colons
|
|
79
|
+
let normalizedText = text
|
|
80
|
+
.replace(/[“”]/g, "") // Remove curly quotes
|
|
81
|
+
.replace(/\.(?=\d)/g, ":")
|
|
81
82
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
82
83
|
let i = 0
|
|
83
84
|
|
|
84
|
-
const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
|
|
85
|
-
const isNextBibleBook = (startIndex) => {
|
|
86
|
-
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
87
|
-
return (
|
|
88
|
-
lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
|
|
89
|
-
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
90
|
-
)
|
|
91
|
-
}
|
|
92
|
-
const detectSuffix = (startIndex) => {
|
|
93
|
-
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
94
|
-
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
95
|
-
}
|
|
96
|
-
|
|
97
85
|
while (i < lowerCaseText.length) {
|
|
98
86
|
let foundBook = null
|
|
99
87
|
let startIndex = -1
|
|
100
88
|
let matchedLength = 0
|
|
89
|
+
let hasOpeningParen = false
|
|
90
|
+
let parenStartIndex = -1
|
|
101
91
|
|
|
102
|
-
// Skip whitespace
|
|
103
|
-
while (i < lowerCaseText.length &&
|
|
92
|
+
// Skip whitespace
|
|
93
|
+
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
104
94
|
i++
|
|
105
95
|
}
|
|
106
96
|
if (i >= lowerCaseText.length) break
|
|
107
97
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
98
|
+
// Check for opening parenthesis
|
|
99
|
+
if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
|
|
100
|
+
hasOpeningParen = true
|
|
101
|
+
parenStartIndex = i
|
|
102
|
+
i++
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Record potential start of reference
|
|
106
|
+
startIndex = i
|
|
107
|
+
|
|
108
|
+
// Check for book names or abbreviations
|
|
109
|
+
for (let book of fullNames) {
|
|
110
|
+
if (
|
|
111
|
+
lowerCaseText.startsWith(book.toLowerCase(), i) &&
|
|
112
|
+
(i + book.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + book.length]))
|
|
113
|
+
) {
|
|
114
|
+
foundBook = book
|
|
113
115
|
matchedLength = book.length
|
|
116
|
+
break
|
|
114
117
|
}
|
|
115
118
|
}
|
|
116
|
-
|
|
117
119
|
if (!foundBook) {
|
|
118
|
-
for (let
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
120
|
+
for (let abbr of abbreviations) {
|
|
121
|
+
if (
|
|
122
|
+
lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
|
|
123
|
+
(i + abbr.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + abbr.length]))
|
|
124
|
+
) {
|
|
125
|
+
foundBook = this.abbreviations[abbr]
|
|
126
|
+
matchedLength = abbr.length
|
|
127
|
+
break
|
|
124
128
|
}
|
|
125
129
|
}
|
|
126
130
|
}
|
|
@@ -128,43 +132,52 @@ class CodexParser {
|
|
|
128
132
|
if (foundBook) {
|
|
129
133
|
i += matchedLength
|
|
130
134
|
let chapterVerse = ""
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
if (normalizedText[i] === ";") {
|
|
137
|
-
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
138
|
-
if (formattedReference) references.push(formattedReference)
|
|
139
|
-
chapterVerse = ""
|
|
140
|
-
i++
|
|
141
|
-
continue
|
|
142
|
-
}
|
|
143
|
-
chapterVerse += normalizedText[i]
|
|
135
|
+
let hasColon = false
|
|
136
|
+
|
|
137
|
+
// Capture space after book
|
|
138
|
+
if (i < normalizedText.length && normalizedText[i] === " ") {
|
|
139
|
+
chapterVerse += " "
|
|
144
140
|
i++
|
|
145
141
|
}
|
|
146
142
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
143
|
+
// Capture chapter-verse
|
|
144
|
+
while (
|
|
145
|
+
i < lowerCaseText.length &&
|
|
146
|
+
(/[\d]/.test(normalizedText[i]) ||
|
|
147
|
+
normalizedText[i] === ":" ||
|
|
148
|
+
normalizedText[i] === "," ||
|
|
149
|
+
normalizedText[i] === "-")
|
|
150
|
+
) {
|
|
151
|
+
if (normalizedText[i] === ":") hasColon = true
|
|
152
|
+
chapterVerse += normalizedText[i]
|
|
153
|
+
i++
|
|
150
154
|
}
|
|
151
155
|
|
|
152
|
-
//
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
156
|
+
// Only proceed if valid reference
|
|
157
|
+
if (hasColon && chapterVerse.trim().length > 0) {
|
|
158
|
+
let endIndex = i
|
|
159
|
+
let version = null
|
|
160
|
+
|
|
161
|
+
// Detect suffix
|
|
162
|
+
const suffixMatch = normalizedText.substring(i).match(/\b(LXX|MT)\b/i)
|
|
163
|
+
if (suffixMatch) {
|
|
164
|
+
version = suffixMatch[0].toUpperCase()
|
|
165
|
+
endIndex += suffixMatch[0].length
|
|
166
|
+
i += suffixMatch[0].length
|
|
167
|
+
}
|
|
160
168
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
169
|
+
// Handle closing parenthesis
|
|
170
|
+
if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
|
|
171
|
+
endIndex = i + 1
|
|
172
|
+
i++
|
|
173
|
+
}
|
|
165
174
|
|
|
166
|
-
|
|
175
|
+
// Use original text for reference only (exclude parentheses)
|
|
176
|
+
const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
|
|
177
|
+
|
|
178
|
+
// Determine type
|
|
167
179
|
let type
|
|
180
|
+
const ref = chapterVerse.trim()
|
|
168
181
|
if (ref.includes(":")) {
|
|
169
182
|
if (ref.includes("-")) {
|
|
170
183
|
const [start, end] = ref.split("-")
|
|
@@ -190,18 +203,21 @@ class CodexParser {
|
|
|
190
203
|
this.found.push({
|
|
191
204
|
book: foundBook,
|
|
192
205
|
reference: ref,
|
|
193
|
-
startIndex:
|
|
194
|
-
endIndex
|
|
195
|
-
version
|
|
206
|
+
startIndex: hasOpeningParen ? parenStartIndex : startIndex,
|
|
207
|
+
endIndex,
|
|
208
|
+
version,
|
|
196
209
|
type,
|
|
197
|
-
originalText
|
|
210
|
+
originalText,
|
|
198
211
|
})
|
|
199
|
-
}
|
|
212
|
+
} else {
|
|
213
|
+
i = startIndex + 1
|
|
214
|
+
}
|
|
200
215
|
} else {
|
|
201
216
|
i++
|
|
202
217
|
}
|
|
203
218
|
}
|
|
204
219
|
|
|
220
|
+
console.log("Found references:", JSON.stringify(this.found, null, 2)) // Debug
|
|
205
221
|
return this
|
|
206
222
|
}
|
|
207
223
|
|
|
@@ -227,6 +243,9 @@ class CodexParser {
|
|
|
227
243
|
parse(reference) {
|
|
228
244
|
this.scan(reference)
|
|
229
245
|
|
|
246
|
+
// Define non-abbreviated books per SBL/Crossway
|
|
247
|
+
const nonAbbreviatedBooks = ["John", "Luke", "Acts", "Jude", "James", "Titus"]
|
|
248
|
+
|
|
230
249
|
this.passages = this.found.map((passage) => {
|
|
231
250
|
const book = this.bookify(passage.book)
|
|
232
251
|
const testament = this.bible.old.includes(book) ? "old" : "new"
|
|
@@ -254,13 +273,20 @@ class CodexParser {
|
|
|
254
273
|
parsedPassage.scripture = this.scripturize(parsedPassage)
|
|
255
274
|
parsedPassage.valid = this._isValid(parsedPassage, passage.reference)
|
|
256
275
|
|
|
257
|
-
// Set abbr property using SBL-style
|
|
276
|
+
// Set abbr property using SBL-style rules
|
|
258
277
|
const abbrKey = Object.keys(this.abbreviations).find(
|
|
259
278
|
(abbr) => this.abbreviations[abbr].toLowerCase() === book.toLowerCase()
|
|
260
279
|
)
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
:
|
|
280
|
+
if (nonAbbreviatedBooks.includes(book)) {
|
|
281
|
+
// Use full book name without period for non-abbreviated books
|
|
282
|
+
parsedPassage.abbr = `${book} ${passage.reference}${passage.version ? " " + passage.version : ""}`
|
|
283
|
+
} else if (abbrKey) {
|
|
284
|
+
// Use abbreviation with period for abbreviated books
|
|
285
|
+
parsedPassage.abbr = `${abbrKey}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
|
|
286
|
+
} else {
|
|
287
|
+
// Fallback to original if no abbreviation
|
|
288
|
+
parsedPassage.abbr = parsedPassage.original
|
|
289
|
+
}
|
|
264
290
|
|
|
265
291
|
if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
|
|
266
292
|
this.handleMultiChapterRange(parsedPassage, passage.reference)
|
|
@@ -1064,6 +1090,12 @@ class CodexParser {
|
|
|
1064
1090
|
return { name: "English", value: "ENG", abbreviation: "eng" }
|
|
1065
1091
|
}
|
|
1066
1092
|
|
|
1093
|
+
/**
|
|
1094
|
+
* Replaces scripture references in text with formatted references.
|
|
1095
|
+
* @param {string} text - The original text.
|
|
1096
|
+
* @param {boolean} useAbbreviations - Whether to use abbreviated book names.
|
|
1097
|
+
* @returns {string} Text with replaced references.
|
|
1098
|
+
*/
|
|
1067
1099
|
replace(text, useAbbreviations = true) {
|
|
1068
1100
|
if (!this.passages.length) {
|
|
1069
1101
|
console.log("No parsed passages to replace")
|
|
@@ -1071,15 +1103,32 @@ class CodexParser {
|
|
|
1071
1103
|
}
|
|
1072
1104
|
|
|
1073
1105
|
let result = text
|
|
1074
|
-
// Process replacements in reverse order to avoid index shifting
|
|
1075
1106
|
for (let i = this.passages.length - 1; i >= 0; i--) {
|
|
1076
1107
|
const passage = this.passages[i]
|
|
1077
|
-
const {
|
|
1078
|
-
|
|
1079
|
-
// Use abbreviated or full reference
|
|
1108
|
+
const { originalText, abbr, original } = passage
|
|
1080
1109
|
const newReference = useAbbreviations ? abbr : original
|
|
1081
|
-
|
|
1082
|
-
|
|
1110
|
+
|
|
1111
|
+
// Create regex to match originalText with optional parentheses
|
|
1112
|
+
const escapedOriginalText = originalText.replace(/([:.])/g, "\\$1").replace(/\s+/g, "\\s*")
|
|
1113
|
+
const regex = new RegExp(`(\\()?\\s*${escapedOriginalText}\\s*(\\))?`, "g")
|
|
1114
|
+
|
|
1115
|
+
// Find all matches
|
|
1116
|
+
const matches = [...result.matchAll(regex)]
|
|
1117
|
+
if (matches.length > 0) {
|
|
1118
|
+
// Process matches in reverse to avoid index shifting
|
|
1119
|
+
for (let j = matches.length - 1; j >= 0; j--) {
|
|
1120
|
+
const match = matches[j]
|
|
1121
|
+
const startIndex = match.index
|
|
1122
|
+
const endIndex = startIndex + match[0].length
|
|
1123
|
+
// Preserve parentheses if present in the match
|
|
1124
|
+
const hasParens = match[1] === "(" && match[2] === ")"
|
|
1125
|
+
const replacement = hasParens ? `(${newReference})` : newReference
|
|
1126
|
+
console.log(`Replacing "${match[0]}" with "${replacement}" at [${startIndex}, ${endIndex}]`)
|
|
1127
|
+
result = result.slice(0, startIndex) + replacement + result.slice(endIndex)
|
|
1128
|
+
}
|
|
1129
|
+
} else {
|
|
1130
|
+
console.log(`No match found for originalText "${originalText}"`)
|
|
1131
|
+
}
|
|
1083
1132
|
}
|
|
1084
1133
|
|
|
1085
1134
|
return result
|